Loops and element handles in Playwright
In [1]:
Copied!
# There are two versions of Playwright:
# the synchronous version and the async version.
# You can't use the synchronous version in Jupyter,
# so we'll import the async one.
import asyncio
from playwright.async_api import async_playwright
# There are two versions of Playwright:
# the synchronous version and the async version.
# You can't use the synchronous version in Jupyter,
# so we'll import the async one.
import asyncio
from playwright.async_api import async_playwright
In [2]:
Copied!
# Visit a page using chromium (could also do .firefox or .webkit)
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless = False)
page = await browser.new_page()
# Visit a page using chromium (could also do .firefox or .webkit)
playwright = await async_playwright().start()
browser = await playwright.chromium.launch(headless = False)
page = await browser.new_page()
Out[2]:
<Response url='https://www.nytimes.com/' request=<Request url='https://www.nytimes.com/' method='GET'>>
In [12]:
Copied!
# Navigate to the page served at localhost:8000. Because this is the cell's
# last bare expression, the returned Response is what the notebook displays.
await page.goto('http://localhost:8000')
# Navigate to the page served at localhost:8000. Because this is the cell's
# last bare expression, the returned Response is what the notebook displays.
await page.goto('http://localhost:8000')
Out[12]:
<Response url='http://localhost:8000/' request=<Request url='http://localhost:8000/' method='GET'>>
In [14]:
Copied!
links = page.locator('a')
count = await links.count()
for i in range(count):
print("Looking at", i)
link = links.nth(i)
await link.click()
print(await page.content())
await page.go_back()
print("Done")
links = page.locator('a')
count = await links.count()
for i in range(count):
print("Looking at", i)
link = links.nth(i)
await link.click()
print(await page.content())
await page.go_back()
print("Done")
Looking at 0 <html><head></head><body>First page</body></html> Looking at 1 <html><head></head><body>Second page</body></html> Done
In [24]:
Copied!
links = await page.locator('a').element_handles()
for link in links:
text = await link.text_content()
href = await link.get_attribute('href')
print("Link text is", text)
print("Link URL is", href)
links = await page.locator('a').element_handles()
for link in links:
text = await link.text_content()
href = await link.get_attribute('href')
print("Link text is", text)
print("Link URL is", href)
Link text is one Link URL is 1.html Link text is two Link URL is 2.html
In [25]:
Copied!
links = page.locator('a')
count = await links.count()
for i in range(count):
link = links.nth(i)
text = await link.text_content()
href = await link.get_attribute('href')
print("Link text is", text)
print("Link URL is", href)
links = page.locator('a')
count = await links.count()
for i in range(count):
link = links.nth(i)
text = await link.text_content()
href = await link.get_attribute('href')
print("Link text is", text)
print("Link URL is", href)
Link text is one Link URL is 1.html Link text is two Link URL is 2.html
In [26]:
Copied!
links = await page.locator('a').element_handles()
for link in links:
text = await link.text_content()
href = await link.get_attribute('href')
print("Link text is", text)
print("Link URL is", href)
await link.click()
await page.go_back()
links = await page.locator('a').element_handles()
for link in links:
text = await link.text_content()
href = await link.get_attribute('href')
print("Link text is", text)
print("Link URL is", href)
await link.click()
await page.go_back()
Link text is one Link URL is 1.html
--------------------------------------------------------------------------- Error Traceback (most recent call last) /var/folders/_m/b8tjbm6n4zs1q2mvjvg25x1m0000gn/T/ipykernel_56635/1937803864.py in <cell line: 3>() 2 3 for link in links: ----> 4 text = await link.text_content() 5 href = await link.get_attribute('href') 6 print("Link text is", text) ~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/playwright/async_api/_generated.py in text_content(self) 1531 """ 1532 -> 1533 return mapping.from_maybe_impl(await self._impl_obj.text_content()) 1534 1535 async def inner_text(self) -> str: ~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/playwright/_impl/_element_handle.py in text_content(self) 68 69 async def text_content(self) -> Optional[str]: ---> 70 return await self._channel.send("textContent") 71 72 async def inner_text(self) -> str: ~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/playwright/_impl/_connection.py in send(self, method, params) 41 42 async def send(self, method: str, params: Dict = None) -> Any: ---> 43 return await self._connection.wrap_api_call( 44 lambda: self.inner_send(method, params, False) 45 ) ~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/playwright/_impl/_connection.py in wrap_api_call(self, cb, is_internal) 367 self._api_zone.set(metadata) 368 try: --> 369 return await cb() 370 finally: 371 self._api_zone.set(None) ~/.pyenv/versions/3.10.3/lib/python3.10/site-packages/playwright/_impl/_connection.py in inner_send(self, method, params, return_as_dict) 76 if not callback.future.done(): 77 callback.future.cancel() ---> 78 result = next(iter(done)).result() 79 # Protocol now has named return values, assume result is one level deeper unless 80 # there is explicit ambiguity. Error: Element is not attached to the DOM
In [ ]:
Copied!