# Source listing: 30 lines, 1.1 KiB, Python
import asyncio
|
|
from playwright.async_api import async_playwright
|
|
|
|
async def main() -> None:
    """Fetch one ctext.org wiki page with headless Chromium and print a preview.

    Launches Chromium through Playwright, opens a browser context that
    presents a desktop Chrome user agent and a 1920x1080 viewport, loads the
    page, then prints its title and the first 500 characters of the body's
    ``innerText``.

    Any ``Exception`` raised while loading or reading the page is reported on
    stdout rather than propagated. The browser is closed on every exit path,
    including failures while creating the context or page and task
    cancellation.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            # Using a convincing user agent
            context = await browser.new_context(
                user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                viewport={"width": 1920, "height": 1080},
                java_script_enabled=True,
            )
            page = await context.new_page()

            print("Fetching CText...")
            try:
                await page.goto(
                    "https://ctext.org/wiki.pl?if=gb&res=87723&remap=gb",
                    timeout=30000,
                )
                # Fixed 3 s pause: wait a bit for CF or similar before
                # reading the page.
                await page.wait_for_timeout(3000)

                title = await page.title()
                print(f"CText Title: {title}")

                # Extract the rendered text of the whole document body.
                content = await page.evaluate("() => document.body.innerText")
                print(f"CText Content preview:\n{content[:500]}")
            except Exception as e:
                # Best-effort probe: report the failure and fall through to
                # the cleanup below instead of crashing.
                print(f"CText Playwright Error: {e}")
        finally:
            # Runs even when setup fails or the task is cancelled, so the
            # browser process is never left behind.
            await browser.close()
|
|
|
|
# Run the probe only when this file is executed as a script, so that
# importing the module does not launch a browser as a side effect.
if __name__ == "__main__":
    asyncio.run(main())
|