"""Exploratory scraper for a single shidianguji.com chapter page.

Fetches the page, then prints three diagnostics to stdout:

1. Any <script> tag whose inline text mentions ``__INIT_DATA__`` or
   ``window.__INITIAL_STATE__`` (its length and first 500 characters).
2. The number of <p> elements and the text of the first five.
3. Up to 500 characters of the page's plain text starting at the first
   occurrence of "五车韵瑞", if that string is present.
"""
import re

import requests
from bs4 import BeautifulSoup

url = "https://www.shidianguji.com/book/CADAL02059421/chapter/1lmkv0n02yhom?version=2"
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'
}

# requests has no default timeout; without one a stalled connection would
# block the script forever.
response = requests.get(url, headers=headers, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page as if it were
# the chapter content.
response.raise_for_status()

# When the server sends a text/* Content-Type without a charset, requests
# falls back to ISO-8859-1, which would garble non-Latin text and make the
# substring search below miss. Use the detected encoding in that case.
if 'charset' not in response.headers.get('Content-Type', '').lower():
    response.encoding = response.apparent_encoding

soup = BeautifulSoup(response.text, 'html.parser')

# Check script tags for "__INIT_DATA__" or similar state hydration
scripts = soup.find_all('script')
for s in scripts:
    # s.string is None for tags without a single text child, hence the
    # truthiness guard before the substring tests.
    if s.string and ('__INIT_DATA__' in s.string or 'window.__INITIAL_STATE__' in s.string):
        print(f"Found init state data of length: {len(s.string)}")
        print(s.string[:500])

# Check normal text elements
content = soup.find_all('p')
print(f"Found {len(content)} paragraphs.")
# Slicing an empty result list yields nothing, so no emptiness check is needed.
for p in content[:5]:
    print(p.text)

print("\n--- Let's look at another part ---")
# Try extracting text directly
text = soup.get_text()
# Find the title or some known text like "五车韵瑞"
idx = text.find("五车韵瑞")
if idx != -1:
    print(text[idx:idx+500])