Update: 初学记、佩文韵府 and 五车韵瑞
This commit is contained in:
12
佩文韵府/test_dump2.py
Normal file
12
佩文韵府/test_dump2.py
Normal file
@@ -0,0 +1,12 @@
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Debug helper: print the first 50 non-blank text lines found inside the
# <div class="poem"> element of one scraped HTML page.

# Parse the page; the handle is only needed while the markup is being read.
with open("html_files/卷001之1.html", "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f.read(), "html.parser")

poem_div = soup.find("div", class_="poem")

# Nothing is printed when the page has no matching <div>.
if poem_div:
    # Split the element's text on newlines, trim each piece once, and keep
    # only the pieces that still contain something after trimming.
    trimmed = (piece.strip() for piece in poem_div.get_text().split("\n"))
    lines = [piece for piece in trimmed if piece]

    # Show at most the first 50 entries, numbered from zero.
    for i, line in enumerate(lines[:50]):
        print(f"{i}: {line}")
|
||||
Reference in New Issue
Block a user