Update: 初学记、佩文韵府 and 五车韵瑞
This commit is contained in:
18
佩文韵府/test_split.py
Normal file
18
佩文韵府/test_split.py
Normal file
@@ -0,0 +1,18 @@
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Sample page used for this exploratory split test.
HTML_PATH = "html_files/卷001之1.html"

# Group 1: the tone heading.
# Group 2: the rhyme title -- the rest of a single line that must contain 韻
#          (e.g. "一東韻一" or similar).
# Group 3: the whole following line, which holds the list of characters.
# The last group does not require a trailing newline, so a header sitting at
# the very end of the text is still recognised.
HEADER_RE = re.compile(r"(上平聲|下平聲|上聲|去聲|入聲)\s+(.*?韻.*?)\n(.*)")


def parse_rhyme_header(text):
    """Locate the first tone/rhyme header in *text* and split its characters.

    Args:
        text: Plain text to search.

    Returns:
        ``None`` when no header is found; otherwise a tuple
        ``(tone, rhyme, chars_line, rhyme_chars)`` where ``chars_line`` is the
        raw line following the rhyme title and ``rhyme_chars`` is the list of
        whitespace-separated tokens on that line that are exactly one
        character long.
    """
    m = HEADER_RE.search(text)
    if m is None:
        return None
    tone, rhyme, chars_line = m.groups()
    # str.split() with no argument splits on any Unicode whitespace, so
    # full-width / ideographic spaces need no separate normalisation.
    rhyme_chars = [c for c in chars_line.split() if len(c) == 1]
    return tone, rhyme, chars_line, rhyme_chars


def main(path=HTML_PATH):
    """Parse the HTML file at *path* and print its first rhyme header.

    Raises:
        SystemExit: if the page has no ``<div class="poem">`` element.
    """
    with open(path, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f.read(), "html.parser")

    poem_div = soup.find("div", class_="poem")
    if poem_div is None:
        # find() returns None on a miss; fail with a readable message rather
        # than an AttributeError on the next line.
        raise SystemExit(f'No <div class="poem"> found in {path}')

    header = parse_rhyme_header(poem_div.get_text())
    if header is None:
        print("No rhyme header found")
        return

    tone, rhyme, chars_line, rhyme_chars = header
    print("Tone:", tone)
    print("Rhyme:", rhyme)
    print("Chars line:", chars_line)
    print("Chars:", rhyme_chars)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user