Update: 初学记、佩文韵府 and 五车韵瑞

This commit is contained in:
denglifan
2026-03-22 16:18:35 +08:00
parent df475fd03f
commit 183b842090
553 changed files with 754048 additions and 169 deletions

View File

@@ -0,0 +1,31 @@
import json
from parser import parse_html
def test_parse_html():
    """Smoke-test ``parse_html`` against one sample HTML page.

    Parses ``html_files/卷001之1.html``, dumps the parsed result to
    ``output.json`` in the current working directory for manual inspection,
    then checks that the result is a dict and that its first entry contains
    every expected key. An empty result passes without per-entry checks.

    Raises:
        AssertionError: If the result is not a dict, or the first entry
            lacks one of the expected keys.
    """
    file_path = "html_files/卷001之1.html"
    # Keys every parsed entry is expected to expose.
    expected_keys = (
        # NOTE(review): these three empty-string keys look like characters
        # that were lost somewhere along the way -- confirm the intended key
        # names against the parser output before relying on these checks.
        "",
        "",
        "",
        "小韵描述",
        "词条",
        "对语",
        "摘句",
    )

    result = parse_html(file_path)

    # Dump before asserting so the file is available for inspection even
    # when one of the checks below fails.
    with open("output.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    assert isinstance(result, dict), (
        f"parse_html returned {type(result).__name__}, expected dict"
    )

    keys = list(result.keys())
    print("Keys found:", keys)
    if not keys:
        # Nothing was parsed; there is no entry to inspect.
        return

    first_key = keys[0]
    first_entry = result[first_key]
    for key in expected_keys:
        assert key in first_entry, (
            f"entry {first_key!r} is missing expected key {key!r}"
        )
if __name__ == "__main__":
    # Allow running this file directly as a script, without a test runner:
    # any failed assertion aborts before the success message is printed.
    test_parse_html()
    print("Tests passed!")