Update: 初学记、佩文韵府 and 五车韵瑞
This commit is contained in:
31
佩文韵府/test_parser.py
Normal file
31
佩文韵府/test_parser.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import json
|
||||
from parser import parse_html
|
||||
|
||||
def test_parse_html():
    """Parse one HTML page and check the shape of the first parsed entry.

    The parsed result is also written to ``output.json`` in the current
    working directory so it can be inspected by hand.
    """
    parsed = parse_html("html_files/卷001之1.html")

    # Dump the result before asserting, so the file is available for manual
    # inspection even when one of the checks below fails.
    with open("output.json", "w", encoding="utf-8") as out:
        json.dump(parsed, out, ensure_ascii=False, indent=2)

    # The parser must hand back a dictionary.
    assert isinstance(parsed, dict)

    entry_names = list(parsed.keys())
    print("Keys found:", entry_names)

    # An empty result has no entry to check.
    if not entry_names:
        return

    # Only the first entry is checked for the expected fields.
    first_entry = parsed[entry_names[0]]
    for field in ("卷", "声", "韵", "小韵描述", "词条", "对语", "摘句"):
        assert field in first_entry
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this file directly as a script, without a test runner.
    test_parse_html()
    print("Tests passed!")
|
||||
Reference in New Issue
Block a user