import json
from parser import parse_html
def test_parse_html():
    """Smoke-test ``parse_html`` against one sample HTML file.

    Parses ``html_files/卷001之1.html``, writes the parsed result to
    ``output.json`` (path relative to the current working directory) for
    manual inspection, then checks that the result is a ``dict`` and that
    its first entry contains every expected field.

    Only the first entry is checked. An empty result passes the type check
    and skips the per-entry field checks entirely.

    Raises:
        AssertionError: if the result is not a dict, or if the first entry
            lacks any of the expected fields (the message lists them).
    """
    file_path = "html_files/卷001之1.html"
    result = parse_html(file_path)

    # Written before the assertions run, so the file exists for inspection
    # even when one of the checks below fails.
    with open("output.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    assert isinstance(result, dict), (
        f"parse_html should return a dict, got {type(result).__name__}"
    )

    keys = list(result)
    print("Keys found:", keys)

    # Nothing to inspect when the parser produced no entries.
    if not keys:
        return

    required_fields = ("卷", "声", "韵", "小韵描述", "词条", "对语", "摘句")
    first_key = keys[0]
    first_entry = result[first_key]

    # Collect every missing field so a single failure reports all of them.
    missing = [field for field in required_fields if field not in first_entry]
    assert not missing, f"entry {first_key!r} is missing fields: {missing}"
# Script entry point: lets the file be run directly, without a test runner.
if __name__ == "__main__":
    test_parse_html()
    # Reached only when the call above returned without raising.
    print("Tests passed!")