Files
spider-ctext/佩文韵府/test_parser.py
2026-03-22 16:18:35 +08:00

32 lines
926 B
Python

import json
from parser import parse_html
def test_parse_html():
    """Smoke-test ``parse_html`` on the sample page ``html_files/卷001之1.html``.

    The parsed result is dumped to ``output.json`` (relative to the current
    working directory) for manual inspection, then checked to be a ``dict``.
    When the dict is non-empty, the value stored under its first key must
    contain every expected field name.
    """
    parsed = parse_html("html_files/卷001之1.html")

    # The dump is written before any assertion runs, so it is available for
    # inspection even when one of the checks below fails.
    with open("output.json", "w", encoding="utf-8") as dump_file:
        json.dump(parsed, dump_file, ensure_ascii=False, indent=2)

    assert isinstance(parsed, dict)

    found_keys = list(parsed)
    print("Keys found:", found_keys)
    if not found_keys:
        # Nothing parsed: only the type check above applies.
        return

    entry = parsed[found_keys[0]]
    # NOTE(review): the three empty-string names are kept exactly as found;
    # they look like characters that were lost somewhere upstream -- confirm
    # against the keys that parse_html actually emits.
    expected_fields = ("", "", "", "小韵描述", "词条", "对语", "摘句")
    for field_name in expected_fields:
        assert field_name in entry
# Script entry point: lets the check run directly, without a test runner.
if __name__ == "__main__":
    # A failing assertion raises before the success message is printed.
    test_parse_html()
    print("Tests passed!")