32 lines
926 B
Python
32 lines
926 B
Python
import json
|
|
from parser import parse_html
|
|
|
|
def test_parse_html():
    """Smoke-test ``parse_html`` against one sample HTML file.

    Parses ``html_files/卷001之1.html``, writes the parsed result to
    ``output.json`` (relative to the current working directory) for manual
    inspection, then checks that the result is a dict and that its first
    entry, if there is one, contains every expected field.

    Raises:
        AssertionError: If the result is not a dict, or the first entry is
            missing one of the expected fields.
    """
    file_path = "html_files/卷001之1.html"
    # Fields the first parsed entry is expected to contain.
    required_fields = ("卷", "声", "韵", "小韵描述", "词条", "对语", "摘句")

    result = parse_html(file_path)

    # Save for manual inspection. This happens before any assertion so the
    # dump is available even when a check below fails; ensure_ascii=False
    # keeps non-ASCII text unescaped in the output file.
    with open("output.json", "w", encoding="utf-8") as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    assert isinstance(result, dict), (
        f"parse_html returned {type(result).__name__}, expected dict"
    )

    keys = list(result)
    print("Keys found:", keys)

    # Only the first entry is spot-checked; an empty result skips the
    # field checks entirely.
    if keys:
        first_key = keys[0]
        entry = result[first_key]
        for field in required_fields:
            assert field in entry, (
                f"entry {first_key!r} is missing field {field!r}"
            )
|
|
|
|
# Script entry point: run the check directly, without a test runner, and
# report success once it returns without raising.
if __name__ == "__main__":
    test_parse_html()
    print("Tests passed!")
|