# Source listing metadata (from the file viewer): Python, 33 lines, 967 B.
import json
import re

# Path of the JSON dump analysed when this file is run as a script.
DATA_PATH = 'peiwenyunfu.json'

# Top-level keys of the dump that are skipped rather than scanned.
SKIP_KEYS = ("metadata", "preface")

# Markers removed from the front of a headword before its length is measured.
PREFIXES = ("韻藻", "增")

# Character counted inside each entry's content.
# NOTE(review): presumably a placeholder standing for one headword
# character -- confirm against the data set's conventions.
PLACEHOLDER = "丨"

# Maximum number of mismatching entries listed in the printed report.
REPORT_LIMIT = 10


def strip_prefixes(word, prefixes=PREFIXES):
    """Return *word* with leading markers from *prefixes* removed.

    Each marker is tried once, in the order given. A marker is removed
    only when at least one character would remain afterwards, so a word
    that consists solely of the marker is returned unchanged.
    """
    for prefix in prefixes:
        if word.startswith(prefix) and len(word) > len(prefix):
            word = word[len(prefix):]
    return word


def find_mismatches(data, prefixes=PREFIXES):
    """Collect entries whose placeholder count does not fit the headword.

    Args:
        data: Mapping of top-level key to a mapping that may contain a
            "词条" mapping of headword -> content. Keys listed in
            ``SKIP_KEYS`` are ignored. Content is expected to support
            ``.count`` (assumed to be a string -- TODO confirm).
        prefixes: Markers stripped from each headword before measuring it.

    Returns:
        A ``(entries_with_pipes, mismatches)`` tuple. The first item is
        the number of entries whose content contains at least one
        placeholder. The second is a list of
        ``(word, clean_word, pipe_count, content)`` tuples for entries
        whose placeholder count is not a multiple of the stripped
        headword's length.
    """
    entries_with_pipes = 0
    mismatches = []
    for rhyme, r_data in data.items():
        if rhyme in SKIP_KEYS:
            continue
        for word, content in r_data.get("词条", {}).items():
            pipe_count = content.count(PLACEHOLDER)
            if pipe_count == 0:
                continue
            entries_with_pipes += 1
            clean_word = strip_prefixes(word, prefixes)
            # An empty headword cannot account for any placeholder;
            # flagging it also avoids taking a modulo by zero.
            if not clean_word or pipe_count % len(clean_word) != 0:
                mismatches.append((word, clean_word, pipe_count, content))
    return entries_with_pipes, mismatches


def format_report(total_pipes, mismatches, limit=REPORT_LIMIT):
    """Return the report as a list of lines, one per printed line.

    The first two lines are the totals; they are followed by two lines
    for each of the first *limit* mismatches.
    """
    lines = [
        f"Total entries with pipes: {total_pipes}",
        f"Total mismatches after stripping: {len(mismatches)}",
    ]
    for word, clean_word, pipe_count, content in mismatches[:limit]:
        lines.append(
            f"{word} -> {clean_word} (len {len(clean_word)}), pipes: {pipe_count}"
        )
        lines.append(f" {content}")
    return lines


def main(path=DATA_PATH):
    """Load the JSON file at *path*, run the check and print the report."""
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    total_pipes, mismatches = find_mismatches(data)
    for line in format_report(total_pipes, mismatches):
        print(line)


if __name__ == "__main__":
    main()