Update: 初学记、佩文韵府 and 五车韵瑞
This commit is contained in:
32
佩文韵府/test_strip.py
Normal file
32
佩文韵府/test_strip.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""Check that the count of "丨" placeholders in each entry fits its headword.

For every entry under the "词条" key of each rhyme section in
``peiwenyunfu.json``, the headword is stripped of the known marker prefixes
and the number of "丨" characters in the entry text is compared with the
length of the stripped headword.  Entries whose placeholder count is not a
multiple of that length are reported.
"""
import json
import re

# Marker prefixes that may precede the real headword in an entry key.
prefixes = ["韻藻", "增"]

# Top-level keys of the JSON document that are not rhyme sections.
NON_RHYME_KEYS = ("metadata", "preface")

# Character counted in entry text as a per-character placeholder.
PLACEHOLDER = "丨"


def strip_prefixes(word, markers):
    """Return *word* with each leading marker in *markers* removed in order.

    A marker is removed only when something is left afterwards, so a word
    that consists solely of a marker is returned unchanged.
    """
    for marker in markers:
        if word.startswith(marker) and len(word) > len(marker):
            word = word[len(marker):]
    return word


def find_mismatches(data, markers):
    """Scan *data* and collect entries whose placeholder count does not fit.

    Args:
        data: Mapping of rhyme name to a section mapping; a section may
            hold an "词条" mapping of headword to entry text.
        markers: Prefixes to strip from each headword before measuring it.

    Returns:
        A ``(entries_with_pipes, mismatches)`` tuple.  ``entries_with_pipes``
        is the number of entries containing at least one placeholder;
        ``mismatches`` is a list of
        ``(word, clean_word, pipe_count, content)`` tuples.
    """
    entries_with_pipes = 0
    mismatches = []
    for rhyme, section in data.items():
        if rhyme in NON_RHYME_KEYS:
            continue
        for word, content in section.get("词条", {}).items():
            pipe_count = content.count(PLACEHOLDER)
            if pipe_count == 0:
                continue
            entries_with_pipes += 1
            clean_word = strip_prefixes(word, markers)
            # An empty headword cannot account for any placeholder; report it
            # as a mismatch instead of dividing by zero.
            if not clean_word or pipe_count % len(clean_word) != 0:
                mismatches.append((word, clean_word, pipe_count, content))
    return entries_with_pipes, mismatches


def main():
    """Load the dictionary JSON, run the check and print a short report."""
    with open('peiwenyunfu.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    total_pipes, mismatches = find_mismatches(data, prefixes)

    print(f"Total entries with pipes: {total_pipes}")
    print(f"Total mismatches after stripping: {len(mismatches)}")
    # Only the first few offenders are shown to keep the output readable.
    for w, cw, p, c in mismatches[:10]:
        print(f"{w} -> {cw} (len {len(cw)}), pipes: {p}")
        print(f" {c}")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user