Update: 初学记、佩文韵府 and 五车韵瑞

2026-03-22 16:18:35 +08:00
parent df475fd03f
commit 183b842090
553 changed files with 754048 additions and 169 deletions
--- a/佩文韵府/test_strip.py
+++ b/佩文韵府/test_strip.py
@@ -0,0 +1,32 @@
+"""Check "丨" counts against prefix-stripped headword lengths in peiwenyunfu.json."""
+import json
+import re
+
+with open('peiwenyunfu.json', 'r', encoding='utf-8') as f:
+    data = json.load(f)
+
+prefixes = ["韻藻", "增"]  # stripped from the front of a headword, in this order
+mismatches = []  # (word, clean_word, pipe_count, content) for each failing entry
+total_pipes = 0  # number of entries containing at least one "丨" (not a pipe total)
+
+for rhyme, r_data in data.items():
+    if rhyme in ["metadata", "preface"]:
+        continue  # these two top-level keys are not scanned for entries
+    for word, content in r_data.get("词条", {}).items():
+        clean_word = word
+        for p in prefixes:
+            if clean_word.startswith(p) and len(clean_word) > len(p):  # never strip to ""
+                clean_word = clean_word[len(p):]
+        pipe_count = content.count("丨")
+        if pipe_count == 0:
+            continue
+        total_pipes += 1
+        # An empty headword would make the modulo divide by zero; report it instead.
+        if not clean_word or pipe_count % len(clean_word) != 0:
+            mismatches.append((word, clean_word, pipe_count, content))
+
+print(f"Total entries with pipes: {total_pipes}")
+print(f"Total mismatches after stripping: {len(mismatches)}")
+for w, cw, p, c in mismatches[:10]:
+    print(f"{w} -> {cw} (len {len(cw)}), pipes: {p}")
+    print(f"  {c}")