Update: 初学记、佩文韵府 and 五车韵瑞

2026-03-22 16:18:35 +08:00
parent df475fd03f
commit 183b842090
553 changed files with 754048 additions and 169 deletions
--- a/佩文韵府/test_placeholder.py
+++ b/佩文韵府/test_placeholder.py
@@ -0,0 +1,49 @@
+import json
+import re
+
+with open('peiwenyunfu.json', 'r', encoding='utf-8') as f:
+    data = json.load(f)
+
+# Analyze a few
+count_match = 0
+count_mismatch = 0
+
+for rhyme, r_data in list(data.items())[:5]: # Skip metadata, preface
+    if rhyme in ["metadata", "preface"]:
+        continue
+    print(f"\nRhyme: {rhyme}")
+    
+    # 1. 小韵描述
+    desc = r_data.get("小韵描述", "")
+    desc_fixed = desc.replace("丨", rhyme)
+    print(f"Desc original: {desc[:30]}...")
+    print(f"Desc fixed:    {desc_fixed[:30]}...")
+    
+    # 2. 词条
+    for word, content in list(r_data.get("词条", {}).items())[:5]:
+        pipe_count = content.count("丨")
+        word_len = len(word)
+        if pipe_count == 0:
+            continue
+        
+        print(f"Word: {word} (len {word_len}), pipes: {pipe_count}")
+        print(f"Original: {content}")
+        
+        # Test replacing
+        if pipe_count % word_len == 0:
+            # We can replace them in groups of word_len
+            fixed_content = ""
+            pipe_idx = 0
+            for char in content:
+                if char == "丨":
+                    fixed_content += word[pipe_idx % word_len]
+                    pipe_idx += 1
+                else:
+                    fixed_content += char
+            print(f"Fixed:    {fixed_content}")
+            count_match += 1
+        else:
+            print("MISMATCH length!")
+            count_mismatch += 1
+
+print(f"\nMatches: {count_match}, Mismatches: {count_mismatch}")