Update: 初学记、佩文韵府 and 五车韵瑞

2026-03-22 16:18:35 +08:00
parent df475fd03f
commit 183b842090
553 changed files with 754048 additions and 169 deletions
--- a/佩文韵府/fix_pipes.py
+++ b/佩文韵府/fix_pipes.py
@@ -0,0 +1,69 @@
+import json
+import re
+
+print("Loading peiwenyunfu.json...")
+# Read the whole dictionary into memory; it is modified in place further
+# down and written back to the same path at the end of the script.
+with open('peiwenyunfu.json', mode='r', encoding='utf-8') as src:
+    data = json.loads(src.read())
+
+# Leading markers that are stripped from a headword key before its characters
+# are used to expand placeholders.
+prefixes = ["韻藻", "增", "増"]
+
+
+def clean_headword(word):
+    """Return ``word`` with leading markers from ``prefixes`` removed.
+
+    The markers are tried in list order and the whole list is scanned twice,
+    so stacked markers such as "増韻藻…" are peeled off as well. A marker is
+    only removed when at least one character would remain afterwards; a word
+    that consists of nothing but a marker is returned unchanged.
+    """
+    stripped = word
+    passes_left = 2
+    while passes_left:
+        passes_left -= 1
+        for marker in prefixes:
+            # Never strip a marker if that would leave an empty headword.
+            if len(stripped) <= len(marker):
+                continue
+            if stripped.startswith(marker):
+                stripped = stripped[len(marker):]
+    return stripped
+
+def replace_pipes_in_content(content, word):
+    """Expand the "丨" placeholders in ``content`` using the entry's headword.
+
+    The headword is first normalised with ``clean_headword``. Every run of
+    consecutive "丨" characters is then rewritten:
+
+    * a run whose length is a multiple of the headword length becomes that
+      many whole copies of the headword, and the running position is reset
+      to the start of the headword;
+    * any other run is filled one character at a time, continuing from the
+      position reached by the previous partial run and wrapping around at
+      the end of the headword.
+
+    ``content`` is returned unchanged when it contains no placeholder or when
+    the cleaned headword is empty.
+    """
+    headword = clean_headword(word)
+    size = len(headword)
+    if not size or "丨" not in content:
+        return content
+
+    # Position inside the headword, shared by all partial runs of this entry.
+    cursor = 0
+
+    def expand(match):
+        nonlocal cursor
+        run_len = len(match.group(0))
+        copies, leftover = divmod(run_len, size)
+        if leftover == 0:
+            # The run stands for whole headwords: realign to the first char.
+            cursor = 0
+            return headword * copies
+        # Partial run: carry on from wherever the previous partial run ended.
+        start = cursor
+        cursor += run_len
+        return "".join(
+            headword[pos % size] for pos in range(start, start + run_len)
+        )
+
+    return re.sub(r'丨+', expand, content)
+
+print("Processing...")
+for rhyme, r_data in data.items():
+    # "metadata" and "preface" are skipped; all other keys are processed.
+    if rhyme == "metadata" or rhyme == "preface":
+        continue
+
+    # 1. 小韵描述: each placeholder is replaced by this section's own key.
+    # The dictionary key is used verbatim on purpose, even though it may be
+    # the simplified form of the rhyme character (e.g. "东").
+    description = r_data["小韵描述"] if "小韵描述" in r_data else None
+    if description:
+        r_data["小韵描述"] = description.replace("丨", rhyme)
+
+    # 2. 词条: expand the placeholders of every entry from its headword key.
+    if "词条" in r_data:
+        r_data["词条"] = {
+            headword: replace_pipes_in_content(text, headword)
+            for headword, text in r_data["词条"].items()
+        }
+
+print("Saving peiwenyunfu.json...")
+# The input file is overwritten in place with the expanded data.
+with open('peiwenyunfu.json', mode='w', encoding='utf-8') as out:
+    json.dump(data, out, ensure_ascii=False, indent=4)
+print("Done!")