Update: 初学记、佩文韵府 and 五车韵瑞

2026-03-22 16:18:35 +08:00
parent df475fd03f
commit 183b842090
553 changed files with 754048 additions and 169 deletions
--- a/佩文韵府/test_heuristic.py
+++ b/佩文韵府/test_heuristic.py
@@ -0,0 +1,54 @@
+import json
+import re
+
+with open('peiwenyunfu.json', 'r', encoding='utf-8') as f:
+    data = json.load(f)  # NOTE(review): `data` is not used anywhere in the visible script
+
+prefixes = ["韻藻", "增"]  # leading markers replace_pipes strips from a word (at most one)
+
+def replace_pipes(content, word, *, gap_threshold=5, strip_prefixes=None):
+    """Fill each "丨" placeholder in ``content`` with characters of ``word``.
+
+    One leading prefix (from ``strip_prefixes``, default: module ``prefixes``)
+    is dropped from ``word`` if something remains after it. Placeholders take
+    the word's characters in order, wrapping at its end; a placeholder that
+    follows ``gap_threshold`` or more ordinary characters restarts at the
+    first character. An empty ``word`` just deletes the placeholders.
+    """
+    clean_word = word
+    for p in (prefixes if strip_prefixes is None else strip_prefixes):
+        if clean_word.startswith(p) and len(clean_word) > len(p):
+            clean_word = clean_word[len(p):]
+            break  # only strip one prefix
+    if not clean_word:
+        return content.replace("丨", "")
+
+    result = []
+    pipe_idx = 0  # position in clean_word for the next placeholder
+    gap = 0  # ordinary characters seen since the last placeholder
+    for char in content:
+        if char != "丨":
+            result.append(char)
+            gap += 1
+            continue
+        if gap >= gap_threshold:
+            pipe_idx = 0  # long gap: treat as a new occurrence of the word
+        result.append(clean_word[pipe_idx % len(clean_word)])
+        pipe_idx += 1
+        gap = 0
+    return "".join(result)
+
+# Sample (headword, passage) pairs to run through replace_pipes.
+test_cases = [
+    ("首陽東", "詩采葑采葑丨丨之丨"),
+    ("馬首東", "左傳欒黶曰吾丨丨欲丨乃歸下軍從之"),
+    ("澗瀍東", "書洛誥我乃卜丨水東丨水西惟洛食我又卜瀍水丨亦惟洛食"),
+    ("日夜東", "丨丨虞集詩絳桃風急丨丨丨王惲詩付與衡漳丨丨丨許有壬詩江水舟"),
+    ("東海東", "樓鑰詩萬里逺在丨丨丨張經詩崑崙之西丨"),
+]
+
+# Show each sample before and after substitution, followed by a divider.
+for headword, passage in test_cases:
+    fixed = replace_pipes(passage, headword)
+    print(f"Word: {headword}", f"Orig: {passage}", sep="\n")
+    print(f"Fix : {fixed}", "-" * 40, sep="\n")