Files
spider-ctext/佩文韵府/test_heuristic.py
2026-03-22 16:18:35 +08:00

55 lines
1.7 KiB
Python

import json
import re
# Candidate leading markers that replace_pipes() removes from a headword
# before using it.  As visible here, the second entry is an empty string: it
# matches every word but removes nothing.
# NOTE(review): the empty entry may be a character lost in transcription --
# confirm against the original file.
prefixes = ["韻藻", ""]

# Read the whole dataset at import time.  `data` is not referenced anywhere
# else in the visible part of this script.
with open('peiwenyunfu.json', 'r', encoding='utf-8') as f:
    data = json.loads(f.read())
def replace_pipes(content, word, placeholder="丨", strip_prefixes=None):
    """Replace placeholder marks in ``content`` with characters of ``word``.

    Each occurrence of ``placeholder`` is replaced by the next character of
    the (prefix-stripped) headword, cycling through the headword.  When five
    or more ordinary characters separate a placeholder from the previous one
    (or from the start of ``content``), it is treated as the start of a new
    occurrence and the substitution restarts at the headword's first
    character.

    Args:
        content: Text containing placeholder marks.
        word: Headword whose characters are substituted for the marks.
        placeholder: The single character that stands in for a headword
            character.  Defaults to "丨", the mark used by the sample cases
            in this script.
        strip_prefixes: Iterable of leading markers to remove from ``word``
            (at most one is removed, and never the whole word).  Defaults to
            the module-level ``prefixes`` list.

    Returns:
        ``content`` with every placeholder replaced.  ``content`` is returned
        unchanged when the headword is empty.

    Raises:
        ValueError: If ``placeholder`` is not exactly one character long.
    """
    if len(placeholder) != 1:
        # The scan below compares one character at a time, so a longer (or
        # empty) placeholder could never match.
        raise ValueError("placeholder must be a single character")

    if strip_prefixes is None:
        strip_prefixes = prefixes

    clean_word = word
    for p in strip_prefixes:
        # An empty prefix matches every word yet strips nothing; skip it so
        # it cannot end the search before a real prefix is tried.
        if p and clean_word.startswith(p) and len(clean_word) > len(p):
            clean_word = clean_word[len(p):]
            break  # only strip one prefix

    word_len = len(clean_word)
    if word_len == 0:
        # Nothing to substitute; leave the marks in place.
        return content

    # Minimum run of ordinary characters that signals a new occurrence.
    reset_gap = 5

    result = []
    pipe_idx = 0
    chars_since_last_pipe = 0
    for char in content:
        if char == placeholder:
            if chars_since_last_pipe >= reset_gap:
                # Long gap: this mark begins a fresh occurrence of the word.
                pipe_idx = 0
            # The modulo keeps the index valid when a run of marks is longer
            # than the headword.
            result.append(clean_word[pipe_idx % word_len])
            pipe_idx += 1
            chars_since_last_pipe = 0
        else:
            result.append(char)
            chars_since_last_pipe += 1
    return "".join(result)
# Hand-picked (headword, citation) pairs used to inspect the output of
# replace_pipes() by eye; nothing is asserted.
test_cases = [
    ("首陽東", "詩采葑采葑丨丨之丨"),
    ("馬首東", "左傳欒黶曰吾丨丨欲丨乃歸下軍從之"),
    ("澗瀍東", "書洛誥我乃卜丨水東丨水西惟洛食我又卜瀍水丨亦惟洛食"),
    ("日夜東", "丨丨虞集詩絳桃風急丨丨丨王惲詩付與衡漳丨丨丨許有壬詩江水舟"),
    ("東海東", "樓鑰詩萬里逺在丨丨丨張經詩崑崙之西丨"),
]

# Print each headword, its raw citation and the rewritten citation, followed
# by a 40-dash rule.
separator = "-" * 40
for headword, citation in test_cases:
    print(f"Word: {headword}")
    print(f"Orig: {citation}")
    print(f"Fix : {replace_pipes(citation, headword)}")
    print(separator)