"""Expand the "丨" placeholder characters stored in peiwenyunfu.json.

For every top-level section of the JSON file (except the "metadata" and
"preface" keys):

* in the "小韵描述" string, each "丨" is replaced by the section's own key;
* in every "词条" entry, runs of "丨" are replaced by characters of the
  entry's headword (after stripping the marker prefixes in ``prefixes``).

The file is rewritten in place when the module is run as a script.
"""
import contextlib
import json
import os
import re

JSON_PATH = 'peiwenyunfu.json'

# Marker prefixes that may precede the real headword in a "词条" key.
prefixes = ["韻藻", "增", "増"]

# Top-level keys that are not rhyme sections and must be left untouched.
_SKIPPED_KEYS = ("metadata", "preface")

# One or more consecutive placeholder characters.
_PIPE_RUN = re.compile(r'丨+')


def clean_headword(word):
    """Return *word* with any leading marker prefixes removed.

    Two passes are made over ``prefixes`` so that stacked prefixes such as
    "増韻藻…" are removed regardless of their order.  A prefix is only
    stripped when something remains afterwards, so a headword that is
    itself equal to a prefix is returned unchanged.
    """
    clean_word = word
    for _ in range(2):
        for p in prefixes:
            if clean_word.startswith(p) and len(clean_word) > len(p):
                clean_word = clean_word[len(p):]
    return clean_word


def replace_pipes_in_content(content, word):
    """Replace runs of "丨" in *content* with characters of the headword.

    The headword is ``clean_headword(word)``.  For each run of "丨":

    * if the run length is a multiple of the headword length, the run is
      replaced by that many whole copies of the headword and the running
      character position is reset to the start of the headword;
    * otherwise each "丨" is replaced by the next headword character in
      sequence (wrapping around), continuing from where the previous
      partial run stopped.

    *content* is returned unchanged when it is not a string, contains no
    "丨", or when the cleaned headword is empty.
    """
    # Entries may hold non-string values (e.g. null); leave them untouched
    # instead of raising TypeError on the membership test below.
    if not isinstance(content, str):
        return content

    clean_word = clean_headword(word)
    word_len = len(clean_word)
    if word_len == 0 or "丨" not in content:
        return content

    # Position of the next headword character to use for partial runs;
    # shared across all runs within this one content string.
    pipe_idx = 0

    def repl(match):
        nonlocal pipe_idx
        block_len = len(match.group(0))
        if block_len % word_len == 0:
            # Whole-word run: emit full copies and restart the sequence.
            pipe_idx = 0
            return clean_word * (block_len // word_len)
        # Partial run: continue the character sequence where it left off.
        start = pipe_idx
        pipe_idx += block_len
        return "".join(
            clean_word[(start + offset) % word_len]
            for offset in range(block_len)
        )

    return _PIPE_RUN.sub(repl, content)


def process_data(data):
    """Expand placeholders in every rhyme section of *data*, in place.

    *data* is the mapping loaded from the JSON file.  Sections whose value
    is not a dict, descriptions that are not strings and "词条" values that
    are not dicts are skipped rather than raising.  Returns *data*.
    """
    for rhyme, r_data in data.items():
        if rhyme in _SKIPPED_KEYS or not isinstance(r_data, dict):
            continue

        # 1. "小韵描述": the placeholder stands for the section key itself.
        description = r_data.get("小韵描述")
        if isinstance(description, str) and description:
            r_data["小韵描述"] = description.replace("丨", rhyme)

        # 2. "词条": the placeholder stands for the entry's headword.
        entries = r_data.get("词条")
        if isinstance(entries, dict):
            r_data["词条"] = {
                word: replace_pipes_in_content(content, word)
                for word, content in entries.items()
            }
    return data


def _save_json_atomically(data, path):
    """Write *data* as JSON to *path* without risking a truncated file.

    The JSON is first written to a sibling temporary file, which then
    replaces *path* in a single ``os.replace`` call.  If serialisation
    fails, the original file is left as it was.
    """
    tmp_path = path + ".tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file no longer exists; this
        # only removes a leftover from a failed write.
        with contextlib.suppress(FileNotFoundError):
            os.remove(tmp_path)


def main():
    """Load the JSON file, expand its placeholders and save it back."""
    print("Loading peiwenyunfu.json...")
    with open(JSON_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)

    print("Processing...")
    process_data(data)

    print("Saving peiwenyunfu.json...")
    _save_json_atomically(data, JSON_PATH)
    print("Done!")


if __name__ == "__main__":
    main()