"""Probe whether the "丨" placeholders in a rhyme-dictionary JSON can be filled.

The script loads a JSON mapping, looks at the first few top-level entries
(ignoring the "metadata" and "preface" keys) and, for each of them:

1. replaces every "丨" in the "小韵描述" text with the entry's key, and
2. for the first few items of "词条", tries to replace each "丨" in the item's
   text with the characters of the item's key, cycling through them in order.

Nothing is written back; the script only prints what it would produce and a
final tally of how many items could / could not be filled.
"""
import json
import re  # not used below; retained from the original import list
from itertools import cycle, islice

# Character that stands in for an elided character in the data.
PLACEHOLDER = "丨"
# Top-level keys that are not rhyme entries and must not be analyzed.
SKIPPED_KEYS = ("metadata", "preface")
DEFAULT_PATH = 'peiwenyunfu.json'


def _fill_placeholders(word, content):
    """Return *content* with each placeholder replaced by a character of *word*.

    Placeholders are consumed left to right; the i-th one becomes
    ``word[i % len(word)]``. *word* must be non-empty.
    """
    fill = cycle(word)
    return "".join(
        next(fill) if char == PLACEHOLDER else char for char in content
    )


def main(path=DEFAULT_PATH, limit=5):
    """Print a placeholder-replacement report for a sample of the data.

    Args:
        path: JSON file to read. Assumes the top level is an object whose
            non-skipped values are objects, and that the "词条" values are
            strings -- TODO confirm against the real data file.
        limit: Maximum number of rhyme entries to inspect, and also the
            maximum number of "词条" items inspected per rhyme. ``None``
            inspects everything.

    Returns:
        A ``(count_match, count_mismatch)`` tuple: the number of inspected
        items whose placeholder count is a multiple of the key length, and
        the number for which it is not.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    count_match = 0
    count_mismatch = 0

    # Filter before limiting, so that skipped keys do not use up the sample.
    rhymes = (
        (rhyme, r_data)
        for rhyme, r_data in data.items()
        if rhyme not in SKIPPED_KEYS
    )
    for rhyme, r_data in islice(rhymes, limit):
        print(f"\nRhyme: {rhyme}")

        # 1. Description text (the "小韵描述" field): placeholder -> rhyme key.
        desc = r_data.get("小韵描述", "")
        desc_fixed = desc.replace(PLACEHOLDER, rhyme)
        print(f"Desc original: {desc[:30]}...")
        print(f"Desc fixed: {desc_fixed[:30]}...")

        # 2. Items of the "词条" field: placeholder -> characters of the key.
        for word, content in islice(r_data.get("词条", {}).items(), limit):
            pipe_count = content.count(PLACEHOLDER)
            if pipe_count == 0:
                continue  # nothing to fill in for this item

            word_len = len(word)
            print(f"Word: {word} (len {word_len}), pipes: {pipe_count}")
            print(f"Original: {content}")

            # The placeholders can be replaced in groups of word_len only if
            # their count is a whole multiple of it. An empty key can never
            # supply replacement characters (and would divide by zero), so it
            # is reported as a mismatch.
            if word_len and pipe_count % word_len == 0:
                print(f"Fixed: {_fill_placeholders(word, content)}")
                count_match += 1
            else:
                print("MISMATCH length!")
                count_mismatch += 1

    print(f"\nMatches: {count_match}, Mismatches: {count_mismatch}")
    return count_match, count_mismatch


if __name__ == "__main__":
    main()