import json
import re

# Exploratory check of the "丨" marks in peiwenyunfu.json.
#
# For a small sample of rhymes this prints the rhyme description and a few
# entries before and after substituting the "丨" marks, and tallies how many
# entries have a mark count that is a whole multiple of the entry word's
# length (the only case in which the substitution below is attempted).
# NOTE(review): "丨" presumably stands in for the headword/rhyme character in
# the source text -- confirm against the data before relying on the output.

PLACEHOLDER = "丨"
SKIPPED_KEYS = ("metadata", "preface")  # top-level keys that are not rhymes
SAMPLE_SIZE = 5  # rhymes inspected, and entries inspected per rhyme
PREVIEW_LEN = 30  # characters of each description shown


def _fill_placeholders(word, content):
    """Return *content* with every placeholder replaced by a char of *word*.

    The n-th placeholder (0-based) is replaced by ``word[n % len(word)]``,
    so consecutive marks spell out *word* repeatedly. *word* must be
    non-empty.
    """
    pieces = []
    position = 0
    for char in content:
        if char == PLACEHOLDER:
            pieces.append(word[position % len(word)])
            position += 1
        else:
            pieces.append(char)
    return "".join(pieces)


with open('peiwenyunfu.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Tallies over all inspected entries that contain at least one placeholder.
count_match = 0
count_mismatch = 0

# Drop the non-rhyme keys *before* sampling; slicing first would let
# "metadata"/"preface" use up sample slots and shrink the sample.
sample = [
    (rhyme, r_data)
    for rhyme, r_data in data.items()
    if rhyme not in SKIPPED_KEYS
][:SAMPLE_SIZE]

for rhyme, r_data in sample:
    print(f"\nRhyme: {rhyme}")

    # 1. Rhyme description (the "小韵描述" field): every mark becomes the
    #    rhyme key itself.
    desc = r_data.get("小韵描述", "")
    desc_fixed = desc.replace(PLACEHOLDER, rhyme)
    print(f"Desc original: {desc[:PREVIEW_LEN]}...")
    print(f"Desc fixed:    {desc_fixed[:PREVIEW_LEN]}...")

    # 2. Entries (the "词条" field): only the first few entries are looked
    #    at, and those without any mark are skipped silently.
    for word, content in list(r_data.get("词条", {}).items())[:SAMPLE_SIZE]:
        pipe_count = content.count(PLACEHOLDER)
        word_len = len(word)
        if pipe_count == 0:
            continue

        print(f"Word: {word} (len {word_len}), pipes: {pipe_count}")
        print(f"Original: {content}")

        # Substitute only when the marks split evenly into groups of
        # word_len. An empty word can never be substituted and is counted
        # as a mismatch (it would otherwise divide by zero).
        if word_len and pipe_count % word_len == 0:
            fixed_content = _fill_placeholders(word, content)
            print(f"Fixed:    {fixed_content}")
            count_match += 1
        else:
            print("MISMATCH length!")
            count_mismatch += 1

print(f"\nMatches: {count_match}, Mismatches: {count_mismatch}")