# Listing header from the file viewer: 50 lines, 1.5 KiB, Python
import json
import re
from itertools import cycle, islice

# Top-level keys of the JSON file that are not rhyme entries.
SKIPPED_KEYS = frozenset({"metadata", "preface"})

# Single-character mark that this script substitutes with headword characters.
PLACEHOLDER = "丨"

# How many rhymes, and how many entries per rhyme, are inspected.
SAMPLE_SIZE = 5


def fill_placeholders(word, content):
    """Replace each placeholder in *content* with characters of *word*.

    Placeholders are consumed left to right and the characters of *word*
    are used cyclically, so the n-th placeholder becomes
    ``word[n % len(word)]``.

    Args:
        word: The entry key whose characters are substituted in.
        content: The entry text that may contain placeholder marks.

    Returns:
        The substituted text; *content* unchanged when it holds no
        placeholder; or ``None`` when the placeholders cannot be grouped
        evenly (their count is not a multiple of ``len(word)``, or *word*
        is empty).
    """
    pipe_count = content.count(PLACEHOLDER)
    if pipe_count == 0:
        return content
    # An empty word would make the modulo below divide by zero.
    if not word or pipe_count % len(word) != 0:
        return None
    replacements = cycle(word)
    return "".join(
        next(replacements) if char == PLACEHOLDER else char
        for char in content
    )


def analyze(data, sample_size=SAMPLE_SIZE):
    """Print a substitution report for a sample of *data*.

    Up to *sample_size* rhymes are examined, not counting the keys in
    ``SKIPPED_KEYS``, and up to *sample_size* entries of each rhyme's
    "词条" mapping. Entries without a placeholder are not reported.

    Args:
        data: Mapping of rhyme key to a dict that may hold "小韵描述"
            (a string) and "词条" (a mapping of word to content string).
        sample_size: Maximum number of rhymes, and of entries per rhyme.

    Returns:
        A ``(matches, mismatches)`` tuple counting the reported entries
        that could and could not be substituted.
    """
    count_match = 0
    count_mismatch = 0

    # Filter before sampling so skipped keys do not use up sample slots.
    rhymes = (
        (rhyme, r_data)
        for rhyme, r_data in data.items()
        if rhyme not in SKIPPED_KEYS
    )
    for rhyme, r_data in islice(rhymes, sample_size):
        print(f"\nRhyme: {rhyme}")

        # 1. Rhyme description: every placeholder becomes the rhyme key.
        desc = r_data.get("小韵描述", "")
        desc_fixed = desc.replace("丨", rhyme)
        print(f"Desc original: {desc[:30]}...")
        print(f"Desc fixed: {desc_fixed[:30]}...")

        # 2. Entries: placeholders become the entry word's characters.
        entries = r_data.get("词条", {})
        for word, content in islice(entries.items(), sample_size):
            pipe_count = content.count(PLACEHOLDER)
            if pipe_count == 0:
                continue

            print(f"Word: {word} (len {len(word)}), pipes: {pipe_count}")
            print(f"Original: {content}")

            fixed_content = fill_placeholders(word, content)
            if fixed_content is None:
                print("MISMATCH length!")
                count_mismatch += 1
            else:
                print(f"Fixed: {fixed_content}")
                count_match += 1

    return count_match, count_mismatch


def main():
    """Load 'peiwenyunfu.json' and print the sample report with totals."""
    with open('peiwenyunfu.json', 'r', encoding='utf-8') as f:
        data = json.load(f)

    count_match, count_mismatch = analyze(data)
    print(f"\nMatches: {count_match}, Mismatches: {count_mismatch}")


if __name__ == "__main__":
    main()