Update: 初学记、佩文韵府 and 五车韵瑞
This commit is contained in:
49
佩文韵府/test_placeholder.py
Normal file
49
佩文韵府/test_placeholder.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import json
|
||||
import re
|
||||
|
||||
# Character that the script substitutes: with the rhyme key inside
# descriptions, and with the characters of the entry key inside entry texts.
PLACEHOLDER = "丨"

# Top-level JSON keys that are not rhyme entries and are never sampled.
SKIPPED_KEYS = ("metadata", "preface")

# How many rhymes, and how many entries per rhyme, are inspected.
SAMPLE_SIZE = 5


def restore_placeholders(word, content):
    """Return *content* with each placeholder replaced by a character of *word*.

    Placeholders are filled left to right, cycling through the characters of
    *word*, so a two-character word fills consecutive placeholders pairwise.

    Args:
        word: The entry key whose characters replace the placeholders.
        content: The entry text that may contain placeholder characters.

    Returns:
        The substituted text, or ``None`` when *word* is empty or the number
        of placeholders is not a multiple of ``len(word)`` (the placeholders
        cannot be grouped into whole copies of the word).
    """
    pipe_count = content.count(PLACEHOLDER)
    # An empty word used to reach ``pipe_count % 0`` and crash; treat it as a
    # length mismatch instead.
    if not word or pipe_count % len(word) != 0:
        return None

    pieces = []
    pipe_idx = 0
    for char in content:
        if char == PLACEHOLDER:
            pieces.append(word[pipe_idx % len(word)])
            pipe_idx += 1
        else:
            pieces.append(char)
    return "".join(pieces)


def main(path='peiwenyunfu.json'):
    """Print a placeholder-replacement trial for a small sample of the data.

    Loads the JSON file at *path*, takes the first ``SAMPLE_SIZE`` rhyme
    entries (after dropping the keys in ``SKIPPED_KEYS``), and for each one
    prints the description before/after substitution and the trial result for
    its first ``SAMPLE_SIZE`` entries.

    Args:
        path: Location of the JSON file to analyse.

    Returns:
        A ``(matches, mismatches)`` tuple: how many sampled entries could and
        could not have their placeholders restored.
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    count_match = 0
    count_mismatch = 0

    # Filter before slicing so the skipped keys do not consume sample slots.
    rhymes = [
        (rhyme, r_data)
        for rhyme, r_data in data.items()
        if rhyme not in SKIPPED_KEYS
    ]

    for rhyme, r_data in rhymes[:SAMPLE_SIZE]:
        print(f"\nRhyme: {rhyme}")

        # 1. Description field: every placeholder becomes the rhyme key.
        desc = r_data.get("小韵描述", "")
        desc_fixed = desc.replace(PLACEHOLDER, rhyme)
        print(f"Desc original: {desc[:30]}...")
        print(f"Desc fixed: {desc_fixed[:30]}...")

        # 2. Entries: placeholders become the characters of the entry key.
        entries = list(r_data.get("词条", {}).items())[:SAMPLE_SIZE]
        for word, content in entries:
            pipe_count = content.count(PLACEHOLDER)
            if pipe_count == 0:
                # Nothing to restore; not counted either way.
                continue

            print(f"Word: {word} (len {len(word)}), pipes: {pipe_count}")
            print(f"Original: {content}")

            # Test replacing the placeholders in groups of len(word).
            fixed_content = restore_placeholders(word, content)
            if fixed_content is None:
                print("MISMATCH length!")
                count_mismatch += 1
            else:
                print(f"Fixed: {fixed_content}")
                count_match += 1

    print(f"\nMatches: {count_match}, Mismatches: {count_mismatch}")
    return count_match, count_mismatch


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user