Update: 初学记、佩文韵府 and 五车韵瑞
This commit is contained in:
57
佩文韵府/fix_structure.py
Normal file
57
佩文韵府/fix_structure.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import json
|
||||
|
||||
# Read the whole source dictionary into memory before restructuring it.
print("Loading...")
with open('peiwenyunfu.json', encoding='utf-8') as f:
    data = json.load(f)

# The output starts with the two non-rhyme sections copied over unchanged;
# a missing section raises KeyError, exactly as a plain lookup would.
new_data = {
    'metadata': data['metadata'],
    'preface': data['preface'],
}
|
||||
|
||||
# Leading markers that clean_key() removes from entry keys. Each marker is
# listed in two character variants so either spelling is recognised.
prefixes = ["韻藻", "韵藻", "増", "增"]


def clean_key(k, strip_prefixes=None):
    """Return *k* with every leading marker removed.

    Markers are stripped repeatedly, so stacked markers (for example
    ``"增韻藻…"``) are all removed.  A marker is only stripped when something
    is left afterwards: a key that consists solely of a marker is returned
    unchanged rather than being reduced to an empty string.

    Args:
        k: The entry key to clean.
        strip_prefixes: Optional iterable of markers to strip.  Defaults to
            the module-level ``prefixes`` list.

    Returns:
        The key without its leading markers.
    """
    candidates = prefixes if strip_prefixes is None else strip_prefixes
    # An empty marker matches every string without shortening it, which would
    # keep the loop below running forever; such markers are ignored.
    candidates = [p for p in candidates if p]

    changed = True
    while changed:
        changed = False
        for p in candidates:
            # The length check keeps a key that *is* a marker intact.
            if k.startswith(p) and len(k) > len(p):
                k = k[len(p):]
                changed = True
    return k
|
||||
|
||||
# A single-character 词条 key whose text contains one of these markers within
# its first 15 characters is promoted to an entry of its own instead of being
# stored as a phrase.
# NOTE(review): the markers look like reading glosses and titles of cited
# dictionaries; '音' and '同' are very broad matches -- confirm against the data.
_HEAD_MARKERS = ('切', '説文', '廣韻', '玉篇', '集韻', '韻㑹', '音', '同', '釋名', '爾雅')

# How many colliding keys are listed in the end-of-run report.
_MAX_REPORTED = 10


def _make_entry(source, description, duiyu="", zhaiju=""):
    """Build one output entry that inherits 卷/声/韵 from *source*."""
    return {
        "卷": source["卷"],
        "声": source["声"],
        "韵": source["韵"],
        "小韵描述": description,
        "韵藻": {},
        "对语": duiyu,
        "摘句": zhaiju,
    }


def _report_collisions(label, items):
    """Print how many keys in *items* were overwritten, with a few examples."""
    if not items:
        return
    shown = ", ".join(str(item) for item in items[:_MAX_REPORTED])
    suffix = ", ..." if len(items) > _MAX_REPORTED else ""
    print(f"Warning: {len(items)} {label} overwritten by a later duplicate: {shown}{suffix}")


# Keys that were written more than once. The later value still wins, as
# before, but the collisions are reported so the loss is not silent.
overwritten_entries = []
overwritten_phrases = []

for rhyme, r_data in data.items():
    if rhyme in ('metadata', 'preface'):
        continue

    # Store the original main rhyme
    if rhyme in new_data:
        overwritten_entries.append(rhyme)
    new_data[rhyme] = _make_entry(
        r_data,
        r_data["小韵描述"],
        r_data.get("对语", ""),
        r_data.get("摘句", ""),
    )

    # Phrases are attached to the most recently opened entry.
    current_rhyme = rhyme

    # `or {}` also covers an explicit null 词条 in the JSON.
    for k, v in (r_data.get("词条") or {}).items():
        # Only strings can be sliced and concatenated below; any other value
        # is kept as an ordinary phrase instead of crashing the run.
        is_head = (
            len(k) == 1
            and isinstance(v, str)
            and any(marker in v[:15] for marker in _HEAD_MARKERS)
        )
        if is_head:
            if k in new_data:
                overwritten_entries.append(k)
            current_rhyme = k
            # The split-off entry gets its description from the former phrase
            # text and starts with empty 对语 / 摘句.
            new_data[current_rhyme] = _make_entry(r_data, k + v)
        else:
            cleaned = clean_key(k)
            phrases = new_data[current_rhyme]["韵藻"]
            if cleaned in phrases:
                overwritten_phrases.append(f"{current_rhyme}/{cleaned}")
            phrases[cleaned] = v

with open('peiwenyunfu_v2.json', 'w', encoding='utf-8') as f:
    json.dump(new_data, f, ensure_ascii=False, indent=4)

print(f"Old size: {len(data)}, New size: {len(new_data)}")
_report_collisions("entries", overwritten_entries)
_report_collisions("phrases", overwritten_phrases)
|
||||
Reference in New Issue
Block a user