"""Restructure ``peiwenyunfu.json`` into ``peiwenyunfu_v2.json``.

Every top-level rhyme entry of the input keeps its own record.  While
walking a rhyme's "词条" mapping, a single-character key whose value
carries one of ``HEADER_MARKERS`` near its start opens a new top-level
record; every following entry is filed under the most recently opened
record, with leading marker prefixes stripped from its key.
"""
import json

INPUT_PATH = 'peiwenyunfu.json'
OUTPUT_PATH = 'peiwenyunfu_v2.json'

# Top-level keys that are copied through unchanged instead of being
# processed as rhyme records.
PASSTHROUGH_KEYS = ('metadata', 'preface')

# Prefixes removed from the front of entry keys by clean_key().
prefixes = ["韻藻", "韵藻", "増", "增"]

# A single-character entry is treated as a header when any of these
# substrings occurs within the first HEADER_WINDOW characters of its value.
# NOTE(review): these look like dictionary titles / pronunciation glosses;
# confirm against the data before extending the list.
HEADER_MARKERS = (
    '切', '説文', '廣韻', '玉篇', '集韻', '韻㑹', '音', '同', '釋名', '爾雅',
)
HEADER_WINDOW = 15


def clean_key(k: str) -> str:
    """Strip every leading occurrence of the known prefixes from *k*.

    Prefixes are removed repeatedly until a full pass over ``prefixes``
    changes nothing.  A prefix is only removed when something would remain
    afterwards, so a key that consists solely of a prefix is returned as is
    and the result is never emptied by stripping.
    """
    stripped = True
    while stripped:
        stripped = False
        for p in prefixes:
            if len(k) > len(p) and k.startswith(p):
                k = k[len(p):]
                stripped = True
    return k


def is_header(k: str, v) -> bool:
    """Return True when entry ``k: v`` opens a new top-level record.

    The key must be exactly one character long and the value must be a
    string with one of ``HEADER_MARKERS`` in its first ``HEADER_WINDOW``
    characters.  Non-string values are never headers; they would otherwise
    fail on slicing or on the ``k + v`` concatenation done for headers.
    """
    if len(k) != 1 or not isinstance(v, str):
        return False
    head = v[:HEADER_WINDOW]  # sliced once instead of once per marker
    return any(marker in head for marker in HEADER_MARKERS)


def restructure(data: dict) -> dict:
    """Build the restructured mapping from the loaded input mapping.

    Args:
        data: Parsed input.  Must contain the keys in ``PASSTHROUGH_KEYS``;
            every other value must be a mapping providing "卷", "声", "韵"
            and "小韵描述", and optionally "词条", "对语" and "摘句".

    Returns:
        A new dict holding the pass-through keys first, followed by one
        record per original rhyme and per detected header, in encounter
        order.  *data* itself is not modified.

    Raises:
        KeyError: If a required key listed above is missing.
    """
    new_data = {key: data[key] for key in PASSTHROUGH_KEYS}

    for rhyme, r_data in data.items():
        if rhyme in PASSTHROUGH_KEYS:
            continue

        # Store the original main rhyme.
        new_data[rhyme] = {
            "卷": r_data["卷"],
            "声": r_data["声"],
            "韵": r_data["韵"],
            "小韵描述": r_data["小韵描述"],
            "韵藻": {},
            "对语": r_data.get("对语", ""),
            "摘句": r_data.get("摘句", ""),
        }

        # Entries are filed under the main rhyme until a header appears.
        current_rhyme = rhyme
        for k, v in r_data.get("词条", {}).items():
            if is_header(k, v):
                current_rhyme = k
                # NOTE(review): if this character already exists as a
                # top-level key (the main rhyme itself, or a record created
                # earlier), that record is replaced and its collected
                # entries are dropped.  Behaviour kept as in the original;
                # confirm whether merging is wanted instead.
                new_data[current_rhyme] = {
                    "卷": r_data["卷"],
                    "声": r_data["声"],
                    "韵": r_data["韵"],
                    "小韵描述": k + v,
                    "韵藻": {},
                    "对语": "",
                    "摘句": "",
                }
            else:
                # Keys that clean to the same text overwrite one another;
                # the last one encountered wins.
                new_data[current_rhyme]["韵藻"][clean_key(k)] = v

    return new_data


def main(src: str = INPUT_PATH, dst: str = OUTPUT_PATH) -> None:
    """Load *src*, restructure it, write *dst* and print both sizes."""
    print("Loading...")
    with open(src, 'r', encoding='utf-8') as f:
        data = json.load(f)

    new_data = restructure(data)

    with open(dst, 'w', encoding='utf-8') as f:
        json.dump(new_data, f, ensure_ascii=False, indent=4)

    print(f"Old size: {len(data)}, New size: {len(new_data)}")


# Guarded so that importing this module does not touch the filesystem.
if __name__ == "__main__":
    main()