Update: 初学记、佩文韵府 and 五车韵瑞
This commit is contained in:
41
初学记/test_split.py
Normal file
41
初学记/test_split.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import re
|
||||
|
||||
def extract_sections(content_text):
    """Split an entry's text into three sections.

    The text is cut at the first "事對" marker (or, when that is absent,
    the first "事对" marker); the two marker characters themselves are
    dropped. What follows the marker -- or the whole text when there is
    no marker -- is cut again just after the first "〉" that is
    immediately followed by one of the genre words in the pattern below.

    Args:
        content_text: The raw text of one entry.

    Returns:
        A dict with the keys "叙事", "事对" and "诗文" (in that order).
        Sections that were not found are empty strings. In every value
        "〈" is replaced by "(" and "〉" by ")".
    """
    genre_pattern = r"〉(賦|詩|讚|表|碑|頌|銘|檄|文|啓|書|歌|曲|引|記|箴|七|連珠|弔|祭文|詔|令|誄|序|論|賛|讃|啟|辭|辞|操|對|对|銘|論)"

    def split_before_genre(text):
        # Cut right after the closing "〉" so the genre word opens the tail.
        hit = re.search(genre_pattern, text)
        if hit is None:
            return text, ""
        cut = hit.start() + 1
        return text[:cut], text[cut:]

    # The traditional-character marker takes precedence; the simplified
    # one is only consulted when the traditional one is missing.
    marker_pos = -1
    for marker in ("事對", "事对"):
        marker_pos = content_text.find(marker)
        if marker_pos != -1:
            break

    if marker_pos == -1:
        # No marker: the text is only narrative plus (maybe) literature.
        narrative, literature = split_before_genre(content_text)
        paired = ""
    else:
        narrative = content_text[:marker_pos]
        # Skip the two characters of the marker itself.
        paired, literature = split_before_genre(content_text[marker_pos + 2:])

    brackets = str.maketrans({"〈": "(", "〉": ")"})
    return {
        "叙事": narrative.translate(brackets),
        "事对": paired.translate(brackets),
        "诗文": literature.translate(brackets),
    }
|
||||
|
||||
import json
|
||||
# Ad-hoc smoke run: print the sections extracted from a short sample string.
_sample_text = "這里是叙事事對這裡是事對〉詩這里是詩文"
print(extract_sections(_sample_text))
|
||||
Reference in New Issue
Block a user