42 lines
1.5 KiB
Python
42 lines
1.5 KiB
Python
import re
|
||
|
||
# Headings that open the 事对 part, in traditional and simplified form.
# Both are two characters long, which _SHIDUI_MARKER_LEN relies on.
_SHIDUI_MARKERS = ("事對", "事对")
_SHIDUI_MARKER_LEN = 2

# A closing bracket 〉 immediately followed by a genre label opens the 诗文
# part.  Only the match position is used, so the group is non-capturing.
_GENRE_RE = re.compile(
    r"〉(?:賦|詩|讚|表|碑|頌|銘|檄|文|啓|書|歌|曲|引|記|箴|七|連珠|弔|祭文"
    r"|詔|令|誄|序|論|賛|讃|啟|辭|辞|操|對|对)"
)

# Full-width angle brackets are rewritten as ASCII parentheses in the output.
_BRACKET_TABLE = str.maketrans({"〈": "(", "〉": ")"})


def extract_sections(content_text):
    """Split a text into its 叙事, 事对 and 诗文 parts.

    The text is cut at two boundaries:

    * the earliest occurrence of the marker "事對" / "事对"; everything before
      it becomes 叙事 and the marker itself is dropped;
    * the first "〉" that is directly followed by a genre label (賦, 詩, ...),
      searched after the marker when one exists, otherwise in the whole
      text.  The "〉" stays with the preceding part and the genre label
      starts 诗文.

    If no marker is found, 事对 is left empty and the genre boundary splits
    叙事 from 诗文.  If no genre boundary is found, 诗文 is left empty.

    Args:
        content_text: The text to split.  ``None`` or an empty string yields
            three empty parts.

    Returns:
        A dict with the keys "叙事", "事对" and "诗文" (in that order) whose
        values have every "〈" / "〉" replaced by "(" / ")".
    """
    result = {"叙事": "", "事对": "", "诗文": ""}
    if not content_text:
        return result

    # Use whichever marker form appears first, so a later occurrence of the
    # other form cannot pull the boundary past the real heading.
    marker_start = min(
        (pos for pos in map(content_text.find, _SHIDUI_MARKERS) if pos != -1),
        default=-1,
    )

    if marker_start != -1:
        result["叙事"] = content_text[:marker_start]
        remainder = content_text[marker_start + _SHIDUI_MARKER_LEN:]
        remainder_key = "事对"
    else:
        # Without a marker the whole text is 叙事 until a genre boundary.
        remainder = content_text
        remainder_key = "叙事"

    genre_match = _GENRE_RE.search(remainder)
    if genre_match:
        # +1 keeps the closing bracket with the part that precedes the genre.
        split_idx = genre_match.start() + 1
        result[remainder_key] = remainder[:split_idx]
        result["诗文"] = remainder[split_idx:]
    else:
        result[remainder_key] = remainder

    return {key: text.translate(_BRACKET_TABLE) for key, text in result.items()}
|
||
|
||
import json
|
||
if __name__ == "__main__":
    # Demo: print the split of a small sample when run as a script.  The
    # guard keeps importing this module free of output.
    print(extract_sections("這里是叙事事對這裡是事對〉詩這里是詩文"))
|