import re

# A token is a (possibly empty) run of characters that are not angle
# brackets, immediately followed by one or more consecutive 〈…〉 groups.
# Group 1 captures the leading text; group 2 captures the whole run of
# bracketed groups as a single string (brackets included).
TOKEN_PATTERN = re.compile(r"([^〈〉]*)((?:〈[^〉]+〉)+)")

text = "對語〈渭北 江東〉〈 平北 安東〉摘句〈力障百川東〉"

# With two capture groups, findall returns a list of (word, desc_blocks)
# tuples. Text that is not followed by a bracketed group is not matched.
tokens = TOKEN_PATTERN.findall(text)

for i, (word, desc_blocks) in enumerate(tokens):
    print(f"Token {i}: WORD='{word}' DESCS={desc_blocks}")