diff --git a/lightrag/utils.py b/lightrag/utils.py
index c6991629..400cd851 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1448,9 +1448,9 @@ def normalize_extracted_info(name: str, is_entity=False) -> str:
     # (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
     name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
 
-    # Remove spaces between Chinese and English/numbers
-    name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name)
-    name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
+    # Remove spaces between Chinese and English/numbers/symbols
+    name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name)
+    name = re.sub(r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name)
 
     # Remove English quotation marks from the beginning and end
     if len(name) >= 2 and name.startswith('"') and name.endswith('"'):