diff --git a/lightrag/utils.py b/lightrag/utils.py index 400cd851..c486f602 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -1449,8 +1449,12 @@ def normalize_extracted_info(name: str, is_entity=False) -> str: name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name) # Remove spaces between Chinese and English/numbers/symbols - name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name) - name = re.sub(r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name) + name = re.sub( + r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name + ) + name = re.sub( + r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name + ) # Remove English quotation marks from the beginning and end if len(name) >= 2 and name.startswith('"') and name.endswith('"'):