Remove spaces between Chinese characters and English symbols

This commit is contained in:
yangdx
2025-04-21 19:21:30 +08:00
parent ff65cba544
commit 17f5439952

View File

@@ -1448,9 +1448,9 @@ def normalize_extracted_info(name: str, is_entity=False) -> str:
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character # (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name) name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
# Remove spaces between Chinese and English/numbers # Remove spaces between Chinese and English/numbers/symbols
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name) name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name)
name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name) name = re.sub(r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name)
# Remove English quotation marks from the beginning and end # Remove English quotation marks from the beginning and end
if len(name) >= 2 and name.startswith('"') and name.endswith('"'): if len(name) >= 2 and name.startswith('"') and name.endswith('"'):