Remove spaces between Chinese characters and English symbols
This commit is contained in:
@@ -1448,9 +1448,9 @@ def normalize_extracted_info(name: str, is_entity=False) -> str:
|
||||
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
|
||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||
|
||||
# Remove spaces between Chinese and English/numbers
|
||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name)
|
||||
name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||
# Remove spaces between Chinese and English/numbers/symbols
|
||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name)
|
||||
name = re.sub(r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||
|
||||
# Remove English quotation marks from the beginning and end
|
||||
if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
|
||||
|
Reference in New Issue
Block a user