Remove spaces between Chinese characters and English symbols
This commit is contained in:
@@ -1448,9 +1448,9 @@ def normalize_extracted_info(name: str, is_entity=False) -> str:
|
|||||||
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
|
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
|
||||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
|
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||||
|
|
||||||
# Remove spaces between Chinese and English/numbers
|
# Remove spaces between Chinese and English/numbers/symbols
|
||||||
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name)
|
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])", "", name)
|
||||||
name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
|
name = re.sub(r"(?<=[a-zA-Z0-9\(\)\[\]@#$%!&\*\-=+_])\s+(?=[\u4e00-\u9fa5])", "", name)
|
||||||
|
|
||||||
# Remove English quotation marks from the beginning and end
|
# Remove English quotation marks from the beginning and end
|
||||||
if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
|
if len(name) >= 2 and name.startswith('"') and name.endswith('"'):
|
||||||
|
Reference in New Issue
Block a user