Fix special character handling for Postgres

This commit is contained in:
yangdx
2025-04-17 22:58:36 +08:00
parent 14b4bc96ce
commit a3ca134e97
3 changed files with 10 additions and 5 deletions

View File

@@ -1049,10 +1049,10 @@ class PGGraphStorage(BaseGraphStorage):
Returns: Returns:
Normalized node ID suitable for Cypher queries Normalized node ID suitable for Cypher queries
""" """
# Remove quotes
normalized_id = node_id.strip('"')
# Escape backslashes # Escape backslashes
normalized_id = node_id
normalized_id = normalized_id.replace("\\", "\\\\") normalized_id = normalized_id.replace("\\", "\\\\")
normalized_id = normalized_id.replace('"', '\\"')
return normalized_id return normalized_id
async def initialize(self): async def initialize(self):

View File

@@ -157,8 +157,8 @@ async def _handle_single_entity_extraction(
return None return None
# Clean and validate entity name # Clean and validate entity name
entity_name = clean_str(record_attributes[1]).strip('"') entity_name = clean_str(record_attributes[1]).strip()
if not entity_name.strip(): if not entity_name:
logger.warning( logger.warning(
f"Entity extraction error: empty entity name in: {record_attributes}" f"Entity extraction error: empty entity name in: {record_attributes}"
) )

View File

@@ -1385,7 +1385,12 @@ def normalize_extracted_info(name: str, is_entity=False) -> str:
name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name) name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
# Remove English quotation marks from the beginning and end # Remove English quotation marks from the beginning and end
name = name.strip('"').strip("'") if (
len(name) >= 2
and name.startswith('"')
and name.endswith('"')
):
name = name[1:-1]
if is_entity: if is_entity:
# remove Chinese quotes # remove Chinese quotes