Fix edit entity and relation bugs
This commit is contained in:
@@ -229,3 +229,43 @@ class ChromaVectorDBStorage(BaseVectorStorage):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Get all records from the collection
|
||||||
|
# Since ChromaDB doesn't directly support prefix search on IDs,
|
||||||
|
# we'll get all records and filter in Python
|
||||||
|
results = self._collection.get(
|
||||||
|
include=["metadatas", "documents", "embeddings"]
|
||||||
|
)
|
||||||
|
|
||||||
|
matching_records = []
|
||||||
|
|
||||||
|
# Filter records where ID starts with the prefix
|
||||||
|
for i, record_id in enumerate(results["ids"]):
|
||||||
|
if record_id.startswith(prefix):
|
||||||
|
matching_records.append(
|
||||||
|
{
|
||||||
|
"id": record_id,
|
||||||
|
"content": results["documents"][i],
|
||||||
|
"vector": results["embeddings"][i],
|
||||||
|
**results["metadatas"][i],
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Found {len(matching_records)} records with prefix '{prefix}'"
|
||||||
|
)
|
||||||
|
return matching_records
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error during prefix search in ChromaDB: {str(e)}")
|
||||||
|
raise
|
||||||
|
@@ -371,3 +371,24 @@ class FaissVectorDBStorage(BaseVectorStorage):
|
|||||||
return False # Return error
|
return False # Return error
|
||||||
|
|
||||||
return True # Return success
|
return True # Return success
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
matching_records = []
|
||||||
|
|
||||||
|
# Search for records with IDs starting with the prefix
|
||||||
|
for faiss_id, meta in self._id_to_meta.items():
|
||||||
|
if "__id__" in meta and meta["__id__"].startswith(prefix):
|
||||||
|
# Create a copy of all metadata and add "id" field
|
||||||
|
record = {**meta, "id": meta["__id__"]}
|
||||||
|
matching_records.append(record)
|
||||||
|
|
||||||
|
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
||||||
|
return matching_records
|
||||||
|
@@ -206,3 +206,28 @@ class MilvusVectorDBStorage(BaseVectorStorage):
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use Milvus query with expression to find IDs with the given prefix
|
||||||
|
expression = f'id like "{prefix}%"'
|
||||||
|
results = self._client.query(
|
||||||
|
collection_name=self.namespace,
|
||||||
|
filter=expression,
|
||||||
|
output_fields=list(self.meta_fields) + ["id"],
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
|
||||||
|
return results
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error searching for records with prefix '{prefix}': {e}")
|
||||||
|
return []
|
||||||
|
@@ -1045,6 +1045,32 @@ class MongoVectorDBStorage(BaseVectorStorage):
|
|||||||
except PyMongoError as e:
|
except PyMongoError as e:
|
||||||
logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
|
logger.error(f"Error deleting relations for {entity_name}: {str(e)}")
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use MongoDB regex to find documents where _id starts with the prefix
|
||||||
|
cursor = self._data.find({"_id": {"$regex": f"^{prefix}"}})
|
||||||
|
matching_records = await cursor.to_list(length=None)
|
||||||
|
|
||||||
|
# Format results
|
||||||
|
results = [{**doc, "id": doc["_id"]} for doc in matching_records]
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Found {len(results)} records with prefix '{prefix}' in {self.namespace}"
|
||||||
|
)
|
||||||
|
return results
|
||||||
|
|
||||||
|
except PyMongoError as e:
|
||||||
|
logger.error(f"Error searching by prefix in {self.namespace}: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
|
async def get_or_create_collection(db: AsyncIOMotorDatabase, collection_name: str):
|
||||||
collection_names = await db.list_collection_names()
|
collection_names = await db.list_collection_names()
|
||||||
|
@@ -236,3 +236,23 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|||||||
return False # Return error
|
return False # Return error
|
||||||
|
|
||||||
return True # Return success
|
return True # Return success
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
storage = await self.client_storage
|
||||||
|
matching_records = []
|
||||||
|
|
||||||
|
# Search for records with IDs starting with the prefix
|
||||||
|
for record in storage["data"]:
|
||||||
|
if "__id__" in record and record["__id__"].startswith(prefix):
|
||||||
|
matching_records.append({**record, "id": record["__id__"]})
|
||||||
|
|
||||||
|
logger.debug(f"Found {len(matching_records)} records with prefix '{prefix}'")
|
||||||
|
return matching_records
|
||||||
|
@@ -494,6 +494,41 @@ class OracleVectorDBStorage(BaseVectorStorage):
|
|||||||
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Determine the appropriate table based on namespace
|
||||||
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
|
|
||||||
|
# Create SQL query to find records with IDs starting with prefix
|
||||||
|
search_sql = f"""
|
||||||
|
SELECT * FROM {table_name}
|
||||||
|
WHERE workspace = :workspace
|
||||||
|
AND id LIKE :prefix_pattern
|
||||||
|
ORDER BY id
|
||||||
|
"""
|
||||||
|
|
||||||
|
params = {"workspace": self.db.workspace, "prefix_pattern": f"{prefix}%"}
|
||||||
|
|
||||||
|
# Execute query and get results
|
||||||
|
results = await self.db.query(search_sql, params, multirows=True)
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Found {len(results) if results else 0} records with prefix '{prefix}'"
|
||||||
|
)
|
||||||
|
return results or []
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error searching records with prefix '{prefix}': {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
@final
|
@final
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@@ -575,6 +575,41 @@ class PGVectorStorage(BaseVectorStorage):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
logger.error(f"Error deleting relations for entity {entity_name}: {e}")
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
table_name = namespace_to_table_name(self.namespace)
|
||||||
|
if not table_name:
|
||||||
|
logger.error(f"Unknown namespace for prefix search: {self.namespace}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
search_sql = f"SELECT * FROM {table_name} WHERE workspace=$1 AND id LIKE $2"
|
||||||
|
params = {"workspace": self.db.workspace, "prefix": f"{prefix}%"}
|
||||||
|
|
||||||
|
try:
|
||||||
|
results = await self.db.query(search_sql, params, multirows=True)
|
||||||
|
logger.debug(f"Found {len(results)} records with prefix '{prefix}'")
|
||||||
|
|
||||||
|
# Format results to match the expected return format
|
||||||
|
formatted_results = []
|
||||||
|
for record in results:
|
||||||
|
formatted_record = dict(record)
|
||||||
|
# Ensure id field is available (for consistency with NanoVectorDB implementation)
|
||||||
|
if "id" not in formatted_record:
|
||||||
|
formatted_record["id"] = record["id"]
|
||||||
|
formatted_results.append(formatted_record)
|
||||||
|
|
||||||
|
return formatted_results
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error during prefix search for '{prefix}': {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
@final
|
@final
|
||||||
@dataclass
|
@dataclass
|
||||||
|
@@ -233,3 +233,45 @@ class QdrantVectorDBStorage(BaseVectorStorage):
|
|||||||
logger.debug(f"No relations found for entity {entity_name}")
|
logger.debug(f"No relations found for entity {entity_name}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error deleting relations for {entity_name}: {e}")
|
logger.error(f"Error deleting relations for {entity_name}: {e}")
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use scroll method to find records with IDs starting with the prefix
|
||||||
|
results = self._client.scroll(
|
||||||
|
collection_name=self.namespace,
|
||||||
|
scroll_filter=models.Filter(
|
||||||
|
must=[
|
||||||
|
models.FieldCondition(
|
||||||
|
key="id", match=models.MatchText(text=prefix, prefix=True)
|
||||||
|
)
|
||||||
|
]
|
||||||
|
),
|
||||||
|
with_payload=True,
|
||||||
|
with_vectors=False,
|
||||||
|
limit=1000, # Adjust as needed for your use case
|
||||||
|
)
|
||||||
|
|
||||||
|
# Extract matching points
|
||||||
|
matching_records = results[0]
|
||||||
|
|
||||||
|
# Format the results to match expected return format
|
||||||
|
formatted_results = [
|
||||||
|
{**point.payload, "id": point.id} for point in matching_records
|
||||||
|
]
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Found {len(formatted_results)} records with prefix '{prefix}'"
|
||||||
|
)
|
||||||
|
return formatted_results
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error searching for prefix '{prefix}': {e}")
|
||||||
|
return []
|
||||||
|
@@ -414,6 +414,55 @@ class TiDBVectorDBStorage(BaseVectorStorage):
|
|||||||
# Ti handles persistence automatically
|
# Ti handles persistence automatically
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
async def search_by_prefix(self, prefix: str) -> list[dict[str, Any]]:
|
||||||
|
"""Search for records with IDs starting with a specific prefix.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prefix: The prefix to search for in record IDs
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of records with matching ID prefixes
|
||||||
|
"""
|
||||||
|
# Determine which table to query based on namespace
|
||||||
|
if self.namespace == NameSpace.VECTOR_STORE_ENTITIES:
|
||||||
|
sql_template = """
|
||||||
|
SELECT entity_id as id, name as entity_name, entity_type, description, content
|
||||||
|
FROM LIGHTRAG_GRAPH_NODES
|
||||||
|
WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
|
||||||
|
"""
|
||||||
|
elif self.namespace == NameSpace.VECTOR_STORE_RELATIONSHIPS:
|
||||||
|
sql_template = """
|
||||||
|
SELECT relation_id as id, source_name as src_id, target_name as tgt_id,
|
||||||
|
keywords, description, content
|
||||||
|
FROM LIGHTRAG_GRAPH_EDGES
|
||||||
|
WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
|
||||||
|
"""
|
||||||
|
elif self.namespace == NameSpace.VECTOR_STORE_CHUNKS:
|
||||||
|
sql_template = """
|
||||||
|
SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
|
||||||
|
FROM LIGHTRAG_DOC_CHUNKS
|
||||||
|
WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"Namespace {self.namespace} not supported for prefix search"
|
||||||
|
)
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Add prefix pattern parameter with % for SQL LIKE
|
||||||
|
prefix_pattern = f"{prefix}%"
|
||||||
|
params = {"prefix_pattern": prefix_pattern, "workspace": self.db.workspace}
|
||||||
|
|
||||||
|
try:
|
||||||
|
results = await self.db.query(sql_template, params=params, multirows=True)
|
||||||
|
logger.debug(
|
||||||
|
f"Found {len(results) if results else 0} records with prefix '{prefix}'"
|
||||||
|
)
|
||||||
|
return results if results else []
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error searching records with prefix '{prefix}': {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
@final
|
@final
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -968,4 +1017,20 @@ SQL_TEMPLATES = {
|
|||||||
WHERE (source_name = :source AND target_name = :target)
|
WHERE (source_name = :source AND target_name = :target)
|
||||||
AND workspace = :workspace
|
AND workspace = :workspace
|
||||||
""",
|
""",
|
||||||
|
# Search by prefix SQL templates
|
||||||
|
"search_entity_by_prefix": """
|
||||||
|
SELECT entity_id as id, name as entity_name, entity_type, description, content
|
||||||
|
FROM LIGHTRAG_GRAPH_NODES
|
||||||
|
WHERE entity_id LIKE :prefix_pattern AND workspace = :workspace
|
||||||
|
""",
|
||||||
|
"search_relationship_by_prefix": """
|
||||||
|
SELECT relation_id as id, source_name as src_id, target_name as tgt_id, keywords, description, content
|
||||||
|
FROM LIGHTRAG_GRAPH_EDGES
|
||||||
|
WHERE relation_id LIKE :prefix_pattern AND workspace = :workspace
|
||||||
|
""",
|
||||||
|
"search_chunk_by_prefix": """
|
||||||
|
SELECT chunk_id as id, content, tokens, chunk_order_index, full_doc_id
|
||||||
|
FROM LIGHTRAG_DOC_CHUNKS
|
||||||
|
WHERE chunk_id LIKE :prefix_pattern AND workspace = :workspace
|
||||||
|
""",
|
||||||
}
|
}
|
||||||
|
@@ -2044,6 +2044,9 @@ class LightRAG:
|
|||||||
# Delete old entity record from vector database
|
# Delete old entity record from vector database
|
||||||
old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
old_entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
||||||
await self.entities_vdb.delete([old_entity_id])
|
await self.entities_vdb.delete([old_entity_id])
|
||||||
|
logger.info(
|
||||||
|
f"Deleted old entity '{entity_name}' and its vector embedding from database"
|
||||||
|
)
|
||||||
|
|
||||||
# Update relationship vector representations
|
# Update relationship vector representations
|
||||||
for src, tgt, edge_data in relations_to_update:
|
for src, tgt, edge_data in relations_to_update:
|
||||||
@@ -2171,6 +2174,15 @@ class LightRAG:
|
|||||||
f"Relation from '{source_entity}' to '{target_entity}' does not exist"
|
f"Relation from '{source_entity}' to '{target_entity}' does not exist"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Important: First delete the old relation record from the vector database
|
||||||
|
old_relation_id = compute_mdhash_id(
|
||||||
|
source_entity + target_entity, prefix="rel-"
|
||||||
|
)
|
||||||
|
await self.relationships_vdb.delete([old_relation_id])
|
||||||
|
logger.info(
|
||||||
|
f"Deleted old relation record from vector database for relation {source_entity} -> {target_entity}"
|
||||||
|
)
|
||||||
|
|
||||||
# 2. Update relation information in the graph
|
# 2. Update relation information in the graph
|
||||||
new_edge_data = {**edge_data, **updated_data}
|
new_edge_data = {**edge_data, **updated_data}
|
||||||
await self.chunk_entity_relation_graph.upsert_edge(
|
await self.chunk_entity_relation_graph.upsert_edge(
|
||||||
@@ -2669,12 +2681,29 @@ class LightRAG:
|
|||||||
|
|
||||||
# 9. Delete source entities
|
# 9. Delete source entities
|
||||||
for entity_name in source_entities:
|
for entity_name in source_entities:
|
||||||
# Delete entity node
|
# Delete entity node from knowledge graph
|
||||||
await self.chunk_entity_relation_graph.delete_node(entity_name)
|
await self.chunk_entity_relation_graph.delete_node(entity_name)
|
||||||
# Delete record from vector database
|
|
||||||
|
# Delete entity record from vector database
|
||||||
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
entity_id = compute_mdhash_id(entity_name, prefix="ent-")
|
||||||
await self.entities_vdb.delete([entity_id])
|
await self.entities_vdb.delete([entity_id])
|
||||||
logger.info(f"Deleted source entity '{entity_name}'")
|
|
||||||
|
# Also ensure any relationships specific to this entity are deleted from vector DB
|
||||||
|
# This is a safety check, as these should have been transformed to the target entity already
|
||||||
|
entity_relation_prefix = compute_mdhash_id(entity_name, prefix="rel-")
|
||||||
|
relations_with_entity = await self.relationships_vdb.search_by_prefix(
|
||||||
|
entity_relation_prefix
|
||||||
|
)
|
||||||
|
if relations_with_entity:
|
||||||
|
relation_ids = [r["id"] for r in relations_with_entity]
|
||||||
|
await self.relationships_vdb.delete(relation_ids)
|
||||||
|
logger.info(
|
||||||
|
f"Deleted {len(relation_ids)} relation records for entity '{entity_name}' from vector database"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
f"Deleted source entity '{entity_name}' and its vector embedding from database"
|
||||||
|
)
|
||||||
|
|
||||||
# 10. Save changes
|
# 10. Save changes
|
||||||
await self._merge_entities_done()
|
await self._merge_entities_done()
|
||||||
|
Reference in New Issue
Block a user