fix delete_by_doc_id

This commit is contained in:
zrguo
2025-03-03 19:17:34 +08:00
parent ef2a5ad191
commit a27cf5a09e

View File

@@ -395,6 +395,7 @@ class LightRAG:
namespace=make_namespace( namespace=make_namespace(
self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE self.namespace_prefix, NameSpace.KV_STORE_LLM_RESPONSE_CACHE
), ),
global_config=asdict(self),
embedding_func=self.embedding_func, embedding_func=self.embedding_func,
) )
@@ -1421,17 +1422,19 @@ class LightRAG:
# 3. Before deleting, check the related entities and relationships for these chunks # 3. Before deleting, check the related entities and relationships for these chunks
for chunk_id in chunk_ids: for chunk_id in chunk_ids:
# Check entities # Check entities
entities_storage = await self.entities_vdb.client_storage
entities = [ entities = [
dp dp
for dp in self.entities_vdb.client_storage["data"] for dp in entities_storage["data"]
if chunk_id in dp.get("source_id") if chunk_id in dp.get("source_id")
] ]
logger.debug(f"Chunk {chunk_id} has {len(entities)} related entities") logger.debug(f"Chunk {chunk_id} has {len(entities)} related entities")
# Check relationships # Check relationships
relationships_storage = await self.relationships_vdb.client_storage
relations = [ relations = [
dp dp
for dp in self.relationships_vdb.client_storage["data"] for dp in relationships_storage["data"]
if chunk_id in dp.get("source_id") if chunk_id in dp.get("source_id")
] ]
logger.debug(f"Chunk {chunk_id} has {len(relations)} related relations") logger.debug(f"Chunk {chunk_id} has {len(relations)} related relations")
@@ -1495,7 +1498,9 @@ class LightRAG:
for entity in entities_to_delete: for entity in entities_to_delete:
await self.entities_vdb.delete_entity(entity) await self.entities_vdb.delete_entity(entity)
logger.debug(f"Deleted entity {entity} from vector DB") logger.debug(f"Deleted entity {entity} from vector DB")
self.chunk_entity_relation_graph.remove_nodes(list(entities_to_delete)) await self.chunk_entity_relation_graph.remove_nodes(
list(entities_to_delete)
)
logger.debug(f"Deleted {len(entities_to_delete)} entities from graph") logger.debug(f"Deleted {len(entities_to_delete)} entities from graph")
# Update entities # Update entities
@@ -1514,7 +1519,7 @@ class LightRAG:
rel_id_1 = compute_mdhash_id(tgt + src, prefix="rel-") rel_id_1 = compute_mdhash_id(tgt + src, prefix="rel-")
await self.relationships_vdb.delete([rel_id_0, rel_id_1]) await self.relationships_vdb.delete([rel_id_0, rel_id_1])
logger.debug(f"Deleted relationship {src}-{tgt} from vector DB") logger.debug(f"Deleted relationship {src}-{tgt} from vector DB")
self.chunk_entity_relation_graph.remove_edges( await self.chunk_entity_relation_graph.remove_edges(
list(relationships_to_delete) list(relationships_to_delete)
) )
logger.debug( logger.debug(
@@ -1545,9 +1550,10 @@ class LightRAG:
async def process_data(data_type, vdb, chunk_id): async def process_data(data_type, vdb, chunk_id):
# Check data (entities or relationships) # Check data (entities or relationships)
storage = await vdb.client_storage
data_with_chunk = [ data_with_chunk = [
dp dp
for dp in vdb.client_storage["data"] for dp in storage["data"]
if chunk_id in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP) if chunk_id in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
] ]