Update delete_by_doc_id to use storage-agnostic graph methods

This commit is contained in:
zrguo
2025-03-04 16:36:58 +08:00
parent 1ee6c23a53
commit 3264f6a118

View File

@@ -1555,39 +1555,45 @@ class LightRAG:
await self.text_chunks.delete(chunk_ids) await self.text_chunks.delete(chunk_ids)
# 5. Find and process entities and relationships that have these chunks as source # 5. Find and process entities and relationships that have these chunks as source
# Get all nodes in the graph # Get all nodes and edges from the graph storage using storage-agnostic methods
nodes = self.chunk_entity_relation_graph._graph.nodes(data=True)
edges = self.chunk_entity_relation_graph._graph.edges(data=True)
# Track which entities and relationships need to be deleted or updated
entities_to_delete = set() entities_to_delete = set()
entities_to_update = {} # entity_name -> new_source_id entities_to_update = {} # entity_name -> new_source_id
relationships_to_delete = set() relationships_to_delete = set()
relationships_to_update = {} # (src, tgt) -> new_source_id relationships_to_update = {} # (src, tgt) -> new_source_id
# Process entities # Process entities - use storage-agnostic methods
for node, data in nodes: all_labels = await self.chunk_entity_relation_graph.get_all_labels()
if "source_id" in data: for node_label in all_labels:
node_data = await self.chunk_entity_relation_graph.get_node(node_label)
if node_data and "source_id" in node_data:
# Split source_id using GRAPH_FIELD_SEP # Split source_id using GRAPH_FIELD_SEP
sources = set(data["source_id"].split(GRAPH_FIELD_SEP)) sources = set(node_data["source_id"].split(GRAPH_FIELD_SEP))
sources.difference_update(chunk_ids) sources.difference_update(chunk_ids)
if not sources: if not sources:
entities_to_delete.add(node) entities_to_delete.add(node_label)
logger.debug( logger.debug(
f"Entity {node} marked for deletion - no remaining sources" f"Entity {node_label} marked for deletion - no remaining sources"
) )
else: else:
new_source_id = GRAPH_FIELD_SEP.join(sources) new_source_id = GRAPH_FIELD_SEP.join(sources)
entities_to_update[node] = new_source_id entities_to_update[node_label] = new_source_id
logger.debug( logger.debug(
f"Entity {node} will be updated with new source_id: {new_source_id}" f"Entity {node_label} will be updated with new source_id: {new_source_id}"
) )
# Process relationships # Process relationships
for src, tgt, data in edges: for node_label in all_labels:
if "source_id" in data: node_edges = await self.chunk_entity_relation_graph.get_node_edges(
node_label
)
if node_edges:
for src, tgt in node_edges:
edge_data = await self.chunk_entity_relation_graph.get_edge(
src, tgt
)
if edge_data and "source_id" in edge_data:
# Split source_id using GRAPH_FIELD_SEP # Split source_id using GRAPH_FIELD_SEP
sources = set(data["source_id"].split(GRAPH_FIELD_SEP)) sources = set(edge_data["source_id"].split(GRAPH_FIELD_SEP))
sources.difference_update(chunk_ids) sources.difference_update(chunk_ids)
if not sources: if not sources:
relationships_to_delete.add((src, tgt)) relationships_to_delete.add((src, tgt))
@@ -1613,9 +1619,12 @@ class LightRAG:
# Update entities # Update entities
for entity, new_source_id in entities_to_update.items(): for entity, new_source_id in entities_to_update.items():
node_data = self.chunk_entity_relation_graph._graph.nodes[entity] node_data = await self.chunk_entity_relation_graph.get_node(entity)
if node_data:
node_data["source_id"] = new_source_id node_data["source_id"] = new_source_id
await self.chunk_entity_relation_graph.upsert_node(entity, node_data) await self.chunk_entity_relation_graph.upsert_node(
entity, node_data
)
logger.debug( logger.debug(
f"Updated entity {entity} with new source_id: {new_source_id}" f"Updated entity {entity} with new source_id: {new_source_id}"
) )
@@ -1636,9 +1645,12 @@ class LightRAG:
# Update relationships # Update relationships
for (src, tgt), new_source_id in relationships_to_update.items(): for (src, tgt), new_source_id in relationships_to_update.items():
edge_data = self.chunk_entity_relation_graph._graph.edges[src, tgt] edge_data = await self.chunk_entity_relation_graph.get_edge(src, tgt)
if edge_data:
edge_data["source_id"] = new_source_id edge_data["source_id"] = new_source_id
await self.chunk_entity_relation_graph.upsert_edge(src, tgt, edge_data) await self.chunk_entity_relation_graph.upsert_edge(
src, tgt, edge_data
)
logger.debug( logger.debug(
f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}" f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
) )