Update delete_by_doc_id to use storage-agnostic graph methods instead of direct _graph access
This commit is contained in:
@@ -1555,39 +1555,45 @@ class LightRAG:
|
|||||||
await self.text_chunks.delete(chunk_ids)
|
await self.text_chunks.delete(chunk_ids)
|
||||||
|
|
||||||
# 5. Find and process entities and relationships that have these chunks as source
|
# 5. Find and process entities and relationships that have these chunks as source
|
||||||
# Get all nodes in the graph
|
# Get all nodes and edges from the graph storage using storage-agnostic methods
|
||||||
nodes = self.chunk_entity_relation_graph._graph.nodes(data=True)
|
|
||||||
edges = self.chunk_entity_relation_graph._graph.edges(data=True)
|
|
||||||
|
|
||||||
# Track which entities and relationships need to be deleted or updated
|
|
||||||
entities_to_delete = set()
|
entities_to_delete = set()
|
||||||
entities_to_update = {} # entity_name -> new_source_id
|
entities_to_update = {} # entity_name -> new_source_id
|
||||||
relationships_to_delete = set()
|
relationships_to_delete = set()
|
||||||
relationships_to_update = {} # (src, tgt) -> new_source_id
|
relationships_to_update = {} # (src, tgt) -> new_source_id
|
||||||
|
|
||||||
# Process entities
|
# Process entities - use storage-agnostic methods
|
||||||
for node, data in nodes:
|
all_labels = await self.chunk_entity_relation_graph.get_all_labels()
|
||||||
if "source_id" in data:
|
for node_label in all_labels:
|
||||||
|
node_data = await self.chunk_entity_relation_graph.get_node(node_label)
|
||||||
|
if node_data and "source_id" in node_data:
|
||||||
# Split source_id using GRAPH_FIELD_SEP
|
# Split source_id using GRAPH_FIELD_SEP
|
||||||
sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
|
sources = set(node_data["source_id"].split(GRAPH_FIELD_SEP))
|
||||||
sources.difference_update(chunk_ids)
|
sources.difference_update(chunk_ids)
|
||||||
if not sources:
|
if not sources:
|
||||||
entities_to_delete.add(node)
|
entities_to_delete.add(node_label)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Entity {node} marked for deletion - no remaining sources"
|
f"Entity {node_label} marked for deletion - no remaining sources"
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
new_source_id = GRAPH_FIELD_SEP.join(sources)
|
new_source_id = GRAPH_FIELD_SEP.join(sources)
|
||||||
entities_to_update[node] = new_source_id
|
entities_to_update[node_label] = new_source_id
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Entity {node} will be updated with new source_id: {new_source_id}"
|
f"Entity {node_label} will be updated with new source_id: {new_source_id}"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Process relationships
|
# Process relationships
|
||||||
for src, tgt, data in edges:
|
for node_label in all_labels:
|
||||||
if "source_id" in data:
|
node_edges = await self.chunk_entity_relation_graph.get_node_edges(
|
||||||
|
node_label
|
||||||
|
)
|
||||||
|
if node_edges:
|
||||||
|
for src, tgt in node_edges:
|
||||||
|
edge_data = await self.chunk_entity_relation_graph.get_edge(
|
||||||
|
src, tgt
|
||||||
|
)
|
||||||
|
if edge_data and "source_id" in edge_data:
|
||||||
# Split source_id using GRAPH_FIELD_SEP
|
# Split source_id using GRAPH_FIELD_SEP
|
||||||
sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
|
sources = set(edge_data["source_id"].split(GRAPH_FIELD_SEP))
|
||||||
sources.difference_update(chunk_ids)
|
sources.difference_update(chunk_ids)
|
||||||
if not sources:
|
if not sources:
|
||||||
relationships_to_delete.add((src, tgt))
|
relationships_to_delete.add((src, tgt))
|
||||||
@@ -1613,9 +1619,12 @@ class LightRAG:
|
|||||||
|
|
||||||
# Update entities
|
# Update entities
|
||||||
for entity, new_source_id in entities_to_update.items():
|
for entity, new_source_id in entities_to_update.items():
|
||||||
node_data = self.chunk_entity_relation_graph._graph.nodes[entity]
|
node_data = await self.chunk_entity_relation_graph.get_node(entity)
|
||||||
|
if node_data:
|
||||||
node_data["source_id"] = new_source_id
|
node_data["source_id"] = new_source_id
|
||||||
await self.chunk_entity_relation_graph.upsert_node(entity, node_data)
|
await self.chunk_entity_relation_graph.upsert_node(
|
||||||
|
entity, node_data
|
||||||
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Updated entity {entity} with new source_id: {new_source_id}"
|
f"Updated entity {entity} with new source_id: {new_source_id}"
|
||||||
)
|
)
|
||||||
@@ -1636,9 +1645,12 @@ class LightRAG:
|
|||||||
|
|
||||||
# Update relationships
|
# Update relationships
|
||||||
for (src, tgt), new_source_id in relationships_to_update.items():
|
for (src, tgt), new_source_id in relationships_to_update.items():
|
||||||
edge_data = self.chunk_entity_relation_graph._graph.edges[src, tgt]
|
edge_data = await self.chunk_entity_relation_graph.get_edge(src, tgt)
|
||||||
|
if edge_data:
|
||||||
edge_data["source_id"] = new_source_id
|
edge_data["source_id"] = new_source_id
|
||||||
await self.chunk_entity_relation_graph.upsert_edge(src, tgt, edge_data)
|
await self.chunk_entity_relation_graph.upsert_edge(
|
||||||
|
src, tgt, edge_data
|
||||||
|
)
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
|
f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
|
||||||
)
|
)
|
||||||
|
Reference in New Issue
Block a user