fix linting errors
@@ -735,7 +735,9 @@ class LightRAG:
             logger.debug(f"Starting deletion for document {doc_id}")

             # 2. Get all related chunks
-            chunks = await self.text_chunks.filter(lambda x: x.get("full_doc_id") == doc_id)
+            chunks = await self.text_chunks.filter(
+                lambda x: x.get("full_doc_id") == doc_id
+            )
             chunk_ids = list(chunks.keys())
             logger.debug(f"Found {len(chunk_ids)} chunks to delete")

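The wrapped `filter` call selects only the chunks whose `full_doc_id` matches the document being deleted. A minimal sketch of that selection over a plain in-memory dict (the real `text_chunks.filter` is async and backend-specific; the data below is hypothetical):

from typing import Any, Callable


def filter_chunks(
    chunks: dict[str, dict[str, Any]],
    predicate: Callable[[dict[str, Any]], bool],
) -> dict[str, dict[str, Any]]:
    # Keep only the chunk records whose payload satisfies the predicate.
    return {cid: data for cid, data in chunks.items() if predicate(data)}


# Hypothetical data shaped like LightRAG's text-chunk payloads.
store = {
    "chunk-1": {"full_doc_id": "doc-A", "content": "..."},
    "chunk-2": {"full_doc_id": "doc-B", "content": "..."},
}
doc_chunks = filter_chunks(store, lambda x: x.get("full_doc_id") == "doc-A")
chunk_ids = list(doc_chunks.keys())  # ["chunk-1"]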
@@ -743,14 +745,16 @@ class LightRAG:
             for chunk_id in chunk_ids:
                 # Check entities
                 entities = [
-                    dp for dp in self.entities_vdb.client_storage["data"]
+                    dp
+                    for dp in self.entities_vdb.client_storage["data"]
                     if dp.get("source_id") == chunk_id
                 ]
                 logger.debug(f"Chunk {chunk_id} has {len(entities)} related entities")

                 # Check relationships
                 relations = [
-                    dp for dp in self.relationships_vdb.client_storage["data"]
+                    dp
+                    for dp in self.relationships_vdb.client_storage["data"]
                     if dp.get("source_id") == chunk_id
                 ]
                 logger.debug(f"Chunk {chunk_id} has {len(relations)} related relations")
@@ -775,31 +779,39 @@ class LightRAG:

             # Process entities
             for node, data in nodes:
-                if 'source_id' in data:
+                if "source_id" in data:
                     # Split source_id using GRAPH_FIELD_SEP
-                    sources = set(data['source_id'].split(GRAPH_FIELD_SEP))
+                    sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
                     sources.difference_update(chunk_ids)
                     if not sources:
                         entities_to_delete.add(node)
-                        logger.debug(f"Entity {node} marked for deletion - no remaining sources")
+                        logger.debug(
+                            f"Entity {node} marked for deletion - no remaining sources"
+                        )
                     else:
                         new_source_id = GRAPH_FIELD_SEP.join(sources)
                         entities_to_update[node] = new_source_id
-                        logger.debug(f"Entity {node} will be updated with new source_id: {new_source_id}")
+                        logger.debug(
+                            f"Entity {node} will be updated with new source_id: {new_source_id}"
+                        )

             # Process relationships
             for src, tgt, data in edges:
-                if 'source_id' in data:
+                if "source_id" in data:
                     # Split source_id using GRAPH_FIELD_SEP
-                    sources = set(data['source_id'].split(GRAPH_FIELD_SEP))
+                    sources = set(data["source_id"].split(GRAPH_FIELD_SEP))
                     sources.difference_update(chunk_ids)
                     if not sources:
                         relationships_to_delete.add((src, tgt))
-                        logger.debug(f"Relationship {src}-{tgt} marked for deletion - no remaining sources")
+                        logger.debug(
+                            f"Relationship {src}-{tgt} marked for deletion - no remaining sources"
+                        )
                     else:
                         new_source_id = GRAPH_FIELD_SEP.join(sources)
                         relationships_to_update[(src, tgt)] = new_source_id
-                        logger.debug(f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}")
+                        logger.debug(
+                            f"Relationship {src}-{tgt} will be updated with new source_id: {new_source_id}"
+                        )

             # Delete entities
             if entities_to_delete:
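The hunk above carries the pruning rule at the heart of the deletion: split each node's or edge's `source_id` on `GRAPH_FIELD_SEP`, drop the chunk ids being deleted, then either delete the item or rewrite its `source_id` with what remains. A standalone sketch of that rule, assuming `GRAPH_FIELD_SEP` is the "<SEP>" separator (an assumed value; the real constant is imported elsewhere in the module):

GRAPH_FIELD_SEP = "<SEP>"  # assumed value of LightRAG's separator constant


def prune_source_id(source_id, deleted_chunk_ids):
    """Return the rewritten source_id, or None when nothing remains (delete the item)."""
    sources = set(source_id.split(GRAPH_FIELD_SEP))
    sources.difference_update(deleted_chunk_ids)
    if not sources:
        return None
    return GRAPH_FIELD_SEP.join(sources)


# An entity backed by two chunks, one of which is being deleted:
print(prune_source_id("chunk-1<SEP>chunk-2", {"chunk-1"}))  # -> "chunk-2"
print(prune_source_id("chunk-1", {"chunk-1"}))              # -> None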
@@ -812,9 +824,11 @@ class LightRAG:
             # Update entities
             for entity, new_source_id in entities_to_update.items():
                 node_data = self.chunk_entity_relation_graph._graph.nodes[entity]
-                node_data['source_id'] = new_source_id
+                node_data["source_id"] = new_source_id
                 await self.chunk_entity_relation_graph.upsert_node(entity, node_data)
-                logger.debug(f"Updated entity {entity} with new source_id: {new_source_id}")
+                logger.debug(
+                    f"Updated entity {entity} with new source_id: {new_source_id}"
+                )

             # Delete relationships
             if relationships_to_delete:
@@ -823,15 +837,21 @@ class LightRAG:
                     rel_id_1 = compute_mdhash_id(tgt + src, prefix="rel-")
                     await self.relationships_vdb.delete([rel_id_0, rel_id_1])
                     logger.debug(f"Deleted relationship {src}-{tgt} from vector DB")
-                self.chunk_entity_relation_graph.remove_edges(list(relationships_to_delete))
-                logger.debug(f"Deleted {len(relationships_to_delete)} relationships from graph")
+                self.chunk_entity_relation_graph.remove_edges(
+                    list(relationships_to_delete)
+                )
+                logger.debug(
+                    f"Deleted {len(relationships_to_delete)} relationships from graph"
+                )

             # Update relationships
             for (src, tgt), new_source_id in relationships_to_update.items():
                 edge_data = self.chunk_entity_relation_graph._graph.edges[src, tgt]
-                edge_data['source_id'] = new_source_id
+                edge_data["source_id"] = new_source_id
                 await self.chunk_entity_relation_graph.upsert_edge(src, tgt, edge_data)
-                logger.debug(f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}")
+                logger.debug(
+                    f"Updated relationship {src}-{tgt} with new source_id: {new_source_id}"
+                )

             # 6. Delete original document and status
             await self.full_docs.delete([doc_id])
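Both `rel_id_0` and `rel_id_1` are removed because a relationship's vector-DB id is a hash of the two endpoint names concatenated, and the pair may have been hashed in either order at insert time. A sketch of that id convention, assuming `compute_mdhash_id` is a prefixed MD5 hex digest (an assumption; only its call signature appears in the diff):

from hashlib import md5


def compute_mdhash_id(content: str, prefix: str = "") -> str:
    # Assumed implementation: prefix + MD5 hex digest of the content.
    return prefix + md5(content.encode()).hexdigest()


src, tgt = '"ALICE"', '"BOB"'
rel_id_0 = compute_mdhash_id(src + tgt, prefix="rel-")
rel_id_1 = compute_mdhash_id(tgt + src, prefix="rel-")
# Deleting both ids covers whichever ordering was used when the edge was inserted.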
@@ -863,19 +883,27 @@ class LightRAG:
                 for chunk_id in chunk_ids:
                     # Check entities
                     entities_with_chunk = [
-                        dp for dp in self.entities_vdb.client_storage["data"]
-                        if chunk_id in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
+                        dp
+                        for dp in self.entities_vdb.client_storage["data"]
+                        if chunk_id
+                        in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
                     ]
                     if entities_with_chunk:
-                        logger.error(f"Found {len(entities_with_chunk)} entities still referencing chunk {chunk_id}")
+                        logger.error(
+                            f"Found {len(entities_with_chunk)} entities still referencing chunk {chunk_id}"
+                        )

                     # Check relationships
                     relations_with_chunk = [
-                        dp for dp in self.relationships_vdb.client_storage["data"]
-                        if chunk_id in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
+                        dp
+                        for dp in self.relationships_vdb.client_storage["data"]
+                        if chunk_id
+                        in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
                     ]
                     if relations_with_chunk:
-                        logger.error(f"Found {len(relations_with_chunk)} relations still referencing chunk {chunk_id}")
+                        logger.error(
+                            f"Found {len(relations_with_chunk)} relations still referencing chunk {chunk_id}"
+                        )

             await verify_deletion()

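`verify_deletion` re-scans both vector stores for records whose `source_id` still mentions a deleted chunk. The same check, written as a small standalone helper over a plain list of records (the sample record is hypothetical):

GRAPH_FIELD_SEP = "<SEP>"  # assumed separator, as above


def dangling_refs(records, chunk_id):
    # Records whose source_id still lists the supposedly deleted chunk.
    return [
        dp
        for dp in records
        if chunk_id in (dp.get("source_id") or "").split(GRAPH_FIELD_SEP)
    ]


leftover = dangling_refs(
    [{"entity_name": '"ALICE"', "source_id": "chunk-1<SEP>chunk-2"}],  # hypothetical record
    "chunk-1",
)
print(len(leftover))  # 1 -> deletion of chunk-1 is incomplete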
@@ -886,7 +914,9 @@ class LightRAG:
         """Synchronous version of adelete"""
         return asyncio.run(self.adelete_by_doc_id(doc_id))

-    async def get_entity_info(self, entity_name: str, include_vector_data: bool = False):
+    async def get_entity_info(
+        self, entity_name: str, include_vector_data: bool = False
+    ):
         """Get detailed information of an entity

         Args:
@@ -904,7 +934,7 @@ class LightRAG:

         # Get information from the graph
         node_data = await self.chunk_entity_relation_graph.get_node(entity_name)
-        source_id = node_data.get('source_id') if node_data else None
+        source_id = node_data.get("source_id") if node_data else None

         result = {
             "entity_name": entity_name,
@@ -929,12 +959,15 @@ class LightRAG:
         """
         try:
             import tracemalloc

             tracemalloc.start()
             return asyncio.run(self.get_entity_info(entity_name, include_vector_data))
         finally:
             tracemalloc.stop()

-    async def get_relation_info(self, src_entity: str, tgt_entity: str, include_vector_data: bool = False):
+    async def get_relation_info(
+        self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
+    ):
         """Get detailed information of a relationship

         Args:
@@ -954,8 +987,10 @@ class LightRAG:
         tgt_entity = f'"{tgt_entity.upper()}"'

         # Get information from the graph
-        edge_data = await self.chunk_entity_relation_graph.get_edge(src_entity, tgt_entity)
-        source_id = edge_data.get('source_id') if edge_data else None
+        edge_data = await self.chunk_entity_relation_graph.get_edge(
+            src_entity, tgt_entity
+        )
+        source_id = edge_data.get("source_id") if edge_data else None

         result = {
             "src_entity": src_entity,
@@ -972,7 +1007,9 @@ class LightRAG:

         return result

-    def get_relation_info_sync(self, src_entity: str, tgt_entity: str, include_vector_data: bool = False):
+    def get_relation_info_sync(
+        self, src_entity: str, tgt_entity: str, include_vector_data: bool = False
+    ):
         """Synchronous version of getting relationship information

         Args:
@@ -982,8 +1019,10 @@ class LightRAG:
         """
         try:
             import tracemalloc

             tracemalloc.start()
-            return asyncio.run(self.get_relation_info(src_entity, tgt_entity, include_vector_data))
+            return asyncio.run(
+                self.get_relation_info(src_entity, tgt_entity, include_vector_data)
+            )
         finally:
             tracemalloc.stop()

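The remaining hunks touch the NanoVectorDBStorage class. Before those: the LightRAG wrappers reformatted above all follow one pattern, a synchronous method that drives its coroutine with `asyncio.run` (the info getters also bracket the call with `tracemalloc.start()`/`stop()`). A hedged usage sketch, assuming an already-configured instance named `rag`; `delete_by_doc_id` and `get_entity_info_sync` are assumed wrapper names, since only `get_relation_info_sync` is named in the diff:

# Hypothetical usage of the synchronous wrappers; names flagged below are assumptions.
rag.delete_by_doc_id("doc-123")  # assumed name; wraps adelete_by_doc_id via asyncio.run
info = rag.get_entity_info_sync("Alice")  # assumed name; wraps get_entity_info
rel = rag.get_relation_info_sync("Alice", "Bob", include_vector_data=True)
# Entity names are normalized internally (e.g. tgt_entity = f'"{tgt_entity.upper()}"' above).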
@@ -188,14 +188,18 @@ class NanoVectorDBStorage(BaseVectorStorage):
         """
         try:
             self._client.delete(ids)
-            logger.info(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
+            logger.info(
+                f"Successfully deleted {len(ids)} vectors from {self.namespace}"
+            )
         except Exception as e:
             logger.error(f"Error while deleting vectors from {self.namespace}: {e}")

     async def delete_entity(self, entity_name: str):
         try:
             entity_id = compute_mdhash_id(entity_name, prefix="ent-")
-            logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
+            logger.debug(
+                f"Attempting to delete entity {entity_name} with ID {entity_id}"
+            )
             # Check if the entity exists
             if self._client.get([entity_id]):
                 await self.delete([entity_id])
@@ -208,7 +212,8 @@ class NanoVectorDBStorage(BaseVectorStorage):
     async def delete_entity_relation(self, entity_name: str):
         try:
             relations = [
-                dp for dp in self.client_storage["data"]
+                dp
+                for dp in self.client_storage["data"]
                 if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
             ]
             logger.debug(f"Found {len(relations)} relations for entity {entity_name}")
@@ -216,7 +221,9 @@ class NanoVectorDBStorage(BaseVectorStorage):

             if ids_to_delete:
                 await self.delete(ids_to_delete)
-                logger.debug(f"Deleted {len(ids_to_delete)} relations for {entity_name}")
+                logger.debug(
+                    f"Deleted {len(ids_to_delete)} relations for {entity_name}"
+                )
             else:
                 logger.debug(f"No relations found for entity {entity_name}")
         except Exception as e:
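On the storage side, `delete_entity_relation` first collects every relation record that touches the entity and then deletes them by id. A minimal sketch of that scan, assuming the stored records expose `__id__`, `src_id` and `tgt_id` fields (the id field name is an assumption; the diff only shows the `src_id`/`tgt_id` filter):

def relation_ids_for_entity(data, entity_name):
    # Collect vector-store ids of every relation that touches the entity.
    relations = [
        dp
        for dp in data
        if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
    ]
    return [dp["__id__"] for dp in relations]


sample = [  # hypothetical records
    {"__id__": "rel-abc", "src_id": '"ALICE"', "tgt_id": '"BOB"'},
    {"__id__": "rel-def", "src_id": '"CAROL"', "tgt_id": '"DAVE"'},
]
print(relation_ids_for_entity(sample, '"ALICE"'))  # -> ["rel-abc"]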