fix delete_by_doc_id

This commit is contained in:
zrguo
2025-03-04 13:22:33 +08:00
parent 0f430ca1a7
commit fd9f71e0ee
3 changed files with 42 additions and 8 deletions

View File

@@ -44,6 +44,15 @@ class JsonKVStorage(BaseKVStorage):
) )
write_json(data_dict, self._file_name) write_json(data_dict, self._file_name)
async def get_all(self) -> dict[str, Any]:
    """Return a snapshot of every record held by this storage.

    The copy is made while ``self._storage_lock`` is held. It is a shallow
    copy: the returned dictionary is a new object, but its values are the
    same objects referenced by ``self._data``.

    Returns:
        A new dictionary containing every key/value pair in ``self._data``.
    """
    async with self._storage_lock:
        snapshot = dict(self._data)
    return snapshot
async def get_by_id(self, id: str) -> dict[str, Any] | None: async def get_by_id(self, id: str) -> dict[str, Any] | None:
async with self._storage_lock: async with self._storage_lock:
return self._data.get(id) return self._data.get(id)

View File

@@ -174,6 +174,14 @@ class TiDBKVStorage(BaseKVStorage):
self.db = None self.db = None
################ QUERY METHODS ################ ################ QUERY METHODS ################
async def get_all(self) -> dict[str, Any]:
    """Get every stored entry as a freshly built dictionary.

    Returns:
        A shallow copy of ``self._data``, taken while ``self._storage_lock``
        is held.
    """
    # NOTE(review): this method relies on ``self._data`` and
    # ``self._storage_lock`` being set on the instance; neither assignment is
    # visible here -- TODO confirm this class actually defines them.
    lock = self._storage_lock
    async with lock:
        return dict(self._data)
async def get_by_id(self, id: str) -> dict[str, Any] | None: async def get_by_id(self, id: str) -> dict[str, Any] | None:
"""Fetch doc_full data by id.""" """Fetch doc_full data by id."""

View File

@@ -1431,14 +1431,22 @@ class LightRAG:
logger.debug(f"Starting deletion for document {doc_id}") logger.debug(f"Starting deletion for document {doc_id}")
doc_to_chunk_id = doc_id.replace("doc", "chunk") # 2. Get all chunks related to this document
# Find all chunks where full_doc_id equals the current doc_id
all_chunks = await self.text_chunks.get_all()
related_chunks = {
chunk_id: chunk_data
for chunk_id, chunk_data in all_chunks.items()
if isinstance(chunk_data, dict)
and chunk_data.get("full_doc_id") == doc_id
}
# 2. Get all related chunks if not related_chunks:
chunks = await self.text_chunks.get_by_id(doc_to_chunk_id) logger.warning(f"No chunks found for document {doc_id}")
if not chunks:
return return
chunk_ids = {chunks["full_doc_id"].replace("doc", "chunk")} # Get all related chunk IDs
chunk_ids = set(related_chunks.keys())
logger.debug(f"Found {len(chunk_ids)} chunks to delete") logger.debug(f"Found {len(chunk_ids)} chunks to delete")
# 3. Before deleting, check the related entities and relationships for these chunks # 3. Before deleting, check the related entities and relationships for these chunks
@@ -1626,9 +1634,18 @@ class LightRAG:
logger.warning(f"Document {doc_id} still exists in full_docs") logger.warning(f"Document {doc_id} still exists in full_docs")
# Verify if chunks have been deleted # Verify if chunks have been deleted
remaining_chunks = await self.text_chunks.get_by_id(doc_to_chunk_id) all_remaining_chunks = await self.text_chunks.get_all()
if remaining_chunks: remaining_related_chunks = {
logger.warning(f"Found {len(remaining_chunks)} remaining chunks") chunk_id: chunk_data
for chunk_id, chunk_data in all_remaining_chunks.items()
if isinstance(chunk_data, dict)
and chunk_data.get("full_doc_id") == doc_id
}
if remaining_related_chunks:
logger.warning(
f"Found {len(remaining_related_chunks)} remaining chunks"
)
# Verify entities and relationships # Verify entities and relationships
for chunk_id in chunk_ids: for chunk_id in chunk_ids: