cleanup code
This commit is contained in:
@@ -629,12 +629,7 @@ class LightRAG:
|
||||
# 4. Store original document
|
||||
for doc_id, doc in new_docs.items():
|
||||
await self.full_docs.upsert(
|
||||
{
|
||||
doc_id: {
|
||||
"content": doc["content"],
|
||||
"status": DocStatus.PENDING
|
||||
}
|
||||
}
|
||||
{doc_id: {"content": doc["content"], "status": DocStatus.PENDING}}
|
||||
)
|
||||
logger.info(f"Stored {len(new_docs)} new unique documents")
|
||||
|
||||
@@ -642,10 +637,14 @@ class LightRAG:
|
||||
"""Get pending documents, split into chunks, insert chunks"""
|
||||
# 1. get all pending and failed documents
|
||||
_todo_doc_keys = []
|
||||
|
||||
_failed_doc = await self.full_docs.get_by_status_and_ids(status=DocStatus.FAILED)
|
||||
_pendding_doc = await self.full_docs.get_by_status_and_ids(status=DocStatus.PENDING)
|
||||
|
||||
|
||||
_failed_doc = await self.full_docs.get_by_status_and_ids(
|
||||
status=DocStatus.FAILED
|
||||
)
|
||||
_pendding_doc = await self.full_docs.get_by_status_and_ids(
|
||||
status=DocStatus.PENDING
|
||||
)
|
||||
|
||||
if _failed_doc:
|
||||
_todo_doc_keys.extend([doc["id"] for doc in _failed_doc])
|
||||
if _pendding_doc:
|
||||
@@ -685,15 +684,19 @@ class LightRAG:
|
||||
)
|
||||
}
|
||||
chunk_cnt += len(chunks)
|
||||
|
||||
|
||||
try:
|
||||
# Store chunks in vector database
|
||||
await self.chunks_vdb.upsert(chunks)
|
||||
# Update doc status
|
||||
await self.text_chunks.upsert({**chunks, "status": DocStatus.PENDING})
|
||||
await self.text_chunks.upsert(
|
||||
{**chunks, "status": DocStatus.PENDING}
|
||||
)
|
||||
except Exception as e:
|
||||
# Mark as failed if any step fails
|
||||
await self.text_chunks.upsert({**chunks, "status": DocStatus.FAILED})
|
||||
await self.text_chunks.upsert(
|
||||
{**chunks, "status": DocStatus.FAILED}
|
||||
)
|
||||
raise e
|
||||
except Exception as e:
|
||||
import traceback
|
||||
@@ -707,8 +710,12 @@ class LightRAG:
|
||||
"""Get pending or failed chunks, extract entities and relationships from each chunk"""
|
||||
# 1. get all pending and failed chunks
|
||||
_todo_chunk_keys = []
|
||||
_failed_chunks = await self.text_chunks.get_by_status_and_ids(status=DocStatus.FAILED)
|
||||
_pendding_chunks = await self.text_chunks.get_by_status_and_ids(status=DocStatus.PENDING)
|
||||
_failed_chunks = await self.text_chunks.get_by_status_and_ids(
|
||||
status=DocStatus.FAILED
|
||||
)
|
||||
_pendding_chunks = await self.text_chunks.get_by_status_and_ids(
|
||||
status=DocStatus.PENDING
|
||||
)
|
||||
if _failed_chunks:
|
||||
_todo_chunk_keys.extend([doc["id"] for doc in _failed_chunks])
|
||||
if _pendding_chunks:
|
||||
@@ -742,11 +749,15 @@ class LightRAG:
|
||||
if maybe_new_kg is None:
|
||||
logger.info("No entities or relationships extracted!")
|
||||
# Update status to processed
|
||||
await self.text_chunks.upsert({chunk_id: {"status": DocStatus.PROCESSED}})
|
||||
await self.text_chunks.upsert(
|
||||
{chunk_id: {"status": DocStatus.PROCESSED}}
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Failed to extract entities and relationships")
|
||||
# Mark as failed if any step fails
|
||||
await self.text_chunks.upsert({chunk_id: {"status": DocStatus.FAILED}})
|
||||
await self.text_chunks.upsert(
|
||||
{chunk_id: {"status": DocStatus.FAILED}}
|
||||
)
|
||||
raise e
|
||||
|
||||
with tqdm_async(
|
||||
|
Reference in New Issue
Block a user