cleaned code
This commit is contained in:
@@ -487,14 +487,13 @@ class LightRAG:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# 3. Filter out already processed documents
|
# 3. Filter out already processed documents
|
||||||
_add_doc_keys: set[str] = set()
|
add_doc_keys: set[str] = set()
|
||||||
for doc_id in new_docs.keys():
|
for doc_id in new_docs.keys():
|
||||||
current_doc = await self.doc_status.get_by_id(doc_id)
|
current_doc = await self.doc_status.get_by_id(doc_id)
|
||||||
|
|
||||||
if not current_doc or current_doc["status"] == DocStatus.FAILED:
|
if not current_doc or current_doc["status"] == DocStatus.FAILED:
|
||||||
_add_doc_keys.add(doc_id)
|
add_doc_keys.add(doc_id)
|
||||||
|
|
||||||
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
|
new_docs = {k: v for k, v in new_docs.items() if k in add_doc_keys}
|
||||||
|
|
||||||
if not new_docs:
|
if not new_docs:
|
||||||
logger.info("All documents have been processed or are duplicates")
|
logger.info("All documents have been processed or are duplicates")
|
||||||
@@ -503,7 +502,7 @@ class LightRAG:
|
|||||||
# 4. Store original document
|
# 4. Store original document
|
||||||
for doc_id, doc in new_docs.items():
|
for doc_id, doc in new_docs.items():
|
||||||
await self.full_docs.upsert(
|
await self.full_docs.upsert(
|
||||||
{doc_id: {"content": doc["content"], "status": DocStatus.PENDING}}
|
{doc_id: doc}
|
||||||
)
|
)
|
||||||
logger.info(f"Stored {len(new_docs)} new unique documents")
|
logger.info(f"Stored {len(new_docs)} new unique documents")
|
||||||
|
|
||||||
@@ -610,7 +609,23 @@ class LightRAG:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
async def apipeline_process_extract_graph(self):
|
async def apipeline_process_extract_graph(self):
|
||||||
"""Get pendding or failed chunks, extract entities and relationships from each chunk"""
|
"""
|
||||||
|
Process pending or failed chunks to extract entities and relationships.
|
||||||
|
|
||||||
|
This method retrieves all chunks that are currently marked as pending or have previously failed.
|
||||||
|
It then extracts entities and relationships from each chunk and updates the status accordingly.
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
1. Retrieve all pending and failed chunks.
|
||||||
|
2. For each chunk, attempt to extract entities and relationships.
|
||||||
|
3. Update the chunk's status to processed if successful, or failed if an error occurs.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If there is an error during the extraction process.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
None
|
||||||
|
"""
|
||||||
# 1. get all pending and failed chunks
|
# 1. get all pending and failed chunks
|
||||||
to_process_doc_keys: list[str] = []
|
to_process_doc_keys: list[str] = []
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user