cleaned code

2025-02-09 11:35:31 +01:00
parent 1159a69e4e
commit 4acf92dfd9
1 changed files with 21 additions and 6 deletions
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -487,14 +487,13 @@ class LightRAG:
        }
        # 3. Filter out already processed documents
-        _add_doc_keys: set[str] = set()
+        add_doc_keys: set[str] = set()
        for doc_id in new_docs.keys():
            current_doc = await self.doc_status.get_by_id(doc_id)
            if not current_doc or current_doc["status"] == DocStatus.FAILED:
-                _add_doc_keys.add(doc_id)
+                add_doc_keys.add(doc_id)
-        new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
+        new_docs = {k: v for k, v in new_docs.items() if k in add_doc_keys}
        if not new_docs:
            logger.info("All documents have been processed or are duplicates")
@@ -503,7 +502,7 @@ class LightRAG:
        # 4. Store original document
        for doc_id, doc in new_docs.items():
            await self.full_docs.upsert(
-                {doc_id: {"content": doc["content"], "status": DocStatus.PENDING}}
+                {doc_id: doc}
            )
        logger.info(f"Stored {len(new_docs)} new unique documents")
@@ -610,7 +609,23 @@ class LightRAG:
                    continue
    async def apipeline_process_extract_graph(self):
-        """Get pendding or failed chunks, extract entities and relationships from each chunk"""
+        """
+        Process pending or failed chunks to extract entities and relationships.
+        This method retrieves all chunks that are currently marked as pending or have previously failed.
+        It then extracts entities and relationships from each chunk and updates the status accordingly.
+        Steps:
+        1. Retrieve all pending and failed chunks.
+        2. For each chunk, attempt to extract entities and relationships.
+        3. Update the chunk's status to processed if successful, or failed if an error occurs.
+        Raises:
+            Exception: If there is an error during the extraction process.
+        Returns:
+            None
+        """
        # 1. get all pending and failed chunks
        to_process_doc_keys: list[str] = []