cleaned docs

Yannick Stephan
2025-02-09 14:36:49 +01:00
parent 37943a65a3
commit abcdcd5a73
2 changed files with 31 additions and 27 deletions

@@ -121,9 +121,8 @@ async def main():
         texts = [x for x in all_text.split("\n") if x]
         # New mode use pipeline
-        await rag.apipeline_process_documents(texts)
-        await rag.apipeline_process_chunks()
-        await rag.apipeline_process_extract_graph()
+        await rag.apipeline_enqueue_documents(texts)
+        await rag.apipeline_process_enqueue_documents()
         # Old method use ainsert
         # await rag.ainsert(texts)
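
For reference, a minimal sketch of the two-stage ingestion flow introduced above, assuming rag is an already-initialized LightRAG instance and texts is a list of document strings (names taken from the hunk; the wrapper function is illustrative only):

    import asyncio

    async def ingest(rag, texts):
        # Stage 1: enqueue the raw documents for processing
        await rag.apipeline_enqueue_documents(texts)
        # Stage 2: process everything enqueued (replaces the old separate
        # chunk-processing and graph-extraction calls)
        await rag.apipeline_process_enqueue_documents()

    # usage: asyncio.run(ingest(rag, texts))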

@@ -395,7 +395,9 @@ class LightRAG:
             split_by_character is None, this parameter is ignored.
         """
         await self.apipeline_process_documents(string_or_strings)
-        await self.apipeline_process_enqueue_documents(split_by_character, split_by_character_only)
+        await self.apipeline_process_enqueue_documents(
+            split_by_character, split_by_character_only
+        )

     def insert_custom_chunks(self, full_text: str, text_chunks: list[str]):
         loop = always_get_an_event_loop()
@@ -551,20 +553,23 @@ class LightRAG:
             return

         # Get allready processed documents (text chunks and full docs)
-        text_chunks_processed_doc_ids = await self.text_chunks.filter_keys(pending_doc_ids)
+        text_chunks_processed_doc_ids = await self.text_chunks.filter_keys(
+            pending_doc_ids
+        )
         full_docs_processed_doc_ids = await self.full_docs.filter_keys(pending_doc_ids)

         # 2. split docs into chunks, insert chunks, update doc status
         batch_size = self.addon_params.get("insert_batch_size", 10)
         batch_docs_list = [
-            pending_doc_ids[i : i + batch_size] for i in range(0, len(pending_doc_ids), batch_size)
+            pending_doc_ids[i : i + batch_size]
+            for i in range(0, len(pending_doc_ids), batch_size)
         ]

         # 3. iterate over batches
         tasks: dict[str, list[Coroutine[Any, Any, None]]] = {}
         for batch_idx, doc_ids in tqdm_async(
             enumerate(batch_docs_list),
-            desc=f"Process Batches",
+            desc="Process Batches",
         ):
             # 4. iterate over batch
             for doc_id in tqdm_async(
@@ -607,7 +612,9 @@ class LightRAG:
                 # Check if document already processed the doc
                 if doc_id not in full_docs_processed_doc_ids:
                     tasks[doc_id].append(
-                        self.full_docs.upsert({doc_id: {"content": status_doc["content"]}})
+                        self.full_docs.upsert(
+                            {doc_id: {"content": status_doc["content"]}}
+                        )
                     )

                 # Check if chunks already processed the doc
@@ -630,9 +637,7 @@ class LightRAG:
                     await self._insert_done()

                 except Exception as e:
-                    logger.error(
-                        f"Failed to process document {doc_id}: {str(e)}"
-                    )
+                    logger.error(f"Failed to process document {doc_id}: {str(e)}")
                     await self.doc_status.upsert(
                         {
                             doc_id: {