From d70d7ff20e875402b2259a9614ad29b465b8eb1a Mon Sep 17 00:00:00 2001
From: Yannick Stephan
Date: Sun, 9 Feb 2025 20:05:59 +0100
Subject: [PATCH] added at call check

---
 lightrag/lightrag.py | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index bf395e29..bf03447e 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -538,16 +538,6 @@ class LightRAG:
             logger.info("All documents have been processed or are duplicates")
             return

-        to_process_docs_ids = set(to_process_docs.keys())
-
-        # Get allready processed documents (text chunks and full docs)
-        text_chunks_processed_doc_ids = await self.text_chunks.filter_keys(
-            to_process_docs_ids
-        )
-        full_docs_processed_doc_ids = await self.full_docs.filter_keys(
-            to_process_docs_ids
-        )
-
         # 2. split docs into chunks, insert chunks, update doc status
         batch_size = self.addon_params.get("insert_batch_size", 10)
         batch_docs_list = [
@@ -597,14 +587,15 @@ class LightRAG:
                     await self._process_entity_relation_graph(chunks)

                     tasks[id_doc] = []
+
                     # Check if document already processed the doc
-                    if id_doc not in full_docs_processed_doc_ids:
+                    if await self.full_docs.get_by_id(id_doc) is None:
                         tasks[id_doc].append(
                             self.full_docs.upsert({id_doc: {"content": status_doc.content}})
                         )

                     # Check if chunks already processed the doc
-                    if id_doc not in text_chunks_processed_doc_ids:
+                    if await self.text_chunks.get_by_id(id_doc) is None:
                         tasks[id_doc].append(self.text_chunks.upsert(chunks))

                     # Process document (text chunks and full docs) in parallel
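
Note on the change: the removed block pre-fetched the sets of already-processed ids with filter_keys before the batch loop; the replacement asks the storage backend at the point of use, via get_by_id, whether the document is already stored before scheduling an upsert. The sketch below illustrates that check-at-call-time pattern in isolation; KVStore and enqueue_doc are hypothetical stand-ins written for this example, not LightRAG's actual storage classes.

import asyncio


class KVStore:
    """Toy in-memory async KV store; get_by_id and upsert mirror the calls used in the diff."""

    def __init__(self):
        self._data: dict = {}

    async def get_by_id(self, key: str):
        return self._data.get(key)

    async def upsert(self, data: dict):
        self._data.update(data)


async def enqueue_doc(doc_id: str, content: str, full_docs: KVStore) -> bool:
    # Check at call time instead of consulting a pre-fetched set of processed ids.
    if await full_docs.get_by_id(doc_id) is None:
        await full_docs.upsert({doc_id: {"content": content}})
        return True   # newly inserted
    return False      # already present, skipped


async def main():
    store = KVStore()
    print(await enqueue_doc("doc-1", "hello world", store))  # True: first insert
    print(await enqueue_doc("doc-1", "hello world", store))  # False: skipped on re-run


asyncio.run(main())

The trade-off mirrored here is one extra lookup per document at write time in exchange for not materializing the full set of processed ids up front.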