From 55cfb4dab17b52d307ded3edd10cc9c1103d6056 Mon Sep 17 00:00:00 2001
From: Yannick Stephan
Date: Sun, 9 Feb 2025 19:24:41 +0100
Subject: [PATCH] fixed typo

---
 lightrag/lightrag.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 3bd3cc8f..6ff283c6 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -493,15 +493,14 @@ class LightRAG:
         }
 
         # 3. Filter out already processed documents
-        new_doc_keys: set[str] = set()
         # Get docs ids
-        in_process_keys = set(new_docs.keys())
-        # Get in progress docs ids
-        excluded_ids = await self.doc_status.filter_keys(list(in_process_keys))
-        # Exclude already in process
-        new_doc_keys = in_process_keys - excluded_ids
-        # Filter
-        new_docs = {doc_id: new_docs[doc_id] for doc_id in new_doc_keys}
+        all_new_doc_ids = set(new_docs.keys())
+        # Retrieve IDs that are already being processed
+        existing_ids = await self.doc_status.filter_keys(all_new_doc_ids)
+        # Exclude IDs of documents that are already in progress
+        unique_new_doc_ids = all_new_doc_ids - existing_ids
+        # Filter new_docs to only include documents with unique IDs
+        new_docs = {doc_id: new_docs[doc_id] for doc_id in unique_new_doc_ids}
 
         if not new_docs:
             logger.info("All documents have been processed or are duplicates")