From 411782797b0913b738932f725c911095bca8456c Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 22 Feb 2025 10:18:39 +0800 Subject: [PATCH 1/4] Fix linting --- lightrag/lightrag.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index eca23d4f..67ef3aab 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -591,7 +591,7 @@ class LightRAG: if isinstance(input, str): input = [input] - # Clean input text and remove duplicates + # Clean input text and remove duplicates input = list(set(self.clean_text(doc) for doc in input)) # 1. Validate ids if provided or generate MD5 hash IDs @@ -608,10 +608,7 @@ class LightRAG: contents = {id_: doc for id_, doc in zip(ids, input)} else: # Generate contents dict of MD5 hash IDs and documents - contents = { - compute_mdhash_id(doc, prefix="doc-"): doc - for doc in input - } + contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input} # 2. Remove duplicate contents unique_contents = { From 845e914f1bcc8d8cd1543146b319f31318d359a6 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 23 Feb 2025 15:46:47 +0800 Subject: [PATCH 2/4] fix: make ids parameter optional and optimize input text cleaning - Add default None value for ids parameter - Move text cleaning into else branch - Only clean text when auto-generating ids - Preserve original text with custom ids - Improve code readability --- lightrag/lightrag.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 67ef3aab..efc49c2a 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -577,7 +577,7 @@ class LightRAG: await self._insert_done() async def apipeline_enqueue_documents( - self, input: str | list[str], ids: list[str] | None + self, input: str | list[str], ids: list[str] | None = None ) -> None: """ Pipeline for Processing Documents @@ -591,9 +591,6 @@ class LightRAG: if isinstance(input, str): input = [input] - # Clean input text and remove duplicates - input = list(set(self.clean_text(doc) for doc in input)) - # 1. Validate ids if provided or generate MD5 hash IDs if ids is not None: # Check if the number of IDs matches the number of documents @@ -607,6 +604,8 @@ class LightRAG: # Generate contents dict of IDs provided by user and documents contents = {id_: doc for id_, doc in zip(ids, input)} else: + # Clean input text and remove duplicates + input = list(set(self.clean_text(doc) for doc in input)) # Generate contents dict of MD5 hash IDs and documents contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input} From dbeda8a9ff43eed8937d90ee00f679ccae3b3d42 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 23 Feb 2025 16:12:08 +0800 Subject: [PATCH 3/4] Change scanning logs from INFO to DEBUG level --- lightrag/api/routers/document_routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 25ca24e4..5c742f39 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -161,7 +161,7 @@ class DocumentManager: """Scan input directory for new files""" new_files = [] for ext in self.supported_extensions: - logging.info(f"Scanning for {ext} files in {self.input_dir}") + logging.debug(f"Scanning for {ext} files in {self.input_dir}") for file_path in self.input_dir.rglob(f"*{ext}"): if file_path not in self.indexed_files: new_files.append(file_path) From df95f251dcaf6d6623d36a9f3f8b8def1c33e74e Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 23 Feb 2025 16:42:31 +0800 Subject: [PATCH 4/4] Move server ready message to lifespan --- lightrag/api/lightrag_server.py | 2 ++ lightrag/api/utils_api.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index b656b67f..10a9b52c 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -181,6 +181,8 @@ def create_app(args): "Skip document scanning(another scanning is active)" ) + ASCIIColors.green("\nServer is ready to accept connections! 🚀\n") + yield finally: diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py index a24e731e..8784f265 100644 --- a/lightrag/api/utils_api.py +++ b/lightrag/api/utils_api.py @@ -548,7 +548,5 @@ def display_splash_screen(args: argparse.Namespace) -> None: Make sure to include the X-API-Key header in all your requests. """) - ASCIIColors.green("Server is ready to accept connections! 🚀\n") - # Ensure splash output flush to system log sys.stdout.flush()