From 08364e99fb03aedb10eb9791a0d338ce98757380 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 14 Feb 2025 01:12:39 +0800 Subject: [PATCH 1/4] Move document scanning trigger by command line to background task - Added background task management - Prevented concurrent scanning - Tracked scanning progress - Improved startup performance - Enhanced error handling --- lightrag/api/lightrag_server.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 28085723..f6c05438 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -6,7 +6,7 @@ from fastapi import ( Form, BackgroundTasks, ) - +import asyncio import threading import os import json @@ -730,6 +730,8 @@ def create_app(args): postgres_db = None oracle_db = None tidb_db = None + # Store background tasks + app.state.background_tasks = set() try: # Check if PostgreSQL is needed @@ -794,20 +796,19 @@ def create_app(args): # Auto scan documents if enabled if args.auto_scan_at_startup: - try: - new_files = doc_manager.scan_directory_for_new_files() - for file_path in new_files: - try: - await index_file(file_path) - except Exception as e: - trace_exception(e) - logging.error(f"Error indexing file {file_path}: {str(e)}") - - ASCIIColors.info( - f"Indexed {len(new_files)} documents from {args.input_dir}" - ) - except Exception as e: - logging.error(f"Error during startup indexing: {str(e)}") + # Start scanning in background + with progress_lock: + if not scan_progress["is_scanning"]: + scan_progress["is_scanning"] = True + scan_progress["indexed_count"] = 0 + scan_progress["progress"] = 0 + # Create background task + task = asyncio.create_task(run_scanning_process()) + app.state.background_tasks.add(task) + task.add_done_callback(app.state.background_tasks.discard) + ASCIIColors.info(f"Started background scanning of documents from {args.input_dir}") + else: + 
ASCIIColors.info("Skip document scanning cause another scanning is active") yield From cd3815e82508aa1ff8746cd1a90162c01f66e16a Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 14 Feb 2025 01:14:12 +0800 Subject: [PATCH 2/4] Fix linting --- lightrag/api/lightrag_server.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index f6c05438..ba0a95cc 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -806,9 +806,13 @@ def create_app(args): task = asyncio.create_task(run_scanning_process()) app.state.background_tasks.add(task) task.add_done_callback(app.state.background_tasks.discard) - ASCIIColors.info(f"Started background scanning of documents from {args.input_dir}") + ASCIIColors.info( + f"Started background scanning of documents from {args.input_dir}" + ) else: - ASCIIColors.info("Skip document scanning cause another scanning is active") + ASCIIColors.info( + "Skip document scanning(another scanning is active)" + ) yield From 6f999aa5e5c9b317899bf333b2264b10246e5bd4 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 14 Feb 2025 02:31:58 +0800 Subject: [PATCH 3/4] Refactor file conversion to use async execution - Wrap sync conversion in async function - Use asyncio.to_thread for non-blocking IO - Maintain same functionality as before - Clean up temporary files properly - Improve responsiveness of file processing --- lightrag/api/lightrag_server.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index ba0a95cc..6bdcd9b1 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -1150,9 +1150,14 @@ def create_app(args): pm.install("docling") from docling.document_converter import DocumentConverter - converter = DocumentConverter() - result = converter.convert(file_path) - content = 
result.document.export_to_markdown() + async def convert_doc(): + def sync_convert(): + converter = DocumentConverter() + result = converter.convert(file_path) + return result.document.export_to_markdown() + return await asyncio.to_thread(sync_convert) + + content = await convert_doc() case _: raise ValueError(f"Unsupported file format: {ext}") @@ -1444,9 +1449,14 @@ def create_app(args): f.write(await file.read()) try: - converter = DocumentConverter() - result = converter.convert(str(temp_path)) - content = result.document.export_to_markdown() + async def convert_doc(): + def sync_convert(): + converter = DocumentConverter() + result = converter.convert(str(temp_path)) + return result.document.export_to_markdown() + return await asyncio.to_thread(sync_convert) + + content = await convert_doc() finally: # Clean up the temporary file temp_path.unlink() From 35d6e2d32228e48972e8196ec6fa1dfd27d47c79 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 14 Feb 2025 02:32:33 +0800 Subject: [PATCH 4/4] Fix linting --- lightrag/api/lightrag_server.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 6bdcd9b1..f2971b34 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -1155,8 +1155,9 @@ def create_app(args): converter = DocumentConverter() result = converter.convert(file_path) return result.document.export_to_markdown() + return await asyncio.to_thread(sync_convert) - + content = await convert_doc() case _: @@ -1449,13 +1450,15 @@ def create_app(args): f.write(await file.read()) try: + async def convert_doc(): def sync_convert(): converter = DocumentConverter() result = converter.convert(str(temp_path)) return result.document.export_to_markdown() + return await asyncio.to_thread(sync_convert) - + content = await convert_doc() finally: # Clean up the temporary file