diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 28085723..f2971b34 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -6,7 +6,7 @@ from fastapi import ( Form, BackgroundTasks, ) - +import asyncio import threading import os import json @@ -730,6 +730,8 @@ def create_app(args): postgres_db = None oracle_db = None tidb_db = None + # Store background tasks + app.state.background_tasks = set() try: # Check if PostgreSQL is needed @@ -794,20 +796,23 @@ def create_app(args): # Auto scan documents if enabled if args.auto_scan_at_startup: - try: - new_files = doc_manager.scan_directory_for_new_files() - for file_path in new_files: - try: - await index_file(file_path) - except Exception as e: - trace_exception(e) - logging.error(f"Error indexing file {file_path}: {str(e)}") - - ASCIIColors.info( - f"Indexed {len(new_files)} documents from {args.input_dir}" - ) - except Exception as e: - logging.error(f"Error during startup indexing: {str(e)}") + # Start scanning in background + with progress_lock: + if not scan_progress["is_scanning"]: + scan_progress["is_scanning"] = True + scan_progress["indexed_count"] = 0 + scan_progress["progress"] = 0 + # Create background task + task = asyncio.create_task(run_scanning_process()) + app.state.background_tasks.add(task) + task.add_done_callback(app.state.background_tasks.discard) + ASCIIColors.info( + f"Started background scanning of documents from {args.input_dir}" + ) + else: + ASCIIColors.info( + "Skip document scanning(anohter scanning is active)" + ) yield @@ -1145,9 +1150,15 @@ def create_app(args): pm.install("docling") from docling.document_converter import DocumentConverter - converter = DocumentConverter() - result = converter.convert(file_path) - content = result.document.export_to_markdown() + async def convert_doc(): + def sync_convert(): + converter = DocumentConverter() + result = converter.convert(file_path) + return result.document.export_to_markdown() + + return await asyncio.to_thread(sync_convert) + + content = await convert_doc() case _: raise ValueError(f"Unsupported file format: {ext}") @@ -1439,9 +1450,16 @@ def create_app(args): f.write(await file.read()) try: - converter = DocumentConverter() - result = converter.convert(str(temp_path)) - content = result.document.export_to_markdown() + + async def convert_doc(): + def sync_convert(): + converter = DocumentConverter() + result = converter.convert(str(temp_path)) + return result.document.export_to_markdown() + + return await asyncio.to_thread(sync_convert) + + content = await convert_doc() finally: # Clean up the temporary file temp_path.unlink()