From fc425f1397338f546c236c7c9cc4531a0fe0eb74 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 24 Apr 2025 14:00:43 +0800
Subject: [PATCH] Send all found files to pipeline at once

---
 lightrag/api/routers/document_routes.py | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py
index 0111e237..de471f29 100644
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -674,27 +674,9 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
         if not new_files:
             return
 
-        # Get MAX_PARALLEL_INSERT from global_args
-        max_parallel = global_args.max_parallel_insert
-        # Calculate batch size as 2 * MAX_PARALLEL_INSERT
-        batch_size = 2 * max_parallel
-
-        # Process files in batches
-        for i in range(0, total_files, batch_size):
-            batch_files = new_files[i : i + batch_size]
-            batch_num = i // batch_size + 1
-            total_batches = (total_files + batch_size - 1) // batch_size
-
-            logger.info(
-                f"Processing batch {batch_num}/{total_batches} with {len(batch_files)} files"
-            )
-            await pipeline_index_files(rag, batch_files)
-
-            # Log progress
-            processed = min(i + batch_size, total_files)
-            logger.info(
-                f"Processed {processed}/{total_files} files ({processed/total_files*100:.1f}%)"
-            )
+        # Process all files at once
+        await pipeline_index_files(rag, new_files)
+        logger.info(f"Processed {total_files}/{total_files} files (100.0%)")
 
     except Exception as e:
         logger.error(f"Error during scanning process: {str(e)}")
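
For reference, a minimal sketch of how run_scanning_process reads once this patch is applied. Only the hunk above is authoritative; the file-discovery step and the exact layout outside the hunk are assumptions for illustration, and scan_directory_for_new_files is a hypothetical helper name:

# Sketch only: LightRAG, DocumentManager, pipeline_index_files, and logger
# come from lightrag/api/routers/document_routes.py; structure outside the
# hunk shown in the patch is assumed.
async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
    try:
        # Assumed helper: discover files not yet indexed.
        new_files = doc_manager.scan_directory_for_new_files()
        total_files = len(new_files)

        if not new_files:
            return

        # Process all files at once; the previous 2 * MAX_PARALLEL_INSERT
        # batching loop is gone, so there is a single pipeline call.
        await pipeline_index_files(rag, new_files)
        logger.info(f"Processed {total_files}/{total_files} files (100.0%)")

    except Exception as e:
        logger.error(f"Error during scanning process: {str(e)}")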