Files are now processed in batches in auto scan

This commit is contained in:
yangdx
2025-03-21 13:41:37 +08:00
parent 67eee2d2d5
commit 0761af19c6
2 changed files with 24 additions and 2 deletions

View File

@@ -472,11 +472,30 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
     total_files = len(new_files)
     logger.info(f"Found {total_files} new files to index.")
-    if new_files:
-        await pipeline_index_files(rag, new_files)
+    if not new_files:
+        return
# Get MAX_PARALLEL_INSERT from global_args
max_parallel = global_args["max_parallel_insert"]
# Calculate batch size as 2 * MAX_PARALLEL_INSERT
batch_size = 2 * max_parallel
# Process files in batches
for i in range(0, total_files, batch_size):
batch_files = new_files[i:i+batch_size]
batch_num = i // batch_size + 1
total_batches = (total_files + batch_size - 1) // batch_size
logger.info(f"Processing batch {batch_num}/{total_batches} with {len(batch_files)} files")
await pipeline_index_files(rag, batch_files)
# Log progress
processed = min(i + batch_size, total_files)
logger.info(f"Processed {processed}/{total_files} files ({processed/total_files*100:.1f}%)")
 except Exception as e:
     logger.error(f"Error during scanning process: {str(e)}")
logger.error(traceback.format_exc())
def create_document_routes(

View File

@@ -365,6 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
         "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
     )
# Get MAX_PARALLEL_INSERT from environment
global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int)
 # Handle openai-ollama special case
 if args.llm_binding == "openai-ollama":
     args.llm_binding = "openai"