From 5c18213b24a97051acbc4987461bf1c0e119bd41 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 26 Mar 2025 16:58:31 +0800 Subject: [PATCH] fix: optimize job name handling in document processing pipeline - Move job name setting to before batch processing - Fix document and batch counter accumulation --- lightrag/lightrag.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index a2e345a5..718cf576 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -842,17 +842,10 @@ class LightRAG: logger.info("No documents to process") return - # Get first document's file path and total count for job name - first_doc_id, first_doc = next(iter(to_process_docs.items())) - first_doc_path = first_doc.file_path - path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "") - total_files = len(to_process_docs) - job_name = f"{path_prefix}[{total_files} files]" - pipeline_status.update( { "busy": True, - "job_name": job_name, + "job_name": "Default Job", "job_start": datetime.now().isoformat(), "docs": 0, "batchs": 0, @@ -891,11 +884,19 @@ class LightRAG: logger.info(log_message) # Update pipeline status with current batch information - pipeline_status["docs"] += len(to_process_docs) - pipeline_status["batchs"] += len(docs_batches) + pipeline_status["docs"] = len(to_process_docs) + pipeline_status["batchs"] = len(docs_batches) pipeline_status["latest_message"] = log_message pipeline_status["history_messages"].append(log_message) + # Get first document's file path and total count for job name + first_doc_id, first_doc = next(iter(to_process_docs.items())) + first_doc_path = first_doc.file_path + path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "") + total_files = len(to_process_docs) + job_name = f"{path_prefix}[{total_files} files]" + pipeline_status["job_name"] = job_name + async def process_document( doc_id: str, status_doc: DocProcessingStatus,