From 5c18213b24a97051acbc4987461bf1c0e119bd41 Mon Sep 17 00:00:00 2001
From: yangdx <yangdx@znipower.com>
Date: Wed, 26 Mar 2025 16:58:31 +0800
Subject: [PATCH] fix: optimize job name handling in document processing
 pipeline

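- Set the job name just before batch processing; the initial status
  update now uses the placeholder "Default Job"
- Assign the document and batch counters directly instead of
  accumulating them with +=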
---
 lightrag/lightrag.py | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index a2e345a5..718cf576 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -842,17 +842,10 @@ class LightRAG:
                     logger.info("No documents to process")
                     return
 
-                # Get first document's file path and total count for job name
-                first_doc_id, first_doc = next(iter(to_process_docs.items()))
-                first_doc_path = first_doc.file_path
-                path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "")
-                total_files = len(to_process_docs)
-                job_name = f"{path_prefix}[{total_files} files]"
-
                 pipeline_status.update(
                     {
                         "busy": True,
-                        "job_name": job_name,
+                        "job_name": "Default Job",
                         "job_start": datetime.now().isoformat(),
                         "docs": 0,
                         "batchs": 0,
@@ -891,11 +884,19 @@ class LightRAG:
                 logger.info(log_message)
 
                 # Update pipeline status with current batch information
-                pipeline_status["docs"] += len(to_process_docs)
-                pipeline_status["batchs"] += len(docs_batches)
+                pipeline_status["docs"] = len(to_process_docs)
+                pipeline_status["batchs"] = len(docs_batches)
                 pipeline_status["latest_message"] = log_message
                 pipeline_status["history_messages"].append(log_message)
 
+                # Get first document's file path and total count for job name
+                first_doc_id, first_doc = next(iter(to_process_docs.items()))
+                first_doc_path = first_doc.file_path
+                path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "")
+                total_files = len(to_process_docs)
+                job_name = f"{path_prefix}[{total_files} files]"
+                pipeline_status["job_name"] = job_name
+
                 async def process_document(
                     doc_id: str,
                     status_doc: DocProcessingStatus,