fix: optimize job name handling in document processing pipeline

- Move job name setting to before batch processing
- Fix document and batch counter accumulation
This commit is contained in:
yangdx
2025-03-26 16:58:31 +08:00
parent 03934b1385
commit 5c18213b24

View File

@@ -842,17 +842,10 @@ class LightRAG:
logger.info("No documents to process") logger.info("No documents to process")
return return
# Get first document's file path and total count for job name
first_doc_id, first_doc = next(iter(to_process_docs.items()))
first_doc_path = first_doc.file_path
path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "")
total_files = len(to_process_docs)
job_name = f"{path_prefix}[{total_files} files]"
pipeline_status.update( pipeline_status.update(
{ {
"busy": True, "busy": True,
"job_name": job_name, "job_name": "Default Job",
"job_start": datetime.now().isoformat(), "job_start": datetime.now().isoformat(),
"docs": 0, "docs": 0,
"batchs": 0, "batchs": 0,
@@ -891,11 +884,19 @@ class LightRAG:
logger.info(log_message) logger.info(log_message)
# Update pipeline status with current batch information # Update pipeline status with current batch information
pipeline_status["docs"] += len(to_process_docs) pipeline_status["docs"] = len(to_process_docs)
pipeline_status["batchs"] += len(docs_batches) pipeline_status["batchs"] = len(docs_batches)
pipeline_status["latest_message"] = log_message pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message) pipeline_status["history_messages"].append(log_message)
# Get first document's file path and total count for job name
first_doc_id, first_doc = next(iter(to_process_docs.items()))
first_doc_path = first_doc.file_path
path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "")
total_files = len(to_process_docs)
job_name = f"{path_prefix}[{total_files} files]"
pipeline_status["job_name"] = job_name
async def process_document( async def process_document(
doc_id: str, doc_id: str,
status_doc: DocProcessingStatus, status_doc: DocProcessingStatus,