fix: optimize job name handling in document processing pipeline
- Move job name setting to before batch processing - Fix document and batch counter accumulation
This commit is contained in:
@@ -842,17 +842,10 @@ class LightRAG:
|
|||||||
logger.info("No documents to process")
|
logger.info("No documents to process")
|
||||||
return
|
return
|
||||||
|
|
||||||
# Get first document's file path and total count for job name
|
|
||||||
first_doc_id, first_doc = next(iter(to_process_docs.items()))
|
|
||||||
first_doc_path = first_doc.file_path
|
|
||||||
path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "")
|
|
||||||
total_files = len(to_process_docs)
|
|
||||||
job_name = f"{path_prefix}[{total_files} files]"
|
|
||||||
|
|
||||||
pipeline_status.update(
|
pipeline_status.update(
|
||||||
{
|
{
|
||||||
"busy": True,
|
"busy": True,
|
||||||
"job_name": job_name,
|
"job_name": "Default Job",
|
||||||
"job_start": datetime.now().isoformat(),
|
"job_start": datetime.now().isoformat(),
|
||||||
"docs": 0,
|
"docs": 0,
|
||||||
"batchs": 0,
|
"batchs": 0,
|
||||||
@@ -891,11 +884,19 @@ class LightRAG:
|
|||||||
logger.info(log_message)
|
logger.info(log_message)
|
||||||
|
|
||||||
# Update pipeline status with current batch information
|
# Update pipeline status with current batch information
|
||||||
pipeline_status["docs"] += len(to_process_docs)
|
pipeline_status["docs"] = len(to_process_docs)
|
||||||
pipeline_status["batchs"] += len(docs_batches)
|
pipeline_status["batchs"] = len(docs_batches)
|
||||||
pipeline_status["latest_message"] = log_message
|
pipeline_status["latest_message"] = log_message
|
||||||
pipeline_status["history_messages"].append(log_message)
|
pipeline_status["history_messages"].append(log_message)
|
||||||
|
|
||||||
|
# Get first document's file path and total count for job name
|
||||||
|
first_doc_id, first_doc = next(iter(to_process_docs.items()))
|
||||||
|
first_doc_path = first_doc.file_path
|
||||||
|
path_prefix = first_doc_path[:20] + ("..." if len(first_doc_path) > 20 else "")
|
||||||
|
total_files = len(to_process_docs)
|
||||||
|
job_name = f"{path_prefix}[{total_files} files]"
|
||||||
|
pipeline_status["job_name"] = job_name
|
||||||
|
|
||||||
async def process_document(
|
async def process_document(
|
||||||
doc_id: str,
|
doc_id: str,
|
||||||
status_doc: DocProcessingStatus,
|
status_doc: DocProcessingStatus,
|
||||||
|
Reference in New Issue
Block a user