Refactor shared storage locks into separate pipeline, storage, and internal locks to prevent deadlocks

2025-03-01 10:48:55 +08:00
parent d3de57c1e4
commit c07a5039b7
3 changed files with 59 additions and 38 deletions
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -669,15 +669,15 @@ class LightRAG:
        3. Process each chunk for entity and relation extraction
        4. Update the document status
        """
-        from lightrag.kg.shared_storage import get_namespace_data, get_storage_lock
+        from lightrag.kg.shared_storage import get_namespace_data, get_pipeline_status_lock

        # Get pipeline status shared data and lock
        pipeline_status = await get_namespace_data("pipeline_status")
-        storage_lock = get_storage_lock()
+        pipeline_status_lock = get_pipeline_status_lock()

        # Check if another process is already processing the queue
        process_documents = False
-        async with storage_lock:
+        async with pipeline_status_lock:
            # Ensure only one worker is processing documents
            if not pipeline_status.get("busy", False):
                # Cleaning history_messages without breaking it as a shared list object
@@ -851,7 +851,7 @@ class LightRAG:

                # Check if there's a pending request to process more documents (with lock)
                has_pending_request = False
-                async with storage_lock:
+                async with pipeline_status_lock:
                    has_pending_request = pipeline_status.get("request_pending", False)
                    if has_pending_request:
                        # Clear the request flag before checking for more documents
@@ -869,7 +869,7 @@ class LightRAG:
            log_message = "Document processing pipeline completed"
            logger.info(log_message)
            # Always reset busy status when done or if an exception occurs (with lock)
-            async with storage_lock:
+            async with pipeline_status_lock:
                pipeline_status["busy"] = False
                pipeline_status["latest_message"] = log_message
                pipeline_status["history_messages"].append(log_message)