Add graph_db_lock to esure consistency across multiple processes for node and edge edition jobs

This commit is contained in:
yangdx
2025-04-14 00:07:31 +08:00
parent 00c8394601
commit 6dd67748ca

View File

@@ -11,12 +11,18 @@ from functools import partial
from typing import Any, AsyncIterator, Callable, Iterator, cast, final, Literal
import pandas as pd
from .kg.shared_storage import get_graph_db_lock
from lightrag.kg import (
STORAGES,
verify_storage_implementation,
)
from lightrag.kg.shared_storage import (
get_namespace_data,
get_pipeline_status_lock,
)
from .base import (
BaseGraphStorage,
BaseKVStorage,
@@ -779,10 +785,6 @@ class LightRAG:
3. Process each chunk for entity and relation extraction
4. Update the document status
"""
from lightrag.kg.shared_storage import (
get_namespace_data,
get_pipeline_status_lock,
)
# Get pipeline status shared data and lock
pipeline_status = await get_namespace_data("pipeline_status")
@@ -1431,8 +1433,10 @@ class LightRAG:
loop = always_get_an_event_loop()
return loop.run_until_complete(self.adelete_by_entity(entity_name))
# TODO: Lock all KG relative DB to esure consistency across multiple processes
async def adelete_by_entity(self, entity_name: str) -> None:
graph_db_lock = get_graph_db_lock(enable_logging=False)
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
await self.entities_vdb.delete_entity(entity_name)
await self.relationships_vdb.delete_entity_relation(entity_name)
@@ -1469,7 +1473,6 @@ class LightRAG:
self.adelete_by_relation(source_entity, target_entity)
)
# TODO: Lock all KG relative DB to esure consistency across multiple processes
async def adelete_by_relation(self, source_entity: str, target_entity: str) -> None:
"""Asynchronously delete a relation between two entities.
@@ -1477,6 +1480,9 @@ class LightRAG:
source_entity: Name of the source entity
target_entity: Name of the target entity
"""
graph_db_lock = get_graph_db_lock(enable_logging=False)
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
# TODO: check if has_edge function works on reverse relation
# Check if the relation exists
@@ -1539,7 +1545,8 @@ class LightRAG:
"""
return await self.doc_status.get_docs_by_status(status)
# TODO: Lock all KG relative DB to esure consistency across multiple processes
# TODO: Deprecated (Deleting documents can cause hallucinations in RAG.)
# Document delete is not working properly for most of the storage implementations.
async def adelete_by_doc_id(self, doc_id: str) -> None:
"""Delete a document and all its related data
@@ -1898,7 +1905,6 @@ class LightRAG:
"""Synchronous version of aclear_cache."""
return always_get_an_event_loop().run_until_complete(self.aclear_cache(modes))
# TODO: Lock all KG relative DB to esure consistency across multiple processes
async def aedit_entity(
self, entity_name: str, updated_data: dict[str, str], allow_rename: bool = True
) -> dict[str, Any]:
@@ -1914,6 +1920,9 @@ class LightRAG:
Returns:
Dictionary containing updated entity information
"""
graph_db_lock = get_graph_db_lock(enable_logging=False)
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
# 1. Get current entity information
node_exists = await self.chunk_entity_relation_graph.has_node(entity_name)
@@ -2111,7 +2120,6 @@ class LightRAG:
]
)
# TODO: Lock all KG relative DB to esure consistency across multiple processes
async def aedit_relation(
self, source_entity: str, target_entity: str, updated_data: dict[str, Any]
) -> dict[str, Any]:
@@ -2127,6 +2135,9 @@ class LightRAG:
Returns:
Dictionary containing updated relation information
"""
graph_db_lock = get_graph_db_lock(enable_logging=False)
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
# 1. Get current relation information
edge_exists = await self.chunk_entity_relation_graph.has_edge(
@@ -2245,6 +2256,9 @@ class LightRAG:
Returns:
Dictionary containing created entity information
"""
graph_db_lock = get_graph_db_lock(enable_logging=False)
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
# Check if entity already exists
existing_node = await self.chunk_entity_relation_graph.has_node(entity_name)
@@ -2325,6 +2339,9 @@ class LightRAG:
Returns:
Dictionary containing created relation information
"""
graph_db_lock = get_graph_db_lock(enable_logging=False)
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
# Check if both entities exist
source_exists = await self.chunk_entity_relation_graph.has_node(
@@ -2426,7 +2443,6 @@ class LightRAG:
self.acreate_relation(source_entity, target_entity, relation_data)
)
# TODO: Lock all KG relative DB to esure consistency across multiple processes
async def amerge_entities(
self,
source_entities: list[str],
@@ -2454,6 +2470,9 @@ class LightRAG:
Returns:
Dictionary containing the merged entity information
"""
graph_db_lock = get_graph_db_lock(enable_logging=False)
# Use graph database lock to ensure atomic graph and vector db operations
async with graph_db_lock:
try:
# Default merge strategy
default_strategy = {