feat(storage): Add shared memory support for file-based storage implementations
This commit adds multiprocessing shared memory support to the file-based storage implementations: JsonDocStatusStorage, JsonKVStorage, NanoVectorDBStorage, and NetworkXStorage. Each storage module now uses module-level global variables with multiprocessing.Manager() to ensure data consistency across multiple uvicorn workers. All processes will see updates immediately when data is modified through the ainsert function.
This commit is contained in:
@@ -2,6 +2,8 @@ import asyncio
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, final
|
||||
import threading
|
||||
from multiprocessing import Manager
|
||||
|
||||
from lightrag.base import (
|
||||
BaseKVStorage,
|
||||
@@ -12,6 +14,25 @@ from lightrag.utils import (
|
||||
write_json,
|
||||
)
|
||||
|
||||
# Global variables for shared memory management
# _init_lock serialises lazy initialisation between threads of this process.
_init_lock = threading.Lock()
# Manager instance, created on first call to _get_manager().
_manager = None
# Manager-backed dict, created together with _manager; keyed by namespace.
_shared_kv_data = None


def _get_manager():
    """Get or create the global manager instance.

    On first call this starts a ``multiprocessing.Manager`` and creates the
    manager-backed dict stored in ``_shared_kv_data``. Later calls return the
    already-created manager.

    Returns:
        The module-level manager instance.

    Raises:
        RuntimeError: If the manager or its shared dict cannot be created.
    """
    global _manager, _shared_kv_data
    with _init_lock:
        if _manager is None:
            new_manager = None
            try:
                new_manager = Manager()
                new_shared = new_manager.dict()
            except Exception as e:
                # Do not leave a half-initialised state behind: if the manager
                # started but the dict could not be created, stop the manager
                # so the next call retries from scratch.
                if new_manager is not None:
                    new_manager.shutdown()
                logger.error(f"Failed to initialize shared memory manager: {e}")
                raise RuntimeError(f"Shared memory initialization failed: {e}") from e
            # Publish both globals only after both were built successfully.
            _manager = new_manager
            _shared_kv_data = new_shared
        return _manager
|
||||
|
||||
|
||||
@final
|
||||
@dataclass
|
||||
@@ -19,9 +40,28 @@ class JsonKVStorage(BaseKVStorage):
|
||||
def __post_init__(self):
    """Bind this storage instance to the shared data of its namespace.

    Resolves the backing JSON file under ``global_config["working_dir"]``,
    makes sure the module-level manager exists, seeds the namespace entry of
    ``_shared_kv_data`` from that file when the entry is missing, and points
    ``self._data`` at the namespace entry.

    Raises:
        RuntimeError: If the manager cannot be created, the namespace data
            cannot be seeded, or the shared entry cannot be read back.
    """
    working_dir = self.global_config["working_dir"]
    self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
    self._lock = asyncio.Lock()

    # Ensure manager is initialized
    _get_manager()

    # Get or create namespace data. The file is read only here, once per
    # namespace; the membership test is repeated under the lock so that two
    # threads racing on the same namespace do not both seed it.
    if self.namespace not in _shared_kv_data:
        with _init_lock:
            if self.namespace not in _shared_kv_data:
                try:
                    initial_data = load_json(self._file_name) or {}
                    _shared_kv_data[self.namespace] = initial_data
                except Exception as e:
                    logger.error(f"Failed to initialize shared data for namespace {self.namespace}: {e}")
                    raise RuntimeError(f"Shared data initialization failed: {e}") from e

    try:
        # NOTE(review): the value stored above is a plain dict, and reading a
        # plain dict out of a manager dict proxy yields a copy. In-place
        # changes to self._data are therefore presumably not propagated back
        # to the shared entry -- confirm against the code that mutates it.
        self._data = _shared_kv_data[self.namespace]
        logger.info(f"Load KV {self.namespace} with {len(self._data)} data")
    except Exception as e:
        logger.error(f"Failed to access shared memory: {e}")
        raise RuntimeError(f"Cannot access shared memory: {e}") from e
|
||||
|
||||
async def index_done_callback(self) -> None:
    """Write the current contents of ``self._data`` to the storage's JSON file."""
    target_path = self._file_name
    payload = self._data
    write_json(payload, target_path)
|
||||
|
Reference in New Issue
Block a user