Refactor initialization logic for vector, KV and graph storage implementations

• Add try_initialize_namespace check
• Move init code out of storage locks
• Reduce redundant init conditions
• Simplify initialization flow
• Make init thread-safer
This commit is contained in:
yangdx
2025-02-27 14:55:07 +08:00
parent 03d05b094d
commit f007ebf006
4 changed files with 67 additions and 60 deletions

View File

@@ -10,7 +10,7 @@ from lightrag.utils import (
logger,
write_json,
)
from .shared_storage import get_namespace_data, get_storage_lock
from .shared_storage import get_namespace_data, get_storage_lock, try_initialize_namespace
@final
@@ -20,11 +20,15 @@ class JsonKVStorage(BaseKVStorage):
working_dir = self.global_config["working_dir"]
self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
self._storage_lock = get_storage_lock()
# check need_init must before get_namespace_data
need_init = try_initialize_namespace(self.namespace)
self._data = get_namespace_data(self.namespace)
with self._storage_lock:
if not self._data:
self._data: dict[str, Any] = load_json(self._file_name) or {}
logger.info(f"Load KV {self.namespace} with {len(self._data)} data")
if need_init:
loaded_data = load_json(self._file_name) or {}
with self._storage_lock:
self._data.update(loaded_data)
logger.info(f"Load KV {self.namespace} with {len(loaded_data)} data")
async def index_done_callback(self) -> None:
# 文件写入需要加锁,防止多个进程同时写入导致文件损坏