refactor: improve storage initialization with named instances to aid logging

• Add storage names to instance list
• Use tuples to store name with instance
• Update type hints for storage instances
• Improve logging with actual storage names
• Clean up loop variable naming
This commit is contained in:
yangdx
2025-02-12 22:55:47 +08:00
parent 3372af7c3d
commit 274cd73a8f

View File

@@ -609,15 +609,15 @@ class LightRAG:
) )
# Collect all storage instances # Collect all storage instances with their names
storage_instances = [ storage_instances = [
self.full_docs, ("full_docs", self.full_docs),
self.text_chunks, ("text_chunks", self.text_chunks),
self.chunk_entity_relation_graph, ("chunk_entity_relation_graph", self.chunk_entity_relation_graph),
self.entities_vdb, ("entities_vdb", self.entities_vdb),
self.relationships_vdb, ("relationships_vdb", self.relationships_vdb),
self.chunks_vdb, ("chunks_vdb", self.chunks_vdb),
self.doc_status, ("doc_status", self.doc_status),
] ]
# Initialize database connections if needed # Initialize database connections if needed
@@ -646,7 +646,7 @@ class LightRAG:
storage_class = lazy_external_import(import_path, storage_name) storage_class = lazy_external_import(import_path, storage_name)
return storage_class return storage_class
async def _initialize_database_if_needed(self, storage_instances: list): async def _initialize_database_if_needed(self, storage_instances: list[tuple[str, Any]]):
"""Initialize database connection and inject it to storage implementation if needed""" """Initialize database connection and inject it to storage implementation if needed"""
from .kg.postgres_impl import PostgreSQLDB from .kg.postgres_impl import PostgreSQLDB
from .kg.oracle_impl import OracleDB from .kg.oracle_impl import OracleDB
@@ -670,53 +670,53 @@ class LightRAG:
# Checking if PostgreSQL is needed # Checking if PostgreSQL is needed
if any( if any(
isinstance( isinstance(
storage, storage_instance,
(PGKVStorage, PGVectorStorage, PGGraphStorage, PGDocStatusStorage), (PGKVStorage, PGVectorStorage, PGGraphStorage, PGDocStatusStorage),
) )
for storage in storage_instances for _, storage_instance in storage_instances
): ):
postgres_db = PostgreSQLDB(self._get_postgres_config()) postgres_db = PostgreSQLDB(self._get_postgres_config())
await postgres_db.initdb() await postgres_db.initdb()
await postgres_db.check_tables() await postgres_db.check_tables()
for storage in storage_instances: for storage_name, storage_instance in storage_instances:
if isinstance( if isinstance(
storage, storage_instance,
(PGKVStorage, PGVectorStorage, PGGraphStorage, PGDocStatusStorage), (PGKVStorage, PGVectorStorage, PGGraphStorage, PGDocStatusStorage),
): ):
storage.db = postgres_db storage_instance.db = postgres_db
logger.info(f"Injected postgres_db to {storage.__class__.__name__}") logger.info(f"Injected postgres_db to {storage_name}")
# Checking if Oracle is needed # Checking if Oracle is needed
if any( if any(
isinstance( isinstance(
storage, (OracleKVStorage, OracleVectorDBStorage, OracleGraphStorage) storage_instance, (OracleKVStorage, OracleVectorDBStorage, OracleGraphStorage)
) )
for storage in storage_instances for _, storage_instance in storage_instances
): ):
oracle_db = OracleDB(self._get_oracle_config()) oracle_db = OracleDB(self._get_oracle_config())
await oracle_db.check_tables() await oracle_db.check_tables()
for storage in storage_instances: for storage_name, storage_instance in storage_instances:
if isinstance( if isinstance(
storage, storage_instance,
(OracleKVStorage, OracleVectorDBStorage, OracleGraphStorage), (OracleKVStorage, OracleVectorDBStorage, OracleGraphStorage),
): ):
storage.db = oracle_db storage_instance.db = oracle_db
logger.info(f"Injected oracle_db to {storage.__class__.__name__}") logger.info(f"Injected oracle_db to {storage_name}")
# Checking if TiDB is needed # Checking if TiDB is needed
if any( if any(
isinstance(storage, (TiDBKVStorage, TiDBVectorDBStorage, TiDBGraphStorage)) isinstance(storage_instance, (TiDBKVStorage, TiDBVectorDBStorage, TiDBGraphStorage))
for storage in storage_instances for _, storage_instance in storage_instances
): ):
tidb_db = TiDB(self._get_tidb_config()) tidb_db = TiDB(self._get_tidb_config())
await tidb_db.check_tables() await tidb_db.check_tables()
# 注入db实例 # 注入db实例
for storage in storage_instances: for storage_name, storage_instance in storage_instances:
if isinstance( if isinstance(
storage, (TiDBKVStorage, TiDBVectorDBStorage, TiDBGraphStorage) storage_instance, (TiDBKVStorage, TiDBVectorDBStorage, TiDBGraphStorage)
): ):
storage.db = tidb_db storage_instance.db = tidb_db
logger.info(f"Injected tidb_db to {storage.__class__.__name__}") logger.info(f"Injected tidb_db to {storage_name}")
def set_storage_client(self, db_client): def set_storage_client(self, db_client):
# Inject db to storage implementation (only tested on Oracle Database # Inject db to storage implementation (only tested on Oracle Database