From 8ab369c2af7a7ba5ca551d069d93d769faa46ca6 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 16:08:20 +0800 Subject: [PATCH 1/8] Remove unused package --- lightrag/api/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/lightrag/api/requirements.txt b/lightrag/api/requirements.txt index 7b2593c0..068a84b9 100644 --- a/lightrag/api/requirements.txt +++ b/lightrag/api/requirements.txt @@ -1,6 +1,5 @@ ascii_colors fastapi -nest_asyncio numpy pipmaster python-dotenv From 75ee4592b8172aa2ec22794efa7c47aa1aafe0b1 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 16:14:11 +0800 Subject: [PATCH 2/8] refactor: Implement dynamic database module imports - Consolidate database instance management - Improve database management and error handling - Enhance error handling and logging --- lightrag/api/lightrag_server.py | 170 ++++++++++++++------------------ 1 file changed, 74 insertions(+), 96 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 7a50a512..661e25d0 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -41,25 +41,28 @@ from .ollama_api import ( OllamaAPI, ) from .ollama_api import ollama_server_infos -from ..kg.postgres_impl import ( - PostgreSQLDB, - PGKVStorage, - PGVectorStorage, - PGGraphStorage, - PGDocStatusStorage, -) -from ..kg.oracle_impl import ( - OracleDB, - OracleKVStorage, - OracleVectorDBStorage, - OracleGraphStorage, -) -from ..kg.tidb_impl import ( - TiDB, - TiDBKVStorage, - TiDBVectorDBStorage, - TiDBGraphStorage, -) +def get_db_type_from_storage_class(class_name: str) -> str | None: + """Determine database type based on storage class name""" + if class_name.startswith("PG"): + return "postgres" + elif class_name.startswith("Oracle"): + return "oracle" + elif class_name.startswith("TiDB"): + return "tidb" + return None + +def import_db_module(db_type: str): + """Dynamically import database module""" + if db_type == "postgres": + from ..kg.postgres_impl import PostgreSQLDB + return PostgreSQLDB + elif db_type == "oracle": + from ..kg.oracle_impl import OracleDB + return OracleDB + elif db_type == "tidb": + from ..kg.tidb_impl import TiDB + return TiDB + return None # Load environment variables try: @@ -333,28 +336,28 @@ def parse_args() -> argparse.Namespace: default=get_env_value( "LIGHTRAG_KV_STORAGE", DefaultRAGStorageConfig.KV_STORAGE ), - help=f"KV存储实现 (default: {DefaultRAGStorageConfig.KV_STORAGE})", + help=f"KV storage implementation (default: {DefaultRAGStorageConfig.KV_STORAGE})", ) parser.add_argument( "--doc-status-storage", default=get_env_value( "LIGHTRAG_DOC_STATUS_STORAGE", DefaultRAGStorageConfig.DOC_STATUS_STORAGE ), - help=f"文档状态存储实现 (default: {DefaultRAGStorageConfig.DOC_STATUS_STORAGE})", + help=f"Document status storage implementation (default: {DefaultRAGStorageConfig.DOC_STATUS_STORAGE})", ) parser.add_argument( "--graph-storage", default=get_env_value( "LIGHTRAG_GRAPH_STORAGE", DefaultRAGStorageConfig.GRAPH_STORAGE ), - help=f"图存储实现 (default: {DefaultRAGStorageConfig.GRAPH_STORAGE})", + help=f"Graph storage implementation (default: {DefaultRAGStorageConfig.GRAPH_STORAGE})", ) parser.add_argument( "--vector-storage", default=get_env_value( "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE ), - help=f"向量存储实现 (default: {DefaultRAGStorageConfig.VECTOR_STORAGE})", + help=f"Vector storage implementation (default: {DefaultRAGStorageConfig.VECTOR_STORAGE})", ) # Bindings configuration @@ -890,72 +893,47 @@ def create_app(args): async def lifespan(app: FastAPI): """Lifespan context manager for startup and shutdown events""" # Initialize database connections - postgres_db = None - oracle_db = None - tidb_db = None + db_instances = {} # Store background tasks app.state.background_tasks = set() try: - # Check if PostgreSQL is needed - if any( - isinstance( - storage_instance, - (PGKVStorage, PGVectorStorage, PGGraphStorage, PGDocStatusStorage), - ) - for _, storage_instance in storage_instances - ): - postgres_db = PostgreSQLDB(_get_postgres_config()) - await postgres_db.initdb() - await postgres_db.check_tables() - for storage_name, storage_instance in storage_instances: - if isinstance( - storage_instance, - ( - PGKVStorage, - PGVectorStorage, - PGGraphStorage, - PGDocStatusStorage, - ), - ): - storage_instance.db = postgres_db - logger.info(f"Injected postgres_db to {storage_name}") + # Check which database types are used + db_types = set() + for storage_name, storage_instance in storage_instances: + db_type = get_db_type_from_storage_class(storage_instance.__class__.__name__) + if db_type: + db_types.add(db_type) - # Check if Oracle is needed - if any( - isinstance( - storage_instance, - (OracleKVStorage, OracleVectorDBStorage, OracleGraphStorage), - ) - for _, storage_instance in storage_instances - ): - oracle_db = OracleDB(_get_oracle_config()) - await oracle_db.check_tables() - for storage_name, storage_instance in storage_instances: - if isinstance( - storage_instance, - (OracleKVStorage, OracleVectorDBStorage, OracleGraphStorage), - ): - storage_instance.db = oracle_db - logger.info(f"Injected oracle_db to {storage_name}") + # Import and initialize databases as needed + for db_type in db_types: + if db_type == "postgres": + DB = import_db_module("postgres") + db = DB(_get_postgres_config()) + await db.initdb() + await db.check_tables() + db_instances["postgres"] = db + elif db_type == "oracle": + DB = import_db_module("oracle") + db = DB(_get_oracle_config()) + await db.check_tables() + db_instances["oracle"] = db + elif db_type == "tidb": + DB = import_db_module("tidb") + db = DB(_get_tidb_config()) + await db.check_tables() + db_instances["tidb"] = db - # Check if TiDB is needed - if any( - isinstance( - storage_instance, - (TiDBKVStorage, TiDBVectorDBStorage, TiDBGraphStorage), - ) - for _, storage_instance in storage_instances - ): - tidb_db = TiDB(_get_tidb_config()) - await tidb_db.check_tables() - for storage_name, storage_instance in storage_instances: - if isinstance( - storage_instance, - (TiDBKVStorage, TiDBVectorDBStorage, TiDBGraphStorage), - ): - storage_instance.db = tidb_db - logger.info(f"Injected tidb_db to {storage_name}") + # Inject database instances into storage classes + for storage_name, storage_instance in storage_instances: + db_type = get_db_type_from_storage_class(storage_instance.__class__.__name__) + if db_type: + if db_type not in db_instances: + error_msg = f"Database type '{db_type}' is required by {storage_name} but not initialized" + logger.error(error_msg) + raise RuntimeError(error_msg) + storage_instance.db = db_instances[db_type] + logger.info(f"Injected {db_type} db to {storage_name}") # Auto scan documents if enabled if args.auto_scan_at_startup: @@ -980,18 +958,18 @@ def create_app(args): yield finally: - # Cleanup database connections - if postgres_db and hasattr(postgres_db, "pool"): - await postgres_db.pool.close() - logger.info("Closed PostgreSQL connection pool") - - if oracle_db and hasattr(oracle_db, "pool"): - await oracle_db.pool.close() - logger.info("Closed Oracle connection pool") - - if tidb_db and hasattr(tidb_db, "pool"): - await tidb_db.pool.close() - logger.info("Closed TiDB connection pool") + # Clean up database connections + for db_type, db in db_instances.items(): + if hasattr(db, "pool"): + await db.pool.close() + # Use more accurate database name display + db_names = { + "postgres": "PostgreSQL", + "oracle": "Oracle", + "tidb": "TiDB" + } + db_name = db_names.get(db_type, db_type) + logger.info(f"Closed {db_name} database connection pool") # Initialize FastAPI app = FastAPI( @@ -1311,7 +1289,7 @@ def create_app(args): case ".pdf": if not pm.is_installed("pypdf2"): pm.install("pypdf2") - from PyPDF2 import PdfReader + from PyPDF2 import PdfReader # type: ignore from io import BytesIO pdf_file = BytesIO(file) From 5acd0541801b1f94d7a98217776c57e2257bef3d Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 16:33:57 +0800 Subject: [PATCH 3/8] Fix linting --- lightrag/api/lightrag_server.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 661e25d0..95118b51 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -41,6 +41,8 @@ from .ollama_api import ( OllamaAPI, ) from .ollama_api import ollama_server_infos + + def get_db_type_from_storage_class(class_name: str) -> str | None: """Determine database type based on storage class name""" if class_name.startswith("PG"): @@ -51,19 +53,24 @@ def get_db_type_from_storage_class(class_name: str) -> str | None: return "tidb" return None + def import_db_module(db_type: str): """Dynamically import database module""" if db_type == "postgres": from ..kg.postgres_impl import PostgreSQLDB + return PostgreSQLDB elif db_type == "oracle": from ..kg.oracle_impl import OracleDB + return OracleDB elif db_type == "tidb": from ..kg.tidb_impl import TiDB + return TiDB return None + # Load environment variables try: load_dotenv(override=True) @@ -901,7 +908,9 @@ def create_app(args): # Check which database types are used db_types = set() for storage_name, storage_instance in storage_instances: - db_type = get_db_type_from_storage_class(storage_instance.__class__.__name__) + db_type = get_db_type_from_storage_class( + storage_instance.__class__.__name__ + ) if db_type: db_types.add(db_type) @@ -926,7 +935,9 @@ def create_app(args): # Inject database instances into storage classes for storage_name, storage_instance in storage_instances: - db_type = get_db_type_from_storage_class(storage_instance.__class__.__name__) + db_type = get_db_type_from_storage_class( + storage_instance.__class__.__name__ + ) if db_type: if db_type not in db_instances: error_msg = f"Database type '{db_type}' is required by {storage_name} but not initialized" @@ -966,7 +977,7 @@ def create_app(args): db_names = { "postgres": "PostgreSQL", "oracle": "Oracle", - "tidb": "TiDB" + "tidb": "TiDB", } db_name = db_names.get(db_type, db_type) logger.info(f"Closed {db_name} database connection pool") @@ -1289,7 +1300,7 @@ def create_app(args): case ".pdf": if not pm.is_installed("pypdf2"): pm.install("pypdf2") - from PyPDF2 import PdfReader # type: ignore + from PyPDF2 import PdfReader # type: ignore from io import BytesIO pdf_file = BytesIO(file) From 89c01c686f93eb228d2ef36a662bd6fc034da63d Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 17:46:28 +0800 Subject: [PATCH 4/8] Fix casting dateime it to str in needed - Added datetime formatting utility and standardized timestamp handling in DocStatusResponse --- lightrag/api/lightrag_server.py | 38 +++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 95118b51..fba81086 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -798,6 +798,36 @@ class InsertResponse(BaseModel): class DocStatusResponse(BaseModel): + @staticmethod + def format_datetime(dt: Any) -> Optional[str]: + """Format datetime to ISO string + + Args: + dt: Datetime object or string + + Returns: + Formatted datetime string or None + """ + if dt is None: + return None + if isinstance(dt, str): + return dt + return dt.isoformat() + + """Response model for document status + + Attributes: + id: Document identifier + content_summary: Summary of document content + content_length: Length of document content + status: Current processing status + created_at: Creation timestamp (ISO format string) + updated_at: Last update timestamp (ISO format string) + chunks_count: Number of chunks (optional) + error: Error message if any (optional) + metadata: Additional metadata (optional) + """ + id: str content_summary: str content_length: int @@ -1858,8 +1888,12 @@ def create_app(args): content_summary=doc_status.content_summary, content_length=doc_status.content_length, status=doc_status.status, - created_at=doc_status.created_at, - updated_at=doc_status.updated_at, + created_at=DocStatusResponse.format_datetime( + doc_status.created_at + ), + updated_at=DocStatusResponse.format_datetime( + doc_status.updated_at + ), chunks_count=doc_status.chunks_count, error=doc_status.error, metadata=doc_status.metadata, From cce4e8b9b34250043a535d8261299e30ce832656 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 19:40:54 +0800 Subject: [PATCH 5/8] Update Python version requirement from 3.10 to 3.11 because of StrEnum --- lightrag/tools/lightrag_visualizer/README-zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/tools/lightrag_visualizer/README-zh.md b/lightrag/tools/lightrag_visualizer/README-zh.md index 949178ff..bd82e1b2 100644 --- a/lightrag/tools/lightrag_visualizer/README-zh.md +++ b/lightrag/tools/lightrag_visualizer/README-zh.md @@ -90,6 +90,6 @@ ## 系统要求 -- Python 3.10+ +- Python 3.11+ - OpenGL 3.3+ 兼容的显卡 - 支持的操作系统:Windows/Linux/MacOS From 8008d0472846d820815b0a2d3772c9bbbe601ae3 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 20:25:01 +0800 Subject: [PATCH 6/8] Update Python version requirement from 3.10 to 3.11 --- docs/DockerDeployment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/DockerDeployment.md b/docs/DockerDeployment.md index e7955cf8..50389c8f 100644 --- a/docs/DockerDeployment.md +++ b/docs/DockerDeployment.md @@ -5,7 +5,7 @@ A lightweight Knowledge Graph Retrieval-Augmented Generation system with multipl ## 🚀 Installation ### Prerequisites -- Python 3.10+ +- Python 3.11+ - Git - Docker (optional for Docker deployment) From 4e215ed4ddbe1026e313aa067e12e9384a714185 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 20:34:33 +0800 Subject: [PATCH 7/8] Revert "Update Python version requirement from 3.10 to 3.11" This reverts commit 8008d0472846d820815b0a2d3772c9bbbe601ae3. --- docs/DockerDeployment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/DockerDeployment.md b/docs/DockerDeployment.md index 50389c8f..e7955cf8 100644 --- a/docs/DockerDeployment.md +++ b/docs/DockerDeployment.md @@ -5,7 +5,7 @@ A lightweight Knowledge Graph Retrieval-Augmented Generation system with multipl ## 🚀 Installation ### Prerequisites -- Python 3.11+ +- Python 3.10+ - Git - Docker (optional for Docker deployment) From 91566562987bcb0c129a3b870b7562de98a4fdfd Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 20:34:56 +0800 Subject: [PATCH 8/8] Revert "Update Python version requirement from 3.10 to 3.11 because of StrEnum" This reverts commit cce4e8b9b34250043a535d8261299e30ce832656. --- lightrag/tools/lightrag_visualizer/README-zh.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/tools/lightrag_visualizer/README-zh.md b/lightrag/tools/lightrag_visualizer/README-zh.md index bd82e1b2..949178ff 100644 --- a/lightrag/tools/lightrag_visualizer/README-zh.md +++ b/lightrag/tools/lightrag_visualizer/README-zh.md @@ -90,6 +90,6 @@ ## 系统要求 -- Python 3.11+ +- Python 3.10+ - OpenGL 3.3+ 兼容的显卡 - 支持的操作系统:Windows/Linux/MacOS