From c8ecfa2d68614d47ce55caea73004032cc848c4d Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 6 May 2025 22:00:43 +0800 Subject: [PATCH] feat: Centralize configuration and update defaults This commit introduces `lightrag/constants.py` to centralize default values for various configurations across the API and core components. Key changes: - Added `constants.py` to centralize default values - Improved the `get_env_value` function in `api/config.py` to correctly handle string "None" as a None value and to catch `TypeError` during value conversion. - Updated the default `SUMMARY_LANGUAGE` to "English" - Set default `WORKERS` to 2 --- lightrag/api/config.py | 31 ++++++++++++++++++------------- lightrag/api/gunicorn_config.py | 17 ++++++++++------- lightrag/api/lightrag_server.py | 12 +++++++++--- lightrag/api/run_with_gunicorn.py | 19 +++++++++++++------ lightrag/api/utils_api.py | 14 +++++++++++--- lightrag/constants.py | 18 ++++++++++++++++++ lightrag/lightrag.py | 15 ++++++++++++--- lightrag/utils.py | 12 +++++++++--- 8 files changed, 100 insertions(+), 38 deletions(-) create mode 100644 lightrag/constants.py diff --git a/lightrag/api/config.py b/lightrag/api/config.py index 268b41cb..696c1d48 100644 --- a/lightrag/api/config.py +++ b/lightrag/api/config.py @@ -7,6 +7,11 @@ import argparse import logging from dotenv import load_dotenv +from lightrag.constants import ( + DEFAULT_WOKERS, + DEFAULT_TIMEOUT, +) + # use the .env that is inside the current folder # allows to use different .env file for each lightrag instance # the OS environment variables take precedence over the .env file @@ -45,7 +50,9 @@ def get_default_host(binding_type: str) -> str: ) # fallback to ollama if unknown -def get_env_value(env_key: str, default: any, value_type: type = str) -> any: +def get_env_value( + env_key: str, default: any, value_type: type = str, special_none: bool = False +) -> any: """ Get value from environment variable with type conversion @@ -53,6 +60,7 @@ def get_env_value(env_key: str, default: any, value_type: type = str) -> any: env_key (str): Environment variable key default (any): Default value if env variable is not set value_type (type): Type to convert the value to + special_none (bool): If True, return None when value is "None" Returns: any: Converted value from environment or default @@ -61,11 +69,15 @@ def get_env_value(env_key: str, default: any, value_type: type = str) -> any: if value is None: return default + # Handle special case for "None" string + if special_none and value == "None": + return None + if value_type is bool: return value.lower() in ("true", "1", "yes", "t", "on") try: return value_type(value) - except ValueError: + except (ValueError, TypeError): return default @@ -109,17 +121,10 @@ def parse_args() -> argparse.Namespace: help="Directory containing input documents (default: from env or ./inputs)", ) - def timeout_type(value): - if value is None: - return 150 - if value is None or value == "None": - return None - return int(value) - parser.add_argument( "--timeout", - default=get_env_value("TIMEOUT", None, timeout_type), - type=timeout_type, + default=get_env_value("TIMEOUT", DEFAULT_TIMEOUT, int, special_none=True), + type=int, help="Timeout in seconds (useful when using slow AI). Use None for infinite timeout", ) @@ -226,7 +231,7 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--workers", type=int, - default=get_env_value("WORKERS", 1, int), + default=get_env_value("WORKERS", DEFAULT_WOKERS, int), help="Number of worker processes (default: from env or 1)", ) @@ -307,7 +312,7 @@ def parse_args() -> argparse.Namespace: # Add environment variables that were previously read directly args.cors_origins = get_env_value("CORS_ORIGINS", "*") - args.summary_language = get_env_value("SUMMARY_LANGUAGE", "en") + args.summary_language = get_env_value("SUMMARY_LANGUAGE", "English") args.whitelist_paths = get_env_value("WHITELIST_PATHS", "/health,/api/*") # For JWT Auth diff --git a/lightrag/api/gunicorn_config.py b/lightrag/api/gunicorn_config.py index 0aef108e..25c9e48f 100644 --- a/lightrag/api/gunicorn_config.py +++ b/lightrag/api/gunicorn_config.py @@ -3,17 +3,24 @@ import os import logging from lightrag.kg.shared_storage import finalize_share_data from lightrag.utils import setup_logger +from lightrag.api.config import get_env_value +from lightrag.constants import ( + DEFAULT_LOG_MAX_BYTES, + DEFAULT_LOG_BACKUP_COUNT, + DEFAULT_LOG_FILENAME, +) + # Get log directory path from environment variable log_dir = os.getenv("LOG_DIR", os.getcwd()) -log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log")) +log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME)) # Ensure log directory exists os.makedirs(os.path.dirname(log_file_path), exist_ok=True) # Get log file max size and backup count from environment variables -log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB -log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups +log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int) +log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int) # These variables will be set by run_with_gunicorn.py workers = None @@ -29,10 +36,6 @@ preload_app = True worker_class = "uvicorn.workers.UvicornWorker" # Other Gunicorn configurations -timeout = int( - os.getenv("TIMEOUT", 150 * 2) -) # Default 150s *2 to match run_with_gunicorn.py -keepalive = int(os.getenv("KEEPALIVE", 5)) # Default 5s # Logging configuration errorlog = os.getenv("ERROR_LOG", log_file_path) # Default write to lightrag.log diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 42a45daa..6c90d35f 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -26,12 +26,18 @@ from .config import ( global_args, update_uvicorn_mode_config, get_default_host, + get_env_value, ) import sys from lightrag import LightRAG, __version__ as core_version from lightrag.api import __api_version__ from lightrag.types import GPTKeywordExtractionFormat from lightrag.utils import EmbeddingFunc +from lightrag.constants import ( + DEFAULT_LOG_MAX_BYTES, + DEFAULT_LOG_BACKUP_COUNT, + DEFAULT_LOG_FILENAME, +) from lightrag.api.routers.document_routes import ( DocumentManager, create_document_routes, @@ -514,14 +520,14 @@ def configure_logging(): # Get log directory path from environment variable log_dir = os.getenv("LOG_DIR", os.getcwd()) - log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log")) + log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME)) print(f"\nLightRAG log file: {log_file_path}\n") os.makedirs(os.path.dirname(log_dir), exist_ok=True) # Get log file max size and backup count from environment variables - log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB - log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups + log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int) + log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int) logging.config.dictConfig( { diff --git a/lightrag/api/run_with_gunicorn.py b/lightrag/api/run_with_gunicorn.py index 5b41af8a..11df0801 100644 --- a/lightrag/api/run_with_gunicorn.py +++ b/lightrag/api/run_with_gunicorn.py @@ -8,8 +8,13 @@ import sys import signal import pipmaster as pm from lightrag.api.utils_api import display_splash_screen, check_env_file +from lightrag.api.config import global_args, get_env_value from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data -from .config import global_args + +from lightrag.constants import ( + DEFAULT_WOKERS, + DEFAULT_TIMEOUT, +) def check_and_install_dependencies(): @@ -122,7 +127,7 @@ def main(): gunicorn_config.workers = ( global_args.workers if global_args.workers - else int(os.getenv("WORKERS", 1)) + else get_env_value("WORKERS", DEFAULT_WOKERS, int) ) # Bind configuration prioritizes command line arguments @@ -134,7 +139,7 @@ def main(): port = ( global_args.port if global_args.port != 9621 - else int(os.getenv("PORT", 9621)) + else get_env_value("PORT", 9621, int) ) gunicorn_config.bind = f"{host}:{port}" @@ -149,11 +154,13 @@ def main(): gunicorn_config.timeout = ( global_args.timeout * 2 if global_args.timeout is not None - else int(os.getenv("TIMEOUT", 150 * 2)) + else get_env_value( + "TIMEOUT", DEFAULT_TIMEOUT + 30, int, special_none=True + ) ) # Keepalive configuration - gunicorn_config.keepalive = int(os.getenv("KEEPALIVE", 5)) + gunicorn_config.keepalive = get_env_value("KEEPALIVE", 5, int) # SSL configuration prioritizes command line arguments if global_args.ssl or os.getenv("SSL", "").lower() in ( @@ -202,7 +209,7 @@ def main(): app = GunicornApp("") # Force workers to be an integer and greater than 1 for multi-process mode - workers_count = int(global_args.workers) + workers_count = global_args.workers if workers_count > 1: # Set a flag to indicate we're in the main process os.environ["LIGHTRAG_MAIN_PROCESS"] = "1" diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py index a1dade88..13ec4ddd 100644 --- a/lightrag/api/utils_api.py +++ b/lightrag/api/utils_api.py @@ -9,11 +9,15 @@ import sys from ascii_colors import ASCIIColors from lightrag.api import __api_version__ as api_version from lightrag import __version__ as core_version +from lightrag.constants import ( + DEFAULT_MAX_TOKEN_SUMMARY, + DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, +) from fastapi import HTTPException, Security, Request, status from fastapi.security import APIKeyHeader, OAuth2PasswordBearer from starlette.status import HTTP_403_FORBIDDEN from .auth import auth_handler -from .config import ollama_server_infos, global_args +from .config import ollama_server_infos, global_args, get_env_value def check_env_file(): @@ -264,9 +268,13 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.white(" ├─ Top-K: ", end="") ASCIIColors.yellow(f"{args.top_k}") ASCIIColors.white(" ├─ Max Token Summary: ", end="") - ASCIIColors.yellow(f"{int(os.getenv('MAX_TOKEN_SUMMARY', 500))}") + ASCIIColors.yellow( + f"{get_env_value('MAX_TOKEN_SUMMARY', DEFAULT_MAX_TOKEN_SUMMARY, int)}" + ) ASCIIColors.white(" └─ Force LLM Summary on Merge: ", end="") - ASCIIColors.yellow(f"{int(os.getenv('FORCE_LLM_SUMMARY_ON_MERGE', 6))}") + ASCIIColors.yellow( + f"{get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}" + ) # System Configuration ASCIIColors.magenta("\n💾 Storage Configuration:") diff --git a/lightrag/constants.py b/lightrag/constants.py new file mode 100644 index 00000000..787e1c49 --- /dev/null +++ b/lightrag/constants.py @@ -0,0 +1,18 @@ +""" +Centralized configuration constants for LightRAG. + +This module defines default values for configuration constants used across +different parts of the LightRAG system. Centralizing these values ensures +consistency and makes maintenance easier. +""" + +# Default values for environment variables +DEFAULT_MAX_TOKEN_SUMMARY = 500 +DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 6 +DEFAULT_WOKERS = 2 +DEFAULT_TIMEOUT = 150 + +# Logging configuration defaults +DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB +DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups +DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 17d36166..452941a6 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -20,6 +20,11 @@ from typing import ( List, Dict, ) +from lightrag.constants import ( + DEFAULT_MAX_TOKEN_SUMMARY, + DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, +) +from lightrag.api.config import get_env_value from lightrag.kg import ( STORAGES, @@ -119,10 +124,14 @@ class LightRAG: entity_extract_max_gleaning: int = field(default=1) """Maximum number of entity extraction attempts for ambiguous content.""" - summary_to_max_tokens: int = field(default=int(os.getenv("MAX_TOKEN_SUMMARY", 500))) + summary_to_max_tokens: int = field( + default=get_env_value("MAX_TOKEN_SUMMARY", DEFAULT_MAX_TOKEN_SUMMARY, int) + ) force_llm_summary_on_merge: int = field( - default=int(os.getenv("FORCE_LLM_SUMMARY_ON_MERGE", 6)) + default=get_env_value( + "FORCE_LLM_SUMMARY_ON_MERGE", DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int + ) ) # Text chunking @@ -245,7 +254,7 @@ class LightRAG: addon_params: dict[str, Any] = field( default_factory=lambda: { - "language": os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"]) + "language": get_env_value("SUMMARY_LANGUAGE", "English", str) } ) diff --git a/lightrag/utils.py b/lightrag/utils.py index 9af97c7a..720f859e 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -17,6 +17,12 @@ import xml.etree.ElementTree as ET import numpy as np from lightrag.prompt import PROMPTS from dotenv import load_dotenv +from lightrag.constants import ( + DEFAULT_LOG_MAX_BYTES, + DEFAULT_LOG_BACKUP_COUNT, + DEFAULT_LOG_FILENAME, +) +from lightrag.api.config import get_env_value # Use TYPE_CHECKING to avoid circular imports if TYPE_CHECKING: @@ -152,14 +158,14 @@ def setup_logger( # Get log file path if log_file_path is None: log_dir = os.getenv("LOG_DIR", os.getcwd()) - log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log")) + log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME)) # Ensure log directory exists os.makedirs(os.path.dirname(log_file_path), exist_ok=True) # Get log file max size and backup count from environment variables - log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB - log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups + log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int) + log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int) try: # Add file handler