feat: Centralize configuration and update defaults

This commit introduces `lightrag/constants.py` to centralize default values for various configurations across the API and core components.

Key changes:
- Added `constants.py` to centralize default values
- Improved the `get_env_value` function in `api/config.py` to correctly handle string "None" as a None value and to catch `TypeError` during value conversion.
- Updated the default `SUMMARY_LANGUAGE` to "English"
- Set default `WORKERS` to 2
This commit is contained in:
yangdx
2025-05-06 22:00:43 +08:00
parent ed9d2b9c59
commit c8ecfa2d68
8 changed files with 100 additions and 38 deletions

View File

@@ -7,6 +7,11 @@ import argparse
import logging
from dotenv import load_dotenv
from lightrag.constants import (
DEFAULT_WOKERS,
DEFAULT_TIMEOUT,
)
# use the .env that is inside the current folder
# allows to use different .env file for each lightrag instance
# the OS environment variables take precedence over the .env file
@@ -45,7 +50,9 @@ def get_default_host(binding_type: str) -> str:
) # fallback to ollama if unknown
def get_env_value(env_key: str, default: any, value_type: type = str) -> any:
def get_env_value(
env_key: str, default: any, value_type: type = str, special_none: bool = False
) -> any:
"""
Get value from environment variable with type conversion
@@ -53,6 +60,7 @@ def get_env_value(env_key: str, default: any, value_type: type = str) -> any:
env_key (str): Environment variable key
default (any): Default value if env variable is not set
value_type (type): Type to convert the value to
special_none (bool): If True, return None when value is "None"
Returns:
any: Converted value from environment or default
@@ -61,11 +69,15 @@ def get_env_value(env_key: str, default: any, value_type: type = str) -> any:
if value is None:
return default
# Handle special case for "None" string
if special_none and value == "None":
return None
if value_type is bool:
return value.lower() in ("true", "1", "yes", "t", "on")
try:
return value_type(value)
except ValueError:
except (ValueError, TypeError):
return default
@@ -109,17 +121,10 @@ def parse_args() -> argparse.Namespace:
help="Directory containing input documents (default: from env or ./inputs)",
)
def timeout_type(value):
if value is None:
return 150
if value is None or value == "None":
return None
return int(value)
parser.add_argument(
"--timeout",
default=get_env_value("TIMEOUT", None, timeout_type),
type=timeout_type,
default=get_env_value("TIMEOUT", DEFAULT_TIMEOUT, int, special_none=True),
type=int,
help="Timeout in seconds (useful when using slow AI). Use None for infinite timeout",
)
@@ -226,7 +231,7 @@ def parse_args() -> argparse.Namespace:
parser.add_argument(
"--workers",
type=int,
default=get_env_value("WORKERS", 1, int),
default=get_env_value("WORKERS", DEFAULT_WOKERS, int),
help="Number of worker processes (default: from env or 1)",
)
@@ -307,7 +312,7 @@ def parse_args() -> argparse.Namespace:
# Add environment variables that were previously read directly
args.cors_origins = get_env_value("CORS_ORIGINS", "*")
args.summary_language = get_env_value("SUMMARY_LANGUAGE", "en")
args.summary_language = get_env_value("SUMMARY_LANGUAGE", "English")
args.whitelist_paths = get_env_value("WHITELIST_PATHS", "/health,/api/*")
# For JWT Auth

View File

@@ -3,17 +3,24 @@ import os
import logging
from lightrag.kg.shared_storage import finalize_share_data
from lightrag.utils import setup_logger
from lightrag.api.config import get_env_value
from lightrag.constants import (
DEFAULT_LOG_MAX_BYTES,
DEFAULT_LOG_BACKUP_COUNT,
DEFAULT_LOG_FILENAME,
)
# Get log directory path from environment variable
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME))
# Ensure log directory exists
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int)
log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int)
# These variables will be set by run_with_gunicorn.py
workers = None
@@ -29,10 +36,6 @@ preload_app = True
worker_class = "uvicorn.workers.UvicornWorker"
# Other Gunicorn configurations
timeout = int(
os.getenv("TIMEOUT", 150 * 2)
) # Default 150s *2 to match run_with_gunicorn.py
keepalive = int(os.getenv("KEEPALIVE", 5)) # Default 5s
# Logging configuration
errorlog = os.getenv("ERROR_LOG", log_file_path) # Default write to lightrag.log

View File

@@ -26,12 +26,18 @@ from .config import (
global_args,
update_uvicorn_mode_config,
get_default_host,
get_env_value,
)
import sys
from lightrag import LightRAG, __version__ as core_version
from lightrag.api import __api_version__
from lightrag.types import GPTKeywordExtractionFormat
from lightrag.utils import EmbeddingFunc
from lightrag.constants import (
DEFAULT_LOG_MAX_BYTES,
DEFAULT_LOG_BACKUP_COUNT,
DEFAULT_LOG_FILENAME,
)
from lightrag.api.routers.document_routes import (
DocumentManager,
create_document_routes,
@@ -514,14 +520,14 @@ def configure_logging():
# Get log directory path from environment variable
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME))
print(f"\nLightRAG log file: {log_file_path}\n")
os.makedirs(os.path.dirname(log_dir), exist_ok=True)
# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int)
log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int)
logging.config.dictConfig(
{

View File

@@ -8,8 +8,13 @@ import sys
import signal
import pipmaster as pm
from lightrag.api.utils_api import display_splash_screen, check_env_file
from lightrag.api.config import global_args, get_env_value
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
from .config import global_args
from lightrag.constants import (
DEFAULT_WOKERS,
DEFAULT_TIMEOUT,
)
def check_and_install_dependencies():
@@ -122,7 +127,7 @@ def main():
gunicorn_config.workers = (
global_args.workers
if global_args.workers
else int(os.getenv("WORKERS", 1))
else get_env_value("WORKERS", DEFAULT_WOKERS, int)
)
# Bind configuration prioritizes command line arguments
@@ -134,7 +139,7 @@ def main():
port = (
global_args.port
if global_args.port != 9621
else int(os.getenv("PORT", 9621))
else get_env_value("PORT", 9621, int)
)
gunicorn_config.bind = f"{host}:{port}"
@@ -149,11 +154,13 @@ def main():
gunicorn_config.timeout = (
global_args.timeout * 2
if global_args.timeout is not None
else int(os.getenv("TIMEOUT", 150 * 2))
else get_env_value(
"TIMEOUT", DEFAULT_TIMEOUT + 30, int, special_none=True
)
)
# Keepalive configuration
gunicorn_config.keepalive = int(os.getenv("KEEPALIVE", 5))
gunicorn_config.keepalive = get_env_value("KEEPALIVE", 5, int)
# SSL configuration prioritizes command line arguments
if global_args.ssl or os.getenv("SSL", "").lower() in (
@@ -202,7 +209,7 @@ def main():
app = GunicornApp("")
# Force workers to be an integer and greater than 1 for multi-process mode
workers_count = int(global_args.workers)
workers_count = global_args.workers
if workers_count > 1:
# Set a flag to indicate we're in the main process
os.environ["LIGHTRAG_MAIN_PROCESS"] = "1"

View File

@@ -9,11 +9,15 @@ import sys
from ascii_colors import ASCIIColors
from lightrag.api import __api_version__ as api_version
from lightrag import __version__ as core_version
from lightrag.constants import (
DEFAULT_MAX_TOKEN_SUMMARY,
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
)
from fastapi import HTTPException, Security, Request, status
from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
from starlette.status import HTTP_403_FORBIDDEN
from .auth import auth_handler
from .config import ollama_server_infos, global_args
from .config import ollama_server_infos, global_args, get_env_value
def check_env_file():
@@ -264,9 +268,13 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.white(" ├─ Top-K: ", end="")
ASCIIColors.yellow(f"{args.top_k}")
ASCIIColors.white(" ├─ Max Token Summary: ", end="")
ASCIIColors.yellow(f"{int(os.getenv('MAX_TOKEN_SUMMARY', 500))}")
ASCIIColors.yellow(
f"{get_env_value('MAX_TOKEN_SUMMARY', DEFAULT_MAX_TOKEN_SUMMARY, int)}"
)
ASCIIColors.white(" └─ Force LLM Summary on Merge: ", end="")
ASCIIColors.yellow(f"{int(os.getenv('FORCE_LLM_SUMMARY_ON_MERGE', 6))}")
ASCIIColors.yellow(
f"{get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}"
)
# System Configuration
ASCIIColors.magenta("\n💾 Storage Configuration:")

18
lightrag/constants.py Normal file
View File

@@ -0,0 +1,18 @@
"""
Centralized configuration constants for LightRAG.

This module defines default values for configuration constants used across
different parts of the LightRAG system. Centralizing these values ensures
consistency and makes maintenance easier.

Every value here is only a fallback: callers are expected to consult the
corresponding environment variable first and use the constant when the
variable is unset or cannot be converted.
"""

# Default values for environment variables
DEFAULT_MAX_TOKEN_SUMMARY = 500  # fallback for MAX_TOKEN_SUMMARY
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 6  # fallback for FORCE_LLM_SUMMARY_ON_MERGE
DEFAULT_WORKERS = 2  # fallback for WORKERS (number of worker processes)
# Backward-compatible alias: the constant was first published under this
# misspelled name, and existing modules still import it. Prefer
# DEFAULT_WORKERS in new code.
DEFAULT_WOKERS = DEFAULT_WORKERS
DEFAULT_TIMEOUT = 150  # fallback for TIMEOUT, in seconds

# Logging configuration defaults
DEFAULT_LOG_MAX_BYTES = 10 * 1024 * 1024  # Default 10MB (10485760 bytes)
DEFAULT_LOG_BACKUP_COUNT = 5  # Default 5 backups
DEFAULT_LOG_FILENAME = "lightrag.log"  # Default log filename

View File

@@ -20,6 +20,11 @@ from typing import (
List,
Dict,
)
from lightrag.constants import (
DEFAULT_MAX_TOKEN_SUMMARY,
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
)
from lightrag.api.config import get_env_value
from lightrag.kg import (
STORAGES,
@@ -119,10 +124,14 @@ class LightRAG:
entity_extract_max_gleaning: int = field(default=1)
"""Maximum number of entity extraction attempts for ambiguous content."""
summary_to_max_tokens: int = field(default=int(os.getenv("MAX_TOKEN_SUMMARY", 500)))
summary_to_max_tokens: int = field(
default=get_env_value("MAX_TOKEN_SUMMARY", DEFAULT_MAX_TOKEN_SUMMARY, int)
)
force_llm_summary_on_merge: int = field(
default=int(os.getenv("FORCE_LLM_SUMMARY_ON_MERGE", 6))
default=get_env_value(
"FORCE_LLM_SUMMARY_ON_MERGE", DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int
)
)
# Text chunking
@@ -245,7 +254,7 @@ class LightRAG:
addon_params: dict[str, Any] = field(
default_factory=lambda: {
"language": os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"])
"language": get_env_value("SUMMARY_LANGUAGE", "English", str)
}
)

View File

@@ -17,6 +17,12 @@ import xml.etree.ElementTree as ET
import numpy as np
from lightrag.prompt import PROMPTS
from dotenv import load_dotenv
from lightrag.constants import (
DEFAULT_LOG_MAX_BYTES,
DEFAULT_LOG_BACKUP_COUNT,
DEFAULT_LOG_FILENAME,
)
from lightrag.api.config import get_env_value
# Use TYPE_CHECKING to avoid circular imports
if TYPE_CHECKING:
@@ -152,14 +158,14 @@ def setup_logger(
# Get log file path
if log_file_path is None:
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME))
# Ensure log directory exists
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int)
log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int)
try:
# Add file handler