feat: Centralize configuration and update defaults
This commit introduces `lightrag/constants.py` to centralize default values for various configurations across the API and core components.

Key changes:
- Added `constants.py` to centralize default values.
- Improved the `get_env_value` function in `api/config.py` so that it treats the string "None" as a `None` value (when `special_none=True`) and catches `TypeError` in addition to `ValueError` during value conversion.
- Updated the default `SUMMARY_LANGUAGE` to "English".
- Set the default `WORKERS` to 2.
This commit is contained in:
@@ -7,6 +7,11 @@ import argparse
|
||||
import logging
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from lightrag.constants import (
|
||||
DEFAULT_WOKERS,
|
||||
DEFAULT_TIMEOUT,
|
||||
)
|
||||
|
||||
# use the .env that is inside the current folder
|
||||
# allows to use different .env file for each lightrag instance
|
||||
# the OS environment variables take precedence over the .env file
|
||||
@@ -45,7 +50,9 @@ def get_default_host(binding_type: str) -> str:
|
||||
) # fallback to ollama if unknown
|
||||
|
||||
|
||||
def get_env_value(env_key: str, default: any, value_type: type = str) -> any:
|
||||
def get_env_value(
|
||||
env_key: str, default: any, value_type: type = str, special_none: bool = False
|
||||
) -> any:
|
||||
"""
|
||||
Get value from environment variable with type conversion
|
||||
|
||||
@@ -53,6 +60,7 @@ def get_env_value(env_key: str, default: any, value_type: type = str) -> any:
|
||||
env_key (str): Environment variable key
|
||||
default (any): Default value if env variable is not set
|
||||
value_type (type): Type to convert the value to
|
||||
special_none (bool): If True, return None when value is "None"
|
||||
|
||||
Returns:
|
||||
any: Converted value from environment or default
|
||||
@@ -61,11 +69,15 @@ def get_env_value(env_key: str, default: any, value_type: type = str) -> any:
|
||||
if value is None:
|
||||
return default
|
||||
|
||||
# Handle special case for "None" string
|
||||
if special_none and value == "None":
|
||||
return None
|
||||
|
||||
if value_type is bool:
|
||||
return value.lower() in ("true", "1", "yes", "t", "on")
|
||||
try:
|
||||
return value_type(value)
|
||||
except ValueError:
|
||||
except (ValueError, TypeError):
|
||||
return default
|
||||
|
||||
|
||||
@@ -109,17 +121,10 @@ def parse_args() -> argparse.Namespace:
|
||||
help="Directory containing input documents (default: from env or ./inputs)",
|
||||
)
|
||||
|
||||
def timeout_type(value):
|
||||
if value is None:
|
||||
return 150
|
||||
if value is None or value == "None":
|
||||
return None
|
||||
return int(value)
|
||||
|
||||
parser.add_argument(
|
||||
"--timeout",
|
||||
default=get_env_value("TIMEOUT", None, timeout_type),
|
||||
type=timeout_type,
|
||||
default=get_env_value("TIMEOUT", DEFAULT_TIMEOUT, int, special_none=True),
|
||||
type=int,
|
||||
help="Timeout in seconds (useful when using slow AI). Use None for infinite timeout",
|
||||
)
|
||||
|
||||
@@ -226,7 +231,7 @@ def parse_args() -> argparse.Namespace:
|
||||
parser.add_argument(
|
||||
"--workers",
|
||||
type=int,
|
||||
default=get_env_value("WORKERS", 1, int),
|
||||
default=get_env_value("WORKERS", DEFAULT_WOKERS, int),
|
||||
help="Number of worker processes (default: from env or 1)",
|
||||
)
|
||||
|
||||
@@ -307,7 +312,7 @@ def parse_args() -> argparse.Namespace:
|
||||
|
||||
# Add environment variables that were previously read directly
|
||||
args.cors_origins = get_env_value("CORS_ORIGINS", "*")
|
||||
args.summary_language = get_env_value("SUMMARY_LANGUAGE", "en")
|
||||
args.summary_language = get_env_value("SUMMARY_LANGUAGE", "English")
|
||||
args.whitelist_paths = get_env_value("WHITELIST_PATHS", "/health,/api/*")
|
||||
|
||||
# For JWT Auth
|
||||
|
@@ -3,17 +3,24 @@ import os
|
||||
import logging
|
||||
from lightrag.kg.shared_storage import finalize_share_data
|
||||
from lightrag.utils import setup_logger
|
||||
from lightrag.api.config import get_env_value
|
||||
from lightrag.constants import (
|
||||
DEFAULT_LOG_MAX_BYTES,
|
||||
DEFAULT_LOG_BACKUP_COUNT,
|
||||
DEFAULT_LOG_FILENAME,
|
||||
)
|
||||
|
||||
|
||||
# Get log directory path from environment variable
|
||||
log_dir = os.getenv("LOG_DIR", os.getcwd())
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME))
|
||||
|
||||
# Ensure log directory exists
|
||||
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
|
||||
|
||||
# Get log file max size and backup count from environment variables
|
||||
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
|
||||
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
|
||||
log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int)
|
||||
log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int)
|
||||
|
||||
# These variables will be set by run_with_gunicorn.py
|
||||
workers = None
|
||||
@@ -29,10 +36,6 @@ preload_app = True
|
||||
worker_class = "uvicorn.workers.UvicornWorker"
|
||||
|
||||
# Other Gunicorn configurations
|
||||
timeout = int(
|
||||
os.getenv("TIMEOUT", 150 * 2)
|
||||
) # Default 150s *2 to match run_with_gunicorn.py
|
||||
keepalive = int(os.getenv("KEEPALIVE", 5)) # Default 5s
|
||||
|
||||
# Logging configuration
|
||||
errorlog = os.getenv("ERROR_LOG", log_file_path) # Default write to lightrag.log
|
||||
|
@@ -26,12 +26,18 @@ from .config import (
|
||||
global_args,
|
||||
update_uvicorn_mode_config,
|
||||
get_default_host,
|
||||
get_env_value,
|
||||
)
|
||||
import sys
|
||||
from lightrag import LightRAG, __version__ as core_version
|
||||
from lightrag.api import __api_version__
|
||||
from lightrag.types import GPTKeywordExtractionFormat
|
||||
from lightrag.utils import EmbeddingFunc
|
||||
from lightrag.constants import (
|
||||
DEFAULT_LOG_MAX_BYTES,
|
||||
DEFAULT_LOG_BACKUP_COUNT,
|
||||
DEFAULT_LOG_FILENAME,
|
||||
)
|
||||
from lightrag.api.routers.document_routes import (
|
||||
DocumentManager,
|
||||
create_document_routes,
|
||||
@@ -514,14 +520,14 @@ def configure_logging():
|
||||
|
||||
# Get log directory path from environment variable
|
||||
log_dir = os.getenv("LOG_DIR", os.getcwd())
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME))
|
||||
|
||||
print(f"\nLightRAG log file: {log_file_path}\n")
|
||||
os.makedirs(os.path.dirname(log_dir), exist_ok=True)
|
||||
|
||||
# Get log file max size and backup count from environment variables
|
||||
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
|
||||
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
|
||||
log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int)
|
||||
log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int)
|
||||
|
||||
logging.config.dictConfig(
|
||||
{
|
||||
|
@@ -8,8 +8,13 @@ import sys
|
||||
import signal
|
||||
import pipmaster as pm
|
||||
from lightrag.api.utils_api import display_splash_screen, check_env_file
|
||||
from lightrag.api.config import global_args, get_env_value
|
||||
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
|
||||
from .config import global_args
|
||||
|
||||
from lightrag.constants import (
|
||||
DEFAULT_WOKERS,
|
||||
DEFAULT_TIMEOUT,
|
||||
)
|
||||
|
||||
|
||||
def check_and_install_dependencies():
|
||||
@@ -122,7 +127,7 @@ def main():
|
||||
gunicorn_config.workers = (
|
||||
global_args.workers
|
||||
if global_args.workers
|
||||
else int(os.getenv("WORKERS", 1))
|
||||
else get_env_value("WORKERS", DEFAULT_WOKERS, int)
|
||||
)
|
||||
|
||||
# Bind configuration prioritizes command line arguments
|
||||
@@ -134,7 +139,7 @@ def main():
|
||||
port = (
|
||||
global_args.port
|
||||
if global_args.port != 9621
|
||||
else int(os.getenv("PORT", 9621))
|
||||
else get_env_value("PORT", 9621, int)
|
||||
)
|
||||
gunicorn_config.bind = f"{host}:{port}"
|
||||
|
||||
@@ -149,11 +154,13 @@ def main():
|
||||
gunicorn_config.timeout = (
|
||||
global_args.timeout * 2
|
||||
if global_args.timeout is not None
|
||||
else int(os.getenv("TIMEOUT", 150 * 2))
|
||||
else get_env_value(
|
||||
"TIMEOUT", DEFAULT_TIMEOUT + 30, int, special_none=True
|
||||
)
|
||||
)
|
||||
|
||||
# Keepalive configuration
|
||||
gunicorn_config.keepalive = int(os.getenv("KEEPALIVE", 5))
|
||||
gunicorn_config.keepalive = get_env_value("KEEPALIVE", 5, int)
|
||||
|
||||
# SSL configuration prioritizes command line arguments
|
||||
if global_args.ssl or os.getenv("SSL", "").lower() in (
|
||||
@@ -202,7 +209,7 @@ def main():
|
||||
app = GunicornApp("")
|
||||
|
||||
# Force workers to be an integer and greater than 1 for multi-process mode
|
||||
workers_count = int(global_args.workers)
|
||||
workers_count = global_args.workers
|
||||
if workers_count > 1:
|
||||
# Set a flag to indicate we're in the main process
|
||||
os.environ["LIGHTRAG_MAIN_PROCESS"] = "1"
|
||||
|
@@ -9,11 +9,15 @@ import sys
|
||||
from ascii_colors import ASCIIColors
|
||||
from lightrag.api import __api_version__ as api_version
|
||||
from lightrag import __version__ as core_version
|
||||
from lightrag.constants import (
|
||||
DEFAULT_MAX_TOKEN_SUMMARY,
|
||||
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
|
||||
)
|
||||
from fastapi import HTTPException, Security, Request, status
|
||||
from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
|
||||
from starlette.status import HTTP_403_FORBIDDEN
|
||||
from .auth import auth_handler
|
||||
from .config import ollama_server_infos, global_args
|
||||
from .config import ollama_server_infos, global_args, get_env_value
|
||||
|
||||
|
||||
def check_env_file():
|
||||
@@ -264,9 +268,13 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
||||
ASCIIColors.white(" ├─ Top-K: ", end="")
|
||||
ASCIIColors.yellow(f"{args.top_k}")
|
||||
ASCIIColors.white(" ├─ Max Token Summary: ", end="")
|
||||
ASCIIColors.yellow(f"{int(os.getenv('MAX_TOKEN_SUMMARY', 500))}")
|
||||
ASCIIColors.yellow(
|
||||
f"{get_env_value('MAX_TOKEN_SUMMARY', DEFAULT_MAX_TOKEN_SUMMARY, int)}"
|
||||
)
|
||||
ASCIIColors.white(" └─ Force LLM Summary on Merge: ", end="")
|
||||
ASCIIColors.yellow(f"{int(os.getenv('FORCE_LLM_SUMMARY_ON_MERGE', 6))}")
|
||||
ASCIIColors.yellow(
|
||||
f"{get_env_value('FORCE_LLM_SUMMARY_ON_MERGE', DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int)}"
|
||||
)
|
||||
|
||||
# System Configuration
|
||||
ASCIIColors.magenta("\n💾 Storage Configuration:")
|
||||
|
18
lightrag/constants.py
Normal file
18
lightrag/constants.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""
|
||||
Centralized configuration constants for LightRAG.
|
||||
|
||||
This module defines default values for configuration constants used across
|
||||
different parts of the LightRAG system. Centralizing these values ensures
|
||||
consistency and makes maintenance easier.
|
||||
"""
|
||||
|
||||
# Default values for environment variables
|
||||
DEFAULT_MAX_TOKEN_SUMMARY = 500
|
||||
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE = 6
|
||||
DEFAULT_WOKERS = 2
|
||||
DEFAULT_TIMEOUT = 150
|
||||
|
||||
# Logging configuration defaults
|
||||
DEFAULT_LOG_MAX_BYTES = 10485760 # Default 10MB
|
||||
DEFAULT_LOG_BACKUP_COUNT = 5 # Default 5 backups
|
||||
DEFAULT_LOG_FILENAME = "lightrag.log" # Default log filename
|
@@ -20,6 +20,11 @@ from typing import (
|
||||
List,
|
||||
Dict,
|
||||
)
|
||||
from lightrag.constants import (
|
||||
DEFAULT_MAX_TOKEN_SUMMARY,
|
||||
DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE,
|
||||
)
|
||||
from lightrag.api.config import get_env_value
|
||||
|
||||
from lightrag.kg import (
|
||||
STORAGES,
|
||||
@@ -119,10 +124,14 @@ class LightRAG:
|
||||
entity_extract_max_gleaning: int = field(default=1)
|
||||
"""Maximum number of entity extraction attempts for ambiguous content."""
|
||||
|
||||
summary_to_max_tokens: int = field(default=int(os.getenv("MAX_TOKEN_SUMMARY", 500)))
|
||||
summary_to_max_tokens: int = field(
|
||||
default=get_env_value("MAX_TOKEN_SUMMARY", DEFAULT_MAX_TOKEN_SUMMARY, int)
|
||||
)
|
||||
|
||||
force_llm_summary_on_merge: int = field(
|
||||
default=int(os.getenv("FORCE_LLM_SUMMARY_ON_MERGE", 6))
|
||||
default=get_env_value(
|
||||
"FORCE_LLM_SUMMARY_ON_MERGE", DEFAULT_FORCE_LLM_SUMMARY_ON_MERGE, int
|
||||
)
|
||||
)
|
||||
|
||||
# Text chunking
|
||||
@@ -245,7 +254,7 @@ class LightRAG:
|
||||
|
||||
addon_params: dict[str, Any] = field(
|
||||
default_factory=lambda: {
|
||||
"language": os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"])
|
||||
"language": get_env_value("SUMMARY_LANGUAGE", "English", str)
|
||||
}
|
||||
)
|
||||
|
||||
|
@@ -17,6 +17,12 @@ import xml.etree.ElementTree as ET
|
||||
import numpy as np
|
||||
from lightrag.prompt import PROMPTS
|
||||
from dotenv import load_dotenv
|
||||
from lightrag.constants import (
|
||||
DEFAULT_LOG_MAX_BYTES,
|
||||
DEFAULT_LOG_BACKUP_COUNT,
|
||||
DEFAULT_LOG_FILENAME,
|
||||
)
|
||||
from lightrag.api.config import get_env_value
|
||||
|
||||
# Use TYPE_CHECKING to avoid circular imports
|
||||
if TYPE_CHECKING:
|
||||
@@ -152,14 +158,14 @@ def setup_logger(
|
||||
# Get log file path
|
||||
if log_file_path is None:
|
||||
log_dir = os.getenv("LOG_DIR", os.getcwd())
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, DEFAULT_LOG_FILENAME))
|
||||
|
||||
# Ensure log directory exists
|
||||
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
|
||||
|
||||
# Get log file max size and backup count from environment variables
|
||||
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
|
||||
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
|
||||
log_max_bytes = get_env_value("LOG_MAX_BYTES", DEFAULT_LOG_MAX_BYTES, int)
|
||||
log_backup_count = get_env_value("LOG_BACKUP_COUNT", DEFAULT_LOG_BACKUP_COUNT, int)
|
||||
|
||||
try:
|
||||
# Add file handler
|
||||
|
Reference in New Issue
Block a user