Fix linting

This commit is contained in:
yangdx
2025-02-27 19:05:51 +08:00
parent 946095ef80
commit 64f22966a3
8 changed files with 196 additions and 112 deletions

View File

@@ -8,7 +8,7 @@ from lightrag.api.utils_api import parse_args
args = parse_args() args = parse_args()
# Determine worker count - from environment variable or command line arguments # Determine worker count - from environment variable or command line arguments
workers = int(os.getenv('WORKERS', args.workers)) workers = int(os.getenv("WORKERS", args.workers))
# If not specified, use CPU count * 2 + 1 (Gunicorn recommended configuration) # If not specified, use CPU count * 2 + 1 (Gunicorn recommended configuration)
if workers <= 1: if workers <= 1:
@@ -24,7 +24,7 @@ preload_app = True
worker_class = "uvicorn.workers.UvicornWorker" worker_class = "uvicorn.workers.UvicornWorker"
# Other Gunicorn configurations # Other Gunicorn configurations
timeout = int(os.getenv('TIMEOUT', 120)) timeout = int(os.getenv("TIMEOUT", 120))
keepalive = 5 keepalive = 5
# Optional SSL configuration # Optional SSL configuration
@@ -33,9 +33,10 @@ if args.ssl:
keyfile = args.ssl_keyfile keyfile = args.ssl_keyfile
# Logging configuration # Logging configuration
errorlog = os.getenv('ERROR_LOG', '-') # '-' means stderr errorlog = os.getenv("ERROR_LOG", "-") # '-' means stderr
accesslog = os.getenv('ACCESS_LOG', '-') # '-' means stderr accesslog = os.getenv("ACCESS_LOG", "-") # '-' means stderr
loglevel = os.getenv('LOG_LEVEL', 'info') loglevel = os.getenv("LOG_LEVEL", "info")
def on_starting(server): def on_starting(server):
""" """
@@ -46,21 +47,25 @@ def on_starting(server):
print(f"GUNICORN MASTER PROCESS: on_starting jobs for all {workers} workers") print(f"GUNICORN MASTER PROCESS: on_starting jobs for all {workers} workers")
print(f"Process ID: {os.getpid()}") print(f"Process ID: {os.getpid()}")
print("=" * 80) print("=" * 80)
# Memory usage monitoring # Memory usage monitoring
try: try:
import psutil import psutil
process = psutil.Process(os.getpid()) process = psutil.Process(os.getpid())
memory_info = process.memory_info() memory_info = process.memory_info()
msg = f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB" msg = (
f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
)
print(msg) print(msg)
except ImportError: except ImportError:
print("psutil not installed, skipping memory usage reporting") print("psutil not installed, skipping memory usage reporting")
print("=" * 80) print("=" * 80)
print("Gunicorn initialization complete, forking workers...") print("Gunicorn initialization complete, forking workers...")
print("=" * 80) print("=" * 80)
def on_exit(server): def on_exit(server):
""" """
Executed when Gunicorn is shutting down. Executed when Gunicorn is shutting down.
@@ -70,10 +75,10 @@ def on_exit(server):
print("GUNICORN MASTER PROCESS: Shutting down") print("GUNICORN MASTER PROCESS: Shutting down")
print(f"Process ID: {os.getpid()}") print(f"Process ID: {os.getpid()}")
print("=" * 80) print("=" * 80)
# Release shared resources # Release shared resources
finalize_share_data() finalize_share_data()
print("=" * 80) print("=" * 80)
print("Gunicorn shutdown complete") print("Gunicorn shutdown complete")
print("=" * 80) print("=" * 80)

View File

@@ -471,12 +471,13 @@ def configure_logging():
def main(): def main():
# Check if running under Gunicorn # Check if running under Gunicorn
if 'GUNICORN_CMD_ARGS' in os.environ: if "GUNICORN_CMD_ARGS" in os.environ:
# If started with Gunicorn, return directly as Gunicorn will call get_application # If started with Gunicorn, return directly as Gunicorn will call get_application
print("Running under Gunicorn - worker management handled by Gunicorn") print("Running under Gunicorn - worker management handled by Gunicorn")
return return
from multiprocessing import freeze_support from multiprocessing import freeze_support
freeze_support() freeze_support()
args = parse_args() args = parse_args()
@@ -487,10 +488,10 @@ def main():
configure_logging() configure_logging()
display_splash_screen(args) display_splash_screen(args)
# Create application instance directly instead of using factory function # Create application instance directly instead of using factory function
app = create_app(args) app = create_app(args)
# Start Uvicorn in single process mode # Start Uvicorn in single process mode
uvicorn_config = { uvicorn_config = {
"app": app, # Pass application instance directly instead of string path "app": app, # Pass application instance directly instead of string path
@@ -498,7 +499,7 @@ def main():
"port": args.port, "port": args.port,
"log_config": None, # Disable default config "log_config": None, # Disable default config
} }
if args.ssl: if args.ssl:
uvicorn_config.update( uvicorn_config.update(
{ {
@@ -506,7 +507,7 @@ def main():
"ssl_keyfile": args.ssl_keyfile, "ssl_keyfile": args.ssl_keyfile,
} }
) )
print(f"Starting Uvicorn server in single-process mode on {args.host}:{args.port}") print(f"Starting Uvicorn server in single-process mode on {args.host}:{args.port}")
uvicorn.run(**uvicorn_config) uvicorn.run(**uvicorn_config)

View File

@@ -10,7 +10,11 @@ from lightrag.utils import (
logger, logger,
write_json, write_json,
) )
from .shared_storage import get_namespace_data, get_storage_lock, try_initialize_namespace from .shared_storage import (
get_namespace_data,
get_storage_lock,
try_initialize_namespace,
)
@final @final
@@ -20,7 +24,7 @@ class JsonKVStorage(BaseKVStorage):
working_dir = self.global_config["working_dir"] working_dir = self.global_config["working_dir"]
self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json") self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
self._storage_lock = get_storage_lock() self._storage_lock = get_storage_lock()
# check need_init must before get_namespace_data # check need_init must before get_namespace_data
need_init = try_initialize_namespace(self.namespace) need_init = try_initialize_namespace(self.namespace)
self._data = get_namespace_data(self.namespace) self._data = get_namespace_data(self.namespace)

View File

@@ -11,7 +11,12 @@ from lightrag.utils import (
) )
import pipmaster as pm import pipmaster as pm
from lightrag.base import BaseVectorStorage from lightrag.base import BaseVectorStorage
from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace from .shared_storage import (
get_storage_lock,
get_namespace_object,
is_multiprocess,
try_initialize_namespace,
)
if not pm.is_installed("nano-vectordb"): if not pm.is_installed("nano-vectordb"):
pm.install("nano-vectordb") pm.install("nano-vectordb")

View File

@@ -6,7 +6,12 @@ import numpy as np
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
from lightrag.utils import logger from lightrag.utils import logger
from lightrag.base import BaseGraphStorage from lightrag.base import BaseGraphStorage
from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace from .shared_storage import (
get_storage_lock,
get_namespace_object,
is_multiprocess,
try_initialize_namespace,
)
import pipmaster as pm import pipmaster as pm
@@ -74,16 +79,14 @@ class NetworkXStorage(BaseGraphStorage):
self.global_config["working_dir"], f"graph_{self.namespace}.graphml" self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
) )
self._storage_lock = get_storage_lock() self._storage_lock = get_storage_lock()
# check need_init must before get_namespace_object # check need_init must before get_namespace_object
need_init = try_initialize_namespace(self.namespace) need_init = try_initialize_namespace(self.namespace)
self._graph = get_namespace_object(self.namespace) self._graph = get_namespace_object(self.namespace)
if need_init: if need_init:
if is_multiprocess: if is_multiprocess:
preloaded_graph = NetworkXStorage.load_nx_graph( preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
self._graphml_xml_file
)
self._graph.value = preloaded_graph or nx.Graph() self._graph.value = preloaded_graph or nx.Graph()
if preloaded_graph: if preloaded_graph:
logger.info( logger.info(
@@ -92,9 +95,7 @@ class NetworkXStorage(BaseGraphStorage):
else: else:
logger.info("Created new empty graph") logger.info("Created new empty graph")
else: else:
preloaded_graph = NetworkXStorage.load_nx_graph( preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
self._graphml_xml_file
)
self._graph = preloaded_graph or nx.Graph() self._graph = preloaded_graph or nx.Graph()
if preloaded_graph: if preloaded_graph:
logger.info( logger.info(

View File

@@ -4,16 +4,17 @@ from multiprocessing.synchronize import Lock as ProcessLock
from threading import Lock as ThreadLock from threading import Lock as ThreadLock
from multiprocessing import Manager from multiprocessing import Manager
from typing import Any, Dict, Optional, Union from typing import Any, Dict, Optional, Union
from lightrag.utils import logger
# Define a direct print function for critical logs that must be visible in all processes # Define a direct print function for critical logs that must be visible in all processes
def direct_log(message, level="INFO"): def direct_log(message, level="INFO"):
""" """
Log a message directly to stderr to ensure visibility in all processes, Log a message directly to stderr to ensure visibility in all processes,
including the Gunicorn master process. including the Gunicorn master process.
""" """
print(f"{level}: {message}", file=sys.stderr, flush=True) print(f"{level}: {message}", file=sys.stderr, flush=True)
LockType = Union[ProcessLock, ThreadLock] LockType = Union[ProcessLock, ThreadLock]
_manager = None _manager = None
@@ -31,39 +32,53 @@ _global_lock: Optional[LockType] = None
def initialize_share_data(workers: int = 1): def initialize_share_data(workers: int = 1):
""" """
Initialize shared storage data for single or multi-process mode. Initialize shared storage data for single or multi-process mode.
When used with Gunicorn's preload feature, this function is called once in the When used with Gunicorn's preload feature, this function is called once in the
master process before forking worker processes, allowing all workers to share master process before forking worker processes, allowing all workers to share
the same initialized data. the same initialized data.
In single-process mode, this function is called during LightRAG object initialization. In single-process mode, this function is called during LightRAG object initialization.
The function determines whether to use cross-process shared variables for data storage The function determines whether to use cross-process shared variables for data storage
based on the number of workers. If workers=1, it uses thread locks and local dictionaries. based on the number of workers. If workers=1, it uses thread locks and local dictionaries.
If workers>1, it uses process locks and shared dictionaries managed by multiprocessing.Manager. If workers>1, it uses process locks and shared dictionaries managed by multiprocessing.Manager.
Args: Args:
workers (int): Number of worker processes. If 1, single-process mode is used. workers (int): Number of worker processes. If 1, single-process mode is used.
If > 1, multi-process mode with shared memory is used. If > 1, multi-process mode with shared memory is used.
""" """
global _manager, is_multiprocess, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized global \
_manager, \
is_multiprocess, \
is_multiprocess, \
_global_lock, \
_shared_dicts, \
_share_objects, \
_init_flags, \
_initialized
# Check if already initialized # Check if already initialized
if _initialized: if _initialized:
direct_log(f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})") direct_log(
f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})"
)
return return
_manager = Manager() _manager = Manager()
# Force multi-process mode if workers > 1 # Force multi-process mode if workers > 1
if workers > 1: if workers > 1:
is_multiprocess = True is_multiprocess = True
_global_lock = _manager.Lock() _global_lock = _manager.Lock()
# Create shared dictionaries with manager # Create shared dictionaries with manager
_shared_dicts = _manager.dict() _shared_dicts = _manager.dict()
_share_objects = _manager.dict() _share_objects = _manager.dict()
_init_flags = _manager.dict() # Use shared dictionary to store initialization flags _init_flags = (
direct_log(f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})") _manager.dict()
) # Use shared dictionary to store initialization flags
direct_log(
f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})"
)
else: else:
is_multiprocess = False is_multiprocess = False
_global_lock = ThreadLock() _global_lock = ThreadLock()
@@ -75,6 +90,7 @@ def initialize_share_data(workers: int = 1):
# Mark as initialized # Mark as initialized
_initialized = True _initialized = True
def try_initialize_namespace(namespace: str) -> bool: def try_initialize_namespace(namespace: str) -> bool:
""" """
Try to initialize a namespace. Returns True if the current process gets initialization permission. Try to initialize a namespace. Returns True if the current process gets initialization permission.
@@ -83,8 +99,11 @@ def try_initialize_namespace(namespace: str) -> bool:
global _init_flags, _manager global _init_flags, _manager
if _init_flags is None: if _init_flags is None:
direct_log(f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR") direct_log(
raise ValueError("Shared dictionaries not initialized") f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized")
if namespace not in _init_flags: if namespace not in _init_flags:
_init_flags[namespace] = True _init_flags[namespace] = True
@@ -112,7 +131,10 @@ def get_namespace_object(namespace: str) -> Any:
"""Get an object for specific namespace""" """Get an object for specific namespace"""
if _share_objects is None: if _share_objects is None:
direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR") direct_log(
f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized") raise ValueError("Shared dictionaries not initialized")
lock = _get_global_lock() lock = _get_global_lock()
@@ -123,14 +145,20 @@ def get_namespace_object(namespace: str) -> Any:
_share_objects[namespace] = _manager.Value("O", None) _share_objects[namespace] = _manager.Value("O", None)
else: else:
_share_objects[namespace] = None _share_objects[namespace] = None
direct_log(f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}") direct_log(
f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}"
)
return _share_objects[namespace] return _share_objects[namespace]
def get_namespace_data(namespace: str) -> Dict[str, Any]: def get_namespace_data(namespace: str) -> Dict[str, Any]:
"""get storage space for specific storage type(namespace)""" """get storage space for specific storage type(namespace)"""
if _shared_dicts is None: if _shared_dicts is None:
direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR") direct_log(
f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized") raise ValueError("Shared dictionaries not initialized")
lock = _get_global_lock() lock = _get_global_lock()
@@ -140,8 +168,10 @@ def get_namespace_data(namespace: str) -> Dict[str, Any]:
_shared_dicts[namespace] = _manager.dict() _shared_dicts[namespace] = _manager.dict()
else: else:
_shared_dicts[namespace] = {} _shared_dicts[namespace] = {}
direct_log(f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}") direct_log(
f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}"
)
return _shared_dicts[namespace] return _shared_dicts[namespace]
@@ -153,22 +183,33 @@ def get_scan_progress() -> Dict[str, Any]:
def finalize_share_data(): def finalize_share_data():
""" """
Release shared resources and clean up. Release shared resources and clean up.
This function should be called when the application is shutting down This function should be called when the application is shutting down
to properly release shared resources and avoid memory leaks. to properly release shared resources and avoid memory leaks.
In multi-process mode, it shuts down the Manager and releases all shared objects. In multi-process mode, it shuts down the Manager and releases all shared objects.
In single-process mode, it simply resets the global variables. In single-process mode, it simply resets the global variables.
""" """
global _manager, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized global \
_manager, \
is_multiprocess, \
_global_lock, \
_shared_dicts, \
_share_objects, \
_init_flags, \
_initialized
# Check if already initialized # Check if already initialized
if not _initialized: if not _initialized:
direct_log(f"Process {os.getpid()} storage data not initialized, nothing to finalize") direct_log(
f"Process {os.getpid()} storage data not initialized, nothing to finalize"
)
return return
direct_log(f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})") direct_log(
f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})"
)
# In multi-process mode, shut down the Manager # In multi-process mode, shut down the Manager
if is_multiprocess and _manager is not None: if is_multiprocess and _manager is not None:
try: try:
@@ -179,13 +220,15 @@ def finalize_share_data():
_share_objects.clear() _share_objects.clear()
if _init_flags is not None: if _init_flags is not None:
_init_flags.clear() _init_flags.clear()
# Shut down the Manager # Shut down the Manager
_manager.shutdown() _manager.shutdown()
direct_log(f"Process {os.getpid()} Manager shutdown complete") direct_log(f"Process {os.getpid()} Manager shutdown complete")
except Exception as e: except Exception as e:
direct_log(f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR") direct_log(
f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR"
)
# Reset global variables # Reset global variables
_manager = None _manager = None
_initialized = None _initialized = None
@@ -194,5 +237,5 @@ def finalize_share_data():
_share_objects = None _share_objects = None
_init_flags = None _init_flags = None
_global_lock = None _global_lock = None
direct_log(f"Process {os.getpid()} storage data finalization complete") direct_log(f"Process {os.getpid()} storage data finalization complete")

View File

@@ -271,12 +271,17 @@ class LightRAG:
set_logger(self.log_file_path, self.log_level) set_logger(self.log_file_path, self.log_level)
logger.info(f"Logger initialized for working directory: {self.working_dir}") logger.info(f"Logger initialized for working directory: {self.working_dir}")
from lightrag.kg.shared_storage import initialize_share_data, try_initialize_namespace, get_namespace_data from lightrag.kg.shared_storage import (
initialize_share_data,
try_initialize_namespace,
get_namespace_data,
)
initialize_share_data() initialize_share_data()
need_init = try_initialize_namespace("scan_progress") need_init = try_initialize_namespace("scan_progress")
scan_progress = get_namespace_data("scan_progress") scan_progress = get_namespace_data("scan_progress")
logger.info(f"scan_progress type after init: {type(scan_progress)}") logger.info(f"scan_progress type after init: {type(scan_progress)}")
scan_progress.update( scan_progress.update(
{ {
"is_scanning": False, "is_scanning": False,
@@ -286,9 +291,6 @@ class LightRAG:
"progress": 0, "progress": 0,
} }
) )
scan_progress = get_namespace_data("scan_progress")
logger.info(f"scan_progress type after update: {type(scan_progress)}")
logger.info(f"Scan_progres value after update: {scan_progress}")
if not os.path.exists(self.working_dir): if not os.path.exists(self.working_dir):
logger.info(f"Creating working directory {self.working_dir}") logger.info(f"Creating working directory {self.working_dir}")

View File

@@ -2,127 +2,149 @@
""" """
Start LightRAG server with Gunicorn Start LightRAG server with Gunicorn
""" """
import os import os
import sys import sys
import json import json
import signal import signal
import argparse import argparse
import subprocess
from lightrag.api.utils_api import parse_args, display_splash_screen from lightrag.api.utils_api import parse_args, display_splash_screen
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
# Signal handler for graceful shutdown # Signal handler for graceful shutdown
def signal_handler(sig, frame): def signal_handler(sig, frame):
print("\n\n" + "="*80) print("\n\n" + "=" * 80)
print("RECEIVED TERMINATION SIGNAL") print("RECEIVED TERMINATION SIGNAL")
print(f"Process ID: {os.getpid()}") print(f"Process ID: {os.getpid()}")
print("="*80 + "\n") print("=" * 80 + "\n")
# Release shared resources # Release shared resources
finalize_share_data() finalize_share_data()
# Exit with success status # Exit with success status
sys.exit(0) sys.exit(0)
def main(): def main():
# Register signal handlers for graceful shutdown # Register signal handlers for graceful shutdown
signal.signal(signal.SIGINT, signal_handler) # Ctrl+C signal.signal(signal.SIGINT, signal_handler) # Ctrl+C
signal.signal(signal.SIGTERM, signal_handler) # kill command signal.signal(signal.SIGTERM, signal_handler) # kill command
# Create a parser to handle Gunicorn-specific parameters # Create a parser to handle Gunicorn-specific parameters
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description="Start LightRAG server with Gunicorn")
description="Start LightRAG server with Gunicorn"
)
parser.add_argument( parser.add_argument(
"--workers", "--workers",
type=int, type=int,
help="Number of worker processes (overrides the default or config.ini setting)" help="Number of worker processes (overrides the default or config.ini setting)",
) )
parser.add_argument( parser.add_argument(
"--timeout", "--timeout", type=int, help="Worker timeout in seconds (default: 120)"
type=int,
help="Worker timeout in seconds (default: 120)"
) )
parser.add_argument( parser.add_argument(
"--log-level", "--log-level",
choices=["debug", "info", "warning", "error", "critical"], choices=["debug", "info", "warning", "error", "critical"],
help="Gunicorn log level" help="Gunicorn log level",
) )
# Parse Gunicorn-specific arguments # Parse Gunicorn-specific arguments
gunicorn_args, remaining_args = parser.parse_known_args() gunicorn_args, remaining_args = parser.parse_known_args()
# Pass remaining arguments to LightRAG's parse_args # Pass remaining arguments to LightRAG's parse_args
sys.argv = [sys.argv[0]] + remaining_args sys.argv = [sys.argv[0]] + remaining_args
args = parse_args() args = parse_args()
# If workers specified, override args value # If workers specified, override args value
if gunicorn_args.workers: if gunicorn_args.workers:
args.workers = gunicorn_args.workers args.workers = gunicorn_args.workers
os.environ["WORKERS"] = str(gunicorn_args.workers) os.environ["WORKERS"] = str(gunicorn_args.workers)
# If timeout specified, set environment variable # If timeout specified, set environment variable
if gunicorn_args.timeout: if gunicorn_args.timeout:
os.environ["TIMEOUT"] = str(gunicorn_args.timeout) os.environ["TIMEOUT"] = str(gunicorn_args.timeout)
# If log-level specified, set environment variable # If log-level specified, set environment variable
if gunicorn_args.log_level: if gunicorn_args.log_level:
os.environ["LOG_LEVEL"] = gunicorn_args.log_level os.environ["LOG_LEVEL"] = gunicorn_args.log_level
# Save all LightRAG args to environment variable for worker processes # Save all LightRAG args to environment variable for worker processes
# This is the key step for passing arguments to lightrag_server.py # This is the key step for passing arguments to lightrag_server.py
os.environ["LIGHTRAG_ARGS"] = json.dumps(vars(args)) os.environ["LIGHTRAG_ARGS"] = json.dumps(vars(args))
# Display startup information # Display startup information
display_splash_screen(args) display_splash_screen(args)
print("🚀 Starting LightRAG with Gunicorn") print("🚀 Starting LightRAG with Gunicorn")
print(f"🔄 Worker management: Gunicorn (workers={args.workers})") print(f"🔄 Worker management: Gunicorn (workers={args.workers})")
print("🔍 Preloading app: Enabled") print("🔍 Preloading app: Enabled")
print("📝 Note: Using Gunicorn's preload feature for shared data initialization") print("📝 Note: Using Gunicorn's preload feature for shared data initialization")
print("\n\n" + "="*80) print("\n\n" + "=" * 80)
print("MAIN PROCESS INITIALIZATION") print("MAIN PROCESS INITIALIZATION")
print(f"Process ID: {os.getpid()}") print(f"Process ID: {os.getpid()}")
print(f"Workers setting: {args.workers}") print(f"Workers setting: {args.workers}")
print("="*80 + "\n") print("=" * 80 + "\n")
# Start application with Gunicorn using direct Python API # Start application with Gunicorn using direct Python API
# Ensure WORKERS environment variable is set before importing gunicorn_config # Ensure WORKERS environment variable is set before importing gunicorn_config
if args.workers > 1: if args.workers > 1:
os.environ["WORKERS"] = str(args.workers) os.environ["WORKERS"] = str(args.workers)
# Import Gunicorn's StandaloneApplication # Import Gunicorn's StandaloneApplication
from gunicorn.app.base import BaseApplication from gunicorn.app.base import BaseApplication
# Define a custom application class that loads our config # Define a custom application class that loads our config
class GunicornApp(BaseApplication): class GunicornApp(BaseApplication):
def __init__(self, app, options=None): def __init__(self, app, options=None):
self.options = options or {} self.options = options or {}
self.application = app self.application = app
super().__init__() super().__init__()
def load_config(self): def load_config(self):
# Define valid Gunicorn configuration options # Define valid Gunicorn configuration options
valid_options = { valid_options = {
'bind', 'workers', 'worker_class', 'timeout', 'keepalive', "bind",
'preload_app', 'errorlog', 'accesslog', 'loglevel', "workers",
'certfile', 'keyfile', 'limit_request_line', 'limit_request_fields', "worker_class",
'limit_request_field_size', 'graceful_timeout', 'max_requests', "timeout",
'max_requests_jitter' "keepalive",
"preload_app",
"errorlog",
"accesslog",
"loglevel",
"certfile",
"keyfile",
"limit_request_line",
"limit_request_fields",
"limit_request_field_size",
"graceful_timeout",
"max_requests",
"max_requests_jitter",
} }
# Special hooks that need to be set separately # Special hooks that need to be set separately
special_hooks = { special_hooks = {
'on_starting', 'on_reload', 'on_exit', 'pre_fork', 'post_fork', "on_starting",
'pre_exec', 'pre_request', 'post_request', 'worker_init', "on_reload",
'worker_exit', 'nworkers_changed', 'child_exit' "on_exit",
"pre_fork",
"post_fork",
"pre_exec",
"pre_request",
"post_request",
"worker_init",
"worker_exit",
"nworkers_changed",
"child_exit",
} }
# Import the gunicorn_config module directly # Import the gunicorn_config module directly
import importlib.util import importlib.util
spec = importlib.util.spec_from_file_location("gunicorn_config", "gunicorn_config.py")
spec = importlib.util.spec_from_file_location(
"gunicorn_config", "gunicorn_config.py"
)
self.config_module = importlib.util.module_from_spec(spec) self.config_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(self.config_module) spec.loader.exec_module(self.config_module)
# Set configuration options # Set configuration options
for key in dir(self.config_module): for key in dir(self.config_module):
if key in valid_options: if key in valid_options:
@@ -135,7 +157,7 @@ def main():
value = getattr(self.config_module, key) value = getattr(self.config_module, key)
if callable(value): if callable(value):
self.cfg.set(key, value) self.cfg.set(key, value)
# Override with command line arguments if provided # Override with command line arguments if provided
if gunicorn_args.workers: if gunicorn_args.workers:
self.cfg.set("workers", gunicorn_args.workers) self.cfg.set("workers", gunicorn_args.workers)
@@ -143,18 +165,18 @@ def main():
self.cfg.set("timeout", gunicorn_args.timeout) self.cfg.set("timeout", gunicorn_args.timeout)
if gunicorn_args.log_level: if gunicorn_args.log_level:
self.cfg.set("loglevel", gunicorn_args.log_level) self.cfg.set("loglevel", gunicorn_args.log_level)
def load(self): def load(self):
# Import the application # Import the application
from lightrag.api.lightrag_server import get_application from lightrag.api.lightrag_server import get_application
return get_application() return get_application()
# Create the application # Create the application
app = GunicornApp("") app = GunicornApp("")
# Directly call initialize_share_data with the correct workers value # Directly call initialize_share_data with the correct workers value
from lightrag.kg.shared_storage import initialize_share_data
# Force workers to be an integer and greater than 1 for multi-process mode # Force workers to be an integer and greater than 1 for multi-process mode
workers_count = int(args.workers) workers_count = int(args.workers)
if workers_count > 1: if workers_count > 1:
@@ -163,10 +185,11 @@ def main():
initialize_share_data(workers_count) initialize_share_data(workers_count)
else: else:
initialize_share_data(1) initialize_share_data(1)
# Run the application # Run the application
print("\nStarting Gunicorn with direct Python API...") print("\nStarting Gunicorn with direct Python API...")
app.run() app.run()
if __name__ == "__main__": if __name__ == "__main__":
main() main()