Fix linting

This commit is contained in:
yangdx
2025-02-27 19:05:51 +08:00
parent 946095ef80
commit 64f22966a3
8 changed files with 196 additions and 112 deletions

View File

@@ -8,7 +8,7 @@ from lightrag.api.utils_api import parse_args
args = parse_args() args = parse_args()
# Determine worker count - from environment variable or command line arguments # Determine worker count - from environment variable or command line arguments
workers = int(os.getenv('WORKERS', args.workers)) workers = int(os.getenv("WORKERS", args.workers))
# If not specified, use CPU count * 2 + 1 (Gunicorn recommended configuration) # If not specified, use CPU count * 2 + 1 (Gunicorn recommended configuration)
if workers <= 1: if workers <= 1:
@@ -24,7 +24,7 @@ preload_app = True
worker_class = "uvicorn.workers.UvicornWorker" worker_class = "uvicorn.workers.UvicornWorker"
# Other Gunicorn configurations # Other Gunicorn configurations
timeout = int(os.getenv('TIMEOUT', 120)) timeout = int(os.getenv("TIMEOUT", 120))
keepalive = 5 keepalive = 5
# Optional SSL configuration # Optional SSL configuration
@@ -33,9 +33,10 @@ if args.ssl:
keyfile = args.ssl_keyfile keyfile = args.ssl_keyfile
# Logging configuration # Logging configuration
errorlog = os.getenv('ERROR_LOG', '-') # '-' means stderr errorlog = os.getenv("ERROR_LOG", "-") # '-' means stderr
accesslog = os.getenv('ACCESS_LOG', '-') # '-' means stderr accesslog = os.getenv("ACCESS_LOG", "-") # '-' means stderr
loglevel = os.getenv('LOG_LEVEL', 'info') loglevel = os.getenv("LOG_LEVEL", "info")
def on_starting(server): def on_starting(server):
""" """
@@ -50,9 +51,12 @@ def on_starting(server):
# Memory usage monitoring # Memory usage monitoring
try: try:
import psutil import psutil
process = psutil.Process(os.getpid()) process = psutil.Process(os.getpid())
memory_info = process.memory_info() memory_info = process.memory_info()
msg = f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB" msg = (
f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
)
print(msg) print(msg)
except ImportError: except ImportError:
print("psutil not installed, skipping memory usage reporting") print("psutil not installed, skipping memory usage reporting")
@@ -61,6 +65,7 @@ def on_starting(server):
print("Gunicorn initialization complete, forking workers...") print("Gunicorn initialization complete, forking workers...")
print("=" * 80) print("=" * 80)
def on_exit(server): def on_exit(server):
""" """
Executed when Gunicorn is shutting down. Executed when Gunicorn is shutting down.

View File

@@ -471,12 +471,13 @@ def configure_logging():
def main(): def main():
# Check if running under Gunicorn # Check if running under Gunicorn
if 'GUNICORN_CMD_ARGS' in os.environ: if "GUNICORN_CMD_ARGS" in os.environ:
# If started with Gunicorn, return directly as Gunicorn will call get_application # If started with Gunicorn, return directly as Gunicorn will call get_application
print("Running under Gunicorn - worker management handled by Gunicorn") print("Running under Gunicorn - worker management handled by Gunicorn")
return return
from multiprocessing import freeze_support from multiprocessing import freeze_support
freeze_support() freeze_support()
args = parse_args() args = parse_args()

View File

@@ -10,7 +10,11 @@ from lightrag.utils import (
logger, logger,
write_json, write_json,
) )
from .shared_storage import get_namespace_data, get_storage_lock, try_initialize_namespace from .shared_storage import (
get_namespace_data,
get_storage_lock,
try_initialize_namespace,
)
@final @final

View File

@@ -11,7 +11,12 @@ from lightrag.utils import (
) )
import pipmaster as pm import pipmaster as pm
from lightrag.base import BaseVectorStorage from lightrag.base import BaseVectorStorage
from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace from .shared_storage import (
get_storage_lock,
get_namespace_object,
is_multiprocess,
try_initialize_namespace,
)
if not pm.is_installed("nano-vectordb"): if not pm.is_installed("nano-vectordb"):
pm.install("nano-vectordb") pm.install("nano-vectordb")

View File

@@ -6,7 +6,12 @@ import numpy as np
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
from lightrag.utils import logger from lightrag.utils import logger
from lightrag.base import BaseGraphStorage from lightrag.base import BaseGraphStorage
from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace from .shared_storage import (
get_storage_lock,
get_namespace_object,
is_multiprocess,
try_initialize_namespace,
)
import pipmaster as pm import pipmaster as pm
@@ -81,9 +86,7 @@ class NetworkXStorage(BaseGraphStorage):
if need_init: if need_init:
if is_multiprocess: if is_multiprocess:
preloaded_graph = NetworkXStorage.load_nx_graph( preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
self._graphml_xml_file
)
self._graph.value = preloaded_graph or nx.Graph() self._graph.value = preloaded_graph or nx.Graph()
if preloaded_graph: if preloaded_graph:
logger.info( logger.info(
@@ -92,9 +95,7 @@ class NetworkXStorage(BaseGraphStorage):
else: else:
logger.info("Created new empty graph") logger.info("Created new empty graph")
else: else:
preloaded_graph = NetworkXStorage.load_nx_graph( preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
self._graphml_xml_file
)
self._graph = preloaded_graph or nx.Graph() self._graph = preloaded_graph or nx.Graph()
if preloaded_graph: if preloaded_graph:
logger.info( logger.info(

View File

@@ -4,7 +4,7 @@ from multiprocessing.synchronize import Lock as ProcessLock
from threading import Lock as ThreadLock from threading import Lock as ThreadLock
from multiprocessing import Manager from multiprocessing import Manager
from typing import Any, Dict, Optional, Union from typing import Any, Dict, Optional, Union
from lightrag.utils import logger
# Define a direct print function for critical logs that must be visible in all processes # Define a direct print function for critical logs that must be visible in all processes
def direct_log(message, level="INFO"): def direct_log(message, level="INFO"):
@@ -14,6 +14,7 @@ def direct_log(message, level="INFO"):
""" """
print(f"{level}: {message}", file=sys.stderr, flush=True) print(f"{level}: {message}", file=sys.stderr, flush=True)
LockType = Union[ProcessLock, ThreadLock] LockType = Union[ProcessLock, ThreadLock]
_manager = None _manager = None
@@ -46,11 +47,21 @@ def initialize_share_data(workers: int = 1):
workers (int): Number of worker processes. If 1, single-process mode is used. workers (int): Number of worker processes. If 1, single-process mode is used.
If > 1, multi-process mode with shared memory is used. If > 1, multi-process mode with shared memory is used.
""" """
global _manager, is_multiprocess, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized global \
_manager, \
is_multiprocess, \
is_multiprocess, \
_global_lock, \
_shared_dicts, \
_share_objects, \
_init_flags, \
_initialized
# Check if already initialized # Check if already initialized
if _initialized: if _initialized:
direct_log(f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})") direct_log(
f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})"
)
return return
_manager = Manager() _manager = Manager()
@@ -62,8 +73,12 @@ def initialize_share_data(workers: int = 1):
# Create shared dictionaries with manager # Create shared dictionaries with manager
_shared_dicts = _manager.dict() _shared_dicts = _manager.dict()
_share_objects = _manager.dict() _share_objects = _manager.dict()
_init_flags = _manager.dict() # Use shared dictionary to store initialization flags _init_flags = (
direct_log(f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})") _manager.dict()
) # Use shared dictionary to store initialization flags
direct_log(
f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})"
)
else: else:
is_multiprocess = False is_multiprocess = False
_global_lock = ThreadLock() _global_lock = ThreadLock()
@@ -75,6 +90,7 @@ def initialize_share_data(workers: int = 1):
# Mark as initialized # Mark as initialized
_initialized = True _initialized = True
def try_initialize_namespace(namespace: str) -> bool: def try_initialize_namespace(namespace: str) -> bool:
""" """
Try to initialize a namespace. Returns True if the current process gets initialization permission. Try to initialize a namespace. Returns True if the current process gets initialization permission.
@@ -83,7 +99,10 @@ def try_initialize_namespace(namespace: str) -> bool:
global _init_flags, _manager global _init_flags, _manager
if _init_flags is None: if _init_flags is None:
direct_log(f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR") direct_log(
f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized") raise ValueError("Shared dictionaries not initialized")
if namespace not in _init_flags: if namespace not in _init_flags:
@@ -112,7 +131,10 @@ def get_namespace_object(namespace: str) -> Any:
"""Get an object for specific namespace""" """Get an object for specific namespace"""
if _share_objects is None: if _share_objects is None:
direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR") direct_log(
f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized") raise ValueError("Shared dictionaries not initialized")
lock = _get_global_lock() lock = _get_global_lock()
@@ -123,14 +145,20 @@ def get_namespace_object(namespace: str) -> Any:
_share_objects[namespace] = _manager.Value("O", None) _share_objects[namespace] = _manager.Value("O", None)
else: else:
_share_objects[namespace] = None _share_objects[namespace] = None
direct_log(f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}") direct_log(
f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}"
)
return _share_objects[namespace] return _share_objects[namespace]
def get_namespace_data(namespace: str) -> Dict[str, Any]: def get_namespace_data(namespace: str) -> Dict[str, Any]:
"""get storage space for specific storage type(namespace)""" """get storage space for specific storage type(namespace)"""
if _shared_dicts is None: if _shared_dicts is None:
direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR") direct_log(
f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized") raise ValueError("Shared dictionaries not initialized")
lock = _get_global_lock() lock = _get_global_lock()
@@ -140,7 +168,9 @@ def get_namespace_data(namespace: str) -> Dict[str, Any]:
_shared_dicts[namespace] = _manager.dict() _shared_dicts[namespace] = _manager.dict()
else: else:
_shared_dicts[namespace] = {} _shared_dicts[namespace] = {}
direct_log(f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}") direct_log(
f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}"
)
return _shared_dicts[namespace] return _shared_dicts[namespace]
@@ -160,14 +190,25 @@ def finalize_share_data():
In multi-process mode, it shuts down the Manager and releases all shared objects. In multi-process mode, it shuts down the Manager and releases all shared objects.
In single-process mode, it simply resets the global variables. In single-process mode, it simply resets the global variables.
""" """
global _manager, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized global \
_manager, \
is_multiprocess, \
_global_lock, \
_shared_dicts, \
_share_objects, \
_init_flags, \
_initialized
# Check if already initialized # Check if already initialized
if not _initialized: if not _initialized:
direct_log(f"Process {os.getpid()} storage data not initialized, nothing to finalize") direct_log(
f"Process {os.getpid()} storage data not initialized, nothing to finalize"
)
return return
direct_log(f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})") direct_log(
f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})"
)
# In multi-process mode, shut down the Manager # In multi-process mode, shut down the Manager
if is_multiprocess and _manager is not None: if is_multiprocess and _manager is not None:
@@ -184,7 +225,9 @@ def finalize_share_data():
_manager.shutdown() _manager.shutdown()
direct_log(f"Process {os.getpid()} Manager shutdown complete") direct_log(f"Process {os.getpid()} Manager shutdown complete")
except Exception as e: except Exception as e:
direct_log(f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR") direct_log(
f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR"
)
# Reset global variables # Reset global variables
_manager = None _manager = None

View File

@@ -271,7 +271,12 @@ class LightRAG:
set_logger(self.log_file_path, self.log_level) set_logger(self.log_file_path, self.log_level)
logger.info(f"Logger initialized for working directory: {self.working_dir}") logger.info(f"Logger initialized for working directory: {self.working_dir}")
from lightrag.kg.shared_storage import initialize_share_data, try_initialize_namespace, get_namespace_data from lightrag.kg.shared_storage import (
initialize_share_data,
try_initialize_namespace,
get_namespace_data,
)
initialize_share_data() initialize_share_data()
need_init = try_initialize_namespace("scan_progress") need_init = try_initialize_namespace("scan_progress")
@@ -286,9 +291,6 @@ class LightRAG:
"progress": 0, "progress": 0,
} }
) )
scan_progress = get_namespace_data("scan_progress")
logger.info(f"scan_progress type after update: {type(scan_progress)}")
logger.info(f"Scan_progres value after update: {scan_progress}")
if not os.path.exists(self.working_dir): if not os.path.exists(self.working_dir):
logger.info(f"Creating working directory {self.working_dir}") logger.info(f"Creating working directory {self.working_dir}")

View File

@@ -2,15 +2,16 @@
""" """
Start LightRAG server with Gunicorn Start LightRAG server with Gunicorn
""" """
import os import os
import sys import sys
import json import json
import signal import signal
import argparse import argparse
import subprocess
from lightrag.api.utils_api import parse_args, display_splash_screen from lightrag.api.utils_api import parse_args, display_splash_screen
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
# Signal handler for graceful shutdown # Signal handler for graceful shutdown
def signal_handler(sig, frame): def signal_handler(sig, frame):
print("\n\n" + "=" * 80) print("\n\n" + "=" * 80)
@@ -24,28 +25,25 @@ def signal_handler(sig, frame):
# Exit with success status # Exit with success status
sys.exit(0) sys.exit(0)
def main(): def main():
# Register signal handlers for graceful shutdown # Register signal handlers for graceful shutdown
signal.signal(signal.SIGINT, signal_handler) # Ctrl+C signal.signal(signal.SIGINT, signal_handler) # Ctrl+C
signal.signal(signal.SIGTERM, signal_handler) # kill command signal.signal(signal.SIGTERM, signal_handler) # kill command
# Create a parser to handle Gunicorn-specific parameters # Create a parser to handle Gunicorn-specific parameters
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(description="Start LightRAG server with Gunicorn")
description="Start LightRAG server with Gunicorn"
)
parser.add_argument( parser.add_argument(
"--workers", "--workers",
type=int, type=int,
help="Number of worker processes (overrides the default or config.ini setting)" help="Number of worker processes (overrides the default or config.ini setting)",
) )
parser.add_argument( parser.add_argument(
"--timeout", "--timeout", type=int, help="Worker timeout in seconds (default: 120)"
type=int,
help="Worker timeout in seconds (default: 120)"
) )
parser.add_argument( parser.add_argument(
"--log-level", "--log-level",
choices=["debug", "info", "warning", "error", "critical"], choices=["debug", "info", "warning", "error", "critical"],
help="Gunicorn log level" help="Gunicorn log level",
) )
# Parse Gunicorn-specific arguments # Parse Gunicorn-specific arguments
@@ -103,23 +101,47 @@ def main():
def load_config(self): def load_config(self):
# Define valid Gunicorn configuration options # Define valid Gunicorn configuration options
valid_options = { valid_options = {
'bind', 'workers', 'worker_class', 'timeout', 'keepalive', "bind",
'preload_app', 'errorlog', 'accesslog', 'loglevel', "workers",
'certfile', 'keyfile', 'limit_request_line', 'limit_request_fields', "worker_class",
'limit_request_field_size', 'graceful_timeout', 'max_requests', "timeout",
'max_requests_jitter' "keepalive",
"preload_app",
"errorlog",
"accesslog",
"loglevel",
"certfile",
"keyfile",
"limit_request_line",
"limit_request_fields",
"limit_request_field_size",
"graceful_timeout",
"max_requests",
"max_requests_jitter",
} }
# Special hooks that need to be set separately # Special hooks that need to be set separately
special_hooks = { special_hooks = {
'on_starting', 'on_reload', 'on_exit', 'pre_fork', 'post_fork', "on_starting",
'pre_exec', 'pre_request', 'post_request', 'worker_init', "on_reload",
'worker_exit', 'nworkers_changed', 'child_exit' "on_exit",
"pre_fork",
"post_fork",
"pre_exec",
"pre_request",
"post_request",
"worker_init",
"worker_exit",
"nworkers_changed",
"child_exit",
} }
# Import the gunicorn_config module directly # Import the gunicorn_config module directly
import importlib.util import importlib.util
spec = importlib.util.spec_from_file_location("gunicorn_config", "gunicorn_config.py")
spec = importlib.util.spec_from_file_location(
"gunicorn_config", "gunicorn_config.py"
)
self.config_module = importlib.util.module_from_spec(spec) self.config_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(self.config_module) spec.loader.exec_module(self.config_module)
@@ -147,13 +169,13 @@ def main():
def load(self): def load(self):
# Import the application # Import the application
from lightrag.api.lightrag_server import get_application from lightrag.api.lightrag_server import get_application
return get_application() return get_application()
# Create the application # Create the application
app = GunicornApp("") app = GunicornApp("")
# Directly call initialize_share_data with the correct workers value # Directly call initialize_share_data with the correct workers value
from lightrag.kg.shared_storage import initialize_share_data
# Force workers to be an integer and greater than 1 for multi-process mode # Force workers to be an integer and greater than 1 for multi-process mode
workers_count = int(args.workers) workers_count = int(args.workers)
@@ -168,5 +190,6 @@ def main():
print("\nStarting Gunicorn with direct Python API...") print("\nStarting Gunicorn with direct Python API...")
app.run() app.run()
if __name__ == "__main__": if __name__ == "__main__":
main() main()