Fix linting

This commit is contained in:
yangdx
2025-02-27 19:05:51 +08:00
parent 946095ef80
commit 64f22966a3
8 changed files with 196 additions and 112 deletions

View File

@@ -8,7 +8,7 @@ from lightrag.api.utils_api import parse_args
args = parse_args()
# Determine worker count - from environment variable or command line arguments
workers = int(os.getenv('WORKERS', args.workers))
workers = int(os.getenv("WORKERS", args.workers))
# If not specified, use CPU count * 2 + 1 (Gunicorn recommended configuration)
if workers <= 1:
@@ -24,7 +24,7 @@ preload_app = True
worker_class = "uvicorn.workers.UvicornWorker"
# Other Gunicorn configurations
timeout = int(os.getenv('TIMEOUT', 120))
timeout = int(os.getenv("TIMEOUT", 120))
keepalive = 5
# Optional SSL configuration
@@ -33,9 +33,10 @@ if args.ssl:
keyfile = args.ssl_keyfile
# Logging configuration
errorlog = os.getenv('ERROR_LOG', '-') # '-' means stderr
accesslog = os.getenv('ACCESS_LOG', '-') # '-' means stderr
loglevel = os.getenv('LOG_LEVEL', 'info')
errorlog = os.getenv("ERROR_LOG", "-") # '-' means stderr
accesslog = os.getenv("ACCESS_LOG", "-") # '-' means stdout
loglevel = os.getenv("LOG_LEVEL", "info")
def on_starting(server):
"""
@@ -50,9 +51,12 @@ def on_starting(server):
# Memory usage monitoring
try:
import psutil
process = psutil.Process(os.getpid())
memory_info = process.memory_info()
msg = f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
msg = (
f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
)
print(msg)
except ImportError:
print("psutil not installed, skipping memory usage reporting")
@@ -61,6 +65,7 @@ def on_starting(server):
print("Gunicorn initialization complete, forking workers...")
print("=" * 80)
def on_exit(server):
"""
Executed when Gunicorn is shutting down.

View File

@@ -471,12 +471,13 @@ def configure_logging():
def main():
# Check if running under Gunicorn
if 'GUNICORN_CMD_ARGS' in os.environ:
if "GUNICORN_CMD_ARGS" in os.environ:
# If started with Gunicorn, return directly as Gunicorn will call get_application
print("Running under Gunicorn - worker management handled by Gunicorn")
return
from multiprocessing import freeze_support
freeze_support()
args = parse_args()

View File

@@ -10,7 +10,11 @@ from lightrag.utils import (
logger,
write_json,
)
from .shared_storage import get_namespace_data, get_storage_lock, try_initialize_namespace
from .shared_storage import (
get_namespace_data,
get_storage_lock,
try_initialize_namespace,
)
@final

View File

@@ -11,7 +11,12 @@ from lightrag.utils import (
)
import pipmaster as pm
from lightrag.base import BaseVectorStorage
from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace
from .shared_storage import (
get_storage_lock,
get_namespace_object,
is_multiprocess,
try_initialize_namespace,
)
if not pm.is_installed("nano-vectordb"):
pm.install("nano-vectordb")

View File

@@ -6,7 +6,12 @@ import numpy as np
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
from lightrag.utils import logger
from lightrag.base import BaseGraphStorage
from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace
from .shared_storage import (
get_storage_lock,
get_namespace_object,
is_multiprocess,
try_initialize_namespace,
)
import pipmaster as pm
@@ -81,9 +86,7 @@ class NetworkXStorage(BaseGraphStorage):
if need_init:
if is_multiprocess:
preloaded_graph = NetworkXStorage.load_nx_graph(
self._graphml_xml_file
)
preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
self._graph.value = preloaded_graph or nx.Graph()
if preloaded_graph:
logger.info(
@@ -92,9 +95,7 @@ class NetworkXStorage(BaseGraphStorage):
else:
logger.info("Created new empty graph")
else:
preloaded_graph = NetworkXStorage.load_nx_graph(
self._graphml_xml_file
)
preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
self._graph = preloaded_graph or nx.Graph()
if preloaded_graph:
logger.info(

View File

@@ -4,7 +4,7 @@ from multiprocessing.synchronize import Lock as ProcessLock
from threading import Lock as ThreadLock
from multiprocessing import Manager
from typing import Any, Dict, Optional, Union
from lightrag.utils import logger
# Define a direct print function for critical logs that must be visible in all processes
def direct_log(message, level="INFO"):
@@ -14,6 +14,7 @@ def direct_log(message, level="INFO"):
"""
print(f"{level}: {message}", file=sys.stderr, flush=True)
LockType = Union[ProcessLock, ThreadLock]
_manager = None
@@ -46,11 +47,21 @@ def initialize_share_data(workers: int = 1):
workers (int): Number of worker processes. If 1, single-process mode is used.
If > 1, multi-process mode with shared memory is used.
"""
global _manager, is_multiprocess, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized
global \
_manager, \
is_multiprocess, \
is_multiprocess, \
_global_lock, \
_shared_dicts, \
_share_objects, \
_init_flags, \
_initialized
# Check if already initialized
if _initialized:
direct_log(f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})")
direct_log(
f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})"
)
return
_manager = Manager()
@@ -62,8 +73,12 @@ def initialize_share_data(workers: int = 1):
# Create shared dictionaries with manager
_shared_dicts = _manager.dict()
_share_objects = _manager.dict()
_init_flags = _manager.dict() # Use shared dictionary to store initialization flags
direct_log(f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})")
_init_flags = (
_manager.dict()
) # Use shared dictionary to store initialization flags
direct_log(
f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})"
)
else:
is_multiprocess = False
_global_lock = ThreadLock()
@@ -75,6 +90,7 @@ def initialize_share_data(workers: int = 1):
# Mark as initialized
_initialized = True
def try_initialize_namespace(namespace: str) -> bool:
"""
Try to initialize a namespace. Returns True if the current process gets initialization permission.
@@ -83,7 +99,10 @@ def try_initialize_namespace(namespace: str) -> bool:
global _init_flags, _manager
if _init_flags is None:
direct_log(f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
direct_log(
f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized")
if namespace not in _init_flags:
@@ -112,7 +131,10 @@ def get_namespace_object(namespace: str) -> Any:
"""Get an object for specific namespace"""
if _share_objects is None:
direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
direct_log(
f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized")
lock = _get_global_lock()
@@ -123,14 +145,20 @@ def get_namespace_object(namespace: str) -> Any:
_share_objects[namespace] = _manager.Value("O", None)
else:
_share_objects[namespace] = None
direct_log(f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}")
direct_log(
f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}"
)
return _share_objects[namespace]
def get_namespace_data(namespace: str) -> Dict[str, Any]:
"""get storage space for specific storage type(namespace)"""
if _shared_dicts is None:
direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
direct_log(
f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
level="ERROR",
)
raise ValueError("Shared dictionaries not initialized")
lock = _get_global_lock()
@@ -140,7 +168,9 @@ def get_namespace_data(namespace: str) -> Dict[str, Any]:
_shared_dicts[namespace] = _manager.dict()
else:
_shared_dicts[namespace] = {}
direct_log(f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}")
direct_log(
f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}"
)
return _shared_dicts[namespace]
@@ -160,14 +190,25 @@ def finalize_share_data():
In multi-process mode, it shuts down the Manager and releases all shared objects.
In single-process mode, it simply resets the global variables.
"""
global _manager, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized
global \
_manager, \
is_multiprocess, \
_global_lock, \
_shared_dicts, \
_share_objects, \
_init_flags, \
_initialized
# Check if already initialized
if not _initialized:
direct_log(f"Process {os.getpid()} storage data not initialized, nothing to finalize")
direct_log(
f"Process {os.getpid()} storage data not initialized, nothing to finalize"
)
return
direct_log(f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})")
direct_log(
f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})"
)
# In multi-process mode, shut down the Manager
if is_multiprocess and _manager is not None:
@@ -184,7 +225,9 @@ def finalize_share_data():
_manager.shutdown()
direct_log(f"Process {os.getpid()} Manager shutdown complete")
except Exception as e:
direct_log(f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR")
direct_log(
f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR"
)
# Reset global variables
_manager = None

View File

@@ -271,7 +271,12 @@ class LightRAG:
set_logger(self.log_file_path, self.log_level)
logger.info(f"Logger initialized for working directory: {self.working_dir}")
from lightrag.kg.shared_storage import initialize_share_data, try_initialize_namespace, get_namespace_data
from lightrag.kg.shared_storage import (
initialize_share_data,
try_initialize_namespace,
get_namespace_data,
)
initialize_share_data()
need_init = try_initialize_namespace("scan_progress")
@@ -286,9 +291,6 @@ class LightRAG:
"progress": 0,
}
)
scan_progress = get_namespace_data("scan_progress")
logger.info(f"scan_progress type after update: {type(scan_progress)}")
logger.info(f"Scan_progres value after update: {scan_progress}")
if not os.path.exists(self.working_dir):
logger.info(f"Creating working directory {self.working_dir}")

View File

@@ -2,15 +2,16 @@
"""
Start LightRAG server with Gunicorn
"""
import os
import sys
import json
import signal
import argparse
import subprocess
from lightrag.api.utils_api import parse_args, display_splash_screen
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
# Signal handler for graceful shutdown
def signal_handler(sig, frame):
print("\n\n" + "=" * 80)
@@ -24,28 +25,25 @@ def signal_handler(sig, frame):
# Exit with success status
sys.exit(0)
def main():
# Register signal handlers for graceful shutdown
signal.signal(signal.SIGINT, signal_handler) # Ctrl+C
signal.signal(signal.SIGTERM, signal_handler) # kill command
# Create a parser to handle Gunicorn-specific parameters
parser = argparse.ArgumentParser(
description="Start LightRAG server with Gunicorn"
)
parser = argparse.ArgumentParser(description="Start LightRAG server with Gunicorn")
parser.add_argument(
"--workers",
type=int,
help="Number of worker processes (overrides the default or config.ini setting)"
help="Number of worker processes (overrides the default or config.ini setting)",
)
parser.add_argument(
"--timeout",
type=int,
help="Worker timeout in seconds (default: 120)"
"--timeout", type=int, help="Worker timeout in seconds (default: 120)"
)
parser.add_argument(
"--log-level",
choices=["debug", "info", "warning", "error", "critical"],
help="Gunicorn log level"
help="Gunicorn log level",
)
# Parse Gunicorn-specific arguments
@@ -103,23 +101,47 @@ def main():
def load_config(self):
# Define valid Gunicorn configuration options
valid_options = {
'bind', 'workers', 'worker_class', 'timeout', 'keepalive',
'preload_app', 'errorlog', 'accesslog', 'loglevel',
'certfile', 'keyfile', 'limit_request_line', 'limit_request_fields',
'limit_request_field_size', 'graceful_timeout', 'max_requests',
'max_requests_jitter'
"bind",
"workers",
"worker_class",
"timeout",
"keepalive",
"preload_app",
"errorlog",
"accesslog",
"loglevel",
"certfile",
"keyfile",
"limit_request_line",
"limit_request_fields",
"limit_request_field_size",
"graceful_timeout",
"max_requests",
"max_requests_jitter",
}
# Special hooks that need to be set separately
special_hooks = {
'on_starting', 'on_reload', 'on_exit', 'pre_fork', 'post_fork',
'pre_exec', 'pre_request', 'post_request', 'worker_init',
'worker_exit', 'nworkers_changed', 'child_exit'
"on_starting",
"on_reload",
"on_exit",
"pre_fork",
"post_fork",
"pre_exec",
"pre_request",
"post_request",
"worker_init",
"worker_exit",
"nworkers_changed",
"child_exit",
}
# Import the gunicorn_config module directly
import importlib.util
spec = importlib.util.spec_from_file_location("gunicorn_config", "gunicorn_config.py")
spec = importlib.util.spec_from_file_location(
"gunicorn_config", "gunicorn_config.py"
)
self.config_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(self.config_module)
@@ -147,13 +169,13 @@ def main():
def load(self):
# Import the application
from lightrag.api.lightrag_server import get_application
return get_application()
# Create the application
app = GunicornApp("")
# Directly call initialize_share_data with the correct workers value
from lightrag.kg.shared_storage import initialize_share_data
# Force workers to be an integer and greater than 1 for multi-process mode
workers_count = int(args.workers)
@@ -168,5 +190,6 @@ def main():
print("\nStarting Gunicorn with direct Python API...")
app.run()
if __name__ == "__main__":
main()