Fix linting
@@ -8,7 +8,7 @@ from lightrag.api.utils_api import parse_args
 args = parse_args()
 
 # Determine worker count - from environment variable or command line arguments
-workers = int(os.getenv('WORKERS', args.workers))
+workers = int(os.getenv("WORKERS", args.workers))
 
 # If not specified, use CPU count * 2 + 1 (Gunicorn recommended configuration)
 if workers <= 1:
@@ -24,7 +24,7 @@ preload_app = True
 worker_class = "uvicorn.workers.UvicornWorker"
 
 # Other Gunicorn configurations
-timeout = int(os.getenv('TIMEOUT', 120))
+timeout = int(os.getenv("TIMEOUT", 120))
 keepalive = 5
 
 # Optional SSL configuration
@@ -33,9 +33,10 @@ if args.ssl:
     keyfile = args.ssl_keyfile
 
 # Logging configuration
-errorlog = os.getenv('ERROR_LOG', '-')  # '-' means stderr
-accesslog = os.getenv('ACCESS_LOG', '-')  # '-' means stderr
-loglevel = os.getenv('LOG_LEVEL', 'info')
+errorlog = os.getenv("ERROR_LOG", "-")  # '-' means stderr
+accesslog = os.getenv("ACCESS_LOG", "-")  # '-' means stderr
+loglevel = os.getenv("LOG_LEVEL", "info")
 
+
 def on_starting(server):
     """
@@ -46,21 +47,25 @@ def on_starting(server):
     print(f"GUNICORN MASTER PROCESS: on_starting jobs for all {workers} workers")
     print(f"Process ID: {os.getpid()}")
     print("=" * 80)
 
     # Memory usage monitoring
     try:
         import psutil
 
         process = psutil.Process(os.getpid())
         memory_info = process.memory_info()
-        msg = f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
+        msg = (
+            f"Memory usage after initialization: {memory_info.rss / 1024 / 1024:.2f} MB"
+        )
         print(msg)
     except ImportError:
         print("psutil not installed, skipping memory usage reporting")
 
     print("=" * 80)
     print("Gunicorn initialization complete, forking workers...")
     print("=" * 80)
+
+
 def on_exit(server):
     """
     Executed when Gunicorn is shutting down.
@@ -70,10 +75,10 @@ def on_exit(server):
     print("GUNICORN MASTER PROCESS: Shutting down")
     print(f"Process ID: {os.getpid()}")
     print("=" * 80)
 
     # Release shared resources
     finalize_share_data()
 
     print("=" * 80)
     print("Gunicorn shutdown complete")
     print("=" * 80)
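Aside: the worker-count fallback that the gunicorn config hunks above touch amounts to the following minimal sketch (an assumed reconstruction for illustration only; resolve_workers is a hypothetical helper, not code from this commit):

    import multiprocessing
    import os

    def resolve_workers(cli_workers: int = 1) -> int:
        # Hypothetical helper: the WORKERS env var wins over the CLI value.
        workers = int(os.getenv("WORKERS", cli_workers))
        # If unset or too low, fall back to Gunicorn's recommended cpu_count() * 2 + 1.
        if workers <= 1:
            workers = multiprocessing.cpu_count() * 2 + 1
        return workers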
@@ -471,12 +471,13 @@ def configure_logging():
 
 def main():
     # Check if running under Gunicorn
-    if 'GUNICORN_CMD_ARGS' in os.environ:
+    if "GUNICORN_CMD_ARGS" in os.environ:
         # If started with Gunicorn, return directly as Gunicorn will call get_application
         print("Running under Gunicorn - worker management handled by Gunicorn")
         return
 
     from multiprocessing import freeze_support
+
     freeze_support()
 
     args = parse_args()
@@ -487,10 +488,10 @@ def main():
     configure_logging()
 
     display_splash_screen(args)
 
     # Create application instance directly instead of using factory function
     app = create_app(args)
 
     # Start Uvicorn in single process mode
     uvicorn_config = {
         "app": app,  # Pass application instance directly instead of string path
@@ -498,7 +499,7 @@ def main():
         "port": args.port,
         "log_config": None,  # Disable default config
     }
 
     if args.ssl:
         uvicorn_config.update(
             {
@@ -506,7 +507,7 @@ def main():
                 "ssl_keyfile": args.ssl_keyfile,
             }
         )
 
     print(f"Starting Uvicorn server in single-process mode on {args.host}:{args.port}")
     uvicorn.run(**uvicorn_config)
 
@@ -10,7 +10,11 @@ from lightrag.utils import (
     logger,
     write_json,
 )
-from .shared_storage import get_namespace_data, get_storage_lock, try_initialize_namespace
+from .shared_storage import (
+    get_namespace_data,
+    get_storage_lock,
+    try_initialize_namespace,
+)
 
 
 @final
@@ -20,7 +24,7 @@ class JsonKVStorage(BaseKVStorage):
         working_dir = self.global_config["working_dir"]
         self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
         self._storage_lock = get_storage_lock()
 
         # check need_init must before get_namespace_data
         need_init = try_initialize_namespace(self.namespace)
         self._data = get_namespace_data(self.namespace)
@@ -11,7 +11,12 @@ from lightrag.utils import (
 )
 import pipmaster as pm
 from lightrag.base import BaseVectorStorage
-from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace
+from .shared_storage import (
+    get_storage_lock,
+    get_namespace_object,
+    is_multiprocess,
+    try_initialize_namespace,
+)
 
 if not pm.is_installed("nano-vectordb"):
     pm.install("nano-vectordb")
@@ -6,7 +6,12 @@ import numpy as np
 from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
 from lightrag.utils import logger
 from lightrag.base import BaseGraphStorage
-from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace
+from .shared_storage import (
+    get_storage_lock,
+    get_namespace_object,
+    is_multiprocess,
+    try_initialize_namespace,
+)
 
 import pipmaster as pm
 
@@ -74,16 +79,14 @@ class NetworkXStorage(BaseGraphStorage):
             self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
         )
         self._storage_lock = get_storage_lock()
 
         # check need_init must before get_namespace_object
         need_init = try_initialize_namespace(self.namespace)
         self._graph = get_namespace_object(self.namespace)
 
         if need_init:
             if is_multiprocess:
-                preloaded_graph = NetworkXStorage.load_nx_graph(
-                    self._graphml_xml_file
-                )
+                preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
                 self._graph.value = preloaded_graph or nx.Graph()
                 if preloaded_graph:
                     logger.info(
@@ -92,9 +95,7 @@ class NetworkXStorage(BaseGraphStorage):
                 else:
                     logger.info("Created new empty graph")
             else:
-                preloaded_graph = NetworkXStorage.load_nx_graph(
-                    self._graphml_xml_file
-                )
+                preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
                 self._graph = preloaded_graph or nx.Graph()
                 if preloaded_graph:
                     logger.info(
@@ -4,16 +4,17 @@ from multiprocessing.synchronize import Lock as ProcessLock
 from threading import Lock as ThreadLock
 from multiprocessing import Manager
 from typing import Any, Dict, Optional, Union
-from lightrag.utils import logger
+
 
 # Define a direct print function for critical logs that must be visible in all processes
 def direct_log(message, level="INFO"):
     """
     Log a message directly to stderr to ensure visibility in all processes,
     including the Gunicorn master process.
     """
     print(f"{level}: {message}", file=sys.stderr, flush=True)
 
+
 LockType = Union[ProcessLock, ThreadLock]
 
 _manager = None
@@ -31,39 +32,53 @@ _global_lock: Optional[LockType] = None
 def initialize_share_data(workers: int = 1):
     """
     Initialize shared storage data for single or multi-process mode.
 
     When used with Gunicorn's preload feature, this function is called once in the
     master process before forking worker processes, allowing all workers to share
     the same initialized data.
 
     In single-process mode, this function is called during LightRAG object initialization.
 
     The function determines whether to use cross-process shared variables for data storage
     based on the number of workers. If workers=1, it uses thread locks and local dictionaries.
     If workers>1, it uses process locks and shared dictionaries managed by multiprocessing.Manager.
 
     Args:
         workers (int): Number of worker processes. If 1, single-process mode is used.
             If > 1, multi-process mode with shared memory is used.
     """
-    global _manager, is_multiprocess, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized
+    global \
+        _manager, \
+        is_multiprocess, \
+        is_multiprocess, \
+        _global_lock, \
+        _shared_dicts, \
+        _share_objects, \
+        _init_flags, \
+        _initialized
 
     # Check if already initialized
     if _initialized:
-        direct_log(f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})")
+        direct_log(
+            f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})"
+        )
         return
 
     _manager = Manager()
 
     # Force multi-process mode if workers > 1
     if workers > 1:
         is_multiprocess = True
         _global_lock = _manager.Lock()
         # Create shared dictionaries with manager
         _shared_dicts = _manager.dict()
         _share_objects = _manager.dict()
-        _init_flags = _manager.dict()  # Use shared dictionary to store initialization flags
-        direct_log(f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})")
+        _init_flags = (
+            _manager.dict()
+        )  # Use shared dictionary to store initialization flags
+        direct_log(
+            f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})"
+        )
     else:
         is_multiprocess = False
         _global_lock = ThreadLock()
@@ -75,6 +90,7 @@ def initialize_share_data(workers: int = 1):
     # Mark as initialized
     _initialized = True
 
+
 def try_initialize_namespace(namespace: str) -> bool:
     """
     Try to initialize a namespace. Returns True if the current process gets initialization permission.
@@ -83,8 +99,11 @@ def try_initialize_namespace(namespace: str) -> bool:
     global _init_flags, _manager
 
     if _init_flags is None:
-        direct_log(f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
-        raise ValueError("Shared dictionaries not initialized")
+        direct_log(
+            f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}",
+            level="ERROR",
+        )
+        raise ValueError("Shared dictionaries not initialized")
 
     if namespace not in _init_flags:
         _init_flags[namespace] = True
@@ -112,7 +131,10 @@ def get_namespace_object(namespace: str) -> Any:
     """Get an object for specific namespace"""
 
     if _share_objects is None:
-        direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
+        direct_log(
+            f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
+            level="ERROR",
+        )
         raise ValueError("Shared dictionaries not initialized")
 
     lock = _get_global_lock()
@@ -123,14 +145,20 @@ def get_namespace_object(namespace: str) -> Any:
                 _share_objects[namespace] = _manager.Value("O", None)
             else:
                 _share_objects[namespace] = None
-            direct_log(f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}")
+            direct_log(
+                f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}"
+            )
 
     return _share_objects[namespace]
 
+
 def get_namespace_data(namespace: str) -> Dict[str, Any]:
     """get storage space for specific storage type(namespace)"""
     if _shared_dicts is None:
-        direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
+        direct_log(
+            f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
+            level="ERROR",
+        )
         raise ValueError("Shared dictionaries not initialized")
 
     lock = _get_global_lock()
@@ -140,8 +168,10 @@ def get_namespace_data(namespace: str) -> Dict[str, Any]:
                 _shared_dicts[namespace] = _manager.dict()
             else:
                 _shared_dicts[namespace] = {}
-            direct_log(f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}")
+            direct_log(
+                f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}"
+            )
 
     return _shared_dicts[namespace]
 
 
@@ -153,22 +183,33 @@ def get_scan_progress() -> Dict[str, Any]:
 def finalize_share_data():
     """
     Release shared resources and clean up.
 
     This function should be called when the application is shutting down
     to properly release shared resources and avoid memory leaks.
 
     In multi-process mode, it shuts down the Manager and releases all shared objects.
     In single-process mode, it simply resets the global variables.
     """
-    global _manager, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized
+    global \
+        _manager, \
+        is_multiprocess, \
+        _global_lock, \
+        _shared_dicts, \
+        _share_objects, \
+        _init_flags, \
+        _initialized
 
     # Check if already initialized
     if not _initialized:
-        direct_log(f"Process {os.getpid()} storage data not initialized, nothing to finalize")
+        direct_log(
+            f"Process {os.getpid()} storage data not initialized, nothing to finalize"
+        )
         return
 
-    direct_log(f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})")
+    direct_log(
+        f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})"
+    )
 
     # In multi-process mode, shut down the Manager
     if is_multiprocess and _manager is not None:
         try:
@@ -179,13 +220,15 @@ def finalize_share_data():
             _share_objects.clear()
             if _init_flags is not None:
                 _init_flags.clear()
 
             # Shut down the Manager
             _manager.shutdown()
             direct_log(f"Process {os.getpid()} Manager shutdown complete")
         except Exception as e:
-            direct_log(f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR")
+            direct_log(
+                f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR"
+            )
 
     # Reset global variables
     _manager = None
     _initialized = None
@@ -194,5 +237,5 @@ def finalize_share_data():
     _share_objects = None
     _init_flags = None
     _global_lock = None
 
     direct_log(f"Process {os.getpid()} storage data finalization complete")
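Aside: the single- vs multi-process branching that initialize_share_data() formats above can be summarized by this hedged sketch (simplified, with an assumed helper name rather than the module's actual API):

    from multiprocessing import Manager
    from threading import Lock as ThreadLock

    def make_shared_state(workers: int = 1):
        """Return (lock, data dict, object dict) suited to the worker count."""
        if workers > 1:
            # Manager-backed objects stay usable across forked Gunicorn workers.
            manager = Manager()
            return manager.Lock(), manager.dict(), manager.dict()
        # Single process: a thread lock and plain local dicts are sufficient.
        return ThreadLock(), {}, {}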
@@ -271,12 +271,17 @@ class LightRAG:
         set_logger(self.log_file_path, self.log_level)
         logger.info(f"Logger initialized for working directory: {self.working_dir}")
 
-        from lightrag.kg.shared_storage import initialize_share_data, try_initialize_namespace, get_namespace_data
+        from lightrag.kg.shared_storage import (
+            initialize_share_data,
+            try_initialize_namespace,
+            get_namespace_data,
+        )
+
         initialize_share_data()
 
         need_init = try_initialize_namespace("scan_progress")
         scan_progress = get_namespace_data("scan_progress")
         logger.info(f"scan_progress type after init: {type(scan_progress)}")
         scan_progress.update(
             {
                 "is_scanning": False,
@@ -286,9 +291,6 @@ class LightRAG:
                 "progress": 0,
             }
         )
-        scan_progress = get_namespace_data("scan_progress")
-        logger.info(f"scan_progress type after update: {type(scan_progress)}")
-        logger.info(f"Scan_progres value after update: {scan_progress}")
 
         if not os.path.exists(self.working_dir):
             logger.info(f"Creating working directory {self.working_dir}")
@@ -2,127 +2,149 @@
 """
 Start LightRAG server with Gunicorn
 """
 
 import os
 import sys
 import json
 import signal
 import argparse
-import subprocess
 from lightrag.api.utils_api import parse_args, display_splash_screen
 from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
 
 
 # Signal handler for graceful shutdown
 def signal_handler(sig, frame):
-    print("\n\n" + "="*80)
+    print("\n\n" + "=" * 80)
     print("RECEIVED TERMINATION SIGNAL")
     print(f"Process ID: {os.getpid()}")
-    print("="*80 + "\n")
+    print("=" * 80 + "\n")
 
     # Release shared resources
     finalize_share_data()
 
     # Exit with success status
     sys.exit(0)
 
 
 def main():
     # Register signal handlers for graceful shutdown
     signal.signal(signal.SIGINT, signal_handler)  # Ctrl+C
     signal.signal(signal.SIGTERM, signal_handler)  # kill command
     # Create a parser to handle Gunicorn-specific parameters
-    parser = argparse.ArgumentParser(
-        description="Start LightRAG server with Gunicorn"
-    )
+    parser = argparse.ArgumentParser(description="Start LightRAG server with Gunicorn")
     parser.add_argument(
         "--workers",
         type=int,
-        help="Number of worker processes (overrides the default or config.ini setting)"
+        help="Number of worker processes (overrides the default or config.ini setting)",
     )
     parser.add_argument(
-        "--timeout",
-        type=int,
-        help="Worker timeout in seconds (default: 120)"
+        "--timeout", type=int, help="Worker timeout in seconds (default: 120)"
     )
     parser.add_argument(
         "--log-level",
         choices=["debug", "info", "warning", "error", "critical"],
-        help="Gunicorn log level"
+        help="Gunicorn log level",
     )
 
     # Parse Gunicorn-specific arguments
     gunicorn_args, remaining_args = parser.parse_known_args()
 
     # Pass remaining arguments to LightRAG's parse_args
     sys.argv = [sys.argv[0]] + remaining_args
     args = parse_args()
 
     # If workers specified, override args value
     if gunicorn_args.workers:
         args.workers = gunicorn_args.workers
         os.environ["WORKERS"] = str(gunicorn_args.workers)
 
     # If timeout specified, set environment variable
     if gunicorn_args.timeout:
         os.environ["TIMEOUT"] = str(gunicorn_args.timeout)
 
     # If log-level specified, set environment variable
     if gunicorn_args.log_level:
         os.environ["LOG_LEVEL"] = gunicorn_args.log_level
 
     # Save all LightRAG args to environment variable for worker processes
     # This is the key step for passing arguments to lightrag_server.py
     os.environ["LIGHTRAG_ARGS"] = json.dumps(vars(args))
 
     # Display startup information
     display_splash_screen(args)
 
     print("🚀 Starting LightRAG with Gunicorn")
     print(f"🔄 Worker management: Gunicorn (workers={args.workers})")
     print("🔍 Preloading app: Enabled")
     print("📝 Note: Using Gunicorn's preload feature for shared data initialization")
-    print("\n\n" + "="*80)
+    print("\n\n" + "=" * 80)
     print("MAIN PROCESS INITIALIZATION")
     print(f"Process ID: {os.getpid()}")
     print(f"Workers setting: {args.workers}")
-    print("="*80 + "\n")
+    print("=" * 80 + "\n")
 
     # Start application with Gunicorn using direct Python API
     # Ensure WORKERS environment variable is set before importing gunicorn_config
     if args.workers > 1:
         os.environ["WORKERS"] = str(args.workers)
 
     # Import Gunicorn's StandaloneApplication
     from gunicorn.app.base import BaseApplication
 
     # Define a custom application class that loads our config
     class GunicornApp(BaseApplication):
         def __init__(self, app, options=None):
             self.options = options or {}
             self.application = app
             super().__init__()
 
         def load_config(self):
             # Define valid Gunicorn configuration options
             valid_options = {
-                'bind', 'workers', 'worker_class', 'timeout', 'keepalive',
-                'preload_app', 'errorlog', 'accesslog', 'loglevel',
-                'certfile', 'keyfile', 'limit_request_line', 'limit_request_fields',
-                'limit_request_field_size', 'graceful_timeout', 'max_requests',
-                'max_requests_jitter'
+                "bind",
+                "workers",
+                "worker_class",
+                "timeout",
+                "keepalive",
+                "preload_app",
+                "errorlog",
+                "accesslog",
+                "loglevel",
+                "certfile",
+                "keyfile",
+                "limit_request_line",
+                "limit_request_fields",
+                "limit_request_field_size",
+                "graceful_timeout",
+                "max_requests",
+                "max_requests_jitter",
             }
 
             # Special hooks that need to be set separately
             special_hooks = {
-                'on_starting', 'on_reload', 'on_exit', 'pre_fork', 'post_fork',
-                'pre_exec', 'pre_request', 'post_request', 'worker_init',
-                'worker_exit', 'nworkers_changed', 'child_exit'
+                "on_starting",
+                "on_reload",
+                "on_exit",
+                "pre_fork",
+                "post_fork",
+                "pre_exec",
+                "pre_request",
+                "post_request",
+                "worker_init",
+                "worker_exit",
+                "nworkers_changed",
+                "child_exit",
             }
 
             # Import the gunicorn_config module directly
             import importlib.util
-            spec = importlib.util.spec_from_file_location("gunicorn_config", "gunicorn_config.py")
+
+            spec = importlib.util.spec_from_file_location(
+                "gunicorn_config", "gunicorn_config.py"
+            )
             self.config_module = importlib.util.module_from_spec(spec)
             spec.loader.exec_module(self.config_module)
 
             # Set configuration options
             for key in dir(self.config_module):
                 if key in valid_options:
@@ -135,7 +157,7 @@ def main():
                     value = getattr(self.config_module, key)
                     if callable(value):
                         self.cfg.set(key, value)
 
             # Override with command line arguments if provided
             if gunicorn_args.workers:
                 self.cfg.set("workers", gunicorn_args.workers)
@@ -143,18 +165,18 @@ def main():
                 self.cfg.set("timeout", gunicorn_args.timeout)
             if gunicorn_args.log_level:
                 self.cfg.set("loglevel", gunicorn_args.log_level)
 
         def load(self):
             # Import the application
             from lightrag.api.lightrag_server import get_application
+
             return get_application()
 
     # Create the application
     app = GunicornApp("")
 
     # Directly call initialize_share_data with the correct workers value
-    from lightrag.kg.shared_storage import initialize_share_data
 
     # Force workers to be an integer and greater than 1 for multi-process mode
     workers_count = int(args.workers)
     if workers_count > 1:
@@ -163,10 +185,11 @@ def main():
         initialize_share_data(workers_count)
     else:
         initialize_share_data(1)
 
     # Run the application
     print("\nStarting Gunicorn with direct Python API...")
     app.run()
 
+
 if __name__ == "__main__":
     main()
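Aside: the GunicornApp class above follows Gunicorn's documented custom-application pattern (subclass gunicorn.app.base.BaseApplication, set options in load_config, return the app from load). A minimal standalone sketch of that pattern, with a placeholder WSGI app rather than LightRAG's, could look like this:

    from gunicorn.app.base import BaseApplication

    def app(environ, start_response):
        # Placeholder WSGI app used only for this sketch.
        start_response("200 OK", [("Content-Type", "text/plain")])
        return [b"ok"]

    class EmbeddedApp(BaseApplication):
        def __init__(self, application, options=None):
            self.options = options or {}
            self.application = application
            super().__init__()

        def load_config(self):
            # Only forward settings Gunicorn actually knows about.
            for key, value in self.options.items():
                if key in self.cfg.settings and value is not None:
                    self.cfg.set(key, value)

        def load(self):
            return self.application

    if __name__ == "__main__":
        EmbeddedApp(app, {"bind": "127.0.0.1:8000", "workers": 2}).run()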