Fix linting
@@ -471,12 +471,13 @@ def configure_logging():
def main():
    # Check if running under Gunicorn
-    if 'GUNICORN_CMD_ARGS' in os.environ:
+    if "GUNICORN_CMD_ARGS" in os.environ:
        # If started with Gunicorn, return directly as Gunicorn will call get_application
        print("Running under Gunicorn - worker management handled by Gunicorn")
        return

    from multiprocessing import freeze_support

    freeze_support()

    args = parse_args()
@@ -487,10 +488,10 @@ def main():
    configure_logging()

    display_splash_screen(args)

    # Create application instance directly instead of using factory function
    app = create_app(args)

    # Start Uvicorn in single process mode
    uvicorn_config = {
        "app": app, # Pass application instance directly instead of string path
@@ -498,7 +499,7 @@ def main():
        "port": args.port,
        "log_config": None, # Disable default config
    }

    if args.ssl:
        uvicorn_config.update(
            {
@@ -506,7 +507,7 @@ def main():
                "ssl_keyfile": args.ssl_keyfile,
            }
        )

    print(f"Starting Uvicorn server in single-process mode on {args.host}:{args.port}")
    uvicorn.run(**uvicorn_config)

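As a usage sketch of the single-process startup above (module and app names are illustrative, not taken from this commit): Uvicorn accepts either an application instance, as the code does here, or an import string, which is what a multi-worker setup would need so that each worker process can import the app itself.

# Illustrative sketch only: instance vs. import-string startup for Uvicorn.
import uvicorn
from fastapi import FastAPI

app = FastAPI()  # stand-in for the instance returned by create_app(args)

if __name__ == "__main__":
    # Single-process mode: the instance can be passed directly.
    uvicorn.run(app, host="0.0.0.0", port=9621, log_config=None)
    # Multi-worker mode would need an import string instead, e.g.:
    # uvicorn.run("my_server:app", host="0.0.0.0", port=9621, workers=4)
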
@@ -10,7 +10,11 @@ from lightrag.utils import (
    logger,
    write_json,
)
-from .shared_storage import get_namespace_data, get_storage_lock, try_initialize_namespace
+from .shared_storage import (
+    get_namespace_data,
+    get_storage_lock,
+    try_initialize_namespace,
+)


@final
@@ -20,7 +24,7 @@ class JsonKVStorage(BaseKVStorage):
        working_dir = self.global_config["working_dir"]
        self._file_name = os.path.join(working_dir, f"kv_store_{self.namespace}.json")
        self._storage_lock = get_storage_lock()

        # check need_init must before get_namespace_data
        need_init = try_initialize_namespace(self.namespace)
        self._data = get_namespace_data(self.namespace)

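The "check need_init must before get_namespace_data" comment reflects the initialization handshake these storage classes rely on: only the first caller for a namespace is told to populate it. A minimal sketch of the pattern (the namespace name and the data being loaded are illustrative):

# Illustrative sketch of the init handshake used by the KV storage above.
from lightrag.kg.shared_storage import get_namespace_data, try_initialize_namespace

need_init = try_initialize_namespace("kv_store_full_docs")  # True for the first caller only
data = get_namespace_data("kv_store_full_docs")             # shared dict for this namespace

if need_init:
    # Only the caller that won the flag populates the namespace,
    # so concurrent workers do not overwrite each other's load.
    data.update({"doc-1": {"content": "..."}})
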
@@ -11,7 +11,12 @@ from lightrag.utils import (
)
import pipmaster as pm
from lightrag.base import BaseVectorStorage
-from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace
+from .shared_storage import (
+    get_storage_lock,
+    get_namespace_object,
+    is_multiprocess,
+    try_initialize_namespace,
+)

if not pm.is_installed("nano-vectordb"):
    pm.install("nano-vectordb")

@@ -6,7 +6,12 @@ import numpy as np
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
from lightrag.utils import logger
from lightrag.base import BaseGraphStorage
-from .shared_storage import get_storage_lock, get_namespace_object, is_multiprocess, try_initialize_namespace
+from .shared_storage import (
+    get_storage_lock,
+    get_namespace_object,
+    is_multiprocess,
+    try_initialize_namespace,
+)

import pipmaster as pm

@@ -74,16 +79,14 @@ class NetworkXStorage(BaseGraphStorage):
            self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
        )
        self._storage_lock = get_storage_lock()

        # check need_init must before get_namespace_object
        need_init = try_initialize_namespace(self.namespace)
        self._graph = get_namespace_object(self.namespace)

        if need_init:
            if is_multiprocess:
-                preloaded_graph = NetworkXStorage.load_nx_graph(
-                    self._graphml_xml_file
-                )
+                preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
                self._graph.value = preloaded_graph or nx.Graph()
                if preloaded_graph:
                    logger.info(
@@ -92,9 +95,7 @@ class NetworkXStorage(BaseGraphStorage):
                else:
                    logger.info("Created new empty graph")
            else:
-                preloaded_graph = NetworkXStorage.load_nx_graph(
-                    self._graphml_xml_file
-                )
+                preloaded_graph = NetworkXStorage.load_nx_graph(self._graphml_xml_file)
                self._graph = preloaded_graph or nx.Graph()
                if preloaded_graph:
                    logger.info(

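Note the asymmetry in the graph storage above: in multi-process mode the namespace object is a Manager proxy and the graph is stored behind .value, while in single-process mode the attribute holds the graph directly. A rough sketch of the two access patterns (the namespace name is illustrative):

# Illustrative only: how the graph is reached in each mode.
import networkx as nx
from lightrag.kg.shared_storage import (
    get_namespace_object,
    initialize_share_data,
    is_multiprocess,
)

initialize_share_data()                      # single-process defaults (workers=1)
holder = get_namespace_object("graph_chunk_entity_relation")
if is_multiprocess:
    holder.value = nx.Graph()                # payload lives behind the Manager Value proxy
    graph = holder.value
else:
    graph = nx.Graph()                       # plain object in single-process mode
graph.add_edge("entity_a", "entity_b")
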
@@ -4,16 +4,17 @@ from multiprocessing.synchronize import Lock as ProcessLock
from threading import Lock as ThreadLock
from multiprocessing import Manager
from typing import Any, Dict, Optional, Union
from lightrag.utils import logger


# Define a direct print function for critical logs that must be visible in all processes
def direct_log(message, level="INFO"):
    """
    Log a message directly to stderr to ensure visibility in all processes,
    including the Gunicorn master process.
    """
    print(f"{level}: {message}", file=sys.stderr, flush=True)


LockType = Union[ProcessLock, ThreadLock]

_manager = None
@@ -31,39 +32,53 @@ _global_lock: Optional[LockType] = None
def initialize_share_data(workers: int = 1):
    """
    Initialize shared storage data for single or multi-process mode.

    When used with Gunicorn's preload feature, this function is called once in the
    master process before forking worker processes, allowing all workers to share
    the same initialized data.

    In single-process mode, this function is called during LightRAG object initialization.

    The function determines whether to use cross-process shared variables for data storage
    based on the number of workers. If workers=1, it uses thread locks and local dictionaries.
    If workers>1, it uses process locks and shared dictionaries managed by multiprocessing.Manager.

    Args:
        workers (int): Number of worker processes. If 1, single-process mode is used.
            If > 1, multi-process mode with shared memory is used.
    """
-    global _manager, is_multiprocess, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized
+    global \
+        _manager, \
+        is_multiprocess, \
+        is_multiprocess, \
+        _global_lock, \
+        _shared_dicts, \
+        _share_objects, \
+        _init_flags, \
+        _initialized

    # Check if already initialized
    if _initialized:
-        direct_log(f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})")
+        direct_log(
+            f"Process {os.getpid()} Shared-Data already initialized (multiprocess={is_multiprocess})"
+        )
        return

    _manager = Manager()

    # Force multi-process mode if workers > 1
    if workers > 1:
        is_multiprocess = True
        _global_lock = _manager.Lock()
        # Create shared dictionaries with manager
        _shared_dicts = _manager.dict()
        _share_objects = _manager.dict()
-        _init_flags = _manager.dict() # Use shared dictionary to store initialization flags
-        direct_log(f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})")
+        _init_flags = (
+            _manager.dict()
+        ) # Use shared dictionary to store initialization flags
+        direct_log(
+            f"Process {os.getpid()} Shared-Data created for Multiple Process (workers={workers})"
+        )
    else:
        is_multiprocess = False
        _global_lock = ThreadLock()

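A condensed usage sketch of the two modes described in the docstring above (the worker count and the namespace payload are illustrative; the scan_progress keys appear later in this commit):

# Illustrative lifecycle: called once per process tree before any storage is used.
from lightrag.kg.shared_storage import (
    get_namespace_data,
    initialize_share_data,
    try_initialize_namespace,
)

# workers=1 -> thread lock + plain dicts; workers>1 -> Manager lock + shared dicts.
initialize_share_data(workers=1)

if try_initialize_namespace("scan_progress"):
    get_namespace_data("scan_progress").update({"is_scanning": False, "progress": 0})
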
@@ -75,6 +90,7 @@ def initialize_share_data(workers: int = 1):
    # Mark as initialized
    _initialized = True


def try_initialize_namespace(namespace: str) -> bool:
    """
    Try to initialize a namespace. Returns True if the current process gets initialization permission.
@@ -83,8 +99,11 @@ def try_initialize_namespace(namespace: str) -> bool:
    global _init_flags, _manager

    if _init_flags is None:
-        direct_log(f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
+        direct_log(
+            f"Error: try to create nanmespace before Shared-Data is initialized, pid={os.getpid()}",
+            level="ERROR",
+        )
        raise ValueError("Shared dictionaries not initialized")

    if namespace not in _init_flags:
        _init_flags[namespace] = True

@@ -112,7 +131,10 @@ def get_namespace_object(namespace: str) -> Any:
    """Get an object for specific namespace"""

    if _share_objects is None:
-        direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
+        direct_log(
+            f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
+            level="ERROR",
+        )
        raise ValueError("Shared dictionaries not initialized")

    lock = _get_global_lock()
@@ -123,14 +145,20 @@ def get_namespace_object(namespace: str) -> Any:
            _share_objects[namespace] = _manager.Value("O", None)
        else:
            _share_objects[namespace] = None
-        direct_log(f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}")
+        direct_log(
+            f"Created namespace({namespace}): type={type(_share_objects[namespace])}, pid={os.getpid()}"
+        )

    return _share_objects[namespace]


def get_namespace_data(namespace: str) -> Dict[str, Any]:
    """get storage space for specific storage type(namespace)"""
    if _shared_dicts is None:
-        direct_log(f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}", level="ERROR")
+        direct_log(
+            f"Error: try to getnanmespace before Shared-Data is initialized, pid={os.getpid()}",
+            level="ERROR",
+        )
        raise ValueError("Shared dictionaries not initialized")

    lock = _get_global_lock()
@@ -140,8 +168,10 @@ def get_namespace_data(namespace: str) -> Dict[str, Any]:
            _shared_dicts[namespace] = _manager.dict()
        else:
            _shared_dicts[namespace] = {}
-        direct_log(f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}")
+
+        direct_log(
+            f"Created namespace({namespace}): type={type(_shared_dicts[namespace])}, pid={os.getpid()}"
+        )

    return _shared_dicts[namespace]

@@ -153,22 +183,33 @@ def get_scan_progress() -> Dict[str, Any]:
def finalize_share_data():
    """
    Release shared resources and clean up.

    This function should be called when the application is shutting down
    to properly release shared resources and avoid memory leaks.

    In multi-process mode, it shuts down the Manager and releases all shared objects.
    In single-process mode, it simply resets the global variables.
    """
-    global _manager, is_multiprocess, _global_lock, _shared_dicts, _share_objects, _init_flags, _initialized
+    global \
+        _manager, \
+        is_multiprocess, \
+        _global_lock, \
+        _shared_dicts, \
+        _share_objects, \
+        _init_flags, \
+        _initialized

    # Check if already initialized
    if not _initialized:
-        direct_log(f"Process {os.getpid()} storage data not initialized, nothing to finalize")
+        direct_log(
+            f"Process {os.getpid()} storage data not initialized, nothing to finalize"
+        )
        return

-    direct_log(f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})")
+    direct_log(
+        f"Process {os.getpid()} finalizing storage data (multiprocess={is_multiprocess})"
+    )

    # In multi-process mode, shut down the Manager
    if is_multiprocess and _manager is not None:
        try:
@@ -179,13 +220,15 @@ def finalize_share_data():
            _share_objects.clear()
            if _init_flags is not None:
                _init_flags.clear()

            # Shut down the Manager
            _manager.shutdown()
            direct_log(f"Process {os.getpid()} Manager shutdown complete")
        except Exception as e:
-            direct_log(f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR")
+            direct_log(
+                f"Process {os.getpid()} Error shutting down Manager: {e}", level="ERROR"
+            )

    # Reset global variables
    _manager = None
    _initialized = None
@@ -194,5 +237,5 @@ def finalize_share_data():
    _share_objects = None
    _init_flags = None
    _global_lock = None

    direct_log(f"Process {os.getpid()} storage data finalization complete")

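Paired with initialize_share_data(), the intended shutdown sequence might look like the following sketch (the surrounding lifecycle is illustrative, not part of this commit):

# Illustrative shutdown pairing for the function above.
from lightrag.kg.shared_storage import finalize_share_data, initialize_share_data

initialize_share_data(workers=1)
try:
    pass  # serve requests / run the indexing pipeline
finally:
    # Shuts down the multiprocessing.Manager in multi-process mode,
    # or simply resets the module-level globals in single-process mode.
    finalize_share_data()
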
@@ -271,12 +271,17 @@ class LightRAG:
        set_logger(self.log_file_path, self.log_level)
        logger.info(f"Logger initialized for working directory: {self.working_dir}")

-        from lightrag.kg.shared_storage import initialize_share_data, try_initialize_namespace, get_namespace_data
+        from lightrag.kg.shared_storage import (
+            initialize_share_data,
+            try_initialize_namespace,
+            get_namespace_data,
+        )

        initialize_share_data()

        need_init = try_initialize_namespace("scan_progress")
        scan_progress = get_namespace_data("scan_progress")
        logger.info(f"scan_progress type after init: {type(scan_progress)}")
        scan_progress.update(
            {
                "is_scanning": False,
@@ -286,9 +291,6 @@ class LightRAG:
                "progress": 0,
            }
        )
-        scan_progress = get_namespace_data("scan_progress")
-        logger.info(f"scan_progress type after update: {type(scan_progress)}")
-        logger.info(f"Scan_progres value after update: {scan_progress}")

        if not os.path.exists(self.working_dir):
            logger.info(f"Creating working directory {self.working_dir}")