From 1ca6837219ea38c512a3cf13504c930c3cddf162 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 2 Mar 2025 12:52:25 +0800 Subject: [PATCH 1/7] Add max nodes limit for graph retrieval of networkX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Set MAX_GRAPH_NODES env var (default 1000) • Change edge type to "RELATED" --- .env.example | 1 + lightrag/api/routers/graph_routes.py | 19 +++++++++++++++++-- lightrag/kg/networkx_impl.py | 14 ++++++++------ 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/.env.example b/.env.example index de9b6452..70cb575c 100644 --- a/.env.example +++ b/.env.example @@ -3,6 +3,7 @@ # PORT=9621 # WORKERS=1 # NAMESPACE_PREFIX=lightrag # separating data from difference Lightrag instances +# MAX_GRAPH_NODES=1000 # Max nodes returned from graph retrieval # CORS_ORIGINS=http://localhost:3000,http://localhost:8080 ### Optional SSL Configuration diff --git a/lightrag/api/routers/graph_routes.py b/lightrag/api/routers/graph_routes.py index 95a72758..aa1803c2 100644 --- a/lightrag/api/routers/graph_routes.py +++ b/lightrag/api/routers/graph_routes.py @@ -16,12 +16,27 @@ def create_graph_routes(rag, api_key: Optional[str] = None): @router.get("/graph/label/list", dependencies=[Depends(optional_api_key)]) async def get_graph_labels(): - """Get all graph labels""" + """ + Get all graph labels + + Returns: + List[str]: List of graph labels + """ return await rag.get_graph_labels() @router.get("/graphs", dependencies=[Depends(optional_api_key)]) async def get_knowledge_graph(label: str, max_depth: int = 3): - """Get knowledge graph for a specific label""" + """ + Get knowledge graph for a specific label. + Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000) + + Args: + label (str): Label to get knowledge graph for + max_depth (int, optional): Maximum depth of graph. Defaults to 3. 
+ + Returns: + Dict[str, List[str]]: Knowledge graph for label + """ return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth) return router diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index f11e9c0e..b1cc45fe 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -24,6 +24,8 @@ from .shared_storage import ( is_multiprocess, ) +MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000)) + @final @dataclass @@ -234,6 +236,7 @@ class NetworkXStorage(BaseGraphStorage): ) -> KnowledgeGraph: """ Get complete connected subgraph for specified node (including the starting node itself) + Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000) Args: node_label: Label of the starting node @@ -269,18 +272,17 @@ class NetworkXStorage(BaseGraphStorage): subgraph = nx.ego_graph(graph, nodes_to_explore[0], radius=max_depth) # Check if number of nodes exceeds max_graph_nodes - max_graph_nodes = 500 - if len(subgraph.nodes()) > max_graph_nodes: + if len(subgraph.nodes()) > MAX_GRAPH_NODES: origin_nodes = len(subgraph.nodes()) node_degrees = dict(subgraph.degree()) top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[ - :max_graph_nodes + :MAX_GRAPH_NODES ] top_node_ids = [node[0] for node in top_nodes] - # Create new subgraph with only top nodes + # Create new subgraph and keep nodes only with most degree subgraph = subgraph.subgraph(top_node_ids) logger.info( - f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes (depth={max_depth})" + f"Reduced graph from {origin_nodes} nodes to {MAX_GRAPH_NODES} nodes (depth={max_depth})" ) # Add nodes to result @@ -320,7 +322,7 @@ class NetworkXStorage(BaseGraphStorage): result.edges.append( KnowledgeGraphEdge( id=edge_id, - type="DIRECTED", + type="RELATED", source=str(source), target=str(target), properties=edge_data, From 0f1eb42c8dd7e2440f6c4f1c18afbfc37ad2b9c0 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 2 Mar 
2025 15:39:14 +0800 Subject: [PATCH 2/7] Add node limit and prioritization for knowledge graph retrieval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Add MAX_GRAPH_NODES limit from env var • Prioritize nodes by label match & connection --- lightrag/kg/neo4j_impl.py | 56 ++++++++++++++++++++++++++++-------- lightrag/kg/networkx_impl.py | 47 +++++++++++++++++++++++++++--- 2 files changed, 87 insertions(+), 16 deletions(-) diff --git a/lightrag/kg/neo4j_impl.py b/lightrag/kg/neo4j_impl.py index f5c2237a..2fb2c494 100644 --- a/lightrag/kg/neo4j_impl.py +++ b/lightrag/kg/neo4j_impl.py @@ -23,7 +23,7 @@ import pipmaster as pm if not pm.is_installed("neo4j"): pm.install("neo4j") -from neo4j import ( +from neo4j import ( # type: ignore AsyncGraphDatabase, exceptions as neo4jExceptions, AsyncDriver, @@ -34,6 +34,9 @@ from neo4j import ( config = configparser.ConfigParser() config.read("config.ini", "utf-8") +# 从环境变量获取最大图节点数,默认为1000 +MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000)) + @final @dataclass @@ -471,12 +474,17 @@ class Neo4JStorage(BaseGraphStorage): ) -> KnowledgeGraph: """ Get complete connected subgraph for specified node (including the starting node itself) + Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000). + When reducing the number of nodes, the prioritization criteria are as follows: + 1. Label matching nodes take precedence + 2. Followed by nodes directly connected to the matching nodes + 3. Finally, the degree of the nodes - Key fixes: - 1. Include the starting node itself - 2. Handle multi-label nodes - 3. Clarify relationship directions - 4. Add depth control + Args: + node_label (str): Label of the starting node + max_depth (int, optional): Maximum depth of the graph. Defaults to 5. 
+ Returns: + KnowledgeGraph: Complete connected subgraph for specified node """ label = node_label.strip('"') result = KnowledgeGraph() @@ -485,14 +493,22 @@ class Neo4JStorage(BaseGraphStorage): async with self._driver.session(database=self._DATABASE) as session: try: - main_query = "" if label == "*": main_query = """ MATCH (n) - WITH collect(DISTINCT n) AS nodes - MATCH ()-[r]-() - RETURN nodes, collect(DISTINCT r) AS relationships; + OPTIONAL MATCH (n)-[r]-() + WITH n, count(r) AS degree + ORDER BY degree DESC + LIMIT $max_nodes + WITH collect(n) AS nodes + MATCH (a)-[r]->(b) + WHERE a IN nodes AND b IN nodes + RETURN nodes, collect(DISTINCT r) AS relationships """ + result_set = await session.run( + main_query, {"max_nodes": MAX_GRAPH_NODES} + ) + else: # Critical debug step: first verify if starting node exists validate_query = f"MATCH (n:`{label}`) RETURN n LIMIT 1" @@ -512,9 +528,25 @@ class Neo4JStorage(BaseGraphStorage): bfs: true }}) YIELD nodes, relationships - RETURN nodes, relationships + WITH start, nodes, relationships + UNWIND nodes AS node + OPTIONAL MATCH (node)-[r]-() + WITH node, count(r) AS degree, start, nodes, relationships, + CASE + WHEN id(node) = id(start) THEN 2 + WHEN EXISTS((start)-->(node)) OR EXISTS((node)-->(start)) THEN 1 + ELSE 0 + END AS priority + ORDER BY priority DESC, degree DESC + LIMIT $max_nodes + WITH collect(node) AS filtered_nodes, nodes, relationships + RETURN filtered_nodes AS nodes, + [rel IN relationships WHERE startNode(rel) IN filtered_nodes AND endNode(rel) IN filtered_nodes] AS relationships """ - result_set = await session.run(main_query) + result_set = await session.run( + main_query, {"max_nodes": MAX_GRAPH_NODES} + ) + record = await result_set.single() if record: diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index b1cc45fe..462fb832 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -236,7 +236,11 @@ class NetworkXStorage(BaseGraphStorage): ) -> 
KnowledgeGraph: """ Get complete connected subgraph for specified node (including the starting node itself) - Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000) + Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000). + When reducing the number of nodes, the prioritization criteria are as follows: + 1. Label matching nodes take precedence + 2. Followed by nodes directly connected to the matching nodes + 3. Finally, the degree of the nodes Args: node_label: Label of the starting node @@ -268,14 +272,49 @@ class NetworkXStorage(BaseGraphStorage): logger.warning(f"No nodes found with label {node_label}") return result - # Get subgraph using ego_graph - subgraph = nx.ego_graph(graph, nodes_to_explore[0], radius=max_depth) + # Get subgraph using ego_graph from all matching nodes + combined_subgraph = nx.Graph() + for start_node in nodes_to_explore: + node_subgraph = nx.ego_graph(graph, start_node, radius=max_depth) + combined_subgraph = nx.compose(combined_subgraph, node_subgraph) + subgraph = combined_subgraph # Check if number of nodes exceeds max_graph_nodes if len(subgraph.nodes()) > MAX_GRAPH_NODES: origin_nodes = len(subgraph.nodes()) + + # 获取节点度数 node_degrees = dict(subgraph.degree()) - top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[ + + # 标记起点节点和直接连接的节点 + start_nodes = set() + direct_connected_nodes = set() + + if node_label != "*" and nodes_to_explore: + # 所有在 nodes_to_explore 中的节点都是起点节点 + start_nodes = set(nodes_to_explore) + + # 获取与所有起点直接连接的节点 + for start_node in start_nodes: + direct_connected_nodes.update(subgraph.neighbors(start_node)) + + # 从直接连接节点中移除起点节点(避免重复) + direct_connected_nodes -= start_nodes + + # 按优先级和度数排序 + def priority_key(node_item): + node, degree = node_item + # 优先级排序:起点(2) > 直接连接(1) > 其他节点(0) + if node in start_nodes: + priority = 2 + elif node in direct_connected_nodes: + priority = 1 + else: + priority = 0 + return (priority, degree) # 
先按优先级,再按度数 + + # 排序并选择前MAX_GRAPH_NODES个节点 + top_nodes = sorted(node_degrees.items(), key=priority_key, reverse=True)[ :MAX_GRAPH_NODES ] top_node_ids = [node[0] for node in top_nodes] From 68bf02abb6224ba1212af0ada9da54c23b0b3185 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 2 Mar 2025 16:20:37 +0800 Subject: [PATCH 3/7] refactor: improve graph querying with label substring matching and security fixes --- lightrag/kg/neo4j_impl.py | 26 ++++++++++++++++---------- lightrag/kg/networkx_impl.py | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/lightrag/kg/neo4j_impl.py b/lightrag/kg/neo4j_impl.py index 2fb2c494..8052b1f7 100644 --- a/lightrag/kg/neo4j_impl.py +++ b/lightrag/kg/neo4j_impl.py @@ -34,7 +34,7 @@ from neo4j import ( # type: ignore config = configparser.ConfigParser() config.read("config.ini", "utf-8") -# 从环境变量获取最大图节点数,默认为1000 +# Get maximum number of graph nodes from environment variable, default is 1000 MAX_GRAPH_NODES = int(os.getenv("MAX_GRAPH_NODES", 1000)) @@ -473,20 +473,22 @@ class Neo4JStorage(BaseGraphStorage): self, node_label: str, max_depth: int = 5 ) -> KnowledgeGraph: """ - Get complete connected subgraph for specified node (including the starting node itself) + Retrieve a connected subgraph of nodes where the label includes the specified `node_label`. Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000). When reducing the number of nodes, the prioritization criteria are as follows: - 1. Label matching nodes take precedence + 1. Label matching nodes take precedence (nodes containing the specified label string) 2. Followed by nodes directly connected to the matching nodes 3. Finally, the degree of the nodes Args: - node_label (str): Label of the starting node + node_label (str): String to match in node labels (will match any node containing this string in its label) max_depth (int, optional): Maximum depth of the graph. Defaults to 5. 
Returns: KnowledgeGraph: Complete connected subgraph for specified node """ label = node_label.strip('"') + # Escape single quotes to prevent injection attacks + escaped_label = label.replace("'", "\\'") result = KnowledgeGraph() seen_nodes = set() seen_edges = set() @@ -510,16 +512,20 @@ class Neo4JStorage(BaseGraphStorage): ) else: - # Critical debug step: first verify if starting node exists - validate_query = f"MATCH (n:`{label}`) RETURN n LIMIT 1" + validate_query = f""" + MATCH (n) + WHERE any(label IN labels(n) WHERE label CONTAINS '{escaped_label}') + RETURN n LIMIT 1 + """ validate_result = await session.run(validate_query) if not await validate_result.single(): - logger.warning(f"Starting node {label} does not exist!") + logger.warning(f"No nodes containing '{label}' in their labels found!") return result - # Optimized query (including direction handling and self-loops) + # Main query uses partial matching main_query = f""" - MATCH (start:`{label}`) + MATCH (start) + WHERE any(label IN labels(start) WHERE label CONTAINS '{escaped_label}') WITH start CALL apoc.path.subgraphAll(start, {{ relationshipFilter: '>', @@ -598,7 +604,7 @@ class Neo4JStorage(BaseGraphStorage): result = {"nodes": [], "edges": []} visited_nodes = set() visited_edges = set() - + async def traverse(current_label: str, current_depth: int): if current_depth > max_depth: return diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 462fb832..92d36fa6 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -235,7 +235,7 @@ class NetworkXStorage(BaseGraphStorage): self, node_label: str, max_depth: int = 5 ) -> KnowledgeGraph: """ - Get complete connected subgraph for specified node (including the starting node itself) + Retrieve a connected subgraph of nodes where the label includes the specified `node_label`. Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000). 
When reducing the number of nodes, the prioritization criteria are as follows: 1. Label matching nodes take precedence From 465737efed6e0d4b81854d87a142762a9d631b98 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 2 Mar 2025 17:32:25 +0800 Subject: [PATCH 4/7] Fix linting --- lightrag/api/routers/graph_routes.py | 7 ++++++- lightrag/kg/neo4j_impl.py | 8 +++++--- lightrag/kg/networkx_impl.py | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lightrag/api/routers/graph_routes.py b/lightrag/api/routers/graph_routes.py index aa1803c2..e6f894a2 100644 --- a/lightrag/api/routers/graph_routes.py +++ b/lightrag/api/routers/graph_routes.py @@ -27,7 +27,12 @@ def create_graph_routes(rag, api_key: Optional[str] = None): @router.get("/graphs", dependencies=[Depends(optional_api_key)]) async def get_knowledge_graph(label: str, max_depth: int = 3): """ - Get knowledge graph for a specific label. + Retrieve a connected subgraph of nodes where the label includes the specified label. + Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000). + When reducing the number of nodes, the prioritization criteria are as follows: + 1. Label matching nodes take precedence + 2. Followed by nodes directly connected to the matching nodes + 3. Finally, the degree of the nodes Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000) Args: diff --git a/lightrag/kg/neo4j_impl.py b/lightrag/kg/neo4j_impl.py index 8052b1f7..dccee330 100644 --- a/lightrag/kg/neo4j_impl.py +++ b/lightrag/kg/neo4j_impl.py @@ -475,7 +475,7 @@ class Neo4JStorage(BaseGraphStorage): """ Retrieve a connected subgraph of nodes where the label includes the specified `node_label`. Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000). - When reducing the number of nodes, the prioritization criteria are as follows: + When reducing the number of nodes, the prioritization criteria are as follows: 1. 
Label matching nodes take precedence (nodes containing the specified label string) 2. Followed by nodes directly connected to the matching nodes 3. Finally, the degree of the nodes @@ -519,7 +519,9 @@ class Neo4JStorage(BaseGraphStorage): """ validate_result = await session.run(validate_query) if not await validate_result.single(): - logger.warning(f"No nodes containing '{label}' in their labels found!") + logger.warning( + f"No nodes containing '{label}' in their labels found!" + ) return result # Main query uses partial matching @@ -604,7 +606,7 @@ class Neo4JStorage(BaseGraphStorage): result = {"nodes": [], "edges": []} visited_nodes = set() visited_edges = set() - + async def traverse(current_label: str, current_depth: int): if current_depth > max_depth: return diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 92d36fa6..9601a35e 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -237,7 +237,7 @@ class NetworkXStorage(BaseGraphStorage): """ Retrieve a connected subgraph of nodes where the label includes the specified `node_label`. Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000). - When reducing the number of nodes, the prioritization criteria are as follows: + When reducing the number of nodes, the prioritization criteria are as follows: 1. Label matching nodes take precedence 2. Followed by nodes directly connected to the matching nodes 3. 
Finally, the degree of the nodes From 11fdb60fe5ef30ec6cb447f6762f8cad1ff67b0b Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 3 Mar 2025 01:30:41 +0800 Subject: [PATCH 5/7] Remove Chinese comments --- lightrag/kg/networkx_impl.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 9601a35e..563fc554 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -283,37 +283,32 @@ class NetworkXStorage(BaseGraphStorage): if len(subgraph.nodes()) > MAX_GRAPH_NODES: origin_nodes = len(subgraph.nodes()) - # 获取节点度数 node_degrees = dict(subgraph.degree()) - # 标记起点节点和直接连接的节点 start_nodes = set() direct_connected_nodes = set() if node_label != "*" and nodes_to_explore: - # 所有在 nodes_to_explore 中的节点都是起点节点 start_nodes = set(nodes_to_explore) - - # 获取与所有起点直接连接的节点 + # Get nodes directly connected to all start nodes for start_node in start_nodes: direct_connected_nodes.update(subgraph.neighbors(start_node)) - # 从直接连接节点中移除起点节点(避免重复) + # Remove start nodes from directly connected nodes (avoid duplicates) direct_connected_nodes -= start_nodes - # 按优先级和度数排序 def priority_key(node_item): node, degree = node_item - # 优先级排序:起点(2) > 直接连接(1) > 其他节点(0) + # Priority order: start(2) > directly connected(1) > other nodes(0) if node in start_nodes: priority = 2 elif node in direct_connected_nodes: priority = 1 else: priority = 0 - return (priority, degree) # 先按优先级,再按度数 + return (priority, degree) - # 排序并选择前MAX_GRAPH_NODES个节点 + # Sort by priority and degree and select top MAX_GRAPH_NODES nodes top_nodes = sorted(node_degrees.items(), key=priority_key, reverse=True)[ :MAX_GRAPH_NODES ] From c21d5744f9f7abb5b2058f8ff4007ef54d7c58e7 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 3 Mar 2025 02:05:54 +0800 Subject: [PATCH 6/7] Remove duplicated run_with_gunicorn.py --- run_with_gunicorn.py | 203 ------------------------------------------- 1 file changed, 203 deletions(-) delete mode 
100755 run_with_gunicorn.py diff --git a/run_with_gunicorn.py b/run_with_gunicorn.py deleted file mode 100755 index 2e4e3cf7..00000000 --- a/run_with_gunicorn.py +++ /dev/null @@ -1,203 +0,0 @@ -#!/usr/bin/env python -""" -Start LightRAG server with Gunicorn -""" - -import os -import sys -import signal -import pipmaster as pm -from lightrag.api.utils_api import parse_args, display_splash_screen -from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data - - -def check_and_install_dependencies(): - """Check and install required dependencies""" - required_packages = [ - "gunicorn", - "tiktoken", - "psutil", - # Add other required packages here - ] - - for package in required_packages: - if not pm.is_installed(package): - print(f"Installing {package}...") - pm.install(package) - print(f"{package} installed successfully") - - -# Signal handler for graceful shutdown -def signal_handler(sig, frame): - print("\n\n" + "=" * 80) - print("RECEIVED TERMINATION SIGNAL") - print(f"Process ID: {os.getpid()}") - print("=" * 80 + "\n") - - # Release shared resources - finalize_share_data() - - # Exit with success status - sys.exit(0) - - -def main(): - # Check and install dependencies - check_and_install_dependencies() - - # Register signal handlers for graceful shutdown - signal.signal(signal.SIGINT, signal_handler) # Ctrl+C - signal.signal(signal.SIGTERM, signal_handler) # kill command - - # Parse all arguments using parse_args - args = parse_args(is_uvicorn_mode=False) - - # Display startup information - display_splash_screen(args) - - print("🚀 Starting LightRAG with Gunicorn") - print(f"🔄 Worker management: Gunicorn (workers={args.workers})") - print("🔍 Preloading app: Enabled") - print("📝 Note: Using Gunicorn's preload feature for shared data initialization") - print("\n\n" + "=" * 80) - print("MAIN PROCESS INITIALIZATION") - print(f"Process ID: {os.getpid()}") - print(f"Workers setting: {args.workers}") - print("=" * 80 + "\n") - - # Import Gunicorn's 
StandaloneApplication - from gunicorn.app.base import BaseApplication - - # Define a custom application class that loads our config - class GunicornApp(BaseApplication): - def __init__(self, app, options=None): - self.options = options or {} - self.application = app - super().__init__() - - def load_config(self): - # Define valid Gunicorn configuration options - valid_options = { - "bind", - "workers", - "worker_class", - "timeout", - "keepalive", - "preload_app", - "errorlog", - "accesslog", - "loglevel", - "certfile", - "keyfile", - "limit_request_line", - "limit_request_fields", - "limit_request_field_size", - "graceful_timeout", - "max_requests", - "max_requests_jitter", - } - - # Special hooks that need to be set separately - special_hooks = { - "on_starting", - "on_reload", - "on_exit", - "pre_fork", - "post_fork", - "pre_exec", - "pre_request", - "post_request", - "worker_init", - "worker_exit", - "nworkers_changed", - "child_exit", - } - - # Import and configure the gunicorn_config module - import gunicorn_config - - # Set configuration variables in gunicorn_config, prioritizing command line arguments - gunicorn_config.workers = ( - args.workers if args.workers else int(os.getenv("WORKERS", 1)) - ) - - # Bind configuration prioritizes command line arguments - host = args.host if args.host != "0.0.0.0" else os.getenv("HOST", "0.0.0.0") - port = args.port if args.port != 9621 else int(os.getenv("PORT", 9621)) - gunicorn_config.bind = f"{host}:{port}" - - # Log level configuration prioritizes command line arguments - gunicorn_config.loglevel = ( - args.log_level.lower() - if args.log_level - else os.getenv("LOG_LEVEL", "info") - ) - - # Timeout configuration prioritizes command line arguments - gunicorn_config.timeout = ( - args.timeout if args.timeout else int(os.getenv("TIMEOUT", 150)) - ) - - # Keepalive configuration - gunicorn_config.keepalive = int(os.getenv("KEEPALIVE", 5)) - - # SSL configuration prioritizes command line arguments - if args.ssl or 
os.getenv("SSL", "").lower() in ( - "true", - "1", - "yes", - "t", - "on", - ): - gunicorn_config.certfile = ( - args.ssl_certfile - if args.ssl_certfile - else os.getenv("SSL_CERTFILE") - ) - gunicorn_config.keyfile = ( - args.ssl_keyfile if args.ssl_keyfile else os.getenv("SSL_KEYFILE") - ) - - # Set configuration options from the module - for key in dir(gunicorn_config): - if key in valid_options: - value = getattr(gunicorn_config, key) - # Skip functions like on_starting and None values - if not callable(value) and value is not None: - self.cfg.set(key, value) - # Set special hooks - elif key in special_hooks: - value = getattr(gunicorn_config, key) - if callable(value): - self.cfg.set(key, value) - - if hasattr(gunicorn_config, "logconfig_dict"): - self.cfg.set( - "logconfig_dict", getattr(gunicorn_config, "logconfig_dict") - ) - - def load(self): - # Import the application - from lightrag.api.lightrag_server import get_application - - return get_application(args) - - # Create the application - app = GunicornApp("") - - # Force workers to be an integer and greater than 1 for multi-process mode - workers_count = int(args.workers) - if workers_count > 1: - # Set a flag to indicate we're in the main process - os.environ["LIGHTRAG_MAIN_PROCESS"] = "1" - initialize_share_data(workers_count) - else: - initialize_share_data(1) - - # Run the application - print("\nStarting Gunicorn with direct Python API...") - app.run() - - -if __name__ == "__main__": - main() From 0ea274a30dce6627986a4f40d51912f7c462b0bf Mon Sep 17 00:00:00 2001 From: MdNazishArmanShorthillsAI Date: Mon, 3 Mar 2025 13:53:45 +0530 Subject: [PATCH 7/7] Improved caching check --- lightrag/lightrag.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 4f1ad7dc..04f66adc 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -1150,7 +1150,7 @@ class LightRAG: """ if param.mode in ["local", "global", "hybrid"]: response = 
await kg_query( - query, + query.strip(), self.chunk_entity_relation_graph, self.entities_vdb, self.relationships_vdb, @@ -1171,7 +1171,7 @@ class LightRAG: ) elif param.mode == "naive": response = await naive_query( - query, + query.strip(), self.chunks_vdb, self.text_chunks, param, @@ -1190,7 +1190,7 @@ class LightRAG: ) elif param.mode == "mix": response = await mix_kg_vector_query( - query, + query.strip(), self.chunk_entity_relation_graph, self.entities_vdb, self.relationships_vdb,