Merge pull request #1003 from danielaskdd/add-graph-search-mode
Feat: Added minimum degree filter for graph queries
This commit is contained in:
@@ -3,7 +3,6 @@ This module contains all graph-related routes for the LightRAG API.
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from fastapi import APIRouter, Depends
|
||||
|
||||
from ..utils_api import get_api_key_dependency, get_auth_dependency
|
||||
@@ -25,23 +24,33 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
|
||||
return await rag.get_graph_labels()
|
||||
|
||||
@router.get("/graphs", dependencies=[Depends(optional_api_key)])
|
||||
async def get_knowledge_graph(label: str, max_depth: int = 3):
|
||||
async def get_knowledge_graph(
|
||||
label: str, max_depth: int = 3, min_degree: int = 0, inclusive: bool = False
|
||||
):
|
||||
"""
|
||||
Retrieve a connected subgraph of nodes where the label includes the specified label.
|
||||
Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
|
||||
When reducing the number of nodes, the prioritization criteria are as follows:
|
||||
1. Label matching nodes take precedence
|
||||
2. Followed by nodes directly connected to the matching nodes
|
||||
3. Finally, the degree of the nodes
|
||||
1. min_degree does not affect nodes directly connected to the matching nodes
|
||||
2. Label matching nodes take precedence
|
||||
3. Followed by nodes directly connected to the matching nodes
|
||||
4. Finally, the degree of the nodes
|
||||
Maximum number of nodes is limited to env MAX_GRAPH_NODES(default: 1000)
|
||||
|
||||
Args:
|
||||
label (str): Label to get knowledge graph for
|
||||
max_depth (int, optional): Maximum depth of graph. Defaults to 3.
|
||||
inclusive_search (bool, optional): If True, search for nodes that include the label. Defaults to False.
|
||||
min_degree (int, optional): Minimum degree of nodes. Defaults to 0.
|
||||
|
||||
Returns:
|
||||
Dict[str, List[str]]: Knowledge graph for label
|
||||
"""
|
||||
return await rag.get_knowledge_graph(node_label=label, max_depth=max_depth)
|
||||
return await rag.get_knowledge_graph(
|
||||
node_label=label,
|
||||
max_depth=max_depth,
|
||||
inclusive=inclusive,
|
||||
min_degree=min_degree,
|
||||
)
|
||||
|
||||
return router
|
||||
|
1
lightrag/api/webui/assets/index-CH-3l4_Z.css
Normal file
1
lightrag/api/webui/assets/index-CH-3l4_Z.css
Normal file
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -5,8 +5,8 @@
|
||||
<link rel="icon" type="image/svg+xml" href="./logo.png" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Lightrag</title>
|
||||
<script type="module" crossorigin src="./assets/index-DbuMPJAD.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-rP-YlyR1.css">
|
||||
<script type="module" crossorigin src="./assets/index-CJz72b6Q.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="./assets/index-CH-3l4_Z.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="root"></div>
|
||||
|
@@ -204,7 +204,7 @@ class BaseGraphStorage(StorageNameSpace, ABC):
|
||||
|
||||
@abstractmethod
|
||||
async def get_knowledge_graph(
|
||||
self, node_label: str, max_depth: int = 5
|
||||
self, node_label: str, max_depth: int = 3
|
||||
) -> KnowledgeGraph:
|
||||
"""Retrieve a subgraph of the knowledge graph starting from a given node."""
|
||||
|
||||
|
@@ -232,19 +232,26 @@ class NetworkXStorage(BaseGraphStorage):
|
||||
return sorted(list(labels))
|
||||
|
||||
async def get_knowledge_graph(
|
||||
self, node_label: str, max_depth: int = 5
|
||||
self,
|
||||
node_label: str,
|
||||
max_depth: int = 3,
|
||||
min_degree: int = 0,
|
||||
inclusive: bool = False,
|
||||
) -> KnowledgeGraph:
|
||||
"""
|
||||
Retrieve a connected subgraph of nodes where the label includes the specified `node_label`.
|
||||
Maximum number of nodes is constrained by the environment variable `MAX_GRAPH_NODES` (default: 1000).
|
||||
When reducing the number of nodes, the prioritization criteria are as follows:
|
||||
1. Label matching nodes take precedence
|
||||
2. Followed by nodes directly connected to the matching nodes
|
||||
3. Finally, the degree of the nodes
|
||||
1. min_degree does not affect nodes directly connected to the matching nodes
|
||||
2. Label matching nodes take precedence
|
||||
3. Followed by nodes directly connected to the matching nodes
|
||||
4. Finally, the degree of the nodes
|
||||
|
||||
Args:
|
||||
node_label: Label of the starting node
|
||||
max_depth: Maximum depth of the subgraph
|
||||
min_degree: Minimum degree of nodes to include. Defaults to 0
|
||||
inclusive: Do an inclusive search if true
|
||||
|
||||
Returns:
|
||||
KnowledgeGraph object containing nodes and edges
|
||||
@@ -255,6 +262,10 @@ class NetworkXStorage(BaseGraphStorage):
|
||||
|
||||
graph = await self._get_graph()
|
||||
|
||||
# Initialize sets for start nodes and direct connected nodes
|
||||
start_nodes = set()
|
||||
direct_connected_nodes = set()
|
||||
|
||||
# Handle special case for "*" label
|
||||
if node_label == "*":
|
||||
# For "*", return the entire graph including all nodes and edges
|
||||
@@ -262,11 +273,16 @@ class NetworkXStorage(BaseGraphStorage):
|
||||
graph.copy()
|
||||
) # Create a copy to avoid modifying the original graph
|
||||
else:
|
||||
# Find nodes with matching node id (partial match)
|
||||
# Find nodes with matching node id based on search_mode
|
||||
nodes_to_explore = []
|
||||
for n, attr in graph.nodes(data=True):
|
||||
if node_label in str(n): # Use partial matching
|
||||
nodes_to_explore.append(n)
|
||||
node_str = str(n)
|
||||
if not inclusive:
|
||||
if node_label == node_str: # Use exact matching
|
||||
nodes_to_explore.append(n)
|
||||
else: # inclusive mode
|
||||
if node_label in node_str: # Use partial matching
|
||||
nodes_to_explore.append(n)
|
||||
|
||||
if not nodes_to_explore:
|
||||
logger.warning(f"No nodes found with label {node_label}")
|
||||
@@ -277,26 +293,37 @@ class NetworkXStorage(BaseGraphStorage):
|
||||
for start_node in nodes_to_explore:
|
||||
node_subgraph = nx.ego_graph(graph, start_node, radius=max_depth)
|
||||
combined_subgraph = nx.compose(combined_subgraph, node_subgraph)
|
||||
|
||||
# Get start nodes and direct connected nodes
|
||||
if nodes_to_explore:
|
||||
start_nodes = set(nodes_to_explore)
|
||||
# Get nodes directly connected to all start nodes
|
||||
for start_node in start_nodes:
|
||||
direct_connected_nodes.update(
|
||||
combined_subgraph.neighbors(start_node)
|
||||
)
|
||||
|
||||
# Remove start nodes from directly connected nodes (avoid duplicates)
|
||||
direct_connected_nodes -= start_nodes
|
||||
|
||||
subgraph = combined_subgraph
|
||||
|
||||
# Filter nodes based on min_degree, but keep start nodes and direct connected nodes
|
||||
if min_degree > 0:
|
||||
nodes_to_keep = [
|
||||
node
|
||||
for node, degree in subgraph.degree()
|
||||
if node in start_nodes
|
||||
or node in direct_connected_nodes
|
||||
or degree >= min_degree
|
||||
]
|
||||
subgraph = subgraph.subgraph(nodes_to_keep)
|
||||
|
||||
# Check if number of nodes exceeds max_graph_nodes
|
||||
if len(subgraph.nodes()) > MAX_GRAPH_NODES:
|
||||
origin_nodes = len(subgraph.nodes())
|
||||
|
||||
node_degrees = dict(subgraph.degree())
|
||||
|
||||
start_nodes = set()
|
||||
direct_connected_nodes = set()
|
||||
|
||||
if node_label != "*" and nodes_to_explore:
|
||||
start_nodes = set(nodes_to_explore)
|
||||
# Get nodes directly connected to all start nodes
|
||||
for start_node in start_nodes:
|
||||
direct_connected_nodes.update(subgraph.neighbors(start_node))
|
||||
|
||||
# Remove start nodes from directly connected nodes (avoid duplicates)
|
||||
direct_connected_nodes -= start_nodes
|
||||
|
||||
def priority_key(node_item):
|
||||
node, degree = node_item
|
||||
# Priority order: start(2) > directly connected(1) > other nodes(0)
|
||||
@@ -356,7 +383,7 @@ class NetworkXStorage(BaseGraphStorage):
|
||||
result.edges.append(
|
||||
KnowledgeGraphEdge(
|
||||
id=edge_id,
|
||||
type="RELATED",
|
||||
type="DIRECTED",
|
||||
source=str(source),
|
||||
target=str(target),
|
||||
properties=edge_data,
|
||||
|
@@ -504,11 +504,39 @@ class LightRAG:
|
||||
return text
|
||||
|
||||
async def get_knowledge_graph(
|
||||
self, node_label: str, max_depth: int
|
||||
self,
|
||||
node_label: str,
|
||||
max_depth: int = 3,
|
||||
min_degree: int = 0,
|
||||
inclusive: bool = False,
|
||||
) -> KnowledgeGraph:
|
||||
return await self.chunk_entity_relation_graph.get_knowledge_graph(
|
||||
node_label=node_label, max_depth=max_depth
|
||||
)
|
||||
"""Get knowledge graph for a given label
|
||||
|
||||
Args:
|
||||
node_label (str): Label to get knowledge graph for
|
||||
max_depth (int): Maximum depth of graph
|
||||
min_degree (int, optional): Minimum degree of nodes to include. Defaults to 0.
|
||||
inclusive (bool, optional): Whether to use inclusive search mode. Defaults to False.
|
||||
|
||||
Returns:
|
||||
KnowledgeGraph: Knowledge graph containing nodes and edges
|
||||
"""
|
||||
# get params supported by get_knowledge_graph of specified storage
|
||||
import inspect
|
||||
|
||||
storage_params = inspect.signature(
|
||||
self.chunk_entity_relation_graph.get_knowledge_graph
|
||||
).parameters
|
||||
|
||||
kwargs = {"node_label": node_label, "max_depth": max_depth}
|
||||
|
||||
if "min_degree" in storage_params and min_degree > 0:
|
||||
kwargs["min_degree"] = min_degree
|
||||
|
||||
if "inclusive" in storage_params:
|
||||
kwargs["inclusive"] = inclusive
|
||||
|
||||
return await self.chunk_entity_relation_graph.get_knowledge_graph(**kwargs)
|
||||
|
||||
def _get_storage_class(self, storage_name: str) -> Callable[..., Any]:
|
||||
import_path = STORAGES[storage_name]
|
||||
|
Reference in New Issue
Block a user