From c620f9c4f2b1662f5c1d9d526c0d634e97b6750b Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 25 Apr 2025 09:22:53 +0800 Subject: [PATCH 1/3] Prioritize high-degree neighbors in BFS traversal for NetworkX storage --- lightrag/kg/networkx_impl.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 70a055b0..666d1efc 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -265,9 +265,14 @@ class NetworkXStorage(BaseGraphStorage): if depth < max_depth: # Add neighbor nodes to queue with incremented depth neighbors = list(graph.neighbors(current)) - queue.extend( - [(n, depth + 1) for n in neighbors if n not in visited] - ) + # Filter out already visited neighbors + unvisited_neighbors = [n for n in neighbors if n not in visited] + # Get the degree of each neighbor node + neighbor_degrees = [(n, graph.degree(n)) for n in unvisited_neighbors] + # Sort neighbors by degree in descending order + sorted_neighbors = sorted(neighbor_degrees, key=lambda x: x[1], reverse=True) + # Add sorted neighbors to the queue + queue.extend([(n, depth + 1) for n, _ in sorted_neighbors]) # Check if graph is truncated - if we still have nodes in the queue # and we've reached max_nodes, then the graph is truncated From bd11bcae328fd7f0ee96b61603bd77e576611260 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 25 Apr 2025 09:43:18 +0800 Subject: [PATCH 2/3] Fix linting --- lightrag/kg/networkx_impl.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 666d1efc..7fa96c9d 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -268,9 +268,13 @@ class NetworkXStorage(BaseGraphStorage): # Filter out already visited neighbors unvisited_neighbors = [n for n in neighbors if n not in visited] # Get the degree of each neighbor node - neighbor_degrees = [(n, graph.degree(n)) for 
n in unvisited_neighbors] + neighbor_degrees = [ + (n, graph.degree(n)) for n in unvisited_neighbors + ] # Sort neighbors by degree in descending order - sorted_neighbors = sorted(neighbor_degrees, key=lambda x: x[1], reverse=True) + sorted_neighbors = sorted( + neighbor_degrees, key=lambda x: x[1], reverse=True + ) # Add sorted neighbors to the queue queue.extend([(n, depth + 1) for n, _ in sorted_neighbors]) From 9ec7f5c8b321b0eb0b152e6c7765c70f57f45dc4 Mon Sep 17 00:00:00 2001 From: yangdx Date: Fri, 25 Apr 2025 11:25:29 +0800 Subject: [PATCH 3/3] Fix degree sorting problem in BFS --- lightrag/kg/networkx_impl.py | 59 ++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 23 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 7fa96c9d..c92bbd30 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -249,34 +249,47 @@ class NetworkXStorage(BaseGraphStorage): logger.warning(f"Node {node_label} not found in the graph") return KnowledgeGraph() # Return empty graph - # Use BFS to get nodes + # Use modified BFS to get nodes, prioritizing high-degree nodes at the same depth bfs_nodes = [] visited = set() - queue = [(node_label, 0)] # (node, depth) tuple + # Store (node, depth, degree) in the queue + queue = [(node_label, 0, graph.degree(node_label))] - # Breadth-first search + # Modified breadth-first search with degree-based prioritization while queue and len(bfs_nodes) < max_nodes: - current, depth = queue.pop(0) - if current not in visited: - visited.add(current) - bfs_nodes.append(current) + # Get the current depth from the first node in queue + current_depth = queue[0][1] - # Only explore neighbors if we haven't reached max_depth - if depth < max_depth: - # Add neighbor nodes to queue with incremented depth - neighbors = list(graph.neighbors(current)) - # Filter out already visited neighbors - unvisited_neighbors = [n for n in neighbors if n not in visited] - # Get the degree of each 
neighbor node - neighbor_degrees = [ - (n, graph.degree(n)) for n in unvisited_neighbors - ] - # Sort neighbors by degree in descending order - sorted_neighbors = sorted( - neighbor_degrees, key=lambda x: x[1], reverse=True - ) - # Add sorted neighbors to the queue - queue.extend([(n, depth + 1) for n, _ in sorted_neighbors]) + # Collect all nodes at the current depth + current_level_nodes = [] + while queue and queue[0][1] == current_depth: + current_level_nodes.append(queue.pop(0)) + + # Sort nodes at current depth by degree (highest first) + current_level_nodes.sort(key=lambda x: x[2], reverse=True) + + # Process all nodes at current depth in order of degree + for current_node, depth, degree in current_level_nodes: + if current_node not in visited: + visited.add(current_node) + bfs_nodes.append(current_node) + + # Only explore neighbors if we haven't reached max_depth + if depth < max_depth: + # Add neighbor nodes to queue with incremented depth + neighbors = list(graph.neighbors(current_node)) + # Filter out already visited neighbors + unvisited_neighbors = [ + n for n in neighbors if n not in visited + ] + # Add neighbors to the queue with their degrees + for neighbor in unvisited_neighbors: + neighbor_degree = graph.degree(neighbor) + queue.append((neighbor, depth + 1, neighbor_degree)) + + # Check if we've reached max_nodes + if len(bfs_nodes) >= max_nodes: + break # Check if graph is truncated - if we still have nodes in the queue # and we've reached max_nodes, then the graph is truncated