From f29628125b348c355bc61f05361c94d624bb37e9 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 02:36:36 +0800 Subject: [PATCH 1/9] Fix typo in parameter name from 'nodel_label' to 'node_label' --- lightrag/api/routers/graph_routes.py | 2 +- lightrag/lightrag.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lightrag/api/routers/graph_routes.py b/lightrag/api/routers/graph_routes.py index bfdb838c..38d9dbb7 100644 --- a/lightrag/api/routers/graph_routes.py +++ b/lightrag/api/routers/graph_routes.py @@ -22,6 +22,6 @@ def create_graph_routes(rag, api_key: Optional[str] = None): @router.get("/graphs", dependencies=[Depends(optional_api_key)]) async def get_knowledge_graph(label: str): """Get knowledge graph for a specific label""" - return await rag.get_knowledge_graph(nodel_label=label, max_depth=100) + return await rag.get_knowledge_graph(node_label=label, max_depth=100) return router diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index fa39db59..46638243 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -466,10 +466,10 @@ class LightRAG: return text async def get_knowledge_graph( - self, nodel_label: str, max_depth: int + self, node_label: str, max_depth: int ) -> KnowledgeGraph: return await self.chunk_entity_relation_graph.get_knowledge_graph( - node_label=nodel_label, max_depth=max_depth + node_label=node_label, max_depth=max_depth ) def _get_storage_class(self, storage_name: str) -> Callable[..., Any]: From b13c947360091b7110497033b19c5c17b1ffd508 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 02:37:42 +0800 Subject: [PATCH 2/9] Implement get_all_labels and get_knowledge_graph methods in NetworkXStorage --- lightrag/kg/networkx_impl.py | 107 ++++++++++++++++++++++++++++++++++- 1 file changed, 104 insertions(+), 3 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 9850b8c4..a6958704 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -5,7 +5,7 @@ from typing import Any, final import numpy as np -from lightrag.types import KnowledgeGraph +from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge from lightrag.utils import ( logger, ) @@ -169,9 +169,110 @@ class NetworkXStorage(BaseGraphStorage): self._graph.remove_edge(source, target) async def get_all_labels(self) -> list[str]: - raise NotImplementedError + """ + Get all node labels in the graph + Returns: + [label1, label2, ...] # Alphabetically sorted label list + """ + # Get all labels from nodes + labels = set() + for node in self._graph.nodes(): + # node_data = dict(self._graph.nodes[node]) + # if "entity_type" in node_data: + # if isinstance(node_data["entity_type"], list): + # labels.update(node_data["entity_type"]) + # else: + # labels.add(node_data["entity_type"]) + labels.add(str(node)) # Add node id as a label + + # Return sorted list + return sorted(list(labels)) async def get_knowledge_graph( self, node_label: str, max_depth: int = 5 ) -> KnowledgeGraph: - raise NotImplementedError + """ + Get complete connected subgraph for specified node (including the starting node itself) + + Args: + node_label: Label of the starting node + max_depth: Maximum depth of the subgraph + + Returns: + KnowledgeGraph object containing nodes and edges + """ + result = KnowledgeGraph() + seen_nodes = set() + seen_edges = set() + + # Handle special case for "*" label + if node_label == "*": + # For "*", return the entire graph including all nodes and edges + subgraph = self._graph.copy() # Create a copy to avoid modifying the original graph + else: + # Find nodes with matching node id (partial match) + nodes_to_explore = [] + for n, attr in self._graph.nodes(data=True): + if node_label in str(n): # Use partial matching + nodes_to_explore.append(n) + + if not nodes_to_explore: + logger.warning(f"No nodes found with label {node_label}") + return result + + # Get subgraph using ego_graph + subgraph = nx.ego_graph(self._graph, nodes_to_explore[0], radius=max_depth) + + # Add nodes to result + for node in subgraph.nodes(): + if str(node) in seen_nodes: + continue + + node_data = dict(subgraph.nodes[node]) + # Get entity_type as labels + labels = [] + if "entity_type" in node_data: + if isinstance(node_data["entity_type"], list): + labels.extend(node_data["entity_type"]) + else: + labels.append(node_data["entity_type"]) + + # Create node with properties + node_properties = {k: v for k, v in node_data.items()} + + result.nodes.append( + KnowledgeGraphNode( + id=str(node), + labels=[str(node)], + properties=node_properties + ) + ) + seen_nodes.add(str(node)) + + # Add edges to result + for edge in subgraph.edges(): + source, target = edge + edge_id = f"{source}-{target}" + if edge_id in seen_edges: + continue + + edge_data = dict(subgraph.edges[edge]) + + # Create edge with complete information + result.edges.append( + KnowledgeGraphEdge( + id=edge_id, + type="DIRECTED", + source=str(source), + target=str(target), + properties=edge_data, + ) + ) + seen_edges.add(edge_id) + + # logger.info(result.edges) + + logger.info( + f"Subgraph query successful | Node count: {len(result.nodes)} | Edge count: {len(result.edges)}" + ) + return result From b4543561f6f0fe9db025ef88798ba297774bbc9c Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 03:32:33 +0800 Subject: [PATCH 3/9] Limit subgraph size to 500 nodes in NetworkXStorage - Add max_graph_nodes check - Reduce subgraph by degree - Log graph size reduction --- lightrag/kg/networkx_impl.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index a6958704..e68d2887 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -219,10 +219,21 @@ class NetworkXStorage(BaseGraphStorage): if not nodes_to_explore: logger.warning(f"No nodes found with label {node_label}") return result - + # Get subgraph using ego_graph subgraph = nx.ego_graph(self._graph, nodes_to_explore[0], radius=max_depth) + # Check if number of nodes exceeds max_graph_nodes + max_graph_nodes=500 + if len(subgraph.nodes()) > max_graph_nodes: + origin_nodes=len(subgraph.nodes()) + node_degrees = dict(subgraph.degree()) + top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[:max_graph_nodes] + top_node_ids = [node[0] for node in top_nodes] + # Create new subgraph with only top nodes + subgraph = subgraph.subgraph(top_node_ids) + logger.info(f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes by degree (depth={max_depth})") + # Add nodes to result for node in subgraph.nodes(): if str(node) in seen_nodes: From 6cf555ebd58a69ecbde528f79bc90feeeb754604 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 03:33:22 +0800 Subject: [PATCH 4/9] Fix linting --- lightrag/kg/networkx_impl.py | 44 ++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index e68d2887..e8180dda 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -184,7 +184,7 @@ class NetworkXStorage(BaseGraphStorage): # else: # labels.add(node_data["entity_type"]) labels.add(str(node)) # Add node id as a label - + # Return sorted list return sorted(list(labels)) @@ -193,52 +193,58 @@ class NetworkXStorage(BaseGraphStorage): ) -> KnowledgeGraph: """ Get complete connected subgraph for specified node (including the starting node itself) - + Args: node_label: Label of the starting node max_depth: Maximum depth of the subgraph - + Returns: KnowledgeGraph object containing nodes and edges """ result = KnowledgeGraph() seen_nodes = set() seen_edges = set() - + # Handle special case for "*" label if node_label == "*": # For "*", return the entire graph including all nodes and edges - subgraph = self._graph.copy() # Create a copy to avoid modifying the original graph + subgraph = ( + self._graph.copy() + ) # Create a copy to avoid modifying the original graph else: # Find nodes with matching node id (partial match) nodes_to_explore = [] for n, attr in self._graph.nodes(data=True): if node_label in str(n): # Use partial matching nodes_to_explore.append(n) - + if not nodes_to_explore: logger.warning(f"No nodes found with label {node_label}") return result # Get subgraph using ego_graph subgraph = nx.ego_graph(self._graph, nodes_to_explore[0], radius=max_depth) - + # Check if number of nodes exceeds max_graph_nodes - max_graph_nodes=500 + max_graph_nodes = 500 if len(subgraph.nodes()) > max_graph_nodes: - origin_nodes=len(subgraph.nodes()) + origin_nodes = len(subgraph.nodes()) node_degrees = dict(subgraph.degree()) - top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[:max_graph_nodes] + top_nodes = sorted(node_degrees.items(), key=lambda x: x[1], reverse=True)[ + :max_graph_nodes + ] top_node_ids = [node[0] for node in top_nodes] # Create new subgraph with only top nodes subgraph = subgraph.subgraph(top_node_ids) - logger.info(f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes by degree (depth={max_depth})") + logger.info( + f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes by degree (depth={max_depth})" + ) # Add nodes to result for node in subgraph.nodes(): if str(node) in seen_nodes: continue - + node_data = dict(subgraph.nodes[node]) # Get entity_type as labels labels = [] @@ -247,28 +253,26 @@ class NetworkXStorage(BaseGraphStorage): labels.extend(node_data["entity_type"]) else: labels.append(node_data["entity_type"]) - + # Create node with properties node_properties = {k: v for k, v in node_data.items()} result.nodes.append( KnowledgeGraphNode( - id=str(node), - labels=[str(node)], - properties=node_properties + id=str(node), labels=[str(node)], properties=node_properties ) ) seen_nodes.add(str(node)) - + # Add edges to result for edge in subgraph.edges(): source, target = edge edge_id = f"{source}-{target}" if edge_id in seen_edges: continue - + edge_data = dict(subgraph.edges[edge]) - + # Create edge with complete information result.edges.append( KnowledgeGraphEdge( @@ -280,7 +284,7 @@ class NetworkXStorage(BaseGraphStorage): ) ) seen_edges.add(edge_id) - + # logger.info(result.edges) logger.info( From facf7c11fe108819e3007576f4b7cd6c8f4c44ee Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 03:34:44 +0800 Subject: [PATCH 5/9] Reduce knowledge graph max_depth from 100 to 3 --- lightrag/api/routers/graph_routes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/api/routers/graph_routes.py b/lightrag/api/routers/graph_routes.py index 38d9dbb7..28a5561a 100644 --- a/lightrag/api/routers/graph_routes.py +++ b/lightrag/api/routers/graph_routes.py @@ -22,6 +22,6 @@ def create_graph_routes(rag, api_key: Optional[str] = None): @router.get("/graphs", dependencies=[Depends(optional_api_key)]) async def get_knowledge_graph(label: str): """Get knowledge graph for a specific label""" - return await rag.get_knowledge_graph(node_label=label, max_depth=100) + return await rag.get_knowledge_graph(node_label=label, max_depth=3) return router From 9dd18ccd895d697d1797fd243382e0e90b66c702 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 03:52:43 +0800 Subject: [PATCH 6/9] Simplify log message --- lightrag/kg/networkx_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index e8180dda..2f238350 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -237,7 +237,7 @@ class NetworkXStorage(BaseGraphStorage): # Create new subgraph with only top nodes subgraph = subgraph.subgraph(top_node_ids) logger.info( - f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes by degree (depth={max_depth})" + f"Reduced graph from {origin_nodes} nodes to {max_graph_nodes} nodes (depth={max_depth})" ) # Add nodes to result From b8543b8701f70b20c772783b03c6f0e0f872d140 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 09:55:03 +0800 Subject: [PATCH 7/9] Remove unnecessary comment in networkx_impl.py --- lightrag/kg/networkx_impl.py | 1 - 1 file changed, 1 deletion(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index 2f238350..b4321458 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -174,7 +174,6 @@ class NetworkXStorage(BaseGraphStorage): Returns: [label1, label2, ...] # Alphabetically sorted label list """ - # Get all labels from nodes labels = set() for node in self._graph.nodes(): # node_data = dict(self._graph.nodes[node]) From 9fd0ab185f741b58bd795807da5e22ecf82c6235 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Feb 2025 16:28:18 +0800 Subject: [PATCH 8/9] Removed unnecessary comment --- lightrag/kg/networkx_impl.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/lightrag/kg/networkx_impl.py b/lightrag/kg/networkx_impl.py index b4321458..1f5d34d0 100644 --- a/lightrag/kg/networkx_impl.py +++ b/lightrag/kg/networkx_impl.py @@ -176,12 +176,6 @@ class NetworkXStorage(BaseGraphStorage): """ labels = set() for node in self._graph.nodes(): - # node_data = dict(self._graph.nodes[node]) - # if "entity_type" in node_data: - # if isinstance(node_data["entity_type"], list): - # labels.update(node_data["entity_type"]) - # else: - # labels.add(node_data["entity_type"]) labels.add(str(node)) # Add node id as a label # Return sorted list From 4f76b1c23e3c7544482f35f21b42a9fb66b8cdc7 Mon Sep 17 00:00:00 2001 From: Konrad Wojciechowski Date: Mon, 24 Feb 2025 03:29:39 +0100 Subject: [PATCH 9/9] fix AttributeError: 'NoneType' object has no attribute 'dim' --- lightrag/llm/hf.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lightrag/llm/hf.py b/lightrag/llm/hf.py index d678c611..fb5208b0 100644 --- a/lightrag/llm/hf.py +++ b/lightrag/llm/hf.py @@ -139,11 +139,14 @@ async def hf_model_complete( async def hf_embed(texts: list[str], tokenizer, embed_model) -> np.ndarray: device = next(embed_model.parameters()).device - input_ids = tokenizer( + encoded_texts = tokenizer( texts, return_tensors="pt", padding=True, truncation=True - ).input_ids.to(device) + ).to(device) with torch.no_grad(): - outputs = embed_model(input_ids) + outputs = embed_model( + input_ids=encoded_texts["input_ids"], + attention_mask=encoded_texts["attention_mask"], + ) embeddings = outputs.last_hidden_state.mean(dim=1) if embeddings.dtype == torch.bfloat16: return embeddings.detach().to(torch.float32).cpu().numpy()