From 36b606d0db554cd1b46495534baa6c885afccbbd Mon Sep 17 00:00:00 2001 From: sa9arr Date: Sat, 17 May 2025 19:32:25 +0545 Subject: [PATCH 1/3] Fix: Correct GraphML to JSON mapping in xml_to_json function --- examples/graph_visual_with_neo4j.py | 62 ++++++++++++++++++++++++++++- lightrag/utils.py | 1 - 2 files changed, 61 insertions(+), 2 deletions(-) diff --git a/examples/graph_visual_with_neo4j.py b/examples/graph_visual_with_neo4j.py index 35ef85a8..637cb36d 100644 --- a/examples/graph_visual_with_neo4j.py +++ b/examples/graph_visual_with_neo4j.py @@ -1,6 +1,6 @@ import os import json -from lightrag.utils import xml_to_json +import xml.etree.ElementTree as ET from neo4j import GraphDatabase # Constants @@ -13,6 +13,66 @@ NEO4J_URI = "bolt://localhost:7687" NEO4J_USERNAME = "neo4j" NEO4J_PASSWORD = "your_password" +def xml_to_json(xml_file): + try: + tree = ET.parse(xml_file) + root = tree.getroot() + + # Print the root element's tag and attributes to confirm the file has been correctly loaded + print(f"Root element: {root.tag}") + print(f"Root attributes: {root.attrib}") + + data = {"nodes": [], "edges": []} + + # Use namespace + namespace = {"": "http://graphml.graphdrawing.org/xmlns"} + + for node in root.findall(".//node", namespace): + node_data = { + "id": node.get("id").strip('"'), + "entity_type": node.find("./data[@key='d1']", namespace).text.strip('"') + if node.find("./data[@key='d1']", namespace) is not None + else "", + "description": node.find("./data[@key='d2']", namespace).text + if node.find("./data[@key='d2']", namespace) is not None + else "", + "source_id": node.find("./data[@key='d3']", namespace).text + if node.find("./data[@key='d3']", namespace) is not None + else "", + } + data["nodes"].append(node_data) + + for edge in root.findall(".//edge", namespace): + edge_data = { + "source": edge.get("source").strip('"'), + "target": edge.get("target").strip('"'), + "weight": float(edge.find("./data[@key='d5']", namespace).text) + if edge.find("./data[@key='d5']", namespace) is not None + else 0.0, + "description": edge.find("./data[@key='d6']", namespace).text + if edge.find("./data[@key='d6']", namespace) is not None + else "", + "keywords": edge.find("./data[@key='d7']", namespace).text + if edge.find("./data[@key='d7']", namespace) is not None + else "", + "source_id": edge.find("./data[@key='d8']", namespace).text + if edge.find("./data[@key='d8']", namespace) is not None + else "", + } + data["edges"].append(edge_data) + + + # Print the number of nodes and edges found + print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges") + + return data + except ET.ParseError as e: + print(f"Error parsing XML file: {e}") + return None + except Exception as e: + print(f"An error occurred: {e}") + return None + def convert_xml_to_json(xml_path, output_path): """Converts XML file to JSON and saves the output.""" diff --git a/lightrag/utils.py b/lightrag/utils.py index 7ecb11e3..4a3378db 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -13,7 +13,6 @@ from dataclasses import dataclass from functools import wraps from hashlib import md5 from typing import Any, Protocol, Callable, TYPE_CHECKING, List -import xml.etree.ElementTree as ET import numpy as np from lightrag.prompt import PROMPTS from dotenv import load_dotenv From 38b862e9937adffceeca65cd6c04980295cae71a Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 18 May 2025 07:16:52 +0800 Subject: [PATCH 2/3] Remove unsed functions --- lightrag/utils.py | 65 ----------------------------------------------- 1 file changed, 65 deletions(-) diff --git a/lightrag/utils.py b/lightrag/utils.py index 4a3378db..2e75b9b9 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -752,71 +752,6 @@ def truncate_list_by_token_size( return list_data -def save_data_to_file(data, file_name): - with open(file_name, "w", encoding="utf-8") as f: - json.dump(data, f, ensure_ascii=False, indent=4) - - -def xml_to_json(xml_file): - try: - tree = ET.parse(xml_file) - root = tree.getroot() - - # Print the root element's tag and attributes to confirm the file has been correctly loaded - print(f"Root element: {root.tag}") - print(f"Root attributes: {root.attrib}") - - data = {"nodes": [], "edges": []} - - # Use namespace - namespace = {"": "http://graphml.graphdrawing.org/xmlns"} - - for node in root.findall(".//node", namespace): - node_data = { - "id": node.get("id").strip('"'), - "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"') - if node.find("./data[@key='d0']", namespace) is not None - else "", - "description": node.find("./data[@key='d1']", namespace).text - if node.find("./data[@key='d1']", namespace) is not None - else "", - "source_id": node.find("./data[@key='d2']", namespace).text - if node.find("./data[@key='d2']", namespace) is not None - else "", - } - data["nodes"].append(node_data) - - for edge in root.findall(".//edge", namespace): - edge_data = { - "source": edge.get("source").strip('"'), - "target": edge.get("target").strip('"'), - "weight": float(edge.find("./data[@key='d3']", namespace).text) - if edge.find("./data[@key='d3']", namespace) is not None - else 0.0, - "description": edge.find("./data[@key='d4']", namespace).text - if edge.find("./data[@key='d4']", namespace) is not None - else "", - "keywords": edge.find("./data[@key='d5']", namespace).text - if edge.find("./data[@key='d5']", namespace) is not None - else "", - "source_id": edge.find("./data[@key='d6']", namespace).text - if edge.find("./data[@key='d6']", namespace) is not None - else "", - } - data["edges"].append(edge_data) - - # Print the number of nodes and edges found - print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges") - - return data - except ET.ParseError as e: - print(f"Error parsing XML file: {e}") - return None - except Exception as e: - print(f"An error occurred: {e}") - return None - - def process_combine_contexts(*context_lists): """ Combine multiple context lists and remove duplicate content From 7263a1ccf9ba1e24aaf046e752da55f97a803a4b Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 18 May 2025 07:17:21 +0800 Subject: [PATCH 3/3] Fix linting --- examples/graph_visual_with_neo4j.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/graph_visual_with_neo4j.py b/examples/graph_visual_with_neo4j.py index 637cb36d..1cd2e7a3 100644 --- a/examples/graph_visual_with_neo4j.py +++ b/examples/graph_visual_with_neo4j.py @@ -13,6 +13,7 @@ NEO4J_URI = "bolt://localhost:7687" NEO4J_USERNAME = "neo4j" NEO4J_PASSWORD = "your_password" + def xml_to_json(xml_file): try: tree = ET.parse(xml_file) @@ -61,7 +62,6 @@ def xml_to_json(xml_file): } data["edges"].append(edge_data) - # Print the number of nodes and edges found print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")