diff --git a/lightrag/utils.py b/lightrag/utils.py index 4a3378db..2e75b9b9 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -752,71 +752,6 @@ def truncate_list_by_token_size( return list_data -def save_data_to_file(data, file_name): - with open(file_name, "w", encoding="utf-8") as f: - json.dump(data, f, ensure_ascii=False, indent=4) - - -def xml_to_json(xml_file): - try: - tree = ET.parse(xml_file) - root = tree.getroot() - - # Print the root element's tag and attributes to confirm the file has been correctly loaded - print(f"Root element: {root.tag}") - print(f"Root attributes: {root.attrib}") - - data = {"nodes": [], "edges": []} - - # Use namespace - namespace = {"": "http://graphml.graphdrawing.org/xmlns"} - - for node in root.findall(".//node", namespace): - node_data = { - "id": node.get("id").strip('"'), - "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"') - if node.find("./data[@key='d0']", namespace) is not None - else "", - "description": node.find("./data[@key='d1']", namespace).text - if node.find("./data[@key='d1']", namespace) is not None - else "", - "source_id": node.find("./data[@key='d2']", namespace).text - if node.find("./data[@key='d2']", namespace) is not None - else "", - } - data["nodes"].append(node_data) - - for edge in root.findall(".//edge", namespace): - edge_data = { - "source": edge.get("source").strip('"'), - "target": edge.get("target").strip('"'), - "weight": float(edge.find("./data[@key='d3']", namespace).text) - if edge.find("./data[@key='d3']", namespace) is not None - else 0.0, - "description": edge.find("./data[@key='d4']", namespace).text - if edge.find("./data[@key='d4']", namespace) is not None - else "", - "keywords": edge.find("./data[@key='d5']", namespace).text - if edge.find("./data[@key='d5']", namespace) is not None - else "", - "source_id": edge.find("./data[@key='d6']", namespace).text - if edge.find("./data[@key='d6']", namespace) is not None - else "", - } - data["edges"].append(edge_data) - - # Print the number of nodes and edges found - print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges") - - return data - except ET.ParseError as e: - print(f"Error parsing XML file: {e}") - return None - except Exception as e: - print(f"An error occurred: {e}") - return None - - def process_combine_contexts(*context_lists): """ Combine multiple context lists and remove duplicate content