Compare commits
13 Commits
d66bb5eefe
...
8916f8a912
Author | SHA1 | Date | |
---|---|---|---|
8916f8a912
|
|||
|
702e87492c | ||
|
dde340f449 | ||
|
85bed30764 | ||
|
45cebc71c5 | ||
|
0961a21722 | ||
|
d97da6068a | ||
|
e492394fb6 | ||
|
b4615247c9 | ||
|
3ffa1009fe | ||
|
7263a1ccf9 | ||
|
38b862e993 | ||
|
36b606d0db |
@@ -1,6 +1,6 @@
|
||||
import os
|
||||
import json
|
||||
from lightrag.utils import xml_to_json
|
||||
import xml.etree.ElementTree as ET
|
||||
from neo4j import GraphDatabase
|
||||
|
||||
# Constants
|
||||
@@ -14,6 +14,66 @@ NEO4J_USERNAME = "neo4j"
|
||||
NEO4J_PASSWORD = "your_password"
|
||||
|
||||
|
||||
def xml_to_json(xml_file):
|
||||
try:
|
||||
tree = ET.parse(xml_file)
|
||||
root = tree.getroot()
|
||||
|
||||
# Print the root element's tag and attributes to confirm the file has been correctly loaded
|
||||
print(f"Root element: {root.tag}")
|
||||
print(f"Root attributes: {root.attrib}")
|
||||
|
||||
data = {"nodes": [], "edges": []}
|
||||
|
||||
# Use namespace
|
||||
namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
|
||||
|
||||
for node in root.findall(".//node", namespace):
|
||||
node_data = {
|
||||
"id": node.get("id").strip('"'),
|
||||
"entity_type": node.find("./data[@key='d1']", namespace).text.strip('"')
|
||||
if node.find("./data[@key='d1']", namespace) is not None
|
||||
else "",
|
||||
"description": node.find("./data[@key='d2']", namespace).text
|
||||
if node.find("./data[@key='d2']", namespace) is not None
|
||||
else "",
|
||||
"source_id": node.find("./data[@key='d3']", namespace).text
|
||||
if node.find("./data[@key='d3']", namespace) is not None
|
||||
else "",
|
||||
}
|
||||
data["nodes"].append(node_data)
|
||||
|
||||
for edge in root.findall(".//edge", namespace):
|
||||
edge_data = {
|
||||
"source": edge.get("source").strip('"'),
|
||||
"target": edge.get("target").strip('"'),
|
||||
"weight": float(edge.find("./data[@key='d5']", namespace).text)
|
||||
if edge.find("./data[@key='d5']", namespace) is not None
|
||||
else 0.0,
|
||||
"description": edge.find("./data[@key='d6']", namespace).text
|
||||
if edge.find("./data[@key='d6']", namespace) is not None
|
||||
else "",
|
||||
"keywords": edge.find("./data[@key='d7']", namespace).text
|
||||
if edge.find("./data[@key='d7']", namespace) is not None
|
||||
else "",
|
||||
"source_id": edge.find("./data[@key='d8']", namespace).text
|
||||
if edge.find("./data[@key='d8']", namespace) is not None
|
||||
else "",
|
||||
}
|
||||
data["edges"].append(edge_data)
|
||||
|
||||
# Print the number of nodes and edges found
|
||||
print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
|
||||
|
||||
return data
|
||||
except ET.ParseError as e:
|
||||
print(f"Error parsing XML file: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def convert_xml_to_json(xml_path, output_path):
|
||||
"""Converts XML file to JSON and saves the output."""
|
||||
if not os.path.exists(xml_path):
|
||||
|
@@ -18,12 +18,12 @@ os.environ["REDIS_URI"] = "redis://localhost:6379"
|
||||
# neo4j
|
||||
BATCH_SIZE_NODES = 500
|
||||
BATCH_SIZE_EDGES = 100
|
||||
os.environ["NEO4J_URI"] = "bolt://117.50.173.35:7687"
|
||||
os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
|
||||
os.environ["NEO4J_USERNAME"] = "neo4j"
|
||||
os.environ["NEO4J_PASSWORD"] = "12345678"
|
||||
|
||||
# milvus
|
||||
os.environ["MILVUS_URI"] = "http://117.50.173.35:19530"
|
||||
os.environ["MILVUS_URI"] = "http://localhost:19530"
|
||||
os.environ["MILVUS_USER"] = "root"
|
||||
os.environ["MILVUS_PASSWORD"] = "Milvus"
|
||||
os.environ["MILVUS_DB_NAME"] = "lightrag"
|
||||
|
@@ -1,5 +1,5 @@
|
||||
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
||||
|
||||
__version__ = "1.3.7"
|
||||
__version__ = "1.3.8"
|
||||
__author__ = "Zirui Guo"
|
||||
__url__ = "https://github.com/HKUDS/LightRAG"
|
||||
|
@@ -1 +1 @@
|
||||
__api_version__ = "0170"
|
||||
__api_version__ = "0171"
|
||||
|
@@ -478,16 +478,31 @@ def create_app(args):
|
||||
logger.error(f"Error getting health status: {str(e)}")
|
||||
raise HTTPException(status_code=500, detail=str(e))
|
||||
|
||||
# Custom StaticFiles class to prevent caching of HTML files
|
||||
class NoCacheStaticFiles(StaticFiles):
|
||||
# Custom StaticFiles class for smart caching
|
||||
class SmartStaticFiles(StaticFiles): # Renamed from NoCacheStaticFiles
|
||||
async def get_response(self, path: str, scope):
|
||||
response = await super().get_response(path, scope)
|
||||
|
||||
if path.endswith(".html"):
|
||||
response.headers["Cache-Control"] = (
|
||||
"no-cache, no-store, must-revalidate"
|
||||
)
|
||||
response.headers["Pragma"] = "no-cache"
|
||||
response.headers["Expires"] = "0"
|
||||
elif (
|
||||
"/assets/" in path
|
||||
): # Assets (JS, CSS, images, fonts) generated by Vite with hash in filename
|
||||
response.headers["Cache-Control"] = (
|
||||
"public, max-age=31536000, immutable"
|
||||
)
|
||||
# Add other rules here if needed for non-HTML, non-asset files
|
||||
|
||||
# Ensure correct Content-Type
|
||||
if path.endswith(".js"):
|
||||
response.headers["Content-Type"] = "application/javascript"
|
||||
elif path.endswith(".css"):
|
||||
response.headers["Content-Type"] = "text/css"
|
||||
|
||||
return response
|
||||
|
||||
# Webui mount webui/index.html
|
||||
@@ -495,7 +510,9 @@ def create_app(args):
|
||||
static_dir.mkdir(exist_ok=True)
|
||||
app.mount(
|
||||
"/webui",
|
||||
NoCacheStaticFiles(directory=static_dir, html=True, check_dir=True),
|
||||
SmartStaticFiles(
|
||||
directory=static_dir, html=True, check_dir=True
|
||||
), # Use SmartStaticFiles
|
||||
name="webui",
|
||||
)
|
||||
|
||||
|
@@ -311,6 +311,17 @@ class MongoDocStatusStorage(DocStatusStorage):
|
||||
logger.error(f"Error dropping doc status {self._collection_name}: {e}")
|
||||
return {"status": "error", "message": str(e)}
|
||||
|
||||
async def delete(self, ids: list[str]) -> None:
|
||||
try:
|
||||
result = await self._data.delete_many({"_id": {"$in": ids}})
|
||||
deleted_count = result.deleted_count
|
||||
|
||||
logger.info(
|
||||
f"Dropped {deleted_count} documents from doc status {self._collection_name}"
|
||||
)
|
||||
except PyMongoError as e:
|
||||
logger.error(f"Error deleting doc status {self._collection_name}: {e}")
|
||||
|
||||
|
||||
@final
|
||||
@dataclass
|
||||
|
@@ -13,7 +13,6 @@ from dataclasses import dataclass
|
||||
from functools import wraps
|
||||
from hashlib import md5
|
||||
from typing import Any, Protocol, Callable, TYPE_CHECKING, List
|
||||
import xml.etree.ElementTree as ET
|
||||
import numpy as np
|
||||
from lightrag.prompt import PROMPTS
|
||||
from dotenv import load_dotenv
|
||||
@@ -753,71 +752,6 @@ def truncate_list_by_token_size(
|
||||
return list_data
|
||||
|
||||
|
||||
def save_data_to_file(data, file_name):
|
||||
with open(file_name, "w", encoding="utf-8") as f:
|
||||
json.dump(data, f, ensure_ascii=False, indent=4)
|
||||
|
||||
|
||||
def xml_to_json(xml_file):
|
||||
try:
|
||||
tree = ET.parse(xml_file)
|
||||
root = tree.getroot()
|
||||
|
||||
# Print the root element's tag and attributes to confirm the file has been correctly loaded
|
||||
print(f"Root element: {root.tag}")
|
||||
print(f"Root attributes: {root.attrib}")
|
||||
|
||||
data = {"nodes": [], "edges": []}
|
||||
|
||||
# Use namespace
|
||||
namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
|
||||
|
||||
for node in root.findall(".//node", namespace):
|
||||
node_data = {
|
||||
"id": node.get("id").strip('"'),
|
||||
"entity_type": node.find("./data[@key='d0']", namespace).text.strip('"')
|
||||
if node.find("./data[@key='d0']", namespace) is not None
|
||||
else "",
|
||||
"description": node.find("./data[@key='d1']", namespace).text
|
||||
if node.find("./data[@key='d1']", namespace) is not None
|
||||
else "",
|
||||
"source_id": node.find("./data[@key='d2']", namespace).text
|
||||
if node.find("./data[@key='d2']", namespace) is not None
|
||||
else "",
|
||||
}
|
||||
data["nodes"].append(node_data)
|
||||
|
||||
for edge in root.findall(".//edge", namespace):
|
||||
edge_data = {
|
||||
"source": edge.get("source").strip('"'),
|
||||
"target": edge.get("target").strip('"'),
|
||||
"weight": float(edge.find("./data[@key='d3']", namespace).text)
|
||||
if edge.find("./data[@key='d3']", namespace) is not None
|
||||
else 0.0,
|
||||
"description": edge.find("./data[@key='d4']", namespace).text
|
||||
if edge.find("./data[@key='d4']", namespace) is not None
|
||||
else "",
|
||||
"keywords": edge.find("./data[@key='d5']", namespace).text
|
||||
if edge.find("./data[@key='d5']", namespace) is not None
|
||||
else "",
|
||||
"source_id": edge.find("./data[@key='d6']", namespace).text
|
||||
if edge.find("./data[@key='d6']", namespace) is not None
|
||||
else "",
|
||||
}
|
||||
data["edges"].append(edge_data)
|
||||
|
||||
# Print the number of nodes and edges found
|
||||
print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
|
||||
|
||||
return data
|
||||
except ET.ParseError as e:
|
||||
print(f"Error parsing XML file: {e}")
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"An error occurred: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def process_combine_contexts(*context_lists):
|
||||
"""
|
||||
Combine multiple context lists and remove duplicate content
|
||||
|
Reference in New Issue
Block a user