Compare commits

...

13 Commits

Author SHA1 Message Date
8916f8a912 feat: add delete method for mongo storage implement
All checks were successful
Linting and Formatting / lint-and-format (push) Successful in 3m47s
2025-05-22 04:41:52 +08:00
yangdx
702e87492c Bump api version to 0171 2025-05-21 16:52:33 +08:00
Daniel.y
dde340f449 Merge pull request #1605 from HKUDS/fix-mime-for-windows
Fix js files MIME type error for Windows environment
2025-05-21 16:48:56 +08:00
yangdx
85bed30764 Fix linting 2025-05-21 16:46:36 +08:00
yangdx
45cebc71c5 Refactor: Optimize static file caching for WebUI
- Renamed `NoCacheStaticFiles` to `SmartStaticFiles`.
- Implemented long-term caching (1 year, immutable) for versioned assets in `/webui/assets/`.
- Ensured `index.html` remains un-cached.
- Set correct `Content-Type` for JS and CSS files.
2025-05-21 16:46:18 +08:00
yangdx
0961a21722 Set correct Content-Type header for JavaScript files
• Fix missing Content-Type header for .js files
• Ensure proper MIME type handling
• Improve browser compatibility
2025-05-21 16:17:42 +08:00
yangdx
d97da6068a Fix linting 2025-05-20 17:57:42 +08:00
yangdx
e492394fb6 Fix linting 2025-05-20 17:56:52 +08:00
yangdx
b4615247c9 Bump core version to 1.3.8 2025-05-18 07:20:00 +08:00
yangdx
3ffa1009fe Merge branch 'fix-graphml-json-mapping' 2025-05-18 07:17:41 +08:00
yangdx
7263a1ccf9 Fix linting 2025-05-18 07:17:21 +08:00
yangdx
38b862e993 Remove unsed functions 2025-05-18 07:16:52 +08:00
sa9arr
36b606d0db Fix: Correct GraphML to JSON mapping in xml_to_json function 2025-05-17 19:32:25 +05:45
7 changed files with 96 additions and 74 deletions

View File

@@ -1,6 +1,6 @@
import os
import json
from lightrag.utils import xml_to_json
import xml.etree.ElementTree as ET
from neo4j import GraphDatabase
# Constants
@@ -14,6 +14,66 @@ NEO4J_USERNAME = "neo4j"
NEO4J_PASSWORD = "your_password"
def xml_to_json(xml_file):
try:
tree = ET.parse(xml_file)
root = tree.getroot()
# Print the root element's tag and attributes to confirm the file has been correctly loaded
print(f"Root element: {root.tag}")
print(f"Root attributes: {root.attrib}")
data = {"nodes": [], "edges": []}
# Use namespace
namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
for node in root.findall(".//node", namespace):
node_data = {
"id": node.get("id").strip('"'),
"entity_type": node.find("./data[@key='d1']", namespace).text.strip('"')
if node.find("./data[@key='d1']", namespace) is not None
else "",
"description": node.find("./data[@key='d2']", namespace).text
if node.find("./data[@key='d2']", namespace) is not None
else "",
"source_id": node.find("./data[@key='d3']", namespace).text
if node.find("./data[@key='d3']", namespace) is not None
else "",
}
data["nodes"].append(node_data)
for edge in root.findall(".//edge", namespace):
edge_data = {
"source": edge.get("source").strip('"'),
"target": edge.get("target").strip('"'),
"weight": float(edge.find("./data[@key='d5']", namespace).text)
if edge.find("./data[@key='d5']", namespace) is not None
else 0.0,
"description": edge.find("./data[@key='d6']", namespace).text
if edge.find("./data[@key='d6']", namespace) is not None
else "",
"keywords": edge.find("./data[@key='d7']", namespace).text
if edge.find("./data[@key='d7']", namespace) is not None
else "",
"source_id": edge.find("./data[@key='d8']", namespace).text
if edge.find("./data[@key='d8']", namespace) is not None
else "",
}
data["edges"].append(edge_data)
# Print the number of nodes and edges found
print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
return data
except ET.ParseError as e:
print(f"Error parsing XML file: {e}")
return None
except Exception as e:
print(f"An error occurred: {e}")
return None
def convert_xml_to_json(xml_path, output_path):
"""Converts XML file to JSON and saves the output."""
if not os.path.exists(xml_path):

View File

@@ -18,12 +18,12 @@ os.environ["REDIS_URI"] = "redis://localhost:6379"
# neo4j
BATCH_SIZE_NODES = 500
BATCH_SIZE_EDGES = 100
os.environ["NEO4J_URI"] = "bolt://117.50.173.35:7687"
os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
os.environ["NEO4J_USERNAME"] = "neo4j"
os.environ["NEO4J_PASSWORD"] = "12345678"
# milvus
os.environ["MILVUS_URI"] = "http://117.50.173.35:19530"
os.environ["MILVUS_URI"] = "http://localhost:19530"
os.environ["MILVUS_USER"] = "root"
os.environ["MILVUS_PASSWORD"] = "Milvus"
os.environ["MILVUS_DB_NAME"] = "lightrag"

View File

@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
__version__ = "1.3.7"
__version__ = "1.3.8"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"

View File

@@ -1 +1 @@
__api_version__ = "0170"
__api_version__ = "0171"

View File

@@ -478,16 +478,31 @@ def create_app(args):
logger.error(f"Error getting health status: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
# Custom StaticFiles class to prevent caching of HTML files
class NoCacheStaticFiles(StaticFiles):
# Custom StaticFiles class for smart caching
class SmartStaticFiles(StaticFiles): # Renamed from NoCacheStaticFiles
async def get_response(self, path: str, scope):
response = await super().get_response(path, scope)
if path.endswith(".html"):
response.headers["Cache-Control"] = (
"no-cache, no-store, must-revalidate"
)
response.headers["Pragma"] = "no-cache"
response.headers["Expires"] = "0"
elif (
"/assets/" in path
): # Assets (JS, CSS, images, fonts) generated by Vite with hash in filename
response.headers["Cache-Control"] = (
"public, max-age=31536000, immutable"
)
# Add other rules here if needed for non-HTML, non-asset files
# Ensure correct Content-Type
if path.endswith(".js"):
response.headers["Content-Type"] = "application/javascript"
elif path.endswith(".css"):
response.headers["Content-Type"] = "text/css"
return response
# Webui mount webui/index.html
@@ -495,7 +510,9 @@ def create_app(args):
static_dir.mkdir(exist_ok=True)
app.mount(
"/webui",
NoCacheStaticFiles(directory=static_dir, html=True, check_dir=True),
SmartStaticFiles(
directory=static_dir, html=True, check_dir=True
), # Use SmartStaticFiles
name="webui",
)

View File

@@ -311,6 +311,17 @@ class MongoDocStatusStorage(DocStatusStorage):
logger.error(f"Error dropping doc status {self._collection_name}: {e}")
return {"status": "error", "message": str(e)}
async def delete(self, ids: list[str]) -> None:
try:
result = await self._data.delete_many({"_id": {"$in": ids}})
deleted_count = result.deleted_count
logger.info(
f"Dropped {deleted_count} documents from doc status {self._collection_name}"
)
except PyMongoError as e:
logger.error(f"Error deleting doc status {self._collection_name}: {e}")
@final
@dataclass

View File

@@ -13,7 +13,6 @@ from dataclasses import dataclass
from functools import wraps
from hashlib import md5
from typing import Any, Protocol, Callable, TYPE_CHECKING, List
import xml.etree.ElementTree as ET
import numpy as np
from lightrag.prompt import PROMPTS
from dotenv import load_dotenv
@@ -753,71 +752,6 @@ def truncate_list_by_token_size(
return list_data
def save_data_to_file(data, file_name):
with open(file_name, "w", encoding="utf-8") as f:
json.dump(data, f, ensure_ascii=False, indent=4)
def xml_to_json(xml_file):
try:
tree = ET.parse(xml_file)
root = tree.getroot()
# Print the root element's tag and attributes to confirm the file has been correctly loaded
print(f"Root element: {root.tag}")
print(f"Root attributes: {root.attrib}")
data = {"nodes": [], "edges": []}
# Use namespace
namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
for node in root.findall(".//node", namespace):
node_data = {
"id": node.get("id").strip('"'),
"entity_type": node.find("./data[@key='d0']", namespace).text.strip('"')
if node.find("./data[@key='d0']", namespace) is not None
else "",
"description": node.find("./data[@key='d1']", namespace).text
if node.find("./data[@key='d1']", namespace) is not None
else "",
"source_id": node.find("./data[@key='d2']", namespace).text
if node.find("./data[@key='d2']", namespace) is not None
else "",
}
data["nodes"].append(node_data)
for edge in root.findall(".//edge", namespace):
edge_data = {
"source": edge.get("source").strip('"'),
"target": edge.get("target").strip('"'),
"weight": float(edge.find("./data[@key='d3']", namespace).text)
if edge.find("./data[@key='d3']", namespace) is not None
else 0.0,
"description": edge.find("./data[@key='d4']", namespace).text
if edge.find("./data[@key='d4']", namespace) is not None
else "",
"keywords": edge.find("./data[@key='d5']", namespace).text
if edge.find("./data[@key='d5']", namespace) is not None
else "",
"source_id": edge.find("./data[@key='d6']", namespace).text
if edge.find("./data[@key='d6']", namespace) is not None
else "",
}
data["edges"].append(edge_data)
# Print the number of nodes and edges found
print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
return data
except ET.ParseError as e:
print(f"Error parsing XML file: {e}")
return None
except Exception as e:
print(f"An error occurred: {e}")
return None
def process_combine_contexts(*context_lists):
"""
Combine multiple context lists and remove duplicate content