feat: add delete method for mongo storage implement

Bump api version to 0171
Merge pull request #1605 from HKUDS/fix-mime-for-windows
2025-05-22 04:41:52 +08:00 · 2025-05-21 16:52:33 +08:00 · 2025-05-21 16:48:56 +08:00 · 2025-05-21 16:46:36 +08:00 · 2025-05-21 16:46:18 +08:00 · 2025-05-21 16:17:42 +08:00
7 changed files with 96 additions and 74 deletions
--- a/examples/graph_visual_with_neo4j.py
+++ b/examples/graph_visual_with_neo4j.py
@@ -1,6 +1,6 @@
 import os
 import json
-from lightrag.utils import xml_to_json
+import xml.etree.ElementTree as ET
 from neo4j import GraphDatabase

 # Constants
@@ -14,6 +14,66 @@ NEO4J_USERNAME = "neo4j"
 NEO4J_PASSWORD = "your_password"


+def xml_to_json(xml_file):
+    try:
+        tree = ET.parse(xml_file)
+        root = tree.getroot()
+
+        # Print the root element's tag and attributes to confirm the file has been correctly loaded
+        print(f"Root element: {root.tag}")
+        print(f"Root attributes: {root.attrib}")
+
+        data = {"nodes": [], "edges": []}
+
+        # Use namespace
+        namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
+
+        for node in root.findall(".//node", namespace):
+            node_data = {
+                "id": node.get("id").strip('"'),
+                "entity_type": node.find("./data[@key='d1']", namespace).text.strip('"')
+                if node.find("./data[@key='d1']", namespace) is not None
+                else "",
+                "description": node.find("./data[@key='d2']", namespace).text
+                if node.find("./data[@key='d2']", namespace) is not None
+                else "",
+                "source_id": node.find("./data[@key='d3']", namespace).text
+                if node.find("./data[@key='d3']", namespace) is not None
+                else "",
+            }
+            data["nodes"].append(node_data)
+
+        for edge in root.findall(".//edge", namespace):
+            edge_data = {
+                "source": edge.get("source").strip('"'),
+                "target": edge.get("target").strip('"'),
+                "weight": float(edge.find("./data[@key='d5']", namespace).text)
+                if edge.find("./data[@key='d5']", namespace) is not None
+                else 0.0,
+                "description": edge.find("./data[@key='d6']", namespace).text
+                if edge.find("./data[@key='d6']", namespace) is not None
+                else "",
+                "keywords": edge.find("./data[@key='d7']", namespace).text
+                if edge.find("./data[@key='d7']", namespace) is not None
+                else "",
+                "source_id": edge.find("./data[@key='d8']", namespace).text
+                if edge.find("./data[@key='d8']", namespace) is not None
+                else "",
+            }
+            data["edges"].append(edge_data)
+
+        # Print the number of nodes and edges found
+        print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
+
+        return data
+    except ET.ParseError as e:
+        print(f"Error parsing XML file: {e}")
+        return None
+    except Exception as e:
+        print(f"An error occurred: {e}")
+        return None
+
+
 def convert_xml_to_json(xml_path, output_path):
    """Converts XML file to JSON and saves the output."""
    if not os.path.exists(xml_path):
--- a/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
+++ b/examples/unofficial-sample/lightrag_openai_neo4j_milvus_redis_demo.py
@@ -18,12 +18,12 @@ os.environ["REDIS_URI"] = "redis://localhost:6379"
 # neo4j
 BATCH_SIZE_NODES = 500
 BATCH_SIZE_EDGES = 100
-os.environ["NEO4J_URI"] = "bolt://117.50.173.35:7687"
+os.environ["NEO4J_URI"] = "neo4j://localhost:7687"
 os.environ["NEO4J_USERNAME"] = "neo4j"
 os.environ["NEO4J_PASSWORD"] = "12345678"

 # milvus
-os.environ["MILVUS_URI"] = "http://117.50.173.35:19530"
+os.environ["MILVUS_URI"] = "http://localhost:19530"
 os.environ["MILVUS_USER"] = "root"
 os.environ["MILVUS_PASSWORD"] = "Milvus"
 os.environ["MILVUS_DB_NAME"] = "lightrag"
--- a/lightrag/init.py
+++ b/lightrag/init.py
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam

-__version__ = "1.3.7"
+__version__ = "1.3.8"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
--- a/lightrag/api/init.py
+++ b/lightrag/api/init.py
@@ -1 +1 @@
-__api_version__ = "0170"
+__api_version__ = "0171"
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -478,16 +478,31 @@ def create_app(args):
            logger.error(f"Error getting health status: {str(e)}")
            raise HTTPException(status_code=500, detail=str(e))

-    # Custom StaticFiles class to prevent caching of HTML files
-    class NoCacheStaticFiles(StaticFiles):
+    # Custom StaticFiles class for smart caching
+    class SmartStaticFiles(StaticFiles):  # Renamed from NoCacheStaticFiles
        async def get_response(self, path: str, scope):
            response = await super().get_response(path, scope)
+
            if path.endswith(".html"):
                response.headers["Cache-Control"] = (
                    "no-cache, no-store, must-revalidate"
                )
                response.headers["Pragma"] = "no-cache"
                response.headers["Expires"] = "0"
+            elif (
+                "/assets/" in path
+            ):  # Assets (JS, CSS, images, fonts) generated by Vite with hash in filename
+                response.headers["Cache-Control"] = (
+                    "public, max-age=31536000, immutable"
+                )
+            # Add other rules here if needed for non-HTML, non-asset files
+
+            # Ensure correct Content-Type
+            if path.endswith(".js"):
+                response.headers["Content-Type"] = "application/javascript"
+            elif path.endswith(".css"):
+                response.headers["Content-Type"] = "text/css"
+
            return response

    # Webui mount webui/index.html
@@ -495,7 +510,9 @@ def create_app(args):
    static_dir.mkdir(exist_ok=True)
    app.mount(
        "/webui",
-        NoCacheStaticFiles(directory=static_dir, html=True, check_dir=True),
+        SmartStaticFiles(
+            directory=static_dir, html=True, check_dir=True
+        ),  # Use SmartStaticFiles
        name="webui",
    )

--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -311,6 +311,17 @@ class MongoDocStatusStorage(DocStatusStorage):
            logger.error(f"Error dropping doc status {self._collection_name}: {e}")
            return {"status": "error", "message": str(e)}

+    async def delete(self, ids: list[str]) -> None:
+        try:
+            result = await self._data.delete_many({"_id": {"$in": ids}})
+            deleted_count = result.deleted_count
+
+            logger.info(
+                f"Dropped {deleted_count} documents from doc status {self._collection_name}"
+            )
+        except PyMongoError as e:
+            logger.error(f"Error deleting doc status {self._collection_name}: {e}")
+

@final
@dataclass
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -13,7 +13,6 @@ from dataclasses import dataclass
 from functools import wraps
 from hashlib import md5
 from typing import Any, Protocol, Callable, TYPE_CHECKING, List
-import xml.etree.ElementTree as ET
 import numpy as np
 from lightrag.prompt import PROMPTS
 from dotenv import load_dotenv
@@ -753,71 +752,6 @@ def truncate_list_by_token_size(
    return list_data


-def save_data_to_file(data, file_name):
-    with open(file_name, "w", encoding="utf-8") as f:
-        json.dump(data, f, ensure_ascii=False, indent=4)
-
-
-def xml_to_json(xml_file):
-    try:
-        tree = ET.parse(xml_file)
-        root = tree.getroot()
-
-        # Print the root element's tag and attributes to confirm the file has been correctly loaded
-        print(f"Root element: {root.tag}")
-        print(f"Root attributes: {root.attrib}")
-
-        data = {"nodes": [], "edges": []}
-
-        # Use namespace
-        namespace = {"": "http://graphml.graphdrawing.org/xmlns"}
-
-        for node in root.findall(".//node", namespace):
-            node_data = {
-                "id": node.get("id").strip('"'),
-                "entity_type": node.find("./data[@key='d0']", namespace).text.strip('"')
-                if node.find("./data[@key='d0']", namespace) is not None
-                else "",
-                "description": node.find("./data[@key='d1']", namespace).text
-                if node.find("./data[@key='d1']", namespace) is not None
-                else "",
-                "source_id": node.find("./data[@key='d2']", namespace).text
-                if node.find("./data[@key='d2']", namespace) is not None
-                else "",
-            }
-            data["nodes"].append(node_data)
-
-        for edge in root.findall(".//edge", namespace):
-            edge_data = {
-                "source": edge.get("source").strip('"'),
-                "target": edge.get("target").strip('"'),
-                "weight": float(edge.find("./data[@key='d3']", namespace).text)
-                if edge.find("./data[@key='d3']", namespace) is not None
-                else 0.0,
-                "description": edge.find("./data[@key='d4']", namespace).text
-                if edge.find("./data[@key='d4']", namespace) is not None
-                else "",
-                "keywords": edge.find("./data[@key='d5']", namespace).text
-                if edge.find("./data[@key='d5']", namespace) is not None
-                else "",
-                "source_id": edge.find("./data[@key='d6']", namespace).text
-                if edge.find("./data[@key='d6']", namespace) is not None
-                else "",
-            }
-            data["edges"].append(edge_data)
-
-        # Print the number of nodes and edges found
-        print(f"Found {len(data['nodes'])} nodes and {len(data['edges'])} edges")
-
-        return data
-    except ET.ParseError as e:
-        print(f"Error parsing XML file: {e}")
-        return None
-    except Exception as e:
-        print(f"An error occurred: {e}")
-        return None
-
-
 def process_combine_contexts(*context_lists):
    """
    Combine multiple context lists and remove duplicate content
Author	SHA1	Message	Date
Gardel	8916f8a912	feat: add delete method for mongo storage implement [CI: all checks were successful; Linting and Formatting / lint-and-format (push) successful in 3m47s]	2025-05-22 04:41:52 +08:00
yangdx	702e87492c	Bump api version to 0171	2025-05-21 16:52:33 +08:00
Daniel.y	dde340f449	Merge pull request #1605 from HKUDS/fix-mime-for-windows Fix js files MIME type error for Windows environment	2025-05-21 16:48:56 +08:00
yangdx	85bed30764	Fix linting	2025-05-21 16:46:36 +08:00
yangdx	45cebc71c5	Refactor: Optimize static file caching for WebUI - Renamed `NoCacheStaticFiles` to `SmartStaticFiles`. - Implemented long-term caching (1 year, immutable) for versioned assets in `/webui/assets/`. - Ensured `index.html` remains un-cached. - Set correct `Content-Type` for JS and CSS files.	2025-05-21 16:46:18 +08:00
yangdx	0961a21722	Set correct Content-Type header for JavaScript files • Fix missing Content-Type header for .js files • Ensure proper MIME type handling • Improve browser compatibility	2025-05-21 16:17:42 +08:00
yangdx	d97da6068a	Fix linting	2025-05-20 17:57:42 +08:00
yangdx	e492394fb6	Fix linting	2025-05-20 17:56:52 +08:00
yangdx	b4615247c9	Bump core version to 1.3.8	2025-05-18 07:20:00 +08:00
yangdx	3ffa1009fe	Merge branch 'fix-graphml-json-mapping'	2025-05-18 07:17:41 +08:00
yangdx	7263a1ccf9	Fix linting	2025-05-18 07:17:21 +08:00
yangdx	38b862e993	Remove unused functions	2025-05-18 07:16:52 +08:00
sa9arr	36b606d0db	Fix: Correct GraphML to JSON mapping in xml_to_json function	2025-05-17 19:32:25 +05:45