Merge branch 'main' into graph-storage-batch-query

yangdx
2025-04-14 13:35:33 +08:00
25 changed files with 2942 additions and 1430 deletions

View File

@@ -1 +1 @@
__api_version__ = "0148"
__api_version__ = "0150"

View File

@@ -2,14 +2,29 @@
This module contains all graph-related routes for the LightRAG API.
"""

-from typing import Optional
-from fastapi import APIRouter, Depends, Query
+from typing import Optional, Dict, Any
+import traceback
+from fastapi import APIRouter, Depends, Query, HTTPException
+from pydantic import BaseModel
+from lightrag.utils import logger

from ..utils_api import get_combined_auth_dependency

router = APIRouter(tags=["graph"])


+class EntityUpdateRequest(BaseModel):
+    entity_name: str
+    updated_data: Dict[str, Any]
+    allow_rename: bool = False


+class RelationUpdateRequest(BaseModel):
+    source_id: str
+    target_id: str
+    updated_data: Dict[str, Any]


def create_graph_routes(rag, api_key: Optional[str] = None):
    combined_auth = get_combined_auth_dependency(api_key)
@@ -21,7 +36,14 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
        Returns:
            List[str]: List of graph labels
        """
-        return await rag.get_graph_labels()
+        try:
+            return await rag.get_graph_labels()
+        except Exception as e:
+            logger.error(f"Error getting graph labels: {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error getting graph labels: {str(e)}"
+            )

    @router.get("/graphs", dependencies=[Depends(combined_auth)])
    async def get_knowledge_graph(
@@ -43,10 +65,109 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
        Returns:
            Dict[str, List[str]]: Knowledge graph for label
        """
-        return await rag.get_knowledge_graph(
-            node_label=label,
-            max_depth=max_depth,
-            max_nodes=max_nodes,
-        )
+        try:
+            return await rag.get_knowledge_graph(
+                node_label=label,
+                max_depth=max_depth,
+                max_nodes=max_nodes,
+            )
+        except Exception as e:
+            logger.error(f"Error getting knowledge graph for label '{label}': {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error getting knowledge graph: {str(e)}"
+            )

+    @router.get("/graph/entity/exists", dependencies=[Depends(combined_auth)])
+    async def check_entity_exists(
+        name: str = Query(..., description="Entity name to check"),
+    ):
+        """
+        Check if an entity with the given name exists in the knowledge graph
+
+        Args:
+            name (str): Name of the entity to check
+
+        Returns:
+            Dict[str, bool]: Dictionary with 'exists' key indicating if entity exists
+        """
+        try:
+            exists = await rag.chunk_entity_relation_graph.has_node(name)
+            return {"exists": exists}
+        except Exception as e:
+            logger.error(f"Error checking entity existence for '{name}': {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error checking entity existence: {str(e)}"
+            )

+    @router.post("/graph/entity/edit", dependencies=[Depends(combined_auth)])
+    async def update_entity(request: EntityUpdateRequest):
+        """
+        Update an entity's properties in the knowledge graph
+
+        Args:
+            request (EntityUpdateRequest): Request containing entity name, updated data, and rename flag
+
+        Returns:
+            Dict: Updated entity information
+        """
+        try:
+            result = await rag.aedit_entity(
+                entity_name=request.entity_name,
+                updated_data=request.updated_data,
+                allow_rename=request.allow_rename,
+            )
+            return {
+                "status": "success",
+                "message": "Entity updated successfully",
+                "data": result,
+            }
+        except ValueError as ve:
+            logger.error(
+                f"Validation error updating entity '{request.entity_name}': {str(ve)}"
+            )
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(f"Error updating entity '{request.entity_name}': {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error updating entity: {str(e)}"
+            )

+    @router.post("/graph/relation/edit", dependencies=[Depends(combined_auth)])
+    async def update_relation(request: RelationUpdateRequest):
+        """Update a relation's properties in the knowledge graph
+
+        Args:
+            request (RelationUpdateRequest): Request containing source ID, target ID and updated data
+
+        Returns:
+            Dict: Updated relation information
+        """
+        try:
+            result = await rag.aedit_relation(
+                source_entity=request.source_id,
+                target_entity=request.target_id,
+                updated_data=request.updated_data,
+            )
+            return {
+                "status": "success",
+                "message": "Relation updated successfully",
+                "data": result,
+            }
+        except ValueError as ve:
+            logger.error(
+                f"Validation error updating relation between '{request.source_id}' and '{request.target_id}': {str(ve)}"
+            )
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(
+                f"Error updating relation between '{request.source_id}' and '{request.target_id}': {str(e)}"
+            )
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error updating relation: {str(e)}"
+            )

    return router
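
For readers trying out the new endpoints, here is a minimal client sketch. It is not part of the commit: it assumes a LightRAG server on http://localhost:9621, the third-party `requests` package, the X-API-Key header for servers started with an API key, and illustrative entity names.

import requests

BASE = "http://localhost:9621"  # assumed server address; adjust to your deployment
HEADERS = {"X-API-Key": "your-api-key"}  # assumed auth header; omit if no API key is set

# Check whether an entity exists before editing it
resp = requests.get(
    f"{BASE}/graph/entity/exists", params={"name": "Alan Turing"}, headers=HEADERS
)
print(resp.json())  # e.g. {"exists": true}

# Update an entity's properties; allow_rename defaults to False
resp = requests.post(
    f"{BASE}/graph/entity/edit",
    json={
        "entity_name": "Alan Turing",
        "updated_data": {"description": "British mathematician and computer scientist"},
        "allow_rename": False,
    },
    headers=HEADERS,
)
print(resp.json())  # {"status": "success", "message": "Entity updated successfully", ...}

# Update a relation between two entities
resp = requests.post(
    f"{BASE}/graph/relation/edit",
    json={
        "source_id": "Alan Turing",
        "target_id": "Enigma",
        "updated_data": {"description": "Turing led the effort to break Enigma"},
    },
    headers=HEADERS,
)
print(resp.json())

Per the handlers above, a ValueError raised by aedit_entity or aedit_relation surfaces as HTTP 400 with the validation message; any other failure is logged with a traceback and returned as HTTP 500.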

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -8,8 +8,8 @@
    <link rel="icon" type="image/svg+xml" href="logo.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Lightrag</title>
-    <script type="module" crossorigin src="/webui/assets/index-CkwV8nfm.js"></script>
-    <link rel="stylesheet" crossorigin href="/webui/assets/index-CTB4Vp_z.css">
+    <script type="module" crossorigin src="/webui/assets/index-CIRM3gxn.js"></script>
+    <link rel="stylesheet" crossorigin href="/webui/assets/index-BJDb04H1.css">
  </head>
  <body>
    <div id="root"></div>

File diff suppressed because it is too large

View File

@@ -893,6 +893,351 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
        return new_loop


+async def aexport_data(
+    chunk_entity_relation_graph,
+    entities_vdb,
+    relationships_vdb,
+    output_path: str,
+    file_format: str = "csv",
+    include_vector_data: bool = False,
+) -> None:
+    """
+    Asynchronously exports all entities, relations, and relationships to various formats.
+
+    Args:
+        chunk_entity_relation_graph: Graph storage instance for entities and relations
+        entities_vdb: Vector database storage for entities
+        relationships_vdb: Vector database storage for relationships
+        output_path: The path to the output file (including extension).
+        file_format: Output format - "csv", "excel", "md", "txt".
+            - csv: Comma-separated values file
+            - excel: Microsoft Excel file with multiple sheets
+            - md: Markdown tables
+            - txt: Plain text formatted output
+        include_vector_data: Whether to include data from the vector database.
+    """
+    # Collect data
+    entities_data = []
+    relations_data = []
+    relationships_data = []
+
+    # --- Entities ---
+    all_entities = await chunk_entity_relation_graph.get_all_labels()
+    for entity_name in all_entities:
+        # Get entity information from graph
+        node_data = await chunk_entity_relation_graph.get_node(entity_name)
+        source_id = node_data.get("source_id") if node_data else None
+
+        entity_info = {
+            "graph_data": node_data,
+            "source_id": source_id,
+        }
+
+        # Optional: Get vector database information
+        if include_vector_data:
+            entity_id = compute_mdhash_id(entity_name, prefix="ent-")
+            vector_data = await entities_vdb.get_by_id(entity_id)
+            entity_info["vector_data"] = vector_data
+
+        entity_row = {
+            "entity_name": entity_name,
+            "source_id": source_id,
+            "graph_data": str(
+                entity_info["graph_data"]
+            ),  # Convert to string to ensure compatibility
+        }
+        if include_vector_data and "vector_data" in entity_info:
+            entity_row["vector_data"] = str(entity_info["vector_data"])
+        entities_data.append(entity_row)
+
+    # --- Relations ---
+    for src_entity in all_entities:
+        for tgt_entity in all_entities:
+            if src_entity == tgt_entity:
+                continue
+
+            edge_exists = await chunk_entity_relation_graph.has_edge(
+                src_entity, tgt_entity
+            )
+            if edge_exists:
+                # Get edge information from graph
+                edge_data = await chunk_entity_relation_graph.get_edge(
+                    src_entity, tgt_entity
+                )
+                source_id = edge_data.get("source_id") if edge_data else None
+
+                relation_info = {
+                    "graph_data": edge_data,
+                    "source_id": source_id,
+                }
+
+                # Optional: Get vector database information
+                if include_vector_data:
+                    rel_id = compute_mdhash_id(src_entity + tgt_entity, prefix="rel-")
+                    vector_data = await relationships_vdb.get_by_id(rel_id)
+                    relation_info["vector_data"] = vector_data
+
+                relation_row = {
+                    "src_entity": src_entity,
+                    "tgt_entity": tgt_entity,
+                    "source_id": relation_info["source_id"],
+                    "graph_data": str(relation_info["graph_data"]),  # Convert to string
+                }
+                if include_vector_data and "vector_data" in relation_info:
+                    relation_row["vector_data"] = str(relation_info["vector_data"])
+                relations_data.append(relation_row)
+
+    # --- Relationships (from VectorDB) ---
+    all_relationships = await relationships_vdb.client_storage
+    for rel in all_relationships["data"]:
+        relationships_data.append(
+            {
+                "relationship_id": rel["__id__"],
+                "data": str(rel),  # Convert to string for compatibility
+            }
+        )
+
+    # Export based on format
+    if file_format == "csv":
+        # CSV export
+        with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
+            # Entities
+            if entities_data:
+                csvfile.write("# ENTITIES\n")
+                writer = csv.DictWriter(csvfile, fieldnames=entities_data[0].keys())
+                writer.writeheader()
+                writer.writerows(entities_data)
+                csvfile.write("\n\n")
+
+            # Relations
+            if relations_data:
+                csvfile.write("# RELATIONS\n")
+                writer = csv.DictWriter(csvfile, fieldnames=relations_data[0].keys())
+                writer.writeheader()
+                writer.writerows(relations_data)
+                csvfile.write("\n\n")
+
+            # Relationships
+            if relationships_data:
+                csvfile.write("# RELATIONSHIPS\n")
+                writer = csv.DictWriter(
+                    csvfile, fieldnames=relationships_data[0].keys()
+                )
+                writer.writeheader()
+                writer.writerows(relationships_data)
+
+    elif file_format == "excel":
+        # Excel export
+        import pandas as pd
+
+        entities_df = pd.DataFrame(entities_data) if entities_data else pd.DataFrame()
+        relations_df = (
+            pd.DataFrame(relations_data) if relations_data else pd.DataFrame()
+        )
+        relationships_df = (
+            pd.DataFrame(relationships_data) if relationships_data else pd.DataFrame()
+        )
+
+        with pd.ExcelWriter(output_path, engine="xlsxwriter") as writer:
+            if not entities_df.empty:
+                entities_df.to_excel(writer, sheet_name="Entities", index=False)
+            if not relations_df.empty:
+                relations_df.to_excel(writer, sheet_name="Relations", index=False)
+            if not relationships_df.empty:
+                relationships_df.to_excel(
+                    writer, sheet_name="Relationships", index=False
+                )
+
+    elif file_format == "md":
+        # Markdown export
+        with open(output_path, "w", encoding="utf-8") as mdfile:
+            mdfile.write("# LightRAG Data Export\n\n")
+
+            # Entities
+            mdfile.write("## Entities\n\n")
+            if entities_data:
+                # Write header
+                mdfile.write("| " + " | ".join(entities_data[0].keys()) + " |\n")
+                mdfile.write(
+                    "| " + " | ".join(["---"] * len(entities_data[0].keys())) + " |\n"
+                )
+
+                # Write rows
+                for entity in entities_data:
+                    mdfile.write(
+                        "| " + " | ".join(str(v) for v in entity.values()) + " |\n"
+                    )
+                mdfile.write("\n\n")
+            else:
+                mdfile.write("*No entity data available*\n\n")
+
+            # Relations
+            mdfile.write("## Relations\n\n")
+            if relations_data:
+                # Write header
+                mdfile.write("| " + " | ".join(relations_data[0].keys()) + " |\n")
+                mdfile.write(
+                    "| " + " | ".join(["---"] * len(relations_data[0].keys())) + " |\n"
+                )
+
+                # Write rows
+                for relation in relations_data:
+                    mdfile.write(
+                        "| " + " | ".join(str(v) for v in relation.values()) + " |\n"
+                    )
+                mdfile.write("\n\n")
+            else:
+                mdfile.write("*No relation data available*\n\n")
+
+            # Relationships
+            mdfile.write("## Relationships\n\n")
+            if relationships_data:
+                # Write header
+                mdfile.write("| " + " | ".join(relationships_data[0].keys()) + " |\n")
+                mdfile.write(
+                    "| "
+                    + " | ".join(["---"] * len(relationships_data[0].keys()))
+                    + " |\n"
+                )
+
+                # Write rows
+                for relationship in relationships_data:
+                    mdfile.write(
+                        "| "
+                        + " | ".join(str(v) for v in relationship.values())
+                        + " |\n"
+                    )
+            else:
+                mdfile.write("*No relationship data available*\n\n")
+
+    elif file_format == "txt":
+        # Plain text export
+        with open(output_path, "w", encoding="utf-8") as txtfile:
+            txtfile.write("LIGHTRAG DATA EXPORT\n")
+            txtfile.write("=" * 80 + "\n\n")
+
+            # Entities
+            txtfile.write("ENTITIES\n")
+            txtfile.write("-" * 80 + "\n")
+            if entities_data:
+                # Create fixed width columns
+                col_widths = {
+                    k: max(len(k), max(len(str(e[k])) for e in entities_data))
+                    for k in entities_data[0]
+                }
+                header = " ".join(k.ljust(col_widths[k]) for k in entities_data[0])
+                txtfile.write(header + "\n")
+                txtfile.write("-" * len(header) + "\n")
+
+                # Write rows
+                for entity in entities_data:
+                    row = " ".join(
+                        str(v).ljust(col_widths[k]) for k, v in entity.items()
+                    )
+                    txtfile.write(row + "\n")
+                txtfile.write("\n\n")
+            else:
+                txtfile.write("No entity data available\n\n")
+
+            # Relations
+            txtfile.write("RELATIONS\n")
+            txtfile.write("-" * 80 + "\n")
+            if relations_data:
+                # Create fixed width columns
+                col_widths = {
+                    k: max(len(k), max(len(str(r[k])) for r in relations_data))
+                    for k in relations_data[0]
+                }
+                header = " ".join(k.ljust(col_widths[k]) for k in relations_data[0])
+                txtfile.write(header + "\n")
+                txtfile.write("-" * len(header) + "\n")
+
+                # Write rows
+                for relation in relations_data:
+                    row = " ".join(
+                        str(v).ljust(col_widths[k]) for k, v in relation.items()
+                    )
+                    txtfile.write(row + "\n")
+                txtfile.write("\n\n")
+            else:
+                txtfile.write("No relation data available\n\n")
+
+            # Relationships
+            txtfile.write("RELATIONSHIPS\n")
+            txtfile.write("-" * 80 + "\n")
+            if relationships_data:
+                # Create fixed width columns
+                col_widths = {
+                    k: max(len(k), max(len(str(r[k])) for r in relationships_data))
+                    for k in relationships_data[0]
+                }
+                header = " ".join(
+                    k.ljust(col_widths[k]) for k in relationships_data[0]
+                )
+                txtfile.write(header + "\n")
+                txtfile.write("-" * len(header) + "\n")
+
+                # Write rows
+                for relationship in relationships_data:
+                    row = " ".join(
+                        str(v).ljust(col_widths[k]) for k, v in relationship.items()
+                    )
+                    txtfile.write(row + "\n")
+            else:
+                txtfile.write("No relationship data available\n\n")
+
+    else:
+        raise ValueError(
+            f"Unsupported file format: {file_format}. "
+            f"Choose from: csv, excel, md, txt"
+        )
+
+    if file_format is not None:
+        print(f"Data exported to: {output_path} with format: {file_format}")
+    else:
+        print("Data displayed as table format")
+
+
+def export_data(
+    chunk_entity_relation_graph,
+    entities_vdb,
+    relationships_vdb,
+    output_path: str,
+    file_format: str = "csv",
+    include_vector_data: bool = False,
+) -> None:
+    """
+    Synchronously exports all entities, relations, and relationships to various formats.
+
+    Args:
+        chunk_entity_relation_graph: Graph storage instance for entities and relations
+        entities_vdb: Vector database storage for entities
+        relationships_vdb: Vector database storage for relationships
+        output_path: The path to the output file (including extension).
+        file_format: Output format - "csv", "excel", "md", "txt".
+            - csv: Comma-separated values file
+            - excel: Microsoft Excel file with multiple sheets
+            - md: Markdown tables
+            - txt: Plain text formatted output
+        include_vector_data: Whether to include data from the vector database.
+    """
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+    loop.run_until_complete(
+        aexport_data(
+            chunk_entity_relation_graph,
+            entities_vdb,
+            relationships_vdb,
+            output_path,
+            file_format,
+            include_vector_data,
+        )
+    )


def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any]:
    """Lazily import a class from an external module based on the package of the caller."""
    # Get the caller's module and package
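
A minimal usage sketch for the new export helpers, not part of the commit. It assumes the functions live in lightrag.utils (the module containing always_get_an_event_loop above) and that `rag` is an initialized LightRAG instance exposing the storage attributes named in this diff (chunk_entity_relation_graph, entities_vdb, relationships_vdb).

from lightrag.utils import export_data  # sync wrapper around aexport_data

# `rag` is assumed to be an already-initialized LightRAG instance
export_data(
    rag.chunk_entity_relation_graph,
    rag.entities_vdb,
    rag.relationships_vdb,
    output_path="kg_export.xlsx",
    file_format="excel",  # one of "csv", "excel", "md", "txt"
    include_vector_data=False,  # True also embeds vector-store payloads as strings
)

Two costs worth knowing: the relation pass probes has_edge() for every ordered pair of labels, so export time grows quadratically with the number of entities, and the excel branch imports pandas (with the xlsxwriter engine) at call time.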

lightrag/utils_graph.py (new file, 1066 lines)

File diff suppressed because it is too large