Merge branch 'main' into graph-storage-batch-query

yangdx
2025-04-14 13:35:33 +08:00
25 changed files with 2942 additions and 1430 deletions

View File

@@ -1 +1 @@
__api_version__ = "0148"
__api_version__ = "0150"

View File

@@ -2,14 +2,29 @@
This module contains all graph-related routes for the LightRAG API.
"""

-from typing import Optional
-from fastapi import APIRouter, Depends, Query
+from typing import Optional, Dict, Any
+import traceback
+from fastapi import APIRouter, Depends, Query, HTTPException
+from pydantic import BaseModel
+from lightrag.utils import logger

from ..utils_api import get_combined_auth_dependency

router = APIRouter(tags=["graph"])


+class EntityUpdateRequest(BaseModel):
+    entity_name: str
+    updated_data: Dict[str, Any]
+    allow_rename: bool = False


+class RelationUpdateRequest(BaseModel):
+    source_id: str
+    target_id: str
+    updated_data: Dict[str, Any]


def create_graph_routes(rag, api_key: Optional[str] = None):
    combined_auth = get_combined_auth_dependency(api_key)
@@ -21,7 +36,14 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
        Returns:
            List[str]: List of graph labels
        """
-        return await rag.get_graph_labels()
+        try:
+            return await rag.get_graph_labels()
+        except Exception as e:
+            logger.error(f"Error getting graph labels: {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error getting graph labels: {str(e)}"
+            )

    @router.get("/graphs", dependencies=[Depends(combined_auth)])
    async def get_knowledge_graph(
@@ -43,10 +65,109 @@ def create_graph_routes(rag, api_key: Optional[str] = None):
        Returns:
            Dict[str, List[str]]: Knowledge graph for label
        """
-        return await rag.get_knowledge_graph(
-            node_label=label,
-            max_depth=max_depth,
-            max_nodes=max_nodes,
-        )
+        try:
+            return await rag.get_knowledge_graph(
+                node_label=label,
+                max_depth=max_depth,
+                max_nodes=max_nodes,
+            )
+        except Exception as e:
+            logger.error(f"Error getting knowledge graph for label '{label}': {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error getting knowledge graph: {str(e)}"
+            )

+    @router.get("/graph/entity/exists", dependencies=[Depends(combined_auth)])
+    async def check_entity_exists(
+        name: str = Query(..., description="Entity name to check"),
+    ):
+        """
+        Check if an entity with the given name exists in the knowledge graph
+
+        Args:
+            name (str): Name of the entity to check
+
+        Returns:
+            Dict[str, bool]: Dictionary with 'exists' key indicating if entity exists
+        """
+        try:
+            exists = await rag.chunk_entity_relation_graph.has_node(name)
+            return {"exists": exists}
+        except Exception as e:
+            logger.error(f"Error checking entity existence for '{name}': {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error checking entity existence: {str(e)}"
+            )

+    @router.post("/graph/entity/edit", dependencies=[Depends(combined_auth)])
+    async def update_entity(request: EntityUpdateRequest):
+        """
+        Update an entity's properties in the knowledge graph
+
+        Args:
+            request (EntityUpdateRequest): Request containing entity name, updated data, and rename flag
+
+        Returns:
+            Dict: Updated entity information
+        """
+        try:
+            result = await rag.aedit_entity(
+                entity_name=request.entity_name,
+                updated_data=request.updated_data,
+                allow_rename=request.allow_rename,
+            )
+            return {
+                "status": "success",
+                "message": "Entity updated successfully",
+                "data": result,
+            }
+        except ValueError as ve:
+            logger.error(
+                f"Validation error updating entity '{request.entity_name}': {str(ve)}"
+            )
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(f"Error updating entity '{request.entity_name}': {str(e)}")
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error updating entity: {str(e)}"
+            )

+    @router.post("/graph/relation/edit", dependencies=[Depends(combined_auth)])
+    async def update_relation(request: RelationUpdateRequest):
+        """Update a relation's properties in the knowledge graph
+
+        Args:
+            request (RelationUpdateRequest): Request containing source ID, target ID and updated data
+
+        Returns:
+            Dict: Updated relation information
+        """
+        try:
+            result = await rag.aedit_relation(
+                source_entity=request.source_id,
+                target_entity=request.target_id,
+                updated_data=request.updated_data,
+            )
+            return {
+                "status": "success",
+                "message": "Relation updated successfully",
+                "data": result,
+            }
+        except ValueError as ve:
+            logger.error(
+                f"Validation error updating relation between '{request.source_id}' and '{request.target_id}': {str(ve)}"
+            )
+            raise HTTPException(status_code=400, detail=str(ve))
+        except Exception as e:
+            logger.error(
+                f"Error updating relation between '{request.source_id}' and '{request.target_id}': {str(e)}"
+            )
+            logger.error(traceback.format_exc())
+            raise HTTPException(
+                status_code=500, detail=f"Error updating relation: {str(e)}"
+            )

    return router
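
For readers trying out the new endpoints, here is a minimal client sketch. It is not part of the commit: it assumes a LightRAG server on http://localhost:9621, the third-party `requests` package, the X-API-Key header for servers started with an API key, and illustrative entity names.

import requests

BASE = "http://localhost:9621"  # assumed server address; adjust to your deployment
HEADERS = {"X-API-Key": "your-api-key"}  # assumed auth header; omit if no API key is set

# Check whether an entity exists before editing it
resp = requests.get(
    f"{BASE}/graph/entity/exists", params={"name": "Alan Turing"}, headers=HEADERS
)
print(resp.json())  # e.g. {"exists": true}

# Update an entity's properties; allow_rename defaults to False
resp = requests.post(
    f"{BASE}/graph/entity/edit",
    json={
        "entity_name": "Alan Turing",
        "updated_data": {"description": "British mathematician and computer scientist"},
        "allow_rename": False,
    },
    headers=HEADERS,
)
print(resp.json())  # {"status": "success", "message": "Entity updated successfully", ...}

# Update a relation between two entities
resp = requests.post(
    f"{BASE}/graph/relation/edit",
    json={
        "source_id": "Alan Turing",
        "target_id": "Enigma",
        "updated_data": {"description": "Turing led the effort to break Enigma"},
    },
    headers=HEADERS,
)
print(resp.json())

Per the handlers above, a ValueError raised by aedit_entity or aedit_relation surfaces as HTTP 400 with the validation message; any other failure is logged with a traceback and returned as HTTP 500.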

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -8,8 +8,8 @@
    <link rel="icon" type="image/svg+xml" href="logo.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Lightrag</title>
-    <script type="module" crossorigin src="/webui/assets/index-CkwV8nfm.js"></script>
-    <link rel="stylesheet" crossorigin href="/webui/assets/index-CTB4Vp_z.css">
+    <script type="module" crossorigin src="/webui/assets/index-CIRM3gxn.js"></script>
+    <link rel="stylesheet" crossorigin href="/webui/assets/index-BJDb04H1.css">
  </head>
  <body>
    <div id="root"></div>

File diff suppressed because it is too large

View File

@@ -893,6 +893,351 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
        return new_loop


+async def aexport_data(
+    chunk_entity_relation_graph,
+    entities_vdb,
+    relationships_vdb,
+    output_path: str,
+    file_format: str = "csv",
+    include_vector_data: bool = False,
+) -> None:
+    """
+    Asynchronously exports all entities, relations, and relationships to various formats.
+
+    Args:
+        chunk_entity_relation_graph: Graph storage instance for entities and relations
+        entities_vdb: Vector database storage for entities
+        relationships_vdb: Vector database storage for relationships
+        output_path: The path to the output file (including extension).
+        file_format: Output format - "csv", "excel", "md", "txt".
+            - csv: Comma-separated values file
+            - excel: Microsoft Excel file with multiple sheets
+            - md: Markdown tables
+            - txt: Plain text formatted output
+        include_vector_data: Whether to include data from the vector database.
+    """
+    # Collect data
+    entities_data = []
+    relations_data = []
+    relationships_data = []
+
+    # --- Entities ---
+    all_entities = await chunk_entity_relation_graph.get_all_labels()
+    for entity_name in all_entities:
+        # Get entity information from graph
+        node_data = await chunk_entity_relation_graph.get_node(entity_name)
+        source_id = node_data.get("source_id") if node_data else None
+
+        entity_info = {
+            "graph_data": node_data,
+            "source_id": source_id,
+        }
+
+        # Optional: Get vector database information
+        if include_vector_data:
+            entity_id = compute_mdhash_id(entity_name, prefix="ent-")
+            vector_data = await entities_vdb.get_by_id(entity_id)
+            entity_info["vector_data"] = vector_data
+
+        entity_row = {
+            "entity_name": entity_name,
+            "source_id": source_id,
+            "graph_data": str(
+                entity_info["graph_data"]
+            ),  # Convert to string to ensure compatibility
+        }
+        if include_vector_data and "vector_data" in entity_info:
+            entity_row["vector_data"] = str(entity_info["vector_data"])
+        entities_data.append(entity_row)
+
+    # --- Relations ---
+    for src_entity in all_entities:
+        for tgt_entity in all_entities:
+            if src_entity == tgt_entity:
+                continue
+
+            edge_exists = await chunk_entity_relation_graph.has_edge(
+                src_entity, tgt_entity
+            )
+            if edge_exists:
+                # Get edge information from graph
+                edge_data = await chunk_entity_relation_graph.get_edge(
+                    src_entity, tgt_entity
+                )
+                source_id = edge_data.get("source_id") if edge_data else None
+
+                relation_info = {
+                    "graph_data": edge_data,
+                    "source_id": source_id,
+                }
+
+                # Optional: Get vector database information
+                if include_vector_data:
+                    rel_id = compute_mdhash_id(src_entity + tgt_entity, prefix="rel-")
+                    vector_data = await relationships_vdb.get_by_id(rel_id)
+                    relation_info["vector_data"] = vector_data
+
+                relation_row = {
+                    "src_entity": src_entity,
+                    "tgt_entity": tgt_entity,
+                    "source_id": relation_info["source_id"],
+                    "graph_data": str(relation_info["graph_data"]),  # Convert to string
+                }
+                if include_vector_data and "vector_data" in relation_info:
+                    relation_row["vector_data"] = str(relation_info["vector_data"])
+                relations_data.append(relation_row)
+
+    # --- Relationships (from VectorDB) ---
+    all_relationships = await relationships_vdb.client_storage
+    for rel in all_relationships["data"]:
+        relationships_data.append(
+            {
+                "relationship_id": rel["__id__"],
+                "data": str(rel),  # Convert to string for compatibility
+            }
+        )
+
+    # Export based on format
+    if file_format == "csv":
+        # CSV export
+        with open(output_path, "w", newline="", encoding="utf-8") as csvfile:
+            # Entities
+            if entities_data:
+                csvfile.write("# ENTITIES\n")
+                writer = csv.DictWriter(csvfile, fieldnames=entities_data[0].keys())
+                writer.writeheader()
+                writer.writerows(entities_data)
+                csvfile.write("\n\n")
+
+            # Relations
+            if relations_data:
+                csvfile.write("# RELATIONS\n")
+                writer = csv.DictWriter(csvfile, fieldnames=relations_data[0].keys())
+                writer.writeheader()
+                writer.writerows(relations_data)
+                csvfile.write("\n\n")
+
+            # Relationships
+            if relationships_data:
+                csvfile.write("# RELATIONSHIPS\n")
+                writer = csv.DictWriter(
+                    csvfile, fieldnames=relationships_data[0].keys()
+                )
+                writer.writeheader()
+                writer.writerows(relationships_data)
+
+    elif file_format == "excel":
+        # Excel export
+        import pandas as pd
+
+        entities_df = pd.DataFrame(entities_data) if entities_data else pd.DataFrame()
+        relations_df = (
+            pd.DataFrame(relations_data) if relations_data else pd.DataFrame()
+        )
+        relationships_df = (
+            pd.DataFrame(relationships_data) if relationships_data else pd.DataFrame()
+        )
+
+        with pd.ExcelWriter(output_path, engine="xlsxwriter") as writer:
+            if not entities_df.empty:
+                entities_df.to_excel(writer, sheet_name="Entities", index=False)
+            if not relations_df.empty:
+                relations_df.to_excel(writer, sheet_name="Relations", index=False)
+            if not relationships_df.empty:
+                relationships_df.to_excel(
+                    writer, sheet_name="Relationships", index=False
+                )
+
+    elif file_format == "md":
+        # Markdown export
+        with open(output_path, "w", encoding="utf-8") as mdfile:
+            mdfile.write("# LightRAG Data Export\n\n")
+
+            # Entities
+            mdfile.write("## Entities\n\n")
+            if entities_data:
+                # Write header
+                mdfile.write("| " + " | ".join(entities_data[0].keys()) + " |\n")
+                mdfile.write(
+                    "| " + " | ".join(["---"] * len(entities_data[0].keys())) + " |\n"
+                )
+
+                # Write rows
+                for entity in entities_data:
+                    mdfile.write(
+                        "| " + " | ".join(str(v) for v in entity.values()) + " |\n"
+                    )
+                mdfile.write("\n\n")
+            else:
+                mdfile.write("*No entity data available*\n\n")
+
+            # Relations
+            mdfile.write("## Relations\n\n")
+            if relations_data:
+                # Write header
+                mdfile.write("| " + " | ".join(relations_data[0].keys()) + " |\n")
+                mdfile.write(
+                    "| " + " | ".join(["---"] * len(relations_data[0].keys())) + " |\n"
+                )
+
+                # Write rows
+                for relation in relations_data:
+                    mdfile.write(
+                        "| " + " | ".join(str(v) for v in relation.values()) + " |\n"
+                    )
+                mdfile.write("\n\n")
+            else:
+                mdfile.write("*No relation data available*\n\n")
+
+            # Relationships
+            mdfile.write("## Relationships\n\n")
+            if relationships_data:
+                # Write header
+                mdfile.write("| " + " | ".join(relationships_data[0].keys()) + " |\n")
+                mdfile.write(
+                    "| "
+                    + " | ".join(["---"] * len(relationships_data[0].keys()))
+                    + " |\n"
+                )
+
+                # Write rows
+                for relationship in relationships_data:
+                    mdfile.write(
+                        "| "
+                        + " | ".join(str(v) for v in relationship.values())
+                        + " |\n"
+                    )
+            else:
+                mdfile.write("*No relationship data available*\n\n")
+
+    elif file_format == "txt":
+        # Plain text export
+        with open(output_path, "w", encoding="utf-8") as txtfile:
+            txtfile.write("LIGHTRAG DATA EXPORT\n")
+            txtfile.write("=" * 80 + "\n\n")
+
+            # Entities
+            txtfile.write("ENTITIES\n")
+            txtfile.write("-" * 80 + "\n")
+            if entities_data:
+                # Create fixed width columns
+                col_widths = {
+                    k: max(len(k), max(len(str(e[k])) for e in entities_data))
+                    for k in entities_data[0]
+                }
+                header = " ".join(k.ljust(col_widths[k]) for k in entities_data[0])
+                txtfile.write(header + "\n")
+                txtfile.write("-" * len(header) + "\n")
+
+                # Write rows
+                for entity in entities_data:
+                    row = " ".join(
+                        str(v).ljust(col_widths[k]) for k, v in entity.items()
+                    )
+                    txtfile.write(row + "\n")
+                txtfile.write("\n\n")
+            else:
+                txtfile.write("No entity data available\n\n")
+
+            # Relations
+            txtfile.write("RELATIONS\n")
+            txtfile.write("-" * 80 + "\n")
+            if relations_data:
+                # Create fixed width columns
+                col_widths = {
+                    k: max(len(k), max(len(str(r[k])) for r in relations_data))
+                    for k in relations_data[0]
+                }
+                header = " ".join(k.ljust(col_widths[k]) for k in relations_data[0])
+                txtfile.write(header + "\n")
+                txtfile.write("-" * len(header) + "\n")
+
+                # Write rows
+                for relation in relations_data:
+                    row = " ".join(
+                        str(v).ljust(col_widths[k]) for k, v in relation.items()
+                    )
+                    txtfile.write(row + "\n")
+                txtfile.write("\n\n")
+            else:
+                txtfile.write("No relation data available\n\n")
+
+            # Relationships
+            txtfile.write("RELATIONSHIPS\n")
+            txtfile.write("-" * 80 + "\n")
+            if relationships_data:
+                # Create fixed width columns
+                col_widths = {
+                    k: max(len(k), max(len(str(r[k])) for r in relationships_data))
+                    for k in relationships_data[0]
+                }
+                header = " ".join(
+                    k.ljust(col_widths[k]) for k in relationships_data[0]
+                )
+                txtfile.write(header + "\n")
+                txtfile.write("-" * len(header) + "\n")
+
+                # Write rows
+                for relationship in relationships_data:
+                    row = " ".join(
+                        str(v).ljust(col_widths[k]) for k, v in relationship.items()
+                    )
+                    txtfile.write(row + "\n")
+            else:
+                txtfile.write("No relationship data available\n\n")
+
+    else:
+        raise ValueError(
+            f"Unsupported file format: {file_format}. "
+            f"Choose from: csv, excel, md, txt"
+        )
+
+    if file_format is not None:
+        print(f"Data exported to: {output_path} with format: {file_format}")
+    else:
+        print("Data displayed as table format")
+
+
+def export_data(
+    chunk_entity_relation_graph,
+    entities_vdb,
+    relationships_vdb,
+    output_path: str,
+    file_format: str = "csv",
+    include_vector_data: bool = False,
+) -> None:
+    """
+    Synchronously exports all entities, relations, and relationships to various formats.
+
+    Args:
+        chunk_entity_relation_graph: Graph storage instance for entities and relations
+        entities_vdb: Vector database storage for entities
+        relationships_vdb: Vector database storage for relationships
+        output_path: The path to the output file (including extension).
+        file_format: Output format - "csv", "excel", "md", "txt".
+            - csv: Comma-separated values file
+            - excel: Microsoft Excel file with multiple sheets
+            - md: Markdown tables
+            - txt: Plain text formatted output
+        include_vector_data: Whether to include data from the vector database.
+    """
+    try:
+        loop = asyncio.get_event_loop()
+    except RuntimeError:
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+
+    loop.run_until_complete(
+        aexport_data(
+            chunk_entity_relation_graph,
+            entities_vdb,
+            relationships_vdb,
+            output_path,
+            file_format,
+            include_vector_data,
+        )
+    )


def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any]:
    """Lazily import a class from an external module based on the package of the caller."""
    # Get the caller's module and package
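
A minimal usage sketch for the new export helpers, not part of the commit. It assumes the functions live in lightrag.utils (the module containing always_get_an_event_loop above) and that `rag` is an initialized LightRAG instance exposing the storage attributes named in this diff (chunk_entity_relation_graph, entities_vdb, relationships_vdb).

from lightrag.utils import export_data  # sync wrapper around aexport_data

# `rag` is assumed to be an already-initialized LightRAG instance
export_data(
    rag.chunk_entity_relation_graph,
    rag.entities_vdb,
    rag.relationships_vdb,
    output_path="kg_export.xlsx",
    file_format="excel",  # one of "csv", "excel", "md", "txt"
    include_vector_data=False,  # True also embeds vector-store payloads as strings
)

Two costs worth knowing: the relation pass probes has_edge() for every ordered pair of labels, so export time grows quadratically with the number of entities, and the excel branch imports pandas (with the xlsxwriter engine) at call time.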

lightrag/utils_graph.py (new file, 1066 lines)

File diff suppressed because it is too large