Merge branch 'main' of github.com:lcjqyml/LightRAG
@@ -45,6 +45,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
 🎉 News
 </summary>

+- [X] [2025.03.18]🎯📢LightRAG now supports citation functionality.
 - [X] [2025.02.05]🎯📢Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG) understanding extremely long-context videos.
 - [X] [2025.01.13]🎯📢Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models.
 - [X] [2025.01.06]🎯📢You can now [use PostgreSQL for Storage](#using-postgresql-for-storage).
@@ -673,6 +674,22 @@ rag.insert(text_content.decode('utf-8'))

 </details>

+<details>
+<summary><b>Citation Functionality</b></summary>
+
+By providing file paths, the system ensures that sources can be traced back to their original documents.
+
+```python
+# Define documents and their file paths
+documents = ["Document content 1", "Document content 2"]
+file_paths = ["path/to/doc1.txt", "path/to/doc2.txt"]
+
+# Insert documents with file paths
+rag.insert(documents, file_paths=file_paths)
+```
+
+</details>
+
 ## Storage

 <details>
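This commit also lets `insert` accept a single string for `file_paths` (the parameter is typed `str | list[str] | None` further down in this diff), so the single-document case stays a one-liner. A minimal sketch with illustrative values, assuming `rag` is an already-initialized `LightRAG` instance:

```python
# Single document, single file path (content and path are placeholders)
rag.insert("Document content 1", file_paths="path/to/doc1.txt")
```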
@@ -10,6 +10,7 @@ import logging.config
 import uvicorn
 import pipmaster as pm
 from fastapi.staticfiles import StaticFiles
+from fastapi.responses import RedirectResponse
 from pathlib import Path
 import configparser
 from ascii_colors import ASCIIColors
@@ -341,6 +342,11 @@ def create_app(args):
 ollama_api = OllamaAPI(rag, top_k=args.top_k)
 app.include_router(ollama_api.router, prefix="/api")

+@app.get("/")
+async def redirect_to_webui():
+"""Redirect root path to /webui"""
+return RedirectResponse(url="/webui")
+
 @app.get("/auth-status", dependencies=[Depends(optional_api_key)])
 async def get_auth_status():
 """Get authentication status and guest token if auth is not configured"""
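A quick way to confirm the new root route is to request `/` and check that the client ends up on the web UI. A hedged sketch, assuming the API server is running locally on its default port (adjust the URL to your deployment):

```python
# Follows the RedirectResponse issued by the new "/" route and prints the final URL.
import urllib.request

with urllib.request.urlopen("http://localhost:9621/") as resp:  # port is an assumption
    print(resp.url)  # expected to end with /webui
```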
@@ -42,45 +42,38 @@ def get_auth_dependency():
 request: Request,
 token: str = Depends(OAuth2PasswordBearer(tokenUrl="login", auto_error=False)),
 ):
-if request.url.path in whitelist:
-return

 # Check if authentication is configured
 auth_configured = bool(
 os.getenv("AUTH_USERNAME") and os.getenv("AUTH_PASSWORD")
 )

-# If authentication is not configured, accept any token including guest tokens
+# If authentication is not configured, skip all validation
 if not auth_configured:
-if token: # If token is provided, still validate it
-try:
-# Validate token but don't raise exception
-token_info = auth_handler.validate_token(token)
-# Check if it's a guest token
-if token_info.get("role") != "guest":
-# Non-guest tokens are not valid when auth is not configured
-pass
-except Exception as e:
-# Ignore validation errors but log them
-print(f"Token validation error (ignored): {str(e)}")
 return

-# If authentication is configured, validate the token and reject guest tokens
+# For configured auth, allow whitelist paths without token
+if request.url.path in whitelist:
+return
+
+# Require token for all other paths when auth is configured
 if not token:
 raise HTTPException(
 status_code=status.HTTP_401_UNAUTHORIZED, detail="Token required"
 )

+try:
 token_info = auth_handler.validate_token(token)

 # Reject guest tokens when authentication is configured
 if token_info.get("role") == "guest":
 raise HTTPException(
 status_code=status.HTTP_401_UNAUTHORIZED,
 detail="Authentication required. Guest access not allowed when authentication is configured.",
 )
+except Exception:
+raise HTTPException(
+status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid token"
+)

-# At this point, we have a valid non-guest token
 return

 return dependency
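For review, the behavior of the rewritten dependency can be restated as a small standalone helper. This is a simplified sketch, not the module's code; note that in the actual change the guest-role rejection sits inside the same try/except as validate_token, so its 401 is re-raised with the generic "Invalid token" detail.

```python
# Hypothetical summary of the new auth dependency behavior (names are stand-ins
# for the module-level whitelist and auth_handler used above).
def is_request_allowed(path, token, auth_configured, whitelist, validate_token) -> bool:
    if not auth_configured:
        return True                      # auth disabled: skip all validation
    if path in whitelist:
        return True                      # whitelisted paths need no token
    if not token:
        return False                     # token required
    try:
        info = validate_token(token)
    except Exception:
        return False                     # invalid token
    return info.get("role") != "guest"   # guest tokens rejected when auth is on
```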
File diff suppressed because one or more lines are too long
@@ -8,8 +8,8 @@
 <link rel="icon" type="image/svg+xml" href="logo.png" />
 <meta name="viewport" content="width=device-width, initial-scale=1.0" />
 <title>Lightrag</title>
-<script type="module" crossorigin src="./assets/index-DSwGiLVk.js"></script>
-<link rel="stylesheet" crossorigin href="./assets/index-mPRIIErN.css">
+<script type="module" crossorigin src="/webui/assets/index-CSrxfS-k.js"></script>
+<link rel="stylesheet" crossorigin href="/webui/assets/index-mPRIIErN.css">
 </head>
 <body>
 <div id="root"></div>
@@ -257,6 +257,8 @@ class DocProcessingStatus:
 """First 100 chars of document content, used for preview"""
 content_length: int
 """Total length of document"""
+file_path: str
+"""File path of the document"""
 status: DocStatus
 """Current processing status"""
 created_at: str
@@ -87,6 +87,9 @@ class JsonDocStatusStorage(DocStatusStorage):
 # If content is missing, use content_summary as content
 if "content" not in data and "content_summary" in data:
 data["content"] = data["content_summary"]
+# If file_path is not in data, use document id as file path
+if "file_path" not in data:
+data["file_path"] = "no-file-path"
 result[k] = DocProcessingStatus(**data)
 except KeyError as e:
 logger.error(f"Missing required field for document {k}: {e}")
@@ -423,6 +423,7 @@ class PGVectorStorage(BaseVectorStorage):
 "full_doc_id": item["full_doc_id"],
 "content": item["content"],
 "content_vector": json.dumps(item["__vector__"].tolist()),
+"file_path": item["file_path"],
 }
 except Exception as e:
 logger.error(f"Error to prepare upsert,\nsql: {e}\nitem: {item}")
@@ -445,6 +446,7 @@ class PGVectorStorage(BaseVectorStorage):
 "content": item["content"],
 "content_vector": json.dumps(item["__vector__"].tolist()),
 "chunk_ids": chunk_ids,
+"file_path": item["file_path"],
 # TODO: add document_id
 }
 return upsert_sql, data
@@ -465,6 +467,7 @@ class PGVectorStorage(BaseVectorStorage):
 "content": item["content"],
 "content_vector": json.dumps(item["__vector__"].tolist()),
 "chunk_ids": chunk_ids,
+"file_path": item["file_path"],
 # TODO: add document_id
 }
 return upsert_sql, data
@@ -732,7 +735,7 @@ class PGDocStatusStorage(DocStatusStorage):
 if result is None or result == []:
 return None
 else:
-return DocProcessingStatus(
+return dict(
 content=result[0]["content"],
 content_length=result[0]["content_length"],
 content_summary=result[0]["content_summary"],
@@ -740,6 +743,7 @@ class PGDocStatusStorage(DocStatusStorage):
 chunks_count=result[0]["chunks_count"],
 created_at=result[0]["created_at"],
 updated_at=result[0]["updated_at"],
+file_path=result[0]["file_path"],
 )

 async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
@@ -774,6 +778,7 @@ class PGDocStatusStorage(DocStatusStorage):
 created_at=element["created_at"],
 updated_at=element["updated_at"],
 chunks_count=element["chunks_count"],
+file_path=element["file_path"],
 )
 for element in result
 }
@@ -793,14 +798,15 @@ class PGDocStatusStorage(DocStatusStorage):
 if not data:
 return

-sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status)
-values($1,$2,$3,$4,$5,$6,$7)
+sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path)
+values($1,$2,$3,$4,$5,$6,$7,$8)
 on conflict(id,workspace) do update set
 content = EXCLUDED.content,
 content_summary = EXCLUDED.content_summary,
 content_length = EXCLUDED.content_length,
 chunks_count = EXCLUDED.chunks_count,
 status = EXCLUDED.status,
+file_path = EXCLUDED.file_path,
 updated_at = CURRENT_TIMESTAMP"""
 for k, v in data.items():
 # chunks_count is optional
@@ -814,6 +820,7 @@ class PGDocStatusStorage(DocStatusStorage):
 "content_length": v["content_length"],
 "chunks_count": v["chunks_count"] if "chunks_count" in v else -1,
 "status": v["status"],
+"file_path": v["file_path"],
 },
 )

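To exercise the widened statement outside LightRAG's own PostgreSQL wrapper, a minimal asyncpg sketch shows how the new eighth parameter lines up (the DSN and values are placeholders; the SQL text is the template from this hunk):

```python
import asyncio
import asyncpg  # assumed to be installed; LightRAG's DB wrapper is bypassed here

UPSERT_DOC_STATUS = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status,file_path)
values($1,$2,$3,$4,$5,$6,$7,$8)
on conflict(id,workspace) do update set
content = EXCLUDED.content,
content_summary = EXCLUDED.content_summary,
content_length = EXCLUDED.content_length,
chunks_count = EXCLUDED.chunks_count,
status = EXCLUDED.status,
file_path = EXCLUDED.file_path,
updated_at = CURRENT_TIMESTAMP"""

async def main() -> None:
    conn = await asyncpg.connect("postgresql://user:pass@localhost/lightrag")  # placeholder DSN
    await conn.execute(
        UPSERT_DOC_STATUS,
        "default",             # $1 workspace
        "doc-123",             # $2 id (placeholder)
        "full document text",  # $3 content
        "full document text",  # $4 content_summary
        18,                    # $5 content_length
        -1,                    # $6 chunks_count (-1 when not yet known, as above)
        "pending",             # $7 status (illustrative value)
        "path/to/doc1.txt",    # $8 file_path -- the new column
    )
    await conn.close()

asyncio.run(main())
```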
@@ -1058,7 +1065,6 @@ class PGGraphStorage(BaseGraphStorage):

 Args:
 query (str): a cypher query to be executed
-params (dict): parameters for the query

 Returns:
 list[dict[str, Any]]: a list of dictionaries containing the result set
@@ -1549,6 +1555,7 @@ TABLES = {
 tokens INTEGER,
 content TEXT,
 content_vector VECTOR,
+file_path VARCHAR(256),
 create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
 update_time TIMESTAMP,
 CONSTRAINT LIGHTRAG_DOC_CHUNKS_PK PRIMARY KEY (workspace, id)
@@ -1564,6 +1571,7 @@ TABLES = {
 create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
 update_time TIMESTAMP,
 chunk_id TEXT NULL,
+file_path TEXT NULL,
 CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
 )"""
 },
@@ -1578,6 +1586,7 @@ TABLES = {
 create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
 update_time TIMESTAMP,
 chunk_id TEXT NULL,
+file_path TEXT NULL,
 CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
 )"""
 },
@@ -1602,6 +1611,7 @@ TABLES = {
 content_length int4 NULL,
 chunks_count int4 NULL,
 status varchar(64) NULL,
+file_path TEXT NULL,
 created_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
 updated_at timestamp DEFAULT CURRENT_TIMESTAMP NULL,
 CONSTRAINT LIGHTRAG_DOC_STATUS_PK PRIMARY KEY (workspace, id)
@@ -1650,35 +1660,38 @@ SQL_TEMPLATES = {
 update_time = CURRENT_TIMESTAMP
 """,
 "upsert_chunk": """INSERT INTO LIGHTRAG_DOC_CHUNKS (workspace, id, tokens,
-chunk_order_index, full_doc_id, content, content_vector)
-VALUES ($1, $2, $3, $4, $5, $6, $7)
+chunk_order_index, full_doc_id, content, content_vector, file_path)
+VALUES ($1, $2, $3, $4, $5, $6, $7, $8)
 ON CONFLICT (workspace,id) DO UPDATE
 SET tokens=EXCLUDED.tokens,
 chunk_order_index=EXCLUDED.chunk_order_index,
 full_doc_id=EXCLUDED.full_doc_id,
 content = EXCLUDED.content,
 content_vector=EXCLUDED.content_vector,
+file_path=EXCLUDED.file_path,
 update_time = CURRENT_TIMESTAMP
 """,
 "upsert_entity": """INSERT INTO LIGHTRAG_VDB_ENTITY (workspace, id, entity_name, content,
-content_vector, chunk_ids)
-VALUES ($1, $2, $3, $4, $5, $6::varchar[])
+content_vector, chunk_ids, file_path)
+VALUES ($1, $2, $3, $4, $5, $6::varchar[], $7::varchar[])
 ON CONFLICT (workspace,id) DO UPDATE
 SET entity_name=EXCLUDED.entity_name,
 content=EXCLUDED.content,
 content_vector=EXCLUDED.content_vector,
 chunk_ids=EXCLUDED.chunk_ids,
+file_path=EXCLUDED.file_path,
 update_time=CURRENT_TIMESTAMP
 """,
 "upsert_relationship": """INSERT INTO LIGHTRAG_VDB_RELATION (workspace, id, source_id,
-target_id, content, content_vector, chunk_ids)
-VALUES ($1, $2, $3, $4, $5, $6, $7::varchar[])
+target_id, content, content_vector, chunk_ids, file_path)
+VALUES ($1, $2, $3, $4, $5, $6, $7::varchar[], $8::varchar[])
 ON CONFLICT (workspace,id) DO UPDATE
 SET source_id=EXCLUDED.source_id,
 target_id=EXCLUDED.target_id,
 content=EXCLUDED.content,
 content_vector=EXCLUDED.content_vector,
 chunk_ids=EXCLUDED.chunk_ids,
+file_path=EXCLUDED.file_path,
 update_time = CURRENT_TIMESTAMP
 """,
 # SQL for VectorStorage
@@ -389,20 +389,21 @@ class LightRAG:
 self.namespace_prefix, NameSpace.VECTOR_STORE_ENTITIES
 ),
 embedding_func=self.embedding_func,
-meta_fields={"entity_name", "source_id", "content"},
+meta_fields={"entity_name", "source_id", "content", "file_path"},
 )
 self.relationships_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
 namespace=make_namespace(
 self.namespace_prefix, NameSpace.VECTOR_STORE_RELATIONSHIPS
 ),
 embedding_func=self.embedding_func,
-meta_fields={"src_id", "tgt_id", "source_id", "content"},
+meta_fields={"src_id", "tgt_id", "source_id", "content", "file_path"},
 )
 self.chunks_vdb: BaseVectorStorage = self.vector_db_storage_cls( # type: ignore
 namespace=make_namespace(
 self.namespace_prefix, NameSpace.VECTOR_STORE_CHUNKS
 ),
 embedding_func=self.embedding_func,
+meta_fields={"full_doc_id", "content", "file_path"},
 )

 # Initialize document status storage
@@ -547,6 +548,7 @@ class LightRAG:
 split_by_character: str | None = None,
 split_by_character_only: bool = False,
 ids: str | list[str] | None = None,
+file_paths: str | list[str] | None = None,
 ) -> None:
 """Sync Insert documents with checkpoint support

@@ -557,10 +559,13 @@ class LightRAG:
 split_by_character_only: if split_by_character_only is True, split the string by character only, when
 split_by_character is None, this parameter is ignored.
 ids: single string of the document ID or list of unique document IDs, if not provided, MD5 hash IDs will be generated
+file_paths: single string of the file path or list of file paths, used for citation
 """
 loop = always_get_an_event_loop()
 loop.run_until_complete(
-self.ainsert(input, split_by_character, split_by_character_only, ids)
+self.ainsert(
+input, split_by_character, split_by_character_only, ids, file_paths
+)
 )

 async def ainsert(
@@ -569,6 +574,7 @@ class LightRAG:
 split_by_character: str | None = None,
 split_by_character_only: bool = False,
 ids: str | list[str] | None = None,
+file_paths: str | list[str] | None = None,
 ) -> None:
 """Async Insert documents with checkpoint support

@@ -579,8 +585,9 @@ class LightRAG:
 split_by_character_only: if split_by_character_only is True, split the string by character only, when
 split_by_character is None, this parameter is ignored.
 ids: list of unique document IDs, if not provided, MD5 hash IDs will be generated
+file_paths: list of file paths corresponding to each document, used for citation
 """
-await self.apipeline_enqueue_documents(input, ids)
+await self.apipeline_enqueue_documents(input, ids, file_paths)
 await self.apipeline_process_enqueue_documents(
 split_by_character, split_by_character_only
 )
@@ -654,7 +661,10 @@ class LightRAG:
 await self._insert_done()

 async def apipeline_enqueue_documents(
-self, input: str | list[str], ids: list[str] | None = None
+self,
+input: str | list[str],
+ids: list[str] | None = None,
+file_paths: str | list[str] | None = None,
 ) -> None:
 """
 Pipeline for Processing Documents
@@ -664,11 +674,30 @@ class LightRAG:
 3. Generate document initial status
 4. Filter out already processed documents
 5. Enqueue document in status
+
+Args:
+input: Single document string or list of document strings
+ids: list of unique document IDs, if not provided, MD5 hash IDs will be generated
+file_paths: list of file paths corresponding to each document, used for citation
 """
 if isinstance(input, str):
 input = [input]
 if isinstance(ids, str):
 ids = [ids]
+if isinstance(file_paths, str):
+file_paths = [file_paths]
+
+# If file_paths is provided, ensure it matches the number of documents
+if file_paths is not None:
+if isinstance(file_paths, str):
+file_paths = [file_paths]
+if len(file_paths) != len(input):
+raise ValueError(
+"Number of file paths must match the number of documents"
+)
+else:
+# If no file paths provided, use placeholder
+file_paths = ["unknown_source"] * len(input)

 # 1. Validate ids if provided or generate MD5 hash IDs
 if ids is not None:
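The length check added above fails fast from user code; a short sketch of both outcomes (content strings and paths are illustrative, `rag` is an initialized `LightRAG` instance):

```python
# Mismatched lengths are rejected before anything is enqueued
try:
    rag.insert(
        ["Document content 1", "Document content 2"],
        file_paths=["path/to/doc1.txt"],  # one path for two documents
    )
except ValueError as exc:
    print(exc)  # "Number of file paths must match the number of documents"

# Omitting file_paths falls back to the "unknown_source" placeholder
rag.insert(["Document content 3"])
```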
@@ -681,32 +710,59 @@ class LightRAG:
 raise ValueError("IDs must be unique")

 # Generate contents dict of IDs provided by user and documents
-contents = {id_: doc for id_, doc in zip(ids, input)}
+contents = {
+id_: {"content": doc, "file_path": path}
+for id_, doc, path in zip(ids, input, file_paths)
+}
 else:
 # Clean input text and remove duplicates
-input = list(set(clean_text(doc) for doc in input))
-# Generate contents dict of MD5 hash IDs and documents
-contents = {compute_mdhash_id(doc, prefix="doc-"): doc for doc in input}
+cleaned_input = [
+(clean_text(doc), path) for doc, path in zip(input, file_paths)
+]
+unique_content_with_paths = {}
+
+# Keep track of unique content and their paths
+for content, path in cleaned_input:
+if content not in unique_content_with_paths:
+unique_content_with_paths[content] = path
+
+# Generate contents dict of MD5 hash IDs and documents with paths
+contents = {
+compute_mdhash_id(content, prefix="doc-"): {
+"content": content,
+"file_path": path,
+}
+for content, path in unique_content_with_paths.items()
+}

 # 2. Remove duplicate contents
-unique_contents = {
-id_: content
-for content, id_ in {
-content: id_ for id_, content in contents.items()
-}.items()
+unique_contents = {}
+for id_, content_data in contents.items():
+content = content_data["content"]
+file_path = content_data["file_path"]
+if content not in unique_contents:
+unique_contents[content] = (id_, file_path)
+
+# Reconstruct contents with unique content
+contents = {
+id_: {"content": content, "file_path": file_path}
+for content, (id_, file_path) in unique_contents.items()
 }

 # 3. Generate document initial status
 new_docs: dict[str, Any] = {
 id_: {
-"content": content,
-"content_summary": get_content_summary(content),
-"content_length": len(content),
 "status": DocStatus.PENDING,
+"content": content_data["content"],
+"content_summary": get_content_summary(content_data["content"]),
+"content_length": len(content_data["content"]),
 "created_at": datetime.now().isoformat(),
 "updated_at": datetime.now().isoformat(),
+"file_path": content_data[
+"file_path"
+], # Store file path in document status
 }
-for id_, content in unique_contents.items()
+for id_, content_data in contents.items()
 }

 # 4. Filter out already processed documents
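The de-duplication above keeps the first file path seen for repeated content before building the `doc-` keyed dict. The same idea in isolation, with tiny stand-ins for `clean_text` and `compute_mdhash_id` so the sketch runs on its own:

```python
from hashlib import md5

def _clean(text: str) -> str:      # stand-in for lightrag's clean_text
    return text.strip()

def _doc_id(text: str) -> str:     # stand-in for compute_mdhash_id(..., prefix="doc-")
    return "doc-" + md5(text.encode("utf-8")).hexdigest()

docs = ["Doc A", "Doc A", "Doc B"]            # second item duplicates the first
paths = ["a1.txt", "a2.txt", "b.txt"]         # illustrative paths

unique_content_with_paths: dict[str, str] = {}
for content, path in ((_clean(d), p) for d, p in zip(docs, paths)):
    # first path wins for duplicated content, mirroring the logic above
    unique_content_with_paths.setdefault(content, path)

contents = {
    _doc_id(content): {"content": content, "file_path": path}
    for content, path in unique_content_with_paths.items()
}
print(contents)  # two entries; "Doc A" keeps "a1.txt"
```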
@@ -841,11 +897,15 @@ class LightRAG:
 ) -> None:
 """Process single document"""
 try:
+# Get file path from status document
+file_path = getattr(status_doc, "file_path", "unknown_source")
+
 # Generate chunks from document
 chunks: dict[str, Any] = {
 compute_mdhash_id(dp["content"], prefix="chunk-"): {
 **dp,
 "full_doc_id": doc_id,
+"file_path": file_path, # Add file path to each chunk
 }
 for dp in self.chunking_func(
 status_doc.content,
@@ -856,6 +916,7 @@ class LightRAG:
 self.tiktoken_model_name,
 )
 }
+
 # Process document (text chunks and full docs) in parallel
 # Create tasks with references for potential cancellation
 doc_status_task = asyncio.create_task(
@@ -863,11 +924,13 @@ class LightRAG:
 {
 doc_id: {
 "status": DocStatus.PROCESSING,
-"updated_at": datetime.now().isoformat(),
+"chunks_count": len(chunks),
 "content": status_doc.content,
 "content_summary": status_doc.content_summary,
 "content_length": status_doc.content_length,
 "created_at": status_doc.created_at,
+"updated_at": datetime.now().isoformat(),
+"file_path": file_path,
 }
 }
 )
@@ -906,6 +969,7 @@ class LightRAG:
 "content_length": status_doc.content_length,
 "created_at": status_doc.created_at,
 "updated_at": datetime.now().isoformat(),
+"file_path": file_path,
 }
 }
 )
@@ -937,6 +1001,7 @@ class LightRAG:
 "content_length": status_doc.content_length,
 "created_at": status_doc.created_at,
 "updated_at": datetime.now().isoformat(),
+"file_path": file_path,
 }
 }
 )
@@ -1063,7 +1128,10 @@ class LightRAG:
 loop.run_until_complete(self.ainsert_custom_kg(custom_kg, full_doc_id))

 async def ainsert_custom_kg(
-self, custom_kg: dict[str, Any], full_doc_id: str = None
+self,
+custom_kg: dict[str, Any],
+full_doc_id: str = None,
+file_path: str = "custom_kg",
 ) -> None:
 update_storage = False
 try:
@@ -1093,6 +1161,7 @@ class LightRAG:
 "full_doc_id": full_doc_id
 if full_doc_id is not None
 else source_id,
+"file_path": file_path, # Add file path
 "status": DocStatus.PROCESSED,
 }
 all_chunks_data[chunk_id] = chunk_entry
@@ -1197,6 +1266,7 @@ class LightRAG:
 "source_id": dp["source_id"],
 "description": dp["description"],
 "entity_type": dp["entity_type"],
+"file_path": file_path, # Add file path
 }
 for dp in all_entities_data
 }
@@ -1212,6 +1282,7 @@ class LightRAG:
 "keywords": dp["keywords"],
 "description": dp["description"],
 "weight": dp["weight"],
+"file_path": file_path, # Add file path
 }
 for dp in all_relationships_data
 }
@@ -1473,8 +1544,7 @@ class LightRAG:
 """
 try:
 # 1. Get the document status and related data
-doc_status = await self.doc_status.get_by_id(doc_id)
-if not doc_status:
+if not await self.doc_status.get_by_id(doc_id):
 logger.warning(f"Document {doc_id} not found")
 return

@@ -2220,7 +2290,6 @@ class LightRAG:
 """Synchronously create a new entity.
-

 Creates a new entity in the knowledge graph and adds it to the vector database.

 Args:
 entity_name: Name of the new entity
 entity_data: Dictionary containing entity attributes, e.g. {"description": "description", "entity_type": "type"}
@@ -138,6 +138,7 @@ async def _handle_entity_relation_summary(
 async def _handle_single_entity_extraction(
 record_attributes: list[str],
 chunk_key: str,
+file_path: str = "unknown_source",
 ):
 if len(record_attributes) < 4 or record_attributes[0] != '"entity"':
 return None
@@ -171,13 +172,14 @@ async def _handle_single_entity_extraction(
 entity_type=entity_type,
 description=entity_description,
 source_id=chunk_key,
-metadata={"created_at": time.time()},
+metadata={"created_at": time.time(), "file_path": file_path},
 )


 async def _handle_single_relationship_extraction(
 record_attributes: list[str],
 chunk_key: str,
+file_path: str = "unknown_source",
 ):
 if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
 return None
@@ -199,7 +201,7 @@ async def _handle_single_relationship_extraction(
 description=edge_description,
 keywords=edge_keywords,
 source_id=edge_source_id,
-metadata={"created_at": time.time()},
+metadata={"created_at": time.time(), "file_path": file_path},
 )


@@ -213,6 +215,7 @@ async def _merge_nodes_then_upsert(
 already_entity_types = []
 already_source_ids = []
 already_description = []
+already_file_paths = []

 already_node = await knowledge_graph_inst.get_node(entity_name)
 if already_node is not None:
@@ -220,6 +223,11 @@ async def _merge_nodes_then_upsert(
 already_source_ids.extend(
 split_string_by_multi_markers(already_node["source_id"], [GRAPH_FIELD_SEP])
 )
+already_file_paths.extend(
+split_string_by_multi_markers(
+already_node["metadata"]["file_path"], [GRAPH_FIELD_SEP]
+)
+)
 already_description.append(already_node["description"])

 entity_type = sorted(
@@ -235,6 +243,11 @@ async def _merge_nodes_then_upsert(
 source_id = GRAPH_FIELD_SEP.join(
 set([dp["source_id"] for dp in nodes_data] + already_source_ids)
 )
+file_path = GRAPH_FIELD_SEP.join(
+set([dp["metadata"]["file_path"] for dp in nodes_data] + already_file_paths)
+)
+
+logger.debug(f"file_path: {file_path}")
 description = await _handle_entity_relation_summary(
 entity_name, description, global_config
 )
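The merged `file_path` is a separator-joined set, built the same way as `source_id`. In isolation (the real `GRAPH_FIELD_SEP` constant comes from `lightrag.prompt`; its value here is an assumption):

```python
GRAPH_FIELD_SEP = "<SEP>"  # assumed to match lightrag's constant

nodes_data = [
    {"metadata": {"file_path": "path/to/doc1.txt"}},
    {"metadata": {"file_path": "path/to/doc2.txt"}},
]
already_file_paths = ["path/to/doc1.txt"]  # paths already stored on the node

file_path = GRAPH_FIELD_SEP.join(
    set([dp["metadata"]["file_path"] for dp in nodes_data] + already_file_paths)
)
print(file_path)  # the two distinct paths, <SEP>-joined (set order is not guaranteed)
```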
@@ -243,6 +256,7 @@ async def _merge_nodes_then_upsert(
 entity_type=entity_type,
 description=description,
 source_id=source_id,
+file_path=file_path,
 )
 await knowledge_graph_inst.upsert_node(
 entity_name,
@@ -263,6 +277,7 @@ async def _merge_edges_then_upsert(
 already_source_ids = []
 already_description = []
 already_keywords = []
+already_file_paths = []

 if await knowledge_graph_inst.has_edge(src_id, tgt_id):
 already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
@@ -279,6 +294,14 @@ async def _merge_edges_then_upsert(
 )
 )
+
+# Get file_path with empty string default if missing or None
+if already_edge.get("file_path") is not None:
+already_file_paths.extend(
+split_string_by_multi_markers(
+already_edge["metadata"]["file_path"], [GRAPH_FIELD_SEP]
+)
+)

 # Get description with empty string default if missing or None
 if already_edge.get("description") is not None:
 already_description.append(already_edge["description"])
@@ -315,6 +338,16 @@ async def _merge_edges_then_upsert(
 + already_source_ids
 )
 )
+file_path = GRAPH_FIELD_SEP.join(
+set(
+[
+dp["metadata"]["file_path"]
+for dp in edges_data
+if dp.get("metadata", {}).get("file_path")
+]
++ already_file_paths
+)
+)

 for need_insert_id in [src_id, tgt_id]:
 if not (await knowledge_graph_inst.has_node(need_insert_id)):
@@ -325,6 +358,7 @@ async def _merge_edges_then_upsert(
 "source_id": source_id,
 "description": description,
 "entity_type": "UNKNOWN",
+"file_path": file_path,
 },
 )
 description = await _handle_entity_relation_summary(
@@ -338,6 +372,7 @@ async def _merge_edges_then_upsert(
 description=description,
 keywords=keywords,
 source_id=source_id,
+file_path=file_path,
 ),
 )

@@ -347,6 +382,7 @@ async def _merge_edges_then_upsert(
 description=description,
 keywords=keywords,
 source_id=source_id,
+file_path=file_path,
 )

 return edge_data
@@ -456,11 +492,14 @@ async def extract_entities(
 else:
 return await use_llm_func(input_text)

-async def _process_extraction_result(result: str, chunk_key: str):
+async def _process_extraction_result(
+result: str, chunk_key: str, file_path: str = "unknown_source"
+):
 """Process a single extraction result (either initial or gleaning)
 Args:
 result (str): The extraction result to process
 chunk_key (str): The chunk key for source tracking
+file_path (str): The file path for citation
 Returns:
 tuple: (nodes_dict, edges_dict) containing the extracted entities and relationships
 """
@@ -482,14 +521,14 @@ async def extract_entities(
 )

 if_entities = await _handle_single_entity_extraction(
-record_attributes, chunk_key
+record_attributes, chunk_key, file_path
 )
 if if_entities is not None:
 maybe_nodes[if_entities["entity_name"]].append(if_entities)
 continue

 if_relation = await _handle_single_relationship_extraction(
-record_attributes, chunk_key
+record_attributes, chunk_key, file_path
 )
 if if_relation is not None:
 maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
@@ -508,6 +547,8 @@ async def extract_entities(
 chunk_key = chunk_key_dp[0]
 chunk_dp = chunk_key_dp[1]
 content = chunk_dp["content"]
+# Get file path from chunk data or use default
+file_path = chunk_dp.get("file_path", "unknown_source")

 # Get initial extraction
 hint_prompt = entity_extract_prompt.format(
@@ -517,9 +558,9 @@ async def extract_entities(
 final_result = await _user_llm_func_with_cache(hint_prompt)
 history = pack_user_ass_to_openai_messages(hint_prompt, final_result)

-# Process initial extraction
+# Process initial extraction with file path
 maybe_nodes, maybe_edges = await _process_extraction_result(
-final_result, chunk_key
+final_result, chunk_key, file_path
 )

 # Process additional gleaning results
@@ -530,9 +571,9 @@ async def extract_entities(

 history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)

-# Process gleaning result separately
+# Process gleaning result separately with file path
 glean_nodes, glean_edges = await _process_extraction_result(
-glean_result, chunk_key
+glean_result, chunk_key, file_path
 )

 # Merge results
@@ -637,8 +678,10 @@ async def extract_entities(
 "entity_type": dp["entity_type"],
 "content": f"{dp['entity_name']}\n{dp['description']}",
 "source_id": dp["source_id"],
+"file_path": dp.get("file_path", "unknown_source"),
 "metadata": {
-"created_at": dp.get("metadata", {}).get("created_at", time.time())
+"created_at": dp.get("created_at", time.time()),
+"file_path": dp.get("file_path", "unknown_source"),
 },
 }
 for dp in all_entities_data
@@ -653,8 +696,10 @@ async def extract_entities(
 "keywords": dp["keywords"],
 "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
 "source_id": dp["source_id"],
+"file_path": dp.get("file_path", "unknown_source"),
 "metadata": {
-"created_at": dp.get("metadata", {}).get("created_at", time.time())
+"created_at": dp.get("created_at", time.time()),
+"file_path": dp.get("file_path", "unknown_source"),
 },
 }
 for dp in all_relationships_data
@@ -1232,12 +1277,20 @@ async def _get_node_data(
 "description",
 "rank",
 "created_at",
+"file_path",
 ]
 ]
 for i, n in enumerate(node_datas):
 created_at = n.get("created_at", "UNKNOWN")
 if isinstance(created_at, (int, float)):
 created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
+
+# Get file path from metadata or directly from node data
+file_path = n.get("file_path", "unknown_source")
+if not file_path or file_path == "unknown_source":
+# Try to get from metadata
+file_path = n.get("metadata", {}).get("file_path", "unknown_source")
+
 entites_section_list.append(
 [
 i,
@@ -1246,6 +1299,7 @@ async def _get_node_data(
 n.get("description", "UNKNOWN"),
 n["rank"],
 created_at,
+file_path,
 ]
 )
 entities_context = list_of_list_to_csv(entites_section_list)
@@ -1260,6 +1314,7 @@ async def _get_node_data(
 "weight",
 "rank",
 "created_at",
+"file_path",
 ]
 ]
 for i, e in enumerate(use_relations):
@@ -1267,6 +1322,13 @@ async def _get_node_data(
 # Convert timestamp to readable format
 if isinstance(created_at, (int, float)):
 created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
+
+# Get file path from metadata or directly from edge data
+file_path = e.get("file_path", "unknown_source")
+if not file_path or file_path == "unknown_source":
+# Try to get from metadata
+file_path = e.get("metadata", {}).get("file_path", "unknown_source")
+
 relations_section_list.append(
 [
 i,
@@ -1277,6 +1339,7 @@ async def _get_node_data(
 e["weight"],
 e["rank"],
 created_at,
+file_path,
 ]
 )
 relations_context = list_of_list_to_csv(relations_section_list)
@@ -1492,6 +1555,7 @@ async def _get_edge_data(
 "weight",
 "rank",
 "created_at",
+"file_path",
 ]
 ]
 for i, e in enumerate(edge_datas):
@@ -1499,6 +1563,13 @@ async def _get_edge_data(
 # Convert timestamp to readable format
 if isinstance(created_at, (int, float)):
 created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
+
+# Get file path from metadata or directly from edge data
+file_path = e.get("file_path", "unknown_source")
+if not file_path or file_path == "unknown_source":
+# Try to get from metadata
+file_path = e.get("metadata", {}).get("file_path", "unknown_source")
+
 relations_section_list.append(
 [
 i,
@@ -1509,16 +1580,26 @@ async def _get_edge_data(
 e["weight"],
 e["rank"],
 created_at,
+file_path,
 ]
 )
 relations_context = list_of_list_to_csv(relations_section_list)

-entites_section_list = [["id", "entity", "type", "description", "rank"]]
+entites_section_list = [
+["id", "entity", "type", "description", "rank", "created_at", "file_path"]
+]
 for i, n in enumerate(use_entities):
-created_at = e.get("created_at", "Unknown")
+created_at = n.get("created_at", "Unknown")
 # Convert timestamp to readable format
 if isinstance(created_at, (int, float)):
 created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
+
+# Get file path from metadata or directly from node data
+file_path = n.get("file_path", "unknown_source")
+if not file_path or file_path == "unknown_source":
+# Try to get from metadata
+file_path = n.get("metadata", {}).get("file_path", "unknown_source")
+
 entites_section_list.append(
 [
 i,
@@ -1527,6 +1608,7 @@ async def _get_edge_data(
 n.get("description", "UNKNOWN"),
 n["rank"],
 created_at,
+file_path,
 ]
 )
 entities_context = list_of_list_to_csv(entites_section_list)
@@ -1882,13 +1964,14 @@ async def kg_query_with_keywords(
 len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
 logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")

+# 6. Generate response
 response = await use_model_func(
 query,
 system_prompt=sys_prompt,
 stream=query_param.stream,
 )

-# 清理响应内容
+# Clean up response content
 if isinstance(response, str) and len(response) > len(sys_prompt):
 response = (
 response.replace(sys_prompt, "")
@@ -61,7 +61,7 @@ Text:
 ```
 while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.

-Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. “If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
+Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."

 The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.

@@ -92,7 +92,7 @@ Among the hardest hit, Nexon Technologies saw its stock plummet by 7.8% after re

 Meanwhile, commodity markets reflected a mixed sentiment. Gold futures rose by 1.5%, reaching $2,080 per ounce, as investors sought safe-haven assets. Crude oil prices continued their rally, climbing to $87.60 per barrel, supported by supply constraints and strong demand.

-Financial experts are closely watching the Federal Reserve’s next move, as speculation grows over potential rate hikes. The upcoming policy announcement is expected to influence investor confidence and overall market stability.
+Financial experts are closely watching the Federal Reserve's next move, as speculation grows over potential rate hikes. The upcoming policy announcement is expected to influence investor confidence and overall market stability.
 ```

 Output:
@@ -222,6 +222,7 @@ When handling relationships with timestamps:
 - Use markdown formatting with appropriate section headings
 - Please respond in the same language as the user's question.
 - Ensure the response maintains continuity with the conversation history.
+- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path)
 - If you don't know the answer, just say so.
 - Do not make anything up. Do not include information not provided by the Knowledge Base."""
 
@@ -319,6 +320,7 @@ When handling content with timestamps:
 - Use markdown formatting with appropriate section headings
 - Please respond in the same language as the user's question.
 - Ensure the response maintains continuity with the conversation history.
+- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path)
 - If you don't know the answer, just say so.
 - Do not include information not provided by the Document Chunks."""
 
@@ -378,8 +380,8 @@ When handling information with timestamps:
 - Use markdown formatting with appropriate section headings
 - Please respond in the same language as the user's question.
 - Ensure the response maintains continuity with the conversation history.
-- Organize answer in sesctions focusing on one main point or aspect of the answer
+- Organize answer in sections focusing on one main point or aspect of the answer
 - Use clear and descriptive section titles that reflect the content
-- List up to 5 most important reference sources at the end under "References" sesction. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), in the following format: [KG/DC] Source content
+- List up to 5 most important reference sources at the end under "References" section. Clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (DC), and include the file path if available, in the following format: [KG/DC] Source content (File: file_path)
 - If you don't know the answer, just say so. Do not make anything up.
 - Do not include information not provided by the Data Sources."""
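The three prompt hunks above all standardize on the same citation line format, `[KG/DC] Source content (File: file_path)`. As a rough illustration only, a consumer could pick such lines out of a response with a small parser like the sketch below; the regex, function name, and sample line are assumptions for this sketch, not part of the commit.

```typescript
// Hypothetical helper, not part of LightRAG: split a reference line of the form
// "[KG/DC] Source content (File: file_path)" into its parts.
const CITATION_RE = /^\s*(?:-\s*)?\[(KG|DC)\]\s+(.*?)(?:\s+\(File:\s*([^)]+)\))?\s*$/

export function parseCitation(line: string) {
  const match = CITATION_RE.exec(line)
  if (!match) return null
  const [, source, content, filePath] = match
  return { source, content, filePath: filePath ?? null }
}

// Example with a made-up reference line:
// parseCitation('[DC] Nexon Technologies earnings summary (File: docs/report.txt)')
// -> { source: 'DC', content: 'Nexon Technologies earnings summary', filePath: 'docs/report.txt' }
```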
@@ -1,3 +1,3 @@
 VITE_BACKEND_URL=http://localhost:9621
 VITE_API_PROXY=true
-VITE_API_ENDPOINTS=/api,/documents,/graphs,/graph,/health,/query,/docs,/openapi.json,/login,/auth-status
+VITE_API_ENDPOINTS=/,/api,/documents,/graphs,/graph,/health,/query,/docs,/openapi.json,/login,/auth-status
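The only change here is adding the root path `/` to VITE_API_ENDPOINTS so the new root-to-/webui redirect is also reachable through the dev proxy. The repository's actual vite.config is not shown in this commit; the sketch below is just one plausible way such a comma-separated list could be turned into dev-server proxy rules.

```typescript
// Sketch only (assumed wiring, not the repository's real vite.config.ts):
// build one proxy entry per endpoint listed in VITE_API_ENDPOINTS.
import { defineConfig, loadEnv } from 'vite'

export default defineConfig(({ mode }) => {
  const env = loadEnv(mode, process.cwd())
  const endpoints = (env.VITE_API_ENDPOINTS ?? '').split(',').filter(Boolean)

  // Each listed path ('/', '/api', '/documents', ...) is forwarded to the backend.
  const proxy = Object.fromEntries(
    endpoints.map((path) => [path, { target: env.VITE_BACKEND_URL, changeOrigin: true }])
  )

  return {
    server: {
      proxy: env.VITE_API_PROXY === 'true' ? proxy : undefined
    }
  }
})
```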
@@ -8,6 +8,8 @@ import { healthCheckInterval } from '@/lib/constants'
 import { useBackendState, useAuthStore } from '@/stores/state'
 import { useSettingsStore } from '@/stores/settings'
 import { useEffect } from 'react'
+import { useNavigate } from 'react-router-dom'
+import { navigationService } from '@/services/navigation'
 import SiteHeader from '@/features/SiteHeader'
 import { InvalidApiKeyError, RequireApiKeError } from '@/api/lightrag'
 
@@ -19,7 +21,13 @@ import ApiSite from '@/features/ApiSite'
 import { Tabs, TabsContent } from '@/components/ui/Tabs'
 
 function App() {
+const navigate = useNavigate();
 const message = useBackendState.use.message()
+
+// Initialize navigation service
+useEffect(() => {
+navigationService.setNavigate(navigate);
+}, [navigate]);
 const enableHealthCheck = useSettingsStore.use.enableHealthCheck()
 const currentTab = useSettingsStore.use.currentTab()
 const [apiKeyInvalid, setApiKeyInvalid] = useState(false)
@@ -1,8 +1,9 @@
 import axios, { AxiosError } from 'axios'
-import { backendBaseUrl, webuiPrefix } from '@/lib/constants'
+import { backendBaseUrl } from '@/lib/constants'
 import { errorMessage } from '@/lib/utils'
 import { useSettingsStore } from '@/stores/settings'
 import { useAuthStore } from '@/stores/state'
+import { navigationService } from '@/services/navigation'
 
 // Types
 export type LightragNodeType = {
@@ -157,21 +158,13 @@ axiosInstance.interceptors.request.use((config) => {
 const apiKey = useSettingsStore.getState().apiKey
 const token = localStorage.getItem('LIGHTRAG-API-TOKEN');
 
-// Check authentication status for paths that require authentication
-const authRequiredPaths = ['/documents', '/graphs', '/query', '/health']; // Add all paths that require authentication
-const isAuthRequired = authRequiredPaths.some(path => config.url?.includes(path));
-
-if (isAuthRequired && !token && config.url !== '/login') {
-// Cancel the request and return a rejected Promise
-return Promise.reject(new Error('Authentication required'));
-}
-
-if (apiKey) {
-config.headers['X-API-Key'] = apiKey
-}
+// Always include token if it exists, regardless of path
 if (token) {
 config.headers['Authorization'] = `Bearer ${token}`
 }
+if (apiKey) {
+config.headers['X-API-Key'] = apiKey
+}
 return config
 })
 
@@ -185,11 +178,11 @@ axiosInstance.interceptors.response.use(
 sessionStorage.clear();
 useAuthStore.getState().logout();
 
-if (window.location.pathname !== `${webuiPrefix}/#/login`) {
-window.location.href = `${webuiPrefix}/#/login`;
-}
+// Use navigation service to handle redirection
+navigationService.navigateToLogin();
 
-return Promise.reject(error);
+// Return a never-resolving promise to prevent further execution
+return new Promise(() => {});
 }
 throw new Error(
 `${error.response.status} ${error.response.statusText}\n${JSON.stringify(
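With the reworked interceptors above, authentication failures are handled centrally: the request interceptor always attaches the stored token, and the response interceptor logs out, navigates to the login route, and returns a promise that never settles. As a hedged illustration, the call site below is hypothetical (it assumes access to the module's `axiosInstance`) and only shows that ordinary callers no longer need their own auth handling.

```typescript
// Hypothetical call site: on an auth failure, the response interceptor above redirects
// to /login and the awaited promise never settles, so no auth-specific catch is needed.
async function loadDocuments() {
  try {
    const { data } = await axiosInstance.get('/documents')
    return data
  } catch (err) {
    // Reached for other errors re-thrown by the interceptor (e.g. non-auth HTTP errors).
    console.error('Failed to load documents:', err)
    return null
  }
}
```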
@@ -206,9 +206,9 @@ const LayoutsControl = () => {
 const layoutNoverlap = useLayoutNoverlap({
 maxIterations: maxIterations,
 settings: {
-margin: 2,
+margin: 5,
 expansion: 1.1,
-gridSize: 5,
+gridSize: 1,
 ratio: 1,
 speed: 3,
 }
@@ -1,7 +1,7 @@
 import { useCamera, useSigma } from '@react-sigma/core'
 import { useCallback } from 'react'
 import Button from '@/components/ui/Button'
-import { ZoomInIcon, ZoomOutIcon, FullscreenIcon } from 'lucide-react'
+import { ZoomInIcon, ZoomOutIcon, FullscreenIcon, RotateCwIcon, RotateCcwIcon } from 'lucide-react'
 import { controlButtonVariant } from '@/lib/constants'
 import { useTranslation } from 'react-i18next';
 
@@ -44,8 +44,50 @@ const ZoomControl = () => {
 }
 }, [sigma, reset])
 
+const handleRotate = useCallback(() => {
+if (!sigma) return
+
+const camera = sigma.getCamera()
+const currentAngle = camera.angle
+const newAngle = currentAngle + Math.PI / 8
+
+camera.animate(
+{ angle: newAngle },
+{ duration: 200 }
+)
+}, [sigma])
+
+const handleRotateCounterClockwise = useCallback(() => {
+if (!sigma) return
+
+const camera = sigma.getCamera()
+const currentAngle = camera.angle
+const newAngle = currentAngle - Math.PI / 8
+
+camera.animate(
+{ angle: newAngle },
+{ duration: 200 }
+)
+}, [sigma])
+
 return (
 <>
+<Button
+variant={controlButtonVariant}
+onClick={handleRotateCounterClockwise}
+tooltip={t('graphPanel.sideBar.zoomControl.rotateCameraCounterClockwise')}
+size="icon"
+>
+<RotateCcwIcon />
+</Button>
+<Button
+variant={controlButtonVariant}
+onClick={handleRotate}
+tooltip={t('graphPanel.sideBar.zoomControl.rotateCamera')}
+size="icon"
+>
+<RotateCwIcon />
+</Button>
 <Button
 variant={controlButtonVariant}
 onClick={handleResetZoom}
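The two new handlers differ only in the sign of the π/8 step. A single parameterized helper would express the same camera animation; the sketch below is not what the commit does, it just restates the pattern using the same `camera.angle` / `camera.animate` calls seen above.

```typescript
// Sketch: rotate the sigma camera by an arbitrary step, assuming the caller already
// holds the Sigma instance.
import type Sigma from 'sigma'

const rotateCameraBy = (sigma: Sigma, step = Math.PI / 8, duration = 200) => {
  const camera = sigma.getCamera()
  camera.animate({ angle: camera.angle + step }, { duration })
}

// rotateCameraBy(sigma)                 // positive step, as in handleRotate
// rotateCameraBy(sigma, -Math.PI / 8)   // negative step, as in handleRotateCounterClockwise
```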
@@ -45,7 +45,6 @@ export default function DocumentManager() {
 } else {
 setDocs(null)
 }
-// console.log(docs)
 } else {
 setDocs(null)
 }
@@ -141,7 +141,13 @@ const fetchGraph = async (label: string, maxDepth: number, minDegree: number) =>
 
 // Create a new graph instance with the raw graph data
 const createSigmaGraph = (rawGraph: RawGraph | null) => {
-// Always create a new graph instance
+// Skip graph creation if no data or empty nodes
+if (!rawGraph || !rawGraph.nodes.length) {
+console.log('No graph data available, skipping sigma graph creation');
+return null;
+}
+
+// Create new graph instance
 const graph = new DirectedGraph()
 
 // Add nodes from raw graph data
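Because `createSigmaGraph` can now return `null`, whatever loads its result into sigma has to guard first. The hook in this file handles that internally; the snippet below is only a hedged sketch of the guard pattern using the stock `useLoadGraph` hook from `@react-sigma/core`, with the graph-building function declared rather than imported since it is local to the repo's hook.

```typescript
// Sketch of the guard pattern (hypothetical component, not code from this commit).
import { useEffect } from 'react'
import { useLoadGraph } from '@react-sigma/core'
import { DirectedGraph } from 'graphology'

// Assumption for illustration: a createSigmaGraph-like builder that may return null.
declare function createSigmaGraph(rawGraph: unknown): DirectedGraph | null

const GraphLoader = ({ rawGraph }: { rawGraph: unknown }) => {
  const loadGraph = useLoadGraph()

  useEffect(() => {
    const graph = createSigmaGraph(rawGraph)
    if (graph) {
      loadGraph(graph) // only swap the displayed graph when there is data
    }
    // when null, leave whatever is currently displayed untouched
  }, [rawGraph, loadGraph])

  return null
}
```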
@@ -242,7 +248,7 @@ const useLightrangeGraph = () => {
 if (!isFetching && !fetchInProgressRef.current &&
 (paramsChanged || !useGraphStore.getState().graphDataFetchAttempted)) {
 
-// Only fetch data if the Graph tab is visible
+// Only fetch data if the Graph tab is visible and we haven't attempted a fetch yet
 if (!isGraphTabVisible) {
 console.log('Graph tab not visible, skipping data fetch');
 return;
@@ -595,6 +601,8 @@ const useLightrangeGraph = () => {
 rawGraph.edgeIdMap[newEdge.id] = rawGraph.edges.length - 1;
 // Update dynamic edge map
 rawGraph.edgeDynamicIdMap[newEdge.dynamicId] = rawGraph.edges.length - 1;
+} else {
+console.error('Edge already exists in rawGraph:', newEdge.id);
 }
 }
 
@@ -1,7 +1,7 @@
 import { ButtonVariantType } from '@/components/ui/Button'
 
 export const backendBaseUrl = ''
-export const webuiPrefix = ''
+export const webuiPrefix = '/webui/'
 
 export const controlButtonVariant: ButtonVariantType = 'ghost'
 
@@ -112,7 +112,9 @@
 "zoomControl": {
 "zoomIn": "Zoom In",
 "zoomOut": "Zoom Out",
-"resetZoom": "Reset Zoom"
+"resetZoom": "Reset Zoom",
+"rotateCamera": "Clockwise Rotate",
+"rotateCameraCounterClockwise": "Counter-Clockwise Rotate"
 },
 
 "layoutsControl": {
@@ -111,7 +111,9 @@
 "zoomControl": {
 "zoomIn": "放大",
 "zoomOut": "缩小",
-"resetZoom": "重置缩放"
+"resetZoom": "重置缩放",
+"rotateCamera": "顺时针旋转图形",
+"rotateCameraCounterClockwise": "逆时针旋转图形"
 },
 "layoutsControl": {
 "startAnimation": "继续布局动画",
lightrag_webui/src/services/navigation.ts (new file, 17 lines added)
@@ -0,0 +1,17 @@
+import { NavigateFunction } from 'react-router-dom';
+
+class NavigationService {
+private navigate: NavigateFunction | null = null;
+
+setNavigate(navigate: NavigateFunction) {
+this.navigate = navigate;
+}
+
+navigateToLogin() {
+if (this.navigate) {
+this.navigate('/login');
+}
+}
+}
+
+export const navigationService = new NavigationService();
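The new `navigationService` exists so that non-React modules (here, the axios interceptor) can trigger router navigation. As the `App()` hunk earlier shows, a component inside the router registers its `navigate` function with the service once. The standalone bridge component below is a hypothetical equivalent of that wiring, not additional code from the commit.

```typescript
import { useEffect } from 'react'
import { useNavigate } from 'react-router-dom'
import { navigationService } from '@/services/navigation'

// Hypothetical bridge mirroring what App does inline: register the router's navigate
// function so plain modules can later call navigationService.navigateToLogin().
function NavigationBridge() {
  const navigate = useNavigate()

  useEffect(() => {
    navigationService.setNavigate(navigate)
  }, [navigate])

  return null
}

export default NavigationBridge
```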