Merge branch 'main' into add-env-settings

This commit is contained in:
yangdx
2025-02-16 22:34:39 +08:00
25 changed files with 1086 additions and 793 deletions

View File

@@ -1,3 +1,5 @@
from __future__ import annotations
import os
from dotenv import load_dotenv
from dataclasses import dataclass, field
@@ -5,10 +7,8 @@ from enum import Enum
from typing import (
Any,
Literal,
Optional,
TypedDict,
TypeVar,
Union,
)
import numpy as np
from .utils import EmbeddingFunc
@@ -72,7 +72,7 @@ class QueryParam:
ll_keywords: list[str] = field(default_factory=list)
"""List of low-level keywords to refine retrieval focus."""
conversation_history: list[dict[str, Any]] = field(default_factory=list)
conversation_history: list[dict[str, str]] = field(default_factory=list)
"""Stores past conversation history to maintain context.
Format: [{"role": "user/assistant", "content": "message"}].
"""
@@ -86,19 +86,15 @@ class StorageNameSpace:
namespace: str
global_config: dict[str, Any]
async def index_done_callback(self):
async def index_done_callback(self) -> None:
"""Commit the storage operations after indexing.

The implementation shown here does nothing (its body is ``pass``).
"""
pass
async def query_done_callback(self):
"""Commit the storage operations after querying"""
pass
@dataclass
class BaseVectorStorage(StorageNameSpace):
embedding_func: EmbeddingFunc
meta_fields: set = field(default_factory=set)
meta_fields: set[str] = field(default_factory=set)
async def query(self, query: str, top_k: int) -> list[dict[str, Any]]:
raise NotImplementedError
@@ -109,12 +105,20 @@ class BaseVectorStorage(StorageNameSpace):
"""
raise NotImplementedError
async def delete_entity(self, entity_name: str) -> None:
"""Delete a single entity by its name.

This definition provides no implementation: it always raises
NotImplementedError.
"""
raise NotImplementedError
async def delete_entity_relation(self, entity_name: str) -> None:
"""Delete relations for a given entity by scanning metadata.

This definition provides no implementation: it always raises
NotImplementedError.
"""
raise NotImplementedError
@dataclass
class BaseKVStorage(StorageNameSpace):
embedding_func: EmbeddingFunc
embedding_func: EmbeddingFunc | None = None
async def get_by_id(self, id: str) -> Union[dict[str, Any], None]:
async def get_by_id(self, id: str) -> dict[str, Any] | None:
raise NotImplementedError
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
@@ -133,50 +137,75 @@ class BaseKVStorage(StorageNameSpace):
@dataclass
class BaseGraphStorage(StorageNameSpace):
embedding_func: EmbeddingFunc = None
embedding_func: EmbeddingFunc | None = None
async def has_node(self, node_id: str) -> bool:
    """Check if a node exists in the graph.

    Args:
        node_id: Identifier of the node to look up.

    Returns:
        Whether the node exists.

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # The docstring lives inside the body so it is attached to the method
    # (a bare string placed before ``def`` is not a docstring).
    raise NotImplementedError
async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
    """Check if an edge exists in the graph.

    Args:
        source_node_id: Identifier of the edge's source node.
        target_node_id: Identifier of the edge's target node.

    Returns:
        Whether the edge exists.

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
async def node_degree(self, node_id: str) -> int:
    """Get the degree of a node.

    Args:
        node_id: Identifier of the node.

    Returns:
        The node's degree.

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
    """Get the degree of an edge.

    Args:
        src_id: Identifier of the edge's source node.
        tgt_id: Identifier of the edge's target node.

    Returns:
        The edge's degree.

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
async def get_node(self, node_id: str) -> Union[dict, None]:
async def get_node(self, node_id: str) -> dict[str, str] | None:
    """Get a node by its id.

    Args:
        node_id: Identifier of the node to fetch.

    Returns:
        The node's data as a string-to-string mapping, or None
        (presumably when the node is absent; confirm against the
        concrete storage backends).

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
"""Get an edge by its source and target node ids."""
async def get_edge(
self, source_node_id: str, target_node_id: str
) -> Union[dict, None]:
) -> dict[str, str] | None:
raise NotImplementedError
async def get_node_edges(
self, source_node_id: str
) -> Union[list[tuple[str, str]], None]:
async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
    """Get all edges connected to a node.

    Args:
        source_node_id: Identifier of the node whose edges are requested.

    Returns:
        A list of two-string tuples, one per edge, or None (presumably
        when the node is absent; confirm against the concrete storage
        backends).

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
async def upsert_node(self, node_id: str, node_data: dict[str, str]):
async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
    """Upsert a node into the graph.

    Args:
        node_id: Identifier of the node to insert or update.
        node_data: String-to-string mapping of data for the node.

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
"""Upsert an edge into the graph."""
async def upsert_edge(
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
):
) -> None:
raise NotImplementedError
async def delete_node(self, node_id: str):
async def delete_node(self, node_id: str) -> None:
    """Delete a node from the graph.

    Args:
        node_id: Identifier of the node to delete.

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
async def embed_nodes(
    self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
    """Embed nodes using an algorithm.

    Args:
        algorithm: Name of the embedding algorithm.

    Returns:
        A tuple of a NumPy array and a list of strings.

    Raises:
        NotImplementedError: Always; the message states that node
            embedding is not used in lightrag.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError("Node embedding is not used in lightrag.")
async def get_all_labels(self) -> list[str]:
    """Get all labels in the graph.

    Returns:
        A list of label strings.

    Raises:
        NotImplementedError: Always; this definition has no implementation.
    """
    # Docstring moved inside the body so it is attached to the method.
    raise NotImplementedError
"""Get a knowledge graph of a node."""
async def get_knowledge_graph(
self, node_label: str, max_depth: int = 5
) -> KnowledgeGraph:
@@ -208,9 +237,9 @@ class DocProcessingStatus:
"""ISO format timestamp when document was created"""
updated_at: str
"""ISO format timestamp when document was last updated"""
chunks_count: Optional[int] = None
chunks_count: int | None = None
"""Number of chunks after splitting, used for processing"""
error: Optional[str] = None
error: str | None = None
"""Error message if failed"""
metadata: dict[str, Any] = field(default_factory=dict)
"""Additional metadata"""