Merge remote-tracking branch 'origin/main' into refactor-api-server

This commit is contained in:
yangdx
2025-02-21 11:24:16 +08:00
40 changed files with 1393 additions and 592 deletions

View File

@@ -1 +1,157 @@
# print ("init package vars here. ......")
# Registry of storage categories. For each category key:
#   "implementations"  - names of the storage classes accepted for that category
#                        (each name must appear only once; the list is echoed
#                        verbatim in the "not compatible" error message)
#   "required_methods" - method names associated with the category
#                        NOTE(review): nothing in the visible code checks these.
STORAGE_IMPLEMENTATIONS: dict[str, dict[str, list[str]]] = {
    "KV_STORAGE": {
        "implementations": [
            "JsonKVStorage",
            "MongoKVStorage",
            "RedisKVStorage",
            "TiDBKVStorage",
            "PGKVStorage",
            "OracleKVStorage",
        ],
        "required_methods": ["get_by_id", "upsert"],
    },
    "GRAPH_STORAGE": {
        "implementations": [
            "NetworkXStorage",
            "Neo4JStorage",
            "MongoGraphStorage",
            "TiDBGraphStorage",
            "AGEStorage",
            "GremlinStorage",
            "PGGraphStorage",
            "OracleGraphStorage",
        ],
        "required_methods": ["upsert_node", "upsert_edge"],
    },
    "VECTOR_STORAGE": {
        "implementations": [
            "NanoVectorDBStorage",
            "MilvusVectorDBStorage",
            "ChromaVectorDBStorage",
            "TiDBVectorDBStorage",
            "PGVectorStorage",
            "FaissVectorDBStorage",
            "QdrantVectorDBStorage",
            "OracleVectorDBStorage",
            "MongoVectorDBStorage",
        ],
        "required_methods": ["query", "upsert"],
    },
    "DOC_STATUS_STORAGE": {
        # "PGDocStatusStorage" was previously listed twice; one entry is enough.
        "implementations": [
            "JsonDocStatusStorage",
            "PGDocStatusStorage",
            "MongoDocStatusStorage",
        ],
        "required_methods": ["get_docs_by_status"],
    },
}
# Environment variables that each storage implementation needs and that have
# no default value. An empty list means no such variable is listed for it.
# The shared groups below are copied with list(...) so every entry owns its
# own list object.
_TIDB_ENV = ("TIDB_USER", "TIDB_PASSWORD", "TIDB_DATABASE")
_POSTGRES_ENV = ("POSTGRES_USER", "POSTGRES_PASSWORD", "POSTGRES_DATABASE")
_ORACLE_ENV = ("ORACLE_DSN", "ORACLE_USER", "ORACLE_PASSWORD", "ORACLE_CONFIG_DIR")

STORAGE_ENV_REQUIREMENTS: dict[str, list[str]] = {
    # KV storage
    "JsonKVStorage": [],
    "MongoKVStorage": [],
    "RedisKVStorage": ["REDIS_URI"],
    "TiDBKVStorage": list(_TIDB_ENV),
    "PGKVStorage": list(_POSTGRES_ENV),
    "OracleKVStorage": list(_ORACLE_ENV),
    # Graph storage
    "NetworkXStorage": [],
    "Neo4JStorage": ["NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD"],
    "MongoGraphStorage": [],
    "TiDBGraphStorage": list(_TIDB_ENV),
    "AGEStorage": ["AGE_POSTGRES_DB", "AGE_POSTGRES_USER", "AGE_POSTGRES_PASSWORD"],
    "GremlinStorage": ["GREMLIN_HOST", "GREMLIN_PORT", "GREMLIN_GRAPH"],
    "PGGraphStorage": list(_POSTGRES_ENV),
    "OracleGraphStorage": list(_ORACLE_ENV),
    # Vector storage
    "NanoVectorDBStorage": [],
    "MilvusVectorDBStorage": [],
    "ChromaVectorDBStorage": [],
    "TiDBVectorDBStorage": list(_TIDB_ENV),
    "PGVectorStorage": list(_POSTGRES_ENV),
    "FaissVectorDBStorage": [],
    "QdrantVectorDBStorage": ["QDRANT_URL"],  # QDRANT_API_KEY has default value None
    "OracleVectorDBStorage": list(_ORACLE_ENV),
    "MongoVectorDBStorage": [],
    # Document status storage
    "JsonDocStatusStorage": [],
    "PGDocStatusStorage": list(_POSTGRES_ENV),
    "MongoDocStatusStorage": [],
}
# Maps each storage implementation name to the dotted module path (written
# with a leading ".") of the module that provides it. Every path has the form
# ".kg.<stem>_impl", so only the stem is spelled out per entry.
STORAGES: dict[str, str] = {
    impl_name: f".kg.{module_stem}_impl"
    for impl_name, module_stem in (
        ("NetworkXStorage", "networkx"),
        ("JsonKVStorage", "json_kv"),
        ("NanoVectorDBStorage", "nano_vector_db"),
        ("JsonDocStatusStorage", "json_doc_status"),
        ("Neo4JStorage", "neo4j"),
        ("OracleKVStorage", "oracle"),
        ("OracleGraphStorage", "oracle"),
        ("OracleVectorDBStorage", "oracle"),
        ("MilvusVectorDBStorage", "milvus"),
        ("MongoKVStorage", "mongo"),
        ("MongoDocStatusStorage", "mongo"),
        ("MongoGraphStorage", "mongo"),
        ("MongoVectorDBStorage", "mongo"),
        ("RedisKVStorage", "redis"),
        ("ChromaVectorDBStorage", "chroma"),
        ("TiDBKVStorage", "tidb"),
        ("TiDBVectorDBStorage", "tidb"),
        ("TiDBGraphStorage", "tidb"),
        ("PGKVStorage", "postgres"),
        ("PGVectorStorage", "postgres"),
        ("AGEStorage", "age"),
        ("PGGraphStorage", "postgres"),
        ("GremlinStorage", "gremlin"),
        ("PGDocStatusStorage", "postgres"),
        ("FaissVectorDBStorage", "faiss"),
        ("QdrantVectorDBStorage", "qdrant"),
    )
}
def verify_storage_implementation(storage_type: str, storage_name: str) -> None:
    """Check that a storage implementation name is registered for a storage type.

    Only the name is looked up in ``STORAGE_IMPLEMENTATIONS``; the
    ``required_methods`` entries of the registry are not inspected here.

    Args:
        storage_type: Storage type key (KV_STORAGE, GRAPH_STORAGE etc.)
        storage_name: Storage implementation name

    Raises:
        ValueError: If ``storage_type`` is not a key of the registry, or if
            ``storage_name`` is not listed among that type's implementations.
    """
    type_is_known = storage_type in STORAGE_IMPLEMENTATIONS
    if not type_is_known:
        raise ValueError(f"Unknown storage type: {storage_type}")

    compatible = STORAGE_IMPLEMENTATIONS[storage_type]["implementations"]
    if storage_name in compatible:
        return

    # List every accepted name so the caller can correct the configuration.
    compatible_names = ", ".join(compatible)
    raise ValueError(
        f"Storage implementation '{storage_name}' is not compatible with {storage_type}. "
        f"Compatible implementations are: {compatible_names}"
    )

View File

@@ -34,14 +34,9 @@ if not pm.is_installed("psycopg-pool"):
if not pm.is_installed("asyncpg"):
pm.install("asyncpg")
try:
import psycopg
from psycopg.rows import namedtuple_row
from psycopg_pool import AsyncConnectionPool, PoolTimeout
except ImportError:
raise ImportError(
"`psycopg-pool, psycopg[binary,pool], asyncpg` library is not installed. Please install it via pip: `pip install psycopg-pool psycopg[binary,pool] asyncpg`."
)
import psycopg
from psycopg.rows import namedtuple_row
from psycopg_pool import AsyncConnectionPool, PoolTimeout
class AGEQueryException(Exception):

View File

@@ -10,13 +10,8 @@ import pipmaster as pm
if not pm.is_installed("chromadb"):
pm.install("chromadb")
try:
from chromadb import HttpClient, PersistentClient
from chromadb.config import Settings
except ImportError as e:
raise ImportError(
"`chromadb` library is not installed. Please install it via pip: `pip install chromadb`."
) from e
from chromadb import HttpClient, PersistentClient
from chromadb.config import Settings
@final
@@ -113,9 +108,9 @@ class ChromaVectorDBStorage(BaseVectorStorage):
raise
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
logger.warning("Empty data provided to vector DB")
return []
return
try:
ids = list(data.keys())

View File

@@ -20,12 +20,7 @@ from lightrag.base import (
if not pm.is_installed("faiss"):
pm.install("faiss")
try:
import faiss
except ImportError as e:
raise ImportError(
"`faiss` library is not installed. Please install it via pip: `pip install faiss`."
) from e
import faiss
@final
@@ -84,10 +79,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
...
}
"""
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
logger.warning("You are inserting empty data to the vector DB")
return []
return
current_time = time.time()

View File

@@ -2,6 +2,7 @@ import asyncio
import inspect
import json
import os
import pipmaster as pm
from dataclasses import dataclass
from typing import Any, Dict, List, final
@@ -20,14 +21,12 @@ from lightrag.utils import logger
from ..base import BaseGraphStorage
try:
from gremlin_python.driver import client, serializer
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
from gremlin_python.driver.protocol import GremlinServerError
except ImportError as e:
raise ImportError(
"`gremlin` library is not installed. Please install it via pip: `pip install gremlin`."
) from e
if not pm.is_installed("gremlinpython"):
pm.install("gremlinpython")
from gremlin_python.driver import client, serializer
from gremlin_python.driver.aiohttp.transport import AiohttpTransport
from gremlin_python.driver.protocol import GremlinServerError
@final

View File

@@ -67,6 +67,10 @@ class JsonDocStatusStorage(DocStatusStorage):
write_json(self._data, self._file_name)
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
self._data.update(data)
await self.index_done_callback()

View File

@@ -43,6 +43,9 @@ class JsonKVStorage(BaseKVStorage):
return set(keys) - set(self._data.keys())
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
left_data = {k: v for k, v in data.items() if k not in self._data}
self._data.update(left_data)

View File

@@ -14,13 +14,8 @@ if not pm.is_installed("configparser"):
if not pm.is_installed("pymilvus"):
pm.install("pymilvus")
try:
import configparser
from pymilvus import MilvusClient
except ImportError as e:
raise ImportError(
"`pymilvus` library is not installed. Please install it via pip: `pip install pymilvus`."
) from e
import configparser
from pymilvus import MilvusClient
config = configparser.ConfigParser()
config.read("config.ini", "utf-8")
@@ -80,11 +75,11 @@ class MilvusVectorDBStorage(BaseVectorStorage):
)
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
if not len(data):
logger.warning("You insert an empty data to vector DB")
return []
list_data = [
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
list_data: list[dict[str, Any]] = [
{
"id": k,
**{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},

View File

@@ -25,18 +25,13 @@ if not pm.is_installed("pymongo"):
if not pm.is_installed("motor"):
pm.install("motor")
try:
from motor.motor_asyncio import (
AsyncIOMotorClient,
AsyncIOMotorDatabase,
AsyncIOMotorCollection,
)
from pymongo.operations import SearchIndexModel
from pymongo.errors import PyMongoError
except ImportError as e:
raise ImportError(
"`motor, pymongo` library is not installed. Please install it via pip: `pip install motor pymongo`."
) from e
from motor.motor_asyncio import (
AsyncIOMotorClient,
AsyncIOMotorDatabase,
AsyncIOMotorCollection,
)
from pymongo.operations import SearchIndexModel
from pymongo.errors import PyMongoError
config = configparser.ConfigParser()
config.read("config.ini", "utf-8")
@@ -113,8 +108,12 @@ class MongoKVStorage(BaseKVStorage):
return keys - existing_ids
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
update_tasks = []
update_tasks: list[Any] = []
for mode, items in data.items():
for k, v in items.items():
key = f"{mode}_{k}"
@@ -186,7 +185,10 @@ class MongoDocStatusStorage(DocStatusStorage):
return data - existing_ids
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
update_tasks = []
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
update_tasks: list[Any] = []
for k, v in data.items():
data[k]["_id"] = k
update_tasks.append(
@@ -860,10 +862,9 @@ class MongoVectorDBStorage(BaseVectorStorage):
logger.debug("vector index already exist")
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.debug(f"Inserting {len(data)} vectors to {self.namespace}")
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
logger.warning("You are inserting an empty data set to vector DB")
return []
return
list_data = [
{

View File

@@ -18,12 +18,7 @@ from lightrag.base import (
if not pm.is_installed("nano-vectordb"):
pm.install("nano-vectordb")
try:
from nano_vectordb import NanoVectorDB
except ImportError as e:
raise ImportError(
"`nano-vectordb` library is not installed. Please install it via pip: `pip install nano-vectordb`."
) from e
from nano_vectordb import NanoVectorDB
@final
@@ -50,10 +45,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
)
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
if not len(data):
logger.warning("You insert an empty data to vector DB")
return []
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
current_time = time.time()
list_data = [

View File

@@ -23,18 +23,13 @@ import pipmaster as pm
if not pm.is_installed("neo4j"):
pm.install("neo4j")
try:
from neo4j import (
AsyncGraphDatabase,
exceptions as neo4jExceptions,
AsyncDriver,
AsyncManagedTransaction,
GraphDatabase,
)
except ImportError as e:
raise ImportError(
"`neo4j` library is not installed. Please install it via pip: `pip install neo4j`."
) from e
from neo4j import (
AsyncGraphDatabase,
exceptions as neo4jExceptions,
AsyncDriver,
AsyncManagedTransaction,
GraphDatabase,
)
config = configparser.ConfigParser()
config.read("config.ini", "utf-8")

View File

@@ -17,16 +17,12 @@ import pipmaster as pm
if not pm.is_installed("networkx"):
pm.install("networkx")
if not pm.is_installed("graspologic"):
pm.install("graspologic")
try:
from graspologic import embed
import networkx as nx
except ImportError as e:
raise ImportError(
"`networkx` library is not installed. Please install it via pip: `pip install networkx`."
) from e
import networkx as nx
from graspologic import embed
@final

View File

@@ -26,14 +26,8 @@ if not pm.is_installed("graspologic"):
if not pm.is_installed("oracledb"):
pm.install("oracledb")
try:
from graspologic import embed
import oracledb
except ImportError as e:
raise ImportError(
"`oracledb` library is not installed. Please install it via pip: `pip install oracledb`."
) from e
from graspologic import embed
import oracledb
class OracleDB:
@@ -51,7 +45,7 @@ class OracleDB:
self.increment = 1
logger.info(f"Using the label {self.workspace} for Oracle Graph as identifier")
if self.user is None or self.password is None:
raise ValueError("Missing database user or password in addon_params")
raise ValueError("Missing database user or password")
try:
oracledb.defaults.fetch_lobs = False
@@ -332,6 +326,10 @@ class OracleKVStorage(BaseKVStorage):
################ INSERT METHODS ################
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
list_data = [
{

View File

@@ -38,14 +38,8 @@ import pipmaster as pm
if not pm.is_installed("asyncpg"):
pm.install("asyncpg")
try:
import asyncpg
from asyncpg import Pool
except ImportError as e:
raise ImportError(
"`asyncpg` library is not installed. Please install it via pip: `pip install asyncpg`."
) from e
import asyncpg
from asyncpg import Pool
class PostgreSQLDB:
@@ -61,9 +55,7 @@ class PostgreSQLDB:
self.pool: Pool | None = None
if self.user is None or self.password is None or self.database is None:
raise ValueError(
"Missing database user, password, or database in addon_params"
)
raise ValueError("Missing database user, password, or database")
async def initdb(self):
try:
@@ -353,6 +345,10 @@ class PGKVStorage(BaseKVStorage):
################ INSERT METHODS ################
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
pass
elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_DOCS):
@@ -454,10 +450,10 @@ class PGVectorStorage(BaseVectorStorage):
return upsert_sql, data
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
if not len(data):
logger.warning("You insert an empty data to vector DB")
return []
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
current_time = time.time()
list_data = [
{
@@ -618,6 +614,10 @@ class PGDocStatusStorage(DocStatusStorage):
Args:
data: dictionary of document IDs and their status data
"""
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status)
values($1,$2,$3,$4,$5,$6,$7)
on conflict(id,workspace) do update set

View File

@@ -15,16 +15,10 @@ config.read("config.ini", "utf-8")
import pipmaster as pm
if not pm.is_installed("qdrant_client"):
pm.install("qdrant_client")
if not pm.is_installed("qdrant-client"):
pm.install("qdrant-client")
try:
from qdrant_client import QdrantClient, models
except ImportError:
raise ImportError(
"`qdrant_client` library is not installed. Please install it via pip: `pip install qdrant-client`."
)
from qdrant_client import QdrantClient, models
def compute_mdhash_id_for_qdrant(
@@ -93,9 +87,9 @@ class QdrantVectorDBStorage(BaseVectorStorage):
)
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
if not len(data):
logger.warning("You insert an empty data to vector DB")
return []
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
list_data = [
{
"id": k,

View File

@@ -49,6 +49,9 @@ class RedisKVStorage(BaseKVStorage):
return set(keys) - existing_ids
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
pipe = self._redis.pipeline()
for k, v in data.items():

View File

@@ -20,13 +20,7 @@ if not pm.is_installed("pymysql"):
if not pm.is_installed("sqlalchemy"):
pm.install("sqlalchemy")
try:
from sqlalchemy import create_engine, text
except ImportError as e:
raise ImportError(
"`pymysql, sqlalchemy` library is not installed. Please install it via pip: `pip install pymysql sqlalchemy`."
) from e
from sqlalchemy import create_engine, text
class TiDB:
@@ -217,6 +211,9 @@ class TiDBKVStorage(BaseKVStorage):
################ INSERT full_doc AND chunks ################
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
left_data = {k: v for k, v in data.items() if k not in self._data}
self._data.update(left_data)
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
@@ -324,12 +321,12 @@ class TiDBVectorDBStorage(BaseVectorStorage):
###### INSERT entities And relationships ######
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
# ignore, upsert in TiDBKVStorage already
if not len(data):
logger.warning("You insert an empty data to vector DB")
return []
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS):
return []
return
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
list_data = [