Add MongoDB as KV storage
This commit is contained in:
@@ -10,6 +10,10 @@ if not os.path.exists(WORKING_DIR):
|
|||||||
os.mkdir(WORKING_DIR)
|
os.mkdir(WORKING_DIR)
|
||||||
print(f"WorkingDir: {WORKING_DIR}")
|
print(f"WorkingDir: {WORKING_DIR}")
|
||||||
|
|
||||||
|
# mongo
|
||||||
|
os.environ["MONGO_URI"] = "mongodb://root:root@localhost:27017/"
|
||||||
|
os.environ["MONGO_DATABASE"] = "LightRAG"
|
||||||
|
|
||||||
# neo4j
|
# neo4j
|
||||||
BATCH_SIZE_NODES = 500
|
BATCH_SIZE_NODES = 500
|
||||||
BATCH_SIZE_EDGES = 100
|
BATCH_SIZE_EDGES = 100
|
||||||
@@ -38,6 +42,7 @@ rag = LightRAG(
|
|||||||
texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434"
|
texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434"
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
|
kv_storage="MongoKVStorage",
|
||||||
graph_storage="Neo4JStorage",
|
graph_storage="Neo4JStorage",
|
||||||
vector_storage="MilvusVectorDBStorge",
|
vector_storage="MilvusVectorDBStorge",
|
||||||
)
|
)
|
51
lightrag/kg/mongo_impl.py
Normal file
51
lightrag/kg/mongo_impl.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
import os
|
||||||
|
from tqdm.asyncio import tqdm as tqdm_async
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from pymongo import MongoClient
|
||||||
|
|
||||||
|
from lightrag.utils import logger
|
||||||
|
|
||||||
|
from lightrag.base import BaseKVStorage
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class MongoKVStorage(BaseKVStorage):
    """Key-value storage backed by a MongoDB collection.

    Each record is stored as one document whose ``_id`` is the record key.
    The collection name is taken from ``self.namespace``.

    NOTE(review): the ``pymongo`` client used here is synchronous, so every
    ``async`` method below performs blocking I/O when awaited.
    """

    def __post_init__(self):
        """Connect to MongoDB and bind ``self._data`` to the namespace collection.

        The connection string is read from the ``MONGO_URI`` environment
        variable and the database name from ``MONGO_DATABASE``.
        """
        # NOTE(review): the fallback URI embeds default root credentials;
        # set MONGO_URI explicitly outside of local development.
        client = MongoClient(
            os.environ.get("MONGO_URI", "mongodb://root:root@localhost:27017/")
        )
        database = client.get_database(os.environ.get("MONGO_DATABASE", "LightRAG"))
        self._data = database.get_collection(self.namespace)
        logger.info(f"Use MongoDB as KV {self.namespace}")

    async def all_keys(self) -> list[str]:
        """Return the ``_id`` of every document in the collection."""
        # Project only ``_id`` so document bodies are not transferred.
        return [doc["_id"] for doc in self._data.find({}, {"_id": 1})]

    async def get_by_id(self, id):
        """Return the document whose ``_id`` equals ``id``, or ``None`` if absent."""
        return self._data.find_one({"_id": id})

    async def get_by_ids(self, ids, fields=None):
        """Return the documents whose ``_id`` is in ``ids``.

        Args:
            ids: Keys to look up.
            fields: Optional iterable of field names to include. When
                ``None``, whole documents are returned.

        Returns:
            A list of the matching documents. Keys with no document are
            simply not represented in the result.
        """
        # ``$in`` must receive an array; copy so any iterable of keys works.
        query = {"_id": {"$in": list(ids)}}
        if fields is None:
            return list(self._data.find(query))
        projection = {field: 1 for field in fields}
        return list(self._data.find(query, projection))

    async def filter_keys(self, data: list[str]) -> set[str]:
        """Return the subset of ``data`` that has no document in the collection.

        Args:
            data: Candidate keys.

        Returns:
            The keys from ``data`` that are not yet stored.
        """
        candidates = list(data)
        cursor = self._data.find({"_id": {"$in": candidates}}, {"_id": 1})
        # A set gives O(1) membership tests for the filter below.
        existing_ids = {str(doc["_id"]) for doc in cursor}
        return {key for key in candidates if key not in existing_ids}

    async def upsert(self, data: dict[str, dict]):
        """Insert or update one document per entry of ``data``.

        Args:
            data: Mapping of key to the fields to set on that key's document.

        Returns:
            The same ``data`` mapping. Each value dict is mutated in place to
            also carry its key under ``"_id"``.
        """
        for k, v in tqdm_async(data.items(), desc="Upserting"):
            self._data.update_one({"_id": k}, {"$set": v}, upsert=True)
            # Mirror the stored document shape on the caller's dict.
            data[k]["_id"] = k
        return data

    async def drop(self):
        """Do nothing; dropping the collection is not implemented."""
        pass
|
@@ -46,6 +46,8 @@ from .kg.oracle_impl import OracleKVStorage, OracleGraphStorage, OracleVectorDBS
|
|||||||
|
|
||||||
from .kg.milvus_impl import MilvusVectorDBStorge
|
from .kg.milvus_impl import MilvusVectorDBStorge
|
||||||
|
|
||||||
|
from .kg.mongo_impl import MongoKVStorage
|
||||||
|
|
||||||
# future KG integrations
|
# future KG integrations
|
||||||
|
|
||||||
# from .kg.ArangoDB_impl import (
|
# from .kg.ArangoDB_impl import (
|
||||||
@@ -227,6 +229,7 @@ class LightRAG:
|
|||||||
# kv storage
|
# kv storage
|
||||||
"JsonKVStorage": JsonKVStorage,
|
"JsonKVStorage": JsonKVStorage,
|
||||||
"OracleKVStorage": OracleKVStorage,
|
"OracleKVStorage": OracleKVStorage,
|
||||||
|
"MongoKVStorage": MongoKVStorage,
|
||||||
# vector storage
|
# vector storage
|
||||||
"NanoVectorDBStorage": NanoVectorDBStorage,
|
"NanoVectorDBStorage": NanoVectorDBStorage,
|
||||||
"OracleVectorDBStorage": OracleVectorDBStorage,
|
"OracleVectorDBStorage": OracleVectorDBStorage,
|
||||||
|
@@ -12,6 +12,7 @@ ollama
|
|||||||
openai
|
openai
|
||||||
oracledb
|
oracledb
|
||||||
pymilvus
|
pymilvus
|
||||||
|
pymongo
|
||||||
pyvis
|
pyvis
|
||||||
tenacity
|
tenacity
|
||||||
# lmdeploy[all]
|
# lmdeploy[all]
|
||||||
|
Reference in New Issue
Block a user