diff --git a/examples/lightrag_ollama_neo4j_milvus_demo.py b/examples/lightrag_ollama_neo4j_milvus_mongo_demo.py similarity index 90% rename from examples/lightrag_ollama_neo4j_milvus_demo.py rename to examples/lightrag_ollama_neo4j_milvus_mongo_demo.py index 6ed6da83..8d26ba65 100644 --- a/examples/lightrag_ollama_neo4j_milvus_demo.py +++ b/examples/lightrag_ollama_neo4j_milvus_mongo_demo.py @@ -10,6 +10,10 @@ if not os.path.exists(WORKING_DIR): os.mkdir(WORKING_DIR) print(f"WorkingDir: {WORKING_DIR}") +# mongo +os.environ["MONGO_URI"] = "mongodb://root:root@localhost:27017/" +os.environ["MONGO_DATABASE"] = "LightRAG" + # neo4j BATCH_SIZE_NODES = 500 BATCH_SIZE_EDGES = 100 @@ -38,6 +42,7 @@ rag = LightRAG( texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434" ), ), + kv_storage="MongoKVStorage", graph_storage="Neo4JStorage", vector_storage="MilvusVectorDBStorge", ) diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py new file mode 100644 index 00000000..61222357 --- /dev/null +++ b/lightrag/kg/mongo_impl.py @@ -0,0 +1,51 @@ +import os +from tqdm.asyncio import tqdm as tqdm_async +from dataclasses import dataclass +from pymongo import MongoClient + +from lightrag.utils import logger + +from lightrag.base import BaseKVStorage + + +@dataclass +class MongoKVStorage(BaseKVStorage): + def __post_init__(self): + client = MongoClient( + os.environ.get("MONGO_URI", "mongodb://root:root@localhost:27017/") + ) + database = client.get_database(os.environ.get("MONGO_DATABASE", "LightRAG")) + self._data = database.get_collection(self.namespace) + logger.info(f"Use MongoDB as KV {self.namespace}") + + async def all_keys(self) -> list[str]: + return [x["_id"] for x in self._data.find({}, {"_id": 1})] + + async def get_by_id(self, id): + return self._data.find_one({"_id": id}) + + async def get_by_ids(self, ids, fields=None): + if fields is None: + return list(self._data.find({"_id": {"$in": ids}})) + return list( + self._data.find( + {"_id": {"$in": ids}}, + {field: 1 for field in fields}, + ) + ) + + async def filter_keys(self, data: list[str]) -> set[str]: + existing_ids = [ + str(x["_id"]) for x in self._data.find({"_id": {"$in": data}}, {"_id": 1}) + ] + return set([s for s in data if s not in existing_ids]) + + async def upsert(self, data: dict[str, dict]): + for k, v in tqdm_async(data.items(), desc="Upserting"): + self._data.update_one({"_id": k}, {"$set": v}, upsert=True) + data[k]["_id"] = k + return data + + async def drop(self): + """ """ + pass diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 4fb94b38..a25dab79 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -46,6 +46,8 @@ from .kg.oracle_impl import OracleKVStorage, OracleGraphStorage, OracleVectorDBS from .kg.milvus_impl import MilvusVectorDBStorge +from .kg.mongo_impl import MongoKVStorage + # future KG integrations # from .kg.ArangoDB_impl import ( @@ -227,6 +229,7 @@ class LightRAG: # kv storage "JsonKVStorage": JsonKVStorage, "OracleKVStorage": OracleKVStorage, + "MongoKVStorage": MongoKVStorage, # vector storage "NanoVectorDBStorage": NanoVectorDBStorage, "OracleVectorDBStorage": OracleVectorDBStorage, diff --git a/requirements.txt b/requirements.txt index 4ccb2bb3..ad96fe7d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ ollama openai oracledb pymilvus +pymongo pyvis tenacity # lmdeploy[all]