From 9a91b68e62b6c76ce5e2a1aad7f507d4c4cf2500 Mon Sep 17 00:00:00 2001 From: ArnoChen Date: Fri, 14 Feb 2025 02:48:15 +0800 Subject: [PATCH 1/5] fix configuration errors of mongodb, neo4j, and qdrant backends. --- lightrag/api/lightrag_server.py | 1 + lightrag/kg/mongo_impl.py | 2 +- lightrag/kg/neo4j_impl.py | 10 +++++----- lightrag/kg/qdrant_impl.py | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index fe52f592..a442074d 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -101,6 +101,7 @@ def estimate_tokens(text: str) -> int: return int(tokens) + def get_default_host(binding_type: str) -> str: default_hosts = { "ollama": os.getenv("LLM_BINDING_HOST", "http://localhost:11434"), diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index 08ad9465..226aecf2 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -44,7 +44,7 @@ class MongoKVStorage(BaseKVStorage): database = client.get_database( os.environ.get( "MONGO_DATABASE", - mongo_database=config.get("mongodb", "database", fallback="LightRAG"), + config.get("mongodb", "database", fallback="LightRAG"), ) ) self._data = database.get_collection(self.namespace) diff --git a/lightrag/kg/neo4j_impl.py b/lightrag/kg/neo4j_impl.py index 587a9167..c1c313ca 100644 --- a/lightrag/kg/neo4j_impl.py +++ b/lightrag/kg/neo4j_impl.py @@ -48,13 +48,13 @@ class Neo4JStorage(BaseGraphStorage): self._driver = None self._driver_lock = asyncio.Lock() - URI = os.environ["NEO4J_URI", config.get("neo4j", "uri", fallback=None)] - USERNAME = os.environ[ + URI = os.environ.get("NEO4J_URI", config.get("neo4j", "uri", fallback=None)) + USERNAME = os.environ.get( "NEO4J_USERNAME", config.get("neo4j", "username", fallback=None) - ] - PASSWORD = os.environ[ + ) + PASSWORD = os.environ.get( "NEO4J_PASSWORD", config.get("neo4j", "password", fallback=None) - ] + ) MAX_CONNECTION_POOL_SIZE = os.environ.get( "NEO4J_MAX_CONNECTION_POOL_SIZE", config.get("neo4j", "connection_pool_size", fallback=800), diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index ab3443c7..13d19806 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -61,8 +61,8 @@ class QdrantVectorDBStorage(BaseVectorStorage): client.create_collection(collection_name, **kwargs) def __post_init__(self): - config = self.global_config.get("vector_db_storage_cls_kwargs", {}) - cosine_threshold = config.get("cosine_better_than_threshold") + kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) + cosine_threshold = kwargs.get("cosine_better_than_threshold") if cosine_threshold is None: raise ValueError( "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs" From 637e7efa8375ec1f25aba3e7181500fa4133ec25 Mon Sep 17 00:00:00 2001 From: ArnoChen Date: Fri, 14 Feb 2025 02:49:00 +0800 Subject: [PATCH 2/5] add the missing MongoDocStatusStorage --- config.ini.example | 1 - lightrag/lightrag.py | 7 ++++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/config.ini.example b/config.ini.example index e6ceed0a..3041611e 100644 --- a/config.ini.example +++ b/config.ini.example @@ -6,7 +6,6 @@ password = your-password [mongodb] uri = mongodb+srv://name:password@your-cluster-address database = lightrag -graph = false [redis] uri=redis://localhost:6379/1 diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index d53a252d..00c8fbbe 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -80,7 +80,12 @@ STORAGE_IMPLEMENTATIONS = { "required_methods": ["query", "upsert"], }, "DOC_STATUS_STORAGE": { - "implementations": ["JsonDocStatusStorage", "PGDocStatusStorage"], + "implementations": [ + "JsonDocStatusStorage", + "PGDocStatusStorage", + "PGDocStatusStorage", + "MongoDocStatusStorage", + ], "required_methods": ["get_pending_docs"], }, } From 0f12b400e4e3feedc20e7329f009686717d66134 Mon Sep 17 00:00:00 2001 From: ArnoChen Date: Fri, 14 Feb 2025 02:50:11 +0800 Subject: [PATCH 3/5] disabled check_storage_env_vars because the configurations of many backends can be read from the configuration file instead of environment variables. --- lightrag/lightrag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 00c8fbbe..6db6e752 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -426,7 +426,7 @@ class LightRAG: # Verify storage implementation compatibility self.verify_storage_implementation(storage_type, storage_name) # Check environment variables - self.check_storage_env_vars(storage_name) + # self.check_storage_env_vars(storage_name) # Ensure vector_db_storage_cls_kwargs has required fields default_vector_db_kwargs = { From 307184985b05cabee01e7a1929b76e8c0327fc1d Mon Sep 17 00:00:00 2001 From: ArnoChen Date: Fri, 14 Feb 2025 03:00:56 +0800 Subject: [PATCH 4/5] fix configuration errors of milvus --- lightrag/kg/milvus_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py index 88a8f475..f4d9d47f 100644 --- a/lightrag/kg/milvus_impl.py +++ b/lightrag/kg/milvus_impl.py @@ -32,8 +32,8 @@ class MilvusVectorDBStorage(BaseVectorStorage): ) def __post_init__(self): - config = self.global_config.get("vector_db_storage_cls_kwargs", {}) - cosine_threshold = config.get("cosine_better_than_threshold") + kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) + cosine_threshold = kwargs.get("cosine_better_than_threshold") if cosine_threshold is None: raise ValueError( "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs" From cac1c993a9f020e42bb6a346f127fa0444abd2d4 Mon Sep 17 00:00:00 2001 From: ArnoChen Date: Fri, 14 Feb 2025 03:14:48 +0800 Subject: [PATCH 5/5] remove redundant cosine similarity filter in Qdrant query fix --- lightrag/kg/qdrant_impl.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index 13d19806..3af76328 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -15,7 +15,6 @@ if not pm.is_installed("qdrant_client"): from qdrant_client import QdrantClient, models - config = configparser.ConfigParser() config.read("config.ini", "utf-8") @@ -138,12 +137,9 @@ class QdrantVectorDBStorage(BaseVectorStorage): query_vector=embedding[0], limit=top_k, with_payload=True, + score_threshold=self.cosine_better_than_threshold, ) + logger.debug(f"query result: {results}") - # 添加余弦相似度过滤 - filtered_results = [ - dp for dp in results if dp.score >= self.cosine_better_than_threshold - ] - return [ - {**dp.payload, "id": dp.id, "distance": dp.score} for dp in filtered_results - ] + + return [{**dp.payload, "id": dp.id, "distance": dp.score} for dp in results]