Fix cosine threshold parameter setting error for chroma

This commit is contained in:
yangdx
2025-01-29 22:41:18 +08:00
parent 90c765c724
commit 20d6355a4a
2 changed files with 10 additions and 11 deletions

View File

@@ -1,3 +1,4 @@
import os
import asyncio import asyncio
from dataclasses import dataclass from dataclasses import dataclass
from typing import Union from typing import Union
@@ -12,16 +13,16 @@ from lightrag.utils import logger
class ChromaVectorDBStorage(BaseVectorStorage): class ChromaVectorDBStorage(BaseVectorStorage):
"""ChromaDB vector storage implementation.""" """ChromaDB vector storage implementation."""
cosine_better_than_threshold: float = 0.2 cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
def __post_init__(self): def __post_init__(self):
try: try:
# Use global config value if specified, otherwise use default # Use global config value if specified, otherwise use default
self.cosine_better_than_threshold = self.global_config.get( config = self.global_config.get("vector_db_storage_cls_kwargs", {})
self.cosine_better_than_threshold = config.get(
"cosine_better_than_threshold", self.cosine_better_than_threshold "cosine_better_than_threshold", self.cosine_better_than_threshold
) )
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
user_collection_settings = config.get("collection_settings", {}) user_collection_settings = config.get("collection_settings", {})
# Default HNSW index settings for ChromaDB # Default HNSW index settings for ChromaDB
default_collection_settings = { default_collection_settings = {

View File

@@ -76,6 +76,12 @@ class NanoVectorDBStorage(BaseVectorStorage):
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
def __post_init__(self): def __post_init__(self):
# Use global config value if specified, otherwise use default
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
self.cosine_better_than_threshold = config.get(
"cosine_better_than_threshold", self.cosine_better_than_threshold
)
self._client_file_name = os.path.join( self._client_file_name = os.path.join(
self.global_config["working_dir"], f"vdb_{self.namespace}.json" self.global_config["working_dir"], f"vdb_{self.namespace}.json"
) )
@@ -83,14 +89,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
self._client = NanoVectorDB( self._client = NanoVectorDB(
self.embedding_func.embedding_dim, storage_file=self._client_file_name self.embedding_func.embedding_dim, storage_file=self._client_file_name
) )
# get cosine_better_than_threshold from LightRAG
vector_db_kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
self.cosine_better_than_threshold = vector_db_kwargs.get(
"cosine_better_than_threshold",
self.global_config.get(
"cosine_better_than_threshold", self.cosine_better_than_threshold
),
)
async def upsert(self, data: dict[str, dict]): async def upsert(self, data: dict[str, dict]):
logger.info(f"Inserting {len(data)} vectors to {self.namespace}") logger.info(f"Inserting {len(data)} vectors to {self.namespace}")