refactor: make cosine similarity threshold a required config parameter

• Remove default threshold from env var
• Add validation for missing threshold
• Move default to lightrag.py config init
• Update all vector DB implementations
• Improve threshold validation consistency
This commit is contained in:
yangdx
2025-02-13 03:25:48 +08:00
parent 3308ecfa69
commit f01f57d0da
9 changed files with 59 additions and 30 deletions

View File

@@ -320,14 +320,14 @@ class OracleKVStorage(BaseKVStorage):
class OracleVectorDBStorage(BaseVectorStorage):
# db instance must be injected before use
# db: OracleDB
# No usable class-level default: __post_init__ assigns the real value from
# global_config["vector_db_storage_cls_kwargs"] and raises if it is missing.
cosine_better_than_threshold: float = None
def __post_init__(self):
    """Load the required cosine similarity threshold from the global config.

    Reads ``cosine_better_than_threshold`` from the
    ``vector_db_storage_cls_kwargs`` mapping of ``self.global_config``
    (an empty mapping is used when that key is absent) and stores it on
    the instance.

    Raises:
        ValueError: If ``cosine_better_than_threshold`` is missing from
            ``vector_db_storage_cls_kwargs`` or is explicitly ``None``.
    """
    config = self.global_config.get("vector_db_storage_cls_kwargs", {})
    cosine_threshold = config.get("cosine_better_than_threshold")
    # No fallback value on purpose: the threshold has to come from the config.
    if cosine_threshold is None:
        raise ValueError(
            "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
        )
    self.cosine_better_than_threshold = cosine_threshold
async def upsert(self, data: dict[str, dict]):
"""向向量数据库中插入数据"""