refactor: make cosine similarity threshold a required config parameter
• Remove default threshold from env var • Add validation for missing threshold • Move default to lightrag.py config init • Update all vector DB implementations • Improve threshold validation consistency
This commit is contained in:
@@ -299,15 +299,15 @@ class PGKVStorage(BaseKVStorage):
|
||||
class PGVectorStorage(BaseVectorStorage):
|
||||
# db instance must be injected before use
|
||||
# db: PostgreSQLDB
|
||||
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||
cosine_better_than_threshold: float = None
|
||||
|
||||
def __post_init__(self):
    """Finish initialisation from ``self.global_config``.

    Copies ``embedding_batch_num`` into ``self._max_batch_size`` and reads the
    cosine similarity threshold from the ``vector_db_storage_cls_kwargs``
    entry of the global config.

    Raises:
        ValueError: if ``cosine_better_than_threshold`` is absent from
            ``vector_db_storage_cls_kwargs`` (or is explicitly ``None``).
    """
    self._max_batch_size = self.global_config["embedding_batch_num"]
    # The threshold is a required setting: no fallback to a class-level or
    # environment default is applied here, so a missing value fails loudly.
    config = self.global_config.get("vector_db_storage_cls_kwargs", {})
    cosine_threshold = config.get("cosine_better_than_threshold")
    if cosine_threshold is None:
        raise ValueError(
            "cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"
        )
    self.cosine_better_than_threshold = cosine_threshold
|
||||
|
||||
def _upsert_chunks(self, item: dict):
|
||||
try:
|
||||
|
Reference in New Issue
Block a user