From 11c7af7fd86f552d00d3a1265574a3b8ced2fa33 Mon Sep 17 00:00:00 2001 From: yangdx Date: Thu, 13 Feb 2025 03:34:31 +0800 Subject: [PATCH] refactor: use vdb instance's cosine threshold instead of global constant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit • Remove global COSINE_THRESHOLD • Use instance-level threshold config • Update logging statements • Reference vdb threshold directly --- lightrag/operate.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index ee3c4512..f8d484af 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -35,8 +35,6 @@ from .prompt import GRAPH_FIELD_SEP, PROMPTS import time -COSINE_THRESHOLD = float(os.getenv("COSINE_THRESHOLD", "0.2")) - def chunking_by_token_size( content: str, @@ -1059,7 +1057,7 @@ async def _get_node_data( query_param: QueryParam, ): # get similar entities - logger.info(f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}") + logger.info(f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {entities_vdb.cosine_better_than_threshold}") results = await entities_vdb.query(query, top_k=query_param.top_k) if not len(results): return "", "", "" @@ -1275,7 +1273,7 @@ async def _get_edge_data( text_chunks_db: BaseKVStorage, query_param: QueryParam, ): - logger.info(f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}") + logger.info(f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {relationships_vdb.cosine_better_than_threshold}") results = await relationships_vdb.query(keywords, top_k=query_param.top_k) if not len(results):