diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py
index 6e8873fc..1cbd1b0b 100644
--- a/lightrag/kg/nano_vector_db_impl.py
+++ b/lightrag/kg/nano_vector_db_impl.py
@@ -139,9 +139,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
     async def query(self, query: str, top_k=5):
         embedding = await self.embedding_func([query])
         embedding = embedding[0]
-        logger.info(
-            f"Query: {query}, top_k: {top_k}, cosine: {self.cosine_better_than_threshold}"
-        )
         results = self._client.query(
             query=embedding,
             top_k=top_k,
diff --git a/lightrag/operate.py b/lightrag/operate.py
index db7f59a5..ee3c4512 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1,5 +1,6 @@
 import asyncio
 import json
+import os
 import re
 from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, Union
@@ -34,6 +35,9 @@ from .prompt import GRAPH_FIELD_SEP, PROMPTS
 import time
 
 
+COSINE_THRESHOLD = float(os.getenv("COSINE_THRESHOLD", "0.2"))
+
+
 def chunking_by_token_size(
     content: str,
     split_by_character: Union[str, None] = None,
@@ -1055,6 +1059,7 @@ async def _get_node_data(
     query_param: QueryParam,
 ):
     # get similar entities
+    logger.info(f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
     results = await entities_vdb.query(query, top_k=query_param.top_k)
     if not len(results):
         return "", "", ""
@@ -1270,6 +1275,7 @@ async def _get_edge_data(
     text_chunks_db: BaseKVStorage,
     query_param: QueryParam,
 ):
+    logger.info(f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
     results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
 
     if not len(results):