Refactor logging for vector similarity search with configurable threshold
This commit is contained in:
@@ -139,9 +139,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
||||
async def query(self, query: str, top_k=5):
|
||||
embedding = await self.embedding_func([query])
|
||||
embedding = embedding[0]
|
||||
logger.info(
|
||||
f"Query: {query}, top_k: {top_k}, cosine: {self.cosine_better_than_threshold}"
|
||||
)
|
||||
results = self._client.query(
|
||||
query=embedding,
|
||||
top_k=top_k,
|
||||
|
@@ -1,5 +1,6 @@
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
from tqdm.asyncio import tqdm as tqdm_async
|
||||
from typing import Any, Union
|
||||
@@ -34,6 +35,9 @@ from .prompt import GRAPH_FIELD_SEP, PROMPTS
|
||||
import time
|
||||
|
||||
|
||||
# Cosine-similarity threshold, read once at import time from the
# COSINE_THRESHOLD environment variable (defaults to "0.2" when unset).
# float() raises ValueError at import if the variable is set to a
# non-numeric string.
# NOTE(review): in the code visible here this value is only interpolated
# into log messages; confirm it matches the threshold the vector storage
# actually applies when filtering results.
COSINE_THRESHOLD = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||
|
||||
|
||||
def chunking_by_token_size(
|
||||
content: str,
|
||||
split_by_character: Union[str, None] = None,
|
||||
@@ -1055,6 +1059,7 @@ async def _get_node_data(
|
||||
query_param: QueryParam,
|
||||
):
|
||||
# get similar entities
|
||||
logger.info(f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
|
||||
results = await entities_vdb.query(query, top_k=query_param.top_k)
|
||||
if not len(results):
|
||||
return "", "", ""
|
||||
@@ -1270,6 +1275,7 @@ async def _get_edge_data(
|
||||
text_chunks_db: BaseKVStorage,
|
||||
query_param: QueryParam,
|
||||
):
|
||||
logger.info(f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
|
||||
results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
|
||||
|
||||
if not len(results):
|
||||
|
Reference in New Issue
Block a user