Refactor logging for vector similarity search with configurable threshold

This commit is contained in:
yangdx
2025-02-13 02:14:32 +08:00
parent 9a77d91023
commit 3308ecfa69
2 changed files with 6 additions and 3 deletions

View File

@@ -139,9 +139,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
async def query(self, query: str, top_k=5):
embedding = await self.embedding_func([query])
embedding = embedding[0]
logger.info(
f"Query: {query}, top_k: {top_k}, cosine: {self.cosine_better_than_threshold}"
)
results = self._client.query(
query=embedding,
top_k=top_k,

View File

@@ -1,5 +1,6 @@
import asyncio
import json
import os
import re
from tqdm.asyncio import tqdm as tqdm_async
from typing import Any, Union
@@ -34,6 +35,9 @@ from .prompt import GRAPH_FIELD_SEP, PROMPTS
import time
# Cosine threshold, read once at import time from the COSINE_THRESHOLD
# environment variable and parsed as a float; falls back to 0.2 when unset.
# NOTE(review): in the visible code this value is only interpolated into log
# messages -- confirm it matches the threshold the vector storage applies.
COSINE_THRESHOLD = float(os.environ.get("COSINE_THRESHOLD", "0.2"))
def chunking_by_token_size(
content: str,
split_by_character: Union[str, None] = None,
@@ -1055,6 +1059,7 @@ async def _get_node_data(
query_param: QueryParam,
):
# get similar entities
logger.info(f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
results = await entities_vdb.query(query, top_k=query_param.top_k)
if not len(results):
return "", "", ""
@@ -1270,6 +1275,7 @@ async def _get_edge_data(
text_chunks_db: BaseKVStorage,
query_param: QueryParam,
):
logger.info(f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
if not len(results):