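Refactor logging for vector similarity search with configurable threshold: move the query log from NanoVectorDBStorage.query into _get_node_data and _get_edge_data, and log the COSINE_THRESHOLD environment setting (default 0.2)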
This commit is contained in:
@@ -139,9 +139,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|||||||
async def query(self, query: str, top_k=5):
|
async def query(self, query: str, top_k=5):
|
||||||
embedding = await self.embedding_func([query])
|
embedding = await self.embedding_func([query])
|
||||||
embedding = embedding[0]
|
embedding = embedding[0]
|
||||||
logger.info(
|
|
||||||
f"Query: {query}, top_k: {top_k}, cosine: {self.cosine_better_than_threshold}"
|
|
||||||
)
|
|
||||||
results = self._client.query(
|
results = self._client.query(
|
||||||
query=embedding,
|
query=embedding,
|
||||||
top_k=top_k,
|
top_k=top_k,
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
from tqdm.asyncio import tqdm as tqdm_async
|
from tqdm.asyncio import tqdm as tqdm_async
|
||||||
from typing import Any, Union
|
from typing import Any, Union
|
||||||
@@ -34,6 +35,9 @@ from .prompt import GRAPH_FIELD_SEP, PROMPTS
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
COSINE_THRESHOLD = float(os.getenv("COSINE_THRESHOLD", "0.2"))
|
||||||
|
|
||||||
|
|
||||||
def chunking_by_token_size(
|
def chunking_by_token_size(
|
||||||
content: str,
|
content: str,
|
||||||
split_by_character: Union[str, None] = None,
|
split_by_character: Union[str, None] = None,
|
||||||
@@ -1055,6 +1059,7 @@ async def _get_node_data(
|
|||||||
query_param: QueryParam,
|
query_param: QueryParam,
|
||||||
):
|
):
|
||||||
# get similar entities
|
# get similar entities
|
||||||
|
logger.info(f"Query nodes: {query}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
|
||||||
results = await entities_vdb.query(query, top_k=query_param.top_k)
|
results = await entities_vdb.query(query, top_k=query_param.top_k)
|
||||||
if not len(results):
|
if not len(results):
|
||||||
return "", "", ""
|
return "", "", ""
|
||||||
@@ -1270,6 +1275,7 @@ async def _get_edge_data(
|
|||||||
text_chunks_db: BaseKVStorage,
|
text_chunks_db: BaseKVStorage,
|
||||||
query_param: QueryParam,
|
query_param: QueryParam,
|
||||||
):
|
):
|
||||||
|
logger.info(f"Query edges: {keywords}, top_k: {query_param.top_k}, cosine: {COSINE_THRESHOLD}")
|
||||||
results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
|
results = await relationships_vdb.query(keywords, top_k=query_param.top_k)
|
||||||
|
|
||||||
if not len(results):
|
if not len(results):
|
||||||
|
Reference in New Issue
Block a user