specify LLM for query

This commit is contained in:
omdivyatej
2025-03-23 21:33:49 +05:30
parent f8ba98c1ff
commit 3522da1b21
4 changed files with 112 additions and 8 deletions

View File

@@ -705,7 +705,7 @@ async def kg_query(
system_prompt: str | None = None,
) -> str | AsyncIterator[str]:
# Handle cache
-use_model_func = global_config["llm_model_func"]
+use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
cached_response, quantized, min_val, max_val = await handle_cache(
hashing_kv, args_hash, query, query_param.mode, cache_type="query"
@@ -866,7 +866,7 @@ async def extract_keywords_only(
logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
# 5. Call the LLM for keyword extraction
-use_model_func = global_config["llm_model_func"]
+use_model_func = param.model_func if param.model_func else global_config["llm_model_func"]
result = await use_model_func(kw_prompt, keyword_extraction=True)
# 6. Parse out JSON from the LLM response
@@ -926,7 +926,7 @@ async def mix_kg_vector_query(
3. Combining both results for comprehensive answer generation
"""
# 1. Cache handling
-use_model_func = global_config["llm_model_func"]
+use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
args_hash = compute_args_hash("mix", query, cache_type="query")
cached_response, quantized, min_val, max_val = await handle_cache(
hashing_kv, args_hash, query, "mix", cache_type="query"
@@ -1731,7 +1731,7 @@ async def naive_query(
system_prompt: str | None = None,
) -> str | AsyncIterator[str]:
# Handle cache
-use_model_func = global_config["llm_model_func"]
+use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
cached_response, quantized, min_val, max_val = await handle_cache(
hashing_kv, args_hash, query, query_param.mode, cache_type="query"
@@ -1850,7 +1850,7 @@ async def kg_query_with_keywords(
# ---------------------------
# 1) Handle potential cache for query results
# ---------------------------
-use_model_func = global_config["llm_model_func"]
+use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
cached_response, quantized, min_val, max_val = await handle_cache(
hashing_kv, args_hash, query, query_param.mode, cache_type="query"