Allow specifying the LLM model function per query (falls back to the global llm_model_func)

2025-03-23 21:33:49 +05:30
parent f8ba98c1ff
commit 3522da1b21
4 changed files with 112 additions and 8 deletions
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -705,7 +705,7 @@ async def kg_query(
    system_prompt: str | None = None,
 ) -> str | AsyncIterator[str]:
    # Handle cache
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
    args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
    cached_response, quantized, min_val, max_val = await handle_cache(
        hashing_kv, args_hash, query, query_param.mode, cache_type="query"
@@ -866,7 +866,7 @@ async def extract_keywords_only(
    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")

    # 5. Call the LLM for keyword extraction
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = param.model_func if param.model_func else global_config["llm_model_func"]
    result = await use_model_func(kw_prompt, keyword_extraction=True)

    # 6. Parse out JSON from the LLM response
@@ -926,7 +926,7 @@ async def mix_kg_vector_query(
    3. Combining both results for comprehensive answer generation
    """
    # 1. Cache handling
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
    args_hash = compute_args_hash("mix", query, cache_type="query")
    cached_response, quantized, min_val, max_val = await handle_cache(
        hashing_kv, args_hash, query, "mix", cache_type="query"
@@ -1731,7 +1731,7 @@ async def naive_query(
    system_prompt: str | None = None,
 ) -> str | AsyncIterator[str]:
    # Handle cache
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
    args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
    cached_response, quantized, min_val, max_val = await handle_cache(
        hashing_kv, args_hash, query, query_param.mode, cache_type="query"
@@ -1850,7 +1850,7 @@ async def kg_query_with_keywords(
    # ---------------------------
    # 1) Handle potential cache for query results
    # ---------------------------
-    use_model_func = global_config["llm_model_func"]
+    use_model_func = query_param.model_func if query_param.model_func else global_config["llm_model_func"]
    args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
    cached_response, quantized, min_val, max_val = await handle_cache(
        hashing_kv, args_hash, query, query_param.mode, cache_type="query"