diff --git a/lightrag/llm.py b/lightrag/llm.py
index d147e416..09e9fd74 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -73,11 +73,12 @@ async def openai_complete_if_cache(
     messages.extend(history_messages)
     messages.append({"role": "user", "content": prompt})
 
+    hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
     # Handle cache
     mode = kwargs.pop("mode", "default")
     args_hash = compute_args_hash(model, messages)
     cached_response, quantized, min_val, max_val = await handle_cache(
-        kwargs.get("hashing_kv"), args_hash, prompt, mode
+        hashing_kv, args_hash, prompt, mode
     )
     if cached_response is not None:
         return cached_response
@@ -219,12 +220,12 @@ async def bedrock_complete_if_cache(
 
     # Add user prompt
     messages.append({"role": "user", "content": [{"text": prompt}]})
-
+    hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
     # Handle cache
     mode = kwargs.pop("mode", "default")
     args_hash = compute_args_hash(model, messages)
     cached_response, quantized, min_val, max_val = await handle_cache(
-        kwargs.get("hashing_kv"), args_hash, prompt, mode
+        hashing_kv, args_hash, prompt, mode
     )
     if cached_response is not None:
         return cached_response
@@ -250,12 +251,12 @@ async def bedrock_complete_if_cache(
             args["inferenceConfig"][inference_params_map.get(param, param)] = (
                 kwargs.pop(param)
             )
-
+    hashing_kv: BaseKVStorage = kwargs.pop("hashing_kv", None)
     # Handle cache
     mode = kwargs.pop("mode", "default")
     args_hash = compute_args_hash(model, messages)
     cached_response, quantized, min_val, max_val = await handle_cache(
-        kwargs.get("hashing_kv"), args_hash, prompt, mode
+        hashing_kv, args_hash, prompt, mode
    )
     if cached_response is not None:
         return cached_response
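
Note for reviewers: the switch from `kwargs.get("hashing_kv")` to a `kwargs.pop("hashing_kv", None)` assignment matters because the remaining `**kwargs` are later forwarded to the underlying completion client, which rejects keywords it does not recognize. Below is a minimal, self-contained sketch of that pattern, assuming such a pass-through call; `complete_if_cache_sketch` and `_fake_completion_call` are illustrative stand-ins, not LightRAG code.

```python
import asyncio


async def _fake_completion_call(model: str, messages: list, **request_kwargs):
    # Stand-in for the real downstream client call; like a real client,
    # it rejects parameters it does not recognize.
    allowed = {"temperature", "max_tokens", "top_p"}
    unknown = set(request_kwargs) - allowed
    if unknown:
        raise TypeError(f"unexpected keyword argument(s): {sorted(unknown)}")
    return f"completion for {len(messages)} message(s) from {model}"


async def complete_if_cache_sketch(model: str, prompt: str, **kwargs) -> str:
    # Strip caller-only arguments *before* kwargs are forwarded downstream.
    # With kwargs.get(...) instead of kwargs.pop(...), "hashing_kv" and
    # "mode" would still be inside kwargs and the call below would raise.
    hashing_kv = kwargs.pop("hashing_kv", None)  # cache storage, may be None
    mode = kwargs.pop("mode", "default")         # cache mode

    # ... a cache lookup keyed on (model, prompt, mode) would go here ...
    _ = (hashing_kv, mode)

    messages = [{"role": "user", "content": prompt}]
    return await _fake_completion_call(model=model, messages=messages, **kwargs)


if __name__ == "__main__":
    result = asyncio.run(
        complete_if_cache_sketch(
            "some-model",
            "hello",
            hashing_kv=object(),  # would break the client call if left in kwargs
            mode="default",
            temperature=0.2,
        )
    )
    print(result)
```

With `pop`, the cache store stays available locally for the cache lookup while never reaching the client; with `get`, it would leak into the forwarded kwargs.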