diff --git a/lightrag/operate.py b/lightrag/operate.py
index 80dd0f33..8179d7af 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -859,20 +859,22 @@ async def kg_query(
             .strip()
         )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
+
     return response
 
@@ -989,19 +991,21 @@ async def extract_keywords_only(
         "high_level_keywords": hl_keywords,
         "low_level_keywords": ll_keywords,
     }
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=json.dumps(cache_data),
-            prompt=text,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=param.mode,
-            cache_type="keywords",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=json.dumps(cache_data),
+                prompt=text,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=param.mode,
+                cache_type="keywords",
+            ),
+        )
+
     return hl_keywords, ll_keywords
 
@@ -1205,20 +1209,21 @@ async def mix_kg_vector_query(
             .strip()
         )
 
-    # 7. Save cache - Only cache after collecting complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode="mix",
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode="mix",
+                cache_type="query",
+            ),
+        )
 
     return response
 
@@ -1945,20 +1950,21 @@ async def naive_query(
             .strip()
         )
 
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
 
@@ -2093,20 +2099,21 @@ async def kg_query_with_keywords(
             .strip()
         )
 
-    # 7. Save cache - 只有在收集完整响应后才缓存
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - 只有在收集完整响应后才缓存
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
 
diff --git a/lightrag/utils.py b/lightrag/utils.py
index 091f8ff2..37400069 100644
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -731,9 +731,6 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
     """
-    if not hashing_kv.global_config.get("enable_llm_cache"):
-        return
-
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return
@@ -776,6 +773,8 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         "original_prompt": cache_data.prompt,
     }
 
+    logger.info(f" == LLM cache == saving {cache_data.mode}: {cache_data.args_hash}")
+
     # Only upsert if there's actual new content
     await hashing_kv.upsert({cache_data.mode: mode_cache})
 
@@ -1314,17 +1313,17 @@ async def use_llm_func_with_cache(
 
         res: str = await use_llm_func(input_text, **kwargs)
 
-        # Save to cache
-        logger.info(f" == LLM cache == saving {arg_hash}")
-        await save_to_cache(
-            llm_response_cache,
-            CacheData(
-                args_hash=arg_hash,
-                content=res,
-                prompt=_prompt,
-                cache_type=cache_type,
-            ),
-        )
+        if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
+            await save_to_cache(
+                llm_response_cache,
+                CacheData(
+                    args_hash=arg_hash,
+                    content=res,
+                    prompt=_prompt,
+                    cache_type=cache_type,
+                ),
+            )
+
         return res
 
     # When cache is disabled, directly call LLM
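
For context, the change moves the cache-enable check out of save_to_cache() and into each call site: query paths are gated on the "enable_llm_cache" flag in global_config, while the entity-extraction path in use_llm_func_with_cache() is gated on "enable_llm_cache_for_entity_extract". The following is a minimal, self-contained sketch of that guard pattern, not LightRAG code; FakeKV, query_path, and extraction_path are illustrative names, and save_to_cache here is a simplified stand-in.

    import asyncio

    class FakeKV:
        """Stand-in for the hashing_kv storage; only global_config and upsert are modeled."""
        def __init__(self, global_config):
            self.global_config = global_config
            self.store = {}

        async def upsert(self, data):
            self.store.update(data)

    async def save_to_cache(kv, mode, args_hash, content):
        # Simplified stand-in for the real save_to_cache: just write the entry.
        await kv.upsert({mode: {args_hash: {"return": content}}})

    async def query_path(kv, args_hash, response):
        # Mirrors the query call sites: the write is skipped when enable_llm_cache is off.
        if kv.global_config.get("enable_llm_cache"):
            await save_to_cache(kv, "local", args_hash, response)
        return response

    async def extraction_path(kv, args_hash, result):
        # Mirrors use_llm_func_with_cache: gated on enable_llm_cache_for_entity_extract.
        if kv.global_config.get("enable_llm_cache_for_entity_extract"):
            await save_to_cache(kv, "default", args_hash, result)
        return result

    async def main():
        kv = FakeKV({"enable_llm_cache": True, "enable_llm_cache_for_entity_extract": False})
        await query_path(kv, "q1", "answer")         # cached
        await extraction_path(kv, "e1", "entities")  # skipped
        print(kv.store)  # {'local': {'q1': {'return': 'answer'}}}

    asyncio.run(main())

With this split, turning off entity-extraction caching no longer disables query caching (and vice versa), since save_to_cache() itself no longer consults enable_llm_cache.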