Fix cache persistence bugs

yangdx
2025-04-16 01:24:59 +08:00
parent ca955cee5d
commit 051e632ab3
2 changed files with 89 additions and 83 deletions
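
In the old code, save_to_cache itself returned early whenever enable_llm_cache was off, so writes from the entity-extraction path, which is gated by the separate enable_llm_cache_for_entity_extract flag, were dropped as well. The diff removes that early return and moves the flag check to each call site. A minimal sketch of the before/after shape (simplified; only the flag names and the call pattern come from the diff, the storage stub and helper names are assumptions):

# Before: save_to_cache short-circuited on a single flag (simplified sketch).
async def save_to_cache_old(hashing_kv, cache_data):
    if not hashing_kv.global_config.get("enable_llm_cache"):
        return  # also dropped entity-extraction entries gated by another flag
    await hashing_kv.upsert({cache_data["mode"]: cache_data})

# After: save_to_cache always persists; each caller guards with its own flag.
async def save_to_cache_new(hashing_kv, cache_data):
    await hashing_kv.upsert({cache_data["mode"]: cache_data})

async def query_call_site(hashing_kv, cache_data):
    if hashing_kv.global_config.get("enable_llm_cache"):
        await save_to_cache_new(hashing_kv, cache_data)

async def entity_extract_call_site(hashing_kv, cache_data):
    if hashing_kv.global_config.get("enable_llm_cache_for_entity_extract"):
        await save_to_cache_new(hashing_kv, cache_data)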


@@ -859,20 +859,22 @@ async def kg_query(
         .strip()
     )
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
@@ -989,19 +991,21 @@ async def extract_keywords_only(
"high_level_keywords": hl_keywords, "high_level_keywords": hl_keywords,
"low_level_keywords": ll_keywords, "low_level_keywords": ll_keywords,
} }
await save_to_cache( if hashing_kv.global_config.get("enable_llm_cache"):
hashing_kv, await save_to_cache(
CacheData( hashing_kv,
args_hash=args_hash, CacheData(
content=json.dumps(cache_data), args_hash=args_hash,
prompt=text, content=json.dumps(cache_data),
quantized=quantized, prompt=text,
min_val=min_val, quantized=quantized,
max_val=max_val, min_val=min_val,
mode=param.mode, max_val=max_val,
cache_type="keywords", mode=param.mode,
), cache_type="keywords",
) ),
)
return hl_keywords, ll_keywords return hl_keywords, ll_keywords
@@ -1205,20 +1209,21 @@ async def mix_kg_vector_query(
         .strip()
     )
-    # 7. Save cache - Only cache after collecting complete response
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode="mix",
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - Only cache after collecting complete response
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode="mix",
+                cache_type="query",
+            ),
+        )
 
     return response
@@ -1973,20 +1978,21 @@ async def naive_query(
         .strip()
     )
-    # Save to cache
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # Save to cache
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response
@@ -2121,20 +2127,21 @@ async def kg_query_with_keywords(
         .strip()
     )
-    # 7. Save cache - only cache after the complete response has been collected
-    await save_to_cache(
-        hashing_kv,
-        CacheData(
-            args_hash=args_hash,
-            content=response,
-            prompt=query,
-            quantized=quantized,
-            min_val=min_val,
-            max_val=max_val,
-            mode=query_param.mode,
-            cache_type="query",
-        ),
-    )
+    if hashing_kv.global_config.get("enable_llm_cache"):
+        # 7. Save cache - only cache after the complete response has been collected
+        await save_to_cache(
+            hashing_kv,
+            CacheData(
+                args_hash=args_hash,
+                content=response,
+                prompt=query,
+                quantized=quantized,
+                min_val=min_val,
+                max_val=max_val,
+                mode=query_param.mode,
+                cache_type="query",
+            ),
+        )
 
     return response


@@ -731,9 +731,6 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
         hashing_kv: The key-value storage for caching
         cache_data: The cache data to save
     """
-    if not hashing_kv.global_config.get("enable_llm_cache"):
-        return
-
     # Skip if storage is None or content is a streaming response
     if hashing_kv is None or not cache_data.content:
         return
@@ -776,6 +773,8 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
"original_prompt": cache_data.prompt, "original_prompt": cache_data.prompt,
} }
logger.info(f" == LLM cache == saving {cache_data.mode}: {cache_data.args_hash}")
# Only upsert if there's actual new content # Only upsert if there's actual new content
await hashing_kv.upsert({cache_data.mode: mode_cache}) await hashing_kv.upsert({cache_data.mode: mode_cache})
@@ -1314,17 +1313,17 @@ async def use_llm_func_with_cache(
         res: str = await use_llm_func(input_text, **kwargs)
 
-        # Save to cache
-        logger.info(f" == LLM cache == saving {arg_hash}")
-        await save_to_cache(
-            llm_response_cache,
-            CacheData(
-                args_hash=arg_hash,
-                content=res,
-                prompt=_prompt,
-                cache_type=cache_type,
-            ),
-        )
+        if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
+            await save_to_cache(
+                llm_response_cache,
+                CacheData(
+                    args_hash=arg_hash,
+                    content=res,
+                    prompt=_prompt,
+                    cache_type=cache_type,
+                ),
+            )
 
         return res
 
     # When cache is disabled, directly call LLM
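
Net effect of the two files together: save_to_cache no longer consults any flag itself, so entity-extraction entries can be persisted even when query caching is disabled, provided enable_llm_cache_for_entity_extract is set. A self-contained sketch of that interaction (FakeKV and maybe_cache_entity_extraction are illustrative stand-ins, not part of the codebase; only the flag names and the guard pattern come from the diff):

import asyncio

class FakeKV:
    """Illustrative stand-in for the hashing KV storage used in the diff."""
    def __init__(self, global_config):
        self.global_config = global_config
        self.data = {}

    async def upsert(self, entries):
        self.data.update(entries)

async def maybe_cache_entity_extraction(kv, arg_hash, result):
    # Mirrors the call-site guard added to use_llm_func_with_cache.
    if kv.global_config.get("enable_llm_cache_for_entity_extract"):
        await kv.upsert({arg_hash: result})

async def main():
    kv = FakeKV({
        "enable_llm_cache": False,                    # query caching off
        "enable_llm_cache_for_entity_extract": True,  # entity-extraction caching on
    })
    await maybe_cache_entity_extraction(kv, "abc123", "extracted entities ...")
    # With the early return gone from save_to_cache, this entry is persisted
    # even though enable_llm_cache is False.
    print(kv.data)

asyncio.run(main())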