Fix cache persistence bugs
This commit is contained in:
@@ -859,20 +859,22 @@ async def kg_query(
|
|||||||
.strip()
|
.strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save to cache
|
if hashing_kv.global_config.get("enable_llm_cache"):
|
||||||
await save_to_cache(
|
# Save to cache
|
||||||
hashing_kv,
|
await save_to_cache(
|
||||||
CacheData(
|
hashing_kv,
|
||||||
args_hash=args_hash,
|
CacheData(
|
||||||
content=response,
|
args_hash=args_hash,
|
||||||
prompt=query,
|
content=response,
|
||||||
quantized=quantized,
|
prompt=query,
|
||||||
min_val=min_val,
|
quantized=quantized,
|
||||||
max_val=max_val,
|
min_val=min_val,
|
||||||
mode=query_param.mode,
|
max_val=max_val,
|
||||||
cache_type="query",
|
mode=query_param.mode,
|
||||||
),
|
cache_type="query",
|
||||||
)
|
),
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|
||||||
@@ -989,19 +991,21 @@ async def extract_keywords_only(
|
|||||||
"high_level_keywords": hl_keywords,
|
"high_level_keywords": hl_keywords,
|
||||||
"low_level_keywords": ll_keywords,
|
"low_level_keywords": ll_keywords,
|
||||||
}
|
}
|
||||||
await save_to_cache(
|
if hashing_kv.global_config.get("enable_llm_cache"):
|
||||||
hashing_kv,
|
await save_to_cache(
|
||||||
CacheData(
|
hashing_kv,
|
||||||
args_hash=args_hash,
|
CacheData(
|
||||||
content=json.dumps(cache_data),
|
args_hash=args_hash,
|
||||||
prompt=text,
|
content=json.dumps(cache_data),
|
||||||
quantized=quantized,
|
prompt=text,
|
||||||
min_val=min_val,
|
quantized=quantized,
|
||||||
max_val=max_val,
|
min_val=min_val,
|
||||||
mode=param.mode,
|
max_val=max_val,
|
||||||
cache_type="keywords",
|
mode=param.mode,
|
||||||
),
|
cache_type="keywords",
|
||||||
)
|
),
|
||||||
|
)
|
||||||
|
|
||||||
return hl_keywords, ll_keywords
|
return hl_keywords, ll_keywords
|
||||||
|
|
||||||
|
|
||||||
@@ -1205,20 +1209,21 @@ async def mix_kg_vector_query(
|
|||||||
.strip()
|
.strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
# 7. Save cache - Only cache after collecting complete response
|
if hashing_kv.global_config.get("enable_llm_cache"):
|
||||||
await save_to_cache(
|
# 7. Save cache - Only cache after collecting complete response
|
||||||
hashing_kv,
|
await save_to_cache(
|
||||||
CacheData(
|
hashing_kv,
|
||||||
args_hash=args_hash,
|
CacheData(
|
||||||
content=response,
|
args_hash=args_hash,
|
||||||
prompt=query,
|
content=response,
|
||||||
quantized=quantized,
|
prompt=query,
|
||||||
min_val=min_val,
|
quantized=quantized,
|
||||||
max_val=max_val,
|
min_val=min_val,
|
||||||
mode="mix",
|
max_val=max_val,
|
||||||
cache_type="query",
|
mode="mix",
|
||||||
),
|
cache_type="query",
|
||||||
)
|
),
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@@ -1973,20 +1978,21 @@ async def naive_query(
|
|||||||
.strip()
|
.strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Save to cache
|
if hashing_kv.global_config.get("enable_llm_cache"):
|
||||||
await save_to_cache(
|
# Save to cache
|
||||||
hashing_kv,
|
await save_to_cache(
|
||||||
CacheData(
|
hashing_kv,
|
||||||
args_hash=args_hash,
|
CacheData(
|
||||||
content=response,
|
args_hash=args_hash,
|
||||||
prompt=query,
|
content=response,
|
||||||
quantized=quantized,
|
prompt=query,
|
||||||
min_val=min_val,
|
quantized=quantized,
|
||||||
max_val=max_val,
|
min_val=min_val,
|
||||||
mode=query_param.mode,
|
max_val=max_val,
|
||||||
cache_type="query",
|
mode=query_param.mode,
|
||||||
),
|
cache_type="query",
|
||||||
)
|
),
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@@ -2121,20 +2127,21 @@ async def kg_query_with_keywords(
|
|||||||
.strip()
|
.strip()
|
||||||
)
|
)
|
||||||
|
|
||||||
# 7. Save cache - 只有在收集完整响应后才缓存
|
if hashing_kv.global_config.get("enable_llm_cache"):
|
||||||
await save_to_cache(
|
# 7. Save cache - 只有在收集完整响应后才缓存
|
||||||
hashing_kv,
|
await save_to_cache(
|
||||||
CacheData(
|
hashing_kv,
|
||||||
args_hash=args_hash,
|
CacheData(
|
||||||
content=response,
|
args_hash=args_hash,
|
||||||
prompt=query,
|
content=response,
|
||||||
quantized=quantized,
|
prompt=query,
|
||||||
min_val=min_val,
|
quantized=quantized,
|
||||||
max_val=max_val,
|
min_val=min_val,
|
||||||
mode=query_param.mode,
|
max_val=max_val,
|
||||||
cache_type="query",
|
mode=query_param.mode,
|
||||||
),
|
cache_type="query",
|
||||||
)
|
),
|
||||||
|
)
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
@@ -731,9 +731,6 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
|
|||||||
hashing_kv: The key-value storage for caching
|
hashing_kv: The key-value storage for caching
|
||||||
cache_data: The cache data to save
|
cache_data: The cache data to save
|
||||||
"""
|
"""
|
||||||
if not hashing_kv.global_config.get("enable_llm_cache"):
|
|
||||||
return
|
|
||||||
|
|
||||||
# Skip if storage is None or content is a streaming response
|
# Skip if storage is None or content is a streaming response
|
||||||
if hashing_kv is None or not cache_data.content:
|
if hashing_kv is None or not cache_data.content:
|
||||||
return
|
return
|
||||||
@@ -776,6 +773,8 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
|
|||||||
"original_prompt": cache_data.prompt,
|
"original_prompt": cache_data.prompt,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.info(f" == LLM cache == saving {cache_data.mode}: {cache_data.args_hash}")
|
||||||
|
|
||||||
# Only upsert if there's actual new content
|
# Only upsert if there's actual new content
|
||||||
await hashing_kv.upsert({cache_data.mode: mode_cache})
|
await hashing_kv.upsert({cache_data.mode: mode_cache})
|
||||||
|
|
||||||
@@ -1314,17 +1313,17 @@ async def use_llm_func_with_cache(
|
|||||||
|
|
||||||
res: str = await use_llm_func(input_text, **kwargs)
|
res: str = await use_llm_func(input_text, **kwargs)
|
||||||
|
|
||||||
# Save to cache
|
if llm_response_cache.global_config.get("enable_llm_cache_for_entity_extract"):
|
||||||
logger.info(f" == LLM cache == saving {arg_hash}")
|
await save_to_cache(
|
||||||
await save_to_cache(
|
llm_response_cache,
|
||||||
llm_response_cache,
|
CacheData(
|
||||||
CacheData(
|
args_hash=arg_hash,
|
||||||
args_hash=arg_hash,
|
content=res,
|
||||||
content=res,
|
prompt=_prompt,
|
||||||
prompt=_prompt,
|
cache_type=cache_type,
|
||||||
cache_type=cache_type,
|
),
|
||||||
),
|
)
|
||||||
)
|
|
||||||
return res
|
return res
|
||||||
|
|
||||||
# When cache is disabled, directly call LLM
|
# When cache is disabled, directly call LLM
|
||||||
|
Reference in New Issue
Block a user