support pipeline mode

Author: jin
Date: 2025-01-16 12:58:15 +08:00
parent d5ae6669ea
commit 6ae8647285
6 changed files with 203 additions and 172 deletions


@@ -41,7 +41,7 @@ logging.getLogger("httpx").setLevel(logging.WARNING)
 def set_logger(log_file: str):
     logger.setLevel(logging.DEBUG)
-    file_handler = logging.FileHandler(log_file, encoding='utf-8')
+    file_handler = logging.FileHandler(log_file, encoding="utf-8")
     file_handler.setLevel(logging.DEBUG)
     formatter = logging.Formatter(
@@ -458,7 +458,7 @@ async def handle_cache(hashing_kv, args_hash, prompt, mode="default"):
         return None, None, None, None
     # For naive mode, only use simple cache matching
-    #if mode == "naive":
+    # if mode == "naive":
     if mode == "default":
         if exists_func(hashing_kv, "get_by_mode_and_id"):
             mode_cache = await hashing_kv.get_by_mode_and_id(mode, args_hash) or {}
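As a reading aid for this hunk: the cache is consulted with an exact, mode-keyed lookup first (get_by_mode_and_id with the prompt's args_hash). Below is a minimal sketch of that pattern, assuming a plain dict-backed store; the ModeKeyedCache class, its field layout, and compute_args_hash are hypothetical illustrations, not the project's actual storage API.

import asyncio
import hashlib
from typing import Any, Optional


class ModeKeyedCache:
    """Hypothetical mode-keyed KV store: {mode: {args_hash: {"return": ...}}}."""

    def __init__(self) -> None:
        self._data: dict[str, dict[str, Any]] = {}

    async def get_by_mode_and_id(self, mode: str, args_hash: str) -> Optional[dict]:
        # Exact match only: same mode and same argument hash.
        return self._data.get(mode, {}).get(args_hash)

    async def upsert(self, mode: str, args_hash: str, value: dict) -> None:
        self._data.setdefault(mode, {})[args_hash] = value


def compute_args_hash(*args: Any) -> str:
    # One plausible way to derive args_hash: hash the stringified arguments.
    return hashlib.md5(str(args).encode("utf-8")).hexdigest()


async def demo() -> None:
    cache = ModeKeyedCache()
    h = compute_args_hash("What is LightRAG?")
    await cache.upsert("default", h, {"return": "cached answer"})
    hit = await cache.get_by_mode_and_id("default", h)
    print(hit["return"] if hit else "cache miss")


asyncio.run(demo())

With this shape, a repeated prompt in the same mode hits the cache only on an exact hash match; anything else falls through to the embedding-based path below.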
@@ -479,7 +479,9 @@ async def handle_cache(hashing_kv, args_hash, prompt, mode="default"):
     quantized = min_val = max_val = None
     if is_embedding_cache_enabled:
         # Use embedding cache
-        embedding_model_func = hashing_kv.global_config["embedding_func"].func #["func"]
+        embedding_model_func = hashing_kv.global_config[
+            "embedding_func"
+        ].func  # ["func"]
         llm_model_func = hashing_kv.global_config.get("llm_model_func")
         current_embedding = await embedding_model_func([prompt])
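For context on the quantized/min_val/max_val variables in this hunk: the embedding-cache path embeds the prompt and compares it against previously cached, quantized embeddings instead of requiring an exact hash match. The sketch below illustrates that idea under stated assumptions (linear 8-bit quantization, cosine similarity, a 0.95 threshold); the entry layout with "embedding", "embedding_min", "embedding_max", and "return" keys is hypothetical, not the repository's actual cache schema.

import numpy as np


def dequantize(quantized: np.ndarray, min_val: float, max_val: float, bits: int = 8) -> np.ndarray:
    # Reverse a simple linear quantization back to float embeddings (assumed scheme).
    scale = (max_val - min_val) / (2**bits - 1)
    return quantized.astype(np.float32) * scale + min_val


def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b) + 1e-10))


async def embedding_cache_lookup(prompt, embedding_model_func, cached_entries, threshold=0.95):
    """Return a cached response whose stored embedding is similar enough to the prompt's.

    cached_entries: iterable of dicts shaped like
    {"embedding": np.ndarray (quantized), "embedding_min": float,
     "embedding_max": float, "return": str}  # hypothetical layout
    """
    current = (await embedding_model_func([prompt]))[0]
    best, best_sim = None, threshold
    for entry in cached_entries:
        cached = dequantize(entry["embedding"], entry["embedding_min"], entry["embedding_max"])
        sim = cosine_similarity(np.asarray(current), cached)
        if sim >= best_sim:
            best, best_sim = entry["return"], sim
    return best

Under this shape, a near-duplicate prompt can reuse a cached LLM answer even though its args_hash differs, which is what makes the embedding cache worth the extra embedding call.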