Merge pull request #1318 from danielaskdd/main

Add ENABLE_LLM_CACHE env support and fix mix_kg_vector_query return value error when only_need_context is enabled
Authored by Daniel.y on 2025-04-09 13:03:07 +08:00; committed by GitHub
5 changed files with 20 additions and 6 deletions

View File

@@ -40,7 +40,6 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # MAX_TOKEN_ENTITY_DESC=4000
 ### Settings for document indexing
-ENABLE_LLM_CACHE_FOR_EXTRACT=true
 SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -64,6 +63,8 @@ TEMPERATURE=0.5
 MAX_ASYNC=4
 ### Max tokens send to LLM (less than context size of the model)
 MAX_TOKENS=32768
+ENABLE_LLM_CACHE=true
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
 ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
 LLM_BINDING=ollama
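For reference, a minimal sketch of toggling the two cache switches through the environment for a single run instead of editing the env file (variable names come from the diff above; per the server changes below, unset values default to enabled):

import os

# Both flags are read by the API server at startup; leaving them unset
# behaves like "true".
os.environ.setdefault("ENABLE_LLM_CACHE", "true")              # cache LLM responses at query time
os.environ.setdefault("ENABLE_LLM_CACHE_FOR_EXTRACT", "true")  # cache LLM calls during entity extraction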

View File

@@ -297,6 +297,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
 
     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)
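The new flag is read with the same helper as the existing extraction-cache flag. As a rough sketch of the boolean conversion such a helper presumably performs (parse_bool_env below is a stand-in for illustration, not the real get_env_value, whose details may differ):

import os

def parse_bool_env(name: str, default: bool) -> bool:
    # Fall back to the default when the variable is unset; otherwise treat
    # common truthy spellings as True and everything else as False.
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")

# Mirrors get_env_value("ENABLE_LLM_CACHE", True, bool) in the hunk above.
enable_llm_cache = parse_bool_env("ENABLE_LLM_CACHE", True)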

View File

@@ -316,6 +316,7 @@ def create_app(args):
"cosine_better_than_threshold": args.cosine_threshold "cosine_better_than_threshold": args.cosine_threshold
}, },
enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
enable_llm_cache=args.enable_llm_cache,
embedding_cache_config={ embedding_cache_config={
"enabled": True, "enabled": True,
"similarity_threshold": 0.95, "similarity_threshold": 0.95,
@@ -347,6 +348,7 @@ def create_app(args):
"cosine_better_than_threshold": args.cosine_threshold "cosine_better_than_threshold": args.cosine_threshold
}, },
enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract, enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
enable_llm_cache=args.enable_llm_cache,
embedding_cache_config={ embedding_cache_config={
"enabled": True, "enabled": True,
"similarity_threshold": 0.95, "similarity_threshold": 0.95,
@@ -469,6 +471,7 @@ def create_app(args):
"graph_storage": args.graph_storage, "graph_storage": args.graph_storage,
"vector_storage": args.vector_storage, "vector_storage": args.vector_storage,
"enable_llm_cache_for_extract": args.enable_llm_cache_for_extract, "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
"enable_llm_cache": args.enable_llm_cache,
}, },
"auth_mode": auth_mode, "auth_mode": auth_mode,
"pipeline_busy": pipeline_status.get("busy", False), "pipeline_busy": pipeline_status.get("busy", False),

View File

@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.max_async}") ASCIIColors.yellow(f"{args.max_async}")
ASCIIColors.white(" ├─ Max Tokens: ", end="") ASCIIColors.white(" ├─ Max Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_tokens}") ASCIIColors.yellow(f"{args.max_tokens}")
ASCIIColors.white(" ─ Timeout: ", end="") ASCIIColors.white(" ─ Timeout: ", end="")
ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}") ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
# Embedding Configuration # Embedding Configuration
ASCIIColors.magenta("\n📊 Embedding Configuration:") ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -257,10 +261,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.chunk_overlap_size}") ASCIIColors.yellow(f"{args.chunk_overlap_size}")
ASCIIColors.white(" ├─ Cosine Threshold: ", end="") ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
ASCIIColors.yellow(f"{args.cosine_threshold}") ASCIIColors.yellow(f"{args.cosine_threshold}")
ASCIIColors.white(" ─ Top-K: ", end="") ASCIIColors.white(" ─ Top-K: ", end="")
ASCIIColors.yellow(f"{args.top_k}") ASCIIColors.yellow(f"{args.top_k}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
# System Configuration # System Configuration
ASCIIColors.magenta("\n💾 Storage Configuration:") ASCIIColors.magenta("\n💾 Storage Configuration:")

View File

@@ -1072,7 +1072,14 @@ async def mix_kg_vector_query(
         return PROMPTS["fail_response"]
 
     if query_param.only_need_context:
-        return {"kg_context": kg_context, "vector_context": vector_context}
+        context_str = f"""
+-----Knowledge Graph Context-----
+{kg_context if kg_context else "No relevant knowledge graph information found"}
+
+-----Vector Context-----
+{vector_context if vector_context else "No relevant text information found"}
+""".strip()
+        return context_str
 
     # 5. Construct hybrid prompt
     sys_prompt = (
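Previously, only_need_context in mix mode returned a dict; the commit message calls this the return value error, and the fix formats both contexts into one string instead. A minimal usage sketch (assuming the standard LightRAG query entry point and an already-initialized rag instance, whose construction is omitted):

from lightrag import QueryParam

# `rag` is assumed to be an initialized LightRAG instance (setup not shown).
context = rag.query(
    "What does the document say about caching?",  # illustrative question
    param=QueryParam(mode="mix", only_need_context=True),
)
# `context` is now a single formatted string of the form
#   -----Knowledge Graph Context----- ... -----Vector Context----- ...
# rather than the previous {"kg_context": ..., "vector_context": ...} dict.
print(context)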