From 37007244c2157cccb4896e1248c83381486d0a87 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:42:17 +0800
Subject: [PATCH 1/4] Add ENABLE_LLM_CACHE env support

---
 env.example                     |  3 ++-
 lightrag/api/config.py          |  3 +++
 lightrag/api/lightrag_server.py |  3 +++
 lightrag/api/utils_api.py       | 10 ++++++----
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/env.example b/env.example
index d21bbef6..3dbca084 100644
--- a/env.example
+++ b/env.example
@@ -40,7 +40,6 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # MAX_TOKEN_ENTITY_DESC=4000
 
 ### Settings for document indexing
-ENABLE_LLM_CACHE_FOR_EXTRACT=true
 SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -64,6 +63,8 @@ TEMPERATURE=0.5
 MAX_ASYNC=4
 ### Max tokens send to LLM (less than context size of the model)
 MAX_TOKENS=32768
+ENABLE_LLM_CACHE=true
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
 
 ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
 LLM_BINDING=ollama
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 1bbdb1c9..7b4c9bd3 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -297,6 +297,9 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+    args.enable_llm_cache = get_env_value(
+        "ENABLE_LLM_CACHE", True, bool
+    )
 
     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 6f778f35..e7411883 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -316,6 +316,7 @@ def create_app(args):
                 "cosine_better_than_threshold": args.cosine_threshold
             },
             enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -347,6 +348,7 @@ def create_app(args):
                 "cosine_better_than_threshold": args.cosine_threshold
             },
             enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -469,6 +471,7 @@ def create_app(args):
                 "graph_storage": args.graph_storage,
                 "vector_storage": args.vector_storage,
                 "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
+                "enable_llm_cache": args.enable_llm_cache,
             },
             "auth_mode": auth_mode,
             "pipeline_busy": pipeline_status.get("busy", False),
diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index ad75fd4e..e9772854 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.max_async}")
     ASCIIColors.white("    ├─ Max Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_tokens}")
-    ASCIIColors.white("    └─ Timeout: ", end="")
+    ASCIIColors.white("    ├─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
+    ASCIIColors.white("    ├─ LLM Cache Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache}")
+    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # Embedding Configuration
     ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -257,10 +261,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.chunk_overlap_size}")
     ASCIIColors.white("    ├─ Cosine Threshold: ", end="")
     ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white("    ├─ Top-K: ", end="")
+    ASCIIColors.white("    └─ Top-K: ", end="")
     ASCIIColors.yellow(f"{args.top_k}")
-    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
-    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # System Configuration
     ASCIIColors.magenta("\n💾 Storage Configuration:")

From e94f7dbe1b88c7561c0bb37de332b8c208e4b5e4 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:42:48 +0800
Subject: [PATCH 2/4] Fix linting

---
 lightrag/api/config.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 7b4c9bd3..268b41cb 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -297,9 +297,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
-    args.enable_llm_cache = get_env_value(
-        "ENABLE_LLM_CACHE", True, bool
-    )
+    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
 
     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)

From 692415b2e1c4146a894f6ccbed701eea72abbb01 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:59:32 +0800
Subject: [PATCH 3/4] Fix mix_kg_vector_query function return value error when
 only_need_context is enabled

---
 lightrag/operate.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 0e223bb6..d521686f 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1072,7 +1072,14 @@ async def mix_kg_vector_query(
         return PROMPTS["fail_response"]
 
     if query_param.only_need_context:
-        return {"kg_context": kg_context, "vector_context": vector_context}
+        context_str = f"""
+        -----Knowledge Graph Context-----
+        {kg_context if kg_context else "No relevant knowledge graph information found"}
+        
+        -----Vector Context-----
+        {vector_context if vector_context else "No relevant text information found"}
+        """.strip()
+        return context_str
 
     # 5. Construct hybrid prompt
     sys_prompt = (

From 740b4174d25e6ffbea63b0c005ec7c791610669e Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:59:58 +0800
Subject: [PATCH 4/4] Fix linting

---
 lightrag/operate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index d521686f..70ae4893 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1075,7 +1075,7 @@ async def mix_kg_vector_query(
         context_str = f"""
         -----Knowledge Graph Context-----
         {kg_context if kg_context else "No relevant knowledge graph information found"}
-        
+
         -----Vector Context-----
         {vector_context if vector_context else "No relevant text information found"}
         """.strip()
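Note: below is a minimal usage sketch, not part of the patches above, showing what these changes add up to for a caller. PATCH 1/4 has the API server read ENABLE_LLM_CACHE from the environment / .env and pass it through to LightRAG as enable_llm_cache; PATCH 3/4 makes a "mix" mode query with only_need_context=True return one formatted context string instead of a {"kg_context": ..., "vector_context": ...} dict. The import paths, model helper functions, working directory, and initialization call in the sketch are assumptions for illustration, not something introduced by this PR.

import asyncio

from lightrag import LightRAG, QueryParam
# Assumed helper functions; substitute whatever LLM/embedding funcs your setup uses.
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed


async def main():
    rag = LightRAG(
        working_dir="./rag_storage",  # hypothetical working directory
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=openai_embed,
        enable_llm_cache=True,  # the flag PATCH 1/4 wires up from ENABLE_LLM_CACHE
    )
    # Recent LightRAG versions expect explicit storage initialization (assumption).
    await rag.initialize_storages()

    param = QueryParam(mode="mix", only_need_context=True)
    context = await rag.aquery("What does this project cache?", param=param)

    # Before PATCH 3/4 this was a dict; after it, it is a single string containing
    # the "-----Knowledge Graph Context-----" and "-----Vector Context-----" sections.
    print(type(context))
    print(context[:300])


if __name__ == "__main__":
    asyncio.run(main())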