From 37007244c2157cccb4896e1248c83381486d0a87 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:42:17 +0800
Subject: [PATCH 1/4] Add ENABLE_LLM_CACHE env support

---
 env.example                     |  3 ++-
 lightrag/api/config.py          |  3 +++
 lightrag/api/lightrag_server.py |  3 +++
 lightrag/api/utils_api.py       | 10 ++++++----
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/env.example b/env.example
index d21bbef6..3dbca084 100644
--- a/env.example
+++ b/env.example
@@ -40,7 +40,6 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # MAX_TOKEN_ENTITY_DESC=4000
 
 ### Settings for document indexing
-ENABLE_LLM_CACHE_FOR_EXTRACT=true
 SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -64,6 +63,8 @@ TEMPERATURE=0.5
 MAX_ASYNC=4
 ### Max tokens send to LLM (less than context size of the model)
 MAX_TOKENS=32768
+ENABLE_LLM_CACHE=true
+ENABLE_LLM_CACHE_FOR_EXTRACT=true
 
 ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
 LLM_BINDING=ollama
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 1bbdb1c9..7b4c9bd3 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -297,6 +297,9 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+    args.enable_llm_cache = get_env_value(
+        "ENABLE_LLM_CACHE", True, bool
+    )
 
     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 6f778f35..e7411883 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -316,6 +316,7 @@ def create_app(args):
                 "cosine_better_than_threshold": args.cosine_threshold
             },
             enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -347,6 +348,7 @@ def create_app(args):
                 "cosine_better_than_threshold": args.cosine_threshold
             },
             enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -469,6 +471,7 @@ def create_app(args):
                 "graph_storage": args.graph_storage,
                 "vector_storage": args.vector_storage,
                 "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
+                "enable_llm_cache": args.enable_llm_cache,
             },
             "auth_mode": auth_mode,
             "pipeline_busy": pipeline_status.get("busy", False),
diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index ad75fd4e..e9772854 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.max_async}")
     ASCIIColors.white("    ├─ Max Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_tokens}")
-    ASCIIColors.white("    └─ Timeout: ", end="")
+    ASCIIColors.white("    ├─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
+    ASCIIColors.white("    ├─ LLM Cache Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache}")
+    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # Embedding Configuration
     ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -257,10 +261,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.chunk_overlap_size}")
     ASCIIColors.white("    ├─ Cosine Threshold: ", end="")
     ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white("    ├─ Top-K: ", end="")
+    ASCIIColors.white("    └─ Top-K: ", end="")
     ASCIIColors.yellow(f"{args.top_k}")
-    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
-    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
 
     # System Configuration
     ASCIIColors.magenta("\n💾 Storage Configuration:")

From e94f7dbe1b88c7561c0bb37de332b8c208e4b5e4 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:42:48 +0800
Subject: [PATCH 2/4] Fix linting

---
 lightrag/api/config.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 7b4c9bd3..268b41cb 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -297,9 +297,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
-    args.enable_llm_cache = get_env_value(
-        "ENABLE_LLM_CACHE", True, bool
-    )
+    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
 
     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)

From 692415b2e1c4146a894f6ccbed701eea72abbb01 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:59:32 +0800
Subject: [PATCH 3/4] Fix mix_kg_vector_query function return value error when
 only_need_context is enabled

---
 lightrag/operate.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 0e223bb6..d521686f 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1072,7 +1072,14 @@ async def mix_kg_vector_query(
         return PROMPTS["fail_response"]
 
     if query_param.only_need_context:
-        return {"kg_context": kg_context, "vector_context": vector_context}
+        context_str = f"""
+        -----Knowledge Graph Context-----
+        {kg_context if kg_context else "No relevant knowledge graph information found"}
+        
+        -----Vector Context-----
+        {vector_context if vector_context else "No relevant text information found"}
+        """.strip()
+        return context_str
 
     # 5. Construct hybrid prompt
     sys_prompt = (

From 740b4174d25e6ffbea63b0c005ec7c791610669e Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 9 Apr 2025 12:59:58 +0800
Subject: [PATCH 4/4] Fix linting

---
 lightrag/operate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index d521686f..70ae4893 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1075,7 +1075,7 @@ async def mix_kg_vector_query(
         context_str = f"""
         -----Knowledge Graph Context-----
         {kg_context if kg_context else "No relevant knowledge graph information found"}
-        
+
         -----Vector Context-----
         {vector_context if vector_context else "No relevant text information found"}
         """.strip()
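Note: below is a minimal usage sketch, not part of the patches above, showing what these changes add up to for a caller. PATCH 1/4 has the API server read ENABLE_LLM_CACHE from the environment / .env and pass it through to LightRAG as enable_llm_cache; PATCH 3/4 makes a "mix" mode query with only_need_context=True return one formatted context string instead of a {"kg_context": ..., "vector_context": ...} dict. The import paths, model helper functions, working directory, and initialization call in the sketch are assumptions for illustration, not something introduced by this PR.

import asyncio

from lightrag import LightRAG, QueryParam
# Assumed helper functions; substitute whatever LLM/embedding funcs your setup uses.
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed


async def main():
    rag = LightRAG(
        working_dir="./rag_storage",  # hypothetical working directory
        llm_model_func=gpt_4o_mini_complete,
        embedding_func=openai_embed,
        enable_llm_cache=True,  # the flag PATCH 1/4 wires up from ENABLE_LLM_CACHE
    )
    # Recent LightRAG versions expect explicit storage initialization (assumption).
    await rag.initialize_storages()

    param = QueryParam(mode="mix", only_need_context=True)
    context = await rag.aquery("What does this project cache?", param=param)

    # Before PATCH 3/4 this was a dict; after it, it is a single string containing
    # the "-----Knowledge Graph Context-----" and "-----Vector Context-----" sections.
    print(type(context))
    print(context[:300])


if __name__ == "__main__":
    asyncio.run(main())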