diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 5df4f765..c42a816a 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -50,9 +50,6 @@ from .auth import auth_handler
 # This update allows the user to put a different.env file for each lightrag folder
 load_dotenv(".env", override=True)
 
-# Read entity extraction cache config
-enable_llm_cache = os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "false").lower() == "true"
-
 # Initialize config parser
 config = configparser.ConfigParser()
 config.read("config.ini")
@@ -326,7 +323,7 @@ def create_app(args):
             vector_db_storage_cls_kwargs={
                 "cosine_better_than_threshold": args.cosine_threshold
             },
-            enable_llm_cache_for_entity_extract=enable_llm_cache,  # Read from environment variable
+            enable_llm_cache_for_entity_extract=args.enable_llm_cache,  # Read from args
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -355,7 +352,7 @@ def create_app(args):
             vector_db_storage_cls_kwargs={
                 "cosine_better_than_threshold": args.cosine_threshold
             },
-            enable_llm_cache_for_entity_extract=enable_llm_cache,  # Read from environment variable
+            enable_llm_cache_for_entity_extract=args.enable_llm_cache,  # Read from args
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -419,6 +416,7 @@ def create_app(args):
                 "doc_status_storage": args.doc_status_storage,
                 "graph_storage": args.graph_storage,
                 "vector_storage": args.vector_storage,
+                "enable_llm_cache": args.enable_llm_cache,
             },
             "update_status": update_status,
         }
diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index dc467449..da443558 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -359,6 +359,13 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     # Inject chunk configuration
     args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
     args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
+
+    # Inject LLM cache configuration
+    args.enable_llm_cache = get_env_value(
+        "ENABLE_LLM_CACHE_FOR_EXTRACT",
+        False,
+        bool
+    )
 
     ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
 
@@ -451,8 +458,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.history_turns}")
     ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
     ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white(" └─ Top-K: ", end="")
+    ASCIIColors.white(" ├─ Top-K: ", end="")
     ASCIIColors.yellow(f"{args.top_k}")
+    ASCIIColors.white(" └─ LLM Cache Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache}")
 
     # System Configuration
     ASCIIColors.magenta("\n💾 Storage Configuration:")
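
Note (commentary, not part of the diff): the change moves the extraction-cache toggle from a module-level os.getenv read into parse_args, so every consumer sees the same args.enable_llm_cache value. A minimal usage sketch, assuming the server loads a .env file from the launch directory (the variable name comes from the diff above):

    # .env — enable the LLM response cache for entity extraction
    ENABLE_LLM_CACHE_FOR_EXTRACT=true

For illustration only, a hedged stand-in for the bool handling that get_env_value presumably performs; the real helper lives in lightrag/api/utils_api.py, and this sketch merely mirrors the semantics of the removed module-level read (.lower() == "true"):

    import os

    def get_env_value_sketch(name: str, default, value_type=str):
        """Hypothetical stand-in for lightrag's get_env_value helper."""
        raw = os.getenv(name)
        if raw is None:
            return default
        if value_type is bool:
            # Match the removed line's behavior: only "true" (any case) enables.
            return raw.strip().lower() == "true"
        return value_type(raw)

With the variable set, parse_args populates args.enable_llm_cache, both LightRAG instantiations in create_app receive it as enable_llm_cache_for_entity_extract, and the value is echoed on the splash screen and in the configuration block of the status payload.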