diff --git a/README.md b/README.md
index 623c70a6..d4724f88 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@ lightrag
-
+

diff --git a/env.example b/env.example
index d21bbef6..3dbca084 100644
--- a/env.example
+++ b/env.example
@@ -40,7 +40,6 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # MAX_TOKEN_ENTITY_DESC=4000

 ### Settings for document indexing
-ENABLE_LLM_CACHE_FOR_EXTRACT=true
 SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -64,6 +63,8 @@ TEMPERATURE=0.5
 MAX_ASYNC=4
 ### Max tokens send to LLM (less than context size of the model)
 MAX_TOKENS=32768
+ENABLE_LLM_CACHE=true
+ENABLE_LLM_CACHE_FOR_EXTRACT=true

 ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
 LLM_BINDING=ollama

diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 1bbdb1c9..268b41cb 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -297,6 +297,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
+    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)

     # Inject LLM temperature configuration
     args.temperature = get_env_value("TEMPERATURE", 0.5, float)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 84636bde..e7411883 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -157,7 +157,6 @@ def create_app(args):
         "openapi_url": "/openapi.json",  # Explicitly set OpenAPI schema URL
         "docs_url": "/docs",  # Explicitly set docs URL
         "redoc_url": "/redoc",  # Explicitly set redoc URL
-        "openapi_tags": [{"name": "api"}],
         "lifespan": lifespan,
     }
@@ -317,6 +316,7 @@ def create_app(args):
                 "cosine_better_than_threshold": args.cosine_threshold
             },
             enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -348,6 +348,7 @@ def create_app(args):
                 "cosine_better_than_threshold": args.cosine_threshold
             },
             enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
             embedding_cache_config={
                 "enabled": True,
                 "similarity_threshold": 0.95,
@@ -470,6 +471,7 @@ def create_app(args):
                 "graph_storage": args.graph_storage,
                 "vector_storage": args.vector_storage,
                 "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
+                "enable_llm_cache": args.enable_llm_cache,
             },
             "auth_mode": auth_mode,
             "pipeline_busy": pipeline_status.get("busy", False),

diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index ad75fd4e..e9772854 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.max_async}")
     ASCIIColors.white(" ├─ Max Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_tokens}")
-    ASCIIColors.white(" └─ Timeout: ", end="")
+    ASCIIColors.white(" ├─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
+    ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache}")
+    ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")

     # Embedding Configuration
     ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -257,10 +261,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.chunk_overlap_size}")
     ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
     ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white(" ├─ Top-K: ", end="")
+    ASCIIColors.white(" └─ Top-K: ", end="")
     ASCIIColors.yellow(f"{args.top_k}")
-    ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
-    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")

     # System Configuration
     ASCIIColors.magenta("\n💾 Storage Configuration:")

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 19aba310..97a356ad 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1072,7 +1072,14 @@ async def mix_kg_vector_query(
         return PROMPTS["fail_response"]

     if query_param.only_need_context:
-        return {"kg_context": kg_context, "vector_context": vector_context}
+        context_str = f"""
+        -----Knowledge Graph Context-----
+        {kg_context if kg_context else "No relevant knowledge graph information found"}
+
+        -----Vector Context-----
+        {vector_context if vector_context else "No relevant text information found"}
+        """.strip()
+        return context_str

     # 5. Construct hybrid prompt
     sys_prompt = (