Merge branch 'HKUDS:main' into main

2025-04-09 13:40:19 +08:00
parent 8aa3cd799a 97ce84a8b0
commit 224f63cd5f
6 changed files with 21 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -7,7 +7,7 @@
 <img src="./assets/logo.png" width="80" height="80" alt="lightrag">
 </td>
 <td>
-    
+
 <div>
    <p>
        <a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
--- a/env.example
+++ b/env.example
@@ -40,7 +40,6 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
 # MAX_TOKEN_ENTITY_DESC=4000

 ### Settings for document indexing
-ENABLE_LLM_CACHE_FOR_EXTRACT=true
 SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
@@ -64,6 +63,8 @@ TEMPERATURE=0.5
 MAX_ASYNC=4
 ### Max tokens send to LLM (less than context size of the model)
 MAX_TOKENS=32768
+ENABLE_LLM_CACHE=true
+ENABLE_LLM_CACHE_FOR_EXTRACT=true

 ### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
 LLM_BINDING=ollama
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -297,6 +297,7 @@ def parse_args() -> argparse.Namespace:
    args.enable_llm_cache_for_extract = get_env_value(
        "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
    )
+    args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)

    # Inject LLM temperature configuration
    args.temperature = get_env_value("TEMPERATURE", 0.5, float)
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -157,7 +157,6 @@ def create_app(args):
        "openapi_url": "/openapi.json",  # Explicitly set OpenAPI schema URL
        "docs_url": "/docs",  # Explicitly set docs URL
        "redoc_url": "/redoc",  # Explicitly set redoc URL
-        "openapi_tags": [{"name": "api"}],
        "lifespan": lifespan,
    }

@@ -317,6 +316,7 @@ def create_app(args):
                "cosine_better_than_threshold": args.cosine_threshold
            },
            enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
            embedding_cache_config={
                "enabled": True,
                "similarity_threshold": 0.95,
@@ -348,6 +348,7 @@ def create_app(args):
                "cosine_better_than_threshold": args.cosine_threshold
            },
            enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
+            enable_llm_cache=args.enable_llm_cache,
            embedding_cache_config={
                "enabled": True,
                "similarity_threshold": 0.95,
@@ -470,6 +471,7 @@ def create_app(args):
                    "graph_storage": args.graph_storage,
                    "vector_storage": args.vector_storage,
                    "enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
+                    "enable_llm_cache": args.enable_llm_cache,
                },
                "auth_mode": auth_mode,
                "pipeline_busy": pipeline_status.get("busy", False),
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
    ASCIIColors.yellow(f"{args.max_async}")
    ASCIIColors.white("    ├─ Max Tokens: ", end="")
    ASCIIColors.yellow(f"{args.max_tokens}")
-    ASCIIColors.white("    └─ Timeout: ", end="")
+    ASCIIColors.white("    ├─ Timeout: ", end="")
    ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
+    ASCIIColors.white("    ├─ LLM Cache Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache}")
+    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
+    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")

    # Embedding Configuration
    ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -257,10 +261,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
    ASCIIColors.yellow(f"{args.chunk_overlap_size}")
    ASCIIColors.white("    ├─ Cosine Threshold: ", end="")
    ASCIIColors.yellow(f"{args.cosine_threshold}")
-    ASCIIColors.white("    ├─ Top-K: ", end="")
+    ASCIIColors.white("    └─ Top-K: ", end="")
    ASCIIColors.yellow(f"{args.top_k}")
-    ASCIIColors.white("    └─ LLM Cache for Extraction Enabled: ", end="")
-    ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")

    # System Configuration
    ASCIIColors.magenta("\n💾 Storage Configuration:")
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -1072,7 +1072,14 @@ async def mix_kg_vector_query(
        return PROMPTS["fail_response"]

    if query_param.only_need_context:
-        return {"kg_context": kg_context, "vector_context": vector_context}
+        context_str = f"""
+        -----Knowledge Graph Context-----
+        {kg_context if kg_context else "No relevant knowledge graph information found"}
+
+        -----Vector Context-----
+        {vector_context if vector_context else "No relevant text information found"}
+        """.strip()
+        return context_str

    # 5. Construct hybrid prompt
    sys_prompt = (