From c423f37f624d4de067823aae6fe52dc459a168f4 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 21:00:42 +0800 Subject: [PATCH 01/11] Lower log level for keyword outputs --- lightrag/operate.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 10c76bcc..66e4533c 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -590,8 +590,8 @@ async def kg_query( query, query_param, global_config, hashing_kv ) - logger.info(f"High-level keywords: {hl_keywords}") - logger.info(f"Low-level keywords: {ll_keywords}") + logger.debug(f"High-level keywords: {hl_keywords}") + logger.debug(f"Low-level keywords: {ll_keywords}") # Handle empty keywords if hl_keywords == [] and ll_keywords == []: From 4302c65a04935eeaedaa2f842d0b3458975100d4 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 21:01:35 +0800 Subject: [PATCH 02/11] Avoid unnecessary LLM response generation --- lightrag/operate.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lightrag/operate.py b/lightrag/operate.py index 66e4533c..cf096993 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1025,6 +1025,10 @@ async def _build_query_context( [hl_relations_context, ll_relations_context], [hl_text_units_context, ll_text_units_context], ) + # not necessary to use LLM to generate a response + if not entities_context.strip() and not relations_context.strip(): + return None + return f""" -----Entities----- ```csv From d0052456d45ca6ae6d87a55a6b730c491e9f65ab Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 21:09:11 +0800 Subject: [PATCH 03/11] Fix cosine threshold parameter setting error --- lightrag/kg/nano_vector_db_impl.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index aa23e811..328a1242 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -83,8 +83,11 @@ class 
NanoVectorDBStorage(BaseVectorStorage): self._client = NanoVectorDB( self.embedding_func.embedding_dim, storage_file=self._client_file_name ) - self.cosine_better_than_threshold = self.global_config.get( - "cosine_better_than_threshold", self.cosine_better_than_threshold + # get cosine_better_than_threshold from LightRAG + vector_db_kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) + self.cosine_better_than_threshold = vector_db_kwargs.get( + "cosine_better_than_threshold", + self.global_config.get("cosine_better_than_threshold", self.cosine_better_than_threshold) ) async def upsert(self, data: dict[str, dict]): From 7aedc08cafb010821a0c5993234214db58f3cef8 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 21:34:34 +0800 Subject: [PATCH 04/11] Add RAG configuration options and enhance parameter configurability - Add top-k and cosine-threshold params for api server - Update .env and cli params handling with new parameters - Improve splash screen display - Update bash and storage classes to read new parameters from .env file. --- .env.example | 20 +++++++++++--------- lightrag/api/lightrag_server.py | 29 ++++++++++++++++++++++++++++- lightrag/base.py | 3 ++- lightrag/kg/nano_vector_db_impl.py | 2 +- 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/.env.example b/.env.example index 82b9ca70..0c61d2e0 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,17 @@ PORT=9621 WORKING_DIR=/app/data/rag_storage INPUT_DIR=/app/data/inputs +# RAG Configuration +MAX_ASYNC=4 +MAX_TOKENS=32768 +EMBEDDING_DIM=1024 +MAX_EMBED_TOKENS=8192 +#HISTORY_TURNS=3 +#CHUNK_SIZE=1200 +#CHUNK_OVERLAP_SIZE=100 +#COSINE_THRESHOLD=0.2 +#TOP_K=50 + # LLM Configuration (Use valid host.
For local services, you can use host.docker.internal) # Ollama example LLM_BINDING=ollama @@ -38,15 +49,6 @@ EMBEDDING_MODEL=bge-m3:latest # EMBEDDING_BINDING_HOST=http://host.docker.internal:9600 # EMBEDDING_MODEL=bge-m3:latest -# RAG Configuration -MAX_ASYNC=4 -MAX_TOKENS=32768 -EMBEDDING_DIM=1024 -MAX_EMBED_TOKENS=8192 -#HISTORY_TURNS=3 -#CHUNK_SIZE=1200 -#CHUNK_OVERLAP_SIZE=100 - # Security (empty for no key) LIGHTRAG_API_KEY=your-secure-api-key-here diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 35e4acf7..2ab30d2b 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -207,8 +207,12 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.yellow(f"{args.chunk_size}") ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="") ASCIIColors.yellow(f"{args.chunk_overlap_size}") - ASCIIColors.white(" └─ History Turns: ", end="") + ASCIIColors.white(" ├─ History Turns: ", end="") ASCIIColors.yellow(f"{args.history_turns}") + ASCIIColors.white(" ├─ Cosine Threshold: ", end="") + ASCIIColors.yellow(f"{args.cosine_threshold}") + ASCIIColors.white(" └─ Top-K: ", end="") + ASCIIColors.yellow(f"{args.top_k}") # System Configuration ASCIIColors.magenta("\n🛠️ System Configuration:") @@ -484,6 +488,20 @@ def parse_args() -> argparse.Namespace: help="Number of conversation history turns to include (default: from env or 3)", ) + # Search parameters + parser.add_argument( + "--top-k", + type=int, + default=get_env_value("TOP_K", 50, int), + help="Number of most similar results to return (default: from env or 50)", + ) + parser.add_argument( + "--cosine-threshold", + type=float, + default=get_env_value("COSINE_THRESHOLD", 0.4, float), + help="Cosine similarity threshold (default: from env or 0.4)", + ) + args = parser.parse_args() return args @@ -846,6 +864,9 @@ def create_app(args): graph_storage=GRAPH_STORAGE, vector_storage=VECTOR_STORAGE, doc_status_storage=DOC_STATUS_STORAGE, + 
vector_db_storage_cls_kwargs={ + "cosine_better_than_threshold": args.cosine_threshold + }, ) else: rag = LightRAG( @@ -863,6 +884,9 @@ def create_app(args): graph_storage=GRAPH_STORAGE, vector_storage=VECTOR_STORAGE, doc_status_storage=DOC_STATUS_STORAGE, + vector_db_storage_cls_kwargs={ + "cosine_better_than_threshold": args.cosine_threshold + }, ) async def index_file(file_path: Union[str, Path]) -> None: @@ -1052,6 +1076,7 @@ def create_app(args): mode=request.mode, stream=request.stream, only_need_context=request.only_need_context, + top_k=args.top_k, ), ) @@ -1093,6 +1118,7 @@ def create_app(args): mode=request.mode, stream=True, only_need_context=request.only_need_context, + top_k=args.top_k, ), ) @@ -1632,6 +1658,7 @@ def create_app(args): "stream": request.stream, "only_need_context": False, "conversation_history": conversation_history, + "top_k": args.top_k, } if args.history_turns is not None: diff --git a/lightrag/base.py b/lightrag/base.py index 36e70893..e71cac3f 100644 --- a/lightrag/base.py +++ b/lightrag/base.py @@ -1,3 +1,4 @@ +import os from dataclasses import dataclass, field from typing import ( TypedDict, @@ -32,7 +33,7 @@ class QueryParam: response_type: str = "Multiple Paragraphs" stream: bool = False # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. - top_k: int = 60 + top_k: int = int(os.getenv("TOP_K", "60")) # Number of document chunks to retrieve. # top_n: int = 10 # Number of tokens for the original chunks. 
diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index 328a1242..b6650797 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -73,7 +73,7 @@ from lightrag.base import ( @dataclass class NanoVectorDBStorage(BaseVectorStorage): - cosine_better_than_threshold: float = 0.2 + cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) def __post_init__(self): self._client_file_name = os.path.join( From 7ff8c7b9d8516aa0b385944a4585a65385d9e2c7 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 21:35:46 +0800 Subject: [PATCH 05/11] Add timeout parameter to OpenAI alike LLM model configuration --- lightrag/api/lightrag_server.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 2ab30d2b..65b1a39e 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -876,6 +876,9 @@ def create_app(args): else openai_alike_model_complete, chunk_token_size=int(args.chunk_size), chunk_overlap_token_size=int(args.chunk_overlap_size), + llm_model_kwargs={ + "timeout": args.timeout, + }, llm_model_name=args.llm_model, llm_model_max_async=args.max_async, llm_model_max_token_size=args.max_tokens, From c8b890547ab9a0377d94efdf25de7f3862a098ad Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 21:36:31 +0800 Subject: [PATCH 06/11] Add logging for query parameters in NanoVectorDBStorage.query --- lightrag/kg/nano_vector_db_impl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index b6650797..54c1c824 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -137,6 +137,7 @@ class NanoVectorDBStorage(BaseVectorStorage): async def query(self, query: str, top_k=5): embedding = await self.embedding_func([query]) embedding = embedding[0] + logger.info(f"Query: {query}, top_k: 
{top_k}, cosine_better_than_threshold: {self.cosine_better_than_threshold}") results = self._client.query( query=embedding, top_k=top_k, From 90c765c724c1ed4e68e3efd6c7e1b9eb2973e3b4 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 22:14:18 +0800 Subject: [PATCH 07/11] Fix linting --- lightrag/kg/nano_vector_db_impl.py | 8 ++++++-- lightrag/operate.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index 54c1c824..e42036b5 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -87,7 +87,9 @@ class NanoVectorDBStorage(BaseVectorStorage): vector_db_kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) self.cosine_better_than_threshold = vector_db_kwargs.get( "cosine_better_than_threshold", - self.global_config.get("cosine_better_than_threshold", self.cosine_better_than_threshold) + self.global_config.get( + "cosine_better_than_threshold", self.cosine_better_than_threshold + ), ) async def upsert(self, data: dict[str, dict]): @@ -137,7 +139,9 @@ class NanoVectorDBStorage(BaseVectorStorage): async def query(self, query: str, top_k=5): embedding = await self.embedding_func([query]) embedding = embedding[0] - logger.info(f"Query: {query}, top_k: {top_k}, cosine_better_than_threshold: {self.cosine_better_than_threshold}") + logger.info( + f"Query: {query}, top_k: {top_k}, cosine_better_than_threshold: {self.cosine_better_than_threshold}" + ) results = self._client.query( query=embedding, top_k=top_k, diff --git a/lightrag/operate.py b/lightrag/operate.py index cf096993..aeb64a7e 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -1028,7 +1028,7 @@ async def _build_query_context( # not necessary to use LLM to generate a response if not entities_context.strip() and not relations_context.strip(): return None - + return f""" -----Entities----- ```csv From 20d6355a4a1f9f62c8b4dfe16c579e04fb4307a5 Mon Sep 17 
00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 22:41:18 +0800 Subject: [PATCH 08/11] Fix cosine threshold parameter setting error for chroma --- lightrag/kg/chroma_impl.py | 7 ++++--- lightrag/kg/nano_vector_db_impl.py | 14 ++++++-------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/lightrag/kg/chroma_impl.py b/lightrag/kg/chroma_impl.py index 200e780c..72a2627a 100644 --- a/lightrag/kg/chroma_impl.py +++ b/lightrag/kg/chroma_impl.py @@ -1,3 +1,4 @@ +import os import asyncio from dataclasses import dataclass from typing import Union @@ -12,16 +13,16 @@ from lightrag.utils import logger class ChromaVectorDBStorage(BaseVectorStorage): """ChromaDB vector storage implementation.""" - cosine_better_than_threshold: float = 0.2 + cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) def __post_init__(self): try: # Use global config value if specified, otherwise use default - self.cosine_better_than_threshold = self.global_config.get( + config = self.global_config.get("vector_db_storage_cls_kwargs", {}) + self.cosine_better_than_threshold = config.get( "cosine_better_than_threshold", self.cosine_better_than_threshold ) - config = self.global_config.get("vector_db_storage_cls_kwargs", {}) user_collection_settings = config.get("collection_settings", {}) # Default HNSW index settings for ChromaDB default_collection_settings = { diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index e42036b5..ed272fee 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -76,6 +76,12 @@ class NanoVectorDBStorage(BaseVectorStorage): cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) def __post_init__(self): + # Use global config value if specified, otherwise use default + config = self.global_config.get("vector_db_storage_cls_kwargs", {}) + self.cosine_better_than_threshold = config.get( + "cosine_better_than_threshold", 
self.cosine_better_than_threshold + ) + self._client_file_name = os.path.join( self.global_config["working_dir"], f"vdb_{self.namespace}.json" ) @@ -83,14 +89,6 @@ class NanoVectorDBStorage(BaseVectorStorage): self._client = NanoVectorDB( self.embedding_func.embedding_dim, storage_file=self._client_file_name ) - # get cosine_better_than_threshold from LightRAG - vector_db_kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {}) - self.cosine_better_than_threshold = vector_db_kwargs.get( - "cosine_better_than_threshold", - self.global_config.get( - "cosine_better_than_threshold", self.cosine_better_than_threshold - ), - ) async def upsert(self, data: dict[str, dict]): logger.info(f"Inserting {len(data)} vectors to {self.namespace}") From e29682eef86580589a2697eb9648846ee68454e6 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 23:39:47 +0800 Subject: [PATCH 09/11] Allow configuration of LLM parameters through environment variables --- lightrag/lightrag.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index acad9295..92fc954f 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -158,8 +158,8 @@ class LightRAG: # LLM llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization) llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' - llm_model_max_token_size: int = 32768 - llm_model_max_async: int = 16 + llm_model_max_token_size: int = int(os.getenv("MAX_TOKENS", "32768")) + llm_model_max_async: int = int(os.getenv("MAX_ASYNC", "16")) llm_model_kwargs: dict = field(default_factory=dict) # storage From 46c9c7d95bcde367162bf7daab5673e738c85812 Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 23:45:20 +0800 Subject: [PATCH 10/11] Update sample env file and documentation - Change COSINE_THRESHOLD to 0.4 - Adjust TOP_K to 50 - Enhance API 
README details --- .env.example | 4 ++-- README.md | 9 ++++++--- lightrag/api/README.md | 26 ++++++++++++++++---------- 3 files changed, 24 insertions(+), 15 deletions(-) diff --git a/.env.example b/.env.example index 0c61d2e0..6f868212 100644 --- a/.env.example +++ b/.env.example @@ -14,8 +14,8 @@ MAX_EMBED_TOKENS=8192 #HISTORY_TURNS=3 #CHUNK_SIZE=1200 #CHUNK_OVERLAP_SIZE=100 -#COSINE_THRESHOLD=0.2 -#TOP_K=50 +#COSINE_THRESHOLD=0.4 # 0.2 while not running API server +#TOP_K=50 # 60 while not running API server # LLM Configuration (Use valid host. For local services, you can use host.docker.internal) # Ollama example diff --git a/README.md b/README.md index 6e8d6507..ad405e90 100644 --- a/README.md +++ b/README.md @@ -360,6 +360,8 @@ class QueryParam: max_token_for_local_context: int = 4000 ``` +> default value of Top_k can be changed by environment variable TOP_K. + ### Batch Insert ```python @@ -730,10 +732,10 @@ if __name__ == "__main__": | **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` | | **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` | | **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` | -| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` | -| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` | +| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768`(default value changed by env var MAX_TOKENS) | +| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16`(default value changed by env var MAX_ASYNC) | | **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | | -| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector
database (currently not used) | | +| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector database, like setting the threshold for nodes and relations retrieval. | cosine_better_than_threshold: 0.2(default value changed by env var COSINE_THRESHOLD) | | **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` | | **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` | | **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` | @@ -741,6 +743,7 @@ if __name__ == "__main__": | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:
- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.
- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.
- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` | ### Error Handling +
 Click to view error handling details diff --git a/lightrag/api/README.md b/lightrag/api/README.md index 4e818242..288ff79c 100644 --- a/lightrag/api/README.md +++ b/lightrag/api/README.md @@ -98,6 +98,8 @@ After starting the lightrag-server, you can add an Ollama-type connection in the LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables. +For better performance, the API server's default values for TOP_K and COSINE_THRESHOLD are set to 50 and 0.4 respectively. If COSINE_THRESHOLD remains at its default value of 0.2 in LightRAG, many irrelevant entities and relations would be retrieved and sent to the LLM. + ### Environment Variables You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables: @@ -111,6 +113,17 @@ PORT=9621 WORKING_DIR=/app/data/rag_storage INPUT_DIR=/app/data/inputs +# RAG Configuration +MAX_ASYNC=4 +MAX_TOKENS=32768 +EMBEDDING_DIM=1024 +MAX_EMBED_TOKENS=8192 +#HISTORY_TURNS=3 +#CHUNK_SIZE=1200 +#CHUNK_OVERLAP_SIZE=100 +#COSINE_THRESHOLD=0.4 +#TOP_K=50 + # LLM Configuration LLM_BINDING=ollama LLM_BINDING_HOST=http://localhost:11434 @@ -124,14 +137,8 @@ EMBEDDING_BINDING=ollama EMBEDDING_BINDING_HOST=http://localhost:11434 EMBEDDING_MODEL=bge-m3:latest -# RAG Configuration -MAX_ASYNC=4 -MAX_TOKENS=32768 -EMBEDDING_DIM=1024 -MAX_EMBED_TOKENS=8192 - # Security -LIGHTRAG_API_KEY= +#LIGHTRAG_API_KEY=your-api-key-for-accessing-LightRAG # Logging LOG_LEVEL=INFO @@ -186,10 +193,9 @@ PORT=7000 python lightrag.py | --ssl | False | Enable HTTPS | | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) | | --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) | +| --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and
relationships in "global" mode. | +| --cosine-threshold | 0.4 | The cosine threshold for nodes and relations retrieval, works with top-k to control the retrieval of nodes and relations. | - - -For protecting the server using an authentication key, you can also use an environment variable named `LIGHTRAG_API_KEY`. ### Example Usage #### Running a Lightrag server with ollama default local server as llm and embedding backends From 06647438b21a2ab04a42a1048d1fe43fd21cbbeb Mon Sep 17 00:00:00 2001 From: yangdx Date: Wed, 29 Jan 2025 23:47:57 +0800 Subject: [PATCH 11/11] Refactor threshold handling to use environment variables and global config settings for oracle, postgres and tidb --- lightrag/kg/oracle_impl.py | 9 +++++++-- lightrag/kg/postgres_impl.py | 6 ++++-- lightrag/kg/tidb_impl.py | 6 ++++-- 3 files changed, 15 insertions(+), 6 deletions(-) diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py index 2d1f631c..368e8618 100644 --- a/lightrag/kg/oracle_impl.py +++ b/lightrag/kg/oracle_impl.py @@ -1,3 +1,4 @@ +import os import asyncio # import html @@ -341,10 +342,14 @@ class OracleKVStorage(BaseKVStorage): class OracleVectorDBStorage(BaseVectorStorage): # should pass db object to self.db db: OracleDB = None - cosine_better_than_threshold: float = 0.2 + cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) def __post_init__(self): - pass + # Use global config value if specified, otherwise use default + config = self.global_config.get("vector_db_storage_cls_kwargs", {}) + self.cosine_better_than_threshold = config.get( + "cosine_better_than_threshold", self.cosine_better_than_threshold + ) async def upsert(self, data: dict[str, dict]): """向向量数据库中插入数据""" diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 57fe8d8d..b315abca 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -301,12 +301,14 @@ class PGKVStorage(BaseKVStorage): @dataclass class
PGVectorStorage(BaseVectorStorage): - cosine_better_than_threshold: float = 0.2 + cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) db: PostgreSQLDB = None def __post_init__(self): self._max_batch_size = self.global_config["embedding_batch_num"] - self.cosine_better_than_threshold = self.global_config.get( + # Use global config value if specified, otherwise use default + config = self.global_config.get("vector_db_storage_cls_kwargs", {}) + self.cosine_better_than_threshold = config.get( "cosine_better_than_threshold", self.cosine_better_than_threshold ) diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py index d76c2c99..0579a57c 100644 --- a/lightrag/kg/tidb_impl.py +++ b/lightrag/kg/tidb_impl.py @@ -217,14 +217,16 @@ class TiDBKVStorage(BaseKVStorage): @dataclass class TiDBVectorDBStorage(BaseVectorStorage): - cosine_better_than_threshold: float = 0.2 + cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) def __post_init__(self): self._client_file_name = os.path.join( self.global_config["working_dir"], f"vdb_{self.namespace}.json" ) self._max_batch_size = self.global_config["embedding_batch_num"] - self.cosine_better_than_threshold = self.global_config.get( + # Use global config value if specified, otherwise use default + config = self.global_config.get("vector_db_storage_cls_kwargs", {}) + self.cosine_better_than_threshold = config.get( "cosine_better_than_threshold", self.cosine_better_than_threshold )