diff --git a/.env.example b/.env.example
index 82b9ca70..6f868212 100644
--- a/.env.example
+++ b/.env.example
@@ -6,6 +6,17 @@ PORT=9621
WORKING_DIR=/app/data/rag_storage
INPUT_DIR=/app/data/inputs
+# RAG Configuration
+MAX_ASYNC=4
+MAX_TOKENS=32768
+EMBEDDING_DIM=1024
+MAX_EMBED_TOKENS=8192
+#HISTORY_TURNS=3
+#CHUNK_SIZE=1200
+#CHUNK_OVERLAP_SIZE=100
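+#COSINE_THRESHOLD=0.4 # API server default; the LightRAG core default is 0.2 when not running the API server
+#TOP_K=50 # API server default; the LightRAG core default is 60 when not running the API server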
+
# LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
# Ollama example
LLM_BINDING=ollama
@@ -38,15 +49,6 @@ EMBEDDING_MODEL=bge-m3:latest
# EMBEDDING_BINDING_HOST=http://host.docker.internal:9600
# EMBEDDING_MODEL=bge-m3:latest
-# RAG Configuration
-MAX_ASYNC=4
-MAX_TOKENS=32768
-EMBEDDING_DIM=1024
-MAX_EMBED_TOKENS=8192
-#HISTORY_TURNS=3
-#CHUNK_SIZE=1200
-#CHUNK_OVERLAP_SIZE=100
-
# Security (empty for no key)
LIGHTRAG_API_KEY=your-secure-api-key-here
diff --git a/README.md b/README.md
index 6e8d6507..ad405e90 100644
--- a/README.md
+++ b/README.md
@@ -360,6 +360,8 @@ class QueryParam:
max_token_for_local_context: int = 4000
```
+> The default value of `top_k` can be changed with the environment variable `TOP_K`.
+
### Batch Insert
```python
@@ -730,10 +732,10 @@ if __name__ == "__main__":
| **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
| **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
| **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` |
-| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` |
+| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` (default value can be changed by env var MAX_TOKENS) |
+| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` (default value can be changed by env var MAX_ASYNC) |
| **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
-| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector database (currently not used) | |
+| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for the vector database, such as the threshold for node and relation retrieval. | `cosine_better_than_threshold: 0.2` (default value can be changed by env var COSINE_THRESHOLD) |
| **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
| **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
| **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
@@ -741,6 +743,7 @@ if __name__ == "__main__":
| **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:
- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.
- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.
- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
### Error Handling
+
Click to view error handling details
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 4e818242..288ff79c 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -98,6 +98,8 @@ After starting the lightrag-server, you can add an Ollama-type connection in the
LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.
+For better performance, the API server's default values for TOP_K and COSINE_THRESHOLD are set to 50 and 0.4 respectively. If COSINE_THRESHOLD remains at its default value of 0.2 in LightRAG, many irrelevant entities and relations would be retrieved and sent to the LLM.
+
### Environment Variables
You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables:
@@ -111,6 +113,17 @@ PORT=9621
WORKING_DIR=/app/data/rag_storage
INPUT_DIR=/app/data/inputs
+# RAG Configuration
+MAX_ASYNC=4
+MAX_TOKENS=32768
+EMBEDDING_DIM=1024
+MAX_EMBED_TOKENS=8192
+#HISTORY_TURNS=3
+#CHUNK_SIZE=1200
+#CHUNK_OVERLAP_SIZE=100
+#COSINE_THRESHOLD=0.4
+#TOP_K=50
+
# LLM Configuration
LLM_BINDING=ollama
LLM_BINDING_HOST=http://localhost:11434
@@ -124,14 +137,8 @@ EMBEDDING_BINDING=ollama
EMBEDDING_BINDING_HOST=http://localhost:11434
EMBEDDING_MODEL=bge-m3:latest
-# RAG Configuration
-MAX_ASYNC=4
-MAX_TOKENS=32768
-EMBEDDING_DIM=1024
-MAX_EMBED_TOKENS=8192
-
# Security
-LIGHTRAG_API_KEY=
+#LIGHTRAG_API_KEY=your-api-key-for-accessing-LightRAG
# Logging
LOG_LEVEL=INFO
@@ -186,10 +193,9 @@ PORT=7000 python lightrag.py
| --ssl | False | Enable HTTPS |
| --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
| --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
+| --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. |
+| --cosine-threshold | 0.4 | The cosine similarity threshold for node and relation retrieval; works together with top-k to control which nodes and relations are retrieved. |
-
-
-For protecting the server using an authentication key, you can also use an environment variable named `LIGHTRAG_API_KEY`.
### Example Usage
#### Running a Lightrag server with ollama default local server as llm and embedding backends
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index d9edc351..e162f5ec 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -212,8 +212,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.chunk_size}")
ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="")
ASCIIColors.yellow(f"{args.chunk_overlap_size}")
- ASCIIColors.white(" └─ History Turns: ", end="")
+ ASCIIColors.white(" ├─ History Turns: ", end="")
ASCIIColors.yellow(f"{args.history_turns}")
+ ASCIIColors.white(" ├─ Cosine Threshold: ", end="")
+ ASCIIColors.yellow(f"{args.cosine_threshold}")
+ ASCIIColors.white(" └─ Top-K: ", end="")
+ ASCIIColors.yellow(f"{args.top_k}")
# System Configuration
ASCIIColors.magenta("\n🛠️ System Configuration:")
@@ -489,6 +493,20 @@ def parse_args() -> argparse.Namespace:
help="Number of conversation history turns to include (default: from env or 3)",
)
+ # Search parameters
+ parser.add_argument(
+ "--top-k",
+ type=int,
+ default=get_env_value("TOP_K", 50, int),
+ help="Number of most similar results to return (default: from env or 50)",
+ )
+ parser.add_argument(
+ "--cosine-threshold",
+ type=float,
+ default=get_env_value("COSINE_THRESHOLD", 0.4, float),
+ help="Cosine similarity threshold (default: from env or 0.4)",
+ )
+
parser.add_argument(
"--simulated-model-name",
type=str,
@@ -862,6 +880,9 @@ def create_app(args):
graph_storage=ollama_server_infos.GRAPH_STORAGE,
vector_storage=ollama_server_infos.VECTOR_STORAGE,
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
+ vector_db_storage_cls_kwargs={
+ "cosine_better_than_threshold": args.cosine_threshold
+ },
)
else:
rag = LightRAG(
@@ -871,6 +892,9 @@ def create_app(args):
else openai_alike_model_complete,
chunk_token_size=int(args.chunk_size),
chunk_overlap_token_size=int(args.chunk_overlap_size),
+ llm_model_kwargs={
+ "timeout": args.timeout,
+ },
llm_model_name=args.llm_model,
llm_model_max_async=args.max_async,
llm_model_max_token_size=args.max_tokens,
@@ -879,6 +903,9 @@ def create_app(args):
graph_storage=ollama_server_infos.GRAPH_STORAGE,
vector_storage=ollama_server_infos.VECTOR_STORAGE,
doc_status_storage=ollama_server_infos.DOC_STATUS_STORAGE,
+ vector_db_storage_cls_kwargs={
+ "cosine_better_than_threshold": args.cosine_threshold
+ },
)
async def index_file(file_path: Union[str, Path]) -> None:
@@ -1068,6 +1095,7 @@ def create_app(args):
mode=request.mode,
stream=request.stream,
only_need_context=request.only_need_context,
+ top_k=args.top_k,
),
)
@@ -1109,6 +1137,7 @@ def create_app(args):
mode=request.mode,
stream=True,
only_need_context=request.only_need_context,
+ top_k=args.top_k,
),
)
@@ -1648,6 +1677,7 @@ def create_app(args):
"stream": request.stream,
"only_need_context": False,
"conversation_history": conversation_history,
+ "top_k": args.top_k,
}
if args.history_turns is not None:
diff --git a/lightrag/base.py b/lightrag/base.py
index 36e70893..e71cac3f 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -1,3 +1,4 @@
+import os
from dataclasses import dataclass, field
from typing import (
TypedDict,
@@ -32,7 +33,7 @@ class QueryParam:
response_type: str = "Multiple Paragraphs"
stream: bool = False
# Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
- top_k: int = 60
+ top_k: int = int(os.getenv("TOP_K", "60"))
# Number of document chunks to retrieve.
# top_n: int = 10
# Number of tokens for the original chunks.
diff --git a/lightrag/kg/chroma_impl.py b/lightrag/kg/chroma_impl.py
index 200e780c..72a2627a 100644
--- a/lightrag/kg/chroma_impl.py
+++ b/lightrag/kg/chroma_impl.py
@@ -1,3 +1,4 @@
+import os
import asyncio
from dataclasses import dataclass
from typing import Union
@@ -12,16 +13,16 @@ from lightrag.utils import logger
class ChromaVectorDBStorage(BaseVectorStorage):
"""ChromaDB vector storage implementation."""
- cosine_better_than_threshold: float = 0.2
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
def __post_init__(self):
try:
# Use global config value if specified, otherwise use default
- self.cosine_better_than_threshold = self.global_config.get(
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
+ self.cosine_better_than_threshold = config.get(
"cosine_better_than_threshold", self.cosine_better_than_threshold
)
- config = self.global_config.get("vector_db_storage_cls_kwargs", {})
user_collection_settings = config.get("collection_settings", {})
# Default HNSW index settings for ChromaDB
default_collection_settings = {
diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py
index aa23e811..ed272fee 100644
--- a/lightrag/kg/nano_vector_db_impl.py
+++ b/lightrag/kg/nano_vector_db_impl.py
@@ -73,9 +73,15 @@ from lightrag.base import (
@dataclass
class NanoVectorDBStorage(BaseVectorStorage):
- cosine_better_than_threshold: float = 0.2
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
def __post_init__(self):
+ # Use global config value if specified, otherwise use default
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
+ self.cosine_better_than_threshold = config.get(
+ "cosine_better_than_threshold", self.cosine_better_than_threshold
+ )
+
self._client_file_name = os.path.join(
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
)
@@ -83,9 +89,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
self._client = NanoVectorDB(
self.embedding_func.embedding_dim, storage_file=self._client_file_name
)
- self.cosine_better_than_threshold = self.global_config.get(
- "cosine_better_than_threshold", self.cosine_better_than_threshold
- )
async def upsert(self, data: dict[str, dict]):
logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
@@ -134,6 +137,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
async def query(self, query: str, top_k=5):
embedding = await self.embedding_func([query])
embedding = embedding[0]
+ logger.info(
+ f"Query: {query}, top_k: {top_k}, cosine_better_than_threshold: {self.cosine_better_than_threshold}"
+ )
results = self._client.query(
query=embedding,
top_k=top_k,
diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py
index 2d1f631c..368e8618 100644
--- a/lightrag/kg/oracle_impl.py
+++ b/lightrag/kg/oracle_impl.py
@@ -1,3 +1,4 @@
+import os
import asyncio
# import html
@@ -341,10 +342,14 @@ class OracleKVStorage(BaseKVStorage):
class OracleVectorDBStorage(BaseVectorStorage):
# should pass db object to self.db
db: OracleDB = None
- cosine_better_than_threshold: float = 0.2
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
def __post_init__(self):
- pass
+ # Use global config value if specified, otherwise use default
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
+ self.cosine_better_than_threshold = config.get(
+ "cosine_better_than_threshold", self.cosine_better_than_threshold
+ )
async def upsert(self, data: dict[str, dict]):
"""向向量数据库中插入数据"""
diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py
index 57fe8d8d..b315abca 100644
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -301,12 +301,14 @@ class PGKVStorage(BaseKVStorage):
@dataclass
class PGVectorStorage(BaseVectorStorage):
- cosine_better_than_threshold: float = 0.2
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
db: PostgreSQLDB = None
def __post_init__(self):
self._max_batch_size = self.global_config["embedding_batch_num"]
- self.cosine_better_than_threshold = self.global_config.get(
+ # Use global config value if specified, otherwise use default
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
+ self.cosine_better_than_threshold = config.get(
"cosine_better_than_threshold", self.cosine_better_than_threshold
)
diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py
index d76c2c99..0579a57c 100644
--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -217,14 +217,16 @@ class TiDBKVStorage(BaseKVStorage):
@dataclass
class TiDBVectorDBStorage(BaseVectorStorage):
- cosine_better_than_threshold: float = 0.2
+ cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
def __post_init__(self):
self._client_file_name = os.path.join(
self.global_config["working_dir"], f"vdb_{self.namespace}.json"
)
self._max_batch_size = self.global_config["embedding_batch_num"]
- self.cosine_better_than_threshold = self.global_config.get(
+ # Use global config value if specified, otherwise use default
+ config = self.global_config.get("vector_db_storage_cls_kwargs", {})
+ self.cosine_better_than_threshold = config.get(
"cosine_better_than_threshold", self.cosine_better_than_threshold
)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index acad9295..92fc954f 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -158,8 +158,8 @@ class LightRAG:
# LLM
llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization)
llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
- llm_model_max_token_size: int = 32768
- llm_model_max_async: int = 16
+ llm_model_max_token_size: int = int(os.getenv("MAX_TOKENS", "32768"))
+ llm_model_max_async: int = int(os.getenv("MAX_ASYNC", "16"))
llm_model_kwargs: dict = field(default_factory=dict)
# storage
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 6756a40c..fbcb6c7c 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -590,8 +590,8 @@ async def kg_query(
query, query_param, global_config, hashing_kv
)
- logger.info(f"High-level keywords: {hl_keywords}")
- logger.info(f"Low-level keywords: {ll_keywords}")
+ logger.debug(f"High-level keywords: {hl_keywords}")
+ logger.debug(f"Low-level keywords: {ll_keywords}")
# Handle empty keywords
if hl_keywords == [] and ll_keywords == []:
@@ -1026,6 +1026,10 @@ async def _build_query_context(
[hl_relations_context, ll_relations_context],
[hl_text_units_context, ll_text_units_context],
)
+ # not necessary to use LLM to generate a response
+ if not entities_context.strip() and not relations_context.strip():
+ return None
+
return f"""
-----Entities-----
```csv