From 46c9c7d95bcde367162bf7daab5673e738c85812 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 29 Jan 2025 23:45:20 +0800
Subject: [PATCH] Update sample env file and documentation

- Change COSINE_THRESHOLD to 0.4
- Adjust TOP_K to 50
- Enhance API README details
---
 .env.example           |  4 ++--
 README.md              |  9 ++++++---
 lightrag/api/README.md | 26 ++++++++++++++++----------
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/.env.example b/.env.example
index 0c61d2e0..6f868212 100644
--- a/.env.example
+++ b/.env.example
@@ -14,8 +14,8 @@ MAX_EMBED_TOKENS=8192
 #HISTORY_TURNS=3
 #CHUNK_SIZE=1200
 #CHUNK_OVERLAP_SIZE=100
-#COSINE_THRESHOLD=0.2
-#TOP_K=50
+#COSINE_THRESHOLD=0.4  # 0.2 when not running the API server
+#TOP_K=50  # 60 when not running the API server

 # LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
 # Ollama example
diff --git a/README.md b/README.md
index 6e8d6507..ad405e90 100644
--- a/README.md
+++ b/README.md
@@ -360,6 +360,8 @@ class QueryParam:
     max_token_for_local_context: int = 4000
 ```

+> The default value of `top_k` can be changed via the `TOP_K` environment variable.
+
 ### Batch Insert

 ```python
@@ -730,10 +732,10 @@ if __name__ == "__main__":
 | **embedding\_func\_max\_async** | `int` | Maximum number of concurrent asynchronous embedding processes | `16` |
 | **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
-| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` |
-| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` |
+| **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` (can be overridden by env var `MAX_TOKENS`) |
+| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` (can be overridden by env var `MAX_ASYNC`) |
 | **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
-| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector database (currently not used) | |
+| **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for the vector database, e.g., the cosine threshold for node and relation retrieval | `cosine_better_than_threshold: 0.2` (can be overridden by env var `COSINE_THRESHOLD`) |
 | **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
 | **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
 | **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
@@ -741,6 +743,7 @@ if __name__ == "__main__":
 | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |

 ### Error Handling
+
 <details>
 <summary> Click to view error handling details </summary>

diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 4e818242..288ff79c 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -98,6 +98,8 @@ After starting the lightrag-server, you can add an Ollama-type connection in the
 LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.

+For better performance, the API server's default values for TOP_K and COSINE_THRESHOLD are set to 50 and 0.4 respectively. If COSINE_THRESHOLD stayed at LightRAG's library default of 0.2, many irrelevant entities and relations would be retrieved and sent to the LLM.
+
 ### Environment Variables

 You can configure LightRAG using environment variables by creating a `.env` file in your project root directory. Here's a complete example of available environment variables:

@@ -111,6 +113,17 @@ PORT=9621
 WORKING_DIR=/app/data/rag_storage
 INPUT_DIR=/app/data/inputs

+# RAG Configuration
+MAX_ASYNC=4
+MAX_TOKENS=32768
+EMBEDDING_DIM=1024
+MAX_EMBED_TOKENS=8192
+#HISTORY_TURNS=3
+#CHUNK_SIZE=1200
+#CHUNK_OVERLAP_SIZE=100
+#COSINE_THRESHOLD=0.4
+#TOP_K=50
+
 # LLM Configuration
 LLM_BINDING=ollama
 LLM_BINDING_HOST=http://localhost:11434
@@ -124,14 +137,8 @@ EMBEDDING_BINDING=ollama
 EMBEDDING_BINDING_HOST=http://localhost:11434
 EMBEDDING_MODEL=bge-m3:latest

-# RAG Configuration
-MAX_ASYNC=4
-MAX_TOKENS=32768
-EMBEDDING_DIM=1024
-MAX_EMBED_TOKENS=8192
-
 # Security
-LIGHTRAG_API_KEY=
+#LIGHTRAG_API_KEY=your-api-key-for-accessing-LightRAG

 # Logging
 LOG_LEVEL=INFO
@@ -186,10 +193,9 @@ PORT=7000 python lightrag.py
 | --ssl | False | Enable HTTPS |
 | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
 | --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
+| --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. |
+| --cosine-threshold | 0.4 | Cosine similarity threshold for node and relation retrieval; works with top-k to control how many nodes and relations are retrieved. |

-
-
-For protecting the server using an authentication key, you can also use an environment variable named `LIGHTRAG_API_KEY`.
 ### Example Usage

 #### Running a Lightrag server with ollama default local server as llm and embedding backends
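
The two knobs this patch documents map onto the Python API roughly as sketched below, assuming `QueryParam.top_k` and the `cosine_better_than_threshold` entry of `vector_db_storage_cls_kwargs` as described in the README tables above; the working directory, question text, and reliance on the default LLM/embedding functions are illustrative, not part of the patch.

```python
# Minimal sketch: read the TOP_K and COSINE_THRESHOLD environment variables
# (library defaults 60 and 0.2 per the patch; the API server uses 50 and 0.4)
# and pass them to LightRAG.
import os

from lightrag import LightRAG, QueryParam

cosine_threshold = float(os.getenv("COSINE_THRESHOLD", "0.2"))
top_k = int(os.getenv("TOP_K", "60"))

rag = LightRAG(
    working_dir="./rag_storage",
    # Filters out weakly related nodes/relations during vector retrieval.
    vector_db_storage_cls_kwargs={"cosine_better_than_threshold": cosine_threshold},
)

print(
    rag.query(
        "What are the top themes in these documents?",
        param=QueryParam(mode="hybrid", top_k=top_k),
    )
)
```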