Added temperature parameter for LLM

yangdx
2025-03-24 02:02:34 +08:00
parent 7d3b20c4d8
commit 7e8a2c0e9b
5 changed files with 31 additions and 16 deletions


@@ -39,21 +39,23 @@
# MAX_TOKEN_ENTITY_DESC=4000
### Settings for document indexing
# SUMMARY_LANGUAGE=English
ENABLE_LLM_CACHE_FOR_EXTRACT=true # Enable LLM cache for entity extraction
SUMMARY_LANGUAGE=English
# CHUNK_SIZE=1200
# CHUNK_OVERLAP_SIZE=100
# MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relation summaries
# MAX_PARALLEL_INSERT=2 # Number of documents processed in parallel in one batch
# MAX_ASYNC=4 # Max concurrent LLM requests
# ENABLE_LLM_CACHE_FOR_EXTRACT=true # Enable LLM cache for entity extraction
# EMBEDDING_BATCH_NUM=32 # Number of chunks sent to Embedding in one request
# EMBEDDING_FUNC_MAX_ASYNC=16 # Max concurrent Embedding requests
# MAX_EMBED_TOKENS=8192
### LLM Configuration (Use a valid host. For local services installed with Docker, you can use host.docker.internal)
# MAX_TOKENS=32768 # Max tokens sent to LLM (less than the model's context size)
# TIMEOUT=150 # Timeout in seconds for LLM, None for infinite timeout
TIMEOUT=150 # Timeout in seconds for LLM, None for infinite timeout
TEMPERATURE=0.5
MAX_ASYNC=4 # Max concurrent LLM requests
MAX_TOKENS=32768 # Max tokens sent to LLM (less than the model's context size)
LLM_BINDING=ollama
LLM_MODEL=mistral-nemo:latest
LLM_BINDING_API_KEY=your_api_key
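
For context, below is a minimal sketch of how the variables exposed in this diff (TEMPERATURE, TIMEOUT, MAX_ASYNC, MAX_TOKENS) could be read from the environment and forwarded to a completion call. The helper names load_llm_settings and call_llm, as well as the keyword names, are illustrative assumptions for this sketch, not LightRAG's actual internal API.

import os


def load_llm_settings() -> dict:
    """Collect the LLM tuning knobs exposed in the .env snippet above.

    Variable names and defaults mirror the env.example entries in this diff;
    how LightRAG wires them internally is not shown here.
    """
    return {
        "temperature": float(os.getenv("TEMPERATURE", "0.5")),  # sampling temperature introduced by this commit
        "timeout": float(os.getenv("TIMEOUT", "150")),          # LLM request timeout in seconds
        "max_tokens": int(os.getenv("MAX_TOKENS", "32768")),    # cap on tokens sent to the LLM
        "max_async": int(os.getenv("MAX_ASYNC", "4")),          # max concurrent LLM requests
    }


def call_llm(prompt: str, settings: dict) -> str:
    """Hypothetical completion wrapper; a real binding (ollama, openai, ...)
    would forward temperature / timeout / max_tokens to its client here."""
    return f"[temperature={settings['temperature']}] {prompt}"


if __name__ == "__main__":
    print(call_llm("Summarize the extracted entities.", load_llm_settings()))

A lower TEMPERATURE makes extraction and summary output more deterministic, while higher values increase variety; 0.5 is the default the diff ships in env.example.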