Added temperature parameter for LLM

yangdx
2025-03-24 02:02:34 +08:00
parent 7d3b20c4d8
commit 7e8a2c0e9b
5 changed files with 31 additions and 16 deletions

View File

@@ -630,11 +630,11 @@ rag.insert(["TEXT1", "TEXT2",...])
 rag = LightRAG(
     working_dir=WORKING_DIR,
     addon_params={
-        "insert_batch_size": 20  # Process 20 documents per batch
+        "insert_batch_size": 4  # Process 4 documents per batch
     }
 )
-rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 20
+rag.insert(["TEXT1", "TEXT2", "TEXT3", ...])  # Documents will be processed in batches of 4
 ```

 The `insert_batch_size` parameter in `addon_params` controls how many documents are processed in each batch during insertion. This is useful for:
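The README example now processes 4 documents per batch instead of 20. As a rough illustration of what `insert_batch_size` means for a call like `rag.insert([...])`, here is a minimal sketch of consecutive batching; the splitting helper below is illustrative, not LightRAG's internal implementation:

```python
# Illustrative only: how a document list is grouped when insert_batch_size=4.
def split_into_batches(docs: list[str], batch_size: int = 4) -> list[list[str]]:
    """Group documents into consecutive batches of at most batch_size items."""
    return [docs[i : i + batch_size] for i in range(0, len(docs), batch_size)]


docs = [f"TEXT{i}" for i in range(1, 11)]  # 10 documents
for batch in split_into_batches(docs, batch_size=4):
    print(batch)  # prints 3 batches: 4 + 4 + 2 documents
```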

View File

@@ -39,21 +39,23 @@
 # MAX_TOKEN_ENTITY_DESC=4000

 ### Settings for document indexing
-# SUMMARY_LANGUAGE=English
+ENABLE_LLM_CACHE_FOR_EXTRACT=true # Enable LLM cache for entity extraction
+SUMMARY_LANGUAGE=English
 # CHUNK_SIZE=1200
 # CHUNK_OVERLAP_SIZE=100
 # MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relations summary
 # MAX_PARALLEL_INSERT=2 # Number of parallel processing documents in one patch
-# MAX_ASYNC=4 # Max concurrency requests of LLM
-# ENABLE_LLM_CACHE_FOR_EXTRACT=true # Enable LLM cache for entity extraction
 # EMBEDDING_BATCH_NUM=32 # num of chunks send to Embedding in one request
 # EMBEDDING_FUNC_MAX_ASYNC=16 # Max concurrency requests for Embedding
 # MAX_EMBED_TOKENS=8192

 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
-# MAX_TOKENS=32768 # Max tokens send to LLM (less than context size of the model)
-# TIMEOUT=150 # Time out in seconds for LLM, None for infinite timeout
+TIMEOUT=150 # Time out in seconds for LLM, None for infinite timeout
+TEMPERATURE=0.5
+MAX_ASYNC=4 # Max concurrency requests of LLM
+MAX_TOKENS=32768 # Max tokens send to LLM (less than context size of the model)
 LLM_BINDING=ollama
 LLM_MODEL=mistral-nemo:latest
 LLM_BINDING_API_KEY=your_api_key
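The keys that are now uncommented (ENABLE_LLM_CACHE_FOR_EXTRACT, SUMMARY_LANGUAGE, TIMEOUT, TEMPERATURE, MAX_ASYNC, MAX_TOKENS) become active settings rather than documented defaults. A minimal sketch of consuming them from a `.env` file with python-dotenv, assuming plain `os.getenv` with the defaults shown above (the variable names match the file; the loading code itself is illustrative):

```python
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv()  # read .env from the current working directory

# Cast each value to the type the server expects; defaults mirror the example file.
timeout = int(os.getenv("TIMEOUT", "150"))            # seconds before an LLM call times out
temperature = float(os.getenv("TEMPERATURE", "0.5"))  # sampling temperature for the LLM
max_async = int(os.getenv("MAX_ASYNC", "4"))          # max concurrent LLM requests
max_tokens = int(os.getenv("MAX_TOKENS", "32768"))    # max tokens sent to the LLM

print(timeout, temperature, max_async, max_tokens)
```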

View File

@@ -207,6 +207,7 @@ def create_app(args):
         history_messages=history_messages,
         base_url=args.llm_binding_host,
         api_key=args.llm_binding_api_key,
+        temperature=args.temperature,
         **kwargs,
     )

@@ -230,6 +231,7 @@ def create_app(args):
         base_url=args.llm_binding_host,
         api_key=os.getenv("AZURE_OPENAI_API_KEY"),
         api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"),
+        temperature=args.temperature,
         **kwargs,
     )

@@ -302,6 +304,7 @@ def create_app(args):
             },
             namespace_prefix=args.namespace_prefix,
             auto_manage_storages_states=False,
+            max_parallel_insert=args.max_parallel_insert,
         )
     else:  # azure_openai
         rag = LightRAG(

@@ -331,6 +334,7 @@ def create_app(args):
             },
             namespace_prefix=args.namespace_prefix,
             auto_manage_storages_states=False,
+            max_parallel_insert=args.max_parallel_insert,
         )

     # Add routes
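Both LLM wrapper branches now forward `temperature=args.temperature` to the completion call, and both `LightRAG(...)` constructions receive `max_parallel_insert=args.max_parallel_insert`. A hedged sketch of how a model function might thread the configured temperature through to the underlying binding; `call_llm` and `make_llm_model_func` are placeholder names, not the repository's actual binding functions:

```python
from typing import Any, Callable


def make_llm_model_func(args: Any, call_llm: Callable[..., str]) -> Callable[..., str]:
    """Build a model function that forwards server settings, including temperature,
    to whichever LLM binding (`call_llm`) is in use. Sketch only."""

    def llm_model_func(prompt: str, system_prompt: str | None = None,
                       history_messages: list | None = None, **kwargs) -> str:
        return call_llm(
            args.llm_model,
            prompt,
            system_prompt=system_prompt,
            history_messages=history_messages or [],
            base_url=args.llm_binding_host,
            api_key=args.llm_binding_api_key,
            temperature=args.temperature,  # new: propagate the configured temperature
            **kwargs,
        )

    return llm_model_func
```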

View File

@@ -475,8 +475,8 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
     if not new_files:
         return

-    # Get MAX_PARALLEL_INSERT from global_args
-    max_parallel = global_args["max_parallel_insert"]
+    # Get MAX_PARALLEL_INSERT from global_args["main_args"]
+    max_parallel = global_args["main_args"].max_parallel_insert
     # Calculate batch size as 2 * MAX_PARALLEL_INSERT
     batch_size = 2 * max_parallel
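The scanner now reads `max_parallel_insert` from the argparse namespace stored under `global_args["main_args"]` and walks new files in batches of `2 * MAX_PARALLEL_INSERT`. A small sketch of that arithmetic, under the assumption that a semaphore caps concurrent inserts while each batch keeps extra work queued (helper names here are illustrative, not the server's actual functions):

```python
import asyncio

MAX_PARALLEL_INSERT = 2  # mirrors the MAX_PARALLEL_INSERT default


async def index_file(path: str, sem: asyncio.Semaphore) -> None:
    async with sem:               # at most MAX_PARALLEL_INSERT files at once
        await asyncio.sleep(0.1)  # stand-in for the real indexing pipeline
        print(f"indexed {path}")


async def scan(new_files: list[str]) -> None:
    sem = asyncio.Semaphore(MAX_PARALLEL_INSERT)
    batch_size = 2 * MAX_PARALLEL_INSERT  # keep a batch of work queued ahead of the workers
    for i in range(0, len(new_files), batch_size):
        batch = new_files[i : i + batch_size]
        await asyncio.gather(*(index_file(f, sem) for f in batch))


asyncio.run(scan([f"doc_{n}.txt" for n in range(10)]))
```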

View File

@@ -14,6 +14,7 @@ from dotenv import load_dotenv
 from fastapi.security import APIKeyHeader, OAuth2PasswordBearer
 from starlette.status import HTTP_403_FORBIDDEN
 from .auth import auth_handler
+from ..prompt import PROMPTS

 # Load environment variables
 load_dotenv()
@@ -364,9 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     args.vector_storage = get_env_value(
         "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE
     )

     # Get MAX_PARALLEL_INSERT from environment
-    global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int)
+    args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)

     # Handle openai-ollama special case
     if args.llm_binding == "openai-ollama":
@@ -396,6 +397,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
     args.enable_llm_cache_for_extract = get_env_value(
         "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )

+    # Inject LLM temperature configuration
+    args.temperature = get_env_value("TEMPERATURE", 0.5, float)
+
     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
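`parse_args` now keeps `max_parallel_insert` on the returned `args` namespace (instead of the module-level `global_args` dict) and adds `args.temperature` with a 0.5 default. Below is a simplified stand-in for what a `get_env_value(name, default, cast)` helper likely does; the project's real helper may handle more cases (booleans, for instance):

```python
import os
from typing import Callable, TypeVar

T = TypeVar("T")


def get_env_value(name: str, default: T, cast: Callable[[str], T] = str) -> T:
    """Read an environment variable, cast it, and fall back to a default.
    Simplified sketch of the helper used in the hunk above."""
    raw = os.environ.get(name)
    if raw is None or raw == "":
        return default
    return cast(raw)


max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int)
temperature = get_env_value("TEMPERATURE", 0.5, float)
```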
@@ -464,6 +468,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.llm_binding_host}")
     ASCIIColors.white(" ├─ Model: ", end="")
     ASCIIColors.yellow(f"{args.llm_model}")
+    ASCIIColors.white(" ├─ Temperature: ", end="")
+    ASCIIColors.yellow(f"{args.temperature}")
+    ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
+    ASCIIColors.yellow(f"{args.max_async}")
+    ASCIIColors.white(" ├─ Max Tokens: ", end="")
+    ASCIIColors.yellow(f"{args.max_tokens}")
     ASCIIColors.white(" └─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
@@ -479,13 +489,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.embedding_dim}")

     # RAG Configuration
+    summary_language = os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"])
     ASCIIColors.magenta("\n⚙️ RAG Configuration:")
-    ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
-    ASCIIColors.yellow(f"{args.max_async}")
+    ASCIIColors.white(" ├─ Summary Language: ", end="")
+    ASCIIColors.yellow(f"{summary_language}")
     ASCIIColors.white(" ├─ Max Parallel Insert: ", end="")
-    ASCIIColors.yellow(f"{global_args['max_parallel_insert']}")
-    ASCIIColors.white(" ├─ Max Tokens: ", end="")
-    ASCIIColors.yellow(f"{args.max_tokens}")
+    ASCIIColors.yellow(f"{args.max_parallel_insert}")
     ASCIIColors.white(" ├─ Max Embed Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_embed_tokens}")
     ASCIIColors.white(" ├─ Chunk Size: ", end="")