From 7e8a2c0e9b0b9e746298e0ca5a204fc83e17b334 Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 24 Mar 2025 02:02:34 +0800 Subject: [PATCH] Added temperature parameter for LLM --- README.md | 4 ++-- env.example | 12 +++++++----- lightrag/api/lightrag_server.py | 4 ++++ lightrag/api/routers/document_routes.py | 4 ++-- lightrag/api/utils_api.py | 23 ++++++++++++++++------- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 5ff32d4c..f07c4530 100644 --- a/README.md +++ b/README.md @@ -630,11 +630,11 @@ rag.insert(["TEXT1", "TEXT2",...]) rag = LightRAG( working_dir=WORKING_DIR, addon_params={ - "insert_batch_size": 20 # Process 20 documents per batch + "insert_batch_size": 4 # Process 4 documents per batch } ) -rag.insert(["TEXT1", "TEXT2", "TEXT3", ...]) # Documents will be processed in batches of 20 +rag.insert(["TEXT1", "TEXT2", "TEXT3", ...]) # Documents will be processed in batches of 4 ``` The `insert_batch_size` parameter in `addon_params` controls how many documents are processed in each batch during insertion. This is useful for: diff --git a/env.example b/env.example index 46404582..f6c4a38b 100644 --- a/env.example +++ b/env.example @@ -39,21 +39,23 @@ # MAX_TOKEN_ENTITY_DESC=4000 ### Settings for document indexing -# SUMMARY_LANGUAGE=English +ENABLE_LLM_CACHE_FOR_EXTRACT=true # Enable LLM cache for entity extraction +SUMMARY_LANGUAGE=English # CHUNK_SIZE=1200 # CHUNK_OVERLAP_SIZE=100 # MAX_TOKEN_SUMMARY=500 # Max tokens for entity or relations summary # MAX_PARALLEL_INSERT=2 # Number of parallel processing documents in one patch -# MAX_ASYNC=4 # Max concurrency requests of LLM -# ENABLE_LLM_CACHE_FOR_EXTRACT=true # Enable LLM cache for entity extraction # EMBEDDING_BATCH_NUM=32 # num of chunks send to Embedding in one request # EMBEDDING_FUNC_MAX_ASYNC=16 # Max concurrency requests for Embedding # MAX_EMBED_TOKENS=8192 ### LLM Configuration (Use valid host. 
For local services installed with docker, you can use host.docker.internal) -# MAX_TOKENS=32768 # Max tokens send to LLM (less than context size of the model) -# TIMEOUT=150 # Time out in seconds for LLM, None for infinite timeout +TIMEOUT=150 # Time out in seconds for LLM, None for infinite timeout +TEMPERATURE=0.5 +MAX_ASYNC=4 # Max concurrency requests of LLM +MAX_TOKENS=32768 # Max tokens send to LLM (less than context size of the model) + LLM_BINDING=ollama LLM_MODEL=mistral-nemo:latest LLM_BINDING_API_KEY=your_api_key diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 584d020f..15e78c40 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -207,6 +207,7 @@ def create_app(args): history_messages=history_messages, base_url=args.llm_binding_host, api_key=args.llm_binding_api_key, + temperature=args.temperature, **kwargs, ) @@ -230,6 +231,7 @@ def create_app(args): base_url=args.llm_binding_host, api_key=os.getenv("AZURE_OPENAI_API_KEY"), api_version=os.getenv("AZURE_OPENAI_API_VERSION", "2024-08-01-preview"), + temperature=args.temperature, **kwargs, ) @@ -302,6 +304,7 @@ def create_app(args): }, namespace_prefix=args.namespace_prefix, auto_manage_storages_states=False, + max_parallel_insert=args.max_parallel_insert, ) else: # azure_openai rag = LightRAG( @@ -331,6 +334,7 @@ def create_app(args): }, namespace_prefix=args.namespace_prefix, auto_manage_storages_states=False, + max_parallel_insert=args.max_parallel_insert, ) # Add routes diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index e0c8f545..48bc1243 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -475,8 +475,8 @@ async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager): if not new_files: return - # Get MAX_PARALLEL_INSERT from global_args - max_parallel = global_args["max_parallel_insert"] + # Get MAX_PARALLEL_INSERT from global_args["main_args"] + max_parallel = global_args["main_args"].max_parallel_insert # Calculate batch size as 2 * MAX_PARALLEL_INSERT batch_size = 2 * max_parallel diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py index 25136bd2..ddc0554c 100644 --- a/lightrag/api/utils_api.py +++ b/lightrag/api/utils_api.py @@ -14,6 +14,7 @@ from dotenv import load_dotenv from fastapi.security import APIKeyHeader, OAuth2PasswordBearer from starlette.status import HTTP_403_FORBIDDEN from .auth import auth_handler +from ..prompt import PROMPTS # Load environment variables load_dotenv() @@ -364,9 +365,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace: args.vector_storage = get_env_value( "LIGHTRAG_VECTOR_STORAGE", DefaultRAGStorageConfig.VECTOR_STORAGE ) - + # Get MAX_PARALLEL_INSERT from environment - global_args["max_parallel_insert"] = get_env_value("MAX_PARALLEL_INSERT", 2, int) + args.max_parallel_insert = get_env_value("MAX_PARALLEL_INSERT", 2, int) # Handle openai-ollama special case if args.llm_binding == "openai-ollama": @@ -396,6 +397,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace: args.enable_llm_cache_for_extract = get_env_value( "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool ) + + # Inject LLM temperature configuration + args.temperature = get_env_value("TEMPERATURE", 0.5, float) # Select Document loading tool (DOCLING, DEFAULT) args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT") @@ -464,6 +468,12 @@ def display_splash_screen(args: 
argparse.Namespace) -> None: ASCIIColors.yellow(f"{args.llm_binding_host}") ASCIIColors.white(" ├─ Model: ", end="") ASCIIColors.yellow(f"{args.llm_model}") + ASCIIColors.white(" ├─ Temperature: ", end="") + ASCIIColors.yellow(f"{args.temperature}") + ASCIIColors.white(" ├─ Max Async for LLM: ", end="") + ASCIIColors.yellow(f"{args.max_async}") + ASCIIColors.white(" ├─ Max Tokens: ", end="") + ASCIIColors.yellow(f"{args.max_tokens}") ASCIIColors.white(" └─ Timeout: ", end="") ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}") @@ -479,13 +489,12 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.yellow(f"{args.embedding_dim}") # RAG Configuration + summary_language = os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"]) ASCIIColors.magenta("\n⚙️ RAG Configuration:") - ASCIIColors.white(" ├─ Max Async for LLM: ", end="") - ASCIIColors.yellow(f"{args.max_async}") + ASCIIColors.white(" ├─ Summary Language: ", end="") + ASCIIColors.yellow(f"{summary_language}") ASCIIColors.white(" ├─ Max Parallel Insert: ", end="") - ASCIIColors.yellow(f"{global_args['max_parallel_insert']}") - ASCIIColors.white(" ├─ Max Tokens: ", end="") - ASCIIColors.yellow(f"{args.max_tokens}") + ASCIIColors.yellow(f"{args.max_parallel_insert}") ASCIIColors.white(" ├─ Max Embed Tokens: ", end="") ASCIIColors.yellow(f"{args.max_embed_tokens}") ASCIIColors.white(" ├─ Chunk Size: ", end="")
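
Note on the env.example, utils_api.py, and lightrag_server.py hunks above: the new `TEMPERATURE` setting is read with `get_env_value("TEMPERATURE", 0.5, float)` and then passed to the LLM completion wrappers as `temperature=args.temperature`, alongside the existing `MAX_ASYNC`, `MAX_TOKENS`, and `TIMEOUT` knobs. The sketch below mirrors that flow with plain `os.getenv` and a stand-in completion function; `read_llm_settings` and `llm_model_func` are illustrative names, not part of the patch, and the defaults simply copy the values from env.example and parse_args.

```python
import asyncio
import os


def read_llm_settings() -> dict:
    """Illustrative stand-in for the TEMPERATURE / MAX_ASYNC / MAX_TOKENS /
    TIMEOUT parsing added in utils_api.py (defaults mirror env.example)."""
    return {
        "temperature": float(os.getenv("TEMPERATURE", "0.5")),
        "max_async": int(os.getenv("MAX_ASYNC", "4")),
        "max_tokens": int(os.getenv("MAX_TOKENS", "32768")),
        "timeout": int(os.getenv("TIMEOUT", "150")),
    }


async def llm_model_func(prompt: str, **settings) -> str:
    """Stand-in for the server's completion wrapper: the point is only that
    temperature now travels with every completion request."""
    return f"completion(temperature={settings.get('temperature')}): {prompt[:40]}"


if __name__ == "__main__":
    settings = read_llm_settings()
    print(asyncio.run(llm_model_func("What is LightRAG?", **settings)))
```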
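
The document_routes.py and utils_api.py hunks move `MAX_PARALLEL_INSERT` from a loose `global_args` key onto the parsed `argparse.Namespace`, which other modules then read back through `global_args["main_args"]`, and `run_scanning_process` sizes its batches as 2 × `MAX_PARALLEL_INSERT`. A minimal sketch of that pattern follows; the dict initialisation and the placeholder file list are assumptions for illustration, while the attribute access and the 2× batch rule come from the patch.

```python
import argparse
import os

global_args: dict = {}


def parse_args() -> argparse.Namespace:
    # Mirrors the patched parse_args(): settings now live on the namespace.
    args = argparse.Namespace()
    args.max_parallel_insert = int(os.getenv("MAX_PARALLEL_INSERT", "2"))
    args.temperature = float(os.getenv("TEMPERATURE", "0.5"))
    return args


# The server is assumed to register the parsed namespace once at startup.
global_args["main_args"] = parse_args()

# Consumer side, as in run_scanning_process(): read the setting back and
# enqueue newly scanned files in batches of 2 * MAX_PARALLEL_INSERT.
max_parallel = global_args["main_args"].max_parallel_insert
batch_size = 2 * max_parallel

new_files = [f"doc_{i:02d}.txt" for i in range(10)]  # placeholder scan result
for start in range(0, len(new_files), batch_size):
    print("batch:", new_files[start : start + batch_size])
```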
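
The splash-screen hunk regroups the throughput settings (Max Async, Max Tokens) under LLM Configuration and adds a Summary Language line to RAG Configuration, falling back to the default defined in lightrag/prompt.py when `SUMMARY_LANGUAGE` is unset. A small sketch of that fallback, with a stand-in `PROMPTS` dict in place of the real import:

```python
import os

# Stand-in for lightrag.prompt.PROMPTS; the real table defines more entries.
PROMPTS = {"DEFAULT_LANGUAGE": "English"}

summary_language = os.getenv("SUMMARY_LANGUAGE", PROMPTS["DEFAULT_LANGUAGE"])
print(f" ├─ Summary Language: {summary_language}")
```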