Adjust concurrency limits to more LLM-friendly settings for newcomers

- Lowered max async LLM processes to 4
- Enabled LLM cache for entity extraction
- Reduced max parallel insert to 2
yangdx
2025-03-16 23:56:34 +08:00
parent 9d971e5889
commit c2ba7f33ff
5 changed files with 7 additions and 6 deletions


```diff
@@ -214,7 +214,7 @@ class LightRAG:
     llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32768)))
     """Maximum number of tokens allowed per LLM response."""
-    llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 16)))
+    llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
     """Maximum number of concurrent LLM calls."""
     llm_model_kwargs: dict[str, Any] = field(default_factory=dict)
```
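
The default-resolution pattern in these fields is worth spelling out for newcomers: `os.getenv` returns the variable's string value when it is set and the supplied fallback otherwise, and `int()` normalizes both cases. A minimal standalone sketch of that pattern (the helper name `resolve_limit` is illustrative, not LightRAG code):

```python
import os

def resolve_limit(env_var: str, fallback: int) -> int:
    # os.getenv returns the variable's value as a str when set,
    # or the fallback (here an int) when unset; int() handles both.
    return int(os.getenv(env_var, fallback))

# With MAX_ASYNC unset this yields the new default of 4;
# exporting MAX_ASYNC=16 before launch restores the old limit.
llm_model_max_async = resolve_limit("MAX_ASYNC", 4)
max_parallel_insert = resolve_limit("MAX_PARALLEL_INSERT", 2)
```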
```diff
@@ -238,7 +238,7 @@ class LightRAG:
     # Extensions
     # ---
-    max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 20)))
+    max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
     """Maximum number of parallel insert operations."""
     addon_params: dict[str, Any] = field(
```
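
Conceptually, `max_parallel_insert` caps how many insert operations run at once, which in turn bounds how many of the `llm_model_max_async` LLM slots ingestion can occupy. Purely as an illustration of that kind of cap (an `asyncio.Semaphore` sketch, assuming nothing about LightRAG's actual internals):

```python
import asyncio

MAX_PARALLEL_INSERT = 2  # mirrors the new default above

async def insert_document(doc: str) -> None:
    # Hypothetical stand-in for chunking + entity extraction on one doc.
    await asyncio.sleep(0.1)

async def insert_all(docs: list[str]) -> None:
    sem = asyncio.Semaphore(MAX_PARALLEL_INSERT)

    async def bounded(doc: str) -> None:
        async with sem:  # at most two inserts in flight at any moment
            await insert_document(doc)

    await asyncio.gather(*(bounded(d) for d in docs))

asyncio.run(insert_all([f"doc-{i}" for i in range(8)]))
```

Lower caps like these trade raw throughput for fewer rate-limit errors, which suits the free-tier LLM endpoints most newcomers start with.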