diff --git a/README.md b/README.md
index 61e7b20f..c9a35260 100644
--- a/README.md
+++ b/README.md
@@ -1061,7 +1061,7 @@ Valid modes are:
 | **llm\_model\_func** | `callable` | Function for LLM generation | `gpt_4o_mini_complete` |
 | **llm\_model\_name** | `str` | LLM model name for generation | `meta-llama/Llama-3.2-1B-Instruct` |
 | **llm\_model\_max\_token\_size** | `int` | Maximum token size for LLM generation (affects entity relation summaries) | `32768` (default value can be changed by env var MAX_TOKENS) |
-| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `16` (default value can be changed by env var MAX_ASYNC) |
+| **llm\_model\_max\_async** | `int` | Maximum number of concurrent asynchronous LLM processes | `4` (default value can be changed by env var MAX_ASYNC) |
 | **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for the vector database, such as the threshold for node and relation retrieval | cosine_better_than_threshold: 0.2 (default value can be changed by env var COSINE_THRESHOLD) |
 | **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
diff --git a/env.example b/env.example
index 955741ef..66d209ad 100644
--- a/env.example
+++ b/env.example
@@ -50,7 +50,8 @@
 # MAX_TOKEN_SUMMARY=500  # Max tokens for entity or relation summaries
 # SUMMARY_LANGUAGE=English
 # MAX_EMBED_TOKENS=8192
-# ENABLE_LLM_CACHE_FOR_EXTRACT=false  # Enable LLM cache for entity extraction, defaults to false
+# ENABLE_LLM_CACHE_FOR_EXTRACT=true  # Enable LLM cache for entity extraction
+# MAX_PARALLEL_INSERT=2  # Maximum number of documents processed in parallel in the pipeline
 
 ### LLM Configuration (Use a valid host. For local services installed with Docker, you can use host.docker.internal)
 LLM_BINDING=ollama
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 7a07ddb8..8dcba7a2 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -224,7 +224,7 @@ LightRAG supports binding to various LLM/Embedding backends:
 Use the environment variable `LLM_BINDING` or the CLI argument `--llm-binding` to select the LLM backend type. Use the environment variable `EMBEDDING_BINDING` or the CLI argument `--embedding-binding` to select the embedding backend type.
 
 ### Entity Extraction Configuration
-* ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: false)
+* ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)
 
 It's very common to set `ENABLE_LLM_CACHE_FOR_EXTRACT` to true in test environments to reduce the cost of LLM calls.
 
diff --git a/lightrag/api/docs/LightRagWithPostGRESQL.md b/lightrag/api/docs/LightRagWithPostGRESQL.md
index f9d6a06b..5804a369 100644
--- a/lightrag/api/docs/LightRagWithPostGRESQL.md
+++ b/lightrag/api/docs/LightRagWithPostGRESQL.md
@@ -141,7 +141,7 @@ Start the LightRAG server using specified options:
 lightrag-server --port 9621 --key sk-somepassword --kv-storage PGKVStorage --graph-storage PGGraphStorage --vector-storage PGVectorStorage --doc-status-storage PGDocStatusStorage
 ```
 
-Replace `the-port-number` with your desired port number (default is 9621) and `your-secret-key` with a secure key.
+Replace the `--port` value with your desired port number (default is 9621) and the `--key` value with a secure key.
 
 ## Conclusion
diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index 1f75db9c..88a0132c 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -364,7 +364,7 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
 
     # Inject LLM cache configuration
     args.enable_llm_cache_for_extract = get_env_value(
-        "ENABLE_LLM_CACHE_FOR_EXTRACT", False, bool
+        "ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
     )
 
     # Select Document loading tool (DOCLING, DEFAULT)
diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py
index 49d462f6..6a3e7c5f 100644
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -755,7 +755,7 @@ class PGDocStatusStorage(DocStatusStorage):
         result = await self.db.query(sql, params, True)
         docs_by_status = {
             element["id"]: DocProcessingStatus(
-                content=result[0]["content"],
+                content=element["content"],
                 content_summary=element["content_summary"],
                 content_length=element["content_length"],
                 status=element["status"],
@@ -1556,7 +1556,7 @@ TABLES = {
                     content_vector VECTOR,
                     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                     update_time TIMESTAMP,
-                    chunk_id VARCHAR(255) NULL,
+                    chunk_id TEXT NULL,
                     CONSTRAINT LIGHTRAG_VDB_ENTITY_PK PRIMARY KEY (workspace, id)
                     )"""
     },
@@ -1570,7 +1570,7 @@ TABLES = {
                     content_vector VECTOR,
                     create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                     update_time TIMESTAMP,
-                    chunk_id VARCHAR(255) NULL,
+                    chunk_id TEXT NULL,
                     CONSTRAINT LIGHTRAG_VDB_RELATION_PK PRIMARY KEY (workspace, id)
                     )"""
     },
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index a8b11549..5a5461e0 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -214,7 +214,7 @@ class LightRAG:
     llm_model_max_token_size: int = field(default=int(os.getenv("MAX_TOKENS", 32768)))
     """Maximum number of tokens allowed per LLM response."""
 
-    llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 16)))
+    llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
     """Maximum number of concurrent LLM calls."""
 
     llm_model_kwargs: dict[str, Any] = field(default_factory=dict)
@@ -238,7 +238,7 @@ class LightRAG:
 
     # Extensions
     # ---
-    max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 20)))
+    max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
     """Maximum number of parallel insert operations."""
 
     addon_params: dict[str, Any] = field(
@@ -553,6 +553,7 @@ class LightRAG:
         Args:
             input: Single document string or list of document strings
             split_by_character: if split_by_character is not None, split the string by character, if chunk longer than
+                chunk_token_size, it will be split again by token size.
             split_by_character_only: if split_by_character_only is True, split the string by character only, when split_by_character is None, this parameter is ignored.
             ids: single string of the document ID or list of unique document IDs, if not provided, MD5 hash IDs will be generated
 
@@ -574,6 +575,7 @@ class LightRAG:
         Args:
             input: Single document string or list of document strings
             split_by_character: if split_by_character is not None, split the string by character, if chunk longer than
+                chunk_token_size, it will be split again by token size.
             split_by_character_only: if split_by_character_only is True, split the string by character only, when split_by_character is None, this parameter is ignored.
             ids: list of unique document IDs, if not provided, MD5 hash IDs will be generated
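
All three default changes funnel through `os.getenv` with a hard-coded fallback, so the previous behaviour stays one environment variable away. A minimal sketch of how the new defaults resolve (the boolean parsing shown here is an assumption; the repo's `get_env_value` helper may normalize differently):

```python
import os

# Fallbacks mirror the new defaults in lightrag/lightrag.py.
llm_model_max_async = int(os.getenv("MAX_ASYNC", 4))            # was 16
max_parallel_insert = int(os.getenv("MAX_PARALLEL_INSERT", 2))  # was 20

# Assumed parsing for the flag handled by get_env_value in utils_api.py.
enable_llm_cache_for_extract = (
    os.getenv("ENABLE_LLM_CACHE_FOR_EXTRACT", "true").strip().lower() == "true"
)  # was false

print(llm_model_max_async, max_parallel_insert, enable_llm_cache_for_extract)
```

Deployments sized for the old throughput can export `MAX_ASYNC=16` and `MAX_PARALLEL_INSERT=20` to keep it.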
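
The `PGDocStatusStorage` change is a bug fix rather than a refactor: `result[0]["content"]` pinned every entry in the dict comprehension to the first row's content. A toy reproduction of the failure mode, independent of the actual query:

```python
# Two rows as they would come back from the doc-status query.
result = [
    {"id": "doc-1", "content": "first document"},
    {"id": "doc-2", "content": "second document"},
]

buggy = {element["id"]: result[0]["content"] for element in result}
fixed = {element["id"]: element["content"] for element in result}

assert buggy["doc-2"] == "first document"   # every doc got row 0's content
assert fixed["doc-2"] == "second document"  # each doc gets its own content
```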
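
Note that widening `chunk_id` from `VARCHAR(255)` to `TEXT` only applies to freshly created tables, since the DDL in `TABLES` runs at table creation; an existing database keeps the narrower column. A possible one-off migration, assuming asyncpg and the table names implied by the primary-key constraints (verify both against your schema before running):

```python
import asyncio
import asyncpg

async def widen_chunk_id(dsn: str) -> None:
    # Assumed table names, inferred from LIGHTRAG_VDB_ENTITY_PK /
    # LIGHTRAG_VDB_RELATION_PK in the DDL above.
    conn = await asyncpg.connect(dsn)
    try:
        for table in ("LIGHTRAG_VDB_ENTITY", "LIGHTRAG_VDB_RELATION"):
            await conn.execute(f"ALTER TABLE {table} ALTER COLUMN chunk_id TYPE TEXT")
    finally:
        await conn.close()

# asyncio.run(widen_chunk_id("postgresql://user:password@localhost:5432/lightrag"))
```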