Added max parallel insert
This commit is contained in:
@@ -347,6 +347,9 @@ class LightRAG:
|
|||||||
"""If True, enables caching for entity extraction steps to reduce LLM costs."""
|
"""If True, enables caching for entity extraction steps to reduce LLM costs."""
|
||||||
|
|
||||||
# Extensions
|
# Extensions
|
||||||
|
max_parallel_insert: int = field(default_factory=lambda: int(os.getenv("MAX_PARALLEL_INSERT", 20)))
|
||||||
|
"""Maximum number of parallel insert operations."""
|
||||||
|
|
||||||
addon_params: dict[str, Any] = field(default_factory=dict)
|
addon_params: dict[str, Any] = field(default_factory=dict)
|
||||||
|
|
||||||
# Storages Management
|
# Storages Management
|
||||||
@@ -786,10 +789,9 @@ class LightRAG:
|
|||||||
return
|
return
|
||||||
|
|
||||||
# 2. split docs into chunks, insert chunks, update doc status
|
# 2. split docs into chunks, insert chunks, update doc status
|
||||||
batch_size = self.addon_params.get("insert_batch_size", 10)
|
|
||||||
docs_batches = [
|
docs_batches = [
|
||||||
list(to_process_docs.items())[i : i + batch_size]
|
list(to_process_docs.items())[i : i + self.max_parallel_insert]
|
||||||
for i in range(0, len(to_process_docs), batch_size)
|
for i in range(0, len(to_process_docs), self.max_parallel_insert)
|
||||||
]
|
]
|
||||||
|
|
||||||
logger.info(f"Number of batches to process: {len(docs_batches)}.")
|
logger.info(f"Number of batches to process: {len(docs_batches)}.")
|
||||||
|
Reference in New Issue
Block a user