From 6a326e2783acfbccd5632aed33f6609df92f68b6 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 1 Feb 2025 10:03:27 +0800 Subject: [PATCH] Revert "Refactor embedding functions and add async query limit" This reverts commit 21481dba8f3b020797718de3d8a82aafa7f69590. --- lightrag/kg/nano_vector_db_impl.py | 6 +----- lightrag/lightrag.py | 24 ++++++++++-------------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index 6e8873fc..7bd52d20 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -76,8 +76,6 @@ class NanoVectorDBStorage(BaseVectorStorage): cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) def __post_init__(self): - # Initialize lock only for file operations - self._save_lock = asyncio.Lock() # Use global config value if specified, otherwise use default config = self.global_config.get("vector_db_storage_cls_kwargs", {}) self.cosine_better_than_threshold = config.get( @@ -212,6 +210,4 @@ class NanoVectorDBStorage(BaseVectorStorage): logger.error(f"Error deleting relations for {entity_name}: {e}") async def index_done_callback(self): - # Protect file write operation - async with self._save_lock: - self._client.save() + self._client.save() diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index f0fb92fd..92fc954f 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -154,7 +154,6 @@ class LightRAG: embedding_func: EmbeddingFunc = None # This must be set (we do want to separate llm from the corte, so no more default initialization) embedding_batch_num: int = 32 embedding_func_max_async: int = 16 - embedding_func_max_async_query: int = 4 # LLM llm_model_func: callable = None # This must be set (we do want to separate llm from the corte, so no more default initialization) @@ -196,13 +195,10 @@ class LightRAG: _print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()]) logger.debug(f"LightRAG init with param:\n {_print_config}\n") - # Init embedding functions with separate instances for insert and query - self.insert_embedding_func = limit_async_func_call( - self.embedding_func_max_async - )(self.embedding_func) - self.query_embedding_func = limit_async_func_call( - self.embedding_func_max_async_query - )(self.embedding_func) + # Init LLM + self.embedding_func = limit_async_func_call(self.embedding_func_max_async)( + self.embedding_func + ) # Initialize all storages self.key_string_value_json_storage_cls: Type[BaseKVStorage] = ( @@ -242,15 +238,15 @@ class LightRAG: #### self.full_docs = self.key_string_value_json_storage_cls( namespace="full_docs", - embedding_func=self.insert_embedding_func, + embedding_func=self.embedding_func, ) self.text_chunks = self.key_string_value_json_storage_cls( namespace="text_chunks", - embedding_func=self.insert_embedding_func, + embedding_func=self.embedding_func, ) self.chunk_entity_relation_graph = self.graph_storage_cls( namespace="chunk_entity_relation", - embedding_func=self.insert_embedding_func, + embedding_func=self.embedding_func, ) #### # add embedding func by walter over @@ -258,17 +254,17 @@ class LightRAG: self.entities_vdb = self.vector_db_storage_cls( namespace="entities", - embedding_func=self.query_embedding_func, + embedding_func=self.embedding_func, meta_fields={"entity_name"}, ) self.relationships_vdb = self.vector_db_storage_cls( namespace="relationships", - embedding_func=self.query_embedding_func, + embedding_func=self.embedding_func, meta_fields={"src_id", "tgt_id"}, ) self.chunks_vdb = self.vector_db_storage_cls( namespace="chunks", - embedding_func=self.query_embedding_func, + embedding_func=self.embedding_func, ) if self.llm_response_cache and hasattr(