Revert "Refactor embedding functions and add async query limit"

This reverts commit 21481dba8f.
This commit is contained in:
yangdx
2025-02-01 10:03:27 +08:00
parent 2ba36f87e3
commit 6a326e2783
2 changed files with 11 additions and 19 deletions

View File

@@ -76,8 +76,6 @@ class NanoVectorDBStorage(BaseVectorStorage):
cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2")) cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))
def __post_init__(self): def __post_init__(self):
# Initialize lock only for file operations
self._save_lock = asyncio.Lock()
# Use global config value if specified, otherwise use default # Use global config value if specified, otherwise use default
config = self.global_config.get("vector_db_storage_cls_kwargs", {}) config = self.global_config.get("vector_db_storage_cls_kwargs", {})
self.cosine_better_than_threshold = config.get( self.cosine_better_than_threshold = config.get(
@@ -212,6 +210,4 @@ class NanoVectorDBStorage(BaseVectorStorage):
logger.error(f"Error deleting relations for {entity_name}: {e}") logger.error(f"Error deleting relations for {entity_name}: {e}")
async def index_done_callback(self): async def index_done_callback(self):
# Protect file write operation self._client.save()
async with self._save_lock:
self._client.save()

View File

@@ -154,7 +154,6 @@ class LightRAG:
embedding_func: EmbeddingFunc = None # This must be set (we do want to separate llm from the core, so no more default initialization) embedding_func: EmbeddingFunc = None # This must be set (we do want to separate llm from the core, so no more default initialization)
embedding_batch_num: int = 32 embedding_batch_num: int = 32
embedding_func_max_async: int = 16 embedding_func_max_async: int = 16
embedding_func_max_async_query: int = 4
# LLM # LLM
llm_model_func: callable = None # This must be set (we do want to separate llm from the core, so no more default initialization) llm_model_func: callable = None # This must be set (we do want to separate llm from the core, so no more default initialization)
@@ -196,13 +195,10 @@ class LightRAG:
_print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()]) _print_config = ",\n ".join([f"{k} = {v}" for k, v in global_config.items()])
logger.debug(f"LightRAG init with param:\n {_print_config}\n") logger.debug(f"LightRAG init with param:\n {_print_config}\n")
# Init embedding functions with separate instances for insert and query # Init LLM
self.insert_embedding_func = limit_async_func_call( self.embedding_func = limit_async_func_call(self.embedding_func_max_async)(
self.embedding_func_max_async self.embedding_func
)(self.embedding_func) )
self.query_embedding_func = limit_async_func_call(
self.embedding_func_max_async_query
)(self.embedding_func)
# Initialize all storages # Initialize all storages
self.key_string_value_json_storage_cls: Type[BaseKVStorage] = ( self.key_string_value_json_storage_cls: Type[BaseKVStorage] = (
@@ -242,15 +238,15 @@ class LightRAG:
#### ####
self.full_docs = self.key_string_value_json_storage_cls( self.full_docs = self.key_string_value_json_storage_cls(
namespace="full_docs", namespace="full_docs",
embedding_func=self.insert_embedding_func, embedding_func=self.embedding_func,
) )
self.text_chunks = self.key_string_value_json_storage_cls( self.text_chunks = self.key_string_value_json_storage_cls(
namespace="text_chunks", namespace="text_chunks",
embedding_func=self.insert_embedding_func, embedding_func=self.embedding_func,
) )
self.chunk_entity_relation_graph = self.graph_storage_cls( self.chunk_entity_relation_graph = self.graph_storage_cls(
namespace="chunk_entity_relation", namespace="chunk_entity_relation",
embedding_func=self.insert_embedding_func, embedding_func=self.embedding_func,
) )
#### ####
# add embedding func by walter over # add embedding func by walter over
@@ -258,17 +254,17 @@ class LightRAG:
self.entities_vdb = self.vector_db_storage_cls( self.entities_vdb = self.vector_db_storage_cls(
namespace="entities", namespace="entities",
embedding_func=self.query_embedding_func, embedding_func=self.embedding_func,
meta_fields={"entity_name"}, meta_fields={"entity_name"},
) )
self.relationships_vdb = self.vector_db_storage_cls( self.relationships_vdb = self.vector_db_storage_cls(
namespace="relationships", namespace="relationships",
embedding_func=self.query_embedding_func, embedding_func=self.embedding_func,
meta_fields={"src_id", "tgt_id"}, meta_fields={"src_id", "tgt_id"},
) )
self.chunks_vdb = self.vector_db_storage_cls( self.chunks_vdb = self.vector_db_storage_cls(
namespace="chunks", namespace="chunks",
embedding_func=self.query_embedding_func, embedding_func=self.embedding_func,
) )
if self.llm_response_cache and hasattr( if self.llm_response_cache and hasattr(