From 9277fe8c29187cdca009d88267632e974faf6b62 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Wed, 19 Feb 2025 22:22:41 +0100 Subject: [PATCH] fixed return --- lightrag/kg/chroma_impl.py | 4 ++-- lightrag/kg/faiss_impl.py | 5 ++--- lightrag/kg/json_doc_status_impl.py | 4 ++++ lightrag/kg/json_kv_impl.py | 3 +++ lightrag/kg/milvus_impl.py | 10 +++++----- lightrag/kg/mongo_impl.py | 16 +++++++++++----- lightrag/kg/nano_vector_db_impl.py | 7 +++---- lightrag/kg/oracle_impl.py | 4 ++++ lightrag/kg/postgres_impl.py | 16 ++++++++++++---- lightrag/kg/qdrant_impl.py | 6 +++--- lightrag/kg/redis_impl.py | 3 +++ lightrag/kg/tidb_impl.py | 13 ++++++++----- 12 files changed, 60 insertions(+), 31 deletions(-) diff --git a/lightrag/kg/chroma_impl.py b/lightrag/kg/chroma_impl.py index 62a9b601..5befd8d0 100644 --- a/lightrag/kg/chroma_impl.py +++ b/lightrag/kg/chroma_impl.py @@ -113,9 +113,9 @@ class ChromaVectorDBStorage(BaseVectorStorage): raise async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") if not data: - logger.warning("Empty data provided to vector DB") - return [] + return try: ids = list(data.keys()) diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py index 75abf389..e8f8206d 100644 --- a/lightrag/kg/faiss_impl.py +++ b/lightrag/kg/faiss_impl.py @@ -84,10 +84,9 @@ class FaissVectorDBStorage(BaseVectorStorage): ... } """ - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") + logger.info(f"Inserting {len(data)} to {self.namespace}") if not data: - logger.warning("You are inserting empty data to the vector DB") - return [] + return current_time = time.time() diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py index 1a05abc2..b6133a4c 100644 --- a/lightrag/kg/json_doc_status_impl.py +++ b/lightrag/kg/json_doc_status_impl.py @@ -58,6 +58,10 @@ class JsonDocStatusStorage(DocStatusStorage): write_json(self._data, self._file_name) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + self._data.update(data) await self.index_done_callback() diff --git a/lightrag/kg/json_kv_impl.py b/lightrag/kg/json_kv_impl.py index 7e13dea7..e1ea507a 100644 --- a/lightrag/kg/json_kv_impl.py +++ b/lightrag/kg/json_kv_impl.py @@ -43,6 +43,9 @@ class JsonKVStorage(BaseKVStorage): return set(keys) - set(self._data.keys()) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return left_data = {k: v for k, v in data.items() if k not in self._data} self._data.update(left_data) diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py index b1746514..342fd0a8 100644 --- a/lightrag/kg/milvus_impl.py +++ b/lightrag/kg/milvus_impl.py @@ -80,11 +80,11 @@ class MilvusVectorDBStorage(BaseVectorStorage): ) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] - list_data = [ + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + + list_data: list[dict[str, Any]] = [ { "id": k, **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields}, diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index a6e6edfd..f6a25ba6 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -113,8 +113,12 @@ class MongoKVStorage(BaseKVStorage): return keys - existing_ids async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE): - update_tasks = [] + update_tasks: list[Any] = [] for mode, items in data.items(): for k, v in items.items(): key = f"{mode}_{k}" @@ -186,7 +190,10 @@ class MongoDocStatusStorage(DocStatusStorage): return data - existing_ids async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - update_tasks = [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + update_tasks: list[Any] = [] for k, v in data.items(): data[k]["_id"] = k update_tasks.append( @@ -860,10 +867,9 @@ class MongoVectorDBStorage(BaseVectorStorage): logger.debug("vector index already exist") async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.debug(f"Inserting {len(data)} vectors to {self.namespace}") + logger.info(f"Inserting {len(data)} to {self.namespace}") if not data: - logger.warning("You are inserting an empty data set to vector DB") - return [] + return list_data = [ { diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index b246790b..7462bd7c 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -50,10 +50,9 @@ class NanoVectorDBStorage(BaseVectorStorage): ) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return current_time = time.time() list_data = [ diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py index 0916f6b0..014726fb 100644 --- a/lightrag/kg/oracle_impl.py +++ b/lightrag/kg/oracle_impl.py @@ -332,6 +332,10 @@ class OracleKVStorage(BaseKVStorage): ################ INSERT METHODS ################ async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): list_data = [ { diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index a9c4b3b7..ad7c4b5e 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -353,6 +353,10 @@ class PGKVStorage(BaseKVStorage): ################ INSERT METHODS ################ async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): pass elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_DOCS): @@ -454,10 +458,10 @@ class PGVectorStorage(BaseVectorStorage): return upsert_sql, data async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + current_time = time.time() list_data = [ { @@ -618,6 +622,10 @@ class PGDocStatusStorage(DocStatusStorage): Args: data: dictionary of document IDs and their status data """ + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status) values($1,$2,$3,$4,$5,$6,$7) on conflict(id,workspace) do update set diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index f9edc510..d54b2408 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -93,9 +93,9 @@ class QdrantVectorDBStorage(BaseVectorStorage): ) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return list_data = [ { "id": k, diff --git a/lightrag/kg/redis_impl.py b/lightrag/kg/redis_impl.py index 4bff6c62..7e177346 100644 --- a/lightrag/kg/redis_impl.py +++ b/lightrag/kg/redis_impl.py @@ -49,6 +49,9 @@ class RedisKVStorage(BaseKVStorage): return set(keys) - existing_ids async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return pipe = self._redis.pipeline() for k, v in data.items(): diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py index ed9c8d4b..4266d07c 100644 --- a/lightrag/kg/tidb_impl.py +++ b/lightrag/kg/tidb_impl.py @@ -217,6 +217,9 @@ class TiDBKVStorage(BaseKVStorage): ################ INSERT full_doc AND chunks ################ async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return left_data = {k: v for k, v in data.items() if k not in self._data} self._data.update(left_data) if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): @@ -324,12 +327,12 @@ class TiDBVectorDBStorage(BaseVectorStorage): ###### INSERT entities And relationships ###### async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - # ignore, upsert in TiDBKVStorage already - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS): - return [] + return + logger.info(f"Inserting {len(data)} vectors to {self.namespace}") list_data = [