diff --git a/lightrag/kg/chroma_impl.py b/lightrag/kg/chroma_impl.py index 8196cb2a..3b726c8b 100644 --- a/lightrag/kg/chroma_impl.py +++ b/lightrag/kg/chroma_impl.py @@ -108,9 +108,9 @@ class ChromaVectorDBStorage(BaseVectorStorage): raise async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") if not data: - logger.warning("Empty data provided to vector DB") - return [] + return try: ids = list(data.keys()) diff --git a/lightrag/kg/faiss_impl.py b/lightrag/kg/faiss_impl.py index 7c67e212..2ac0899e 100644 --- a/lightrag/kg/faiss_impl.py +++ b/lightrag/kg/faiss_impl.py @@ -79,10 +79,9 @@ class FaissVectorDBStorage(BaseVectorStorage): ... } """ - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") + logger.info(f"Inserting {len(data)} to {self.namespace}") if not data: - logger.warning("You are inserting empty data to the vector DB") - return [] + return current_time = time.time() diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py index 1a05abc2..b6133a4c 100644 --- a/lightrag/kg/json_doc_status_impl.py +++ b/lightrag/kg/json_doc_status_impl.py @@ -58,6 +58,10 @@ class JsonDocStatusStorage(DocStatusStorage): write_json(self._data, self._file_name) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + self._data.update(data) await self.index_done_callback() diff --git a/lightrag/kg/json_kv_impl.py b/lightrag/kg/json_kv_impl.py index 7e13dea7..e1ea507a 100644 --- a/lightrag/kg/json_kv_impl.py +++ b/lightrag/kg/json_kv_impl.py @@ -43,6 +43,9 @@ class JsonKVStorage(BaseKVStorage): return set(keys) - set(self._data.keys()) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return left_data = {k: v for k, v in data.items() if k not in self._data} self._data.update(left_data) diff --git a/lightrag/kg/milvus_impl.py b/lightrag/kg/milvus_impl.py index 833460a8..33a5c12b 100644 --- a/lightrag/kg/milvus_impl.py +++ b/lightrag/kg/milvus_impl.py @@ -75,11 +75,11 @@ class MilvusVectorDBStorage(BaseVectorStorage): ) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] - list_data = [ + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + + list_data: list[dict[str, Any]] = [ { "id": k, **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields}, diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index bc427773..0048b384 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -108,8 +108,12 @@ class MongoKVStorage(BaseKVStorage): return keys - existing_ids async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE): - update_tasks = [] + update_tasks: list[Any] = [] for mode, items in data.items(): for k, v in items.items(): key = f"{mode}_{k}" @@ -181,7 +185,10 @@ class MongoDocStatusStorage(DocStatusStorage): return data - existing_ids async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - update_tasks = [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + update_tasks: list[Any] = [] for k, v in data.items(): data[k]["_id"] = k update_tasks.append( @@ -855,10 +862,9 @@ class MongoVectorDBStorage(BaseVectorStorage): logger.debug("vector index already exist") async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.debug(f"Inserting {len(data)} vectors to {self.namespace}") + logger.info(f"Inserting {len(data)} to {self.namespace}") if not data: - logger.warning("You are inserting an empty data set to vector DB") - return [] + return list_data = [ { diff --git a/lightrag/kg/nano_vector_db_impl.py b/lightrag/kg/nano_vector_db_impl.py index 315b5a8f..b0900095 100644 --- a/lightrag/kg/nano_vector_db_impl.py +++ b/lightrag/kg/nano_vector_db_impl.py @@ -45,10 +45,9 @@ class NanoVectorDBStorage(BaseVectorStorage): ) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return current_time = time.time() list_data = [ diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py index 57db7e5b..80c22f38 100644 --- a/lightrag/kg/oracle_impl.py +++ b/lightrag/kg/oracle_impl.py @@ -326,6 +326,10 @@ class OracleKVStorage(BaseKVStorage): ################ INSERT METHODS ################ async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): list_data = [ { diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 4ffa2fb2..201e7715 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -347,6 +347,10 @@ class PGKVStorage(BaseKVStorage): ################ INSERT METHODS ################ async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): pass elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_DOCS): @@ -448,10 +452,10 @@ class PGVectorStorage(BaseVectorStorage): return upsert_sql, data async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - logger.info(f"Inserting {len(data)} vectors to {self.namespace}") - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + current_time = time.time() list_data = [ { @@ -612,6 +616,10 @@ class PGDocStatusStorage(DocStatusStorage): Args: data: dictionary of document IDs and their status data """ + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return + sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status) values($1,$2,$3,$4,$5,$6,$7) on conflict(id,workspace) do update set diff --git a/lightrag/kg/qdrant_impl.py b/lightrag/kg/qdrant_impl.py index d350e7fa..b08f0b62 100644 --- a/lightrag/kg/qdrant_impl.py +++ b/lightrag/kg/qdrant_impl.py @@ -87,9 +87,9 @@ class QdrantVectorDBStorage(BaseVectorStorage): ) async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return list_data = [ { "id": k, diff --git a/lightrag/kg/redis_impl.py b/lightrag/kg/redis_impl.py index 4bff6c62..7e177346 100644 --- a/lightrag/kg/redis_impl.py +++ b/lightrag/kg/redis_impl.py @@ -49,6 +49,9 @@ class RedisKVStorage(BaseKVStorage): return set(keys) - existing_ids async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return pipe = self._redis.pipeline() for k, v in data.items(): diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py index 8b8fa2b6..4adb0141 100644 --- a/lightrag/kg/tidb_impl.py +++ b/lightrag/kg/tidb_impl.py @@ -211,6 +211,9 @@ class TiDBKVStorage(BaseKVStorage): ################ INSERT full_doc AND chunks ################ async def upsert(self, data: dict[str, dict[str, Any]]) -> None: + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return left_data = {k: v for k, v in data.items() if k not in self._data} self._data.update(left_data) if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): @@ -318,12 +321,12 @@ class TiDBVectorDBStorage(BaseVectorStorage): ###### INSERT entities And relationships ###### async def upsert(self, data: dict[str, dict[str, Any]]) -> None: - # ignore, upsert in TiDBKVStorage already - if not len(data): - logger.warning("You insert an empty data to vector DB") - return [] + logger.info(f"Inserting {len(data)} to {self.namespace}") + if not data: + return if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS): - return [] + return + logger.info(f"Inserting {len(data)} vectors to {self.namespace}") list_data = [