Merge pull request #883 from YanSte/fix-return-none

Optimised returns
This commit is contained in:
Yannick Stephan
2025-02-19 22:24:50 +01:00
committed by GitHub
12 changed files with 60 additions and 31 deletions

View File

@@ -108,9 +108,9 @@ class ChromaVectorDBStorage(BaseVectorStorage):
raise raise
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data: if not data:
logger.warning("Empty data provided to vector DB") return
return []
try: try:
ids = list(data.keys()) ids = list(data.keys())

View File

@@ -79,10 +79,9 @@ class FaissVectorDBStorage(BaseVectorStorage):
... ...
} }
""" """
logger.info(f"Inserting {len(data)} vectors to {self.namespace}") logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data: if not data:
logger.warning("You are inserting empty data to the vector DB") return
return []
current_time = time.time() current_time = time.time()

View File

@@ -58,6 +58,10 @@ class JsonDocStatusStorage(DocStatusStorage):
write_json(self._data, self._file_name) write_json(self._data, self._file_name)
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
self._data.update(data) self._data.update(data)
await self.index_done_callback() await self.index_done_callback()

View File

@@ -43,6 +43,9 @@ class JsonKVStorage(BaseKVStorage):
return set(keys) - set(self._data.keys()) return set(keys) - set(self._data.keys())
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
left_data = {k: v for k, v in data.items() if k not in self._data} left_data = {k: v for k, v in data.items() if k not in self._data}
self._data.update(left_data) self._data.update(left_data)

View File

@@ -75,11 +75,11 @@ class MilvusVectorDBStorage(BaseVectorStorage):
) )
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} vectors to {self.namespace}") logger.info(f"Inserting {len(data)} to {self.namespace}")
if not len(data): if not data:
logger.warning("You insert an empty data to vector DB") return
return []
list_data = [ list_data: list[dict[str, Any]] = [
{ {
"id": k, "id": k,
**{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields}, **{k1: v1 for k1, v1 in v.items() if k1 in self.meta_fields},

View File

@@ -108,8 +108,12 @@ class MongoKVStorage(BaseKVStorage):
return keys - existing_ids return keys - existing_ids
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE): if is_namespace(self.namespace, NameSpace.KV_STORE_LLM_RESPONSE_CACHE):
update_tasks = [] update_tasks: list[Any] = []
for mode, items in data.items(): for mode, items in data.items():
for k, v in items.items(): for k, v in items.items():
key = f"{mode}_{k}" key = f"{mode}_{k}"
@@ -181,7 +185,10 @@ class MongoDocStatusStorage(DocStatusStorage):
return data - existing_ids return data - existing_ids
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
update_tasks = [] logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
update_tasks: list[Any] = []
for k, v in data.items(): for k, v in data.items():
data[k]["_id"] = k data[k]["_id"] = k
update_tasks.append( update_tasks.append(
@@ -855,10 +862,9 @@ class MongoVectorDBStorage(BaseVectorStorage):
logger.debug("vector index already exist") logger.debug("vector index already exist")
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.debug(f"Inserting {len(data)} vectors to {self.namespace}") logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data: if not data:
logger.warning("You are inserting an empty data set to vector DB") return
return []
list_data = [ list_data = [
{ {

View File

@@ -45,10 +45,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
) )
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} vectors to {self.namespace}") logger.info(f"Inserting {len(data)} to {self.namespace}")
if not len(data): if not data:
logger.warning("You insert an empty data to vector DB") return
return []
current_time = time.time() current_time = time.time()
list_data = [ list_data = [

View File

@@ -326,6 +326,10 @@ class OracleKVStorage(BaseKVStorage):
################ INSERT METHODS ################ ################ INSERT METHODS ################
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
list_data = [ list_data = [
{ {

View File

@@ -347,6 +347,10 @@ class PGKVStorage(BaseKVStorage):
################ INSERT METHODS ################ ################ INSERT METHODS ################
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
pass pass
elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_DOCS): elif is_namespace(self.namespace, NameSpace.KV_STORE_FULL_DOCS):
@@ -448,10 +452,10 @@ class PGVectorStorage(BaseVectorStorage):
return upsert_sql, data return upsert_sql, data
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} vectors to {self.namespace}") logger.info(f"Inserting {len(data)} to {self.namespace}")
if not len(data): if not data:
logger.warning("You insert an empty data to vector DB") return
return []
current_time = time.time() current_time = time.time()
list_data = [ list_data = [
{ {
@@ -612,6 +616,10 @@ class PGDocStatusStorage(DocStatusStorage):
Args: Args:
data: dictionary of document IDs and their status data data: dictionary of document IDs and their status data
""" """
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status) sql = """insert into LIGHTRAG_DOC_STATUS(workspace,id,content,content_summary,content_length,chunks_count,status)
values($1,$2,$3,$4,$5,$6,$7) values($1,$2,$3,$4,$5,$6,$7)
on conflict(id,workspace) do update set on conflict(id,workspace) do update set

View File

@@ -87,9 +87,9 @@ class QdrantVectorDBStorage(BaseVectorStorage):
) )
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
if not len(data): logger.info(f"Inserting {len(data)} to {self.namespace}")
logger.warning("You insert an empty data to vector DB") if not data:
return [] return
list_data = [ list_data = [
{ {
"id": k, "id": k,

View File

@@ -49,6 +49,9 @@ class RedisKVStorage(BaseKVStorage):
return set(keys) - existing_ids return set(keys) - existing_ids
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
pipe = self._redis.pipeline() pipe = self._redis.pipeline()
for k, v in data.items(): for k, v in data.items():

View File

@@ -211,6 +211,9 @@ class TiDBKVStorage(BaseKVStorage):
################ INSERT full_doc AND chunks ################ ################ INSERT full_doc AND chunks ################
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
left_data = {k: v for k, v in data.items() if k not in self._data} left_data = {k: v for k, v in data.items() if k not in self._data}
self._data.update(left_data) self._data.update(left_data)
if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS): if is_namespace(self.namespace, NameSpace.KV_STORE_TEXT_CHUNKS):
@@ -318,12 +321,12 @@ class TiDBVectorDBStorage(BaseVectorStorage):
###### INSERT entities And relationships ###### ###### INSERT entities And relationships ######
async def upsert(self, data: dict[str, dict[str, Any]]) -> None: async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
# ignore, upsert in TiDBKVStorage already logger.info(f"Inserting {len(data)} to {self.namespace}")
if not len(data): if not data:
logger.warning("You insert an empty data to vector DB") return
return []
if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS): if is_namespace(self.namespace, NameSpace.VECTOR_STORE_CHUNKS):
return [] return
logger.info(f"Inserting {len(data)} vectors to {self.namespace}") logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
list_data = [ list_data = [