Only update storage if there was something to insert

Previously, the `finally` block always called `_insert_done()`, which writes out the `vdb_*` and `kv_store_*` files, even when there was nothing to insert (because all docs had already been inserted). This made skippable inserts increasingly slow as the graph grew. `_insert_done()` is now called only when there are new docs to insert.
This commit is contained in:
Rick Battle
2024-11-12 09:30:21 -07:00
committed by GitHub
parent dcf2641147
commit d4a27c901e

View File

@@ -227,6 +227,7 @@ class LightRAG:
return loop.run_until_complete(self.ainsert(string_or_strings)) return loop.run_until_complete(self.ainsert(string_or_strings))
async def ainsert(self, string_or_strings): async def ainsert(self, string_or_strings):
update_storage = False
try: try:
if isinstance(string_or_strings, str): if isinstance(string_or_strings, str):
string_or_strings = [string_or_strings] string_or_strings = [string_or_strings]
@@ -240,6 +241,7 @@ class LightRAG:
if not len(new_docs): if not len(new_docs):
logger.warning("All docs are already in the storage") logger.warning("All docs are already in the storage")
return return
update_storage = True
logger.info(f"[New Docs] inserting {len(new_docs)} docs") logger.info(f"[New Docs] inserting {len(new_docs)} docs")
inserting_chunks = {} inserting_chunks = {}
@@ -286,6 +288,7 @@ class LightRAG:
await self.full_docs.upsert(new_docs) await self.full_docs.upsert(new_docs)
await self.text_chunks.upsert(inserting_chunks) await self.text_chunks.upsert(inserting_chunks)
finally: finally:
if update_storage:
await self._insert_done() await self._insert_done()
async def _insert_done(self): async def _insert_done(self):