From 83f8a5139c8f076bcb893eb91ac4f0c660364fb6 Mon Sep 17 00:00:00 2001 From: Rick Battle Date: Tue, 12 Nov 2024 09:30:21 -0700 Subject: [PATCH] Only update storage if there was something to insert Before, the `finally` block always called `_insert_done()`, which writes out the `vdb_*` and `kv_store_*` files, even when there was nothing to insert (because all docs had already been inserted). This made skippable inserts increasingly slow as the graph grew. --- lightrag/lightrag.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 50e33405..67337098 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -227,6 +227,7 @@ class LightRAG: return loop.run_until_complete(self.ainsert(string_or_strings)) async def ainsert(self, string_or_strings): + update_storage = False try: if isinstance(string_or_strings, str): string_or_strings = [string_or_strings] @@ -240,6 +241,7 @@ class LightRAG: if not len(new_docs): logger.warning("All docs are already in the storage") return + update_storage = True logger.info(f"[New Docs] inserting {len(new_docs)} docs") inserting_chunks = {} @@ -286,7 +288,8 @@ class LightRAG: await self.full_docs.upsert(new_docs) await self.text_chunks.upsert(inserting_chunks) finally: - await self._insert_done() + if update_storage: + await self._insert_done() async def _insert_done(self): tasks = []