From d0a4ef252ee480579777cdfda164651e0118bdf6 Mon Sep 17 00:00:00 2001 From: david Date: Tue, 10 Dec 2024 09:00:22 +0800 Subject: [PATCH] fix: rare embedding issue. --- lightrag/storage.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/lightrag/storage.py b/lightrag/storage.py index 007d6534..4c043893 100644 --- a/lightrag/storage.py +++ b/lightrag/storage.py @@ -107,10 +107,16 @@ class NanoVectorDBStorage(BaseVectorStorage): embeddings = await f embeddings_list.append(embeddings) embeddings = np.concatenate(embeddings_list) - for i, d in enumerate(list_data): - d["__vector__"] = embeddings[i] - results = self._client.upsert(datas=list_data) - return results + if len(embeddings) == len(list_data): + for i, d in enumerate(list_data): + d["__vector__"] = embeddings[i] + results = self._client.upsert(datas=list_data) + return results + else: + # sometimes the embedding is not returned correctly. just log it. + logger.error( + f"embedding is not 1-1 with data, {len(embeddings)} != {len(list_data)}" + ) async def query(self, query: str, top_k=5): embedding = await self.embedding_func([query])