From 7271ac69b7394aa9ceb2499bbd6010fc0393d446 Mon Sep 17 00:00:00 2001
From: Magic_yuan <317617749@qq.com>
Date: Mon, 30 Dec 2024 20:32:10 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=96=AD=E7=82=B9=E7=BB=AD?=
 =?UTF-8?q?=E4=BC=A0bug=EF=BC=8C=E8=BF=87=E6=BB=A4=E5=B7=B2=E7=BB=8F?=
 =?UTF-8?q?=E5=AD=98=E5=9C=A8=E7=9A=84=E6=96=87=E6=A1=A3=E6=98=AF=E5=8F=AA?=
 =?UTF-8?q?=E8=BF=87=E6=BB=A4=E7=8A=B6=E6=80=81=E6=98=AF=E5=B7=B2=E7=BB=8F?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E5=AE=8C=E6=88=90=E7=9A=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lightrag/storage.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/lightrag/storage.py b/lightrag/storage.py
index 4c862dbe..ac8a95d3 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -341,8 +341,14 @@ class JsonDocStatusStorage(DocStatusStorage):
         logger.info(f"Loaded document status storage with {len(self._data)} records")
 
     async def filter_keys(self, data: list[str]) -> set[str]:
-        """Return keys that don't exist in storage"""
-        return set([k for k in data if k not in self._data])
+        """Return keys that should be processed (not in storage or not successfully processed)"""
+        return set(
+            [
+                k
+                for k in data
+                if k not in self._data or self._data[k]["status"] != DocStatus.PROCESSED
+            ]
+        )
 
     async def get_status_counts(self) -> Dict[str, int]:
         """Get counts of documents in each status"""