From eb1fc0dae797e3a52fd921eadff665db4f068d80 Mon Sep 17 00:00:00 2001 From: chenzihong <522023320011@smail.nju.edu.cn> Date: Mon, 30 Dec 2024 01:46:15 +0800 Subject: [PATCH 1/3] fix: change exception type --- lightrag/llm.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/lightrag/llm.py b/lightrag/llm.py index 25792d53..69cd16f1 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -15,7 +15,7 @@ from openai import ( AsyncOpenAI, APIConnectionError, RateLimitError, - Timeout, + APITimeoutError, AsyncAzureOpenAI, ) from pydantic import BaseModel, Field @@ -47,7 +47,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def openai_complete_if_cache( model, @@ -108,7 +108,7 @@ async def openai_complete_if_cache( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APIConnectionError)), ) async def azure_openai_complete_if_cache( model, @@ -259,7 +259,7 @@ def initialize_hf_model(model_name): @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def hf_model_if_cache( model, @@ -326,7 +326,7 @@ async def hf_model_if_cache( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) 
async def ollama_model_if_cache( model, @@ -444,7 +444,7 @@ def initialize_lmdeploy_pipeline( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def lmdeploy_model_if_cache( model, @@ -704,7 +704,7 @@ async def lollms_model_complete( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def zhipu_complete_if_cache( prompt: Union[str, List[Dict[str, str]]], @@ -834,7 +834,7 @@ async def zhipu_complete( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def zhipu_embedding( texts: list[str], model: str = "embedding-3", api_key: str = None, **kwargs @@ -870,7 +870,7 @@ async def zhipu_embedding( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def openai_embedding( texts: list[str], @@ -928,7 +928,7 @@ async def jina_embedding( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def nvidia_openai_embedding( texts: list[str], @@ -959,7 +959,7 @@ async def nvidia_openai_embedding( @retry( stop=stop_after_attempt(3), 
wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def azure_openai_embedding( texts: list[str], @@ -990,7 +990,7 @@ async def azure_openai_embedding( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)), + retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), ) async def siliconcloud_embedding( texts: list[str], From 7271ac69b7394aa9ceb2499bbd6010fc0393d446 Mon Sep 17 00:00:00 2001 From: Magic_yuan <317617749@qq.com> Date: Mon, 30 Dec 2024 20:32:10 +0800 Subject: [PATCH 2/3] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E6=96=AD=E7=82=B9?= =?UTF-8?q?=E7=BB=AD=E4=BC=A0bug=EF=BC=8C=E8=BF=87=E6=BB=A4=E5=B7=B2?= =?UTF-8?q?=E7=BB=8F=E5=AD=98=E5=9C=A8=E7=9A=84=E6=96=87=E6=A1=A3=E6=98=AF?= =?UTF-8?q?=E5=8F=AA=E8=BF=87=E6=BB=A4=E7=8A=B6=E6=80=81=E6=98=AF=E5=B7=B2?= =?UTF-8?q?=E7=BB=8F=E5=A4=84=E7=90=86=E5=AE=8C=E6=88=90=E7=9A=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lightrag/storage.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/lightrag/storage.py b/lightrag/storage.py index 4c862dbe..ac8a95d3 100644 --- a/lightrag/storage.py +++ b/lightrag/storage.py @@ -341,8 +341,14 @@ class JsonDocStatusStorage(DocStatusStorage): logger.info(f"Loaded document status storage with {len(self._data)} records") async def filter_keys(self, data: list[str]) -> set[str]: - """Return keys that don't exist in storage""" - return set([k for k in data if k not in self._data]) + """Return keys that should be processed (not in storage or not successfully processed)""" + return set( + [ + k + for k in data + if k not in self._data or self._data[k]["status"] != DocStatus.PROCESSED + ] + ) async def 
get_status_counts(self) -> Dict[str, int]: """Get counts of documents in each status""" From 648645ef455d3bbccb36a1e6ecb12dd3d779b3fe Mon Sep 17 00:00:00 2001 From: chenzihong <522023320011@smail.nju.edu.cn> Date: Tue, 31 Dec 2024 01:33:14 +0800 Subject: [PATCH 3/3] fix: fix formatting issues --- lightrag/llm.py | 44 +++++++++++++++++++++++++++++++++----------- 1 file changed, 33 insertions(+), 11 deletions(-) diff --git a/lightrag/llm.py b/lightrag/llm.py index 69cd16f1..0c17019a 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -47,7 +47,9 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false" @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def openai_complete_if_cache( model, @@ -108,7 +110,9 @@ async def openai_complete_if_cache( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APIConnectionError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def azure_openai_complete_if_cache( model, @@ -259,7 +263,9 @@ def initialize_hf_model(model_name): @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def hf_model_if_cache( model, @@ -326,7 +332,9 @@ async def hf_model_if_cache( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def 
ollama_model_if_cache( model, @@ -444,7 +452,9 @@ def initialize_lmdeploy_pipeline( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def lmdeploy_model_if_cache( model, @@ -704,7 +714,9 @@ async def lollms_model_complete( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def zhipu_complete_if_cache( prompt: Union[str, List[Dict[str, str]]], @@ -834,7 +846,9 @@ async def zhipu_complete( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def zhipu_embedding( texts: list[str], model: str = "embedding-3", api_key: str = None, **kwargs @@ -870,7 +884,9 @@ async def zhipu_embedding( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def openai_embedding( texts: list[str], @@ -928,7 +944,9 @@ async def jina_embedding( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def nvidia_openai_embedding( texts: list[str], @@ -959,7 +977,9 @@ async def 
nvidia_openai_embedding( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def azure_openai_embedding( texts: list[str], @@ -990,7 +1010,9 @@ async def azure_openai_embedding( @retry( stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=60), - retry=retry_if_exception_type((RateLimitError, APIConnectionError, APITimeoutError)), + retry=retry_if_exception_type( + (RateLimitError, APIConnectionError, APITimeoutError) + ), ) async def siliconcloud_embedding( texts: list[str],