diff --git a/lightrag/llm.py b/lightrag/llm.py
index 25792d53..0c17019a 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -15,7 +15,7 @@ from openai import (
     AsyncOpenAI,
     APIConnectionError,
     RateLimitError,
-    Timeout,
+    APITimeoutError,
     AsyncAzureOpenAI,
 )
 from pydantic import BaseModel, Field
@@ -47,7 +47,9 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def openai_complete_if_cache(
     model,
@@ -108,7 +110,9 @@ async def openai_complete_if_cache(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def azure_openai_complete_if_cache(
     model,
@@ -259,7 +263,9 @@ def initialize_hf_model(model_name):
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def hf_model_if_cache(
     model,
@@ -326,7 +332,9 @@ async def hf_model_if_cache(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def ollama_model_if_cache(
     model,
@@ -444,7 +452,9 @@ def initialize_lmdeploy_pipeline(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def lmdeploy_model_if_cache(
     model,
@@ -704,7 +714,9 @@ async def lollms_model_complete(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def zhipu_complete_if_cache(
     prompt: Union[str, List[Dict[str, str]]],
@@ -834,7 +846,9 @@ async def zhipu_complete(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def zhipu_embedding(
     texts: list[str], model: str = "embedding-3", api_key: str = None, **kwargs
@@ -870,7 +884,9 @@ async def zhipu_embedding(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def openai_embedding(
     texts: list[str],
@@ -928,7 +944,9 @@ async def jina_embedding(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def nvidia_openai_embedding(
     texts: list[str],
@@ -959,7 +977,9 @@ async def nvidia_openai_embedding(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=10),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def azure_openai_embedding(
     texts: list[str],
@@ -990,7 +1010,9 @@ async def azure_openai_embedding(
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_exponential(multiplier=1, min=4, max=60),
-    retry=retry_if_exception_type((RateLimitError, APIConnectionError, Timeout)),
+    retry=retry_if_exception_type(
+        (RateLimitError, APIConnectionError, APITimeoutError)
+    ),
 )
 async def siliconcloud_embedding(
     texts: list[str],