added timeout

This commit is contained in:
Saifeddine ALOUI
2025-01-10 21:39:25 +01:00
parent 2297007b7b
commit adb288c5bb
2 changed files with 26 additions and 1 deletions

View File

@@ -101,6 +101,12 @@ def parse_args():
help="Embedding model name (default: bge-m3:latest)", help="Embedding model name (default: bge-m3:latest)",
) )
parser.add_argument(
"--timeout",
default=300,
help="Timeout in seconds (useful when using slow AI)",
)
# RAG configuration # RAG configuration
parser.add_argument( parser.add_argument(
"--max-async", type=int, default=4, help="Maximum async operations (default: 4)" "--max-async", type=int, default=4, help="Maximum async operations (default: 4)"
@@ -139,6 +145,22 @@ def parse_args():
default=None, default=None,
) )
# Optional https parameters
parser.add_argument(
"--ssl",
action="store_true",
help="Enable HTTPS (default: False)"
)
parser.add_argument(
"--ssl-certfile",
default=None,
help="Path to SSL certificate file (required if --ssl is enabled)"
)
parser.add_argument(
"--ssl-keyfile",
default=None,
help="Path to SSL private key file (required if --ssl is enabled)"
)
return parser.parse_args() return parser.parse_args()
@@ -284,6 +306,7 @@ def create_app(args):
llm_model_max_token_size=args.max_tokens, llm_model_max_token_size=args.max_tokens,
llm_model_kwargs={ llm_model_kwargs={
"host": args.llm_binding_host, "host": args.llm_binding_host,
"timeout": args.timeout,
"options": {"num_ctx": args.max_tokens}, "options": {"num_ctx": args.max_tokens},
}, },
embedding_func=EmbeddingFunc( embedding_func=EmbeddingFunc(

View File

@@ -336,6 +336,7 @@ async def hf_model_if_cache(
(RateLimitError, APIConnectionError, APITimeoutError) (RateLimitError, APIConnectionError, APITimeoutError)
), ),
) )
async def ollama_model_if_cache( async def ollama_model_if_cache(
model, model,
prompt, prompt,
@@ -406,8 +407,9 @@ async def lollms_model_if_cache(
full_prompt += prompt full_prompt += prompt
request_data["prompt"] = full_prompt request_data["prompt"] = full_prompt
timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout", 300)) # 300 seconds = 5 minutes
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=timeout) as session:
if stream: if stream:
async def inner(): async def inner():