Merge pull request #712 from danielaskdd/handle-stream-cancel-error
Improve LLM Error Handling for API Server
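In short, the change makes the Ollama-compatible streaming endpoint report provider errors and stream cancellation to the client as NDJSON frames (an error message followed by a final done frame) instead of letting the exception tear down the response. Below is a minimal, self-contained sketch of that pattern; the helper names (fake_provider_stream, stream_with_error_frames, demo) are illustrative only and not part of LightRAG, whose real generator also tracks timing and token statistics.

import asyncio
import json
import logging


async def fake_provider_stream():
    # Stand-in for an LLM provider stream: yields one chunk, then fails mid-stream.
    yield "partial answer"
    raise RuntimeError("connection reset by provider")


async def stream_with_error_frames(response):
    try:
        async for chunk in response:
            data = {"message": {"role": "assistant", "content": chunk}, "done": False}
            yield f"{json.dumps(data, ensure_ascii=False)}\n"
    except (asyncio.CancelledError, Exception) as e:
        if isinstance(e, asyncio.CancelledError):
            error_msg = "Stream was cancelled by server"
        else:
            error_msg = f"Provider error: {e}"
        logging.error(f"Stream error: {error_msg}")
        # The error is delivered as a normal assistant message so existing clients can render it.
        data = {"message": {"role": "assistant", "content": f"\n\nError: {error_msg}"}, "done": False}
        yield f"{json.dumps(data, ensure_ascii=False)}\n"
    # Always close the stream with a final done frame so the client stops reading cleanly.
    yield f"{json.dumps({'done': True}, ensure_ascii=False)}\n"


async def demo():
    async for frame in stream_with_error_frames(fake_provider_stream()):
        print(frame, end="")


if __name__ == "__main__":
    asyncio.run(demo())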
@@ -12,7 +12,7 @@ from fastapi import (
 # Add this to store progress globally
 from typing import Dict
 import threading

 import asyncio
 import json
 import os

@@ -1725,11 +1725,11 @@ def create_app(args):
         )

         async def stream_generator():
             try:
                 first_chunk_time = None
                 last_chunk_time = None
                 total_response = ""

                 try:
                     # Ensure response is an async generator
                     if isinstance(response, str):
                         # If it's a string, send in two parts
@@ -1767,6 +1767,7 @@ def create_app(args):
                         }
                         yield f"{json.dumps(data, ensure_ascii=False)}\n"
                     else:
+                        try:
                             async for chunk in response:
                                 if chunk:
                                     if first_chunk_time is None:
@@ -1786,7 +1787,38 @@ def create_app(args):
                                         "done": False,
                                     }
                                     yield f"{json.dumps(data, ensure_ascii=False)}\n"
+                        except (asyncio.CancelledError, Exception) as e:
+                            error_msg = str(e)
+                            if isinstance(e, asyncio.CancelledError):
+                                error_msg = "Stream was cancelled by server"
+                            else:
+                                error_msg = f"Provider error: {error_msg}"
+
+                            logging.error(f"Stream error: {error_msg}")
+
+                            # Send error message to client
+                            error_data = {
+                                "model": ollama_server_infos.LIGHTRAG_MODEL,
+                                "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                                "message": {
+                                    "role": "assistant",
+                                    "content": f"\n\nError: {error_msg}",
+                                    "images": None,
+                                },
+                                "done": False,
+                            }
+                            yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
+
+                            # Send final message to close the stream
+                            final_data = {
+                                "model": ollama_server_infos.LIGHTRAG_MODEL,
+                                "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                                "done": True,
+                            }
+                            yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
+                            return

                         if last_chunk_time is not None:
                             completion_tokens = estimate_tokens(total_response)
                             total_time = last_chunk_time - start_time
                             prompt_eval_time = first_chunk_time - start_time
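One detail worth noting about the except clause above: since Python 3.8, asyncio.CancelledError derives from BaseException rather than Exception, so a bare except Exception would never see a cancelled stream; naming it explicitly is what lets the server emit the "Stream was cancelled by server" frame. A small self-contained check (not LightRAG code) illustrating the difference:

import asyncio


async def cancellable():
    try:
        await asyncio.sleep(10)
    except Exception:
        print("except Exception caught it")        # not reached on cancellation
    except asyncio.CancelledError:
        print("asyncio.CancelledError caught it")  # this branch runs
        raise


async def main():
    task = asyncio.create_task(cancellable())
    await asyncio.sleep(0)  # let the task start
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("cancellation propagated to the caller")


asyncio.run(main())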
@@ -1804,10 +1836,27 @@ def create_app(args):
                                 "eval_duration": eval_time,
                             }
                             yield f"{json.dumps(data, ensure_ascii=False)}\n"
                             return  # Ensure the generator ends immediately after sending the completion marker

                 except Exception as e:
-                    logging.error(f"Error in stream_generator: {str(e)}")
-                    raise
+                    error_msg = f"Error in stream_generator: {str(e)}"
+                    logging.error(error_msg)
+
+                    # Send error message to client
+                    error_data = {
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                        "error": {"code": "STREAM_ERROR", "message": error_msg},
+                    }
+                    yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
+
+                    # Ensure sending end marker
+                    final_data = {
+                        "model": ollama_server_infos.LIGHTRAG_MODEL,
+                        "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                        "done": True,
+                    }
+                    yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
+                    return

         return StreamingResponse(
             stream_generator(),
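For reference, this is roughly how a client is expected to consume the NDJSON frames the endpoint now emits: read line by line, surface an error frame if one arrives, and stop at the final done frame. The snippet below is an illustrative sketch only; handle_stream and the sample frames are not LightRAG code.

import json


def handle_stream(lines):
    """Yield assistant text from NDJSON frames, surfacing error frames, until done."""
    for raw in lines:
        frame = json.loads(raw)
        if "error" in frame:      # STREAM_ERROR frame from the except branch above
            raise RuntimeError(frame["error"]["message"])
        if frame.get("done"):     # final frame: statistics only, no content
            break
        yield frame["message"]["content"]


# Example frames mirroring the server's output:
sample = [
    '{"message": {"role": "assistant", "content": "Hello"}, "done": false}',
    '{"message": {"role": "assistant", "content": ", world"}, "done": false}',
    '{"done": true}',
]
print("".join(handle_stream(sample)))  # -> "Hello, world"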
@@ -125,6 +125,7 @@ async def openai_complete_if_cache(
     if hasattr(response, "__aiter__"):

         async def inner():
+            try:
                 async for chunk in response:
                     content = chunk.choices[0].delta.content
                     if content is None:
@@ -132,6 +133,9 @@ async def openai_complete_if_cache(
                     if r"\u" in content:
                         content = safe_unicode_decode(content.encode("utf-8"))
                     yield content
+            except Exception as e:
+                logger.error(f"Error in stream response: {str(e)}")
+                raise

         return inner()
     else:
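The change to inner() above follows a general pattern: wrap the provider's async generator so that mid-stream exceptions are logged once and then re-raised to the caller, which on the API-server side converts them into the error frames shown earlier. A standalone sketch of that pattern using only the standard library; logged_stream and failing_source are illustrative names, and the real function also handles unicode escapes via safe_unicode_decode.

import asyncio
import logging

logger = logging.getLogger(__name__)


def logged_stream(agen):
    async def inner():
        try:
            async for item in agen:
                yield item
        except Exception as e:
            # Log once at the wrapping layer, then let the caller decide how to report it.
            logger.error(f"Error in stream response: {str(e)}")
            raise

    return inner()


async def failing_source():
    yield "ok"
    raise ValueError("boom")


async def main():
    try:
        async for item in logged_stream(failing_source()):
            print(item)
    except ValueError as e:
        print(f"caller saw: {e}")


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    asyncio.run(main())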