Improve error handling and response consistency in streaming endpoints
• Add error message forwarding to client
• Handle stream cancellations gracefully
• Add logging for stream errors
• Ensure clean stream termination
• Add try-catch in OpenAI streaming
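
Both streaming endpoints emit newline-delimited JSON, so after this change a mid-stream failure reaches the client as an in-band error line followed by a final done marker, instead of an abruptly dropped connection. Illustratively, the last two lines of a failed stream would look like this (field values are placeholders, not captured output):

    {"model": "lightrag:latest", "created_at": "...", "error": {"code": "STREAM_ERROR", "message": "Provider error: ..."}}
    {"model": "lightrag:latest", "created_at": "...", "done": true}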
@@ -1780,18 +1780,34 @@ def create_app(args):
                         "done": False,
                     }
                     yield f"{json.dumps(data, ensure_ascii=False)}\n"
-            except asyncio.CancelledError:
+            except (asyncio.CancelledError, Exception) as e:
+                error_msg = str(e)
+                if isinstance(e, asyncio.CancelledError):
+                    error_msg = "Stream was cancelled by server"
+                else:
+                    error_msg = f"Provider error: {error_msg}"
+
+                logging.error(f"Stream error: {error_msg}")
+
+                # Send error message to client
                 error_data = {
                     "model": ollama_server_infos.LIGHTRAG_MODEL,
                     "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "error": {
-                        "code": "STREAM_CANCELLED",
-                        "message": "Stream was cancelled by server",
+                        "code": "STREAM_ERROR",
+                        "message": error_msg
                     },
-                    "done": False,
                 }
                 yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
-                raise
+
+                # Send final message to close the stream
+                final_data = {
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                    "done": True,
+                }
+                yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
+                return
 
             if last_chunk_time is not None:
                 completion_tokens = estimate_tokens(total_response)
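
With errors forwarded in-band, a client can distinguish a server-side failure from a transport problem. A minimal consumer sketch, assuming httpx and an Ollama-style chat endpoint; the URL, request payload, and the message.content field are illustrative assumptions, not taken from this diff:

    import asyncio
    import json

    import httpx

    async def read_stream(url: str, payload: dict) -> str:
        """Collect streamed NDJSON content, surfacing in-band errors."""
        chunks = []
        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream("POST", url, json=payload) as resp:
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    data = json.loads(line)
                    if "error" in data:
                        # Error payload forwarded by the server before it ends the stream
                        raise RuntimeError(data["error"]["message"])
                    if data.get("done"):
                        break
                    chunks.append(data.get("message", {}).get("content", ""))
        return "".join(chunks)

    # Hypothetical usage:
    # asyncio.run(read_stream("http://localhost:9621/api/chat",
    #                         {"model": "lightrag:latest", "messages": [], "stream": True}))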
@@ -1816,23 +1832,25 @@ def create_app(args):
                 error_msg = f"Error in stream_generator: {str(e)}"
                 logging.error(error_msg)
 
-                # 发送错误消息给客户端
+                # Send error message to client
                 error_data = {
                     "model": ollama_server_infos.LIGHTRAG_MODEL,
                     "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
-                    "error": {"code": "STREAM_ERROR", "message": error_msg},
-                    "done": False,
+                    "error": {
+                        "code": "STREAM_ERROR",
+                        "message": error_msg
+                    },
                 }
                 yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
 
-                # 确保发送结束标记
+                # Ensure sending end marker
                 final_data = {
                     "model": ollama_server_infos.LIGHTRAG_MODEL,
                     "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
                     "done": True,
                 }
                 yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
-                raise
+                return
 
         return StreamingResponse(
             stream_generator(),
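
Note the raise-to-return change at the end of the error path. With FastAPI's StreamingResponse, the status line and headers are already on the wire once the first chunk is yielded, so an exception escaping the generator can only abort the connection mid-body; returning instead lets the already-yielded error and done lines stand as a well-formed end of stream. A stripped-down, self-contained illustration of the pattern (not the project's code):

    import asyncio
    import json
    import logging

    async def stream_generator():
        """Mimic the error path above: forward the error, then a done marker."""
        try:
            yield json.dumps({"response": "partial ", "done": False}) + "\n"
            raise RuntimeError("upstream provider failed")  # simulated mid-stream failure
        except (asyncio.CancelledError, Exception) as e:
            # CancelledError subclasses BaseException on Python 3.8+, so it is
            # listed explicitly; `except Exception` alone would not catch it.
            if isinstance(e, asyncio.CancelledError):
                error_msg = "Stream was cancelled by server"
            else:
                error_msg = f"Provider error: {e}"
            logging.error("Stream error: %s", error_msg)
            yield json.dumps({"error": {"code": "STREAM_ERROR", "message": error_msg}}) + "\n"
            yield json.dumps({"done": True}) + "\n"
            return  # ending the generator closes the HTTP body cleanly

    async def main():
        async for line in stream_generator():
            print(line, end="")

    asyncio.run(main())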
@@ -125,6 +125,7 @@ async def openai_complete_if_cache(
     if hasattr(response, "__aiter__"):
 
         async def inner():
+            try:
                 async for chunk in response:
                     content = chunk.choices[0].delta.content
                     if content is None:
@@ -132,6 +133,9 @@ async def openai_complete_if_cache(
                     if r"\u" in content:
                         content = safe_unicode_decode(content.encode("utf-8"))
                     yield content
+            except Exception as e:
+                logger.error(f"Error in stream response: {str(e)}")
+                raise
 
         return inner()
     else:
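
The try/except added here logs provider failures at the point they occur and then re-raises, so the exception still reaches stream_generator above, which converts it into the in-band error payload. The same wrap-and-log pattern in isolation, as a sketch (the unicode-escape handling from the diff is omitted; any async iterator of OpenAI-style chat chunks will do):

    import logging

    logger = logging.getLogger(__name__)

    def wrap_stream(response):
        """Wrap an async iterator of chat-completion chunks, logging failures."""

        async def inner():
            try:
                async for chunk in response:
                    content = chunk.choices[0].delta.content
                    if content is None:
                        continue  # role/finish chunks carry no text
                    yield content
            except Exception as e:
                # Log here so the provider failure is recorded even if a caller
                # downstream swallows or transforms the exception.
                logger.error(f"Error in stream response: {str(e)}")
                raise

        return inner()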