feat: improve error handling for streaming responses
• Add CancelledError handling for streams
• Send error details to client in JSON
• Add error status codes and messages
• Always send final completion marker
• Refactor stream generator error handling
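In outline, the commit wraps the NDJSON stream generator so that cancellation and unexpected failures are reported to the client as JSON lines before they propagate. A minimal, self-contained sketch of that pattern (the names `guarded_stream`, `chunks`, and `demo` are illustrative, not taken from the diff):

import asyncio
import json


async def guarded_stream(chunks):
    # Sketch of the commit's pattern: stream NDJSON lines, report
    # cancellation and failures to the client as JSON, and re-raise
    # so the server still sees the exception.
    try:
        try:
            async for chunk in chunks:
                yield json.dumps({"response": chunk, "done": False}) + "\n"
        except asyncio.CancelledError:
            yield json.dumps({
                "error": {"code": "STREAM_CANCELLED",
                          "message": "Stream was cancelled by server"},
                "done": False,
            }) + "\n"
            raise  # propagate so the surrounding task is torn down
        yield json.dumps({"done": True}) + "\n"
    except Exception as exc:
        # Ordinary failures: report, still send the final marker, re-raise.
        yield json.dumps({
            "error": {"code": "STREAM_ERROR", "message": str(exc)},
            "done": False,
        }) + "\n"
        yield json.dumps({"done": True}) + "\n"
        raise


async def demo():
    async def chunks():
        for piece in ("Hello", ", ", "world"):
            yield piece

    async for line in guarded_stream(chunks()):
        print(line, end="")


asyncio.run(demo())

Note that on Python 3.8+ asyncio.CancelledError derives from BaseException, so the outer except Exception does not swallow the re-raised cancellation; as in the diff below, the final completion marker is only guaranteed for ordinary errors.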
@@ -12,7 +12,7 @@ from fastapi import (
 # Add this to store progress globally
 from typing import Dict
 import threading
+import asyncio
 import json
 import os
 
@@ -1718,11 +1718,11 @@ def create_app(args):
         )
 
         async def stream_generator():
-            try:
             first_chunk_time = None
             last_chunk_time = None
             total_response = ""
 
+            try:
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
@@ -1760,6 +1760,7 @@ def create_app(args):
                     }
                     yield f"{json.dumps(data, ensure_ascii=False)}\n"
                 else:
+                    try:
                         async for chunk in response:
                             if chunk:
                                 if first_chunk_time is None:
@@ -1779,7 +1780,20 @@ def create_app(args):
                                     "done": False,
                                 }
                                 yield f"{json.dumps(data, ensure_ascii=False)}\n"
+                    except asyncio.CancelledError:
+                        error_data = {
+                            "model": ollama_server_infos.LIGHTRAG_MODEL,
+                            "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                            "error": {
+                                "code": "STREAM_CANCELLED",
+                                "message": "Stream was cancelled by server"
+                            },
+                            "done": False
+                        }
+                        yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
+                        raise
 
+                if last_chunk_time is not None:
                     completion_tokens = estimate_tokens(total_response)
                     total_time = last_chunk_time - start_time
                     prompt_eval_time = first_chunk_time - start_time
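The detail worth noting in the hunk above is the ordering: the generator yields the STREAM_CANCELLED line first, then re-raises, so the client gets one last JSON object explaining the drop while the event loop still sees the task as cancelled. A small hypothetical demo of that ordering (not from the repository), as a consumer observes it when its task is cancelled mid-stream:

import asyncio
import json


async def stream():
    try:
        while True:
            await asyncio.sleep(0.1)
            yield json.dumps({"response": "chunk", "done": False}) + "\n"
    except asyncio.CancelledError:
        # Cancellation arrives at the await inside the generator frame.
        yield json.dumps({"error": {"code": "STREAM_CANCELLED"},
                          "done": False}) + "\n"
        raise


async def consume():
    async for line in stream():
        print(line, end="")


async def main():
    task = asyncio.create_task(consume())
    await asyncio.sleep(0.35)
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("consumer task finished as cancelled")


asyncio.run(main())

The consumer prints a few normal chunks, then the error line, and only then does the CancelledError propagate out of the async for.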
@@ -1797,9 +1811,30 @@ def create_app(args):
                         "eval_duration": eval_time,
                     }
                     yield f"{json.dumps(data, ensure_ascii=False)}\n"
-                    return # Ensure the generator ends immediately after sending the completion marker
             except Exception as e:
-                logging.error(f"Error in stream_generator: {str(e)}")
+                error_msg = f"Error in stream_generator: {str(e)}"
+                logging.error(error_msg)
+
+                # Send the error message to the client
+                error_data = {
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                    "error": {
+                        "code": "STREAM_ERROR",
+                        "message": error_msg
+                    },
+                    "done": False
+                }
+                yield f"{json.dumps(error_data, ensure_ascii=False)}\n"
+
+                # Make sure the final completion marker is sent
+                final_data = {
+                    "model": ollama_server_infos.LIGHTRAG_MODEL,
+                    "created_at": ollama_server_infos.LIGHTRAG_CREATED_AT,
+                    "done": True
+                }
+                yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                 raise
 
         return StreamingResponse(
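From the client's perspective the response body is newline-delimited JSON, so the new error object and the final done marker can be handled line by line. A hedged sketch using httpx; the URL, port, and payload are placeholders for whatever Ollama-style endpoint the server actually exposes, not values taken from this diff:

import asyncio
import json

import httpx


async def read_stream():
    # Placeholder endpoint and payload; adjust to the server's real route.
    url = "http://localhost:9621/api/chat"
    payload = {"model": "lightrag",
               "messages": [{"role": "user", "content": "hi"}]}

    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload) as resp:
            async for line in resp.aiter_lines():
                if not line:
                    continue
                data = json.loads(line)
                if "error" in data:
                    # Surfaced by the error handling added in this commit.
                    print("stream error:", data["error"]["code"],
                          data["error"].get("message", ""))
                if data.get("done"):
                    break


asyncio.run(read_stream())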