增强聊天接口的调试和性能统计功能
- 添加原始请求日志记录
- 修改响应结构以包含性能统计
- 更新测试用例以展示性能数据
- 优化响应格式为字典结构
- 增加请求体解码功能
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
|
from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
@@ -673,7 +673,10 @@ def create_app(args):
|
|||||||
return query, SearchMode.hybrid
|
return query, SearchMode.hybrid
|
||||||
|
|
||||||
@app.post("/api/chat")
|
@app.post("/api/chat")
|
||||||
async def chat(request: OllamaChatRequest):
|
async def chat(raw_request: Request, request: OllamaChatRequest):
|
||||||
|
# 打印原始请求数据
|
||||||
|
body = await raw_request.body()
|
||||||
|
logging.info(f"收到 /api/chat 原始请求: {body.decode('utf-8')}")
|
||||||
"""Handle chat completion requests"""
|
"""Handle chat completion requests"""
|
||||||
try:
|
try:
|
||||||
# 获取所有消息内容
|
# 获取所有消息内容
|
||||||
@@ -776,17 +779,23 @@ def create_app(args):
|
|||||||
if not response_text:
|
if not response_text:
|
||||||
response_text = "No response generated"
|
response_text = "No response generated"
|
||||||
|
|
||||||
# 构造并返回响应
|
# 构造响应,包含性能统计信息
|
||||||
return OllamaChatResponse(
|
return {
|
||||||
model=LIGHTRAG_MODEL,
|
"model": LIGHTRAG_MODEL,
|
||||||
created_at=LIGHTRAG_CREATED_AT,
|
"created_at": LIGHTRAG_CREATED_AT,
|
||||||
message=OllamaMessage(
|
"message": {
|
||||||
role="assistant",
|
"role": "assistant",
|
||||||
content=str(response_text), # 确保转换为字符串
|
"content": str(response_text), # 确保转换为字符串
|
||||||
images=None
|
"images": None
|
||||||
),
|
},
|
||||||
done=True
|
"done": True,
|
||||||
)
|
"total_duration": 0, # 由于我们没有实际统计这些指标,暂时使用默认值
|
||||||
|
"load_duration": 0,
|
||||||
|
"prompt_eval_count": 0,
|
||||||
|
"prompt_eval_duration": 0,
|
||||||
|
"eval_count": 0,
|
||||||
|
"eval_duration": 0
|
||||||
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
|
||||||
|
@@ -23,7 +23,26 @@ def test_non_stream_chat():
|
|||||||
|
|
||||||
# 打印响应
|
# 打印响应
|
||||||
print("\n=== 非流式调用响应 ===")
|
print("\n=== 非流式调用响应 ===")
|
||||||
print(json.dumps(response.json(), ensure_ascii=False, indent=2))
|
response_json = response.json()
|
||||||
|
|
||||||
|
# 打印消息内容
|
||||||
|
print("=== 响应内容 ===")
|
||||||
|
print(json.dumps({
|
||||||
|
"model": response_json["model"],
|
||||||
|
"message": response_json["message"]
|
||||||
|
}, ensure_ascii=False, indent=2))
|
||||||
|
|
||||||
|
# 打印性能统计
|
||||||
|
print("\n=== 性能统计 ===")
|
||||||
|
stats = {
|
||||||
|
"total_duration": response_json["total_duration"],
|
||||||
|
"load_duration": response_json["load_duration"],
|
||||||
|
"prompt_eval_count": response_json["prompt_eval_count"],
|
||||||
|
"prompt_eval_duration": response_json["prompt_eval_duration"],
|
||||||
|
"eval_count": response_json["eval_count"],
|
||||||
|
"eval_duration": response_json["eval_duration"]
|
||||||
|
}
|
||||||
|
print(json.dumps(stats, ensure_ascii=False, indent=2))
|
||||||
|
|
||||||
def test_stream_chat():
|
def test_stream_chat():
|
||||||
"""测试流式调用 /api/chat 接口"""
|
"""测试流式调用 /api/chat 接口"""
|
||||||
|
Reference in New Issue
Block a user