Enhance debugging and performance statistics for the chat endpoint

- Log the raw incoming request
- Change the response structure to include performance statistics
- Update the test case to display performance data
- Switch the response format to a plain dictionary
- Add request body decoding
yangdx
2025-01-15 21:15:12 +08:00
parent 8ef1248c76
commit af9ac188f0
2 changed files with 42 additions and 14 deletions
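
For orientation, a minimal sketch of calling the updated endpoint and reading the new statistics fields; the host, port, and model name below are assumptions, not taken from this commit:

import requests

# Hypothetical host/port and model name; adjust to the running LightRAG API server.
resp = requests.post(
    "http://localhost:9621/api/chat",
    json={
        "model": "lightrag:latest",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
)
data = resp.json()
print(data["message"]["content"])
print(data["total_duration"], data["eval_count"])  # new statistics fields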

View File

@@ -1,4 +1,4 @@
from fastapi import FastAPI, HTTPException, File, UploadFile, Form
from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request
from pydantic import BaseModel
import logging
import argparse
@@ -673,7 +673,10 @@ def create_app(args):
return query, SearchMode.hybrid
@app.post("/api/chat")
async def chat(request: OllamaChatRequest):
async def chat(raw_request: Request, request: OllamaChatRequest):
"""Handle chat completion requests"""
# Log the raw request body for debugging
body = await raw_request.body()
logging.info(f"Received raw /api/chat request: {body.decode('utf-8')}")
try:
# Collect the content of all messages
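
An aside on the handler signature above: injecting both the raw Request and the parsed Pydantic model works because Starlette caches the request body after the first read, so re-reading it inside the handler does not block. A minimal, self-contained sketch of the same pattern (ChatRequest is a simplified stand-in, not the project's OllamaChatRequest):

from fastapi import FastAPI, Request
from pydantic import BaseModel
import logging

logging.basicConfig(level=logging.INFO)
app = FastAPI()

class ChatRequest(BaseModel):
    messages: list

@app.post("/api/chat")
async def chat(raw_request: Request, request: ChatRequest):
    # The body was already consumed to build the Pydantic model, but
    # Starlette returns the cached bytes here instead of hanging.
    body = await raw_request.body()
    logging.info("Raw /api/chat request: %s", body.decode("utf-8"))
    return {"received_messages": len(request.messages)}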
@@ -776,17 +779,23 @@ def create_app(args):
if not response_text:
response_text = "No response generated"
# Build and return the response
return OllamaChatResponse(
model=LIGHTRAG_MODEL,
created_at=LIGHTRAG_CREATED_AT,
message=OllamaMessage(
role="assistant",
content=str(response_text), # Ensure the content is a string
images=None
),
done=True
)
# Build the response, including performance statistics
return {
"model": LIGHTRAG_MODEL,
"created_at": LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"content": str(response_text), # 确保转换为字符串
"images": None
},
"done": True,
"total_duration": 0, # 由于我们没有实际统计这些指标,暂时使用默认值
"load_duration": 0,
"prompt_eval_count": 0,
"prompt_eval_duration": 0,
"eval_count": 0,
"eval_duration": 0
}
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
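
The statistics are deliberately returned as zeros. If real numbers are wanted later, one possible approach (an assumption, not something this commit implements) is to take a wall-clock measurement around the query; Ollama's API reports these durations in nanoseconds:

import time

def run_query() -> str:
    # Stand-in for the actual RAG call made by the handler.
    return "stub answer"

start = time.time_ns()
response_text = run_query()
total_duration = time.time_ns() - start  # nanoseconds, matching Ollama's units

response = {
    "model": "lightrag:latest",  # placeholder model name
    "created_at": "2025-01-15T21:15:12+08:00",
    "message": {"role": "assistant", "content": str(response_text), "images": None},
    "done": True,
    "total_duration": total_duration,
    "load_duration": 0,  # still placeholders without deeper instrumentation
    "prompt_eval_count": 0,
    "prompt_eval_duration": 0,
    "eval_count": 0,
    "eval_duration": 0,
}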

View File

@@ -23,7 +23,26 @@ def test_non_stream_chat():
# Print the response
print("\n=== Non-streaming response ===")
print(json.dumps(response.json(), ensure_ascii=False, indent=2))
response_json = response.json()
# Print the message content
print("=== Response content ===")
print(json.dumps({
"model": response_json["model"],
"message": response_json["message"]
}, ensure_ascii=False, indent=2))
# Print performance statistics
print("\n=== Performance statistics ===")
stats = {
"total_duration": response_json["total_duration"],
"load_duration": response_json["load_duration"],
"prompt_eval_count": response_json["prompt_eval_count"],
"prompt_eval_duration": response_json["prompt_eval_duration"],
"eval_count": response_json["eval_count"],
"eval_duration": response_json["eval_duration"]
}
print(json.dumps(stats, ensure_ascii=False, indent=2))
def test_stream_chat():
"""测试流式调用 /api/chat 接口"""