增强聊天接口的调试和性能统计功能

- 添加原始请求日志记录
- 修改响应结构以包含性能统计
- 更新测试用例以展示性能数据
- 优化响应格式为字典结构
- 增加请求体解码功能
This commit is contained in:
yangdx
2025-01-15 21:15:12 +08:00
parent 8ef1248c76
commit af9ac188f0
2 changed files with 42 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, HTTPException, File, UploadFile, Form
+from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request
 from pydantic import BaseModel
 import logging
 import argparse
@@ -673,7 +673,10 @@ def create_app(args):
         return query, SearchMode.hybrid

     @app.post("/api/chat")
-    async def chat(request: OllamaChatRequest):
+    async def chat(raw_request: Request, request: OllamaChatRequest):
+        # 打印原始请求数据
+        body = await raw_request.body()
+        logging.info(f"收到 /api/chat 原始请求: {body.decode('utf-8')}")
         """Handle chat completion requests"""
         try:
             # 获取所有消息内容
@@ -776,17 +779,23 @@ def create_app(args):
             if not response_text:
                 response_text = "No response generated"

-            # 构造并返回响应
-            return OllamaChatResponse(
-                model=LIGHTRAG_MODEL,
-                created_at=LIGHTRAG_CREATED_AT,
-                message=OllamaMessage(
-                    role="assistant",
-                    content=str(response_text),  # 确保转换为字符串
-                    images=None
-                ),
-                done=True
-            )
+            # 构造响应,包含性能统计信息
+            return {
+                "model": LIGHTRAG_MODEL,
+                "created_at": LIGHTRAG_CREATED_AT,
+                "message": {
+                    "role": "assistant",
+                    "content": str(response_text),  # 确保转换为字符串
+                    "images": None
+                },
+                "done": True,
+                "total_duration": 0,  # 由于我们没有实际统计这些指标,暂时使用默认值
+                "load_duration": 0,
+                "prompt_eval_count": 0,
+                "prompt_eval_duration": 0,
+                "eval_count": 0,
+                "eval_duration": 0
+            }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))

View File

@@ -23,7 +23,26 @@ def test_non_stream_chat():
     # 打印响应
     print("\n=== 非流式调用响应 ===")
-    print(json.dumps(response.json(), ensure_ascii=False, indent=2))
+    response_json = response.json()
+
+    # 打印消息内容
+    print("=== 响应内容 ===")
+    print(json.dumps({
+        "model": response_json["model"],
+        "message": response_json["message"]
+    }, ensure_ascii=False, indent=2))
+
+    # 打印性能统计
+    print("\n=== 性能统计 ===")
+    stats = {
+        "total_duration": response_json["total_duration"],
+        "load_duration": response_json["load_duration"],
+        "prompt_eval_count": response_json["prompt_eval_count"],
+        "prompt_eval_duration": response_json["prompt_eval_duration"],
+        "eval_count": response_json["eval_count"],
+        "eval_duration": response_json["eval_duration"]
+    }
+    print(json.dumps(stats, ensure_ascii=False, indent=2))

 def test_stream_chat():
     """测试流式调用 /api/chat 接口"""