增强聊天接口的调试和性能统计功能

- 添加原始请求日志记录
- 修改响应结构以包含性能统计
- 更新测试用例以展示性能数据
- 优化响应格式为字典结构
- 增加请求体解码功能
This commit is contained in:
yangdx
2025-01-15 21:15:12 +08:00
parent 8ef1248c76
commit af9ac188f0
2 changed files with 42 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
-from fastapi import FastAPI, HTTPException, File, UploadFile, Form
+from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request
 from pydantic import BaseModel
 import logging
 import argparse
@@ -673,7 +673,10 @@ def create_app(args):
         return query, SearchMode.hybrid

     @app.post("/api/chat")
-    async def chat(request: OllamaChatRequest):
+    async def chat(raw_request: Request, request: OllamaChatRequest):
+        # 打印原始请求数据
+        body = await raw_request.body()
+        logging.info(f"收到 /api/chat 原始请求: {body.decode('utf-8')}")
         """Handle chat completion requests"""
         try:
             # 获取所有消息内容
@@ -776,17 +779,23 @@ def create_app(args):
             if not response_text:
                 response_text = "No response generated"

-            # 构造并返回响应
-            return OllamaChatResponse(
-                model=LIGHTRAG_MODEL,
-                created_at=LIGHTRAG_CREATED_AT,
-                message=OllamaMessage(
-                    role="assistant",
-                    content=str(response_text),  # 确保转换为字符串
-                    images=None
-                ),
-                done=True
-            )
+            # 构造响应,包含性能统计信息
+            return {
+                "model": LIGHTRAG_MODEL,
+                "created_at": LIGHTRAG_CREATED_AT,
+                "message": {
+                    "role": "assistant",
+                    "content": str(response_text),  # 确保转换为字符串
+                    "images": None
+                },
+                "done": True,
+                "total_duration": 0,  # 由于我们没有实际统计这些指标,暂时使用默认值
+                "load_duration": 0,
+                "prompt_eval_count": 0,
+                "prompt_eval_duration": 0,
+                "eval_count": 0,
+                "eval_duration": 0
+            }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))

View File

@@ -23,7 +23,26 @@ def test_non_stream_chat():
     # 打印响应
     print("\n=== 非流式调用响应 ===")
-    print(json.dumps(response.json(), ensure_ascii=False, indent=2))
+    response_json = response.json()
+
+    # 打印消息内容
+    print("=== 响应内容 ===")
+    print(json.dumps({
+        "model": response_json["model"],
+        "message": response_json["message"]
+    }, ensure_ascii=False, indent=2))
+
+    # 打印性能统计
+    print("\n=== 性能统计 ===")
+    stats = {
+        "total_duration": response_json["total_duration"],
+        "load_duration": response_json["load_duration"],
+        "prompt_eval_count": response_json["prompt_eval_count"],
+        "prompt_eval_duration": response_json["prompt_eval_duration"],
+        "eval_count": response_json["eval_count"],
+        "eval_duration": response_json["eval_duration"]
+    }
+    print(json.dumps(stats, ensure_ascii=False, indent=2))

 def test_stream_chat():
     """测试流式调用 /api/chat 接口"""