解决查询命中缓存时流式响应未遵循Ollama规范的问题

- rag返回结果为字符串时,响应分两次发送
- 第一次发送查询内容
- 第二次发送统计信息
This commit is contained in:
yangdx
2025-01-15 23:09:50 +08:00
parent e978a15593
commit 9632a8f0dc
2 changed files with 19 additions and 4 deletions

View File

@@ -709,16 +709,31 @@ def create_app(args):
try:
# 确保 response 是异步生成器
if isinstance(response, str):
# 如果是字符串,作为单个完整响应发送
# 如果是字符串,分两次发送
# 第一次发送查询内容
data = {
"model": LIGHTRAG_MODEL,
"created_at": LIGHTRAG_CREATED_AT,
"message": {
"role": "assistant",
"role": "assistant",
"content": response,
"images": None
},
"done": True
"done": False
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
# 第二次发送统计信息
data = {
"model": LIGHTRAG_MODEL,
"created_at": LIGHTRAG_CREATED_AT,
"done": True,
"total_duration": 1,
"load_duration": 1,
"prompt_eval_count": 999,
"prompt_eval_duration": 1,
"eval_count": 999,
"eval_duration": 1
}
yield f"{json.dumps(data, ensure_ascii=False)}\n"
else: