Translate comments to English
This commit is contained in:
@@ -541,13 +541,13 @@ def create_app(args):
|
|||||||
|
|
||||||
async def stream_generator():
|
async def stream_generator():
|
||||||
if isinstance(response, str):
|
if isinstance(response, str):
|
||||||
# 如果是字符串,一次性发送
|
# If it's a string, send it all at once
|
||||||
yield f"{json.dumps({'response': response})}\n"
|
yield f"{json.dumps({'response': response})}\n"
|
||||||
else:
|
else:
|
||||||
# 如果是异步生成器,逐块发送
|
# If it's an async generator, send chunks one by one
|
||||||
try:
|
try:
|
||||||
async for chunk in response:
|
async for chunk in response:
|
||||||
if chunk: # 只发送非空内容
|
if chunk: # Only send non-empty content
|
||||||
yield f"{json.dumps({'response': chunk})}\n"
|
yield f"{json.dumps({'response': chunk})}\n"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Streaming error: {str(e)}")
|
logging.error(f"Streaming error: {str(e)}")
|
||||||
@@ -563,7 +563,7 @@ def create_app(args):
|
|||||||
"Access-Control-Allow-Origin": "*",
|
"Access-Control-Allow-Origin": "*",
|
||||||
"Access-Control-Allow-Methods": "POST, OPTIONS",
|
"Access-Control-Allow-Methods": "POST, OPTIONS",
|
||||||
"Access-Control-Allow-Headers": "Content-Type",
|
"Access-Control-Allow-Headers": "Content-Type",
|
||||||
"X-Accel-Buffering": "no", # 禁用 Nginx 缓冲
|
"X-Accel-Buffering": "no", # Disable Nginx buffering
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -760,7 +760,6 @@ def create_app(args):
|
|||||||
last_chunk_time = first_chunk_time
|
last_chunk_time = first_chunk_time
|
||||||
total_response = response
|
total_response = response
|
||||||
|
|
||||||
# 第一次发送查询内容
|
|
||||||
data = {
|
data = {
|
||||||
"model": LIGHTRAG_MODEL,
|
"model": LIGHTRAG_MODEL,
|
||||||
"created_at": LIGHTRAG_CREATED_AT,
|
"created_at": LIGHTRAG_CREATED_AT,
|
||||||
@@ -773,41 +772,35 @@ def create_app(args):
|
|||||||
}
|
}
|
||||||
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
||||||
|
|
||||||
# 计算各项指标
|
|
||||||
completion_tokens = estimate_tokens(total_response)
|
completion_tokens = estimate_tokens(total_response)
|
||||||
total_time = last_chunk_time - start_time # 总时间
|
total_time = last_chunk_time - start_time
|
||||||
prompt_eval_time = (
|
prompt_eval_time = (
|
||||||
first_chunk_time - start_time
|
first_chunk_time - start_time
|
||||||
) # 首个响应之前的时间
|
)
|
||||||
eval_time = (
|
eval_time = (
|
||||||
last_chunk_time - first_chunk_time
|
last_chunk_time - first_chunk_time
|
||||||
) # 生成响应的时间
|
)
|
||||||
|
|
||||||
# 第二次发送统计信息
|
|
||||||
data = {
|
data = {
|
||||||
"model": LIGHTRAG_MODEL,
|
"model": LIGHTRAG_MODEL,
|
||||||
"created_at": LIGHTRAG_CREATED_AT,
|
"created_at": LIGHTRAG_CREATED_AT,
|
||||||
"done": True,
|
"done": True,
|
||||||
"total_duration": total_time, # 总时间
|
"total_duration": total_time,
|
||||||
"load_duration": 0, # 加载时间为0
|
"load_duration": 0,
|
||||||
"prompt_eval_count": prompt_tokens, # 输入token数
|
"prompt_eval_count": prompt_tokens,
|
||||||
"prompt_eval_duration": prompt_eval_time, # 首个响应之前的时间
|
"prompt_eval_duration": prompt_eval_time,
|
||||||
"eval_count": completion_tokens, # 输出token数
|
"eval_count": completion_tokens,
|
||||||
"eval_duration": eval_time, # 生成响应的时间
|
"eval_duration": eval_time,
|
||||||
}
|
}
|
||||||
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
||||||
else:
|
else:
|
||||||
# 流式响应
|
|
||||||
async for chunk in response:
|
async for chunk in response:
|
||||||
if chunk: # 只发送非空内容
|
if chunk:
|
||||||
# 记录第一个chunk的时间
|
|
||||||
if first_chunk_time is None:
|
if first_chunk_time is None:
|
||||||
first_chunk_time = time.time_ns()
|
first_chunk_time = time.time_ns()
|
||||||
|
|
||||||
# 更新最后一个chunk的时间
|
|
||||||
last_chunk_time = time.time_ns()
|
last_chunk_time = time.time_ns()
|
||||||
|
|
||||||
# 累积响应内容
|
|
||||||
total_response += chunk
|
total_response += chunk
|
||||||
data = {
|
data = {
|
||||||
"model": LIGHTRAG_MODEL,
|
"model": LIGHTRAG_MODEL,
|
||||||
@@ -821,30 +814,28 @@ def create_app(args):
|
|||||||
}
|
}
|
||||||
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
||||||
|
|
||||||
# 计算各项指标
|
|
||||||
completion_tokens = estimate_tokens(total_response)
|
completion_tokens = estimate_tokens(total_response)
|
||||||
total_time = last_chunk_time - start_time # 总时间
|
total_time = last_chunk_time - start_time
|
||||||
prompt_eval_time = (
|
prompt_eval_time = (
|
||||||
first_chunk_time - start_time
|
first_chunk_time - start_time
|
||||||
) # 首个响应之前的时间
|
)
|
||||||
eval_time = (
|
eval_time = (
|
||||||
last_chunk_time - first_chunk_time
|
last_chunk_time - first_chunk_time
|
||||||
) # 生成响应的时间
|
)
|
||||||
|
|
||||||
# 发送完成标记,包含性能统计信息
|
|
||||||
data = {
|
data = {
|
||||||
"model": LIGHTRAG_MODEL,
|
"model": LIGHTRAG_MODEL,
|
||||||
"created_at": LIGHTRAG_CREATED_AT,
|
"created_at": LIGHTRAG_CREATED_AT,
|
||||||
"done": True,
|
"done": True,
|
||||||
"total_duration": total_time, # 总时间
|
"total_duration": total_time,
|
||||||
"load_duration": 0, # 加载时间为0
|
"load_duration": 0,
|
||||||
"prompt_eval_count": prompt_tokens, # 输入token数
|
"prompt_eval_count": prompt_tokens,
|
||||||
"prompt_eval_duration": prompt_eval_time, # 首个响应之前的时间
|
"prompt_eval_duration": prompt_eval_time,
|
||||||
"eval_count": completion_tokens, # 输出token数
|
"eval_count": completion_tokens,
|
||||||
"eval_duration": eval_time, # 生成响应的时间
|
"eval_duration": eval_time,
|
||||||
}
|
}
|
||||||
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
yield f"{json.dumps(data, ensure_ascii=False)}\n"
|
||||||
return # 确保生成器在发送完成标记后立即结束
|
return # Ensure the generator ends immediately after sending the completion marker
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error in stream_generator: {str(e)}")
|
logging.error(f"Error in stream_generator: {str(e)}")
|
||||||
raise
|
raise
|
||||||
@@ -862,37 +853,33 @@ def create_app(args):
|
|||||||
},
|
},
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# 非流式响应
|
|
||||||
first_chunk_time = time.time_ns()
|
first_chunk_time = time.time_ns()
|
||||||
response_text = await rag.aquery(cleaned_query, param=query_param)
|
response_text = await rag.aquery(cleaned_query, param=query_param)
|
||||||
last_chunk_time = time.time_ns()
|
last_chunk_time = time.time_ns()
|
||||||
|
|
||||||
# 确保响应不为空
|
|
||||||
if not response_text:
|
if not response_text:
|
||||||
response_text = "No response generated"
|
response_text = "No response generated"
|
||||||
|
|
||||||
# 计算各项指标
|
|
||||||
completion_tokens = estimate_tokens(str(response_text))
|
completion_tokens = estimate_tokens(str(response_text))
|
||||||
total_time = last_chunk_time - start_time # 总时间
|
total_time = last_chunk_time - start_time
|
||||||
prompt_eval_time = first_chunk_time - start_time # 首个响应之前的时间
|
prompt_eval_time = first_chunk_time - start_time
|
||||||
eval_time = last_chunk_time - first_chunk_time # 生成响应的时间
|
eval_time = last_chunk_time - first_chunk_time
|
||||||
|
|
||||||
# 构造响应,包含性能统计信息
|
|
||||||
return {
|
return {
|
||||||
"model": LIGHTRAG_MODEL,
|
"model": LIGHTRAG_MODEL,
|
||||||
"created_at": LIGHTRAG_CREATED_AT,
|
"created_at": LIGHTRAG_CREATED_AT,
|
||||||
"message": {
|
"message": {
|
||||||
"role": "assistant",
|
"role": "assistant",
|
||||||
"content": str(response_text), # 确保转换为字符串
|
"content": str(response_text),
|
||||||
"images": None,
|
"images": None,
|
||||||
},
|
},
|
||||||
"done": True,
|
"done": True,
|
||||||
"total_duration": total_time, # 总时间
|
"total_duration": total_time,
|
||||||
"load_duration": 0, # 加载时间为0
|
"load_duration": 0,
|
||||||
"prompt_eval_count": prompt_tokens, # 输入token数
|
"prompt_eval_count": prompt_tokens,
|
||||||
"prompt_eval_duration": prompt_eval_time, # 首个响应之前的时间
|
"prompt_eval_duration": prompt_eval_time,
|
||||||
"eval_count": completion_tokens, # 输出token数
|
"eval_count": completion_tokens,
|
||||||
"eval_duration": eval_time, # 生成响应的时间
|
"eval_duration": eval_time,
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
raise HTTPException(status_code=500, detail=str(e))
|
||||||
|
Reference in New Issue
Block a user