fix: improve timing accuracy and variable scoping in OllamaAPI

commit 9103e7f463
parent e124ad7f9c
Author: yangdx
Date: 2025-02-06 10:42:49 +08:00


@@ -203,15 +203,15 @@ class OllamaAPI:
             )
             async def stream_generator():
                 try:
                     first_chunk_time = None
                     last_chunk_time = time.time_ns()
                     total_response = ""
                     try:
                         # Ensure response is an async generator
                         if isinstance(response, str):
                             # If it's a string, send in two parts
-                            first_chunk_time = last_chunk_time
+                            first_chunk_time = start_time
                             last_chunk_time = time.time_ns()
                             total_response = response
@@ -284,7 +284,7 @@ class OllamaAPI:
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
                 if first_chunk_time is None:
-                    first_chunk_time = last_chunk_time
+                    first_chunk_time = start_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
@@ -409,15 +409,15 @@ class OllamaAPI:
             )
             async def stream_generator():
                 try:
                     first_chunk_time = None
                     last_chunk_time = time.time_ns()
                     total_response = ""
                     try:
                         # Ensure response is an async generator
                         if isinstance(response, str):
                             # If it's a string, send in two parts
-                            first_chunk_time = last_chunk_time
+                            first_chunk_time = start_time
                             last_chunk_time = time.time_ns()
                             total_response = response
@@ -503,7 +503,7 @@ class OllamaAPI:
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
                 if first_chunk_time is None:
-                    first_chunk_time = last_chunk_time
+                    first_chunk_time = start_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
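
For context on the timing change: the durations reported by the stream are derived from the three `time.time_ns()` timestamps in the diff. A minimal sketch of that arithmetic and of what the fix changes for the `isinstance(response, str)` branch (the `estimate_tokens` body and the output field names are assumptions for illustration, not the exact OllamaAPI response schema):

```python
import time

def estimate_tokens(text: str) -> int:
    # Hypothetical stand-in for the estimator used in OllamaAPI.
    return max(1, len(text) // 4)

def timing_stats(start_time: int, first_chunk_time: int,
                 last_chunk_time: int, total_response: str) -> dict:
    # Mirrors the arithmetic at the end of stream_generator();
    # all timestamps are nanoseconds from time.time_ns().
    return {
        "total_duration": last_chunk_time - start_time,
        "prompt_eval_duration": first_chunk_time - start_time,
        "eval_duration": last_chunk_time - first_chunk_time,
        "eval_count": estimate_tokens(total_response),
    }

start_time = time.time_ns()
response = "a complete, non-streamed response string"
time.sleep(0.05)  # pretend generation took ~50 ms
last_chunk_time = time.time_ns()

# Before this commit: first_chunk_time = last_chunk_time, so the whole
# elapsed time landed in prompt_eval_duration and eval_duration was ~0,
# which skews tokens-per-second for the eval_count completion tokens.
print(timing_stats(start_time, last_chunk_time, last_chunk_time, response))

# After this commit: first_chunk_time = start_time, so the elapsed time
# is attributed to eval_duration instead.
print(timing_stats(start_time, start_time, last_chunk_time, response))
```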
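
The "variable scoping" part of the title shows up as the three initializations at the top of the outer `try`, before the inner one. A minimal sketch of why that ordering matters (the error path here is illustrative, not the API's actual error payload):

```python
import asyncio
import time

async def stream_generator():
    # As in the patched code: bind the timing/accumulator variables
    # before the inner try so every later path can reference them.
    first_chunk_time = None
    last_chunk_time = time.time_ns()
    total_response = ""
    try:
        raise RuntimeError("upstream failed before the first chunk")
    except RuntimeError as exc:
        # Without the bindings above, touching total_response here
        # would raise UnboundLocalError instead of reporting the error.
        yield f"error after {len(total_response)} chars: {exc}\n"

async def main():
    async for line in stream_generator():
        print(line, end="")

asyncio.run(main())
```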