From e124ad7f9cfd1c366b4b2c8fb1aaab2ffee1703e Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 6 Feb 2025 04:53:05 +0800
Subject: [PATCH] Fix timing calculation logic in OllamaAPI stream generators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Initialize first_chunk_time as None
• Set timing only when first chunk arrives
---
 lightrag/api/ollama_api.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/lightrag/api/ollama_api.py b/lightrag/api/ollama_api.py
index c6f40879..132601c3 100644
--- a/lightrag/api/ollama_api.py
+++ b/lightrag/api/ollama_api.py
@@ -203,14 +203,15 @@ class OllamaAPI:
         )
 
         async def stream_generator():
-            first_chunk_time = time.time_ns()
-            last_chunk_time = first_chunk_time
+            first_chunk_time = None
+            last_chunk_time = time.time_ns()
             total_response = ""
 
             try:
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
+                    first_chunk_time = last_chunk_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -282,7 +283,8 @@ class OllamaAPI:
                     }
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
-
+                if first_chunk_time is None:
+                    first_chunk_time = last_chunk_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
@@ -407,14 +409,15 @@ class OllamaAPI:
         )
 
         async def stream_generator():
-            first_chunk_time = time.time_ns()
-            last_chunk_time = first_chunk_time
+            first_chunk_time = None
+            last_chunk_time = time.time_ns()
             total_response = ""
 
             try:
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
+                    first_chunk_time = last_chunk_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -499,6 +502,8 @@ class OllamaAPI:
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
 
+                if first_chunk_time is None:
+                    first_chunk_time = last_chunk_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
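
Illustration (not part of the patch): the change defers first_chunk_time until a
chunk actually arrives, so prompt_eval_time measures real time-to-first-chunk
rather than the instant the generator started running, and the
"if first_chunk_time is None" guard keeps the final arithmetic from subtracting
None when the stream produced no chunks. The sketch below is a minimal,
self-contained reproduction of that pattern, not the lightrag code itself:
fake_llm_stream() and its delays are invented stand-ins for the real LLM
response, and the payload fields only loosely mirror the Ollama-style stats
fields in ollama_api.py.

    import asyncio
    import json
    import time


    async def fake_llm_stream():
        # Hypothetical stand-in for the real streaming LLM response.
        for chunk in ["Hello", ", ", "world"]:
            await asyncio.sleep(0.05)  # simulate time-to-first-token and gaps
            yield chunk


    async def stream_generator():
        start_time = time.time_ns()
        first_chunk_time = None           # set lazily, when a chunk arrives
        last_chunk_time = time.time_ns()  # fallback if the stream is empty
        total_response = ""

        async for chunk in fake_llm_stream():
            if first_chunk_time is None:
                first_chunk_time = time.time_ns()  # first chunk just arrived
            last_chunk_time = time.time_ns()
            total_response += chunk
            yield json.dumps({"response": chunk, "done": False},
                             ensure_ascii=False)

        # Mirrors the patch's guard: if no chunk ever set first_chunk_time,
        # fall back so the subtractions below never see None.
        if first_chunk_time is None:
            first_chunk_time = last_chunk_time

        total_time = last_chunk_time - start_time         # ns, whole request
        prompt_eval_time = first_chunk_time - start_time  # ns, to first chunk
        eval_time = last_chunk_time - first_chunk_time    # ns, generation only
        yield json.dumps(
            {
                "done": True,
                "total_duration": total_time,
                "prompt_eval_duration": prompt_eval_time,
                "eval_duration": eval_time,
                "response": total_response,
            },
            ensure_ascii=False,
        )


    async def main():
        async for line in stream_generator():
            print(line)


    asyncio.run(main())

Running it prints three chunk lines followed by a final stats line. With the
old logic (first_chunk_time = time.time_ns() at generator startup),
prompt_eval_duration would be measured at iteration start rather than at
first-token arrival, pushing the time-to-first-chunk into eval_duration
instead.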