From e124ad7f9cfd1c366b4b2c8fb1aaab2ffee1703e Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 6 Feb 2025 04:53:05 +0800
Subject: [PATCH] Fix timing calculation logic in OllamaAPI stream generators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Initialize first_chunk_time as None
• Set timing only when first chunk arrives
---
 lightrag/api/ollama_api.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/lightrag/api/ollama_api.py b/lightrag/api/ollama_api.py
index c6f40879..132601c3 100644
--- a/lightrag/api/ollama_api.py
+++ b/lightrag/api/ollama_api.py
@@ -203,14 +203,15 @@ class OllamaAPI:
         )
 
         async def stream_generator():
-            first_chunk_time = time.time_ns()
-            last_chunk_time = first_chunk_time
+            first_chunk_time = None
+            last_chunk_time = time.time_ns()
             total_response = ""
 
             try:
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
+                    first_chunk_time = last_chunk_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -282,7 +283,8 @@ class OllamaAPI:
                     }
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
-
+                if first_chunk_time is None:
+                    first_chunk_time = last_chunk_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
@@ -407,14 +409,15 @@ class OllamaAPI:
         )
 
         async def stream_generator():
-            first_chunk_time = time.time_ns()
-            last_chunk_time = first_chunk_time
+            first_chunk_time = None
+            last_chunk_time = time.time_ns()
             total_response = ""
 
             try:
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
+                    first_chunk_time = last_chunk_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -499,6 +502,8 @@ class OllamaAPI:
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
 
+                if first_chunk_time is None:
+                    first_chunk_time = last_chunk_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
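
Illustration (not part of the patch): the change defers first_chunk_time until a
chunk actually arrives, so prompt_eval_time measures real time-to-first-chunk
rather than the instant the generator started running, and the
"if first_chunk_time is None" guard keeps the final arithmetic from subtracting
None when the stream produced no chunks. The sketch below is a minimal,
self-contained reproduction of that pattern, not the lightrag code itself:
fake_llm_stream() and its delays are invented stand-ins for the real LLM
response, and the payload fields only loosely mirror the Ollama-style stats
fields in ollama_api.py.

    import asyncio
    import json
    import time


    async def fake_llm_stream():
        # Hypothetical stand-in for the real streaming LLM response.
        for chunk in ["Hello", ", ", "world"]:
            await asyncio.sleep(0.05)  # simulate time-to-first-token and gaps
            yield chunk


    async def stream_generator():
        start_time = time.time_ns()
        first_chunk_time = None           # set lazily, when a chunk arrives
        last_chunk_time = time.time_ns()  # fallback if the stream is empty
        total_response = ""

        async for chunk in fake_llm_stream():
            if first_chunk_time is None:
                first_chunk_time = time.time_ns()  # first chunk just arrived
            last_chunk_time = time.time_ns()
            total_response += chunk
            yield json.dumps({"response": chunk, "done": False},
                             ensure_ascii=False)

        # Mirrors the patch's guard: if no chunk ever set first_chunk_time,
        # fall back so the subtractions below never see None.
        if first_chunk_time is None:
            first_chunk_time = last_chunk_time

        total_time = last_chunk_time - start_time         # ns, whole request
        prompt_eval_time = first_chunk_time - start_time  # ns, to first chunk
        eval_time = last_chunk_time - first_chunk_time    # ns, generation only
        yield json.dumps(
            {
                "done": True,
                "total_duration": total_time,
                "prompt_eval_duration": prompt_eval_time,
                "eval_duration": eval_time,
                "response": total_response,
            },
            ensure_ascii=False,
        )


    async def main():
        async for line in stream_generator():
            print(line)


    asyncio.run(main())

Running it prints three chunk lines followed by a final stats line. With the
old logic (first_chunk_time = time.time_ns() at generator startup),
prompt_eval_duration would be measured at iteration start rather than at
first-token arrival, pushing the time-to-first-chunk into eval_duration
instead.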