From 828af49d6bacf5d30d7978e07a952244060f9adf Mon Sep 17 00:00:00 2001
From: yangdx
Date: Wed, 15 Jan 2025 18:47:01 +0800
Subject: [PATCH] Can call rag correctly, but no content is returned after rag
 finishes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lightrag/api/lightrag_ollama.py | 75 ++++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 29 deletions(-)

diff --git a/lightrag/api/lightrag_ollama.py b/lightrag/api/lightrag_ollama.py
index 39c8256a..4e83acb0 100644
--- a/lightrag/api/lightrag_ollama.py
+++ b/lightrag/api/lightrag_ollama.py
@@ -659,38 +659,55 @@ def create_app(args):
             cleaned_query, mode = parse_query_mode(query)
 
             # Query via RAG
-            response = await rag.aquery(
-                cleaned_query,
-                param=QueryParam(
-                    mode=mode,
-                    stream=request.stream,
-                )
-            )
-
             if request.stream:
+                response = await rag.aquery(
+                    cleaned_query,
+                    param=QueryParam(
+                        mode=mode,
+                        stream=True,
+                        only_need_context=False
+                    ),
+                )
+
                 async def stream_generator():
-                    async for chunk in response:
-                        yield OllamaChatResponse(
-                            model=LIGHTRAG_MODEL,
-                            created_at=LIGHTRAG_CREATED_AT,
-                            message=OllamaMessage(
-                                role="assistant",
-                                content=chunk
-                            ),
-                            done=False
-                        )
-                    # Send an empty completion message
-                    yield OllamaChatResponse(
-                        model=LIGHTRAG_MODEL,
-                        created_at=LIGHTRAG_CREATED_AT,
-                        message=OllamaMessage(
-                            role="assistant",
-                            content=""
-                        ),
-                        done=True
-                    )
-                return stream_generator()
+                    try:
+                        async for chunk in response:
+                            yield {
+                                "model": LIGHTRAG_MODEL,
+                                "created_at": LIGHTRAG_CREATED_AT,
+                                "message": {
+                                    "role": "assistant",
+                                    "content": chunk
+                                },
+                                "done": False
+                            }
+                        yield {
+                            "model": LIGHTRAG_MODEL,
+                            "created_at": LIGHTRAG_CREATED_AT,
+                            "message": {
+                                "role": "assistant",
+                                "content": ""
+                            },
+                            "done": True
+                        }
+                    except Exception as e:
+                        logging.error(f"Error in stream_generator: {str(e)}")
+                        raise
+                from fastapi.responses import StreamingResponse
+                import json
+                return StreamingResponse(
+                    (f"data: {json.dumps(chunk)}\n\n" async for chunk in stream_generator()),
+                    media_type="text/event-stream"
+                )
             else:
+                response = await rag.aquery(
+                    cleaned_query,
+                    param=QueryParam(
+                        mode=mode,
+                        stream=False,
+                        only_need_context=False
+                    ),
+                )
                 return OllamaChatResponse(
                     model=LIGHTRAG_MODEL,
                     created_at=LIGHTRAG_CREATED_AT,
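
Note: the streaming branch above frames each chunk as a server-sent event ("data: {...}" followed by a blank line, media_type "text/event-stream"). A direct way to check the symptom named in the commit subject (the query completing without content reaching the caller) is to decode those frames by hand. Below is a minimal client sketch, not part of the patch: the base URL, the POST /api/chat route, the Ollama-style request body, the model name, and the "/hybrid" mode prefix are all assumptions for illustration.

    # Hypothetical client for exercising the streaming branch of the patched endpoint.
    # Assumed (not from the patch): base URL, route, request shape, model name, mode prefix.
    import json

    import httpx


    def consume_chat_stream(query: str, base_url: str = "http://localhost:9621") -> str:
        """Collect assistant content from 'data: {...}' SSE frames until done=True."""
        payload = {
            "model": "lightrag:latest",  # assumed model name; the value may be ignored server-side
            "messages": [{"role": "user", "content": query}],
            "stream": True,
        }
        pieces = []
        with httpx.stream("POST", f"{base_url}/api/chat", json=payload, timeout=None) as resp:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if not line.startswith("data: "):
                    continue  # skip the blank separator lines between SSE frames
                frame = json.loads(line[len("data: "):])
                if frame.get("done"):
                    break  # the empty completion frame emitted by stream_generator
                pieces.append(frame["message"]["content"])
        return "".join(pieces)


    if __name__ == "__main__":
        print(consume_chat_stream("/hybrid What does this project do?"))

One design note, offered tentatively: stock Ollama clients stream newline-delimited JSON (one object per line, content type application/x-ndjson) rather than SSE "data:" frames, so if this endpoint is meant as a drop-in Ollama replacement, an unmodified Ollama client may fail to parse these frames, which would match the "no content returned" symptom.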