From fb9df50adad499d9259eecd16faad0e6e0677b98 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sat, 25 Jan 2025 22:14:40 +0800
Subject: [PATCH 01/13] Add conversation history support to chat API

- Added HISTORY_TURNS env variable
- Updated chat request data creation
- Modified server to handle history
- Added history to test cases
---
 .env.example                    |  1 +
 lightrag/api/lightrag_server.py | 26 ++++++++++++---
 test_lightrag_ollama_chat.py    | 59 ++++++++++++++++++++++++++++-----
 3 files changed, 74 insertions(+), 12 deletions(-)

diff --git a/.env.example b/.env.example
index 944e7851..b652847b 100644
--- a/.env.example
+++ b/.env.example
@@ -43,6 +43,7 @@ MAX_ASYNC=4
 MAX_TOKENS=32768
 EMBEDDING_DIM=1024
 MAX_EMBED_TOKENS=8192
+#HISTORY_TURNS=3
 
 # Security (empty for no key)
 LIGHTRAG_API_KEY=your-secure-api-key-here
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 533be818..b94e0d17 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -470,6 +470,13 @@ def parse_args() -> argparse.Namespace:
         help="Enable automatic scanning when the program starts",
     )
 
+    parser.add_argument(
+        "--history-turns",
+        type=int,
+        default=get_env_value("HISTORY_TURNS", None, int),
+        help="Number of conversation history turns to include (default: from env or None)",
+    )
+
     args = parser.parse_args()
 
     return args
@@ -1576,8 +1583,9 @@ def create_app(args):
             if not messages:
                 raise HTTPException(status_code=400, detail="No messages provided")
 
-            # Get the last message as query
+            # Get the last message as query and previous messages as history
             query = messages[-1].content
+            conversation_history = messages[:-1]  # All previous messages are treated as history
 
             # Check for query prefix
             cleaned_query, mode = parse_query_mode(query)
@@ -1585,9 +1593,19 @@
             start_time = time.time_ns()
             prompt_tokens = estimate_tokens(cleaned_query)
 
-            query_param = QueryParam(
-                mode=mode, stream=request.stream, only_need_context=False
-            )
+            # Build query_param
+            param_dict = {
+                "mode": mode,
+                "stream": request.stream,
+                "only_need_context": False,
+                "conversation_history": conversation_history,
+            }
+
+            # If history_turns is set, add it to the parameters
+            if args.history_turns is not None:
+                param_dict["history_turns"] = args.history_turns
+
+            query_param = QueryParam(**param_dict)
 
             if request.stream:
                 from fastapi.responses import StreamingResponse
diff --git a/test_lightrag_ollama_chat.py b/test_lightrag_ollama_chat.py
index d1e61d39..faf9485d 100644
--- a/test_lightrag_ollama_chat.py
+++ b/test_lightrag_ollama_chat.py
@@ -189,19 +189,32 @@ def get_base_url(endpoint: str = "chat") -> str:
 
 
 def create_chat_request_data(
-    content: str, stream: bool = False, model: str = None
+    content: str,
+    stream: bool = False,
+    model: str = None,
+    conversation_history: List[Dict[str, str]] = None,
+    history_turns: int = None,
 ) -> Dict[str, Any]:
     """Create chat request data
     Args:
         content: User message content
         stream: Whether to use streaming response
         model: Model name
+        conversation_history: List of previous conversation messages
+        history_turns: Number of history turns to include
 
     Returns:
         Dictionary containing complete chat request data
     """
+    messages = conversation_history or []
+    if history_turns is not None and conversation_history:
+        messages = messages[
+            -2 * history_turns :
+        ]  # Each turn has 2 messages (user + assistant)
+    messages.append({"role": "user", "content": content})
+
     return {
         "model": model or CONFIG["server"]["model"],
-        "messages": [{"role": "user", "content": content}],
+        "messages": messages,
         "stream": stream,
     }
@@ -259,11 +272,25 @@ def run_test(func: Callable, name: str) -> None:
 def test_non_stream_chat() -> None:
     """Test non-streaming call to /api/chat endpoint"""
     url = get_base_url()
-    data = create_chat_request_data(
-        CONFIG["test_cases"]["basic"]["query"], stream=False
-    )
 
-    # Send request
+    # Example conversation history
+    conversation_history = [
+        {"role": "user", "content": "你好"},
+        {"role": "assistant", "content": "你好!我是一个AI助手,很高兴为你服务。"},
+        {"role": "user", "content": "西游记里有几个主要人物?"},
+        {
+            "role": "assistant",
+            "content": "西游记的主要人物有唐僧、孙悟空、猪八戒、沙和尚这四位主角。",
+        },
+    ]
+
+    # Send request with conversation history and history turns
+    data = create_chat_request_data(
+        CONFIG["test_cases"]["basic"]["query"],
+        stream=False,
+        conversation_history=conversation_history,
+        history_turns=2,  # Only include last 2 turns
+    )
     response = make_request(url, data)
 
     # Print response
@@ -297,9 +324,25 @@ def test_stream_chat() -> None:
     The last message will contain performance statistics, with done set to true.
     """
     url = get_base_url()
-    data = create_chat_request_data(CONFIG["test_cases"]["basic"]["query"], stream=True)
 
-    # Send request and get streaming response
+    # Example conversation history
+    conversation_history = [
+        {"role": "user", "content": "你好"},
+        {"role": "assistant", "content": "你好!我是一个AI助手,很高兴为你服务。"},
+        {"role": "user", "content": "西游记里有几个主要人物?"},
+        {
+            "role": "assistant",
+            "content": "西游记的主要人物有唐僧、孙悟空、猪八戒、沙和尚这四位主角。",
+        },
+    ]
+
+    # Send request with conversation history and history turns
+    data = create_chat_request_data(
+        CONFIG["test_cases"]["basic"]["query"],
+        stream=True,
+        conversation_history=conversation_history,
+        history_turns=2,  # Only include last 2 turns
+    )
     response = make_request(url, data, stream=True)
 
     if OutputControl.is_verbose():

From 2719e07107db9f3f30b96d0776cdc8c828d78ece Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sat, 25 Jan 2025 22:33:09 +0800
Subject: [PATCH 02/13] Convert OllamaMessage to dict in conversation history

---
 lightrag/api/lightrag_server.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index b94e0d17..e000dc0e 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -1585,7 +1585,10 @@ def create_app(args):
 
             # Get the last message as query and previous messages as history
             query = messages[-1].content
-            conversation_history = messages[:-1]  # All previous messages are treated as history
+            # Convert OllamaMessage objects to dictionaries
+            conversation_history = [
+                {"role": msg.role, "content": msg.content} for msg in messages[:-1]
+            ]
 
             # Check for query prefix
             cleaned_query, mode = parse_query_mode(query)

From 86282ba4347cc8a0a698cd27958a71a8923f92a9 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sat, 25 Jan 2025 22:54:12 +0800
Subject: [PATCH 03/13] Fix history_context handling error in kg_query

---
 lightrag/operate.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index af66eee6..0469fb7e 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -633,11 +633,8 @@ async def kg_query(
     # Process conversation history
     history_context = ""
     if query_param.conversation_history:
-        recent_history = query_param.conversation_history[
-            -query_param.history_window_size :
-        ]
-        history_context = "\n".join(
-            [f"{turn['role']}: {turn['content']}" for turn in recent_history]
+        history_context = get_conversation_turns(
+            query_param.conversation_history, query_param.history_turns
         )
 
PROMPTS["rag_response"] From aa4bccb1bba947e892d018a716e6f8993f295a9f Mon Sep 17 00:00:00 2001 From: yangdx Date: Sat, 25 Jan 2025 23:38:45 +0800 Subject: [PATCH 04/13] Remove lightrag_api_open_webui_demo.py file --- examples/lightrag_api_open_webui_demo.py | 140 ----------------------- 1 file changed, 140 deletions(-) delete mode 100644 examples/lightrag_api_open_webui_demo.py diff --git a/examples/lightrag_api_open_webui_demo.py b/examples/lightrag_api_open_webui_demo.py deleted file mode 100644 index 88454da8..00000000 --- a/examples/lightrag_api_open_webui_demo.py +++ /dev/null @@ -1,140 +0,0 @@ -from datetime import datetime, timezone -from fastapi import FastAPI -from fastapi.responses import StreamingResponse -import inspect -import json -from pydantic import BaseModel -from typing import Optional - -import os -import logging -from lightrag import LightRAG, QueryParam -from lightrag.llm.ollama import ollama_model_complete, ollama_embed -from lightrag.utils import EmbeddingFunc - -import nest_asyncio - -WORKING_DIR = "./dickens" - -logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO) - -if not os.path.exists(WORKING_DIR): - os.mkdir(WORKING_DIR) - -rag = LightRAG( - working_dir=WORKING_DIR, - llm_model_func=ollama_model_complete, - llm_model_name="qwen2.5:latest", - llm_model_max_async=4, - llm_model_max_token_size=32768, - llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}}, - embedding_func=EmbeddingFunc( - embedding_dim=1024, - max_token_size=8192, - func=lambda texts: ollama_embed( - texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434" - ), - ), -) - -with open("./book.txt", "r", encoding="utf-8") as f: - rag.insert(f.read()) - -# Apply nest_asyncio to solve event loop issues -nest_asyncio.apply() - -app = FastAPI(title="LightRAG", description="LightRAG API open-webui") - - -# Data models -MODEL_NAME = "LightRAG:latest" - - -class Message(BaseModel): - role: Optional[str] = None - content: str - - -class OpenWebUIRequest(BaseModel): - stream: Optional[bool] = None - model: Optional[str] = None - messages: list[Message] - - -# API routes - - -@app.get("/") -async def index(): - return "Set Ollama link to http://ip:port/ollama in Open-WebUI Settings" - - -@app.get("/ollama/api/version") -async def ollama_version(): - return {"version": "0.4.7"} - - -@app.get("/ollama/api/tags") -async def ollama_tags(): - return { - "models": [ - { - "name": MODEL_NAME, - "model": MODEL_NAME, - "modified_at": "2024-11-12T20:22:37.561463923+08:00", - "size": 4683087332, - "digest": "845dbda0ea48ed749caafd9e6037047aa19acfcfd82e704d7ca97d631a0b697e", - "details": { - "parent_model": "", - "format": "gguf", - "family": "qwen2", - "families": ["qwen2"], - "parameter_size": "7.6B", - "quantization_level": "Q4_K_M", - }, - } - ] - } - - -@app.post("/ollama/api/chat") -async def ollama_chat(request: OpenWebUIRequest): - resp = rag.query( - request.messages[-1].content, param=QueryParam(mode="hybrid", stream=True) - ) - if inspect.isasyncgen(resp): - - async def ollama_resp(chunks): - async for chunk in chunks: - yield ( - json.dumps( - { - "model": MODEL_NAME, - "created_at": datetime.now(timezone.utc).strftime( - "%Y-%m-%dT%H:%M:%S.%fZ" - ), - "message": { - "role": "assistant", - "content": chunk, - }, - "done": False, - }, - ensure_ascii=False, - ).encode("utf-8") - + b"\n" - ) # the b"\n" is important - - return StreamingResponse(ollama_resp(resp), media_type="application/json") - else: - return resp - - -@app.get("/health") 
-async def health_check():
-    return {"status": "healthy"}
-
-
-if __name__ == "__main__":
-    import uvicorn
-
-    uvicorn.run(app, host="0.0.0.0", port=8020)

From 22742ec9cb8f0c3dc991056f2892505d6850bfda Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sat, 25 Jan 2025 23:46:29 +0800
Subject: [PATCH 05/13] fix: remove outdated Ollama model config notes

- Remove legacy configuration instructions for Open WebUI tasks
- Ollama API can properly bypass conversation metadata generation
---
 lightrag/api/README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 89906006..4e818242 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -94,8 +94,6 @@ For example, chat message "/mix 唐僧有几个徒弟" will trigger a mix mode q
 
 After starting the lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. And then a model named lightrag:latest will appear in Open WebUI's model management interface. Users can then send queries to LightRAG through the chat interface.
 
-To prevent Open WebUI from using LightRAG when generating conversation titles, go to Admin Panel > Interface > Set Task Model and change both Local Models and External Models to any option except "Current Model".
-
 ## Configuration
 
 LightRAG can be configured using either command-line arguments or environment variables. When both are provided, command-line arguments take precedence over environment variables.

From e4e42a8ec43811a63fc445c803bb28b0b11758f9 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sun, 26 Jan 2025 02:31:16 +0800
Subject: [PATCH 06/13] Fetch chunk size and chunk overlap size from .env file

---
 .env.example                    | 2 ++
 lightrag/api/lightrag_server.py | 8 ++++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/.env.example b/.env.example
index b652847b..82b9ca70 100644
--- a/.env.example
+++ b/.env.example
@@ -44,6 +44,8 @@ MAX_TOKENS=32768
 EMBEDDING_DIM=1024
 MAX_EMBED_TOKENS=8192
 #HISTORY_TURNS=3
+#CHUNK_SIZE=1200
+#CHUNK_OVERLAP_SIZE=100
 
 # Security (empty for no key)
 LIGHTRAG_API_KEY=your-secure-api-key-here
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index e000dc0e..da15d212 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -383,14 +383,14 @@ def parse_args() -> argparse.Namespace:
 
     parser.add_argument(
         "--chunk_size",
-        default=1200,
-        help="chunk token size default 1200",
+        default=get_env_value("CHUNK_SIZE", 1200),
+        help="chunk token size default 1200",
     )
 
     parser.add_argument(
         "--chunk_overlap_size",
-        default=100,
-        help="chunk token size default 1200",
+        default=get_env_value("CHUNK_OVERLAP_SIZE", 100),
+        help="chunk overlap size default 100",
     )
 
     def timeout_type(value):

From 9f80c1904ff11e6a0ba68779bdfdc42285beb4be Mon Sep 17 00:00:00 2001
From: yangdx
Date: Sun, 26 Jan 2025 05:09:42 +0800
Subject: [PATCH 07/13] Refactoring command line arguments handling logic, add more RAG config to splash screen

---
 lightrag/api/lightrag_server.py | 42 ++++++++++++++++++++++++++----------------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index da15d212..2d183b73 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -200,8 +200,14 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.max_async}")
     ASCIIColors.white(" ├─ Max Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_tokens}")
end="") + ASCIIColors.white(" ├─ Max Embed Tokens: ", end="") ASCIIColors.yellow(f"{args.max_embed_tokens}") + ASCIIColors.white(" ├─ Chunk Size: ", end="") + ASCIIColors.yellow(f"{args.chunk_size}") + ASCIIColors.white(" ├─ Chunk Overlap Size: ", end="") + ASCIIColors.yellow(f"{args.chunk_overlap_size}") + ASCIIColors.white(" └─ History Turns: ", end="") + ASCIIColors.yellow(f"{args.history_turns}") # System Configuration ASCIIColors.magenta("\n🛠️ System Configuration:") @@ -294,7 +300,7 @@ def parse_args() -> argparse.Namespace: description="LightRAG FastAPI Server with separate working and input directories" ) - # Bindings (with env var support) + # Bindings configuration parser.add_argument( "--llm-binding", default=get_env_value("LLM_BINDING", "ollama"), @@ -306,8 +312,6 @@ def parse_args() -> argparse.Namespace: help="Embedding binding to be used. Supported: lollms, ollama, openai (default: from env or ollama)", ) - # Parse temporary args for host defaults - temp_args, _ = parser.parse_known_args() # Server configuration parser.add_argument( @@ -335,13 +339,13 @@ def parse_args() -> argparse.Namespace: ) # LLM Model configuration - default_llm_host = get_env_value( - "LLM_BINDING_HOST", get_default_host(temp_args.llm_binding) - ) parser.add_argument( "--llm-binding-host", - default=default_llm_host, - help=f"llm server host URL (default: from env or {default_llm_host})", + default=get_env_value("LLM_BINDING_HOST", None), + help="LLM server host URL. If not provided, defaults based on llm-binding:\n" + + "- ollama: http://localhost:11434\n" + + "- lollms: http://localhost:9600\n" + + "- openai: https://api.openai.com/v1", ) default_llm_api_key = get_env_value("LLM_BINDING_API_KEY", None) @@ -359,13 +363,13 @@ def parse_args() -> argparse.Namespace: ) # Embedding model configuration - default_embedding_host = get_env_value( - "EMBEDDING_BINDING_HOST", get_default_host(temp_args.embedding_binding) - ) parser.add_argument( "--embedding-binding-host", - default=default_embedding_host, - help=f"embedding server host URL (default: from env or {default_embedding_host})", + default=get_env_value("EMBEDDING_BINDING_HOST", None), + help="Embedding server host URL. 
If not provided, defaults based on embedding-binding:\n" + + "- ollama: http://localhost:11434\n" + + "- lollms: http://localhost:9600\n" + + "- openai: https://api.openai.com/v1", ) default_embedding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "") @@ -641,8 +645,7 @@ def get_api_key_dependency(api_key: Optional[str]): def create_app(args): - # Verify that bindings arer correctly setup - + # Verify that bindings are correctly setup if args.llm_binding not in [ "lollms", "ollama", @@ -655,6 +658,13 @@ def create_app(args): if args.embedding_binding not in ["lollms", "ollama", "openai", "azure_openai"]: raise Exception("embedding binding not supported") + # Set default hosts if not provided + if args.llm_binding_host is None: + args.llm_binding_host = get_default_host(args.llm_binding) + + if args.embedding_binding_host is None: + args.embedding_binding_host = get_default_host(args.embedding_binding) + # Add SSL validation if args.ssl: if not args.ssl_certfile or not args.ssl_keyfile: From 25a58a6545d183e305fa07c771083252fce47135 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 26 Jan 2025 05:10:57 +0800 Subject: [PATCH 08/13] Fix linting --- lightrag/api/lightrag_server.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 2d183b73..2c7e0a54 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -312,7 +312,6 @@ def parse_args() -> argparse.Namespace: help="Embedding binding to be used. Supported: lollms, ollama, openai (default: from env or ollama)", ) - # Server configuration parser.add_argument( "--host", @@ -342,10 +341,10 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--llm-binding-host", default=get_env_value("LLM_BINDING_HOST", None), - help="LLM server host URL. If not provided, defaults based on llm-binding:\n" + - "- ollama: http://localhost:11434\n" + - "- lollms: http://localhost:9600\n" + - "- openai: https://api.openai.com/v1", + help="LLM server host URL. If not provided, defaults based on llm-binding:\n" + + "- ollama: http://localhost:11434\n" + + "- lollms: http://localhost:9600\n" + + "- openai: https://api.openai.com/v1", ) default_llm_api_key = get_env_value("LLM_BINDING_API_KEY", None) @@ -366,10 +365,10 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--embedding-binding-host", default=get_env_value("EMBEDDING_BINDING_HOST", None), - help="Embedding server host URL. If not provided, defaults based on embedding-binding:\n" + - "- ollama: http://localhost:11434\n" + - "- lollms: http://localhost:9600\n" + - "- openai: https://api.openai.com/v1", + help="Embedding server host URL. 
If not provided, defaults based on embedding-binding:\n" + + "- ollama: http://localhost:11434\n" + + "- lollms: http://localhost:9600\n" + + "- openai: https://api.openai.com/v1", ) default_embedding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "") @@ -661,7 +660,7 @@ def create_app(args): # Set default hosts if not provided if args.llm_binding_host is None: args.llm_binding_host = get_default_host(args.llm_binding) - + if args.embedding_binding_host is None: args.embedding_binding_host = get_default_host(args.embedding_binding) From f8d26cb19303dcbd2299244a377c72794a5feead Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 26 Jan 2025 05:19:51 +0800 Subject: [PATCH 09/13] Update default history turns to 3 --- lightrag/api/lightrag_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 2c7e0a54..de6cb794 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -476,8 +476,8 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--history-turns", type=int, - default=get_env_value("HISTORY_TURNS", None, int), - help="Number of conversation history turns to include (default: from env or None)", + default=get_env_value("HISTORY_TURNS", 3, int), + help="Number of conversation history turns to include (default: from env or 3)", ) args = parser.parse_args() From f045fc3d590ad70d1f78697cac22cc5d54aec7e5 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 26 Jan 2025 11:36:24 +0800 Subject: [PATCH 10/13] Update API endpoint documentation --- lightrag/api/lightrag_server.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index de6cb794..bbf72374 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -1446,7 +1446,10 @@ def create_app(args): @app.post("/api/generate") async def generate(raw_request: Request, request: OllamaGenerateRequest): - """Handle generate completion requests""" + """Handle generate completion requests + For compatiblity purpuse, the request is not processed by LightRAG, + and will be handled by underlying LLM model. + """ try: query = request.prompt start_time = time.time_ns() @@ -1585,7 +1588,10 @@ def create_app(args): @app.post("/api/chat") async def chat(raw_request: Request, request: OllamaChatRequest): - """Handle chat completion requests""" + """Process chat completion requests. + Routes user queries through LightRAG by selecting query mode based on prefix indicators. + Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM. 
+ """ try: # Get all messages messages = request.messages @@ -1605,7 +1611,6 @@ def create_app(args): start_time = time.time_ns() prompt_tokens = estimate_tokens(cleaned_query) - # 构建 query_param param_dict = { "mode": mode, "stream": request.stream, @@ -1613,7 +1618,6 @@ def create_app(args): "conversation_history": conversation_history, } - # 如果设置了 history_turns,添加到参数中 if args.history_turns is not None: param_dict["history_turns"] = args.history_turns From c1edef7119fdaad515d2dc298120d789764440f9 Mon Sep 17 00:00:00 2001 From: yangdx Date: Sun, 26 Jan 2025 23:21:32 +0800 Subject: [PATCH 11/13] Increase timeout for LightRAG Ollama chat --- test_lightrag_ollama_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_lightrag_ollama_chat.py b/test_lightrag_ollama_chat.py index faf9485d..6982d44b 100644 --- a/test_lightrag_ollama_chat.py +++ b/test_lightrag_ollama_chat.py @@ -104,7 +104,7 @@ DEFAULT_CONFIG = { "host": "localhost", "port": 9621, "model": "lightrag:latest", - "timeout": 30, + "timeout": 120, "max_retries": 3, "retry_delay": 1, }, From 01288debd148f39eca4545b8b1a291f5930b028a Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 27 Jan 2025 02:45:44 +0800 Subject: [PATCH 12/13] Ensure splash output flush to system log --- lightrag/api/lightrag_server.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index bbf72374..d700d5c5 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -17,6 +17,7 @@ import shutil import aiofiles from ascii_colors import trace_exception, ASCIIColors import os +import sys import configparser from fastapi import Depends, Security @@ -287,6 +288,8 @@ def display_splash_screen(args: argparse.Namespace) -> None: ASCIIColors.green("Server is ready to accept connections! 🚀\n") + # Ensure splash output flush to system log + sys.stdout.flush() def parse_args() -> argparse.Namespace: """ From 03604d3186c9ba93fbf203fb514b7c0b978a2adb Mon Sep 17 00:00:00 2001 From: yangdx Date: Mon, 27 Jan 2025 02:46:21 +0800 Subject: [PATCH 13/13] Fix linting --- lightrag/api/lightrag_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index d700d5c5..3f245e9f 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -291,6 +291,7 @@ def display_splash_screen(args: argparse.Namespace) -> None: # Ensure splash output flush to system log sys.stdout.flush() + def parse_args() -> argparse.Namespace: """ Parse command line arguments with environment variable fallback