From a8f7b7e2b7860f9ec30561e2b055b03a24ed1919 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 3 Feb 2025 11:49:17 +0800
Subject: [PATCH 1/4] Add "/bypass" mode to skip context retrieval and directly use LLM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Added SearchMode.bypass enum value
• Added /bypass prefix handler
• Skip RAG when in bypass mode
• Pass conversation history to LLM
• Apply bypass mode for both stream/non-stream
---
 lightrag/api/lightrag_server.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 600bfdc4..51968ae4 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"
 
 
 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
             "/naive ": SearchMode.naive,
             "/hybrid ": SearchMode.hybrid,
             "/mix ": SearchMode.mix,
+            "/bypass ": SearchMode.bypass,
         }
 
         for prefix, mode in mode_map.items():
@@ -1700,9 +1702,20 @@ def create_app(args):
             if request.stream:
                 from fastapi.responses import StreamingResponse
 
-                response = await rag.aquery(  # Need await to get async generator
-                    cleaned_query, param=query_param
-                )
+                # Determine if the request is prefixed with "/bypass"
+                if mode == SearchMode.bypass:
+                    if request.system:
+                        rag.llm_model_kwargs["system_prompt"] = request.system
+                    response = await rag.llm_model_func(
+                        cleaned_query,
+                        stream=True,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
+                    )
+                else:
+                    response = await rag.aquery(  # Need await to get async generator
+                        cleaned_query, param=query_param
+                    )
 
                 async def stream_generator():
                     try:
@@ -1804,16 +1817,19 @@ def create_app(args):
             else:
                 first_chunk_time = time.time_ns()
 
-                # Determine if the request is from Open WebUI's session title and session keyword generation task
+                # Determine if the request is prefixed with "/bypass" or from Open WebUI's session title and session keyword generation task
                 match_result = re.search(
                     r"\n\nUSER:", cleaned_query, re.MULTILINE
                 )
-                if match_result:
+                if match_result or mode == SearchMode.bypass:
                     if request.system:
                         rag.llm_model_kwargs["system_prompt"] = request.system
 
                     response_text = await rag.llm_model_func(
-                        cleaned_query, stream=False, **rag.llm_model_kwargs
+                        cleaned_query,
+                        stream=False,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
                     )
                 else:
                     response_text = await rag.aquery(cleaned_query, param=query_param)

From ede4122b639fa78590afa0994c21faf196379dcb Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 3 Feb 2025 12:25:59 +0800
Subject: [PATCH 2/4] docs: add documentation for /bypass prefix in LightRAG api

---
 lightrag/api/README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 288ff79c..66b3a10c 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -82,14 +82,19 @@ We provide an Ollama-compatible interfaces for LightRAG, aiming to emulate Light
 
 A query prefix in the query string can determines which LightRAG query mode is used to generate the respond for the query. The supported prefixes include:
 
+```
 /local
 /global
 /hybrid
 /naive
 /mix
+/bypass
+```
 
 For example, chat message "/mix 唐僧有几个徒弟" will trigger a mix mode query for LighRAG. A chat message without query prefix will trigger a hybrid mode query by default。
 
+"/bypass" is not a LightRAG query mode; it tells the API Server to pass the query directly to the underlying LLM, together with the chat history, so you can use the LLM to answer questions based on earlier LightRAG query results. (If you are using Open WebUI as a front end, you can simply switch the model to a normal LLM instead of using the /bypass prefix.)
+
 #### Connect Open WebUI to LightRAG
 
 After starting the lightrag-server, you can add an Ollama-type connection in the Open WebUI admin pannel. And then a model named lightrag:latest will appear in Open WebUI's model management interface. Users can then send queries to LightRAG through the chat interface.

From 4ab02a878f3372770e9768be43b6b465af9d4fcd Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 3 Feb 2025 12:39:52 +0800
Subject: [PATCH 3/4] Fix linting

---
 lightrag/api/lightrag_server.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 51968ae4..a72e1b4c 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -1707,10 +1707,10 @@ def create_app(args):
                     if request.system:
                         rag.llm_model_kwargs["system_prompt"] = request.system
                     response = await rag.llm_model_func(
-                        cleaned_query,
+                        cleaned_query,
                         stream=True,
                         history_messages=conversation_history,
-                        **rag.llm_model_kwargs
+                        **rag.llm_model_kwargs,
                     )
                 else:
                     response = await rag.aquery(  # Need await to get async generator
@@ -1826,10 +1826,10 @@
                         rag.llm_model_kwargs["system_prompt"] = request.system
 
                     response_text = await rag.llm_model_func(
-                        cleaned_query,
+                        cleaned_query,
                         stream=False,
                         history_messages=conversation_history,
-                        **rag.llm_model_kwargs
+                        **rag.llm_model_kwargs,
                     )
                 else:
                     response_text = await rag.aquery(cleaned_query, param=query_param)

From 5cf875755a042c93164a5e5d240ce866f5fb37e9 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 3 Feb 2025 13:07:08 +0800
Subject: [PATCH 4/4] Update API endpoint documentation to clarify Ollama server compatibility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Add Ollama server doc for /api/tags
• Update /api/generate endpoint docs
• Update /api/chat endpoint docs
---
 lightrag/api/lightrag_server.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index a72e1b4c..ec58f552 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -1477,7 +1477,7 @@ def create_app(args):
 
     @app.get("/api/tags")
     async def get_tags():
-        """Get available models"""
+        """Return available models, acting as an Ollama server"""
        return OllamaTagResponse(
            models=[
                {
@@ -1521,7 +1521,7 @@ def create_app(args):
 
    @app.post("/api/generate")
    async def generate(raw_request: Request, request: OllamaGenerateRequest):
-        """Handle generate completion requests
+        """Handle generate completion requests, acting as an Ollama model.
        For compatiblity purpuse, the request is not processed by LightRAG,
        and will be handled by underlying LLM model.
        """
@@ -1663,7 +1663,7 @@ def create_app(args):
 
    @app.post("/api/chat")
    async def chat(raw_request: Request, request: OllamaChatRequest):
-        """Process chat completion requests.
+        """Process chat completion requests, acting as an Ollama model.
        Routes user queries through LightRAG by selecting query mode based on prefix indicators.
        Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to LLM.
        """
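
Below is an illustrative client-side sketch (not taken from the patches above) of how the new "/bypass" prefix could be exercised through the Ollama-compatible /api/chat endpoint documented in the README change. The host and port, the timeout, and the placeholder assistant message are assumptions; the request and response shapes follow the Ollama chat API that the server emulates, and the model name lightrag:latest comes from the README.

```python
# Illustrative sketch: call lightrag-server's Ollama-compatible chat endpoint.
# The URL is an assumption -- point it at wherever your lightrag-server listens.
import requests

LIGHTRAG_CHAT_URL = "http://localhost:9621/api/chat"

payload = {
    "model": "lightrag:latest",
    "stream": False,
    "messages": [
        # Earlier turns are passed along as chat history; in bypass mode the
        # server forwards this history to the LLM instead of running retrieval.
        {"role": "user", "content": "/mix 唐僧有几个徒弟"},
        {"role": "assistant", "content": "...previous LightRAG answer..."},
        # Only the last message is treated as the query; the "/bypass" prefix
        # makes the server skip RAG and send it straight to the underlying LLM.
        {"role": "user", "content": "/bypass Summarize the answer above in one sentence."},
    ],
}

resp = requests.post(LIGHTRAG_CHAT_URL, json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["message"]["content"])
```

With "stream": True, the same endpoint should instead stream incremental chunks (the stream branch added in PATCH 1/4), so a streaming client would read the response line by line rather than calling .json() once.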