From a8f7b7e2b7860f9ec30561e2b055b03a24ed1919 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Mon, 3 Feb 2025 11:49:17 +0800
Subject: [PATCH] Add "/bypass" mode to skip context retrieval and directly
 use LLM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

• Added SearchMode.bypass enum value
• Added /bypass prefix handler
• Skip RAG retrieval when in bypass mode
• Pass conversation history through to the LLM
• Apply bypass mode on both the stream and non-stream paths
---
 lightrag/api/lightrag_server.py | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 600bfdc4..51968ae4 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"
 
 
 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
             "/naive ": SearchMode.naive,
             "/hybrid ": SearchMode.hybrid,
             "/mix ": SearchMode.mix,
+            "/bypass ": SearchMode.bypass,
         }
 
         for prefix, mode in mode_map.items():
@@ -1700,9 +1702,20 @@
             if request.stream:
                 from fastapi.responses import StreamingResponse
 
-                response = await rag.aquery(  # Need await to get async generator
-                    cleaned_query, param=query_param
-                )
+                # Determine whether the request is prefixed with "/bypass"
+                if mode == SearchMode.bypass:
+                    if request.system:
+                        rag.llm_model_kwargs["system_prompt"] = request.system
+                    response = await rag.llm_model_func(
+                        cleaned_query,
+                        stream=True,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs,
+                    )
+                else:
+                    response = await rag.aquery(  # Need await to get async generator
+                        cleaned_query, param=query_param
+                    )
 
                 async def stream_generator():
                     try:
@@ -1804,16 +1817,19 @@
             else:
                 first_chunk_time = time.time_ns()
 
-                # Determine if the request is from Open WebUI's session title and session keyword generation task
+                # Determine whether the request is prefixed with "/bypass" or comes from Open WebUI's session title and session keyword generation task
                 match_result = re.search(
                     r"\n\nUSER:", cleaned_query, re.MULTILINE
                 )
-                if match_result:
+                if match_result or mode == SearchMode.bypass:
                     if request.system:
                         rag.llm_model_kwargs["system_prompt"] = request.system
 
                     response_text = await rag.llm_model_func(
-                        cleaned_query, stream=False, **rag.llm_model_kwargs
+                        cleaned_query,
+                        stream=False,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs,
                     )
                 else:
                     response_text = await rag.aquery(cleaned_query, param=query_param)
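
A minimal sketch for exercising the bypass path, assuming the API server is
running on LightRAG's default port 9621 and serving the Ollama-compatible
/api/chat route implemented in lightrag_server.py; the model name, prompt,
and port here are placeholders, not part of this patch:

    # Hypothetical smoke test for the "/bypass" prefix (port, route, and
    # model name are assumptions).
    import requests

    resp = requests.post(
        "http://localhost:9621/api/chat",
        json={
            "model": "lightrag:latest",  # placeholder model name
            "stream": False,
            "messages": [
                # "/bypass " maps to SearchMode.bypass, so the cleaned query
                # goes straight to rag.llm_model_func with the conversation
                # history, skipping RAG retrieval entirely.
                {"role": "user", "content": "/bypass Answer without any retrieved context."}
            ],
        },
        timeout=60,
    )
    resp.raise_for_status()
    print(resp.json())  # Ollama-style chat response body

Streaming should behave the same way with "stream": true, since the patch
applies the bypass branch on both paths.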