Add "/bypass" mode to skip context retrieval and directly use LLM
• Added SearchMode.bypass enum value
• Added /bypass prefix handler
• Skip RAG when in bypass mode
• Pass conversation history to LLM
• Apply bypass mode for both stream/non-stream
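For context, a minimal client-side sketch of how a caller would trigger the new mode. The endpoint path, port, and model name here are assumptions for illustration, not part of this commit:

    # Hypothetical client example; URL and model name are placeholders.
    import requests

    resp = requests.post(
        "http://localhost:9621/api/chat",  # assumed Ollama-compatible chat endpoint
        json={
            "model": "lightrag:latest",  # placeholder model name
            "stream": False,
            "messages": [
                # The "/bypass " prefix selects SearchMode.bypass server-side;
                # the query goes straight to the LLM with no RAG retrieval.
                {"role": "user", "content": "/bypass What does this error mean?"},
            ],
        },
    )
    print(resp.json())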
@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"


 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
         "/naive ": SearchMode.naive,
         "/hybrid ": SearchMode.hybrid,
         "/mix ": SearchMode.mix,
+        "/bypass ": SearchMode.bypass,
     }

     for prefix, mode in mode_map.items():
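The loop body is unchanged and not shown in this hunk; a plausible sketch of what it does with the new entry (the default mode below is an assumption):

    # Sketch of the (unchanged) prefix-matching logic, for context only.
    mode = SearchMode.hybrid  # assumed default when no prefix matches
    cleaned_query = query
    for prefix, search_mode in mode_map.items():
        if query.startswith(prefix):
            mode = search_mode
            cleaned_query = query[len(prefix):]  # strip e.g. "/bypass "
            break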
@@ -1700,6 +1702,17 @@ def create_app(args):
         if request.stream:
             from fastapi.responses import StreamingResponse

+            # Determine if the request is prefixed with "/bypass"
+            if mode == SearchMode.bypass:
+                if request.system:
+                    rag.llm_model_kwargs["system_prompt"] = request.system
+                response = await rag.llm_model_func(
+                    cleaned_query,
+                    stream=True,
+                    history_messages=conversation_history,
+                    **rag.llm_model_kwargs
+                )
+            else:
-            response = await rag.aquery(  # Need await to get async generator
-                cleaned_query, param=query_param
-            )
+                response = await rag.aquery(  # Need await to get async generator
+                    cleaned_query, param=query_param
+                )
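Both branches are expected to yield an async iterator of text chunks, so the downstream streaming code can consume them uniformly. A sketch under that assumption (the helper name and media type are illustrative, not from this commit):

    from fastapi.responses import StreamingResponse

    def make_streaming_response(response):
        # `response` is assumed to be an async iterator of text chunks,
        # whichever branch produced it.
        async def stream_generator():
            async for chunk in response:
                yield chunk

        return StreamingResponse(stream_generator(), media_type="application/x-ndjson")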
@@ -1804,16 +1817,19 @@ def create_app(args):
         else:
             first_chunk_time = time.time_ns()

-        # Determine if the request is from Open WebUI's session title and session keyword generation task
+        # Determine if the request is prefixed with "/bypass" or from Open WebUI's session title and session keyword generation task
         match_result = re.search(
             r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
         )
-        if match_result:
+        if match_result or mode == SearchMode.bypass:
             if request.system:
                 rag.llm_model_kwargs["system_prompt"] = request.system

             response_text = await rag.llm_model_func(
-                cleaned_query, stream=False, **rag.llm_model_kwargs
+                cleaned_query,
+                stream=False,
+                history_messages=conversation_history,
+                **rag.llm_model_kwargs
             )
         else:
             response_text = await rag.aquery(cleaned_query, param=query_param)
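Not shown in these hunks: conversation_history is assumed to hold the prior chat turns. One plausible way it could be built from the request's message list (illustrative only; the actual construction is outside this diff):

    # Hypothetical construction of conversation_history from the request.
    conversation_history = [
        {"role": msg.role, "content": msg.content}
        for msg in request.messages[:-1]  # all turns before the current query
    ]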