Add "/bypass" mode to skip context retrieval and directly use LLM

• Added SearchMode.bypass enum value
• Added /bypass prefix handler
• Skip RAG when in bypass mode
• Pass conversation history to LLM
• Apply bypass mode for both stream/non-stream
Author: yangdx
Date:   2025-02-03 11:49:17 +08:00
Parent: c07b5522fe
Commit: a8f7b7e2b7

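For reference, a minimal client-side sketch of how the new mode is exercised through the Ollama-compatible /api/chat route. The host, port, and model name below are placeholders, not taken from this commit; the only detail that matters is the "/bypass " prefix on the final user message, which makes the server skip retrieval and forward the earlier messages as history.

```python
# Hypothetical client call; endpoint URL and model name are assumptions.
import requests

resp = requests.post(
    "http://localhost:9621/api/chat",
    json={
        "model": "lightrag:latest",
        "stream": False,  # the commit handles stream=True the same way
        "messages": [
            {"role": "user", "content": "Earlier I asked about vector stores."},
            {"role": "assistant", "content": "Yes, we compared FAISS and Milvus."},
            # "/bypass " skips RAG retrieval and calls the LLM directly,
            # passing the prior messages as conversation history.
            {"role": "user", "content": "/bypass Summarize our conversation so far."},
        ],
    },
    timeout=120,
)
print(resp.json())
```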

@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"
 
 
 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
             "/naive ": SearchMode.naive,
             "/hybrid ": SearchMode.hybrid,
             "/mix ": SearchMode.mix,
+            "/bypass ": SearchMode.bypass,
         }
 
         for prefix, mode in mode_map.items():
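The loop body that consumes mode_map sits outside this hunk. The sketch below shows the assumed behaviour (the default mode and exact stripping logic are assumptions, reusing the SearchMode enum from the first hunk): when a registered prefix matches, it is stripped off so downstream code sees only the actual question, and the matched mode is returned.

```python
# Illustrative sketch only; the real loop body is not shown in this commit.
def parse_query_mode(query: str, mode_map: dict[str, SearchMode]) -> tuple[str, SearchMode]:
    for prefix, mode in mode_map.items():
        if query.startswith(prefix):
            # Strip "/bypass ", "/mix ", ... so the LLM/RAG layer only sees
            # the user's actual question as cleaned_query.
            return query[len(prefix):], mode
    # No recognized prefix: assumed fallback to the server's default mode.
    return query, SearchMode.hybrid
```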
@@ -1700,9 +1702,20 @@ def create_app(args):
 
             if request.stream:
                 from fastapi.responses import StreamingResponse
 
-                response = await rag.aquery(  # Need await to get async generator
-                    cleaned_query, param=query_param
-                )
+                # Determine if the request is prefixed with "/bypass"
+                if mode == SearchMode.bypass:
+                    if request.system:
+                        rag.llm_model_kwargs["system_prompt"] = request.system
+                    response = await rag.llm_model_func(
+                        cleaned_query,
+                        stream=True,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
+                    )
+                else:
+                    response = await rag.aquery(  # Need await to get async generator
+                        cleaned_query, param=query_param
+                    )
 
                 async def stream_generator():
                     try:
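The bypass branch hands cleaned_query to rag.llm_model_func with stream=True and the accumulated conversation_history. The configured function's exact signature is not part of this diff; the sketch below only shows a compatible shape, where history_messages carries the prior turns and stream=True yields an async iterator of text chunks instead of a single string.

```python
# Assumed-compatible signature for the configured LLM function; parameter
# names and return types are assumptions, not taken from this diff.
from typing import AsyncIterator


async def llm_model_func(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict] | None = None,
    stream: bool = False,
    **kwargs,
) -> str | AsyncIterator[str]:
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})

    if stream:
        async def chunks() -> AsyncIterator[str]:
            # Placeholder: a real implementation would stream tokens
            # from the backing LLM API for `messages` here.
            yield "..."
        return chunks()
    return "..."  # placeholder for the full completion text
```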
@@ -1804,16 +1817,19 @@ def create_app(args):
             else:
                 first_chunk_time = time.time_ns()
 
-                # Determine if the request is from Open WebUI's session title and session keyword generation task
+                # Determine if the request is prefixed with "/bypass" or from Open WebUI's session title and session keyword generation task
                 match_result = re.search(
                     r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
                 )
-                if match_result:
+                if match_result or mode == SearchMode.bypass:
                     if request.system:
                         rag.llm_model_kwargs["system_prompt"] = request.system
 
                     response_text = await rag.llm_model_func(
-                        cleaned_query, stream=False, **rag.llm_model_kwargs
+                        cleaned_query,
+                        stream=False,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
                     )
                 else:
                     response_text = await rag.aquery(cleaned_query, param=query_param)
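conversation_history is used by both the streaming and non-streaming branches but is built outside the hunks shown here. A hedged sketch of how it is presumably assembled from the incoming Ollama-style messages (everything before the final user message, whose text becomes cleaned_query after prefix stripping); the helper name is hypothetical.

```python
# Hypothetical helper; the actual construction is outside this commit's hunks.
def build_conversation_history(messages: list[dict]) -> list[dict]:
    # Treat every message except the last user turn as prior context.
    return [
        {"role": m["role"], "content": m["content"]}
        for m in messages[:-1]
    ]
```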