Add "/bypass" mode to skip context retrieval and directly use LLM
• Added SearchMode.bypass enum value • Added /bypass prefix handler • Skip RAG when in bypass mode • Pass conversation history to LLM • Apply bypass mode for both stream/non-stream
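
For orientation, a bypass request from a client looks roughly like the sketch below. Only the "/bypass " prefix comes from this commit; the endpoint path, host, port, and model tag are assumptions based on the server's Ollama-compatible chat API.

# Minimal sketch: sending a bypass-mode chat request. The "/bypass " prefix is
# what this commit adds; endpoint path, host, port, and model tag are assumed.
import requests

resp = requests.post(
    "http://localhost:9621/api/chat",  # assumed host/port for the Ollama-compatible endpoint
    json={
        "model": "lightrag:latest",  # assumed model tag
        "stream": False,
        "messages": [
            {"role": "user", "content": "/bypass Summarize our discussion so far."},
        ],
    },
)
print(resp.json())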
@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"


 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
             "/naive ": SearchMode.naive,
             "/hybrid ": SearchMode.hybrid,
             "/mix ": SearchMode.mix,
+            "/bypass ": SearchMode.bypass,
         }

         for prefix, mode in mode_map.items():
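
The pre-existing loop above strips the matched prefix before the query reaches retrieval or the LLM. A minimal sketch of that logic, assuming a startswith check and a slice; only mode_map and the loop header appear in this diff:

# Sketch (assumed) of how the prefix loop resolves the mode and cleans the query;
# only mode_map and the loop header are taken from the diff.
def parse_query_mode(query: str) -> tuple[str, SearchMode]:
    mode_map = {
        "/naive ": SearchMode.naive,
        "/hybrid ": SearchMode.hybrid,
        "/mix ": SearchMode.mix,
        "/bypass ": SearchMode.bypass,
    }
    for prefix, mode in mode_map.items():
        if query.startswith(prefix):
            # Strip the prefix so neither retrieval nor the LLM ever sees it
            return query[len(prefix):], mode
    return query, SearchMode.hybrid  # assumed default when no prefix matches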
@@ -1700,6 +1702,17 @@ def create_app(args):
         if request.stream:
             from fastapi.responses import StreamingResponse

-            response = await rag.aquery(  # Need await to get async generator
-                cleaned_query, param=query_param
-            )
+            # Determine if the request is prefixed with "/bypass"
+            if mode == SearchMode.bypass:
+                if request.system:
+                    rag.llm_model_kwargs["system_prompt"] = request.system
+                response = await rag.llm_model_func(
+                    cleaned_query,
+                    stream=True,
+                    history_messages=conversation_history,
+                    **rag.llm_model_kwargs
+                )
+            else:
+                response = await rag.aquery(  # Need await to get async generator
+                    cleaned_query, param=query_param
+                )
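
The history_messages argument is new in this call: in bypass mode the LLM receives the prior turns that a RAG query would otherwise handle through retrieval. A hedged sketch of how conversation_history is presumably assembled from the Ollama-style message list; the construction is an assumption, only the variable name appears in this diff:

# Sketch (assumed): building conversation_history from the Ollama-style request,
# excluding the final user turn, which becomes cleaned_query.
conversation_history = [
    {"role": msg.role, "content": msg.content}
    for msg in request.messages[:-1]
]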
@@ -1804,16 +1817,19 @@ def create_app(args):
             else:
                 first_chunk_time = time.time_ns()

-            # Determine if the request is from Open WebUI's session title and session keyword generation task
+            # Determine if the request is prefixed with "/bypass" or from Open WebUI's session title and session keyword generation task
             match_result = re.search(
                 r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
             )
-            if match_result:
+            if match_result or mode == SearchMode.bypass:
                 if request.system:
                     rag.llm_model_kwargs["system_prompt"] = request.system

                 response_text = await rag.llm_model_func(
-                    cleaned_query, stream=False, **rag.llm_model_kwargs
+                    cleaned_query,
+                    stream=False,
+                    history_messages=conversation_history,
+                    **rag.llm_model_kwargs
                 )
             else:
                 response_text = await rag.aquery(cleaned_query, param=query_param)
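
For reference, the re.search heuristic above fires on the prompt shape Open WebUI uses for its title and keyword generation tasks. A small hedged illustration; the sample prompt text is invented, only the regex comes from the diff:

import re

# Invented sample of an Open WebUI task prompt; only the regex is from the diff.
task_prompt = (
    "Create a concise title for this conversation:\n"
    "<chat_history>\nUSER: hello\nASSISTANT: hi there"
)
print(bool(re.search(r"\n<chat_history>\nUSER:", task_prompt, re.MULTILINE)))  # True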