Add "/bypass" mode to skip context retrieval and directly use LLM

• Added SearchMode.bypass enum value
• Added /bypass prefix handler
• Skip RAG when in bypass mode
• Pass conversation history to LLM
• Apply bypass mode for both stream/non-stream
Author: yangdx
Date:   2025-02-03 11:49:17 +08:00
Parent: c07b5522fe
Commit: a8f7b7e2b7

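For reference, a minimal client-side sketch of how the new mode is exercised through the Ollama-compatible /api/chat route. The host, port, and model name below are placeholders, not taken from this commit; the only detail that matters is the "/bypass " prefix on the final user message, which makes the server skip retrieval and forward the earlier messages as history.

```python
# Hypothetical client call; endpoint URL and model name are assumptions.
import requests

resp = requests.post(
    "http://localhost:9621/api/chat",
    json={
        "model": "lightrag:latest",
        "stream": False,  # the commit handles stream=True the same way
        "messages": [
            {"role": "user", "content": "Earlier I asked about vector stores."},
            {"role": "assistant", "content": "Yes, we compared FAISS and Milvus."},
            # "/bypass " skips RAG retrieval and calls the LLM directly,
            # passing the prior messages as conversation history.
            {"role": "user", "content": "/bypass Summarize our conversation so far."},
        ],
    },
    timeout=120,
)
print(resp.json())
```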

@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"
 
 
 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
             "/naive ": SearchMode.naive,
             "/hybrid ": SearchMode.hybrid,
             "/mix ": SearchMode.mix,
+            "/bypass ": SearchMode.bypass,
         }
 
         for prefix, mode in mode_map.items():
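The loop body that consumes mode_map sits outside this hunk. The sketch below shows the assumed behaviour (the default mode and exact stripping logic are assumptions, reusing the SearchMode enum from the first hunk): when a registered prefix matches, it is stripped off so downstream code sees only the actual question, and the matched mode is returned.

```python
# Illustrative sketch only; the real loop body is not shown in this commit.
def parse_query_mode(query: str, mode_map: dict[str, SearchMode]) -> tuple[str, SearchMode]:
    for prefix, mode in mode_map.items():
        if query.startswith(prefix):
            # Strip "/bypass ", "/mix ", ... so the LLM/RAG layer only sees
            # the user's actual question as cleaned_query.
            return query[len(prefix):], mode
    # No recognized prefix: assumed fallback to the server's default mode.
    return query, SearchMode.hybrid
```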
@@ -1700,9 +1702,20 @@ def create_app(args):
 
             if request.stream:
                 from fastapi.responses import StreamingResponse
 
-                response = await rag.aquery(  # Need await to get async generator
-                    cleaned_query, param=query_param
-                )
+                # Determine if the request is prefixed with "/bypass"
+                if mode == SearchMode.bypass:
+                    if request.system:
+                        rag.llm_model_kwargs["system_prompt"] = request.system
+                    response = await rag.llm_model_func(
+                        cleaned_query,
+                        stream=True,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
+                    )
+                else:
+                    response = await rag.aquery(  # Need await to get async generator
+                        cleaned_query, param=query_param
+                    )
 
                 async def stream_generator():
                     try:
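The bypass branch hands cleaned_query to rag.llm_model_func with stream=True and the accumulated conversation_history. The configured function's exact signature is not part of this diff; the sketch below only shows a compatible shape, where history_messages carries the prior turns and stream=True yields an async iterator of text chunks instead of a single string.

```python
# Assumed-compatible signature for the configured LLM function; parameter
# names and return types are assumptions, not taken from this diff.
from typing import AsyncIterator


async def llm_model_func(
    prompt: str,
    system_prompt: str | None = None,
    history_messages: list[dict] | None = None,
    stream: bool = False,
    **kwargs,
) -> str | AsyncIterator[str]:
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history_messages or [])
    messages.append({"role": "user", "content": prompt})

    if stream:
        async def chunks() -> AsyncIterator[str]:
            # Placeholder: a real implementation would stream tokens
            # from the backing LLM API for `messages` here.
            yield "..."
        return chunks()
    return "..."  # placeholder for the full completion text
```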
@@ -1804,16 +1817,19 @@ def create_app(args):
             else:
                 first_chunk_time = time.time_ns()
 
-                # Determine if the request is from Open WebUI's session title and session keyword generation task
+                # Determine if the request is prefixed with "/bypass" or from Open WebUI's session title and session keyword generation task
                 match_result = re.search(
                     r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
                 )
-                if match_result:
+                if match_result or mode == SearchMode.bypass:
                     if request.system:
                         rag.llm_model_kwargs["system_prompt"] = request.system
 
                     response_text = await rag.llm_model_func(
-                        cleaned_query, stream=False, **rag.llm_model_kwargs
+                        cleaned_query,
+                        stream=False,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
                     )
                 else:
                     response_text = await rag.aquery(cleaned_query, param=query_param)
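conversation_history is used by both the streaming and non-streaming branches but is built outside the hunks shown here. A hedged sketch of how it is presumably assembled from the incoming Ollama-style messages (everything before the final user message, whose text becomes cleaned_query after prefix stripping); the helper name is hypothetical.

```python
# Hypothetical helper; the actual construction is outside this commit's hunks.
def build_conversation_history(messages: list[dict]) -> list[dict]:
    # Treat every message except the last user turn as prior context.
    return [
        {"role": m["role"], "content": m["content"]}
        for m in messages[:-1]
    ]
```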