Add "/bypass" mode to skip context retrieval and directly use LLM
• Added SearchMode.bypass enum value
• Added /bypass prefix handler
• Skip RAG when in bypass mode
• Pass conversation history to LLM
• Apply bypass mode for both stream/non-stream
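For context, a minimal client-side sketch of how a caller would trigger the new mode. The endpoint path, port, and model name here are assumptions for illustration, not part of this commit:

    # Hypothetical client example; URL and model name are placeholders.
    import requests

    resp = requests.post(
        "http://localhost:9621/api/chat",  # assumed Ollama-compatible chat endpoint
        json={
            "model": "lightrag:latest",  # placeholder model name
            "stream": False,
            "messages": [
                # The "/bypass " prefix selects SearchMode.bypass server-side;
                # the query goes straight to the LLM with no RAG retrieval.
                {"role": "user", "content": "/bypass What does this error mean?"},
            ],
        },
    )
    print(resp.json())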
@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"


 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
         "/naive ": SearchMode.naive,
         "/hybrid ": SearchMode.hybrid,
         "/mix ": SearchMode.mix,
+        "/bypass ": SearchMode.bypass,
     }

     for prefix, mode in mode_map.items():
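The loop body is unchanged and not shown in this hunk; a plausible sketch of what it does with the new entry (the default mode below is an assumption):

    # Sketch of the (unchanged) prefix-matching logic, for context only.
    mode = SearchMode.hybrid  # assumed default when no prefix matches
    cleaned_query = query
    for prefix, search_mode in mode_map.items():
        if query.startswith(prefix):
            mode = search_mode
            cleaned_query = query[len(prefix):]  # strip e.g. "/bypass "
            break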
@@ -1700,6 +1702,17 @@ def create_app(args):
         if request.stream:
             from fastapi.responses import StreamingResponse

+            # Determine if the request is prefixed with "/bypass"
+            if mode == SearchMode.bypass:
+                if request.system:
+                    rag.llm_model_kwargs["system_prompt"] = request.system
+                response = await rag.llm_model_func(
+                    cleaned_query,
+                    stream=True,
+                    history_messages=conversation_history,
+                    **rag.llm_model_kwargs
+                )
+            else:
-            response = await rag.aquery(  # Need await to get async generator
-                cleaned_query, param=query_param
-            )
+                response = await rag.aquery(  # Need await to get async generator
+                    cleaned_query, param=query_param
+                )
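Both branches are expected to yield an async iterator of text chunks, so the downstream streaming code can consume them uniformly. A sketch under that assumption (the helper name and media type are illustrative, not from this commit):

    from fastapi.responses import StreamingResponse

    def make_streaming_response(response):
        # `response` is assumed to be an async iterator of text chunks,
        # whichever branch produced it.
        async def stream_generator():
            async for chunk in response:
                yield chunk

        return StreamingResponse(stream_generator(), media_type="application/x-ndjson")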
@@ -1804,16 +1817,19 @@ def create_app(args):
         else:
             first_chunk_time = time.time_ns()

-        # Determine if the request is from Open WebUI's session title and session keyword generation task
+        # Determine if the request is prefixed with "/bypass" or from Open WebUI's session title and session keyword generation task
         match_result = re.search(
             r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
         )
-        if match_result:
+        if match_result or mode == SearchMode.bypass:
             if request.system:
                 rag.llm_model_kwargs["system_prompt"] = request.system

             response_text = await rag.llm_model_func(
-                cleaned_query, stream=False, **rag.llm_model_kwargs
+                cleaned_query,
+                stream=False,
+                history_messages=conversation_history,
+                **rag.llm_model_kwargs
             )
         else:
             response_text = await rag.aquery(cleaned_query, param=query_param)
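Not shown in these hunks: conversation_history is assumed to hold the prior chat turns. One plausible way it could be built from the request's message list (illustrative only; the actual construction is outside this diff):

    # Hypothetical construction of conversation_history from the request.
    conversation_history = [
        {"role": msg.role, "content": msg.content}
        for msg in request.messages[:-1]  # all turns before the current query
    ]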