diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 600bfdc4..51968ae4 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"
 
 
 class OllamaMessage(BaseModel):
@@ -1507,6 +1508,7 @@ def create_app(args):
             "/naive ": SearchMode.naive,
             "/hybrid ": SearchMode.hybrid,
             "/mix ": SearchMode.mix,
+            "/bypass ": SearchMode.bypass,
         }
 
         for prefix, mode in mode_map.items():
@@ -1700,9 +1702,20 @@ def create_app(args):
             if request.stream:
                 from fastapi.responses import StreamingResponse
 
-                response = await rag.aquery(  # Need await to get async generator
-                    cleaned_query, param=query_param
-                )
+                # Determine if the request is prefixed with "/bypass"
+                if mode == SearchMode.bypass:
+                    if request.system:
+                        rag.llm_model_kwargs["system_prompt"] = request.system
+                    response = await rag.llm_model_func(
+                        cleaned_query,
+                        stream=True,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
+                    )
+                else:
+                    response = await rag.aquery(  # Need await to get async generator
+                        cleaned_query, param=query_param
+                    )
 
                 async def stream_generator():
                     try:
@@ -1804,16 +1817,19 @@ def create_app(args):
                 else:
                     first_chunk_time = time.time_ns()
 
-                # Determine if the request is from Open WebUI's session title and session keyword generation task
+                # Determine if the request is prefixed with "/bypass" or from Open WebUI's session title and session keyword generation task
                 match_result = re.search(
                     r"\n\nUSER:", cleaned_query, re.MULTILINE
                 )
-                if match_result:
+                if match_result or mode == SearchMode.bypass:
                     if request.system:
                         rag.llm_model_kwargs["system_prompt"] = request.system
 
                     response_text = await rag.llm_model_func(
-                        cleaned_query, stream=False, **rag.llm_model_kwargs
-                    )
+                        cleaned_query,
+                        stream=False,
+                        history_messages=conversation_history,
+                        **rag.llm_model_kwargs
+                    )
                 else:
                     response_text = await rag.aquery(cleaned_query, param=query_param)