diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index 288ff79c..66b3a10c 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -82,14 +82,19 @@ We provide an Ollama-compatible interfaces for LightRAG, aiming to emulate Light
 
 A query prefix in the query string determines which LightRAG query mode is used to generate the response for the query. The supported prefixes include:
 
+```
 /local
 /global
 /hybrid
 /naive
 /mix
+/bypass
+```
 
 For example, the chat message "/mix 唐僧有几个徒弟" will trigger a mix mode query for LightRAG. A chat message without a query prefix will trigger a hybrid mode query by default.
 
+"/bypass" is not a LightRAG query mode; it tells the API Server to pass the query, together with the chat history, directly to the underlying LLM. This lets the LLM answer questions based on previous LightRAG query results. (If you are using Open WebUI as a front end, you can simply switch the model to a plain LLM instead of using the /bypass prefix.)
+
 #### Connect Open WebUI to LightRAG
 
 After starting the lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. A model named lightrag:latest will then appear in Open WebUI's model management interface, and users can send queries to LightRAG through the chat interface.
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index 600bfdc4..ec58f552 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"
     mix = "mix"
+    bypass = "bypass"
 
 
 class OllamaMessage(BaseModel):
@@ -1476,7 +1477,7 @@ def create_app(args):
 
     @app.get("/api/tags")
     async def get_tags():
-        """Get available models"""
+        """Return available models, acting as an Ollama server"""
         return OllamaTagResponse(
             models=[
                 {
@@ -1507,6 +1508,7 @@ def create_app(args):
             "/naive ": SearchMode.naive,
             "/hybrid ": SearchMode.hybrid,
             "/mix ": SearchMode.mix,
+            "/bypass ": SearchMode.bypass,
         }
 
         for prefix, mode in mode_map.items():
@@ -1519,7 +1521,7 @@ def create_app(args):
 
     @app.post("/api/generate")
    async def generate(raw_request: Request, request: OllamaGenerateRequest):
-        """Handle generate completion requests
+        """Handle generate completion requests, acting as an Ollama model
         For compatibility purposes, the request is not processed by LightRAG,
         and will be handled by the underlying LLM model.
         """
@@ -1661,7 +1663,7 @@ def create_app(args):
 
     @app.post("/api/chat")
     async def chat(raw_request: Request, request: OllamaChatRequest):
-        """Process chat completion requests.
+        """Process chat completion requests, acting as an Ollama model
         Routes user queries through LightRAG by selecting query mode based on prefix indicators.
         Detects and forwards OpenWebUI session-related requests (for metadata generation tasks) directly to the LLM.
         """
@@ -1700,9 +1702,20 @@ def create_app(args):
         if request.stream:
             from fastapi.responses import StreamingResponse
 
-            response = await rag.aquery(  # Need await to get async generator
-                cleaned_query, param=query_param
-            )
+            # Determine if the request is prefixed with "/bypass"
+            if mode == SearchMode.bypass:
+                if request.system:
+                    rag.llm_model_kwargs["system_prompt"] = request.system
+                response = await rag.llm_model_func(
+                    cleaned_query,
+                    stream=True,
+                    history_messages=conversation_history,
+                    **rag.llm_model_kwargs,
+                )
+            else:
+                response = await rag.aquery(  # Need await to get async generator
+                    cleaned_query, param=query_param
+                )
 
             async def stream_generator():
                 try:
@@ -1804,16 +1817,19 @@ def create_app(args):
             else:
                 first_chunk_time = time.time_ns()
 
-            # Determine if the request is from Open WebUI's session title and session keyword generation task
+            # Determine if the request is prefixed with "/bypass" or comes from Open WebUI's session title and session keyword generation task
             match_result = re.search(
                 r"\n\nUSER:", cleaned_query, re.MULTILINE
             )
-            if match_result:
+            if match_result or mode == SearchMode.bypass:
                 if request.system:
                     rag.llm_model_kwargs["system_prompt"] = request.system
 
                 response_text = await rag.llm_model_func(
-                    cleaned_query, stream=False, **rag.llm_model_kwargs
+                    cleaned_query,
+                    stream=False,
+                    history_messages=conversation_history,
+                    **rag.llm_model_kwargs,
                 )
             else:
                 response_text = await rag.aquery(cleaned_query, param=query_param)