Merge branch 'HKUDS:main' into main

This commit is contained in:
Saifeddine ALOUI
2025-02-03 22:05:59 +01:00
committed by GitHub
11 changed files with 1394 additions and 692 deletions

View File

@@ -82,14 +82,19 @@ We provide an Ollama-compatible interface for LightRAG, aiming to emulate Light
A query prefix in the query string determines which LightRAG query mode is used to generate the response for the query. The supported prefixes include:
```
/local
/global
/hybrid
/naive
/mix
/bypass
```
For example, the chat message "/mix 唐僧有几个徒弟" ("How many disciples does Tang Seng have?") will trigger a mix mode query in LightRAG. A chat message without a query prefix will trigger a hybrid mode query by default.
"/bypass" is not a LightRAG query mode; it tells the API Server to pass the query directly to the underlying LLM together with the chat history, so the user can have the LLM answer questions based on previous LightRAG query results. (If you are using Open WebUI as a front end, you can simply switch the model to a normal LLM instead of using the /bypass prefix.)
#### Connect Open WebUI to LightRAG
After starting lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. A model named lightrag:latest will then appear in Open WebUI's model management interface, and users can send queries to LightRAG through the chat interface.
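Tying this subsection together with the query prefixes above, here is a minimal Python sketch of talking to the server directly (without Open WebUI). The host and port, timeout values, and exact response field names are assumptions based on common Ollama API conventions, not something guaranteed by this patch:
```
import requests

BASE_URL = "http://localhost:9621"  # assumed lightrag-server address; adjust as needed

# 1) Confirm the emulated Ollama model is visible (the same data Open WebUI reads).
tags = requests.get(f"{BASE_URL}/api/tags", timeout=30)
tags.raise_for_status()
print([m.get("name") for m in tags.json().get("models", [])])  # expect "lightrag:latest"

# 2) Send a prefixed chat message; "/mix " selects the mix query mode,
#    no prefix falls back to hybrid, and "/bypass " skips LightRAG entirely.
payload = {
    "model": "lightrag:latest",
    "messages": [{"role": "user", "content": "/mix How many disciples does Tang Seng have?"}],
    "stream": False,
}
chat = requests.post(f"{BASE_URL}/api/chat", json=payload, timeout=120)
chat.raise_for_status()
print(chat.json()["message"]["content"])
```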

View File

@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
global_ = "global"
hybrid = "hybrid"
mix = "mix"
bypass = "bypass"
class OllamaMessage(BaseModel):
@@ -1476,7 +1477,7 @@ def create_app(args):
@app.get("/api/tags")
async def get_tags():
"""Get available models"""
"""Return available models acting as an Ollama server"""
return OllamaTagResponse(
models=[
{
@@ -1507,6 +1508,7 @@ def create_app(args):
"/naive ": SearchMode.naive,
"/hybrid ": SearchMode.hybrid,
"/mix ": SearchMode.mix,
"/bypass ": SearchMode.bypass,
}
for prefix, mode in mode_map.items():
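For orientation, the prefix matching sketched in this hunk boils down to roughly the helper below. `parse_query_mode` is a hypothetical name (the server does this inline), and the `local`/`naive` enum members and the `/local `/`/global ` map entries are assumed from the README prefix list rather than visible in this hunk:
```
from enum import Enum


class SearchMode(str, Enum):
    # Mirrors the enum shown earlier; local/naive assumed from the README prefix list.
    local = "local"
    global_ = "global"
    hybrid = "hybrid"
    naive = "naive"
    mix = "mix"
    bypass = "bypass"


def parse_query_mode(query: str) -> tuple[str, SearchMode]:
    """Hypothetical helper: strip a leading mode prefix and return (query, mode)."""
    mode_map = {
        "/local ": SearchMode.local,
        "/global ": SearchMode.global_,
        "/naive ": SearchMode.naive,
        "/hybrid ": SearchMode.hybrid,
        "/mix ": SearchMode.mix,
        "/bypass ": SearchMode.bypass,
    }
    for prefix, mode in mode_map.items():
        if query.startswith(prefix):
            # Drop the prefix so only the actual question reaches the retriever / LLM.
            return query[len(prefix):], mode
    # No prefix: default to hybrid, matching the documented behaviour.
    return query, SearchMode.hybrid
```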
@@ -1519,7 +1521,7 @@ def create_app(args):
@app.post("/api/generate")
async def generate(raw_request: Request, request: OllamaGenerateRequest):
"""Handle generate completion requests
"""Handle generate completion requests acting as an Ollama model
For compatibility purposes, the request is not processed by LightRAG,
and will be handled by the underlying LLM model.
"""
@@ -1661,7 +1663,7 @@ def create_app(args):
@app.post("/api/chat")
async def chat(raw_request: Request, request: OllamaChatRequest):
"""Process chat completion requests.
"""Process chat completion requests acting as an Ollama model
Routes user queries through LightRAG by selecting query mode based on prefix indicators.
Detects and forwards Open WebUI session-related requests (for metadata generation tasks) directly to the LLM.
"""
@@ -1700,9 +1702,20 @@ def create_app(args):
if request.stream:
from fastapi.responses import StreamingResponse
response = await rag.aquery( # Need await to get async generator
cleaned_query, param=query_param
)
# Determine if the request is prefixed with "/bypass"
if mode == SearchMode.bypass:
if request.system:
rag.llm_model_kwargs["system_prompt"] = request.system
response = await rag.llm_model_func(
cleaned_query,
stream=True,
history_messages=conversation_history,
**rag.llm_model_kwargs,
)
else:
response = await rag.aquery( # Need await to get async generator
cleaned_query, param=query_param
)
async def stream_generator():
try:
@@ -1804,16 +1817,19 @@ def create_app(args):
else:
first_chunk_time = time.time_ns()
# Determine if the request is from Open WebUI's session title and session keyword generation task
# Determine if the request is prefixed with "/bypass" or comes from Open WebUI's session title and session keyword generation task
match_result = re.search(
r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
)
if match_result:
if match_result or mode == SearchMode.bypass:
if request.system:
rag.llm_model_kwargs["system_prompt"] = request.system
response_text = await rag.llm_model_func(
cleaned_query, stream=False, **rag.llm_model_kwargs
cleaned_query,
stream=False,
history_messages=conversation_history,
**rag.llm_model_kwargs,
)
else:
response_text = await rag.aquery(cleaned_query, param=query_param)
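Finally, a non-streaming "/bypass" request that leans on earlier LightRAG answers in the chat history might look like this; again, the address and response field names are assumptions following Ollama conventions:
```
import requests

# Assumed server address. "/bypass" skips LightRAG retrieval and sends the question,
# together with the conversation history, straight to the underlying LLM.
payload = {
    "model": "lightrag:latest",
    "messages": [
        {"role": "user", "content": "/mix What is LightRAG?"},
        {"role": "assistant", "content": "<previous LightRAG answer>"},
        {"role": "user", "content": "/bypass Summarize the answer above in one sentence."},
    ],
    "stream": False,
}
resp = requests.post("http://localhost:9621/api/chat", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["message"]["content"])
```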