Merge branch 'HKUDS:main' into main

This commit is contained in:
Saifeddine ALOUI
2025-02-03 22:05:59 +01:00
committed by GitHub
11 changed files with 1394 additions and 692 deletions

View File

@@ -82,14 +82,19 @@ We provide an Ollama-compatible interface for LightRAG, aiming to emulate Light
A query prefix in the query string determines which LightRAG query mode is used to generate the response for the query. The supported prefixes include:
```
/local
/global
/hybrid
/naive
/mix
/bypass
```
For example, the chat message "/mix 唐僧有几个徒弟" ("How many disciples does Tang Seng have?") will trigger a mix mode query in LightRAG. A chat message without a query prefix will trigger a hybrid mode query by default.
"/bypass" is not a LightRAG query mode; it tells the API Server to pass the query directly to the underlying LLM together with the chat history, so the user can have the LLM answer questions based on previous LightRAG query results. (If you are using Open WebUI as a front end, you can simply switch the model to a normal LLM instead of using the /bypass prefix.)
#### Connect Open WebUI to LightRAG
After starting lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. A model named lightrag:latest will then appear in Open WebUI's model management interface, and users can send queries to LightRAG through the chat interface.
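Tying this subsection together with the query prefixes above, here is a minimal Python sketch of talking to the server directly (without Open WebUI). The host and port, timeout values, and exact response field names are assumptions based on common Ollama API conventions, not something guaranteed by this patch:
```
import requests

BASE_URL = "http://localhost:9621"  # assumed lightrag-server address; adjust as needed

# 1) Confirm the emulated Ollama model is visible (the same data Open WebUI reads).
tags = requests.get(f"{BASE_URL}/api/tags", timeout=30)
tags.raise_for_status()
print([m.get("name") for m in tags.json().get("models", [])])  # expect "lightrag:latest"

# 2) Send a prefixed chat message; "/mix " selects the mix query mode,
#    no prefix falls back to hybrid, and "/bypass " skips LightRAG entirely.
payload = {
    "model": "lightrag:latest",
    "messages": [{"role": "user", "content": "/mix How many disciples does Tang Seng have?"}],
    "stream": False,
}
chat = requests.post(f"{BASE_URL}/api/chat", json=payload, timeout=120)
chat.raise_for_status()
print(chat.json()["message"]["content"])
```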

View File

@@ -599,6 +599,7 @@ class SearchMode(str, Enum):
global_ = "global"
hybrid = "hybrid"
mix = "mix"
bypass = "bypass"
class OllamaMessage(BaseModel):
@@ -1476,7 +1477,7 @@ def create_app(args):
@app.get("/api/tags")
async def get_tags():
"""Get available models"""
"""Return available models acting as an Ollama server"""
return OllamaTagResponse(
models=[
{
@@ -1507,6 +1508,7 @@ def create_app(args):
"/naive ": SearchMode.naive,
"/hybrid ": SearchMode.hybrid,
"/mix ": SearchMode.mix,
"/bypass ": SearchMode.bypass,
}
for prefix, mode in mode_map.items():
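For orientation, the prefix matching sketched in this hunk boils down to roughly the helper below. `parse_query_mode` is a hypothetical name (the server does this inline), and the `local`/`naive` enum members and the `/local `/`/global ` map entries are assumed from the README prefix list rather than visible in this hunk:
```
from enum import Enum


class SearchMode(str, Enum):
    # Mirrors the enum shown earlier; local/naive assumed from the README prefix list.
    local = "local"
    global_ = "global"
    hybrid = "hybrid"
    naive = "naive"
    mix = "mix"
    bypass = "bypass"


def parse_query_mode(query: str) -> tuple[str, SearchMode]:
    """Hypothetical helper: strip a leading mode prefix and return (query, mode)."""
    mode_map = {
        "/local ": SearchMode.local,
        "/global ": SearchMode.global_,
        "/naive ": SearchMode.naive,
        "/hybrid ": SearchMode.hybrid,
        "/mix ": SearchMode.mix,
        "/bypass ": SearchMode.bypass,
    }
    for prefix, mode in mode_map.items():
        if query.startswith(prefix):
            # Drop the prefix so only the actual question reaches the retriever / LLM.
            return query[len(prefix):], mode
    # No prefix: default to hybrid, matching the documented behaviour.
    return query, SearchMode.hybrid
```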
@@ -1519,7 +1521,7 @@ def create_app(args):
@app.post("/api/generate")
async def generate(raw_request: Request, request: OllamaGenerateRequest):
"""Handle generate completion requests
"""Handle generate completion requests acting as an Ollama model
For compatibility purposes, the request is not processed by LightRAG,
and will be handled by the underlying LLM model.
"""
@@ -1661,7 +1663,7 @@ def create_app(args):
@app.post("/api/chat")
async def chat(raw_request: Request, request: OllamaChatRequest):
"""Process chat completion requests.
"""Process chat completion requests acting as an Ollama model
Routes user queries through LightRAG by selecting query mode based on prefix indicators.
Detects and forwards Open WebUI session-related requests (for metadata generation tasks) directly to the LLM.
"""
@@ -1700,9 +1702,20 @@ def create_app(args):
if request.stream:
from fastapi.responses import StreamingResponse
response = await rag.aquery( # Need await to get async generator
cleaned_query, param=query_param
)
# Determine if the request is prefixed with "/bypass"
if mode == SearchMode.bypass:
if request.system:
rag.llm_model_kwargs["system_prompt"] = request.system
response = await rag.llm_model_func(
cleaned_query,
stream=True,
history_messages=conversation_history,
**rag.llm_model_kwargs,
)
else:
response = await rag.aquery( # Need await to get async generator
cleaned_query, param=query_param
)
async def stream_generator():
try:
@@ -1804,16 +1817,19 @@ def create_app(args):
else:
first_chunk_time = time.time_ns()
# Determine if the request is from Open WebUI's session title and session keyword generation task
# Determine if the request is prefixed with "/bypass" or comes from Open WebUI's session title and session keyword generation task
match_result = re.search(
r"\n<chat_history>\nUSER:", cleaned_query, re.MULTILINE
)
if match_result:
if match_result or mode == SearchMode.bypass:
if request.system:
rag.llm_model_kwargs["system_prompt"] = request.system
response_text = await rag.llm_model_func(
cleaned_query, stream=False, **rag.llm_model_kwargs
cleaned_query,
stream=False,
history_messages=conversation_history,
**rag.llm_model_kwargs,
)
else:
response_text = await rag.aquery(cleaned_query, param=query_param)
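Finally, a non-streaming "/bypass" request that leans on earlier LightRAG answers in the chat history might look like this; again, the address and response field names are assumptions following Ollama conventions:
```
import requests

# Assumed server address. "/bypass" skips LightRAG retrieval and sends the question,
# together with the conversation history, straight to the underlying LLM.
payload = {
    "model": "lightrag:latest",
    "messages": [
        {"role": "user", "content": "/mix What is LightRAG?"},
        {"role": "assistant", "content": "<previous LightRAG answer>"},
        {"role": "user", "content": "/bypass Summarize the answer above in one sentence."},
    ],
    "stream": False,
}
resp = requests.post("http://localhost:9621/api/chat", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["message"]["content"])
```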