Merge pull request #1552 from danielaskdd/ollama-user-prompt
Add user prompt support for Ollama api
Chinese README (translated):

```diff
@@ -202,6 +202,15 @@ Open WebUI uses the LLM to perform session title and session keyword generation
 "/context" is also not a LightRAG query mode; it tells LightRAG to return only the context information prepared for the LLM. You can check whether the context meets your needs, or process the context yourself.
 
+### Add user prompt in chat
+
+When using LightRAG for content queries, avoid combining the search process with unrelated output processing, as this significantly degrades query effectiveness. The user prompt is designed precisely to solve this problem: it does not participate in the RAG retrieval phase, but instead guides the LLM on how to process the retrieved results after the query completes. We can append the user prompt in square brackets to the end of the query prefix to pass it to the LLM:
+
+```
+/[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge
+/mix[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge
+```
+
 ## API Key and Authentication
 
 By default, the LightRAG Server can be accessed without any authentication. We can configure the server with an API key or account credentials to secure it.
 
```
English README:

```diff
@@ -204,6 +204,15 @@ For example, the chat message `/mix What's LightRAG?` will trigger a mix mode query
 `/context` is also not a LightRAG query mode; it tells LightRAG to return only the context information prepared for the LLM. You can check whether the context is what you want, or process the context yourself.
 
+### Add user prompt in chat
+
+When using LightRAG for content queries, avoid combining the search process with unrelated output processing, as this significantly impacts query effectiveness. The user prompt is designed specifically to address this issue: it does not participate in the RAG retrieval phase, but instead guides the LLM on how to process the retrieved results after the query completes. We can append the user prompt in square brackets to the query prefix to pass it to the LLM:
+
+```
+/[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge
+/mix[Use mermaid format for diagrams] Please draw a character relationship diagram for Scrooge
+```
+
 ## API Key and Authentication
 
 By default, the LightRAG Server can be accessed without any authentication. We can configure the server with an API Key or account credentials to secure it.
 
```
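Since the prefix parsing happens server-side in the Ollama-compatible chat endpoint, the bracketed user prompt can be sent from any Ollama client. A minimal sketch over plain HTTP; the host, port, and model name here are assumptions, not taken from this commit, so adjust them to your deployment:

```python
import requests  # third-party; pip install requests

# Assumed defaults: LightRAG Server listening on localhost:9621 and
# exposing an Ollama-compatible /api/chat endpoint.
resp = requests.post(
    "http://localhost:9621/api/chat",
    json={
        "model": "lightrag:latest",  # placeholder model name
        "stream": False,
        "messages": [
            {
                "role": "user",
                "content": (
                    "/mix[Use mermaid format for diagrams] "
                    "Please draw a character relationship diagram for Scrooge"
                ),
            }
        ],
    },
)
resp.raise_for_status()
print(resp.json()["message"]["content"])  # Ollama-style chat response body
```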
Ollama API implementation:

```diff
@@ -101,10 +101,31 @@ def estimate_tokens(text: str) -> int:
     return len(tokens)
 
 
-def parse_query_mode(query: str) -> tuple[str, SearchMode, bool]:
+def parse_query_mode(query: str) -> tuple[str, SearchMode, bool, Optional[str]]:
     """Parse query prefix to determine search mode
-    Returns tuple of (cleaned_query, search_mode, only_need_context)
+    Returns tuple of (cleaned_query, search_mode, only_need_context, user_prompt)
+
+    Examples:
+    - "/local[use mermaid format for diagrams] query string" -> (cleaned_query, SearchMode.local, False, "use mermaid format for diagrams")
+    - "/[use mermaid format for diagrams] query string" -> (cleaned_query, SearchMode.hybrid, False, "use mermaid format for diagrams")
+    - "/local query string" -> (cleaned_query, SearchMode.local, False, None)
     """
+    # Initialize user_prompt as None
+    user_prompt = None
+
+    # First check if there's a bracket format for user prompt
+    bracket_pattern = r"^/([a-z]*)\[(.*?)\](.*)"
+    bracket_match = re.match(bracket_pattern, query)
+
+    if bracket_match:
+        mode_prefix = bracket_match.group(1)
+        user_prompt = bracket_match.group(2)
+        remaining_query = bracket_match.group(3).lstrip()
+
+        # Reconstruct query, removing the bracket part
+        query = f"/{mode_prefix} {remaining_query}".strip()
+
+    # Unified handling of mode and only_need_context determination
     mode_map = {
        "/local ": (SearchMode.local, False),
        "/global ": (
```
```diff
@@ -128,11 +149,11 @@ def parse_query_mode(query: str) -> tuple[str, SearchMode, bool]:
 
     for prefix, (mode, only_need_context) in mode_map.items():
         if query.startswith(prefix):
-            # After removing prefix an leading spaces
+            # After removing prefix and leading spaces
             cleaned_query = query[len(prefix) :].lstrip()
-            return cleaned_query, mode, only_need_context
+            return cleaned_query, mode, only_need_context, user_prompt
 
-    return query, SearchMode.hybrid, False
+    return query, SearchMode.hybrid, False, user_prompt
 
 
 class OllamaAPI:
```
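For reference, here is a standalone sketch of just the bracket-extraction step above; only the regex from the diff is reproduced, while `SearchMode` and the `mode_map` handling are omitted:

```python
import re

# The bracket pattern introduced above: optional lowercase mode prefix,
# then [user prompt], then the remaining query text.
bracket_pattern = r"^/([a-z]*)\[(.*?)\](.*)"

for query in (
    "/local[use mermaid format for diagrams] query string",
    "/[use mermaid format for diagrams] query string",
    "/local query string",
):
    m = re.match(bracket_pattern, query)
    if m:
        mode_prefix, user_prompt, rest = m.groups()
        label = mode_prefix if mode_prefix else "hybrid (default)"
        print(label, "|", user_prompt, "|", rest.lstrip())
    else:
        # No brackets: the query passes through to the mode_map unchanged
        print("no bracket, query left as-is:", query)
```

Running this prints `local | use mermaid format for diagrams | query string` for the first line, the hybrid default for the second, and falls through for the third, matching the docstring examples in the diff.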
```diff
@@ -362,7 +383,9 @@ class OllamaAPI:
             ]
 
             # Check for query prefix
-            cleaned_query, mode, only_need_context = parse_query_mode(query)
+            cleaned_query, mode, only_need_context, user_prompt = parse_query_mode(
+                query
+            )
 
             start_time = time.time_ns()
             prompt_tokens = estimate_tokens(cleaned_query)
```
```diff
@@ -375,6 +398,10 @@ class OllamaAPI:
                 "top_k": self.top_k,
             }
 
+            # Add user_prompt to param_dict
+            if user_prompt is not None:
+                param_dict["user_prompt"] = user_prompt
+
             if (
                 hasattr(self.rag, "args")
                 and self.rag.args.history_turns is not None
```
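The guard above means `user_prompt` is only forwarded when a bracketed prompt was actually present, so prefix-only queries behave exactly as before. The diff does not show where `param_dict` goes next; a hypothetical hand-off, assuming a `QueryParam`-style dataclass whose field names mirror the dict keys:

```python
from dataclasses import dataclass
from typing import Optional

# Hypothetical stand-in for the server's query-parameter object; the real
# class and its defaults are not shown in this diff.
@dataclass
class QueryParam:
    mode: str = "hybrid"
    only_need_context: bool = False
    top_k: int = 60
    user_prompt: Optional[str] = None

param_dict = {"mode": "mix", "only_need_context": False, "top_k": 10}
user_prompt = "Use mermaid format for diagrams"  # parsed from the brackets

# Same conditional as the diff: only add the key when a prompt exists
if user_prompt is not None:
    param_dict["user_prompt"] = user_prompt

print(QueryParam(**param_dict))
```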
```diff
@@ -524,7 +551,7 @@ class OllamaAPI:
                     "Cache-Control": "no-cache",
                     "Connection": "keep-alive",
                     "Content-Type": "application/x-ndjson",
-                    "X-Accel-Buffering": "no",  # 确保在Nginx代理时正确处理流式响应
+                    "X-Accel-Buffering": "no",  # Ensure proper handling of streaming responses in Nginx proxy
                },
            )
        else:
```
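The `X-Accel-Buffering` header matters because Nginx buffers proxied responses by default, which would hold back streamed NDJSON chunks until the buffer fills. A minimal, self-contained sketch of the same header pattern with FastAPI; the endpoint name and generator below are hypothetical, not the project's actual handler:

```python
from fastapi import FastAPI
from fastapi.responses import StreamingResponse

app = FastAPI()

@app.get("/demo/stream")  # hypothetical endpoint, for illustration only
async def demo_stream():
    async def ndjson_gen():
        # NDJSON framing: one JSON object per line
        yield '{"message": "chunk 1"}\n'
        yield '{"message": "chunk 2"}\n'

    return StreamingResponse(
        ndjson_gen(),
        headers={
            "Cache-Control": "no-cache",  # do not cache the stream
            "Connection": "keep-alive",
            "Content-Type": "application/x-ndjson",
            "X-Accel-Buffering": "no",  # tell Nginx not to buffer chunks
        },
    )
```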