Merge pull request #605 from ParisNeo/main

Pull Request: API Key Management Implementation and Azure OpenAI Bug Fix
2025-01-20 12:27:32 +08:00
parent 617eb1bfaa f8ba76f7b8
commit e589134a97
3 changed files with 74 additions and 6 deletions
--- a/extra/OpenWebuiTool/openwebui_tool.py
+++ b/extra/OpenWebuiTool/openwebui_tool.py
@@ -0,0 +1,30 @@
+"""
+OpenWebui Lightrag Integration Tool
+===================================
+
+This tool enables the integration and use of Lightrag within the OpenWebui environment,
+providing a seamless interface for RAG (Retrieval-Augmented Generation) operations.
+
+Author: ParisNeo (parisneoai@gmail.com)
+Social:
+    - Twitter: @ParisNeo_AI
+    - Reddit: r/lollms
+    - Instagram: https://www.instagram.com/parisneo_ai/
+
+License: Apache 2.0
+Copyright (c) 2024-2025 ParisNeo
+
+This tool is part of the LoLLMs project (Lord of Large Language and Multimodal Systems).
+For more information, visit: https://github.com/ParisNeo/lollms
+
+Requirements:
+    - Python 3.8+
+    - OpenWebui
+    - Lightrag
+"""
+
+# Tool metadata: version, author, contact e-mail and one-line description
+__version__ = "1.0.0"
+__author__ = "ParisNeo"
+__author_email__ = "parisneoai@gmail.com"
+__description__ = "Lightrag integration for OpenWebui"
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -298,6 +298,14 @@ def parse_args() -> argparse.Namespace:
        help=f"llm server host URL (default: from env or {default_llm_host})",
    )

+    default_llm_api_key = get_env_value("LLM_BINDING_API_KEY", None)
+
+    parser.add_argument(
+        "--llm-binding-api-key",
+        default=default_llm_api_key,
+        help="llm server API key (default: from env or empty string)",
+    )
+
    parser.add_argument(
        "--llm-model",
        default=get_env_value("LLM_MODEL", "mistral-nemo:latest"),
@@ -314,6 +322,13 @@ def parse_args() -> argparse.Namespace:
        help=f"embedding server host URL (default: from env or {default_embedding_host})",
    )

+    default_embedding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "")
+    parser.add_argument(
+        "--embedding-binding-api-key",
+        default=default_embedding_api_key,
+        help="embedding server API key (default: from env or empty string)",
+    )
+
    parser.add_argument(
        "--embedding-model",
        default=get_env_value("EMBEDDING_MODEL", "bge-m3:latest"),
@@ -630,22 +645,26 @@ def create_app(args):
            texts,
            embed_model=args.embedding_model,
            host=args.embedding_binding_host,
+            api_key=args.embedding_binding_api_key,
        )
        if args.embedding_binding == "lollms"
        else ollama_embed(
            texts,
            embed_model=args.embedding_model,
            host=args.embedding_binding_host,
+            api_key=args.embedding_binding_api_key,
        )
        if args.embedding_binding == "ollama"
        else azure_openai_embedding(
            texts,
-            model=args.embedding_model,  # no host is used for openai
+            model=args.embedding_model,  # no host is used for openai,
+            api_key=args.embedding_binding_api_key,
        )
        if args.embedding_binding == "azure_openai"
        else openai_embedding(
            texts,
-            model=args.embedding_model,  # no host is used for openai
+            model=args.embedding_model,  # no host is used for openai,
+            api_key=args.embedding_binding_api_key,
        ),
    )

@@ -663,6 +682,7 @@ def create_app(args):
                "host": args.llm_binding_host,
                "timeout": args.timeout,
                "options": {"num_ctx": args.max_tokens},
+                "api_key": args.llm_binding_api_key,
            },
            embedding_func=embedding_func,
        )
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -349,7 +349,9 @@ async def ollama_model_if_cache(
    host = kwargs.pop("host", None)
    timeout = kwargs.pop("timeout", None)
    kwargs.pop("hashing_kv", None)
-    ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
+    api_key = kwargs.pop("api_key", None)
+    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
+    ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers)
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
@@ -380,6 +382,8 @@ async def lollms_model_if_cache(
    """Client implementation for lollms generation."""

    stream = True if kwargs.get("stream") else False
+    api_key = kwargs.pop("api_key", None)
+    headers = {"Authorization": f"Bearer {api_key}"} if api_key else None

    # Extract lollms specific parameters
    request_data = {
@@ -408,7 +412,7 @@ async def lollms_model_if_cache(
    request_data["prompt"] = full_prompt
    timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout", None))

-    async with aiohttp.ClientSession(timeout=timeout) as session:
+    async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
        if stream:

            async def inner():
@@ -1148,6 +1152,13 @@ async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarra


 async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
+    """Embed texts with Ollama; an ``api_key`` kwarg is sent as a Bearer token."""
+    api_key = kwargs.pop("api_key", None)
+    if api_key:
+        # "Bearer " prefix matches the scheme ollama_model_if_cache sends.
+        auth = f"Bearer {api_key}"
+        kwargs["headers"] = {"Authorization": auth, "Content-Type": "application/json"}
+    # Remaining kwargs (e.g. host) are forwarded unchanged to the client.
     ollama_client = ollama.Client(**kwargs)
     data = ollama_client.embed(model=embed_model, input=texts)
     return data["embeddings"]
@@ -1168,13 +1179,20 @@ async def lollms_embed(
    Returns:
        np.ndarray: Array of embeddings
    """
-    async with aiohttp.ClientSession() as session:
+    api_key = kwargs.pop("api_key", None)
+    headers = (
+        {"Authorization": api_key, "Content-Type": "application/json"}
+        if api_key
+        else None
+    )
+    async with aiohttp.ClientSession(headers=headers) as session:
        embeddings = []
        for text in texts:
            request_data = {"text": text}

            async with session.post(
-                f"{base_url}/lollms_embed", json=request_data
+                f"{base_url}/lollms_embed",
+                json=request_data,
            ) as response:
                result = await response.json()
                embeddings.append(result["vector"])