From 35f04b51e64462af8dfab5ddf01bdefe4e3b2326 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Fri, 17 Jan 2025 11:18:45 +0100 Subject: [PATCH 1/5] Update lightrag_server.py --- lightrag/api/lightrag_server.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index b898277a..a32f283e 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -270,6 +270,16 @@ def parse_args() -> argparse.Namespace: default=default_llm_host, help=f"llm server host URL (default: from env or {default_llm_host})", ) + + default_llm_api_key = get_env_value( + "LLM_BINDING_API_KEY", "" + ) + + parser.add_argument( + "--llm-binding-api-key", + default=default_llm_api_key, + help=f"llm server API key (default: from env or empty string)", + ) parser.add_argument( "--llm-model", @@ -286,6 +296,15 @@ def parse_args() -> argparse.Namespace: default=default_embedding_host, help=f"embedding server host URL (default: from env or {default_embedding_host})", ) + + default_embedding_api_key = get_env_value( + "EMBEDDING_BINDING_API_KEY", "" + ) + parser.add_argument( + "--embedding-binding-api-key", + default=default_embedding_api_key, + help=f"embedding server API key (default: from env or empty string)", + ) parser.add_argument( "--embedding-model", From c6774390fd188ced51a7e445084efe70dd02bbfb Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Fri, 17 Jan 2025 11:44:46 +0100 Subject: [PATCH 2/5] Update .env.example --- .env.example | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index 7d5c0fe5..21c1030a 100644 --- a/.env.example +++ b/.env.example @@ -13,9 +13,9 @@ LLM_BINDING_HOST=http://host.docker.internal:11434 LLM_MODEL=mistral-nemo:latest # Lollms example -LLM_BINDING=lollms -LLM_BINDING_HOST=http://host.docker.internal:9600 -LLM_MODEL=mistral-nemo:latest +# LLM_BINDING=lollms +# 
LLM_BINDING_HOST=http://host.docker.internal:9600 +# LLM_MODEL=mistral-nemo:latest # Embedding Configuration (Use valid host. For local services, you can use host.docker.internal) @@ -25,9 +25,9 @@ EMBEDDING_BINDING_HOST=http://host.docker.internal:11434 EMBEDDING_MODEL=bge-m3:latest # Lollms example -EMBEDDING_BINDING=lollms -EMBEDDING_BINDING_HOST=http://host.docker.internal:9600 -EMBEDDING_MODEL=bge-m3:latest +# EMBEDDING_BINDING=lollms +# EMBEDDING_BINDING_HOST=http://host.docker.internal:9600 +# EMBEDDING_MODEL=bge-m3:latest # RAG Configuration MAX_ASYNC=4 From 9cae05e1ff72bf830b500847e2a8f0ca73bba95c Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Sun, 19 Jan 2025 23:24:37 +0100 Subject: [PATCH 3/5] Fixed a bug introduced by a modification by someone else in azure_openai_complete (please make sure you test before committing code) Added api_key to lollms, ollama, openai for both llm and embedding bindings, allowing the use of API-key-protected services. --- lightrag/api/lightrag_server.py | 11 ++++++++--- lightrag/llm.py | 19 ++++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 2e0aabd7..e4cbac57 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -299,7 +299,7 @@ def parse_args() -> argparse.Namespace: ) default_llm_api_key = get_env_value( - "LLM_BINDING_API_KEY", "" + "LLM_BINDING_API_KEY", None ) parser.add_argument( @@ -649,22 +649,26 @@ def create_app(args): texts, embed_model=args.embedding_model, host=args.embedding_binding_host, + api_key = args.embedding_binding_api_key ) if args.embedding_binding == "lollms" else ollama_embed( texts, embed_model=args.embedding_model, host=args.embedding_binding_host, + api_key = args.embedding_binding_api_key ) if args.embedding_binding == "ollama" else azure_openai_embedding( texts, - model=args.embedding_model, # no host is used for openai + model=args.embedding_model, # no host 
is used for openai, + api_key = args.embedding_binding_api_key ) if args.embedding_binding == "azure_openai" else openai_embedding( texts, - model=args.embedding_model, # no host is used for openai + model=args.embedding_model, # no host is used for openai, + api_key = args.embedding_binding_api_key ), ) @@ -682,6 +686,7 @@ def create_app(args): "host": args.llm_binding_host, "timeout": args.timeout, "options": {"num_ctx": args.max_tokens}, + "api_key": args.llm_binding_api_key }, embedding_func=embedding_func, ) diff --git a/lightrag/llm.py b/lightrag/llm.py index 1f52d4ae..02fe3961 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -349,7 +349,9 @@ async def ollama_model_if_cache( host = kwargs.pop("host", None) timeout = kwargs.pop("timeout", None) kwargs.pop("hashing_kv", None) - ollama_client = ollama.AsyncClient(host=host, timeout=timeout) + api_key = kwargs.pop("api_key", None) + headers={'Authorization': f'Bearer {api_key}'} if api_key else None + ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers) messages = [] if system_prompt: messages.append({"role": "system", "content": system_prompt}) @@ -380,6 +382,8 @@ async def lollms_model_if_cache( """Client implementation for lollms generation.""" stream = True if kwargs.get("stream") else False + api_key = kwargs.pop("api_key", None) + headers={'Authorization': f'Bearer {api_key}'} if api_key else None # Extract lollms specific parameters request_data = { @@ -408,7 +412,7 @@ async def lollms_model_if_cache( request_data["prompt"] = full_prompt timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout", None)) - async with aiohttp.ClientSession(timeout=timeout) as session: + async with aiohttp.ClientSession(timeout=timeout,headers=headers) as session: if stream: async def inner(): @@ -622,7 +626,7 @@ async def nvidia_openai_complete( async def azure_openai_complete( - model: str = "gpt-4o-mini", prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs + 
model: str = "gpt-4o-mini", prompt="", system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs ) -> str: keyword_extraction = kwargs.pop("keyword_extraction", None) result = await azure_openai_complete_if_cache( @@ -1148,6 +1152,9 @@ async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarra async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray: + api_key = kwargs.pop("api_key",None) + headers = {"Authorization": api_key, "Content-Type": "application/json"} if api_key else None + kwargs["headers"]=headers ollama_client = ollama.Client(**kwargs) data = ollama_client.embed(model=embed_model, input=texts) return data["embeddings"] @@ -1168,13 +1175,15 @@ async def lollms_embed( Returns: np.ndarray: Array of embeddings """ - async with aiohttp.ClientSession() as session: + api_key = kwargs.pop("api_key",None) + headers = {"Authorization": api_key, "Content-Type": "application/json"} if api_key else None + async with aiohttp.ClientSession(headers=headers) as session: embeddings = [] for text in texts: request_data = {"text": text} async with session.post( - f"{base_url}/lollms_embed", json=request_data + f"{base_url}/lollms_embed", json=request_data, ) as response: result = await response.json() embeddings.append(result["vector"]) From 2dfbbec4073aee262cb409c3bf9283f08506d399 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Sun, 19 Jan 2025 23:25:26 +0100 Subject: [PATCH 4/5] Started a dummy OpenWebUI tool. 
I will upload the working version in the next pull request --- extra/OpenWebuiTool/openwebui_tool.py | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 extra/OpenWebuiTool/openwebui_tool.py diff --git a/extra/OpenWebuiTool/openwebui_tool.py b/extra/OpenWebuiTool/openwebui_tool.py new file mode 100644 index 00000000..260b779c --- /dev/null +++ b/extra/OpenWebuiTool/openwebui_tool.py @@ -0,0 +1,34 @@ +""" +OpenWebui Lightrag Integration Tool +================================== + +This tool enables the integration and use of Lightrag within the OpenWebui environment, +providing a seamless interface for RAG (Retrieval-Augmented Generation) operations. + +Author: ParisNeo (parisneoai@gmail.com) +Social: + - Twitter: @ParisNeo_AI + - Reddit: r/lollms + - Instagram: https://www.instagram.com/parisneo_ai/ + +License: Apache 2.0 +Copyright (c) 2024-2025 ParisNeo + +This tool is part of the LoLLMs project (Lord of Large Language and Multimodal Systems). +For more information, visit: https://github.com/ParisNeo/lollms + +Requirements: + - Python 3.8+ + - OpenWebui + - Lightrag +""" + +from pathlib import Path +from typing import Optional, List, Dict, Union, Any +from datetime import datetime + +# Tool version +__version__ = "1.0.0" +__author__ = "ParisNeo" +__author_email__ = "parisneoai@gmail.com" +__description__ = "Lightrag integration for OpenWebui" \ No newline at end of file From 70425b0357cdca8767b8546c2d3d9eab7284f693 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Mon, 20 Jan 2025 00:26:28 +0100 Subject: [PATCH 5/5] fixed linting --- extra/OpenWebuiTool/openwebui_tool.py | 6 +---- lightrag/api/lightrag_server.py | 28 +++++++++----------- lightrag/llm.py | 38 ++++++++++++++++++--------- 3 files changed, 39 insertions(+), 33 deletions(-) diff --git a/extra/OpenWebuiTool/openwebui_tool.py b/extra/OpenWebuiTool/openwebui_tool.py index 260b779c..1dabc68d 100644 --- a/extra/OpenWebuiTool/openwebui_tool.py +++ 
b/extra/OpenWebuiTool/openwebui_tool.py @@ -23,12 +23,8 @@ Requirements: - Lightrag """ -from pathlib import Path -from typing import Optional, List, Dict, Union, Any -from datetime import datetime - # Tool version __version__ = "1.0.0" __author__ = "ParisNeo" __author_email__ = "parisneoai@gmail.com" -__description__ = "Lightrag integration for OpenWebui" \ No newline at end of file +__description__ = "Lightrag integration for OpenWebui" diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index e4cbac57..21fc68ab 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -297,15 +297,13 @@ def parse_args() -> argparse.Namespace: default=default_llm_host, help=f"llm server host URL (default: from env or {default_llm_host})", ) - - default_llm_api_key = get_env_value( - "LLM_BINDING_API_KEY", None - ) - + + default_llm_api_key = get_env_value("LLM_BINDING_API_KEY", None) + parser.add_argument( "--llm-binding-api-key", default=default_llm_api_key, - help=f"llm server API key (default: from env or empty string)", + help="llm server API key (default: from env or empty string)", ) parser.add_argument( @@ -323,14 +321,12 @@ def parse_args() -> argparse.Namespace: default=default_embedding_host, help=f"embedding server host URL (default: from env or {default_embedding_host})", ) - - default_embedding_api_key = get_env_value( - "EMBEDDING_BINDING_API_KEY", "" - ) + + default_embedding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "") parser.add_argument( "--embedding-binding-api-key", default=default_embedding_api_key, - help=f"embedding server API key (default: from env or empty string)", + help="embedding server API key (default: from env or empty string)", ) parser.add_argument( @@ -649,26 +645,26 @@ def create_app(args): texts, embed_model=args.embedding_model, host=args.embedding_binding_host, - api_key = args.embedding_binding_api_key + api_key=args.embedding_binding_api_key, ) if args.embedding_binding == 
"lollms" else ollama_embed( texts, embed_model=args.embedding_model, host=args.embedding_binding_host, - api_key = args.embedding_binding_api_key + api_key=args.embedding_binding_api_key, ) if args.embedding_binding == "ollama" else azure_openai_embedding( texts, model=args.embedding_model, # no host is used for openai, - api_key = args.embedding_binding_api_key + api_key=args.embedding_binding_api_key, ) if args.embedding_binding == "azure_openai" else openai_embedding( texts, model=args.embedding_model, # no host is used for openai, - api_key = args.embedding_binding_api_key + api_key=args.embedding_binding_api_key, ), ) @@ -686,7 +682,7 @@ def create_app(args): "host": args.llm_binding_host, "timeout": args.timeout, "options": {"num_ctx": args.max_tokens}, - "api_key": args.llm_binding_api_key + "api_key": args.llm_binding_api_key, }, embedding_func=embedding_func, ) diff --git a/lightrag/llm.py b/lightrag/llm.py index 02fe3961..c3e395c9 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -349,8 +349,8 @@ async def ollama_model_if_cache( host = kwargs.pop("host", None) timeout = kwargs.pop("timeout", None) kwargs.pop("hashing_kv", None) - api_key = kwargs.pop("api_key", None) - headers={'Authorization': f'Bearer {api_key}'} if api_key else None + api_key = kwargs.pop("api_key", None) + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers) messages = [] if system_prompt: @@ -382,8 +382,8 @@ async def lollms_model_if_cache( """Client implementation for lollms generation.""" stream = True if kwargs.get("stream") else False - api_key = kwargs.pop("api_key", None) - headers={'Authorization': f'Bearer {api_key}'} if api_key else None + api_key = kwargs.pop("api_key", None) + headers = {"Authorization": f"Bearer {api_key}"} if api_key else None # Extract lollms specific parameters request_data = { @@ -412,7 +412,7 @@ async def lollms_model_if_cache( request_data["prompt"] 
= full_prompt timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout", None)) - async with aiohttp.ClientSession(timeout=timeout,headers=headers) as session: + async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session: if stream: async def inner(): @@ -626,7 +626,12 @@ async def nvidia_openai_complete( async def azure_openai_complete( - model: str = "gpt-4o-mini", prompt="", system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs + model: str = "gpt-4o-mini", + prompt="", + system_prompt=None, + history_messages=[], + keyword_extraction=False, + **kwargs, ) -> str: keyword_extraction = kwargs.pop("keyword_extraction", None) result = await azure_openai_complete_if_cache( @@ -1152,9 +1157,13 @@ async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarra async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray: - api_key = kwargs.pop("api_key",None) - headers = {"Authorization": api_key, "Content-Type": "application/json"} if api_key else None - kwargs["headers"]=headers + api_key = kwargs.pop("api_key", None) + headers = ( + {"Authorization": api_key, "Content-Type": "application/json"} + if api_key + else None + ) + kwargs["headers"] = headers ollama_client = ollama.Client(**kwargs) data = ollama_client.embed(model=embed_model, input=texts) return data["embeddings"] @@ -1175,15 +1184,20 @@ async def lollms_embed( Returns: np.ndarray: Array of embeddings """ - api_key = kwargs.pop("api_key",None) - headers = {"Authorization": api_key, "Content-Type": "application/json"} if api_key else None + api_key = kwargs.pop("api_key", None) + headers = ( + {"Authorization": api_key, "Content-Type": "application/json"} + if api_key + else None + ) async with aiohttp.ClientSession(headers=headers) as session: embeddings = [] for text in texts: request_data = {"text": text} async with session.post( - f"{base_url}/lollms_embed", json=request_data, + f"{base_url}/lollms_embed", + json=request_data, 
) as response: result = await response.json() embeddings.append(result["vector"])