Merge pull request #605 from ParisNeo/main
Pull Request: API Key Management Implementation and Azure OpenAI Bug Fix
This commit is contained in:
30
extra/OpenWebuiTool/openwebui_tool.py
Normal file
30
extra/OpenWebuiTool/openwebui_tool.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
"""
|
||||||
|
OpenWebui Lightrag Integration Tool
|
||||||
|
===================================
|
||||||
|
|
||||||
|
This tool enables the integration and use of Lightrag within the OpenWebui environment,
|
||||||
|
providing a seamless interface for RAG (Retrieval-Augmented Generation) operations.
|
||||||
|
|
||||||
|
Author: ParisNeo (parisneoai@gmail.com)
|
||||||
|
Social:
|
||||||
|
- Twitter: @ParisNeo_AI
|
||||||
|
- Reddit: r/lollms
|
||||||
|
- Instagram: https://www.instagram.com/parisneo_ai/
|
||||||
|
|
||||||
|
License: Apache 2.0
|
||||||
|
Copyright (c) 2024-2025 ParisNeo
|
||||||
|
|
||||||
|
This tool is part of the LoLLMs project (Lord of Large Language and Multimodal Systems).
|
||||||
|
For more information, visit: https://github.com/ParisNeo/lollms
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Python 3.8+
|
||||||
|
- OpenWebui
|
||||||
|
- Lightrag
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Tool version
|
||||||
|
__version__ = "1.0.0"
|
||||||
|
__author__ = "ParisNeo"
|
||||||
|
__author_email__ = "parisneoai@gmail.com"
|
||||||
|
__description__ = "Lightrag integration for OpenWebui"
|
@@ -298,6 +298,14 @@ def parse_args() -> argparse.Namespace:
|
|||||||
help=f"llm server host URL (default: from env or {default_llm_host})",
|
help=f"llm server host URL (default: from env or {default_llm_host})",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
default_llm_api_key = get_env_value("LLM_BINDING_API_KEY", None)
|
||||||
|
|
||||||
|
parser.add_argument(
|
||||||
|
"--llm-binding-api-key",
|
||||||
|
default=default_llm_api_key,
|
||||||
|
help="llm server API key (default: from env or empty string)",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--llm-model",
|
"--llm-model",
|
||||||
default=get_env_value("LLM_MODEL", "mistral-nemo:latest"),
|
default=get_env_value("LLM_MODEL", "mistral-nemo:latest"),
|
||||||
@@ -314,6 +322,13 @@ def parse_args() -> argparse.Namespace:
|
|||||||
help=f"embedding server host URL (default: from env or {default_embedding_host})",
|
help=f"embedding server host URL (default: from env or {default_embedding_host})",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
default_embedding_api_key = get_env_value("EMBEDDING_BINDING_API_KEY", "")
|
||||||
|
parser.add_argument(
|
||||||
|
"--embedding-binding-api-key",
|
||||||
|
default=default_embedding_api_key,
|
||||||
|
help="embedding server API key (default: from env or empty string)",
|
||||||
|
)
|
||||||
|
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--embedding-model",
|
"--embedding-model",
|
||||||
default=get_env_value("EMBEDDING_MODEL", "bge-m3:latest"),
|
default=get_env_value("EMBEDDING_MODEL", "bge-m3:latest"),
|
||||||
@@ -630,22 +645,26 @@ def create_app(args):
|
|||||||
texts,
|
texts,
|
||||||
embed_model=args.embedding_model,
|
embed_model=args.embedding_model,
|
||||||
host=args.embedding_binding_host,
|
host=args.embedding_binding_host,
|
||||||
|
api_key=args.embedding_binding_api_key,
|
||||||
)
|
)
|
||||||
if args.embedding_binding == "lollms"
|
if args.embedding_binding == "lollms"
|
||||||
else ollama_embed(
|
else ollama_embed(
|
||||||
texts,
|
texts,
|
||||||
embed_model=args.embedding_model,
|
embed_model=args.embedding_model,
|
||||||
host=args.embedding_binding_host,
|
host=args.embedding_binding_host,
|
||||||
|
api_key=args.embedding_binding_api_key,
|
||||||
)
|
)
|
||||||
if args.embedding_binding == "ollama"
|
if args.embedding_binding == "ollama"
|
||||||
else azure_openai_embedding(
|
else azure_openai_embedding(
|
||||||
texts,
|
texts,
|
||||||
model=args.embedding_model, # no host is used for openai
|
model=args.embedding_model, # no host is used for openai,
|
||||||
|
api_key=args.embedding_binding_api_key,
|
||||||
)
|
)
|
||||||
if args.embedding_binding == "azure_openai"
|
if args.embedding_binding == "azure_openai"
|
||||||
else openai_embedding(
|
else openai_embedding(
|
||||||
texts,
|
texts,
|
||||||
model=args.embedding_model, # no host is used for openai
|
model=args.embedding_model, # no host is used for openai,
|
||||||
|
api_key=args.embedding_binding_api_key,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -663,6 +682,7 @@ def create_app(args):
|
|||||||
"host": args.llm_binding_host,
|
"host": args.llm_binding_host,
|
||||||
"timeout": args.timeout,
|
"timeout": args.timeout,
|
||||||
"options": {"num_ctx": args.max_tokens},
|
"options": {"num_ctx": args.max_tokens},
|
||||||
|
"api_key": args.llm_binding_api_key,
|
||||||
},
|
},
|
||||||
embedding_func=embedding_func,
|
embedding_func=embedding_func,
|
||||||
)
|
)
|
||||||
|
@@ -349,7 +349,9 @@ async def ollama_model_if_cache(
|
|||||||
host = kwargs.pop("host", None)
|
host = kwargs.pop("host", None)
|
||||||
timeout = kwargs.pop("timeout", None)
|
timeout = kwargs.pop("timeout", None)
|
||||||
kwargs.pop("hashing_kv", None)
|
kwargs.pop("hashing_kv", None)
|
||||||
ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
|
api_key = kwargs.pop("api_key", None)
|
||||||
|
headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
|
||||||
|
ollama_client = ollama.AsyncClient(host=host, timeout=timeout, headers=headers)
|
||||||
messages = []
|
messages = []
|
||||||
if system_prompt:
|
if system_prompt:
|
||||||
messages.append({"role": "system", "content": system_prompt})
|
messages.append({"role": "system", "content": system_prompt})
|
||||||
@@ -380,6 +382,8 @@ async def lollms_model_if_cache(
|
|||||||
"""Client implementation for lollms generation."""
|
"""Client implementation for lollms generation."""
|
||||||
|
|
||||||
stream = True if kwargs.get("stream") else False
|
stream = True if kwargs.get("stream") else False
|
||||||
|
api_key = kwargs.pop("api_key", None)
|
||||||
|
headers = {"Authorization": f"Bearer {api_key}"} if api_key else None
|
||||||
|
|
||||||
# Extract lollms specific parameters
|
# Extract lollms specific parameters
|
||||||
request_data = {
|
request_data = {
|
||||||
@@ -408,7 +412,7 @@ async def lollms_model_if_cache(
|
|||||||
request_data["prompt"] = full_prompt
|
request_data["prompt"] = full_prompt
|
||||||
timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout", None))
|
timeout = aiohttp.ClientTimeout(total=kwargs.get("timeout", None))
|
||||||
|
|
||||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
async with aiohttp.ClientSession(timeout=timeout, headers=headers) as session:
|
||||||
if stream:
|
if stream:
|
||||||
|
|
||||||
async def inner():
|
async def inner():
|
||||||
@@ -1148,6 +1152,13 @@ async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarra
|
|||||||
|
|
||||||
|
|
||||||
async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
    """Request embeddings for ``texts`` from an Ollama server.

    Args:
        texts: Strings to embed; sent as ``input`` to ``Client.embed``.
        embed_model: Name of the embedding model; sent as ``model``.
        **kwargs: Forwarded to ``ollama.Client``. An optional ``api_key``
            entry is removed first and converted into request headers.
            Any caller-supplied ``headers`` entry is overwritten.

    Returns:
        The ``"embeddings"`` entry of the response returned by
        ``Client.embed``.
    """
    api_key = kwargs.pop("api_key", None)
    if api_key:
        # Send the key with the "Bearer" scheme, matching the chat-completion
        # clients in this module. A key that already carries the scheme is
        # passed through unchanged so existing workarounds keep working.
        if api_key.lower().startswith("bearer "):
            authorization = api_key
        else:
            authorization = f"Bearer {api_key}"
        headers = {
            "Authorization": authorization,
            "Content-Type": "application/json",
        }
    else:
        headers = None
    kwargs["headers"] = headers
    # NOTE(review): this is a synchronous client inside an ``async def``, so
    # the call below presumably blocks the event loop — confirm whether
    # ``ollama.AsyncClient`` should be used instead.
    ollama_client = ollama.Client(**kwargs)
    data = ollama_client.embed(model=embed_model, input=texts)
    return data["embeddings"]
|
||||||
@@ -1168,13 +1179,20 @@ async def lollms_embed(
|
|||||||
Returns:
|
Returns:
|
||||||
np.ndarray: Array of embeddings
|
np.ndarray: Array of embeddings
|
||||||
"""
|
"""
|
||||||
async with aiohttp.ClientSession() as session:
|
api_key = kwargs.pop("api_key", None)
|
||||||
|
headers = (
|
||||||
|
{"Authorization": api_key, "Content-Type": "application/json"}
|
||||||
|
if api_key
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
async with aiohttp.ClientSession(headers=headers) as session:
|
||||||
embeddings = []
|
embeddings = []
|
||||||
for text in texts:
|
for text in texts:
|
||||||
request_data = {"text": text}
|
request_data = {"text": text}
|
||||||
|
|
||||||
async with session.post(
|
async with session.post(
|
||||||
f"{base_url}/lollms_embed", json=request_data
|
f"{base_url}/lollms_embed",
|
||||||
|
json=request_data,
|
||||||
) as response:
|
) as response:
|
||||||
result = await response.json()
|
result = await response.json()
|
||||||
embeddings.append(result["vector"])
|
embeddings.append(result["vector"])
|
||||||
|
Reference in New Issue
Block a user