Revert "Integrated the GraphML Visualizer as an optional component of LightRAG"
This commit is contained in:
358
extra/OpenWebuiTool/openwebui_tool.py
Normal file
358
extra/OpenWebuiTool/openwebui_tool.py
Normal file
@@ -0,0 +1,358 @@
|
||||
"""
|
||||
OpenWebui Lightrag Integration Tool
|
||||
==================================
|
||||
|
||||
This tool enables the integration and use of Lightrag within the OpenWebui environment,
|
||||
providing a seamless interface for RAG (Retrieval-Augmented Generation) operations.
|
||||
|
||||
Author: ParisNeo (parisneoai@gmail.com)
|
||||
Social:
|
||||
- Twitter: @ParisNeo_AI
|
||||
- Reddit: r/lollms
|
||||
- Instagram: https://www.instagram.com/parisneo_ai/
|
||||
|
||||
License: Apache 2.0
|
||||
Copyright (c) 2024-2025 ParisNeo
|
||||
|
||||
This tool is part of the LoLLMs project (Lord of Large Language and Multimodal Systems).
|
||||
For more information, visit: https://github.com/ParisNeo/lollms
|
||||
|
||||
Requirements:
|
||||
- Python 3.8+
|
||||
- OpenWebui
|
||||
- Lightrag
|
||||
"""
|
||||
|
||||
# Tool version
|
||||
__version__ = "1.0.0"
|
||||
__author__ = "ParisNeo"
|
||||
__author_email__ = "parisneoai@gmail.com"
|
||||
__description__ = "Lightrag integration for OpenWebui"
|
||||
|
||||
|
||||
import requests
|
||||
import json
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Callable, Any, Literal, Union, List, Tuple
|
||||
|
||||
|
||||
class StatusEventEmitter:
|
||||
def __init__(self, event_emitter: Callable[[dict], Any] = None):
|
||||
self.event_emitter = event_emitter
|
||||
|
||||
async def emit(self, description="Unknown State", status="in_progress", done=False):
|
||||
if self.event_emitter:
|
||||
await self.event_emitter(
|
||||
{
|
||||
"type": "status",
|
||||
"data": {
|
||||
"status": status,
|
||||
"description": description,
|
||||
"done": done,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class MessageEventEmitter:
|
||||
def __init__(self, event_emitter: Callable[[dict], Any] = None):
|
||||
self.event_emitter = event_emitter
|
||||
|
||||
async def emit(self, content="Some message"):
|
||||
if self.event_emitter:
|
||||
await self.event_emitter(
|
||||
{
|
||||
"type": "message",
|
||||
"data": {
|
||||
"content": content,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class Tools:
|
||||
class Valves(BaseModel):
|
||||
LIGHTRAG_SERVER_URL: str = Field(
|
||||
default="http://localhost:9621/query",
|
||||
description="The base URL for the LightRag server",
|
||||
)
|
||||
MODE: Literal["naive", "local", "global", "hybrid"] = Field(
|
||||
default="hybrid",
|
||||
description="The mode to use for the LightRag query. Options: naive, local, global, hybrid",
|
||||
)
|
||||
ONLY_NEED_CONTEXT: bool = Field(
|
||||
default=False,
|
||||
description="If True, only the context is needed from the LightRag response",
|
||||
)
|
||||
DEBUG_MODE: bool = Field(
|
||||
default=False,
|
||||
description="If True, debugging information will be emitted",
|
||||
)
|
||||
KEY: str = Field(
|
||||
default="",
|
||||
description="Optional Bearer Key for authentication",
|
||||
)
|
||||
MAX_ENTITIES: int = Field(
|
||||
default=5,
|
||||
description="Maximum number of entities to keep",
|
||||
)
|
||||
MAX_RELATIONSHIPS: int = Field(
|
||||
default=5,
|
||||
description="Maximum number of relationships to keep",
|
||||
)
|
||||
MAX_SOURCES: int = Field(
|
||||
default=3,
|
||||
description="Maximum number of sources to keep",
|
||||
)
|
||||
|
||||
def __init__(self):
|
||||
self.valves = self.Valves()
|
||||
self.headers = {
|
||||
"Content-Type": "application/json",
|
||||
"User-Agent": "LightRag-Tool/1.0",
|
||||
}
|
||||
|
||||
async def query_lightrag(
|
||||
self,
|
||||
query: str,
|
||||
__event_emitter__: Callable[[dict], Any] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Query the LightRag server and retrieve information.
|
||||
This function must be called before answering the user question
|
||||
:params query: The query string to send to the LightRag server.
|
||||
:return: The response from the LightRag server in Markdown format or raw response.
|
||||
"""
|
||||
self.status_emitter = StatusEventEmitter(__event_emitter__)
|
||||
self.message_emitter = MessageEventEmitter(__event_emitter__)
|
||||
|
||||
lightrag_url = self.valves.LIGHTRAG_SERVER_URL
|
||||
payload = {
|
||||
"query": query,
|
||||
"mode": str(self.valves.MODE),
|
||||
"stream": False,
|
||||
"only_need_context": self.valves.ONLY_NEED_CONTEXT,
|
||||
}
|
||||
await self.status_emitter.emit("Initializing Lightrag query..")
|
||||
|
||||
if self.valves.DEBUG_MODE:
|
||||
await self.message_emitter.emit(
|
||||
"### Debug Mode Active\n\nDebugging information will be displayed.\n"
|
||||
)
|
||||
await self.message_emitter.emit(
|
||||
"#### Payload Sent to LightRag Server\n```json\n"
|
||||
+ json.dumps(payload, indent=4)
|
||||
+ "\n```\n"
|
||||
)
|
||||
|
||||
# Add Bearer Key to headers if provided
|
||||
if self.valves.KEY:
|
||||
self.headers["Authorization"] = f"Bearer {self.valves.KEY}"
|
||||
|
||||
try:
|
||||
await self.status_emitter.emit("Sending request to LightRag server")
|
||||
|
||||
response = requests.post(
|
||||
lightrag_url, json=payload, headers=self.headers, timeout=120
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
await self.status_emitter.emit(
|
||||
status="complete",
|
||||
description="LightRag query Succeeded",
|
||||
done=True,
|
||||
)
|
||||
|
||||
# Return parsed Markdown if ONLY_NEED_CONTEXT is True, otherwise return raw response
|
||||
if self.valves.ONLY_NEED_CONTEXT:
|
||||
try:
|
||||
if self.valves.DEBUG_MODE:
|
||||
await self.message_emitter.emit(
|
||||
"#### LightRag Server Response\n```json\n"
|
||||
+ data["response"]
|
||||
+ "\n```\n"
|
||||
)
|
||||
except Exception as ex:
|
||||
if self.valves.DEBUG_MODE:
|
||||
await self.message_emitter.emit(
|
||||
"#### Exception\n" + str(ex) + "\n"
|
||||
)
|
||||
return f"Exception: {ex}"
|
||||
return data["response"]
|
||||
else:
|
||||
if self.valves.DEBUG_MODE:
|
||||
await self.message_emitter.emit(
|
||||
"#### LightRag Server Response\n```json\n"
|
||||
+ data["response"]
|
||||
+ "\n```\n"
|
||||
)
|
||||
await self.status_emitter.emit("Lightrag query success")
|
||||
return data["response"]
|
||||
|
||||
except requests.exceptions.RequestException as e:
|
||||
await self.status_emitter.emit(
|
||||
status="error",
|
||||
description=f"Error during LightRag query: {str(e)}",
|
||||
done=True,
|
||||
)
|
||||
return json.dumps({"error": str(e)})
|
||||
|
||||
def extract_code_blocks(
|
||||
self, text: str, return_remaining_text: bool = False
|
||||
) -> Union[List[dict], Tuple[List[dict], str]]:
|
||||
"""
|
||||
This function extracts code blocks from a given text and optionally returns the text without code blocks.
|
||||
|
||||
Parameters:
|
||||
text (str): The text from which to extract code blocks. Code blocks are identified by triple backticks (```).
|
||||
return_remaining_text (bool): If True, also returns the text with code blocks removed.
|
||||
|
||||
Returns:
|
||||
Union[List[dict], Tuple[List[dict], str]]:
|
||||
- If return_remaining_text is False: Returns only the list of code block dictionaries
|
||||
- If return_remaining_text is True: Returns a tuple containing:
|
||||
* List of code block dictionaries
|
||||
* String containing the text with all code blocks removed
|
||||
|
||||
Each code block dictionary contains:
|
||||
- 'index' (int): The index of the code block in the text
|
||||
- 'file_name' (str): The name of the file extracted from the preceding line, if available
|
||||
- 'content' (str): The content of the code block
|
||||
- 'type' (str): The type of the code block
|
||||
- 'is_complete' (bool): True if the block has a closing tag, False otherwise
|
||||
"""
|
||||
remaining = text
|
||||
bloc_index = 0
|
||||
first_index = 0
|
||||
indices = []
|
||||
text_without_blocks = text
|
||||
|
||||
# Find all code block delimiters
|
||||
while len(remaining) > 0:
|
||||
try:
|
||||
index = remaining.index("```")
|
||||
indices.append(index + first_index)
|
||||
remaining = remaining[index + 3 :]
|
||||
first_index += index + 3
|
||||
bloc_index += 1
|
||||
except Exception:
|
||||
if bloc_index % 2 == 1:
|
||||
index = len(remaining)
|
||||
indices.append(index)
|
||||
remaining = ""
|
||||
|
||||
code_blocks = []
|
||||
is_start = True
|
||||
|
||||
# Process code blocks and build text without blocks if requested
|
||||
if return_remaining_text:
|
||||
text_parts = []
|
||||
last_end = 0
|
||||
|
||||
for index, code_delimiter_position in enumerate(indices):
|
||||
if is_start:
|
||||
block_infos = {
|
||||
"index": len(code_blocks),
|
||||
"file_name": "",
|
||||
"section": "",
|
||||
"content": "",
|
||||
"type": "",
|
||||
"is_complete": False,
|
||||
}
|
||||
|
||||
# Store text before code block if returning remaining text
|
||||
if return_remaining_text:
|
||||
text_parts.append(text[last_end:code_delimiter_position].strip())
|
||||
|
||||
# Check the preceding line for file name
|
||||
preceding_text = text[:code_delimiter_position].strip().splitlines()
|
||||
if preceding_text:
|
||||
last_line = preceding_text[-1].strip()
|
||||
if last_line.startswith("<file_name>") and last_line.endswith(
|
||||
"</file_name>"
|
||||
):
|
||||
file_name = last_line[
|
||||
len("<file_name>") : -len("</file_name>")
|
||||
].strip()
|
||||
block_infos["file_name"] = file_name
|
||||
elif last_line.startswith("## filename:"):
|
||||
file_name = last_line[len("## filename:") :].strip()
|
||||
block_infos["file_name"] = file_name
|
||||
if last_line.startswith("<section>") and last_line.endswith(
|
||||
"</section>"
|
||||
):
|
||||
section = last_line[
|
||||
len("<section>") : -len("</section>")
|
||||
].strip()
|
||||
block_infos["section"] = section
|
||||
|
||||
sub_text = text[code_delimiter_position + 3 :]
|
||||
if len(sub_text) > 0:
|
||||
try:
|
||||
find_space = sub_text.index(" ")
|
||||
except Exception:
|
||||
find_space = int(1e10)
|
||||
try:
|
||||
find_return = sub_text.index("\n")
|
||||
except Exception:
|
||||
find_return = int(1e10)
|
||||
next_index = min(find_return, find_space)
|
||||
if "{" in sub_text[:next_index]:
|
||||
next_index = 0
|
||||
start_pos = next_index
|
||||
|
||||
if code_delimiter_position + 3 < len(text) and text[
|
||||
code_delimiter_position + 3
|
||||
] in ["\n", " ", "\t"]:
|
||||
block_infos["type"] = "language-specific"
|
||||
else:
|
||||
block_infos["type"] = sub_text[:next_index]
|
||||
|
||||
if index + 1 < len(indices):
|
||||
next_pos = indices[index + 1] - code_delimiter_position
|
||||
if (
|
||||
next_pos - 3 < len(sub_text)
|
||||
and sub_text[next_pos - 3] == "`"
|
||||
):
|
||||
block_infos["content"] = sub_text[
|
||||
start_pos : next_pos - 3
|
||||
].strip()
|
||||
block_infos["is_complete"] = True
|
||||
else:
|
||||
block_infos["content"] = sub_text[
|
||||
start_pos:next_pos
|
||||
].strip()
|
||||
block_infos["is_complete"] = False
|
||||
|
||||
if return_remaining_text:
|
||||
last_end = indices[index + 1] + 3
|
||||
else:
|
||||
block_infos["content"] = sub_text[start_pos:].strip()
|
||||
block_infos["is_complete"] = False
|
||||
|
||||
if return_remaining_text:
|
||||
last_end = len(text)
|
||||
|
||||
code_blocks.append(block_infos)
|
||||
is_start = False
|
||||
else:
|
||||
is_start = True
|
||||
|
||||
if return_remaining_text:
|
||||
# Add any remaining text after the last code block
|
||||
if last_end < len(text):
|
||||
text_parts.append(text[last_end:].strip())
|
||||
# Join all non-code parts with newlines
|
||||
text_without_blocks = "\n".join(filter(None, text_parts))
|
||||
return code_blocks, text_without_blocks
|
||||
|
||||
return code_blocks
|
||||
|
||||
def clean(self, csv_content: str):
|
||||
lines = csv_content.splitlines()
|
||||
if lines:
|
||||
# Remove spaces around headers and ensure no spaces between commas
|
||||
header = ",".join([col.strip() for col in lines[0].split(",")])
|
||||
lines[0] = header # Replace the first line with the cleaned header
|
||||
csv_content = "\n".join(lines)
|
||||
return csv_content
|
Reference in New Issue
Block a user