diff --git a/examples/lightrag_api_open_webui_demo.py b/examples/lightrag_api_open_webui_demo.py
deleted file mode 100644
index 88454da8..00000000
--- a/examples/lightrag_api_open_webui_demo.py
+++ /dev/null
@@ -1,140 +0,0 @@
-from datetime import datetime, timezone
-from fastapi import FastAPI
-from fastapi.responses import StreamingResponse
-import inspect
-import json
-from pydantic import BaseModel
-from typing import Optional
-
-import os
-import logging
-from lightrag import LightRAG, QueryParam
-from lightrag.llm.ollama import ollama_model_complete, ollama_embed
-from lightrag.utils import EmbeddingFunc
-
-import nest_asyncio
-
-WORKING_DIR = "./dickens"
-
-logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
-
-if not os.path.exists(WORKING_DIR):
-    os.mkdir(WORKING_DIR)
-
-rag = LightRAG(
-    working_dir=WORKING_DIR,
-    llm_model_func=ollama_model_complete,
-    llm_model_name="qwen2.5:latest",
-    llm_model_max_async=4,
-    llm_model_max_token_size=32768,
-    llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
-    embedding_func=EmbeddingFunc(
-        embedding_dim=1024,
-        max_token_size=8192,
-        func=lambda texts: ollama_embed(
-            texts=texts, embed_model="bge-m3:latest", host="http://127.0.0.1:11434"
-        ),
-    ),
-)
-
-with open("./book.txt", "r", encoding="utf-8") as f:
-    rag.insert(f.read())
-
-# Apply nest_asyncio to solve event loop issues
-nest_asyncio.apply()
-
-app = FastAPI(title="LightRAG", description="LightRAG API open-webui")
-
-
-# Data models
-MODEL_NAME = "LightRAG:latest"
-
-
-class Message(BaseModel):
-    role: Optional[str] = None
-    content: str
-
-
-class OpenWebUIRequest(BaseModel):
-    stream: Optional[bool] = None
-    model: Optional[str] = None
-    messages: list[Message]
-
-
-# API routes
-
-
-@app.get("/")
-async def index():
-    return "Set Ollama link to http://ip:port/ollama in Open-WebUI Settings"
-
-
-@app.get("/ollama/api/version")
-async def ollama_version():
-    return {"version": "0.4.7"}
-
-
-@app.get("/ollama/api/tags")
-async def ollama_tags():
-    return {
-        "models": [
-            {
-                "name": MODEL_NAME,
-                "model": MODEL_NAME,
-                "modified_at": "2024-11-12T20:22:37.561463923+08:00",
-                "size": 4683087332,
-                "digest": "845dbda0ea48ed749caafd9e6037047aa19acfcfd82e704d7ca97d631a0b697e",
-                "details": {
-                    "parent_model": "",
-                    "format": "gguf",
-                    "family": "qwen2",
-                    "families": ["qwen2"],
-                    "parameter_size": "7.6B",
-                    "quantization_level": "Q4_K_M",
-                },
-            }
-        ]
-    }
-
-
-@app.post("/ollama/api/chat")
-async def ollama_chat(request: OpenWebUIRequest):
-    resp = rag.query(
-        request.messages[-1].content, param=QueryParam(mode="hybrid", stream=True)
-    )
-    if inspect.isasyncgen(resp):
-
-        async def ollama_resp(chunks):
-            async for chunk in chunks:
-                yield (
-                    json.dumps(
-                        {
-                            "model": MODEL_NAME,
-                            "created_at": datetime.now(timezone.utc).strftime(
-                                "%Y-%m-%dT%H:%M:%S.%fZ"
-                            ),
-                            "message": {
-                                "role": "assistant",
-                                "content": chunk,
-                            },
-                            "done": False,
-                        },
-                        ensure_ascii=False,
-                    ).encode("utf-8")
-                    + b"\n"
-                )  # the b"\n" is important
-
-        return StreamingResponse(ollama_resp(resp), media_type="application/json")
-    else:
-        return resp
-
-
-@app.get("/health")
-async def health_check():
-    return {"status": "healthy"}
-
-
-if __name__ == "__main__":
-    import uvicorn
-
-    uvicorn.run(app, host="0.0.0.0", port=8020)
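
For context on what this deletion removes: the demo served an Ollama-compatible chat endpoint that streamed newline-delimited JSON chunks with "done": false until the generator was exhausted. The following is a minimal client sketch (not part of the deleted file) showing how such an endpoint could have been consumed; the prompt text and the localhost:8020 address are assumptions taken from the file's __main__ block.

# Hypothetical client sketch: stream a chat completion from the demo's
# Ollama-compatible endpoint. Assumes the server above is running on
# localhost:8020; the prompt is illustrative only.
import json
import requests

resp = requests.post(
    "http://localhost:8020/ollama/api/chat",
    json={
        "model": "LightRAG:latest",
        "stream": True,
        "messages": [{"role": "user", "content": "Summarize the book."}],
    },
    stream=True,
)
# The endpoint emits one JSON object per line (NDJSON); the trailing
# b"\n" in the server code is what makes iter_lines() work here.
for line in resp.iter_lines():
    if line:
        chunk = json.loads(line)
        print(chunk["message"]["content"], end="", flush=True)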