From d8f4f3eedee98fc3e0b3724298a7355a80708c8f Mon Sep 17 00:00:00 2001
From: Saifeddine ALOUI
Date: Mon, 16 Dec 2024 01:05:49 +0100
Subject: [PATCH] Added a fastapi service

---
 .gitignore                    |   1 +
 api/README.md                 | 182 +++++++++++++++++++++++
 api/ollama_lightrag_server.py | 271 ++++++++++++++++++++++++++++++++++
 api/requirements.txt          |   2 +
 fast_api_server/__init__.py   |   0
 5 files changed, 456 insertions(+)
 create mode 100644 api/README.md
 create mode 100644 api/ollama_lightrag_server.py
 create mode 100644 api/requirements.txt
 create mode 100644 fast_api_server/__init__.py

diff --git a/.gitignore b/.gitignore
index e6f5f5ba..68484898 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ ignore_this.txt
 .ruff_cache/
 gui/
 *.log
+.vscode
\ No newline at end of file
diff --git a/api/README.md b/api/README.md
new file mode 100644
index 00000000..0d9a8b0c
--- /dev/null
+++ b/api/README.md
@@ -0,0 +1,182 @@
+# LightRAG API Server
+
+A FastAPI-based server for managing and querying documents using LightRAG (Light Retrieval-Augmented Generation). It provides a REST API for document management and intelligent querying, with LLM inference served through Ollama.
+
+## Features
+
+- 🔍 Multiple search modes (naive, local, global, hybrid)
+- 📡 Streaming and non-streaming responses
+- 📝 Document management (insert, batch upload, clear)
+- ⚙️ Highly configurable model parameters
+- 📚 Support for text and file uploads
+- 🔧 RESTful API with automatic documentation
+- 🚀 Built with FastAPI for high performance
+
+## Prerequisites
+
+- Python 3.8+
+- An Ollama server running locally or remotely
+- Required Python packages:
+  - fastapi
+  - uvicorn
+  - lightrag
+  - pydantic
+
+## Installation
+
+1. Clone the repository:
+```bash
+git clone https://github.com/yourusername/lightrag-server.git
+cd lightrag-server
+```
+
+2. Install dependencies:
+```bash
+pip install -r requirements.txt
+```
+
+3. Make sure Ollama is running and accessible.
+
+## Configuration
+
+The server can be configured using command-line arguments:
+
+```bash
+python ollama_lightrag_server.py --help
+```
+
+Available options:
+
+| Parameter | Default | Description |
+|-----------|---------|-------------|
+| --host | 0.0.0.0 | Server host |
+| --port | 8000 | Server port |
+| --model | gemma2:2b | LLM model name |
+| --embedding-model | nomic-embed-text | Embedding model name |
+| --ollama-host | http://localhost:11434 | Ollama host URL |
+| --working-dir | ./dickens | Working directory for RAG storage |
+| --max-async | 4 | Maximum number of concurrent LLM calls |
+| --max-tokens | 32768 | Maximum LLM token size |
+| --embedding-dim | 768 | Embedding dimensions |
+| --max-embed-tokens | 8192 | Maximum embedding token size |
+| --input-file | ./book.txt | Initial input file indexed at startup (if present) |
+| --log-level | INFO | Logging level |
+
+## Quick Start
+
+1. Basic usage with default settings:
+```bash
+python ollama_lightrag_server.py
+```
+
+2. Custom configuration:
+```bash
+python ollama_lightrag_server.py --model llama2:13b --port 8080 --working-dir ./custom_rag
+```
+
+3. Using the launch script (see the sketch below):
+```bash
+chmod +x launch_rag_server.sh
+./launch_rag_server.sh
+```
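+
+The `launch_rag_server.sh` script referenced above is not included in this patch. A minimal sketch of what such a script might contain (model names, paths and port are only examples to adapt):
+
+```bash
+#!/usr/bin/env bash
+# Hypothetical launcher: adjust model, paths and port to your setup.
+python ollama_lightrag_server.py \
+    --model gemma2:2b \
+    --embedding-model nomic-embed-text \
+    --ollama-host http://localhost:11434 \
+    --working-dir ./dickens \
+    --input-file ./book.txt \
+    --port 8000
+```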
+
+## API Endpoints
+
+### Query Endpoints
+
+#### POST /query
+Query the RAG system with options for different search modes.
+
+```bash
+curl -X POST "http://localhost:8000/query" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "Your question here", "mode": "hybrid"}'
+```
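+
+The same endpoint can be called from Python. A minimal client sketch (assumes the server is running on localhost:8000 and the `requests` package is installed; this client is illustrative and not part of the patch):
+
+```python
+import requests
+
+# Ask a question in hybrid mode and print the generated answer
+payload = {"query": "Your question here", "mode": "hybrid"}
+resp = requests.post("http://localhost:8000/query", json=payload)
+resp.raise_for_status()
+print(resp.json()["response"])
+```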
+
+#### POST /query/stream
+Stream responses from the RAG system.
+
+```bash
+curl -X POST "http://localhost:8000/query/stream" \
+  -H "Content-Type: application/json" \
+  -d '{"query": "Your question here", "mode": "hybrid"}'
+```
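+
+Streaming output can also be consumed incrementally from Python. A rough sketch under the same assumptions as above (chunk boundaries depend on the model and are not guaranteed):
+
+```python
+import requests
+
+payload = {"query": "Your question here", "mode": "hybrid"}
+with requests.post("http://localhost:8000/query/stream", json=payload, stream=True) as resp:
+    resp.raise_for_status()
+    # Print each chunk as soon as it arrives instead of waiting for the full answer
+    for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
+        print(chunk, end="", flush=True)
+```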
+
+### Document Management Endpoints
+
+#### POST /documents/text
+Insert text directly into the RAG system.
+
+```bash
+curl -X POST "http://localhost:8000/documents/text" \
+  -H "Content-Type: application/json" \
+  -d '{"text": "Your text content here", "description": "Optional description"}'
+```
+
+#### POST /documents/file
+Upload a single file to the RAG system.
+
+```bash
+curl -X POST "http://localhost:8000/documents/file" \
+  -F "file=@/path/to/your/document.txt" \
+  -F "description=Optional description"
+```
+
+#### POST /documents/batch
+Upload multiple files at once.
+
+```bash
+curl -X POST "http://localhost:8000/documents/batch" \
+  -F "files=@/path/to/doc1.txt" \
+  -F "files=@/path/to/doc2.txt"
+```
+
+#### DELETE /documents
+Clear all documents from the RAG system.
+
+```bash
+curl -X DELETE "http://localhost:8000/documents"
+```
+
+### Utility Endpoints
+
+#### GET /health
+Check server health and configuration.
+
+```bash
+curl "http://localhost:8000/health"
+```
+
+## Development
+
+### Running in Development Mode
+
+The FastAPI app is built by `create_app(parse_args())` rather than exposed as a module-level `app`, so run the script directly during development:
+
+```bash
+python ollama_lightrag_server.py --log-level DEBUG
+```
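+
+If you want uvicorn's auto-reload, one possible approach is a small factory wrapper (a sketch under assumptions, not part of this patch; `dev_app.py` is a hypothetical helper placed next to `ollama_lightrag_server.py` and run from the `api/` directory):
+
+```python
+# dev_app.py - expose a zero-argument factory for `uvicorn --factory`
+import sys
+from ollama_lightrag_server import create_app, parse_args
+
+def app_factory():
+    # parse_args() reads sys.argv, which belongs to uvicorn here,
+    # so temporarily hide uvicorn's arguments and use the defaults.
+    saved_argv, sys.argv = sys.argv, [sys.argv[0]]
+    try:
+        return create_app(parse_args())
+    finally:
+        sys.argv = saved_argv
+```
+
+```bash
+uvicorn dev_app:app_factory --factory --reload --port 8000
+```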
+
+### API Documentation
+
+When the server is running, visit:
+- Swagger UI: http://localhost:8000/docs
+- ReDoc: http://localhost:8000/redoc
+
+## Contributing
+
+1. Fork the repository
+2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
+3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
+4. Push to the branch (`git push origin feature/AmazingFeature`)
+5. Open a Pull Request
+
+## License
+
+This project is licensed under the MIT License - see the LICENSE file for details.
+
+## Acknowledgments
+
+- Built with [FastAPI](https://fastapi.tiangolo.com/)
+- Uses [LightRAG](https://github.com/HKUDS/LightRAG) for document processing
+- Powered by [Ollama](https://ollama.ai/) for LLM inference
+
+## Support
diff --git a/api/ollama_lightrag_server.py b/api/ollama_lightrag_server.py
new file mode 100644
index 00000000..383e04cb
--- /dev/null
+++ b/api/ollama_lightrag_server.py
@@ -0,0 +1,271 @@
+from fastapi import FastAPI, HTTPException, File, UploadFile, Form
+from fastapi.responses import StreamingResponse
+from pydantic import BaseModel
+import asyncio
+import os
+import logging
+import argparse
+from lightrag import LightRAG, QueryParam
+from lightrag.llm import ollama_model_complete, ollama_embedding
+from lightrag.utils import EmbeddingFunc
+from typing import Optional, List
+from enum import Enum
+import io
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description="""
+LightRAG FastAPI Server
+======================
+
+A REST API server for text querying using LightRAG. Supports multiple search modes,
+streaming responses, and document management.
+
+Features:
+- Multiple search modes (naive, local, global, hybrid)
+- Streaming and non-streaming responses
+- Document insertion and management
+- Configurable model parameters
+- REST API with automatic documentation
+""",
+        formatter_class=argparse.RawDescriptionHelpFormatter
+    )
+
+    # Server configuration
+    parser.add_argument('--host', default='0.0.0.0', help='Server host (default: 0.0.0.0)')
+    parser.add_argument('--port', type=int, default=8000, help='Server port (default: 8000)')
+
+    # Model configuration
+    parser.add_argument('--model', default='gemma2:2b', help='LLM model name (default: gemma2:2b)')
+    parser.add_argument('--embedding-model', default='nomic-embed-text', help='Embedding model name (default: nomic-embed-text)')
+    parser.add_argument('--ollama-host', default='http://localhost:11434', help='Ollama host URL (default: http://localhost:11434)')
+
+    # RAG configuration
+    parser.add_argument('--working-dir', default='./dickens', help='Working directory for RAG (default: ./dickens)')
+    parser.add_argument('--max-async', type=int, default=4, help='Maximum async operations (default: 4)')
+    parser.add_argument('--max-tokens', type=int, default=32768, help='Maximum token size (default: 32768)')
+    parser.add_argument('--embedding-dim', type=int, default=768, help='Embedding dimensions (default: 768)')
+    parser.add_argument('--max-embed-tokens', type=int, default=8192, help='Maximum embedding token size (default: 8192)')
+
+    # Input configuration
+    parser.add_argument('--input-file', default='./book.txt', help='Initial input file to process (default: ./book.txt)')
+
+    # Logging configuration
+    parser.add_argument('--log-level', default='INFO', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
+                        help='Logging level (default: INFO)')
+
+    return parser.parse_args()
+
+# Pydantic models
+class SearchMode(str, Enum):
+    naive = "naive"
+    local = "local"
+    global_ = "global"
+    hybrid = "hybrid"
+
+class QueryRequest(BaseModel):
+    query: str
+    mode: SearchMode = SearchMode.hybrid
+    stream: bool = False
+
+class QueryResponse(BaseModel):
+    response: str
+
+class InsertTextRequest(BaseModel):
+    text: str
+    description: Optional[str] = None
+
+class InsertResponse(BaseModel):
+    status: str
+    message: str
+    document_count: int
+
+def create_app(args):
+    # Setup logging
+    logging.basicConfig(format="%(levelname)s:%(message)s", level=getattr(logging, args.log_level))
+
+    # Initialize FastAPI app
+    app = FastAPI(
+        title="LightRAG API",
+        description="""
+        API for querying text using LightRAG.
+
+        Configuration:
+        - Model: {model}
+        - Embedding Model: {embed_model}
+        - Working Directory: {work_dir}
+        - Max Tokens: {max_tokens}
+        """.format(
+            model=args.model,
+            embed_model=args.embedding_model,
+            work_dir=args.working_dir,
+            max_tokens=args.max_tokens
+        )
+    )
+
+    # Create working directory if it doesn't exist
+    if not os.path.exists(args.working_dir):
+        os.makedirs(args.working_dir)
+
+    # Initialize RAG
+    rag = LightRAG(
+        working_dir=args.working_dir,
+        llm_model_func=ollama_model_complete,
+        llm_model_name=args.model,
+        llm_model_max_async=args.max_async,
+        llm_model_max_token_size=args.max_tokens,
+        llm_model_kwargs={"host": args.ollama_host, "options": {"num_ctx": args.max_tokens}},
+        embedding_func=EmbeddingFunc(
+            embedding_dim=args.embedding_dim,
+            max_token_size=args.max_embed_tokens,
+            func=lambda texts: ollama_embedding(
+                texts, embed_model=args.embedding_model, host=args.ollama_host
+            ),
+        ),
+    )
+
+    @app.on_event("startup")
+    async def startup_event():
+        # Index the initial input file once at startup, if it exists
+        try:
+            with open(args.input_file, "r", encoding="utf-8") as f:
+                rag.insert(f.read())
+        except FileNotFoundError:
+            logging.warning(f"Input file {args.input_file} not found. Please ensure the file exists before querying.")
+
+    @app.post("/query", response_model=QueryResponse)
+    async def query_text(request: QueryRequest):
+        try:
+            response = rag.query(
+                request.query,
+                param=QueryParam(mode=request.mode, stream=request.stream)
+            )
+
+            if request.stream:
+                # Collect the streamed chunks into a single response body
+                result = ""
+                async for chunk in response:
+                    result += chunk
+                return QueryResponse(response=result)
+            else:
+                return QueryResponse(response=response)
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.post("/query/stream")
+    async def query_text_stream(request: QueryRequest):
+        try:
+            response = rag.query(
+                request.query,
+                param=QueryParam(mode=request.mode, stream=True)
+            )
+
+            async def stream_generator():
+                async for chunk in response:
+                    yield chunk
+
+            # Wrap the generator in a StreamingResponse so chunks are sent to
+            # the client as they are produced instead of being serialized whole
+            return StreamingResponse(stream_generator(), media_type="text/plain")
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.post("/documents/text", response_model=InsertResponse)
+    async def insert_text(request: InsertTextRequest):
+        try:
+            rag.insert(request.text)
+            return InsertResponse(
+                status="success",
+                message="Text successfully inserted",
+                document_count=1
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.post("/documents/file", response_model=InsertResponse)
+    async def insert_file(
+        file: UploadFile = File(...),
+        description: str = Form(None)
+    ):
+        try:
+            content = await file.read()
+
+            if file.filename.endswith(('.txt', '.md')):
+                text = content.decode('utf-8')
+                rag.insert(text)
+            else:
+                raise HTTPException(
+                    status_code=400,
+                    detail="Unsupported file type. Only .txt and .md files are supported"
+                )
+
+            return InsertResponse(
+                status="success",
+                message=f"File '{file.filename}' successfully inserted",
+                document_count=1
+            )
+        except UnicodeDecodeError:
+            raise HTTPException(status_code=400, detail="File encoding not supported")
+        except HTTPException:
+            # Preserve deliberate HTTP errors (e.g. unsupported file type)
+            raise
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.post("/documents/batch", response_model=InsertResponse)
+    async def insert_batch(files: List[UploadFile] = File(...)):
+        try:
+            inserted_count = 0
+            failed_files = []
+
+            for file in files:
+                try:
+                    content = await file.read()
+                    if file.filename.endswith(('.txt', '.md')):
+                        text = content.decode('utf-8')
+                        rag.insert(text)
+                        inserted_count += 1
+                    else:
+                        failed_files.append(f"{file.filename} (unsupported type)")
+                except Exception as e:
+                    failed_files.append(f"{file.filename} ({str(e)})")
+
+            status_message = f"Successfully inserted {inserted_count} documents"
+            if failed_files:
+                status_message += f". Failed files: {', '.join(failed_files)}"
+
+            return InsertResponse(
+                status="success" if not failed_files else "partial_success",
+                message=status_message,
+                document_count=inserted_count
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.delete("/documents", response_model=InsertResponse)
+    async def clear_documents():
+        try:
+            rag.text_chunks = []
+            rag.entities_vdb = None
+            rag.relationships_vdb = None
+            return InsertResponse(
+                status="success",
+                message="All documents cleared successfully",
+                document_count=0
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+
+    @app.get("/health")
+    async def health_check():
+        return {
+            "status": "healthy",
+            "configuration": {
+                "model": args.model,
+                "embedding_model": args.embedding_model,
+                "working_dir": args.working_dir,
+                "max_tokens": args.max_tokens,
+                "ollama_host": args.ollama_host
+            }
+        }
+
+    return app
+
+if __name__ == "__main__":
+    args = parse_args()
+    import uvicorn
+    app = create_app(args)
+    uvicorn.run(app, host=args.host, port=args.port)
diff --git a/api/requirements.txt b/api/requirements.txt
new file mode 100644
index 00000000..f0615cfd
--- /dev/null
+++ b/api/requirements.txt
@@ -0,0 +1,2 @@
+fastapi
+uvicorn
\ No newline at end of file
diff --git a/fast_api_server/__init__.py b/fast_api_server/__init__.py
new file mode 100644
index 00000000..e69de29b