From 9951f8584a8df9f17d44d18e83e5741ee0193b16 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 24 Dec 2024 10:18:41 +0100 Subject: [PATCH] Added API as an option to the installation, reorganized the API and fused all documentations in README.md --- README.md | 288 ++++++++++++++++++ api/README_LOLLMS.md | 177 ----------- api/README_OLLAMA.md | 177 ----------- api/README_OPENAI.md | 171 ----------- {api => lightrag/api}/.gitignore | 0 .../api}/lollms_lightrag_server.py | 5 +- .../api}/ollama_lightrag_server.py | 5 +- .../api}/openai_lightrag_server.py | 5 +- {api => lightrag/api}/requirements.txt | 0 setup.py | 20 ++ 10 files changed, 320 insertions(+), 528 deletions(-) delete mode 100644 api/README_LOLLMS.md delete mode 100644 api/README_OLLAMA.md delete mode 100644 api/README_OPENAI.md rename {api => lightrag/api}/.gitignore (100%) rename {api => lightrag/api}/lollms_lightrag_server.py (99%) rename {api => lightrag/api}/ollama_lightrag_server.py (99%) rename {api => lightrag/api}/openai_lightrag_server.py (99%) rename {api => lightrag/api}/requirements.txt (100%) diff --git a/README.md b/README.md index a24c9b72..13c7676a 100644 --- a/README.md +++ b/README.md @@ -1019,6 +1019,294 @@ def extract_queries(file_path): └── test.py ``` +## Install with API Support + +LightRAG provides optional API support through FastAPI servers that add RAG capabilities to existing LLM services. You can install LightRAG with API support in two ways: + +### 1. Installation from PyPI + +```bash +pip install "lightrag-hku[api]" +``` + +### 2. Installation from Source (Development) + +```bash +# Clone the repository +git clone https://github.com/ParisNeo/lightrag.git + +# Change to the repository directory +cd lightrag + +# Install in editable mode with API support +pip install -e ".[api]" +``` + +### Prerequisites + +Before running any of the servers, ensure you have the corresponding backend service running: + +#### For LoLLMs Server +- LoLLMs must be running and accessible +- Default connection: http://localhost:11434 +- Configure using --lollms-host if running on a different host/port + +#### For Ollama Server +- Ollama must be running and accessible +- Default connection: http://localhost:11434 +- Configure using --ollama-host if running on a different host/port + +#### For OpenAI Server +- Requires valid OpenAI API credentials set in environment variables +- OPENAI_API_KEY must be set + +### Configuration Options + +Each server has its own specific configuration options: + +#### LoLLMs Server Options + +| Parameter | Default | Description | +|-----------|---------|-------------| +| --host | 0.0.0.0 | RAG server host | +| --port | 9621 | RAG server port | +| --model | mistral-nemo:latest | LLM model name | +| --embedding-model | bge-m3:latest | Embedding model name | +| --lollms-host | http://localhost:11434 | LoLLMS backend URL | +| --working-dir | ./rag_storage | Working directory for RAG | +| --max-async | 4 | Maximum async operations | +| --max-tokens | 32768 | Maximum token size | +| --embedding-dim | 1024 | Embedding dimensions | +| --max-embed-tokens | 8192 | Maximum embedding token size | +| --input-file | ./book.txt | Initial input file | +| --log-level | INFO | Logging level | + +#### Ollama Server Options + +| Parameter | Default | Description | +|-----------|---------|-------------| +| --host | 0.0.0.0 | RAG server host | +| --port | 9621 | RAG server port | +| --model | mistral-nemo:latest | LLM model name | +| --embedding-model | bge-m3:latest | Embedding model name | +| --ollama-host 
| http://localhost:11434 | Ollama backend URL | +| --working-dir | ./rag_storage | Working directory for RAG | +| --max-async | 4 | Maximum async operations | +| --max-tokens | 32768 | Maximum token size | +| --embedding-dim | 1024 | Embedding dimensions | +| --max-embed-tokens | 8192 | Maximum embedding token size | +| --input-file | ./book.txt | Initial input file | +| --log-level | INFO | Logging level | + +#### OpenAI Server Options + +| Parameter | Default | Description | +|-----------|---------|-------------| +| --host | 0.0.0.0 | RAG server host | +| --port | 9621 | RAG server port | +| --model | gpt-4 | OpenAI model name | +| --embedding-model | text-embedding-3-large | OpenAI embedding model | +| --working-dir | ./rag_storage | Working directory for RAG | +| --max-tokens | 32768 | Maximum token size | +| --max-embed-tokens | 8192 | Maximum embedding token size | +| --input-dir | ./inputs | Input directory for documents | +| --log-level | INFO | Logging level | + +### Example Usage + +#### LoLLMs RAG Server + +```bash +# Custom configuration with specific model and working directory +lollms-lightrag-server --model mistral-nemo --port 8080 --working-dir ./custom_rag + +# Using specific models (ensure they are installed in your LoLLMs instance) +lollms-lightrag-server --model mistral-nemo:latest --embedding-model bge-m3 --embedding-dim 1024 +``` + +#### Ollama RAG Server + +```bash +# Custom configuration with specific model and working directory +ollama-lightrag-server --model mistral-nemo:latest --port 8080 --working-dir ./custom_rag + +# Using specific models (ensure they are installed in your Ollama instance) +ollama-lightrag-server --model mistral-nemo:latest --embedding-model bge-m3 --embedding-dim 1024 +``` + +#### OpenAI RAG Server + +```bash +# Using GPT-4 with text-embedding-3-large +openai-lightrag-server --port 9624 --model gpt-4 --embedding-model text-embedding-3-large +``` + +**Important Notes:** +- For LoLLMs: Make sure the specified models are installed in your LoLLMs instance +- For Ollama: Make sure the specified models are installed in your Ollama instance +- For OpenAI: Ensure you have set up your OPENAI_API_KEY environment variable + +For help on any server, use the --help flag: +```bash +lollms-lightrag-server --help +ollama-lightrag-server --help +openai-lightrag-server --help +``` + +Note: If you don't need the API functionality, you can install the base package without API support using: +```bash +pip install lightrag-hku +``` + +## API Endpoints + +All servers (LoLLMs, Ollama, and OpenAI) provide the same REST API endpoints for RAG functionality. + +### Query Endpoints + +#### POST /query +Query the RAG system with options for different search modes. + +```bash +curl -X POST "http://localhost:9621/query" \ + -H "Content-Type: application/json" \ + -d '{"query": "Your question here", "mode": "hybrid"}' +``` + +#### POST /query/stream +Stream responses from the RAG system. + +```bash +curl -X POST "http://localhost:9621/query/stream" \ + -H "Content-Type: application/json" \ + -d '{"query": "Your question here", "mode": "hybrid"}' +``` + +### Document Management Endpoints + +#### POST /documents/text +Insert text directly into the RAG system. + +```bash +curl -X POST "http://localhost:9621/documents/text" \ + -H "Content-Type: application/json" \ + -d '{"text": "Your text content here", "description": "Optional description"}' +``` + +#### POST /documents/file +Upload a single file to the RAG system. 
+ +```bash +curl -X POST "http://localhost:9621/documents/file" \ + -F "file=@/path/to/your/document.txt" \ + -F "description=Optional description" +``` + +#### POST /documents/batch +Upload multiple files at once. + +```bash +curl -X POST "http://localhost:9621/documents/batch" \ + -F "files=@/path/to/doc1.txt" \ + -F "files=@/path/to/doc2.txt" +``` + +#### DELETE /documents +Clear all documents from the RAG system. + +```bash +curl -X DELETE "http://localhost:9621/documents" +``` + +### Utility Endpoints + +#### GET /health +Check server health and configuration. + +```bash +curl "http://localhost:9621/health" +``` + +## Development + +### Running in Development Mode + +For LoLLMs: +```bash +uvicorn lollms_lightrag_server:app --reload --port 9621 +``` + +For Ollama: +```bash +uvicorn ollama_lightrag_server:app --reload --port 9621 +``` + +For OpenAI: +```bash +uvicorn openai_lightrag_server:app --reload --port 9621 +``` + +### API Documentation + +When any server is running, visit: +- Swagger UI: http://localhost:9621/docs +- ReDoc: http://localhost:9621/redoc + +### Testing API Endpoints + +You can test the API endpoints using the provided curl commands or through the Swagger UI interface. Make sure to: +1. Start the appropriate backend service (LoLLMs, Ollama, or OpenAI) +2. Start the RAG server +3. Upload some documents using the document management endpoints +4. Query the system using the query endpoints + +### Important Features + +#### Automatic Document Vectorization +When starting any of the servers with the `--input-dir` parameter, the system will automatically: +1. Scan the specified directory for documents +2. Check for existing vectorized content in the database +3. Only vectorize new documents that aren't already in the database +4. Make all content immediately available for RAG queries + +This intelligent caching mechanism: +- Prevents unnecessary re-vectorization of existing documents +- Reduces startup time for subsequent runs +- Preserves system resources +- Maintains consistency across restarts + +### Example Usage + +#### LoLLMs RAG Server + +```bash +# Start server with automatic document vectorization +# Only new documents will be vectorized, existing ones will be loaded from cache +lollms-lightrag-server --input-dir ./my_documents --port 8080 +``` + +#### Ollama RAG Server + +```bash +# Start server with automatic document vectorization +# Previously vectorized documents will be loaded from the database +ollama-lightrag-server --input-dir ./my_documents --port 8080 +``` + +#### OpenAI RAG Server + +```bash +# Start server with automatic document vectorization +# Existing documents are retrieved from cache, only new ones are processed +openai-lightrag-server --input-dir ./my_documents --port 9624 +``` + +**Important Notes:** +- The `--input-dir` parameter enables automatic document processing at startup +- Documents already in the database are not re-vectorized +- Only new documents in the input directory will be processed +- This optimization significantly reduces startup time for subsequent runs +- The working directory (`--working-dir`) stores the vectorized documents database ## Star History diff --git a/api/README_LOLLMS.md b/api/README_LOLLMS.md deleted file mode 100644 index d56ac909..00000000 --- a/api/README_LOLLMS.md +++ /dev/null @@ -1,177 +0,0 @@ -# LightRAG API Server - -A powerful FastAPI-based server for managing and querying documents using LightRAG (Light Retrieval-Augmented Generation). 
This server provides a REST API interface for document management and intelligent querying using various LLM models through LoLLMs.
-
-## Features
-
-- 🔍 Multiple search modes (naive, local, global, hybrid)
-- 📡 Streaming and non-streaming responses
-- 📝 Document management (insert, batch upload, clear)
-- ⚙️ Highly configurable model parameters
-- 📚 Support for text and file uploads
-- 🔧 RESTful API with automatic documentation
-- 🚀 Built with FastAPI for high performance
-
-## Prerequisites
-
-- Python 3.8+
-- LoLLMs server running locally or remotely
-- Required Python packages:
-  - fastapi
-  - uvicorn
-  - lightrag
-  - pydantic
-
-## Installation
-If you are using Windows, you will need to download and install the Visual C++ Build Tools from [https://visualstudio.microsoft.com/visual-cpp-build-tools/](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
-Make sure you install the VS 2022 C++ x64/x86 Build Tools from the Individual Components tab:
-![image](https://github.com/user-attachments/assets/3723e15b-0a2c-42ed-aebf-e595a9f9c946)
-
-This is mandatory for building some modules.
-
-1. Clone the repository:
-```bash
-git clone https://github.com/ParisNeo/LightRAG.git
-cd LightRAG/api
-```
-
-2. Install dependencies:
-```bash
-pip install -r requirements.txt
-```
-
-3. Make sure LoLLMs is running and accessible.
-
-## Configuration
-
-The server can be configured using command-line arguments:
-
-```bash
-python lollms_lightrag_server.py --help
-```
-
-Available options:
-
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| --host | 0.0.0.0 | Server host |
-| --port | 9621 | Server port |
-| --model | mistral-nemo:latest | LLM model name |
-| --embedding-model | bge-m3:latest | Embedding model name |
-| --lollms-host | http://localhost:11434 | LoLLMs host URL |
-| --working-dir | ./rag_storage | Working directory for RAG |
-| --max-async | 4 | Maximum async operations |
-| --max-tokens | 32768 | Maximum token size |
-| --embedding-dim | 1024 | Embedding dimensions |
-| --max-embed-tokens | 8192 | Maximum embedding token size |
-| --input-file | ./book.txt | Initial input file |
-| --log-level | INFO | Logging level |
-
-## Quick Start
-
-1. Basic usage with default settings:
-```bash
-python lollms_lightrag_server.py
-```
-
-2. Custom configuration:
-```bash
-python lollms_lightrag_server.py --model llama2:13b --port 8080 --working-dir ./custom_rag
-```
-
-Make sure the models are installed in your LoLLMs instance:
-```bash
-python lollms_lightrag_server.py --model mistral-nemo:latest --embedding-model bge-m3 --embedding-dim 1024
-```
-
-## API Endpoints
-
-### Query Endpoints
-
-#### POST /query
-Query the RAG system with options for different search modes.
-
-```bash
-curl -X POST "http://localhost:9621/query" \
-    -H "Content-Type: application/json" \
-    -d '{"query": "Your question here", "mode": "hybrid"}'
-```
-
-#### POST /query/stream
-Stream responses from the RAG system.
-
-```bash
-curl -X POST "http://localhost:9621/query/stream" \
-    -H "Content-Type: application/json" \
-    -d '{"query": "Your question here", "mode": "hybrid"}'
-```
-
-### Document Management Endpoints
-
-#### POST /documents/text
-Insert text directly into the RAG system.
-
-```bash
-curl -X POST "http://localhost:9621/documents/text" \
-    -H "Content-Type: application/json" \
-    -d '{"text": "Your text content here", "description": "Optional description"}'
-```
-
-#### POST /documents/file
-Upload a single file to the RAG system.
-
-```bash
-curl -X POST "http://localhost:9621/documents/file" \
-    -F "file=@/path/to/your/document.txt" \
-    -F "description=Optional description"
-```
-
-#### POST /documents/batch
-Upload multiple files at once.
-
-```bash
-curl -X POST "http://localhost:9621/documents/batch" \
-    -F "files=@/path/to/doc1.txt" \
-    -F "files=@/path/to/doc2.txt"
-```
-
-#### DELETE /documents
-Clear all documents from the RAG system.
-
-```bash
-curl -X DELETE "http://localhost:9621/documents"
-```
-
-### Utility Endpoints
-
-#### GET /health
-Check server health and configuration.
-
-```bash
-curl "http://localhost:9621/health"
-```
-
-## Development
-
-### Running in Development Mode
-
-```bash
-uvicorn lollms_lightrag_server:app --reload --port 9621
-```
-
-### API Documentation
-
-When the server is running, visit:
-- Swagger UI: http://localhost:9621/docs
-- ReDoc: http://localhost:9621/redoc
-
-
-## License
-
-This project is licensed under the MIT License - see the LICENSE file for details.
-
-## Acknowledgments
-
-- Built with [FastAPI](https://fastapi.tiangolo.com/)
-- Uses [LightRAG](https://github.com/HKUDS/LightRAG) for document processing
-- Powered by [LoLLMs](https://lollms.ai/) for LLM inference
diff --git a/api/README_OLLAMA.md b/api/README_OLLAMA.md
deleted file mode 100644
index 04be5ac3..00000000
--- a/api/README_OLLAMA.md
+++ /dev/null
@@ -1,177 +0,0 @@
-# LightRAG API Server
-
-A powerful FastAPI-based server for managing and querying documents using LightRAG (Light Retrieval-Augmented Generation). This server provides a REST API interface for document management and intelligent querying using various LLM models through Ollama.
-
-## Features
-
-- 🔍 Multiple search modes (naive, local, global, hybrid)
-- 📡 Streaming and non-streaming responses
-- 📝 Document management (insert, batch upload, clear)
-- ⚙️ Highly configurable model parameters
-- 📚 Support for text and file uploads
-- 🔧 RESTful API with automatic documentation
-- 🚀 Built with FastAPI for high performance
-
-## Prerequisites
-
-- Python 3.8+
-- Ollama server running locally or remotely
-- Required Python packages:
-  - fastapi
-  - uvicorn
-  - lightrag
-  - pydantic
-
-## Installation
-If you are using Windows, you will need to download and install the Visual C++ Build Tools from [https://visualstudio.microsoft.com/visual-cpp-build-tools/](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
-Make sure you install the VS 2022 C++ x64/x86 Build Tools from the Individual Components tab:
-![image](https://github.com/user-attachments/assets/3723e15b-0a2c-42ed-aebf-e595a9f9c946)
-
-This is mandatory for building some modules.
-
-1. Clone the repository:
-```bash
-git clone https://github.com/ParisNeo/LightRAG.git
-cd LightRAG/api
-```
-
-2. Install dependencies:
-```bash
-pip install -r requirements.txt
-```
-
-3. Make sure Ollama is running and accessible.
-
-## Configuration
-
-The server can be configured using command-line arguments:
-
-```bash
-python ollama_lightrag_server.py --help
-```
-
-Available options:
-
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| --host | 0.0.0.0 | Server host |
-| --port | 9621 | Server port |
-| --model | mistral-nemo:latest | LLM model name |
-| --embedding-model | bge-m3:latest | Embedding model name |
-| --ollama-host | http://localhost:11434 | Ollama host URL |
-| --working-dir | ./rag_storage | Working directory for RAG |
-| --max-async | 4 | Maximum async operations |
-| --max-tokens | 32768 | Maximum token size |
-| --embedding-dim | 1024 | Embedding dimensions |
-| --max-embed-tokens | 8192 | Maximum embedding token size |
-| --input-file | ./book.txt | Initial input file |
-| --log-level | INFO | Logging level |
-
-## Quick Start
-
-1. Basic usage with default settings:
-```bash
-python ollama_lightrag_server.py
-```
-
-2. Custom configuration:
-```bash
-python ollama_lightrag_server.py --model llama2:13b --port 8080 --working-dir ./custom_rag
-```
-
-Make sure the models are installed in your Ollama instance:
-```bash
-python ollama_lightrag_server.py --model mistral-nemo:latest --embedding-model bge-m3 --embedding-dim 1024
-```
-
-## API Endpoints
-
-### Query Endpoints
-
-#### POST /query
-Query the RAG system with options for different search modes.
-
-```bash
-curl -X POST "http://localhost:9621/query" \
-    -H "Content-Type: application/json" \
-    -d '{"query": "Your question here", "mode": "hybrid"}'
-```
-
-#### POST /query/stream
-Stream responses from the RAG system.
-
-```bash
-curl -X POST "http://localhost:9621/query/stream" \
-    -H "Content-Type: application/json" \
-    -d '{"query": "Your question here", "mode": "hybrid"}'
-```
-
-### Document Management Endpoints
-
-#### POST /documents/text
-Insert text directly into the RAG system.
-
-```bash
-curl -X POST "http://localhost:9621/documents/text" \
-    -H "Content-Type: application/json" \
-    -d '{"text": "Your text content here", "description": "Optional description"}'
-```
-
-#### POST /documents/file
-Upload a single file to the RAG system.
-
-```bash
-curl -X POST "http://localhost:9621/documents/file" \
-    -F "file=@/path/to/your/document.txt" \
-    -F "description=Optional description"
-```
-
-#### POST /documents/batch
-Upload multiple files at once.
-
-```bash
-curl -X POST "http://localhost:9621/documents/batch" \
-    -F "files=@/path/to/doc1.txt" \
-    -F "files=@/path/to/doc2.txt"
-```
-
-#### DELETE /documents
-Clear all documents from the RAG system.
-
-```bash
-curl -X DELETE "http://localhost:9621/documents"
-```
-
-### Utility Endpoints
-
-#### GET /health
-Check server health and configuration.
-
-```bash
-curl "http://localhost:9621/health"
-```
-
-## Development
-
-### Running in Development Mode
-
-```bash
-uvicorn ollama_lightrag_server:app --reload --port 9621
-```
-
-### API Documentation
-
-When the server is running, visit:
-- Swagger UI: http://localhost:9621/docs
-- ReDoc: http://localhost:9621/redoc
-
-
-## License
-
-This project is licensed under the MIT License - see the LICENSE file for details.
-
-## Acknowledgments
-
-- Built with [FastAPI](https://fastapi.tiangolo.com/)
-- Uses [LightRAG](https://github.com/HKUDS/LightRAG) for document processing
-- Powered by [Ollama](https://ollama.ai/) for LLM inference
diff --git a/api/README_OPENAI.md b/api/README_OPENAI.md
deleted file mode 100644
index 42c0cf32..00000000
--- a/api/README_OPENAI.md
+++ /dev/null
@@ -1,171 +0,0 @@
-
-# LightRAG API Server
-
-A powerful FastAPI-based server for managing and querying documents using LightRAG (Light Retrieval-Augmented Generation). This server provides a REST API interface for document management and intelligent querying using OpenAI's language models.
-
-## Features
-
-- 🔍 Multiple search modes (naive, local, global, hybrid)
-- 📡 Streaming and non-streaming responses
-- 📝 Document management (insert, batch upload, clear)
-- ⚙️ Highly configurable model parameters
-- 📚 Support for text and file uploads
-- 🔧 RESTful API with automatic documentation
-- 🚀 Built with FastAPI for high performance
-
-## Prerequisites
-
-- Python 3.8+
-- OpenAI API key
-- Required Python packages:
-  - fastapi
-  - uvicorn
-  - lightrag
-  - pydantic
-  - openai
-  - nest-asyncio
-
-## Installation
-If you are using Windows, you will need to download and install the Visual C++ Build Tools from [https://visualstudio.microsoft.com/visual-cpp-build-tools/](https://visualstudio.microsoft.com/visual-cpp-build-tools/)
-Make sure you install the VS 2022 C++ x64/x86 Build Tools from the Individual Components tab.
-
-1. Clone the repository:
-```bash
-git clone https://github.com/ParisNeo/LightRAG.git
-cd LightRAG/api
-```
-
-2. Install dependencies:
-```bash
-pip install -r requirements.txt
-```
-
-3. Set up your OpenAI API key:
-```bash
-export OPENAI_API_KEY='your-api-key-here'
-```
-
-## Configuration
-
-The server can be configured using command-line arguments:
-
-```bash
-python openai_lightrag_server.py --help
-```
-
-Available options:
-
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| --host | 0.0.0.0 | Server host |
-| --port | 9621 | Server port |
-| --model | gpt-4 | OpenAI model name |
-| --embedding-model | text-embedding-3-large | OpenAI embedding model |
-| --working-dir | ./rag_storage | Working directory for RAG |
-| --max-tokens | 32768 | Maximum token size |
-| --max-embed-tokens | 8192 | Maximum embedding token size |
-| --input-dir | ./inputs | Input directory for documents |
-| --log-level | INFO | Logging level |
-
-## Quick Start
-
-1. Basic usage with default settings:
-```bash
-python openai_lightrag_server.py
-```
-
-2. Custom configuration:
-```bash
-python openai_lightrag_server.py --model gpt-4 --port 8080 --working-dir ./custom_rag
-```
-
-## API Endpoints
-
-### Query Endpoints
-
-#### POST /query
-Query the RAG system with options for different search modes.
-
-```bash
-curl -X POST "http://localhost:9621/query" \
-    -H "Content-Type: application/json" \
-    -d '{"query": "Your question here", "mode": "hybrid"}'
-```
-
-#### POST /query/stream
-Stream responses from the RAG system.
-
-```bash
-curl -X POST "http://localhost:9621/query/stream" \
-    -H "Content-Type: application/json" \
-    -d '{"query": "Your question here", "mode": "hybrid"}'
-```
-
-### Document Management Endpoints
-
-#### POST /documents/text
-Insert text directly into the RAG system.
- -```bash -curl -X POST "http://localhost:9621/documents/text" \ - -H "Content-Type: application/json" \ - -d '{"text": "Your text content here", "description": "Optional description"}' -``` - -#### POST /documents/file -Upload a single file to the RAG system. - -```bash -curl -X POST "http://localhost:9621/documents/file" \ - -F "file=@/path/to/your/document.txt" \ - -F "description=Optional description" -``` - -#### POST /documents/batch -Upload multiple files at once. - -```bash -curl -X POST "http://localhost:9621/documents/batch" \ - -F "files=@/path/to/doc1.txt" \ - -F "files=@/path/to/doc2.txt" -``` - -#### DELETE /documents -Clear all documents from the RAG system. - -```bash -curl -X DELETE "http://localhost:9621/documents" -``` - -### Utility Endpoints - -#### GET /health -Check server health and configuration. - -```bash -curl "http://localhost:9621/health" -``` - -## Development - -### Running in Development Mode - -```bash -uvicorn openai_lightrag_server:app --reload --port 9621 -``` - -### API Documentation - -When the server is running, visit: -- Swagger UI: http://localhost:9621/docs -- ReDoc: http://localhost:9621/redoc - -## License - -This project is licensed under the MIT License - see the LICENSE file for details. - -## Acknowledgments - -- Built with [FastAPI](https://fastapi.tiangolo.com/) -- Uses [LightRAG](https://github.com/HKUDS/LightRAG) for document processing -- Powered by [OpenAI](https://openai.com/) for language model inference diff --git a/api/.gitignore b/lightrag/api/.gitignore similarity index 100% rename from api/.gitignore rename to lightrag/api/.gitignore diff --git a/api/lollms_lightrag_server.py b/lightrag/api/lollms_lightrag_server.py similarity index 99% rename from api/lollms_lightrag_server.py rename to lightrag/api/lollms_lightrag_server.py index 4babcaa8..7e504c8f 100644 --- a/api/lollms_lightrag_server.py +++ b/lightrag/api/lollms_lightrag_server.py @@ -393,9 +393,12 @@ def create_app(args): return app -if __name__ == "__main__": +def main(): args = parse_args() import uvicorn app = create_app(args) uvicorn.run(app, host=args.host, port=args.port) + +if __name__ == "__main__": + main() diff --git a/api/ollama_lightrag_server.py b/lightrag/api/ollama_lightrag_server.py similarity index 99% rename from api/ollama_lightrag_server.py rename to lightrag/api/ollama_lightrag_server.py index 055532c8..3a334191 100644 --- a/api/ollama_lightrag_server.py +++ b/lightrag/api/ollama_lightrag_server.py @@ -393,9 +393,12 @@ def create_app(args): return app -if __name__ == "__main__": +def main(): args = parse_args() import uvicorn app = create_app(args) uvicorn.run(app, host=args.host, port=args.port) + +if __name__ == "__main__": + main() diff --git a/api/openai_lightrag_server.py b/lightrag/api/openai_lightrag_server.py similarity index 99% rename from api/openai_lightrag_server.py rename to lightrag/api/openai_lightrag_server.py index 4746b2e3..c82e4013 100644 --- a/api/openai_lightrag_server.py +++ b/lightrag/api/openai_lightrag_server.py @@ -397,9 +397,12 @@ def create_app(args): return app -if __name__ == "__main__": +def main(): args = parse_args() import uvicorn app = create_app(args) uvicorn.run(app, host=args.host, port=args.port) + +if __name__ == "__main__": + main() diff --git a/api/requirements.txt b/lightrag/api/requirements.txt similarity index 100% rename from api/requirements.txt rename to lightrag/api/requirements.txt diff --git a/setup.py b/setup.py index 1b1f65f0..d3d2b59c 100644 --- a/setup.py +++ b/setup.py @@ -52,6 +52,16 @@ def 
read_requirements(): return deps +def read_api_requirements(): + api_deps = [] + try: + with open("./lightrag/api/requirements.txt") as f: + api_deps = [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print("Warning: API requirements.txt not found.") + return api_deps + + metadata = retrieve_metadata() long_description = read_long_description() requirements = read_requirements() @@ -85,4 +95,14 @@ setuptools.setup( if metadata.get("__url__") else "", }, + extras_require={ + "api": read_api_requirements(), # API requirements as optional + }, + entry_points={ + "console_scripts": [ + "lollms-lightrag-server=lightrag.api.lollms_lightrag_server:main [api]", + "ollama-lightrag-server=lightrag.api.ollama_lightrag_server:main [api]", + "openai-lightrag-server=lightrag.api.openai_lightrag_server:main [api]", + ], + }, )
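
For a quick end-to-end check after applying this patch, the optional extra and the new console scripts can be exercised as in the sketch below. It assumes a source checkout and a local Ollama instance on its default port (http://localhost:11434); the model names are just the examples used in the option tables above.

```bash
# Install in editable mode with the optional API dependencies
pip install -e ".[api]"

# The console scripts declared in entry_points should now be on PATH
ollama-lightrag-server --help

# Start a server against the local Ollama backend...
ollama-lightrag-server --model mistral-nemo:latest --embedding-model bge-m3:latest --port 9621 &

# ...and issue a hybrid-mode query through the REST API
curl -X POST "http://localhost:9621/query" \
    -H "Content-Type: application/json" \
    -d '{"query": "Your question here", "mode": "hybrid"}'
```

Here the `[api]` suffix on each entry point declares that the script depends on the packages pulled in by the optional `api` extra, which is why the servers are only expected to work when that extra is installed.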