From 02f94ab228c17122833173aa4c9825bada4a176f Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 15:56:48 +0800
Subject: [PATCH 1/4] [feat] Add API server implementation and endpoints

---
 README.md                                          | 119 ++++++++++++++
 .../lightrag_api_openai_compatible_demo.py         | 153 ++++++++++++++++++
 2 files changed, 272 insertions(+)
 create mode 100644 examples/lightrag_api_openai_compatible_demo.py

diff --git a/README.md b/README.md
index 7fab9a01..d11b1691 100644
--- a/README.md
+++ b/README.md
@@ -397,6 +397,125 @@ if __name__ == "__main__":
 
+## API Server Implementation
+
+LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests.
+
+### Setting up the API Server
+<details>
+<summary>Click to expand setup instructions</summary>
+
+1. First, ensure you have the required dependencies:
+```bash
+pip install fastapi uvicorn pydantic
+```
+
+2. Set up your environment variables:
+```bash
+export RAG_DIR="your_index_directory"  # Optional: defaults to "index_default"
+```
+
+3. Run the API server:
+```bash
+python examples/lightrag_api_openai_compatible_demo.py
+```
+
+The server will start on `http://0.0.0.0:8020`.
+</details>
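+
+Once the server is running, a quick smoke test is to call the health endpoint documented below (this assumes the default host and port from the demo script):
+```bash
+curl http://127.0.0.1:8020/health
+```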
+
+### API Endpoints
+
+The API server provides the following endpoints. A consolidated Python client sketch follows the endpoint reference below.
+
+#### 1. Query Endpoint
+<details>
+<summary>Click to view Query endpoint details</summary>
+
+- **URL:** `/query`
+- **Method:** POST
+- **Body:** (`mode` can be `"naive"`, `"local"`, `"global"`, or `"hybrid"`)
+```json
+{
+  "query": "Your question here",
+  "mode": "hybrid"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/query" \
+     -H "Content-Type: application/json" \
+     -d '{"query": "What are the main themes?", "mode": "hybrid"}'
+```
+</details>
+
+#### 2. Insert Text Endpoint
+<details>
+<summary>Click to view Insert Text endpoint details</summary>
+
+- **URL:** `/insert`
+- **Method:** POST
+- **Body:**
+```json
+{
+  "text": "Your text content here"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/insert" \
+     -H "Content-Type: application/json" \
+     -d '{"text": "Content to be inserted into RAG"}'
+```
+</details>
+
+#### 3. Insert File Endpoint
+<details>
+<summary>Click to view Insert File endpoint details</summary>
+
+- **URL:** `/insert_file`
+- **Method:** POST
+- **Body:**
+```json
+{
+  "file_path": "path/to/your/file.txt"
+}
+```
+- **Example:**
+```bash
+curl -X POST "http://127.0.0.1:8020/insert_file" \
+     -H "Content-Type: application/json" \
+     -d '{"file_path": "./book.txt"}'
+```
+</details>
+
+#### 4. Health Check Endpoint
+<details>
+<summary>Click to view Health Check endpoint details</summary>
+
+- **URL:** `/health`
+- **Method:** GET
+- **Example:**
+```bash
+curl -X GET "http://127.0.0.1:8020/health"
+```
+</details>
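+
+As an alternative to curl, here is a minimal Python client sketch covering the endpoints above. It assumes the server is running locally on the default port and that the third-party `requests` package is installed (`pip install requests`):
+
+```python
+import requests
+
+BASE_URL = "http://127.0.0.1:8020"  # assumed default host/port from the demo script
+
+# Insert some text, then query it back in hybrid mode.
+insert_resp = requests.post(f"{BASE_URL}/insert", json={"text": "Content to be inserted into RAG"})
+print(insert_resp.json())  # {"status": "success", "data": None, "message": "Text inserted successfully"}
+
+query_resp = requests.post(f"{BASE_URL}/query", json={"query": "What are the main themes?", "mode": "hybrid"})
+print(query_resp.json())  # {"status": "success", "data": "<answer text>", "message": None}
+```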
+
+### Configuration
+
+The API server can be configured using environment variables:
+- `RAG_DIR`: directory for storing the RAG index (default: `index_default`)
+- API keys and base URLs for your LLM and embedding model providers must be set in the code (see `llm_model_func` and `embedding_func` in the demo script)
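+
+For example, to point a single run of the server at a custom index directory (standard shell environment-variable syntax; the directory is created if it does not exist):
+```bash
+RAG_DIR="./my_index" python examples/lightrag_api_openai_compatible_demo.py
+```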
+
+### Error Handling
+<details>
+<summary>Click to view error handling details</summary>
+
+The API includes comprehensive error handling:
+- File not found errors return HTTP 404
+- Processing errors return HTTP 500
+- File reads support multiple encodings (UTF-8 first, falling back to GBK)
+</details>
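+
+When a request fails, the error surfaces as FastAPI's standard JSON error body. For example, pointing `/insert_file` at a missing file returns HTTP 404 with a payload like:
+```json
+{
+  "detail": "File not found: ./book.txt"
+}
+```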
+ ## Evaluation ### Dataset The dataset used in LightRAG can be downloaded from [TommyChien/UltraDomain](https://huggingface.co/datasets/TommyChien/UltraDomain). diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py new file mode 100644 index 00000000..f8d105ea --- /dev/null +++ b/examples/lightrag_api_openai_compatible_demo.py @@ -0,0 +1,153 @@ +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +import os +from lightrag import LightRAG, QueryParam +from lightrag.llm import openai_complete_if_cache, openai_embedding +from lightrag.utils import EmbeddingFunc +import numpy as np +from typing import Optional +import asyncio +import nest_asyncio + +# Apply nest_asyncio to solve event loop issues +nest_asyncio.apply() + +DEFAULT_RAG_DIR="index_default" +app = FastAPI(title="LightRAG API", description="API for RAG operations") + +# Configure working directory +WORKING_DIR = os.environ.get('RAG_DIR', f'{DEFAULT_RAG_DIR}') +print(f"WORKING_DIR: {WORKING_DIR}") +if not os.path.exists(WORKING_DIR): + os.mkdir(WORKING_DIR) + +# LLM model function +async def llm_model_func( + prompt, system_prompt=None, history_messages=[], **kwargs +) -> str: + return await openai_complete_if_cache( + "gpt-4o-mini", + prompt, + system_prompt=system_prompt, + history_messages=history_messages, + api_key='YOUR_API_KEY', + base_url="YourURL/v1", + **kwargs, + ) + +# Embedding function +async def embedding_func(texts: list[str]) -> np.ndarray: + return await openai_embedding( + texts, + model="text-embedding-3-large", + api_key='YOUR_API_KEY', + base_url="YourURL/v1", + ) + +# Initialize RAG instance +rag = LightRAG( + working_dir=WORKING_DIR, + llm_model_func=llm_model_func, + embedding_func=EmbeddingFunc( + embedding_dim=3072, max_token_size=8192, func=embedding_func + ), +) + +# Data models +class QueryRequest(BaseModel): + query: str + mode: str = "hybrid" + +class InsertRequest(BaseModel): + text: str + +class InsertFileRequest(BaseModel): + file_path: str + +class Response(BaseModel): + status: str + data: Optional[str] = None + message: Optional[str] = None + +# API routes +@app.post("/query", response_model=Response) +async def query_endpoint(request: QueryRequest): + try: + loop = asyncio.get_event_loop() + result = await loop.run_in_executor( + None, + lambda: rag.query(request.query, param=QueryParam(mode=request.mode)) + ) + return Response( + status="success", + data=result + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/insert", response_model=Response) +async def insert_endpoint(request: InsertRequest): + try: + loop = asyncio.get_event_loop() + await loop.run_in_executor(None, lambda: rag.insert(request.text)) + return Response( + status="success", + message="Text inserted successfully" + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/insert_file", response_model=Response) +async def insert_file(request: InsertFileRequest): + try: + # Check if file exists + if not os.path.exists(request.file_path): + raise HTTPException( + status_code=404, + detail=f"File not found: {request.file_path}" + ) + + # Read file content + try: + with open(request.file_path, 'r', encoding='utf-8') as f: + content = f.read() + except UnicodeDecodeError: + # If UTF-8 decoding fails, try other encodings + with open(request.file_path, 'r', encoding='gbk') as f: + content = f.read() + + # Insert file content + loop = asyncio.get_event_loop() + await 
+
+        return Response(
+            status="success",
+            message=f"File content from {request.file_path} inserted successfully"
+        )
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+async def health_check():
+    return {"status": "healthy"}
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8020)
+
+# Usage example
+# To run the server, use the following command in your terminal:
+# python lightrag_api_openai_compatible_demo.py
+
+# Example requests:
+# 1. Query:
+# curl -X POST "http://127.0.0.1:8020/query" -H "Content-Type: application/json" -d '{"query": "your query here", "mode": "hybrid"}'
+
+# 2. Insert text:
+# curl -X POST "http://127.0.0.1:8020/insert" -H "Content-Type: application/json" -d '{"text": "your text here"}'
+
+# 3. Insert file:
+# curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}'
+
+# 4. Health check:
+# curl -X GET "http://127.0.0.1:8020/health"
\ No newline at end of file

From 08feac942ad0de01ccbe16253d7b7a2ad35b7621 Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 16:00:30 +0800
Subject: [PATCH 2/4] Refactor code formatting in lightrag_api_openai_compatible_demo.py

---
 .../lightrag_api_openai_compatible_demo.py    | 29 ++++++++++++++-----
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py
index f8d105ea..ad9560dc 100644
--- a/examples/lightrag_api_openai_compatible_demo.py
+++ b/examples/lightrag_api_openai_compatible_demo.py
@@ -12,7 +12,7 @@ import nest_asyncio
 # Apply nest_asyncio to solve event loop issues
 nest_asyncio.apply()
 
-DEFAULT_RAG_DIR="index_default"
+DEFAULT_RAG_DIR = "index_default"
 app = FastAPI(title="LightRAG API", description="API for RAG operations")
 
 # Configure working directory
@@ -22,6 +22,8 @@ if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
 # LLM model function
+
+
 async def llm_model_func(
     prompt, system_prompt=None, history_messages=[], **kwargs
 ) -> str:
@@ -36,6 +38,8 @@ async def llm_model_func(
     )
 
 # Embedding function
+
+
 async def embedding_func(texts: list[str]) -> np.ndarray:
     return await openai_embedding(
         texts,
@@ -54,29 +58,37 @@ rag = LightRAG(
 )
 
 # Data models
+
+
 class QueryRequest(BaseModel):
     query: str
     mode: str = "hybrid"
 
+
 class InsertRequest(BaseModel):
     text: str
 
+
 class InsertFileRequest(BaseModel):
     file_path: str
 
+
 class Response(BaseModel):
     status: str
     data: Optional[str] = None
     message: Optional[str] = None
 
 # API routes
+
+
 @app.post("/query", response_model=Response)
 async def query_endpoint(request: QueryRequest):
     try:
         loop = asyncio.get_event_loop()
         result = await loop.run_in_executor(
-            None,
-            lambda: rag.query(request.query, param=QueryParam(mode=request.mode))
+            None,
+            lambda: rag.query(
+                request.query, param=QueryParam(mode=request.mode))
         )
         return Response(
             status="success",
@@ -85,6 +97,7 @@ async def query_endpoint(request: QueryRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+
 @app.post("/insert", response_model=Response)
 async def insert_endpoint(request: InsertRequest):
     try:
@@ -97,6 +110,7 @@ async def insert_endpoint(request: InsertRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+
 @app.post("/insert_file", response_model=Response)
 async def insert_file(request: InsertFileRequest):
     try:
@@ -106,7 +120,7 @@ async def insert_file(request: InsertFileRequest):
                 status_code=404,
                 detail=f"File not found: {request.file_path}"
             )
-
+
         # Read file content
         try:
             with open(request.file_path, 'r', encoding='utf-8') as f:
@@ -115,11 +129,11 @@ async def insert_file(request: InsertFileRequest):
             # If UTF-8 decoding fails, try other encodings
             with open(request.file_path, 'r', encoding='gbk') as f:
                 content = f.read()
-
+
         # Insert file content
         loop = asyncio.get_event_loop()
         await loop.run_in_executor(None, lambda: rag.insert(content))
-
+
         return Response(
             status="success",
             message=f"File content from {request.file_path} inserted successfully"
@@ -127,6 +141,7 @@ async def insert_file(request: InsertFileRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
+
 @app.get("/health")
 async def health_check():
     return {"status": "healthy"}
@@ -150,4 +165,4 @@ if __name__ == "__main__":
 # curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}'
 
 # 4. Health check:
-# curl -X GET "http://127.0.0.1:8020/health"
\ No newline at end of file
+# curl -X GET "http://127.0.0.1:8020/health"

From fb84c1e5be3b6b5dc34ed96606194b93624f3900 Mon Sep 17 00:00:00 2001
From: "zhenjie.ye"
Date: Sat, 26 Oct 2024 16:09:36 +0800
Subject: [PATCH 3/4] Refactor code formatting in lightrag_api_openai_compatible_demo.py

---
 .../lightrag_api_openai_compatible_demo.py    | 34 ++++++++-----------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/examples/lightrag_api_openai_compatible_demo.py b/examples/lightrag_api_openai_compatible_demo.py
index ad9560dc..2cd262bb 100644
--- a/examples/lightrag_api_openai_compatible_demo.py
+++ b/examples/lightrag_api_openai_compatible_demo.py
@@ -16,7 +16,7 @@ DEFAULT_RAG_DIR = "index_default"
 app = FastAPI(title="LightRAG API", description="API for RAG operations")
 
 # Configure working directory
-WORKING_DIR = os.environ.get('RAG_DIR', f'{DEFAULT_RAG_DIR}')
+WORKING_DIR = os.environ.get("RAG_DIR", f"{DEFAULT_RAG_DIR}")
 print(f"WORKING_DIR: {WORKING_DIR}")
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
@@ -32,11 +32,12 @@ async def llm_model_func(
         prompt,
         system_prompt=system_prompt,
         history_messages=history_messages,
-        api_key='YOUR_API_KEY',
+        api_key="YOUR_API_KEY",
         base_url="YourURL/v1",
         **kwargs,
     )
 
+
 # Embedding function
 
 
@@ -44,10 +45,11 @@ async def embedding_func(texts: list[str]) -> np.ndarray:
     return await openai_embedding(
         texts,
         model="text-embedding-3-large",
-        api_key='YOUR_API_KEY',
+        api_key="YOUR_API_KEY",
         base_url="YourURL/v1",
     )
 
+
 # Initialize RAG instance
 rag = LightRAG(
     working_dir=WORKING_DIR,
@@ -78,6 +80,7 @@ class Response(BaseModel):
     data: Optional[str] = None
     message: Optional[str] = None
 
+
 # API routes
 
 
@@ -86,14 +89,9 @@ async def query_endpoint(request: QueryRequest):
     try:
         loop = asyncio.get_event_loop()
         result = await loop.run_in_executor(
-            None,
-            lambda: rag.query(
-                request.query, param=QueryParam(mode=request.mode))
-        )
-        return Response(
-            status="success",
-            data=result
+            None, lambda: rag.query(request.query, param=QueryParam(mode=request.mode))
         )
+        return Response(status="success", data=result)
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
@@ -103,10 +101,7 @@ async def insert_endpoint(request: InsertRequest):
     try:
         loop = asyncio.get_event_loop()
         await loop.run_in_executor(None, lambda: rag.insert(request.text))
-        return Response(
-            status="success",
-            message="Text inserted successfully"
-        )
+        return Response(status="success", message="Text inserted successfully")
Response(status="success", message="Text inserted successfully") except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -117,17 +112,16 @@ async def insert_file(request: InsertFileRequest): # Check if file exists if not os.path.exists(request.file_path): raise HTTPException( - status_code=404, - detail=f"File not found: {request.file_path}" + status_code=404, detail=f"File not found: {request.file_path}" ) # Read file content try: - with open(request.file_path, 'r', encoding='utf-8') as f: + with open(request.file_path, "r", encoding="utf-8") as f: content = f.read() except UnicodeDecodeError: # If UTF-8 decoding fails, try other encodings - with open(request.file_path, 'r', encoding='gbk') as f: + with open(request.file_path, "r", encoding="gbk") as f: content = f.read() # Insert file content @@ -136,7 +130,7 @@ async def insert_file(request: InsertFileRequest): return Response( status="success", - message=f"File content from {request.file_path} inserted successfully" + message=f"File content from {request.file_path} inserted successfully", ) except Exception as e: raise HTTPException(status_code=500, detail=str(e)) @@ -146,8 +140,10 @@ async def insert_file(request: InsertFileRequest): async def health_check(): return {"status": "healthy"} + if __name__ == "__main__": import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8020) # Usage example From f71e389d5b2772b1cc381dada644b9118334d9dc Mon Sep 17 00:00:00 2001 From: "zhenjie.ye" Date: Sat, 26 Oct 2024 16:12:10 +0800 Subject: [PATCH 4/4] Refactor code formatting in lightrag_api_openai_compatible_demo.py --- lightrag/lightrag.py | 4 +--- lightrag/llm.py | 4 +++- setup.py | 31 ++++++++++++++++++++++++------- 3 files changed, 28 insertions(+), 11 deletions(-) diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 3004f5ed..b84e22ef 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -85,9 +85,7 @@ class LightRAG: # LLM llm_model_func: callable = gpt_4o_mini_complete # hf_model_complete# - llm_model_name: str = ( - "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' - ) + llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it' llm_model_max_token_size: int = 32768 llm_model_max_async: int = 16 diff --git a/lightrag/llm.py b/lightrag/llm.py index bb0d6063..fd6b72d6 100644 --- a/lightrag/llm.py +++ b/lightrag/llm.py @@ -286,7 +286,9 @@ async def hf_model_if_cache( output = hf_model.generate( **input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True ) - response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True) + response_text = hf_tokenizer.decode( + output[0][len(inputs["input_ids"][0]) :], skip_special_tokens=True + ) if hashing_kv is not None: await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}}) return response_text diff --git a/setup.py b/setup.py index bdf49f02..1b1f65f0 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ import setuptools from pathlib import Path + # Reading the long description from README.md def read_long_description(): try: @@ -8,6 +9,7 @@ def read_long_description(): except FileNotFoundError: return "A description of LightRAG is currently unavailable." 
+
 # Retrieving metadata from __init__.py
 def retrieve_metadata():
     vars2find = ["__author__", "__version__", "__url__"]
@@ -17,18 +19,26 @@ def retrieve_metadata():
         for line in f.readlines():
             for v in vars2find:
                 if line.startswith(v):
-                    line = line.replace(" ", "").replace('"', "").replace("'", "").strip()
+                    line = (
+                        line.replace(" ", "")
+                        .replace('"', "")
+                        .replace("'", "")
+                        .strip()
+                    )
                     vars2readme[v] = line.split("=")[1]
     except FileNotFoundError:
         raise FileNotFoundError("Metadata file './lightrag/__init__.py' not found.")
-
+
     # Checking if all required variables are found
     missing_vars = [v for v in vars2find if v not in vars2readme]
     if missing_vars:
-        raise ValueError(f"Missing required metadata variables in __init__.py: {missing_vars}")
-
+        raise ValueError(
+            f"Missing required metadata variables in __init__.py: {missing_vars}"
+        )
+
     return vars2readme
 
+
 # Reading dependencies from requirements.txt
 def read_requirements():
     deps = []
@@ -36,9 +46,12 @@ def read_requirements():
         with open("./requirements.txt") as f:
             deps = [line.strip() for line in f if line.strip()]
     except FileNotFoundError:
-        print("Warning: 'requirements.txt' not found. No dependencies will be installed.")
+        print(
+            "Warning: 'requirements.txt' not found. No dependencies will be installed."
+        )
     return deps
 
+
 metadata = retrieve_metadata()
 long_description = read_long_description()
 requirements = read_requirements()
@@ -51,7 +64,9 @@ setuptools.setup(
     description="LightRAG: Simple and Fast Retrieval-Augmented Generation",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    packages=setuptools.find_packages(exclude=("tests*", "docs*")),  # Automatically find packages
+    packages=setuptools.find_packages(
+        exclude=("tests*", "docs*")
+    ),  # Automatically find packages
     classifiers=[
         "Development Status :: 4 - Beta",
         "Programming Language :: Python :: 3",
@@ -66,6 +81,8 @@ setuptools.setup(
     project_urls={  # Additional project metadata
         "Documentation": metadata.get("__url__", ""),
         "Source": metadata.get("__url__", ""),
-        "Tracker": f"{metadata.get('__url__', '')}/issues" if metadata.get("__url__") else ""
+        "Tracker": f"{metadata.get('__url__', '')}/issues"
+        if metadata.get("__url__")
+        else "",
     },
 )