diff --git a/.gitignore b/.gitignore
index 68484898..f038ef5d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,4 +14,4 @@ ignore_this.txt
 .ruff_cache/
 gui/
 *.log
-.vscode
\ No newline at end of file
+.vscode
diff --git a/api/.gitignore b/api/.gitignore
index c9e606d6..8cb6821a 100644
--- a/api/.gitignore
+++ b/api/.gitignore
@@ -1,2 +1,2 @@
 inputs
-rag_storage
\ No newline at end of file
+rag_storage
diff --git a/api/README_OPENAI.md b/api/README_OPENAI.md
index 6aa28989..42c0cf32 100644
--- a/api/README_OPENAI.md
+++ b/api/README_OPENAI.md
@@ -169,4 +169,3 @@ This project is licensed under the MIT License - see the LICENSE file for detail
 - Built with [FastAPI](https://fastapi.tiangolo.com/)
 - Uses [LightRAG](https://github.com/HKUDS/LightRAG) for document processing
 - Powered by [OpenAI](https://openai.com/) for language model inference
-
diff --git a/api/ollama_lightrag_server.py b/api/ollama_lightrag_server.py
index 4aab6f8c..850e814f 100644
--- a/api/ollama_lightrag_server.py
+++ b/api/ollama_lightrag_server.py
@@ -1,8 +1,5 @@
 from fastapi import FastAPI, HTTPException, File, UploadFile, Form
-from fastapi.responses import JSONResponse
 from pydantic import BaseModel
-import asyncio
-import os
 import logging
 import argparse
 from lightrag import LightRAG, QueryParam
@@ -13,7 +10,8 @@ from enum import Enum
 from pathlib import Path
 import shutil
 import aiofiles
-from ascii_colors import ASCIIColors, trace_exception
+from ascii_colors import trace_exception
+

 def parse_args():
     parser = argparse.ArgumentParser(
@@ -21,45 +19,84 @@ def parse_args():
     )

     # Server configuration
-    parser.add_argument('--host', default='0.0.0.0', help='Server host (default: 0.0.0.0)')
-    parser.add_argument('--port', type=int, default=9621, help='Server port (default: 9621)')
-
+    parser.add_argument(
+        "--host", default="0.0.0.0", help="Server host (default: 0.0.0.0)"
+    )
+    parser.add_argument(
+        "--port", type=int, default=9621, help="Server port (default: 9621)"
+    )
+
     # Directory configuration
-    parser.add_argument('--working-dir', default='./rag_storage',
-                        help='Working directory for RAG storage (default: ./rag_storage)')
-    parser.add_argument('--input-dir', default='./inputs',
-                        help='Directory containing input documents (default: ./inputs)')
-
+    parser.add_argument(
+        "--working-dir",
+        default="./rag_storage",
+        help="Working directory for RAG storage (default: ./rag_storage)",
+    )
+    parser.add_argument(
+        "--input-dir",
+        default="./inputs",
+        help="Directory containing input documents (default: ./inputs)",
+    )
+
     # Model configuration
-    parser.add_argument('--model', default='mistral-nemo:latest', help='LLM model name (default: mistral-nemo:latest)')
-    parser.add_argument('--embedding-model', default='bge-m3:latest',
-                        help='Embedding model name (default: bge-m3:latest)')
-    parser.add_argument('--ollama-host', default='http://localhost:11434',
-                        help='Ollama host URL (default: http://localhost:11434)')
-
+    parser.add_argument(
+        "--model",
+        default="mistral-nemo:latest",
+        help="LLM model name (default: mistral-nemo:latest)",
+    )
+    parser.add_argument(
+        "--embedding-model",
+        default="bge-m3:latest",
+        help="Embedding model name (default: bge-m3:latest)",
+    )
+    parser.add_argument(
+        "--ollama-host",
+        default="http://localhost:11434",
+        help="Ollama host URL (default: http://localhost:11434)",
+    )
+
     # RAG configuration
-    parser.add_argument('--max-async', type=int, default=4, help='Maximum async operations (default: 4)')
-    parser.add_argument('--max-tokens', type=int, default=32768, help='Maximum token size (default: 32768)')
-    parser.add_argument('--embedding-dim', type=int, default=1024,
-                        help='Embedding dimensions (default: 1024)')
-    parser.add_argument('--max-embed-tokens', type=int, default=8192,
-                        help='Maximum embedding token size (default: 8192)')
-
+    parser.add_argument(
+        "--max-async", type=int, default=4, help="Maximum async operations (default: 4)"
+    )
+    parser.add_argument(
+        "--max-tokens",
+        type=int,
+        default=32768,
+        help="Maximum token size (default: 32768)",
+    )
+    parser.add_argument(
+        "--embedding-dim",
+        type=int,
+        default=1024,
+        help="Embedding dimensions (default: 1024)",
+    )
+    parser.add_argument(
+        "--max-embed-tokens",
+        type=int,
+        default=8192,
+        help="Maximum embedding token size (default: 8192)",
+    )
+
     # Logging configuration
-    parser.add_argument('--log-level', default='INFO',
-                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
-                        help='Logging level (default: INFO)')
-
+    parser.add_argument(
+        "--log-level",
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        help="Logging level (default: INFO)",
+    )
+
     return parser.parse_args()
+

 class DocumentManager:
     """Handles document operations and tracking"""
-
-    def __init__(self, input_dir: str, supported_extensions: tuple = ('.txt', '.md')):
+
+    def __init__(self, input_dir: str, supported_extensions: tuple = (".txt", ".md")):
         self.input_dir = Path(input_dir)
         self.supported_extensions = supported_extensions
         self.indexed_files = set()
-
+
         # Create input directory if it doesn't exist
         self.input_dir.mkdir(parents=True, exist_ok=True)
@@ -67,7 +104,7 @@ class DocumentManager:
         """Scan input directory for new files"""
         new_files = []
         for ext in self.supported_extensions:
-            for file_path in self.input_dir.rglob(f'*{ext}'):
+            for file_path in self.input_dir.rglob(f"*{ext}"):
                 if file_path not in self.indexed_files:
                     new_files.append(file_path)
         return new_files
@@ -80,6 +117,7 @@ class DocumentManager:
         """Check if file type is supported"""
         return any(filename.lower().endswith(ext) for ext in self.supported_extensions)

+
 # Pydantic models
 class SearchMode(str, Enum):
     naive = "naive"
@@ -87,31 +125,38 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"

+
 class QueryRequest(BaseModel):
     query: str
     mode: SearchMode = SearchMode.hybrid
     stream: bool = False

+
 class QueryResponse(BaseModel):
     response: str

+
 class InsertTextRequest(BaseModel):
     text: str
     description: Optional[str] = None

+
 class InsertResponse(BaseModel):
     status: str
     message: str
     document_count: int

+
 def create_app(args):
     # Setup logging
-    logging.basicConfig(format="%(levelname)s:%(message)s", level=getattr(logging, args.log_level))
+    logging.basicConfig(
+        format="%(levelname)s:%(message)s", level=getattr(logging, args.log_level)
+    )

     # Initialize FastAPI app
     app = FastAPI(
         title="LightRAG API",
-        description="API for querying text using LightRAG with separate storage and input directories"
+        description="API for querying text using LightRAG with separate storage and input directories",
     )

     # Create working directory if it doesn't exist
@@ -127,7 +172,10 @@ def create_app(args):
         llm_model_name=args.model,
         llm_model_max_async=args.max_async,
         llm_model_max_token_size=args.max_tokens,
-        llm_model_kwargs={"host": args.ollama_host, "options": {"num_ctx": args.max_tokens}},
+        llm_model_kwargs={
+            "host": args.ollama_host,
+            "options": {"num_ctx": args.max_tokens},
+        },
         embedding_func=EmbeddingFunc(
             embedding_dim=args.embedding_dim,
             max_token_size=args.max_embed_tokens,
@@ -136,6 +184,7 @@ def create_app(args):
             ),
         ),
     )
+
     @app.on_event("startup")
     async def startup_event():
         """Index all files in input directory during startup"""
@@ -144,7 +193,7 @@ def create_app(args):
             for file_path in new_files:
                 try:
                     # Use async file reading
-                    async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
+                    async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                         content = await f.read()
                         # Use the async version of insert directly
                         await rag.ainsert(content)
@@ -153,9 +202,9 @@ def create_app(args):
                 except Exception as e:
                     trace_exception(e)
                     logging.error(f"Error indexing file {file_path}: {str(e)}")
-
+
             logging.info(f"Indexed {len(new_files)} documents from {args.input_dir}")
-
+
         except Exception as e:
             logging.error(f"Error during startup indexing: {str(e)}")

@@ -165,21 +214,21 @@ def create_app(args):
         try:
             new_files = doc_manager.scan_directory()
             indexed_count = 0
-
+
             for file_path in new_files:
                 try:
-                    with open(file_path, 'r', encoding='utf-8') as f:
+                    with open(file_path, "r", encoding="utf-8") as f:
                         content = f.read()
                         rag.insert(content)
                         doc_manager.mark_as_indexed(file_path)
                         indexed_count += 1
                 except Exception as e:
                     logging.error(f"Error indexing file {file_path}: {str(e)}")
-
+
             return {
                 "status": "success",
                 "indexed_count": indexed_count,
-                "total_documents": len(doc_manager.indexed_files)
+                "total_documents": len(doc_manager.indexed_files),
             }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -191,23 +240,23 @@ def create_app(args):
             if not doc_manager.is_supported_file(file.filename):
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}"
+                    detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}",
                 )
-
+
             file_path = doc_manager.input_dir / file.filename
             with open(file_path, "wb") as buffer:
                 shutil.copyfileobj(file.file, buffer)
-
+
             # Immediately index the uploaded file
             with open(file_path, "r", encoding="utf-8") as f:
                 content = f.read()
                 rag.insert(content)
                 doc_manager.mark_as_indexed(file_path)
-
+
             return {
                 "status": "success",
                 "message": f"File uploaded and indexed: {file.filename}",
-                "total_documents": len(doc_manager.indexed_files)
+                "total_documents": len(doc_manager.indexed_files),
             }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -217,9 +266,9 @@ def create_app(args):
         try:
             response = await rag.aquery(
                 request.query,
-                param=QueryParam(mode=request.mode, stream=request.stream)
+                param=QueryParam(mode=request.mode, stream=request.stream),
             )
-
+
             if request.stream:
                 result = ""
                 async for chunk in response:
@@ -234,14 +283,13 @@ def create_app(args):
     async def query_text_stream(request: QueryRequest):
         try:
             response = rag.query(
-                request.query,
-                param=QueryParam(mode=request.mode, stream=True)
+                request.query, param=QueryParam(mode=request.mode, stream=True)
             )
-
+
             async def stream_generator():
                 async for chunk in response:
                     yield chunk
-
+
             return stream_generator()
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -253,32 +301,29 @@ def create_app(args):
             return InsertResponse(
                 status="success",
                 message="Text successfully inserted",
-                document_count=len(rag)
+                document_count=len(rag),
             )
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))

     @app.post("/documents/file", response_model=InsertResponse)
-    async def insert_file(
-        file: UploadFile = File(...),
-        description: str = Form(None)
-    ):
+    async def insert_file(file: UploadFile = File(...), description: str = Form(None)):
         try:
             content = await file.read()
-
-            if file.filename.endswith(('.txt', '.md')):
-                text = content.decode('utf-8')
+
+            if file.filename.endswith((".txt", ".md")):
+                text = content.decode("utf-8")
                 rag.insert(text)
             else:
                 raise HTTPException(
                     status_code=400,
-                    detail="Unsupported file type. Only .txt and .md files are supported"
+                    detail="Unsupported file type. Only .txt and .md files are supported",
                 )
-
+
             return InsertResponse(
                 status="success",
                 message=f"File '{file.filename}' successfully inserted",
-                document_count=len(rag)
+                document_count=len(rag),
             )
         except UnicodeDecodeError:
             raise HTTPException(status_code=400, detail="File encoding not supported")
@@ -290,27 +335,27 @@ def create_app(args):
         try:
             inserted_count = 0
             failed_files = []
-
+
             for file in files:
                 try:
                     content = await file.read()
-                    if file.filename.endswith(('.txt', '.md')):
-                        text = content.decode('utf-8')
+                    if file.filename.endswith((".txt", ".md")):
+                        text = content.decode("utf-8")
                         rag.insert(text)
                         inserted_count += 1
                     else:
                         failed_files.append(f"{file.filename} (unsupported type)")
                 except Exception as e:
                     failed_files.append(f"{file.filename} ({str(e)})")
-
+
             status_message = f"Successfully inserted {inserted_count} documents"
             if failed_files:
                 status_message += f". Failed files: {', '.join(failed_files)}"
-
+
             return InsertResponse(
                 status="success" if inserted_count > 0 else "partial_success",
                 message=status_message,
-                document_count=len(rag)
+                document_count=len(rag),
             )
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -324,12 +369,11 @@ def create_app(args):
             return InsertResponse(
                 status="success",
                 message="All documents cleared successfully",
-                document_count=0
+                document_count=0,
             )
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
-

     @app.get("/health")
     async def get_status():
         """Get current system status"""
@@ -342,14 +386,16 @@ def create_app(args):
                 "model": args.model,
                 "embedding_model": args.embedding_model,
                 "max_tokens": args.max_tokens,
-                "ollama_host": args.ollama_host
-            }
+                "ollama_host": args.ollama_host,
+            },
         }

     return app

+
 if __name__ == "__main__":
     args = parse_args()
     import uvicorn
+
     app = create_app(args)
     uvicorn.run(app, host=args.host, port=args.port)
diff --git a/api/openai_lightrag_server.py b/api/openai_lightrag_server.py
index 6d8eb8fc..4746b2e3 100644
--- a/api/openai_lightrag_server.py
+++ b/api/openai_lightrag_server.py
@@ -1,8 +1,6 @@
 from fastapi import FastAPI, HTTPException, File, UploadFile, Form
-from fastapi.responses import JSONResponse
 from pydantic import BaseModel
 import asyncio
-import os
 import logging
 import argparse
 from lightrag import LightRAG, QueryParam
@@ -13,53 +11,81 @@ from enum import Enum
 from pathlib import Path
 import shutil
 import aiofiles
-from ascii_colors import ASCIIColors, trace_exception
-import numpy as np
+from ascii_colors import trace_exception
 import nest_asyncio

 # Apply nest_asyncio to solve event loop issues
 nest_asyncio.apply()

+
 def parse_args():
     parser = argparse.ArgumentParser(
         description="LightRAG FastAPI Server with OpenAI integration"
     )

     # Server configuration
-    parser.add_argument('--host', default='0.0.0.0', help='Server host (default: 0.0.0.0)')
-    parser.add_argument('--port', type=int, default=9621, help='Server port (default: 9621)')
-
+    parser.add_argument(
+        "--host", default="0.0.0.0", help="Server host (default: 0.0.0.0)"
+    )
+    parser.add_argument(
+        "--port", type=int, default=9621, help="Server port (default: 9621)"
+    )
+
     # Directory configuration
-    parser.add_argument('--working-dir', default='./rag_storage',
-                        help='Working directory for RAG storage (default: ./rag_storage)')
-    parser.add_argument('--input-dir', default='./inputs',
-                        help='Directory containing input documents (default: ./inputs)')
-
+    parser.add_argument(
+        "--working-dir",
+        default="./rag_storage",
+        help="Working directory for RAG storage (default: ./rag_storage)",
+    )
+    parser.add_argument(
+        "--input-dir",
+        default="./inputs",
+        help="Directory containing input documents (default: ./inputs)",
+    )
+
     # Model configuration
-    parser.add_argument('--model', default='gpt-4', help='OpenAI model name (default: gpt-4)')
-    parser.add_argument('--embedding-model', default='text-embedding-3-large',
-                        help='OpenAI embedding model (default: text-embedding-3-large)')
-
+    parser.add_argument(
+        "--model", default="gpt-4", help="OpenAI model name (default: gpt-4)"
+    )
+    parser.add_argument(
+        "--embedding-model",
+        default="text-embedding-3-large",
+        help="OpenAI embedding model (default: text-embedding-3-large)",
+    )
+
     # RAG configuration
-    parser.add_argument('--max-tokens', type=int, default=32768, help='Maximum token size (default: 32768)')
-    parser.add_argument('--max-embed-tokens', type=int, default=8192,
-                        help='Maximum embedding token size (default: 8192)')
-
+    parser.add_argument(
+        "--max-tokens",
+        type=int,
+        default=32768,
+        help="Maximum token size (default: 32768)",
+    )
+    parser.add_argument(
+        "--max-embed-tokens",
+        type=int,
+        default=8192,
+        help="Maximum embedding token size (default: 8192)",
+    )
+
     # Logging configuration
-    parser.add_argument('--log-level', default='INFO',
-                        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
-                        help='Logging level (default: INFO)')
-
+    parser.add_argument(
+        "--log-level",
+        default="INFO",
+        choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
+        help="Logging level (default: INFO)",
+    )
+
     return parser.parse_args()
+

 class DocumentManager:
     """Handles document operations and tracking"""
-
-    def __init__(self, input_dir: str, supported_extensions: tuple = ('.txt', '.md')):
+
+    def __init__(self, input_dir: str, supported_extensions: tuple = (".txt", ".md")):
         self.input_dir = Path(input_dir)
         self.supported_extensions = supported_extensions
         self.indexed_files = set()
-
+
         # Create input directory if it doesn't exist
         self.input_dir.mkdir(parents=True, exist_ok=True)
@@ -67,7 +93,7 @@ class DocumentManager:
         """Scan input directory for new files"""
         new_files = []
         for ext in self.supported_extensions:
-            for file_path in self.input_dir.rglob(f'*{ext}'):
+            for file_path in self.input_dir.rglob(f"*{ext}"):
                 if file_path not in self.indexed_files:
                     new_files.append(file_path)
         return new_files
@@ -80,6 +106,7 @@ class DocumentManager:
         """Check if file type is supported"""
         return any(filename.lower().endswith(ext) for ext in self.supported_extensions)

+
 # Pydantic models
 class SearchMode(str, Enum):
     naive = "naive"
@@ -87,37 +114,45 @@ class SearchMode(str, Enum):
     global_ = "global"
     hybrid = "hybrid"

+
 class QueryRequest(BaseModel):
     query: str
     mode: SearchMode = SearchMode.hybrid
     stream: bool = False

+
 class QueryResponse(BaseModel):
     response: str

+
 class InsertTextRequest(BaseModel):
     text: str
     description: Optional[str] = None

+
 class InsertResponse(BaseModel):
     status: str
     message: str
     document_count: int

+
 async def get_embedding_dim(embedding_model: str) -> int:
     """Get embedding dimensions for the specified model"""
     test_text = ["This is a test sentence."]
     embedding = await openai_embedding(test_text, model=embedding_model)
     return embedding.shape[1]

+
 def create_app(args):
     # Setup logging
-    logging.basicConfig(format="%(levelname)s:%(message)s", level=getattr(logging, args.log_level))
+    logging.basicConfig(
+        format="%(levelname)s:%(message)s", level=getattr(logging, args.log_level)
+    )

     # Initialize FastAPI app
     app = FastAPI(
         title="LightRAG API",
-        description="API for querying text using LightRAG with OpenAI integration"
+        description="API for querying text using LightRAG with OpenAI integration",
     )

     # Create working directory if it doesn't exist
@@ -129,6 +164,18 @@ def create_app(args):
     # Get embedding dimensions
     embedding_dim = asyncio.run(get_embedding_dim(args.embedding_model))

+    async def async_openai_complete(
+        prompt, system_prompt=None, history_messages=[], **kwargs
+    ):
+        """Async wrapper for OpenAI completion"""
+        return await openai_complete_if_cache(
+            args.model,
+            prompt,
+            system_prompt=system_prompt,
+            history_messages=history_messages,
+            **kwargs,
+        )
+
     # Initialize RAG with OpenAI configuration
     rag = LightRAG(
         working_dir=args.working_dir,
@@ -142,15 +189,6 @@ def create_app(args):
         ),
     )

-    async def async_openai_complete(prompt, system_prompt=None, history_messages=[], **kwargs):
-        """Async wrapper for OpenAI completion"""
-        return await openai_complete_if_cache(
-            args.model,
-            prompt,
-            system_prompt=system_prompt,
-            history_messages=history_messages,
-            **kwargs
-        )
     @app.on_event("startup")
     async def startup_event():
         """Index all files in input directory during startup"""
@@ -159,7 +197,7 @@ def create_app(args):
             for file_path in new_files:
                 try:
                     # Use async file reading
-                    async with aiofiles.open(file_path, 'r', encoding='utf-8') as f:
+                    async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
                         content = await f.read()
                         # Use the async version of insert directly
                         await rag.ainsert(content)
@@ -168,9 +206,9 @@ def create_app(args):
                 except Exception as e:
                     trace_exception(e)
                     logging.error(f"Error indexing file {file_path}: {str(e)}")
-
+
             logging.info(f"Indexed {len(new_files)} documents from {args.input_dir}")
-
+
         except Exception as e:
             logging.error(f"Error during startup indexing: {str(e)}")

@@ -180,21 +218,21 @@ def create_app(args):
         try:
             new_files = doc_manager.scan_directory()
             indexed_count = 0
-
+
             for file_path in new_files:
                 try:
-                    with open(file_path, 'r', encoding='utf-8') as f:
+                    with open(file_path, "r", encoding="utf-8") as f:
                         content = f.read()
                         rag.insert(content)
                         doc_manager.mark_as_indexed(file_path)
                         indexed_count += 1
                 except Exception as e:
                     logging.error(f"Error indexing file {file_path}: {str(e)}")
-
+
             return {
                 "status": "success",
                 "indexed_count": indexed_count,
-                "total_documents": len(doc_manager.indexed_files)
+                "total_documents": len(doc_manager.indexed_files),
             }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -206,23 +244,23 @@ def create_app(args):
             if not doc_manager.is_supported_file(file.filename):
                 raise HTTPException(
                     status_code=400,
-                    detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}"
+                    detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}",
                 )
-
+
             file_path = doc_manager.input_dir / file.filename
             with open(file_path, "wb") as buffer:
                 shutil.copyfileobj(file.file, buffer)
-
+
             # Immediately index the uploaded file
             with open(file_path, "r", encoding="utf-8") as f:
                 content = f.read()
                 rag.insert(content)
                 doc_manager.mark_as_indexed(file_path)
-
+
             return {
                 "status": "success",
                 "message": f"File uploaded and indexed: {file.filename}",
-                "total_documents": len(doc_manager.indexed_files)
+                "total_documents": len(doc_manager.indexed_files),
             }
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -232,9 +270,9 @@ def create_app(args):
         try:
             response = await rag.aquery(
                 request.query,
-                param=QueryParam(mode=request.mode, stream=request.stream)
+                param=QueryParam(mode=request.mode, stream=request.stream),
             )
-
+
             if request.stream:
                 result = ""
                 async for chunk in response:
@@ -249,14 +287,13 @@ def create_app(args):
     async def query_text_stream(request: QueryRequest):
         try:
             response = rag.query(
-                request.query,
-                param=QueryParam(mode=request.mode, stream=True)
+                request.query, param=QueryParam(mode=request.mode, stream=True)
             )
-
+
             async def stream_generator():
                 async for chunk in response:
                     yield chunk
-
+
             return stream_generator()
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -268,32 +305,29 @@ def create_app(args):
             return InsertResponse(
                 status="success",
                 message="Text successfully inserted",
-                document_count=len(rag)
+                document_count=len(rag),
             )
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))

     @app.post("/documents/file", response_model=InsertResponse)
-    async def insert_file(
-        file: UploadFile = File(...),
-        description: str = Form(None)
-    ):
+    async def insert_file(file: UploadFile = File(...), description: str = Form(None)):
         try:
             content = await file.read()
-
-            if file.filename.endswith(('.txt', '.md')):
-                text = content.decode('utf-8')
+
+            if file.filename.endswith((".txt", ".md")):
+                text = content.decode("utf-8")
                 rag.insert(text)
             else:
                 raise HTTPException(
                     status_code=400,
-                    detail="Unsupported file type. Only .txt and .md files are supported"
+                    detail="Unsupported file type. Only .txt and .md files are supported",
                 )
-
+
             return InsertResponse(
                 status="success",
                 message=f"File '{file.filename}' successfully inserted",
-                document_count=len(rag)
+                document_count=len(rag),
             )
         except UnicodeDecodeError:
             raise HTTPException(status_code=400, detail="File encoding not supported")
@@ -305,27 +339,27 @@ def create_app(args):
         try:
             inserted_count = 0
             failed_files = []
-
+
             for file in files:
                 try:
                     content = await file.read()
-                    if file.filename.endswith(('.txt', '.md')):
-                        text = content.decode('utf-8')
+                    if file.filename.endswith((".txt", ".md")):
+                        text = content.decode("utf-8")
                         rag.insert(text)
                         inserted_count += 1
                     else:
                         failed_files.append(f"{file.filename} (unsupported type)")
                 except Exception as e:
                     failed_files.append(f"{file.filename} ({str(e)})")
-
+
             status_message = f"Successfully inserted {inserted_count} documents"
             if failed_files:
                 status_message += f". Failed files: {', '.join(failed_files)}"
-
+
             return InsertResponse(
                 status="success" if inserted_count > 0 else "partial_success",
                 message=status_message,
-                document_count=len(rag)
+                document_count=len(rag),
             )
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -339,7 +373,7 @@ def create_app(args):
             return InsertResponse(
                 status="success",
                 message="All documents cleared successfully",
-                document_count=0
+                document_count=0,
             )
         except Exception as e:
             raise HTTPException(status_code=500, detail=str(e))
@@ -356,14 +390,16 @@ def create_app(args):
                 "model": args.model,
                 "embedding_model": args.embedding_model,
                 "max_tokens": args.max_tokens,
-                "embedding_dim": embedding_dim
-            }
+                "embedding_dim": embedding_dim,
+            },
         }

     return app

+
 if __name__ == "__main__":
     args = parse_args()
     import uvicorn
+
     app = create_app(args)
     uvicorn.run(app, host=args.host, port=args.port)
diff --git a/api/requirements.txt b/api/requirements.txt
index 95aa72e5..6871ffd8 100644
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -1,4 +1,4 @@
+ascii_colors
 fastapi
-uvicorn
 python-multipart
-ascii_colors
\ No newline at end of file
+uvicorn