From 2266399da612c67f8e5036f186f8d35f53a3e1a3 Mon Sep 17 00:00:00 2001
From: Saifeddine ALOUI
Date: Wed, 18 Dec 2024 01:37:16 +0100
Subject: [PATCH] working server

---
 api/README.md                 | 21 ++++++---------------
 api/ollama_lightrag_server.py | 14 ++++++++------
 api/requirements.txt          |  3 ++-
 3 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/api/README.md b/api/README.md
index a1843052..04be5ac3 100644
--- a/api/README.md
+++ b/api/README.md
@@ -56,13 +56,13 @@ Available options:
 |-----------|---------|-------------|
 | --host | 0.0.0.0 | Server host |
 | --port | 9621 | Server port |
-| --model | gemma2:2b | LLM model name |
-| --embedding-model | nomic-embed-text | Embedding model name |
+| --model | mistral-nemo:latest | LLM model name |
+| --embedding-model | bge-m3:latest | Embedding model name |
 | --ollama-host | http://localhost:11434 | Ollama host URL |
-| --working-dir | ./dickens | Working directory for RAG |
+| --working-dir | ./rag_storage | Working directory for RAG |
 | --max-async | 4 | Maximum async operations |
 | --max-tokens | 32768 | Maximum token size |
-| --embedding-dim | 768 | Embedding dimensions |
+| --embedding-dim | 1024 | Embedding dimensions |
 | --max-embed-tokens | 8192 | Maximum embedding token size |
 | --input-file | ./book.txt | Initial input file |
 | --log-level | INFO | Logging level |
@@ -79,11 +79,11 @@ python ollama_lightrag_server.py
 python ollama_lightrag_server.py --model llama2:13b --port 8080 --working-dir ./custom_rag
 ```
 
+Make sure the models are installed in your Ollama instance:
 ```bash
-python ollama_lightrag_server.py --model mistral-nemo:latest --embedding-dim 1024 --embedding-model bge-m3
+python ollama_lightrag_server.py --model mistral-nemo:latest --embedding-model bge-m3 --embedding-dim 1024
 ```
-
 
 ## API Endpoints
 
 ### Query Endpoints
@@ -165,13 +165,6 @@ When the server is running, visit:
 - Swagger UI: http://localhost:9621/docs
 - ReDoc: http://localhost:9621/redoc
 
-## Contributing
-
-1. Fork the repository
-2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
-3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
-4. Push to the branch (`git push origin feature/AmazingFeature`)
-5. Open a Pull Request
 
 ## License
 
@@ -182,5 +175,3 @@ This project is licensed under the MIT License - see the LICENSE file for details
 - Built with [FastAPI](https://fastapi.tiangolo.com/)
 - Uses [LightRAG](https://github.com/HKUDS/LightRAG) for document processing
 - Powered by [Ollama](https://ollama.ai/) for LLM inference
-
-## Support
diff --git a/api/ollama_lightrag_server.py b/api/ollama_lightrag_server.py
index 1c463d57..4aab6f8c 100644
--- a/api/ollama_lightrag_server.py
+++ b/api/ollama_lightrag_server.py
@@ -13,6 +13,7 @@ from enum import Enum
 from pathlib import Path
 import shutil
 import aiofiles
+from ascii_colors import ASCIIColors, trace_exception
 
 def parse_args():
     parser = argparse.ArgumentParser(
@@ -30,17 +31,17 @@ def parse_args():
                         help='Directory containing input documents (default: ./inputs)')
 
     # Model configuration
-    parser.add_argument('--model', default='gemma2:2b', help='LLM model name (default: gemma2:2b)')
-    parser.add_argument('--embedding-model', default='nomic-embed-text',
-                        help='Embedding model name (default: nomic-embed-text)')
+    parser.add_argument('--model', default='mistral-nemo:latest', help='LLM model name (default: mistral-nemo:latest)')
+    parser.add_argument('--embedding-model', default='bge-m3:latest',
+                        help='Embedding model name (default: bge-m3:latest)')
     parser.add_argument('--ollama-host', default='http://localhost:11434',
                         help='Ollama host URL (default: http://localhost:11434)')
 
     # RAG configuration
     parser.add_argument('--max-async', type=int, default=4, help='Maximum async operations (default: 4)')
     parser.add_argument('--max-tokens', type=int, default=32768, help='Maximum token size (default: 32768)')
-    parser.add_argument('--embedding-dim', type=int, default=768,
-                        help='Embedding dimensions (default: 768)')
+    parser.add_argument('--embedding-dim', type=int, default=1024,
+                        help='Embedding dimensions (default: 1024)')
     parser.add_argument('--max-embed-tokens', type=int, default=8192,
                         help='Maximum embedding token size (default: 8192)')
 
@@ -150,6 +151,7 @@ def create_app(args):
                     doc_manager.mark_as_indexed(file_path)
                     logging.info(f"Indexed file: {file_path}")
                 except Exception as e:
+                    trace_exception(e)
                     logging.error(f"Error indexing file {file_path}: {str(e)}")
 
         logging.info(f"Indexed {len(new_files)} documents from {args.input_dir}")
@@ -328,7 +330,7 @@ def create_app(args):
             raise HTTPException(status_code=500, detail=str(e))
 
 
-    @app.get("/status")
+    @app.get("/health")
     async def get_status():
         """Get current system status"""
         return {
diff --git a/api/requirements.txt b/api/requirements.txt
index 6d7503ce..95aa72e5 100644
--- a/api/requirements.txt
+++ b/api/requirements.txt
@@ -1,3 +1,4 @@
 fastapi
 uvicorn
-python-multipart
\ No newline at end of file
+python-multipart
+ascii_colors
\ No newline at end of file
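
Reviewer note: a minimal smoke test for this patch, assuming it has been applied and the server is started with the new defaults on port 9621 (the exact JSON returned by `/health` depends on the rest of `get_status`, which this diff truncates):

```bash
# Pull the two new default models into the local Ollama instance
ollama pull mistral-nemo:latest
ollama pull bge-m3:latest

# Start the server with the new defaults (mistral-nemo + bge-m3, 1024-dim embeddings)
python ollama_lightrag_server.py

# The status route was renamed from /status to /health by this patch
curl http://localhost:9621/health
```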