From 108fc4a1ee3e65ee9a28018ec60eea94956f5df4 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 21 Oct 2024 11:53:06 +0000
Subject: [PATCH 1/6] Add ability to pass additional parameters to ollama library like host and timeout

---
 .gitignore                       | 121 +++++++++++++++++++++++++++++++
 examples/lightrag_ollama_demo.py |  31 +++++---
 lightrag/lightrag.py             |   3 +-
 lightrag/llm.py                  |   9 ++-
 4 files changed, 151 insertions(+), 13 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..422c67ce
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,121 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+*.egg
+*.egg-info/
+dist/
+build/
+*.whl
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.env.*
+.venv
+.venv.*
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyderworkspace
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# Example files
+book.txt
+dickens/
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index a2d04aa6..dfda26e6 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,4 +1,7 @@
 import os
+import logging
+
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
 
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
 from lightrag.utils import EmbeddingFunc
 
 WORKING_DIR = "./dickens"
@@ -11,15 +14,17 @@ if not os.path.exists(WORKING_DIR):
 
 rag = LightRAG(
     working_dir=WORKING_DIR,
-    llm_model_func=ollama_model_complete,
-    llm_model_name='your_model_name',
+    tiktoken_model_name="mistral:7b",
+    llm_model_func=ollama_model_complete,
+    llm_model_name="mistral:7b",
+    llm_model_max_async=2,
+    llm_model_kwargs={"host": "http://localhost:11434"},
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
         max_token_size=8192,
         func=lambda texts: ollama_embedding(
-            texts,
-            embed_model="nomic-embed-text"
-        )
+            texts, embed_model="nomic-embed-text", host="http://localhost:11434"
+        ),
     ),
 )
 
@@ -28,13 +33,21 @@ with open("./book.txt") as f:
     rag.insert(f.read())
 
 # Perform naive search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="naive")))
+print(
+    rag.query("What are the top themes in this story?", param=QueryParam(mode="naive"))
+)
 
 # Perform local search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="local")))
+print(
+    rag.query("What are the top themes in this story?", param=QueryParam(mode="local"))
+)
 
 # Perform global search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="global")))
+print(
+    rag.query("What are the top themes in this story?", param=QueryParam(mode="global"))
+)
 
 # Perform hybrid search
-print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
+print(
+    rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
+)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 83312ef6..c3e5cdab 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -86,6 +86,7 @@ class LightRAG:
     llm_model_name: str = 'meta-llama/Llama-3.2-1B-Instruct'#'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
     llm_model_max_token_size: int = 32768
     llm_model_max_async: int = 16
+    llm_model_kwargs: dict = field(default_factory=dict)
 
     # storage
     key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
@@ -158,7 +159,7 @@ class LightRAG:
         )
 
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
-            partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+            partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
         )
 
     def insert(self, string_or_strings):
diff --git a/lightrag/llm.py b/lightrag/llm.py
index 7328a583..aac384d9 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -98,8 +98,10 @@ async def ollama_model_if_cache(
 ) -> str:
     kwargs.pop("max_tokens", None)
     kwargs.pop("response_format", None)
+    host = kwargs.pop("host", None)
+    timeout = kwargs.pop("timeout", None)
 
-    ollama_client = ollama.AsyncClient()
+    ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
@@ -193,10 +195,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
     embeddings = outputs.last_hidden_state.mean(dim=1)
     return embeddings.detach().numpy()
 
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     embed_text = []
+    ollama_client = ollama.Client(**kwargs)
     for text in texts:
-        data = ollama.embeddings(model=embed_model, prompt=text)
+        data = ollama_client.embeddings(model=embed_model, prompt=text)
         embed_text.append(data["embedding"])
 
     return embed_text

From 25a2dd41c1e39801f029fcd9fb128b4d8b45356d Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 21 Oct 2024 11:53:06 +0000
Subject: [PATCH 2/6] Add ability to pass additional parameters to ollama library like host and timeout

---
 .gitignore                       | 3 ++-
 examples/lightrag_ollama_demo.py | 3 +++
 lightrag/lightrag.py             | 3 ++-
 lightrag/llm.py                  | 9 ++++++---
 4 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5a41ae32..9ce353de 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,4 +4,5 @@ dickens/
 book.txt
 lightrag-dev/
 .idea/
-dist/
\ No newline at end of file
+dist/
+.venv/
\ No newline at end of file
diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index c61b71c0..f968d26e 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,4 +1,7 @@
 import os
+import logging
+
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
 
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 5137af42..d4b1eaa1 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -88,6 +88,7 @@ class LightRAG:
     llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct" #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
     llm_model_max_token_size: int = 32768
     llm_model_max_async: int = 16
+    llm_model_kwargs: dict = field(default_factory=dict)
 
     # storage
     key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
@@ -154,7 +155,7 @@ class LightRAG:
         )
 
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
-            partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+            partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
         )
 
     def insert(self, string_or_strings):
diff --git a/lightrag/llm.py b/lightrag/llm.py
index be801e0c..aa818995 100644
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -222,8 +222,10 @@ async def ollama_model_if_cache(
 ) -> str:
     kwargs.pop("max_tokens", None)
     kwargs.pop("response_format", None)
+    host = kwargs.pop("host", None)
+    timeout = kwargs.pop("timeout", None)
 
-    ollama_client = ollama.AsyncClient()
+    ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
@@ -415,10 +417,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
     return embeddings.detach().numpy()
 
 
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     embed_text = []
+    ollama_client = ollama.Client(**kwargs)
     for text in texts:
-        data = ollama.embeddings(model=embed_model, prompt=text)
+        data = ollama_client.embeddings(model=embed_model, prompt=text)
         embed_text.append(data["embedding"])
 
     return embed_text

From e54d0536c46d4ecf49f86746109ea6e08505017c Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 21 Oct 2024 13:53:28 +0000
Subject: [PATCH 3/6] Small fix on demo

---
 examples/lightrag_ollama_demo.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index dfda26e6..93196066 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,7 +1,7 @@
 import os
 import logging
 
-logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.DEBUG)
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -14,7 +14,6 @@ if not os.path.exists(WORKING_DIR):
 
 rag = LightRAG(
     working_dir=WORKING_DIR,
-    tiktoken_model_name="mistral:7b",
     llm_model_func=ollama_model_complete,
     llm_model_name="mistral:7b",
     llm_model_max_async=2,

From 1d24eaf656990fe040ac5c78b93e615a2a5e81fa Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Tue, 22 Oct 2024 14:35:42 +0000
Subject: [PATCH 4/6] Finetune example to be able to run ollama example without need to tweak context size in Modelfile

---
 examples/lightrag_ollama_demo.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index 93196066..6070131f 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -15,9 +15,10 @@ if not os.path.exists(WORKING_DIR):
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=ollama_model_complete,
-    llm_model_name="mistral:7b",
-    llm_model_max_async=2,
-    llm_model_kwargs={"host": "http://localhost:11434"},
+    llm_model_name="gemma2:2b",
+    llm_model_max_async=4,
+    llm_model_max_token_size=32768,
+    llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
         max_token_size=8192,
@@ -27,7 +28,6 @@ rag = LightRAG(
     ),
 )
 
-
 with open("./book.txt") as f:
     rag.insert(f.read())
 
 # Perform naive search

From 84b60e4aa687bc313115015c0376e94535ddf592 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 28 Oct 2024 17:05:38 +0200
Subject: [PATCH 5/6] Fix lint issue

---
 examples/lightrag_ollama_demo.py | 5 ++---
 lightrag/lightrag.py             | 6 +++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/examples/lightrag_ollama_demo.py b/examples/lightrag_ollama_demo.py
index 0a704024..1a320d13 100644
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,14 +1,13 @@
 import os
 import logging
-
-logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
-
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
 from lightrag.utils import EmbeddingFunc
 
 WORKING_DIR = "./dickens"
 
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
+
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 955651fb..89ee1df5 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -155,7 +155,11 @@ class LightRAG:
         )
 
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
-            partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
+            partial(
+                self.llm_model_func,
+                hashing_kv=self.llm_response_cache,
+                **self.llm_model_kwargs,
+            )
         )
 
     def insert(self, string_or_strings):

From 6b80237805e6a5986387c2e674bf609214577079 Mon Sep 17 00:00:00 2001
From: Andrii Lazarchuk
Date: Mon, 28 Oct 2024 19:05:59 +0200
Subject: [PATCH 6/6] Update README with more details

---
 README.md | 46 ++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 15696b57..683dd0b2 100644
--- a/README.md
+++ b/README.md
@@ -163,7 +163,10 @@ rag = LightRAG(
 Using Ollama Models
 
-* If you want to use Ollama models, you only need to set LightRAG as follows:
+### Overview
+If you want to use Ollama models, you need to pull the model you plan to use and the embedding model, for example `nomic-embed-text`.
+
+Then you only need to set LightRAG as follows:
 
 ```python
 from lightrag.llm import ollama_model_complete, ollama_embedding
 from lightrag.utils import EmbeddingFunc
 
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=ollama_model_complete, # Use Ollama model for text generation
     llm_model_name='your_model_name', # Your model name
     # Use Ollama embedding function
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
         max_token_size=8192,
         func=lambda texts: ollama_embedding(
             texts,
             embed_model="nomic-embed-text"
         )
     ),
 )
 ```
 
-* Increasing the `num_ctx` parameter:
+### Increasing context size
+In order for LightRAG to work, the context should be at least 32k tokens. By default, Ollama models have a context size of 8k. You can increase it in one of two ways:
+
+#### Increasing the `num_ctx` parameter in Modelfile
 
 1. Pull the model:
-```python
+```bash
 ollama pull qwen2
 ```
 
 2. Display the model file:
-```python
+```bash
 ollama show --modelfile qwen2 > Modelfile
 ```
 
 3. Edit the Modelfile by adding the following line:
-```python
+```bash
 PARAMETER num_ctx 32768
 ```
 
 4. Create the modified model:
-```python
+```bash
 ollama create -f Modelfile qwen2m
 ```
 
+#### Set up `num_ctx` via the Ollama API
+You can use the `llm_model_kwargs` param to configure ollama:
+
+```python
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=ollama_model_complete, # Use Ollama model for text generation
+    llm_model_name='your_model_name', # Your model name
+    llm_model_kwargs={"options": {"num_ctx": 32768}},
+    # Use Ollama embedding function
+    embedding_func=EmbeddingFunc(
+        embedding_dim=768,
+        max_token_size=8192,
+        func=lambda texts: ollama_embedding(
+            texts,
+            embed_model="nomic-embed-text"
+        )
+    ),
+)
+```
+#### Fully functional example
+
+There is a fully functional example `examples/lightrag_ollama_demo.py` that utilizes the `gemma2:2b` model, runs only 4 requests in parallel, and sets the context size to 32k.
+
+#### Low RAM GPUs
+
+In order to run this experiment on a low-RAM GPU you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this Ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
### Query Param
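
A minimal end-to-end sketch of how the parameters introduced in this series fit together: `llm_model_kwargs` is forwarded to the Ollama LLM calls, where `host` and `timeout` are popped to build `ollama.AsyncClient` and the remaining keys, such as `options`, travel with the request, while extra keyword arguments to `ollama_embedding` are forwarded to `ollama.Client`. Model names, host, and context size mirror the final `examples/lightrag_ollama_demo.py`; the 300-second `timeout` is an assumed, illustrative value rather than something set anywhere in the patches.

```python
from lightrag import LightRAG, QueryParam
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc

rag = LightRAG(
    working_dir="./dickens",
    llm_model_func=ollama_model_complete,
    llm_model_name="gemma2:2b",
    llm_model_max_async=4,
    llm_model_max_token_size=32768,
    # "host" and "timeout" are popped and used to construct ollama.AsyncClient;
    # everything else (e.g. "options") is passed through with the Ollama request.
    llm_model_kwargs={
        "host": "http://localhost:11434",
        "timeout": 300,  # assumed value for illustration; any client timeout works
        "options": {"num_ctx": 32768},
    },
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        # extra keyword arguments such as host are forwarded to ollama.Client
        func=lambda texts: ollama_embedding(
            texts, embed_model="nomic-embed-text", host="http://localhost:11434"
        ),
    ),
)

with open("./book.txt") as f:
    rag.insert(f.read())

print(
    rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid"))
)
```

Keeping `llm_model_max_token_size` and `options.num_ctx` at the same value, as patch 4 does, avoids LightRAG assuming a larger window than the Ollama server actually allocates.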