Merge pull request #5 from alazarchuk/before-sync-28-10-2024
Add ability to pass additional parameters to ollama library
.gitignore (vendored): 1 change
```diff
@@ -5,3 +5,4 @@ book.txt
 lightrag-dev/
 .idea/
 dist/
+.venv/
```
README.md: 46 changes
````diff
@@ -163,7 +163,10 @@ rag = LightRAG(
 <details>
 <summary> Using Ollama Models </summary>
 
-* If you want to use Ollama models, you only need to set LightRAG as follows:
+### Overview
+If you want to use Ollama models, you need to pull the model you plan to use and an embedding model, for example `nomic-embed-text`.
+
+Then you only need to set LightRAG as follows:
 
 ```python
 from lightrag.llm import ollama_model_complete, ollama_embedding
````
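As a small illustrative sketch (an assumption, not part of this change), the pull step described in the overview above can also be scripted with the `ollama` Python package; the model names are simply the ones used elsewhere in this diff:

```python
import ollama

# Pull a chat model and the embedding model referenced in the README.
ollama.pull("qwen2")
ollama.pull("nomic-embed-text")
```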
````diff
@@ -185,28 +188,59 @@ rag = LightRAG(
 )
 ```
 
-* Increasing the `num_ctx` parameter:
+### Increasing context size
+For LightRAG to work, the context needs to be at least 32k tokens. By default, Ollama models have a context size of 8k. You can increase it in one of two ways:
 
+#### Increasing the `num_ctx` parameter in the Modelfile
+
 1. Pull the model:
-```python
+```bash
 ollama pull qwen2
 ```
 
 2. Display the model file:
-```python
+```bash
 ollama show --modelfile qwen2 > Modelfile
 ```
 
 3. Edit the Modelfile by adding the following line:
-```python
+```bash
 PARAMETER num_ctx 32768
 ```
 
 4. Create the modified model:
-```python
+```bash
 ollama create -f Modelfile qwen2m
 ```
 
+#### Setting `num_ctx` via the Ollama API
+You can use the `llm_model_kwargs` param to configure Ollama:
+
+```python
+rag = LightRAG(
+    working_dir=WORKING_DIR,
+    llm_model_func=ollama_model_complete,  # Use Ollama model for text generation
+    llm_model_name='your_model_name',  # Your model name
+    llm_model_kwargs={"options": {"num_ctx": 32768}},
+    # Use Ollama embedding function
+    embedding_func=EmbeddingFunc(
+        embedding_dim=768,
+        max_token_size=8192,
+        func=lambda texts: ollama_embedding(
+            texts,
+            embed_model="nomic-embed-text"
+        )
+    ),
+)
+```
+#### Fully functional example
+
+There is a fully functional example, `examples/lightrag_ollama_demo.py`, that uses the `gemma2:2b` model, runs only 4 requests in parallel, and sets the context size to 32k.
+
+#### Low RAM GPUs
+
+To run this experiment on a low-RAM GPU, you should select a small model and tune the context window (increasing the context increases memory consumption). For example, running this Ollama example on a repurposed mining GPU with 6 GB of RAM required setting the context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations in `book.txt`.
+
 </details>
 
 ### Query Param
````
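For illustration only, the "Low RAM GPUs" paragraph above could translate into a configuration like the one below. The 26k context and `gemma2:2b` come from that paragraph, the remaining arguments mirror `examples/lightrag_ollama_demo.py`, and the host URL is the Ollama default:

```python
from lightrag import LightRAG
from lightrag.llm import ollama_model_complete, ollama_embedding
from lightrag.utils import EmbeddingFunc

# Hypothetical low-RAM setup: small model plus a reduced 26k context window.
rag = LightRAG(
    working_dir="./dickens",
    llm_model_func=ollama_model_complete,
    llm_model_name="gemma2:2b",
    llm_model_max_async=4,
    llm_model_max_token_size=26000,
    llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 26000}},
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
        func=lambda texts: ollama_embedding(
            texts, embed_model="nomic-embed-text", host="http://localhost:11434"
        ),
    ),
)
```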
examples/lightrag_ollama_demo.py

```diff
@@ -1,26 +1,32 @@
 import os
+import logging
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
 from lightrag.utils import EmbeddingFunc
 
 WORKING_DIR = "./dickens"
 
+logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
+
 if not os.path.exists(WORKING_DIR):
     os.mkdir(WORKING_DIR)
 
 rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=ollama_model_complete,
-    llm_model_name="your_model_name",
+    llm_model_name="gemma2:2b",
+    llm_model_max_async=4,
+    llm_model_max_token_size=32768,
+    llm_model_kwargs={"host": "http://localhost:11434", "options": {"num_ctx": 32768}},
     embedding_func=EmbeddingFunc(
         embedding_dim=768,
         max_token_size=8192,
-        func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
+        func=lambda texts: ollama_embedding(
+            texts, embed_model="nomic-embed-text", host="http://localhost:11434"
+        ),
     ),
 )
 
 with open("./book.txt", "r", encoding="utf-8") as f:
     rag.insert(f.read())
```
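The demo stops after the insert step. As a hedged continuation (not part of this diff), a query against the populated index would use the query API from the README's Query Param section; `QueryParam` is already imported in the script above, and the question string is just an example:

```python
# Continuation of examples/lightrag_ollama_demo.py after rag.insert(...)
print(rag.query("What are the top themes in this story?", param=QueryParam(mode="hybrid")))
```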
lightrag/lightrag.py

```diff
@@ -88,6 +88,7 @@ class LightRAG:
     llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct"  #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
     llm_model_max_token_size: int = 32768
     llm_model_max_async: int = 16
+    llm_model_kwargs: dict = field(default_factory=dict)
 
     # storage
     key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
@@ -154,7 +155,11 @@ class LightRAG:
         )
 
         self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
-            partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+            partial(
+                self.llm_model_func,
+                hashing_kv=self.llm_response_cache,
+                **self.llm_model_kwargs,
+            )
        )
 
     def insert(self, string_or_strings):
```
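A minimal standalone sketch of the mechanism above, showing how `functools.partial` folds `llm_model_kwargs` into every model call; `fake_model_func` and the kwarg values are illustrative only:

```python
from functools import partial

async def fake_model_func(prompt, hashing_kv=None, **kwargs):
    # kwargs carries whatever was supplied via llm_model_kwargs,
    # e.g. {"host": "http://localhost:11434", "options": {"num_ctx": 32768}}
    return f"called with kwargs={kwargs}"

llm_model_kwargs = {"options": {"num_ctx": 32768}}
wrapped = partial(fake_model_func, hashing_kv=None, **llm_model_kwargs)

# Every subsequent call now carries the extra parameters automatically:
# await wrapped("Hello")  ->  "called with kwargs={'options': {'num_ctx': 32768}}"
```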
lightrag/llm.py

```diff
@@ -299,8 +299,10 @@ async def ollama_model_if_cache(
 ) -> str:
     kwargs.pop("max_tokens", None)
     kwargs.pop("response_format", None)
+    host = kwargs.pop("host", None)
+    timeout = kwargs.pop("timeout", None)
 
-    ollama_client = ollama.AsyncClient()
+    ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
```
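In effect, `host` and `timeout` are now split off for the client while everything else, such as `options`, keeps flowing to the chat request. A hedged sketch with the `ollama` library directly, assuming a local server and a `qwen2m` model built as in the README; the timeout value is an example:

```python
import asyncio
import ollama

async def main():
    # host and timeout are the two keys ollama_model_if_cache now pops
    # and forwards to the client; options stays in kwargs for the request.
    client = ollama.AsyncClient(host="http://localhost:11434", timeout=300)
    response = await client.chat(
        model="qwen2m",
        messages=[{"role": "user", "content": "Hello"}],
        options={"num_ctx": 32768},
    )
    print(response["message"]["content"])

asyncio.run(main())
```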
```diff
@@ -700,10 +702,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
     return embeddings.detach().numpy()
 
 
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
     embed_text = []
+    ollama_client = ollama.Client(**kwargs)
     for text in texts:
-        data = ollama.embeddings(model=embed_model, prompt=text)
+        data = ollama_client.embeddings(model=embed_model, prompt=text)
         embed_text.append(data["embedding"])
 
     return embed_text
```
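A short usage sketch of the updated `ollama_embedding`, assuming a running Ollama server with `nomic-embed-text` pulled; extra keyword arguments such as `host` are passed straight through to `ollama.Client`:

```python
import asyncio
from lightrag.llm import ollama_embedding

async def main():
    embeddings = await ollama_embedding(
        ["LightRAG test sentence"],
        embed_model="nomic-embed-text",
        host="http://localhost:11434",  # forwarded to ollama.Client(**kwargs)
    )
    # Expect 1 vector; nomic-embed-text produces 768-dimensional embeddings.
    print(len(embeddings), len(embeddings[0]))

asyncio.run(main())
```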