Merge pull request #3 from alazarchuk/fix-ollama-integration

Fix ollama integration
2024-10-22 06:32:03 -07:00
parent 2720442893 0d44308987
commit 2aa576f580
4 changed files with 19 additions and 7 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,4 @@ book.txt
 lightrag-dev/
 .idea/
 dist/
 .venv/
--- a/examples/lightrag_ollama_demo.py
+++ b/examples/lightrag_ollama_demo.py
@@ -1,4 +1,7 @@
 import os
 import logging
 logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
 from lightrag import LightRAG, QueryParam
 from lightrag.llm import ollama_model_complete, ollama_embedding
@@ -12,11 +15,15 @@ if not os.path.exists(WORKING_DIR):
 rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=ollama_model_complete,
-    llm_model_name="your_model_name",
+    llm_model_name="mistral:7b",
    llm_model_max_async=2,
    llm_model_kwargs={"host": "http://localhost:11434"},
    embedding_func=EmbeddingFunc(
        embedding_dim=768,
        max_token_size=8192,
-        func=lambda texts: ollama_embedding(texts, embed_model="nomic-embed-text"),
+        func=lambda texts: ollama_embedding(
            texts, embed_model="nomic-embed-text", host="http://localhost:11434"
        ),
    ),
 )
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -88,6 +88,7 @@ class LightRAG:
    llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct"  #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
    llm_model_max_token_size: int = 32768
    llm_model_max_async: int = 16
    llm_model_kwargs: dict = field(default_factory=dict)
    # storage
    key_string_value_json_storage_cls: Type[BaseKVStorage] = JsonKVStorage
@@ -154,7 +155,7 @@ class LightRAG:
        )
        self.llm_model_func = limit_async_func_call(self.llm_model_max_async)(
-            partial(self.llm_model_func, hashing_kv=self.llm_response_cache)
+            partial(self.llm_model_func, hashing_kv=self.llm_response_cache, **self.llm_model_kwargs)
        )
    def insert(self, string_or_strings):
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -222,8 +222,10 @@ async def ollama_model_if_cache(
 ) -> str:
    kwargs.pop("max_tokens", None)
    kwargs.pop("response_format", None)
    host = kwargs.pop("host", None)
    timeout = kwargs.pop("timeout", None)
-    ollama_client = ollama.AsyncClient()
+    ollama_client = ollama.AsyncClient(host=host, timeout=timeout)
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
@@ -415,10 +417,11 @@ async def hf_embedding(texts: list[str], tokenizer, embed_model) -> np.ndarray:
    return embeddings.detach().numpy()
-async def ollama_embedding(texts: list[str], embed_model) -> np.ndarray:
+async def ollama_embedding(texts: list[str], embed_model, **kwargs) -> np.ndarray:
    embed_text = []
    ollama_client = ollama.Client(**kwargs)
    for text in texts:
-        data = ollama.embeddings(model=embed_model, prompt=text)
+        data = ollama_client.embeddings(model=embed_model, prompt=text)
        embed_text.append(data["embedding"])
    return embed_text