remove tqdm and cleaned readme and ollama

2025-02-18 19:58:03 +01:00
parent 24ae083284
commit 2524e02428
16 changed files with 30 additions and 141 deletions
--- a/lightrag/api/requirements.txt
+++ b/lightrag/api/requirements.txt
@@ -7,5 +7,4 @@ python-multipart
 tenacity
 tiktoken
 torch
-tqdm
 uvicorn
--- a/lightrag/kg/faiss_impl.py
+++ b/lightrag/kg/faiss_impl.py
@@ -22,7 +22,6 @@ if not pm.is_installed("faiss"):

 try:
    import faiss
-    from tqdm.asyncio import tqdm as tqdm_async
 except ImportError as e:
    raise ImportError(
        "`faiss` library is not installed. Please install it via pip: `pip install faiss`."
@@ -109,16 +108,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        pbar = tqdm_async(
-            total=len(batches), desc="Generating embeddings", unit="batch"
-        )
-
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        # Flatten the list of arrays
--- a/lightrag/kg/milvus_impl.py
+++ b/lightrag/kg/milvus_impl.py
@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 from lightrag.utils import logger
@@ -94,15 +93,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -2,7 +2,6 @@ import os
 from dataclasses import dataclass
 import numpy as np
 import configparser
-from tqdm.asyncio import tqdm as tqdm_async
 import asyncio

 from typing import Any, List, Union, final
@@ -854,17 +853,8 @@ class MongoVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)
-
        embeddings = np.concatenate(embeddings_list)
        for i, d in enumerate(list_data):
            d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()
--- a/lightrag/kg/nano_vector_db_impl.py
+++ b/lightrag/kg/nano_vector_db_impl.py
@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np

@@ -71,15 +70,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -41,7 +41,6 @@ if not pm.is_installed("asyncpg"):

 try:
    import asyncpg
-    from tqdm.asyncio import tqdm as tqdm_async

 except ImportError as e:
    raise ImportError(
@@ -380,15 +379,7 @@ class PGVectorStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/qdrant_impl.py
+++ b/lightrag/kg/qdrant_impl.py
@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 import hashlib
@@ -110,15 +109,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/redis_impl.py
+++ b/lightrag/kg/redis_impl.py
@@ -1,6 +1,5 @@
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import pipmaster as pm
 import configparser
@@ -51,7 +50,8 @@ class RedisKVStorage(BaseKVStorage):

    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
        pipe = self._redis.pipeline()
-        for k, v in tqdm_async(data.items(), desc="Upserting"):
+
+        for k, v in data.items():
            pipe.set(f"{self.namespace}:{k}", json.dumps(v))
        await pipe.execute()

--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -7,7 +7,6 @@ import numpy as np

 from lightrag.types import KnowledgeGraph

-from tqdm import tqdm

 from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
 from ..namespace import NameSpace, is_namespace
@@ -270,15 +269,8 @@ class TiDBVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]
        embedding_tasks = [self.embedding_func(batch) for batch in batches]
-        embeddings_list = []
-        for f in tqdm(
-            asyncio.as_completed(embedding_tasks),
-            total=len(embedding_tasks),
-            desc="Generating embeddings",
-            unit="batch",
-        ):
-            embeddings = await f
-            embeddings_list.append(embeddings)
+        embeddings_list = await asyncio.gather(*embedding_tasks)
+
        embeddings = np.concatenate(embeddings_list)
        for i, d in enumerate(list_data):
            d["content_vector"] = embeddings[i]
--- a/lightrag/llm/ollama.py
+++ b/lightrag/llm/ollama.py
@@ -4,7 +4,7 @@ if sys.version_info < (3, 9):
    from typing import AsyncIterator
 else:
    from collections.abc import AsyncIterator
-    
+
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
@@ -48,7 +48,7 @@ async def _ollama_model_if_cache(
    **kwargs,
 ) -> Union[str, AsyncIterator[str]]:
    stream = True if kwargs.get("stream") else False
-    
+
    kwargs.pop("max_tokens", None)
    # kwargs.pop("response_format", None) # allow json
    host = kwargs.pop("host", None)
@@ -129,4 +129,4 @@ async def ollama_embed(texts: list[str], embed_model, **kwargs) -> np.ndarray:
    kwargs["headers"] = headers
    ollama_client = ollama.Client(**kwargs)
    data = ollama_client.embed(model=embed_model, input=texts)
-    return data["embeddings"]
+    return data["embeddings"]
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import re
-from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, AsyncIterator
 from collections import Counter, defaultdict
 from .utils import (
@@ -500,16 +499,8 @@ async def extract_entities(
        )
        return dict(maybe_nodes), dict(maybe_edges)

-    results = []
-    for result in tqdm_async(
-        asyncio.as_completed([_process_single_content(c) for c in ordered_chunks]),
-        total=len(ordered_chunks),
-        desc="Level 2 - Extracting entities and relationships",
-        unit="chunk",
-        position=1,
-        leave=False,
-    ):
-        results.append(await result)
+    tasks = [_process_single_content(c) for c in ordered_chunks]
+    results = await asyncio.gather(*tasks)

    maybe_nodes = defaultdict(list)
    maybe_edges = defaultdict(list)
@@ -518,41 +509,20 @@ async def extract_entities(
            maybe_nodes[k].extend(v)
        for k, v in m_edges.items():
            maybe_edges[tuple(sorted(k))].extend(v)
-    logger.debug("Inserting entities into storage...")
-    all_entities_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
-                for k, v in maybe_nodes.items()
-            ]
-        ),
-        total=len(maybe_nodes),
-        desc="Level 3 - Inserting entities",
-        unit="entity",
-        position=2,
-        leave=False,
-    ):
-        all_entities_data.append(await result)

-    logger.debug("Inserting relationships into storage...")
-    all_relationships_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_edges_then_upsert(
-                    k[0], k[1], v, knowledge_graph_inst, global_config
-                )
-                for k, v in maybe_edges.items()
-            ]
-        ),
-        total=len(maybe_edges),
-        desc="Level 3 - Inserting relationships",
-        unit="relationship",
-        position=3,
-        leave=False,
-    ):
-        all_relationships_data.append(await result)
+    all_entities_data = await asyncio.gather(
+        *[
+            _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
+            for k, v in maybe_nodes.items()
+        ]
+    )
+
+    all_relationships_data = await asyncio.gather(
+        *[
+            _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
+            for k, v in maybe_edges.items()
+        ]
+    )

    if not len(all_entities_data) and not len(all_relationships_data):
        logger.warning(
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -19,7 +19,6 @@ import tiktoken
 from lightrag.prompt import PROMPTS


-
 VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"


@@ -84,7 +83,6 @@ class EmbeddingFunc:
        return await self.func(*args, **kwargs)


-
 def locate_json_string_body_from_string(content: str) -> str | None:
    """Locate the JSON string body from a string"""
    try:
@@ -715,4 +713,3 @@ def get_conversation_turns(
        )

    return "\n".join(formatted_turns)
-