Merge pull request #842 from YanSte/server

Lightrag Server Launch fix.
2025-02-18 22:16:48 +01:00
parent 29582fdcbd 55cd900e8e
commit 2c99dfc34f
31 changed files with 67 additions and 701 deletions
--- a/README.md
+++ b/README.md
@@ -246,6 +246,9 @@ rag = LightRAG(
 <summary> Using Hugging Face Models </summary>

 * If you want to use Hugging Face models, you only need to set LightRAG as follows:
+
+See `lightrag_hf_demo.py` for a complete example.
+
 ```python
 from lightrag.llm import hf_model_complete, hf_embed
 from transformers import AutoModel, AutoTokenizer
@@ -344,16 +347,6 @@ rag = LightRAG(
    ),
 )
 ```
-#### Fully functional example
-
-There fully functional example `examples/lightrag_ollama_demo.py` that utilizes `gemma2:2b` model, runs only 4 requests in parallel and set context size to 32k.
-
-#### Using "Thinking" Models (e.g., DeepSeek)
-
-To return only the model's response, you can pass `reasoning_tag` in `llm_model_kwargs`.
-
-For example, for DeepSeek models, `reasoning_tag` should be set to `think`.
-
 #### Low RAM GPUs

 In order to run this experiment on low RAM GPU you should select small model and tune context window (increasing context increase memory consumption). For example, running this ollama example on repurposed mining GPU with 6Gb of RAM required to set context size to 26k while using `gemma2:2b`. It was able to find 197 entities and 19 relations on `book.txt`.
--- a/lightrag/api/requirements.txt
+++ b/lightrag/api/requirements.txt
@@ -1,11 +1,9 @@
+aiofiles
 ascii_colors
 fastapi
 numpy
 pipmaster
 python-dotenv
-python-multipart
 tenacity
 tiktoken
-torch
-tqdm
 uvicorn
--- a/lightrag/kg/faiss_impl.py
+++ b/lightrag/kg/faiss_impl.py
@@ -22,7 +22,6 @@ if not pm.is_installed("faiss"):

 try:
    import faiss
-    from tqdm.asyncio import tqdm as tqdm_async
 except ImportError as e:
    raise ImportError(
        "`faiss` library is not installed. Please install it via pip: `pip install faiss`."
@@ -109,16 +108,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        pbar = tqdm_async(
-            total=len(batches), desc="Generating embeddings", unit="batch"
-        )
-
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        # Flatten the list of arrays
--- a/lightrag/kg/milvus_impl.py
+++ b/lightrag/kg/milvus_impl.py
@@ -1,18 +1,21 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 from lightrag.utils import logger
 from ..base import BaseVectorStorage
 import pipmaster as pm
-import configparser
+
+
+if not pm.is_installed("configparser"):
+    pm.install("configparser")

 if not pm.is_installed("pymilvus"):
    pm.install("pymilvus")

 try:
+    import configparser
    from pymilvus import MilvusClient
 except ImportError as e:
    raise ImportError(
@@ -94,15 +97,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -2,7 +2,6 @@ import os
 from dataclasses import dataclass
 import numpy as np
 import configparser
-from tqdm.asyncio import tqdm as tqdm_async
 import asyncio

 from typing import Any, List, Union, final
@@ -854,17 +853,8 @@ class MongoVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)
-
        embeddings = np.concatenate(embeddings_list)
        for i, d in enumerate(list_data):
            d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()
--- a/lightrag/kg/nano_vector_db_impl.py
+++ b/lightrag/kg/nano_vector_db_impl.py
@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np

@@ -71,15 +70,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/networkx_impl.py
+++ b/lightrag/kg/networkx_impl.py
@@ -1,7 +1,6 @@
-import html
 import os
 from dataclasses import dataclass
-from typing import Any, cast, final
+from typing import Any, final

 import numpy as np

@@ -14,8 +13,16 @@ from lightrag.utils import (
 from lightrag.base import (
    BaseGraphStorage,
 )
+import pipmaster as pm
+
+if not pm.is_installed("graspologic"):
+    pm.install("graspologic")
+
+if not pm.is_installed("networkx"):
+    pm.install("networkx")

 try:
+    from graspologic import embed
    import networkx as nx
 except ImportError as e:
    raise ImportError(
@@ -39,21 +46,6 @@ class NetworkXStorage(BaseGraphStorage):
        )
        nx.write_graphml(graph, file_name)

-    @staticmethod
-    def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
-        """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
-        Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
-        """
-        from graspologic.utils import largest_connected_component
-
-        graph = graph.copy()
-        graph = cast(nx.Graph, largest_connected_component(graph))
-        node_mapping = {
-            node: html.unescape(node.upper().strip()) for node in graph.nodes()
-        }  # type: ignore
-        graph = nx.relabel_nodes(graph, node_mapping)
-        return NetworkXStorage._stabilize_graph(graph)
-
    @staticmethod
    def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
        """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
@@ -153,8 +145,6 @@ class NetworkXStorage(BaseGraphStorage):

    # @TODO: NOT USED
    async def _node2vec_embed(self):
-        from graspologic import embed
-
        embeddings, nodes = embed.node2vec_embed(
            self._graph,
            **self.global_config["node2vec_params"],
--- a/lightrag/kg/oracle_impl.py
+++ b/lightrag/kg/oracle_impl.py
@@ -20,10 +20,14 @@ from ..utils import logger

 import pipmaster as pm

+if not pm.is_installed("graspologic"):
+    pm.install("graspologic")
+
 if not pm.is_installed("oracledb"):
    pm.install("oracledb")

 try:
+    from graspologic import embed
    import oracledb

 except ImportError as e:
@@ -452,8 +456,6 @@ class OracleGraphStorage(BaseGraphStorage):

    async def _node2vec_embed(self):
        """为节点生成向量"""
-        from graspologic import embed
-
        embeddings, nodes = embed.node2vec_embed(
            self._graph,
            **self.config["node2vec_params"],
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -41,7 +41,6 @@ if not pm.is_installed("asyncpg"):

 try:
    import asyncpg
-    from tqdm.asyncio import tqdm as tqdm_async

 except ImportError as e:
    raise ImportError(
@@ -380,15 +379,7 @@ class PGVectorStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/qdrant_impl.py
+++ b/lightrag/kg/qdrant_impl.py
@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 import hashlib
@@ -110,15 +109,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

        embeddings = np.concatenate(embeddings_list)
--- a/lightrag/kg/redis_impl.py
+++ b/lightrag/kg/redis_impl.py
@@ -1,6 +1,5 @@
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import pipmaster as pm
 import configparser
@@ -51,7 +50,8 @@ class RedisKVStorage(BaseKVStorage):

    async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
        pipe = self._redis.pipeline()
-        for k, v in tqdm_async(data.items(), desc="Upserting"):
+
+        for k, v in data.items():
            pipe.set(f"{self.namespace}:{k}", json.dumps(v))
        await pipe.execute()

--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -7,7 +7,6 @@ import numpy as np

 from lightrag.types import KnowledgeGraph

-from tqdm import tqdm

 from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
 from ..namespace import NameSpace, is_namespace
@@ -270,15 +269,8 @@ class TiDBVectorDBStorage(BaseVectorStorage):
            for i in range(0, len(contents), self._max_batch_size)
        ]
        embedding_tasks = [self.embedding_func(batch) for batch in batches]
-        embeddings_list = []
-        for f in tqdm(
-            asyncio.as_completed(embedding_tasks),
-            total=len(embedding_tasks),
-            desc="Generating embeddings",
-            unit="batch",
-        ):
-            embeddings = await f
-            embeddings_list.append(embeddings)
+        embeddings_list = await asyncio.gather(*embedding_tasks)
+
        embeddings = np.concatenate(embeddings_list)
        for i, d in enumerate(list_data):
            d["content_vector"] = embeddings[i]
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -1,46 +1,3 @@
-"""
-Azure OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with aure openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.azure_openai import azure_openai_model_complete, azure_openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import os
 import pipmaster as pm  # Pipmaster for dynamic library install

--- a/lightrag/llm/bedrock.py
+++ b/lightrag/llm/bedrock.py
@@ -1,46 +1,3 @@
-"""
-Bedrock LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Bedrock's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - aioboto3, tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.bebrock import bebrock_model_complete, bebrock_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import copy
 import os
 import json
--- a/lightrag/llm/hf.py
+++ b/lightrag/llm/hf.py
@@ -1,47 +1,7 @@
-"""
-Hugging face LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Hugging face's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - transformers
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.hf import hf_model_complete, hf_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import copy
 import os
+from functools import lru_cache
+
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
@@ -51,9 +11,10 @@ if not pm.is_installed("torch"):
    pm.install("torch")
 if not pm.is_installed("tenacity"):
    pm.install("tenacity")
+if not pm.is_installed("numpy"):
+    pm.install("numpy")

 from transformers import AutoTokenizer, AutoModelForCausalLM
-from functools import lru_cache
 from tenacity import (
    retry,
    stop_after_attempt,
--- a/lightrag/llm/jina.py
+++ b/lightrag/llm/jina.py
@@ -1,43 +1,3 @@
-"""
-Jina Embedding Interface Module
-==========================
-
-This module provides interfaces for interacting with jina system,
-including embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added embedding generation
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.jina import jina_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import os
 import pipmaster as pm  # Pipmaster for dynamic library install

--- a/lightrag/llm/lmdeploy.py
+++ b/lightrag/llm/lmdeploy.py
@@ -1,45 +1,3 @@
-"""
-LMDeploy LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with LMDeploy's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.lmdeploy import lmdeploy_model_complete, lmdeploy_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
--- a/lightrag/llm/lollms.py
+++ b/lightrag/llm/lollms.py
@@ -1,66 +1,3 @@
-"""
-LoLLMs (Lord of Large Language Models) Interface Module
-=====================================================
-
-This module provides the official interface for interacting with LoLLMs (Lord of Large Language and multimodal Systems),
-a unified framework for AI model interaction and deployment.
-
-LoLLMs is designed as a "one tool to rule them all" solution, providing seamless integration
-with various AI models while maintaining high performance and user-friendly interfaces.
-
-Author: ParisNeo
-Created: 2024-01-24
-License: Apache 2.0
-
-Copyright (c) 2024 ParisNeo
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-Version: 2.0.0
-
-Change Log:
- 2.0.0 (2024-01-24):
-    * Added async support for model inference
-    * Implemented streaming capabilities
-    * Added embedding generation functionality
-    * Enhanced parameter handling
-    * Improved error handling and timeout management
-
-Dependencies:
-    - aiohttp
-    - numpy
-    - Python >= 3.10
-
-Features:
-    - Async text generation with streaming support
-    - Embedding generation
-    - Configurable model parameters
-    - System prompt and chat history support
-    - Timeout handling
-    - API key authentication
-
-Usage:
-    from llm_interfaces.lollms import lollms_model_complete, lollms_embed
-
-Project Repository: https://github.com/ParisNeo/lollms
-Documentation: https://github.com/ParisNeo/lollms/docs
-"""
-
-__version__ = "1.0.0"
-__author__ = "ParisNeo"
-__status__ = "Production"
-__project_url__ = "https://github.com/ParisNeo/lollms"
-__doc_url__ = "https://github.com/ParisNeo/lollms/docs"
 import sys

 if sys.version_info < (3, 9):
--- a/lightrag/llm/nvidia_openai.py
+++ b/lightrag/llm/nvidia_openai.py
@@ -1,46 +1,3 @@
-"""
-OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.nvidia_openai import nvidia_openai_model_complete, nvidia_openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import sys
 import os

@@ -48,6 +5,7 @@ if sys.version_info < (3, 9):
    pass
 else:
    pass
+
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
--- a/lightrag/llm/ollama.py
+++ b/lightrag/llm/ollama.py
@@ -1,51 +1,10 @@
-"""
-Ollama LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Ollama's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - ollama
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.ollama_interface import ollama_model_complete, ollama_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys

 if sys.version_info < (3, 9):
    from typing import AsyncIterator
 else:
    from collections.abc import AsyncIterator
+
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
@@ -54,7 +13,9 @@ if not pm.is_installed("ollama"):
 if not pm.is_installed("tenacity"):
    pm.install("tenacity")

+
 import ollama
+
 from tenacity import (
    retry,
    stop_after_attempt,
@@ -67,7 +28,7 @@ from lightrag.exceptions import (
    APITimeoutError,
 )
 from lightrag.api import __api_version__
-from lightrag.utils import extract_reasoning
+
 import numpy as np
 from typing import Union

@@ -79,7 +40,7 @@ from typing import Union
        (RateLimitError, APIConnectionError, APITimeoutError)
    ),
 )
-async def ollama_model_if_cache(
+async def _ollama_model_if_cache(
    model,
    prompt,
    system_prompt=None,
@@ -87,7 +48,7 @@ async def ollama_model_if_cache(
    **kwargs,
 ) -> Union[str, AsyncIterator[str]]:
    stream = True if kwargs.get("stream") else False
-    reasoning_tag = kwargs.pop("reasoning_tag", None)
+
    kwargs.pop("max_tokens", None)
    # kwargs.pop("response_format", None) # allow json
    host = kwargs.pop("host", None)
@@ -125,11 +86,7 @@ async def ollama_model_if_cache(
        response and can simply be trimmed.
        """

-        return (
-            model_response
-            if reasoning_tag is None
-            else extract_reasoning(model_response, reasoning_tag).response_content
-        )
+        return model_response


 async def ollama_model_complete(
@@ -139,7 +96,7 @@ async def ollama_model_complete(
    if keyword_extraction:
        kwargs["format"] = "json"
    model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
-    return await ollama_model_if_cache(
+    return await _ollama_model_if_cache(
        model_name,
        prompt,
        system_prompt=system_prompt,
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -1,45 +1,3 @@
-"""
-OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.openai import openai_model_complete, openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os
--- a/lightrag/llm/siliconcloud.py
+++ b/lightrag/llm/siliconcloud.py
@@ -1,43 +1,3 @@
-"""
-SiliconCloud Embedding Interface Module
-==========================
-
-This module provides interfaces for interacting with SiliconCloud system,
-including embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added embedding generation
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.siliconcloud import siliconcloud_model_complete, siliconcloud_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys

 if sys.version_info < (3, 9):
--- a/lightrag/llm/zhipu.py
+++ b/lightrag/llm/zhipu.py
@@ -1,45 +1,3 @@
-"""
-Zhipu LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with LMDeploy's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.zhipu import zhipu_model_complete, zhipu_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys
 import re
 import json
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import re
-from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, AsyncIterator
 from collections import Counter, defaultdict
 from .utils import (
@@ -500,16 +499,8 @@ async def extract_entities(
        )
        return dict(maybe_nodes), dict(maybe_edges)

-    results = []
-    for result in tqdm_async(
-        asyncio.as_completed([_process_single_content(c) for c in ordered_chunks]),
-        total=len(ordered_chunks),
-        desc="Level 2 - Extracting entities and relationships",
-        unit="chunk",
-        position=1,
-        leave=False,
-    ):
-        results.append(await result)
+    tasks = [_process_single_content(c) for c in ordered_chunks]
+    results = await asyncio.gather(*tasks)

    maybe_nodes = defaultdict(list)
    maybe_edges = defaultdict(list)
@@ -518,41 +509,20 @@ async def extract_entities(
            maybe_nodes[k].extend(v)
        for k, v in m_edges.items():
            maybe_edges[tuple(sorted(k))].extend(v)
-    logger.debug("Inserting entities into storage...")
-    all_entities_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
-                for k, v in maybe_nodes.items()
-            ]
-        ),
-        total=len(maybe_nodes),
-        desc="Level 3 - Inserting entities",
-        unit="entity",
-        position=2,
-        leave=False,
-    ):
-        all_entities_data.append(await result)

-    logger.debug("Inserting relationships into storage...")
-    all_relationships_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_edges_then_upsert(
-                    k[0], k[1], v, knowledge_graph_inst, global_config
-                )
-                for k, v in maybe_edges.items()
-            ]
-        ),
-        total=len(maybe_edges),
-        desc="Level 3 - Inserting relationships",
-        unit="relationship",
-        position=3,
-        leave=False,
-    ):
-        all_relationships_data.append(await result)
+    all_entities_data = await asyncio.gather(
+        *[
+            _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
+            for k, v in maybe_nodes.items()
+        ]
+    )
+
+    all_relationships_data = await asyncio.gather(
+        *[
+            _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
+            for k, v in maybe_edges.items()
+        ]
+    )

    if not len(all_entities_data) and not len(all_relationships_data):
        logger.warning(
--- a/lightrag/prompt.py
+++ b/lightrag/prompt.py
@@ -1,8 +1,9 @@
 from __future__ import annotations
+from typing import Any

 GRAPH_FIELD_SEP = "<SEP>"

-PROMPTS = {}
+PROMPTS: dict[str, Any] = {}

 PROMPTS["DEFAULT_LANGUAGE"] = "English"
 PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
--- a/lightrag/tools/lightrag_visualizer/graph_visualizer.py
+++ b/lightrag/tools/lightrag_visualizer/graph_visualizer.py
@@ -1,10 +1,3 @@
-"""
-3D GraphML Viewer using Dear ImGui and ModernGL
-Author: ParisNeo, ArnoChen
-Description: An interactive 3D GraphML viewer using imgui_bundle and ModernGL
-Version: 2.0
-"""
-
 from typing import Optional, Tuple, Dict, List
 import numpy as np
 import networkx as nx
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -13,13 +13,12 @@ from functools import wraps
 from hashlib import md5
 from typing import Any, Callable
 import xml.etree.ElementTree as ET
-import bs4
-
 import numpy as np
 import tiktoken

 from lightrag.prompt import PROMPTS

+
 VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"


@@ -84,13 +83,6 @@ class EmbeddingFunc:
        return await self.func(*args, **kwargs)


-@dataclass
-class ReasoningResponse:
-    reasoning_content: str | None
-    response_content: str
-    tag: str
-
-
 def locate_json_string_body_from_string(content: str) -> str | None:
    """Locate the JSON string body from a string"""
    try:
@@ -721,28 +713,3 @@ def get_conversation_turns(
        )

    return "\n".join(formatted_turns)
-
-
-def extract_reasoning(response: str, tag: str) -> ReasoningResponse:
-    """Extract the reasoning section and the following section from the LLM response.
-
-    Args:
-        response: LLM response
-        tag: Tag to extract
-    Returns:
-        ReasoningResponse: Reasoning section and following section
-
-    """
-    soup = bs4.BeautifulSoup(response, "html.parser")
-
-    reasoning_section = soup.find(tag)
-    if reasoning_section is None:
-        return ReasoningResponse(None, response, tag)
-    reasoning_content = reasoning_section.get_text().strip()
-
-    after_reasoning_section = reasoning_section.next_sibling
-    if after_reasoning_section is None:
-        return ReasoningResponse(reasoning_content, "", tag)
-    after_reasoning_content = after_reasoning_section.get_text().strip()
-
-    return ReasoningResponse(reasoning_content, after_reasoning_content, tag)
--- a/reproduce/Step_3.py
+++ b/reproduce/Step_3.py
@@ -2,7 +2,6 @@ import re
 import json
 import asyncio
 from lightrag import LightRAG, QueryParam
-from tqdm import tqdm


 def extract_queries(file_path):
@@ -44,7 +43,7 @@ def run_queries_and_save_to_json(
        result_file.write("[\n")
        first_entry = True

-        for query_text in tqdm(queries, desc="Processing queries", unit="query"):
+        for query_text in queries:
            result, error = loop.run_until_complete(
                process_query(query_text, rag_instance, query_param)
            )
--- a/reproduce/Step_3_openai_compatible.py
+++ b/reproduce/Step_3_openai_compatible.py
@@ -3,7 +3,6 @@ import re
 import json
 import asyncio
 from lightrag import LightRAG, QueryParam
-from tqdm import tqdm
 from lightrag.llm.openai import openai_complete_if_cache, openai_embed
 from lightrag.utils import EmbeddingFunc
 import numpy as np
@@ -76,7 +75,7 @@ def run_queries_and_save_to_json(
        result_file.write("[\n")
        first_entry = True

-        for query_text in tqdm(queries, desc="Processing queries", unit="query"):
+        for query_text in queries:
            result, error = loop.run_until_complete(
                process_query(query_text, rag_instance, query_param)
            )
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,6 @@
-accelerate
-aiofiles
 aiohttp
 configparser

-# File manipulation libraries
-docling
-graspologic
-
 # database packages
 networkx

@@ -19,10 +13,7 @@ python-dotenv
 setuptools
 tenacity

-
 # LLM packages
 tiktoken
-tqdm
-xxhash

 # Extra libraries are installed when needed using pipmaster
--- a/tests/test_lightrag_ollama_chat.py
+++ b/tests/test_lightrag_ollama_chat.py