@@ -1,11 +1,9 @@
 aiofiles
 ascii_colors
 fastapi
 numpy
+pipmaster
 python-dotenv
 python-multipart
 tenacity
-tiktoken
-torch
-tqdm
 uvicorn

@@ -22,7 +22,6 @@ if not pm.is_installed("faiss"):
|
||||
|
||||
try:
|
||||
import faiss
|
||||
from tqdm.asyncio import tqdm as tqdm_async
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"`faiss` library is not installed. Please install it via pip: `pip install faiss`."
|
||||
@@ -109,16 +108,7 @@ class FaissVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        pbar = tqdm_async(
-            total=len(batches), desc="Generating embeddings", unit="batch"
-        )
-
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
        embeddings_list = await asyncio.gather(*embedding_tasks)

         # Flatten the list of arrays

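The wrapped_task indirection above existed only to tick a progress bar; the embedding calls themselves are unchanged. If per-batch progress is still wanted after dropping tqdm, one possible shape is a counter plus logging, sketched below; embed_with_progress and the logger are illustrative, not part of this patch:

import asyncio
import logging

logger = logging.getLogger(__name__)


async def embed_with_progress(embedding_func, batches):
    """Run one embedding call per batch concurrently, logging progress."""
    total = len(batches)
    done = 0

    async def run(batch):
        nonlocal done
        result = await embedding_func(batch)
        done += 1
        logger.debug("Generated embeddings for %d/%d batches", done, total)
        return result

    # gather returns results in the order of `batches`, so index-based
    # matching against the input records stays correct downstream.
    return await asyncio.gather(*(run(b) for b in batches))
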
@@ -1,18 +1,21 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 from lightrag.utils import logger
 from ..base import BaseVectorStorage
 import pipmaster as pm
 import configparser


 if not pm.is_installed("configparser"):
     pm.install("configparser")

 if not pm.is_installed("pymilvus"):
     pm.install("pymilvus")

-from pymilvus import MilvusClient
+try:
+    import configparser
+    from pymilvus import MilvusClient
+except ImportError as e:
+    raise ImportError(
@@ -94,15 +97,7 @@ class MilvusVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -2,7 +2,6 @@ import os
 from dataclasses import dataclass
 import numpy as np
 import configparser
-from tqdm.asyncio import tqdm as tqdm_async
 import asyncio

 from typing import Any, List, Union, final
@@ -854,17 +853,8 @@ class MongoVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
-
-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["vector"] = np.array(embeddings[i], dtype=np.float32).tolist()

@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np

@@ -71,15 +70,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -1,7 +1,6 @@
 import html
 import os
 from dataclasses import dataclass
-from typing import Any, cast, final
+from typing import Any, final

 import numpy as np

@@ -14,8 +13,16 @@ from lightrag.utils import (
 from lightrag.base import (
     BaseGraphStorage,
 )
 import pipmaster as pm

+if not pm.is_installed("graspologic"):
+    pm.install("graspologic")
+
 if not pm.is_installed("networkx"):
     pm.install("networkx")

+try:
+    from graspologic import embed
+    import networkx as nx
+except ImportError as e:
+    raise ImportError(
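Several backends in this changeset adopt the same guard shown above: check with pipmaster, install on demand, then import inside try/except so a failed install still surfaces a clear error. Schematically, under the assumption that the error message mirrors the other backends (the message text here is a placeholder):

import pipmaster as pm

# Install the optional dependency on first use rather than at package install time.
if not pm.is_installed("networkx"):
    pm.install("networkx")

# The import can still fail (e.g. an offline machine), so fail with a clear message.
try:
    import networkx as nx
except ImportError as e:
    raise ImportError(
        "`networkx` is not installed. Please install it via pip: `pip install networkx`."
    ) from e
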
@@ -39,21 +46,6 @@ class NetworkXStorage(BaseGraphStorage):
         )
         nx.write_graphml(graph, file_name)

-    @staticmethod
-    def stable_largest_connected_component(graph: nx.Graph) -> nx.Graph:
-        """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
-        Return the largest connected component of the graph, with nodes and edges sorted in a stable way.
-        """
-        from graspologic.utils import largest_connected_component
-
-        graph = graph.copy()
-        graph = cast(nx.Graph, largest_connected_component(graph))
-        node_mapping = {
-            node: html.unescape(node.upper().strip()) for node in graph.nodes()
-        }  # type: ignore
-        graph = nx.relabel_nodes(graph, node_mapping)
-        return NetworkXStorage._stabilize_graph(graph)
-
     @staticmethod
     def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
         """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
@@ -153,8 +145,6 @@ class NetworkXStorage(BaseGraphStorage):

     # @TODO: NOT USED
     async def _node2vec_embed(self):
-        from graspologic import embed
-
         embeddings, nodes = embed.node2vec_embed(
             self._graph,
             **self.global_config["node2vec_params"],

@@ -20,10 +20,14 @@ from ..utils import logger

 import pipmaster as pm

+if not pm.is_installed("graspologic"):
+    pm.install("graspologic")
+
 if not pm.is_installed("oracledb"):
     pm.install("oracledb")

 try:
+    from graspologic import embed
     import oracledb

 except ImportError as e:
@@ -452,8 +456,6 @@ class OracleGraphStorage(BaseGraphStorage):

     async def _node2vec_embed(self):
         """Generate embedding vectors for nodes"""
-        from graspologic import embed
-
         embeddings, nodes = embed.node2vec_embed(
             self._graph,
             **self.config["node2vec_params"],

@@ -41,7 +41,6 @@ if not pm.is_installed("asyncpg"):

 try:
     import asyncpg
-    from tqdm.asyncio import tqdm as tqdm_async

 except ImportError as e:
     raise ImportError(
@@ -380,15 +379,7 @@ class PGVectorStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -1,7 +1,6 @@
 import asyncio
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import numpy as np
 import hashlib
@@ -110,15 +109,7 @@ class QdrantVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]

-        async def wrapped_task(batch):
-            result = await self.embedding_func(batch)
-            pbar.update(1)
-            return result
-
-        embedding_tasks = [wrapped_task(batch) for batch in batches]
-        pbar = tqdm_async(
-            total=len(embedding_tasks), desc="Generating embeddings", unit="batch"
-        )
+        embedding_tasks = [self.embedding_func(batch) for batch in batches]
         embeddings_list = await asyncio.gather(*embedding_tasks)

         embeddings = np.concatenate(embeddings_list)

@@ -1,6 +1,5 @@
 import os
 from typing import Any, final
-from tqdm.asyncio import tqdm as tqdm_async
 from dataclasses import dataclass
 import pipmaster as pm
 import configparser
@@ -51,7 +50,8 @@ class RedisKVStorage(BaseKVStorage):

     async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
         pipe = self._redis.pipeline()
-        for k, v in tqdm_async(data.items(), desc="Upserting"):
+
+        for k, v in data.items():
             pipe.set(f"{self.namespace}:{k}", json.dumps(v))
         await pipe.execute()

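Dropping the progress bar here also removes a misleading signal: redis pipeline commands are only buffered client-side, and no network I/O happens until execute(), so the old per-iteration bar measured local queueing rather than upsert progress. A small sketch of that buffering behavior, assuming redis.asyncio and a reachable server:

import asyncio
import json

from redis.asyncio import Redis


async def demo_upsert() -> None:
    redis = Redis()
    pipe = redis.pipeline()
    for k, v in {"doc1": {"text": "a"}, "doc2": {"text": "b"}}.items():
        pipe.set(f"demo:{k}", json.dumps(v))  # buffered locally, returns immediately
    await pipe.execute()  # one round trip executes every queued SET


asyncio.run(demo_upsert())
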
@@ -7,7 +7,6 @@ import numpy as np

 from lightrag.types import KnowledgeGraph

-from tqdm import tqdm

 from ..base import BaseGraphStorage, BaseKVStorage, BaseVectorStorage
 from ..namespace import NameSpace, is_namespace
@@ -270,15 +269,8 @@ class TiDBVectorDBStorage(BaseVectorStorage):
             for i in range(0, len(contents), self._max_batch_size)
         ]
         embedding_tasks = [self.embedding_func(batch) for batch in batches]
-        embeddings_list = []
-        for f in tqdm(
-            asyncio.as_completed(embedding_tasks),
-            total=len(embedding_tasks),
-            desc="Generating embeddings",
-            unit="batch",
-        ):
-            embeddings = await f
-            embeddings_list.append(embeddings)
+        embeddings_list = await asyncio.gather(*embedding_tasks)
+
         embeddings = np.concatenate(embeddings_list)
         for i, d in enumerate(list_data):
             d["content_vector"] = embeddings[i]

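The TiDB change fixes more than verbosity: asyncio.as_completed yields results in completion order, while the embeddings are matched back to list_data by index, so a slow early batch could pair records with another batch's vectors. asyncio.gather returns results in submission order and removes that hazard. A self-contained sketch of the difference, using artificial delays in place of real embedding calls:

import asyncio


async def fake_embed(batch_id: int, delay: float) -> int:
    # Stand-in for embedding_func; returns its batch id after `delay` seconds.
    await asyncio.sleep(delay)
    return batch_id


async def main() -> None:
    # Completion order: batch 1 finishes first -> [1, 0]
    by_completion = [
        await f for f in asyncio.as_completed([fake_embed(0, 0.2), fake_embed(1, 0.0)])
    ]
    # Submission order is preserved -> [0, 1]
    by_submission = await asyncio.gather(fake_embed(0, 0.2), fake_embed(1, 0.0))
    print(by_completion, by_submission)


asyncio.run(main())
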
@@ -1,46 +1,3 @@
-"""
-Azure OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with aure openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.azure_openai import azure_openai_model_complete, azure_openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import os
 import pipmaster as pm  # Pipmaster for dynamic library install

@@ -1,46 +1,3 @@
-"""
-Bedrock LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Bedrock's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - aioboto3, tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.bebrock import bebrock_model_complete, bebrock_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import copy
 import os
 import json

@@ -1,47 +1,7 @@
-"""
-Hugging face LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Hugging face's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - transformers
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.hf import hf_model_complete, hf_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import copy
 import os
 from functools import lru_cache

 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
@@ -51,9 +11,12 @@ if not pm.is_installed("torch"):
     pm.install("torch")
 if not pm.is_installed("tenacity"):
     pm.install("tenacity")
+if not pm.is_installed("numpy"):
+    pm.install("numpy")
+if not pm.is_installed("tenacity"):
+    pm.install("tenacity")

 from transformers import AutoTokenizer, AutoModelForCausalLM
 from functools import lru_cache
 from tenacity import (
     retry,
     stop_after_attempt,

@@ -1,43 +1,3 @@
-"""
-Jina Embedding Interface Module
-==========================
-
-This module provides interfaces for interacting with jina system,
-including embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added embedding generation
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.jina import jina_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import os
 import pipmaster as pm  # Pipmaster for dynamic library install

@@ -1,45 +1,3 @@
-"""
-LMDeploy LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with LMDeploy's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.lmdeploy import lmdeploy_model_complete, lmdeploy_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules

@@ -1,66 +1,3 @@
-"""
-LoLLMs (Lord of Large Language Models) Interface Module
-=====================================================
-
-This module provides the official interface for interacting with LoLLMs (Lord of Large Language and multimodal Systems),
-a unified framework for AI model interaction and deployment.
-
-LoLLMs is designed as a "one tool to rule them all" solution, providing seamless integration
-with various AI models while maintaining high performance and user-friendly interfaces.
-
-Author: ParisNeo
-Created: 2024-01-24
-License: Apache 2.0
-
-Copyright (c) 2024 ParisNeo
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-
-Version: 2.0.0
-
-Change Log:
-- 2.0.0 (2024-01-24):
-    * Added async support for model inference
-    * Implemented streaming capabilities
-    * Added embedding generation functionality
-    * Enhanced parameter handling
-    * Improved error handling and timeout management
-
-Dependencies:
-    - aiohttp
-    - numpy
-    - Python >= 3.10
-
-Features:
-    - Async text generation with streaming support
-    - Embedding generation
-    - Configurable model parameters
-    - System prompt and chat history support
-    - Timeout handling
-    - API key authentication
-
-Usage:
-    from llm_interfaces.lollms import lollms_model_complete, lollms_embed
-
-Project Repository: https://github.com/ParisNeo/lollms
-Documentation: https://github.com/ParisNeo/lollms/docs
-"""
-
-__version__ = "1.0.0"
-__author__ = "ParisNeo"
-__status__ = "Production"
-__project_url__ = "https://github.com/ParisNeo/lollms"
-__doc_url__ = "https://github.com/ParisNeo/lollms/docs"
 import sys

 if sys.version_info < (3, 9):

@@ -1,46 +1,3 @@
-"""
-OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.nvidia_openai import nvidia_openai_model_complete, nvidia_openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
-
 import sys
 import os

@@ -48,6 +5,7 @@ if sys.version_info < (3, 9):
     pass
 else:
     pass
+
 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules

@@ -1,51 +1,10 @@
-"""
-Ollama LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with Ollama's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - ollama
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.ollama_interface import ollama_model_complete, ollama_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys

 if sys.version_info < (3, 9):
     from typing import AsyncIterator
 else:
     from collections.abc import AsyncIterator

 import pipmaster as pm  # Pipmaster for dynamic library install

 # install specific modules
@@ -54,7 +13,9 @@ if not pm.is_installed("ollama"):
 if not pm.is_installed("tenacity"):
     pm.install("tenacity")

+
+import ollama

 from tenacity import (
     retry,
     stop_after_attempt,
@@ -67,7 +28,7 @@ from lightrag.exceptions import (
     APITimeoutError,
 )
 from lightrag.api import __api_version__
-from lightrag.utils import extract_reasoning
+

 import numpy as np
 from typing import Union

@@ -79,7 +40,7 @@ from typing import Union
         (RateLimitError, APIConnectionError, APITimeoutError)
     ),
 )
-async def ollama_model_if_cache(
+async def _ollama_model_if_cache(
     model,
     prompt,
     system_prompt=None,
@@ -87,7 +48,7 @@ async def ollama_model_if_cache(
     **kwargs,
 ) -> Union[str, AsyncIterator[str]]:
     stream = True if kwargs.get("stream") else False
-    reasoning_tag = kwargs.pop("reasoning_tag", None)
+
     kwargs.pop("max_tokens", None)
     # kwargs.pop("response_format", None) # allow json
     host = kwargs.pop("host", None)
@@ -125,11 +86,7 @@
         response and can simply be trimmed.
         """

-        return (
-            model_response
-            if reasoning_tag is None
-            else extract_reasoning(model_response, reasoning_tag).response_content
-        )
+        return model_response


 async def ollama_model_complete(
@@ -139,7 +96,7 @@ async def ollama_model_complete(
     if keyword_extraction:
         kwargs["format"] = "json"
     model_name = kwargs["hashing_kv"].global_config["llm_model_name"]
-    return await ollama_model_if_cache(
+    return await _ollama_model_if_cache(
         model_name,
         prompt,
         system_prompt=system_prompt,

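With reasoning_tag handling removed, _ollama_model_if_cache now returns the model output untouched. A caller that still needs to drop a leading <think>...</think> block could do so with a few lines of regex; this helper is purely illustrative and not part of lightrag:

import re


def strip_reasoning(text: str, tag: str = "think") -> str:
    """Drop a leading <tag>...</tag> reasoning block, if present."""
    return re.sub(rf"^\s*<{tag}>.*?</{tag}>\s*", "", text, count=1, flags=re.DOTALL)


print(strip_reasoning("<think>weigh options...</think>Final answer"))  # Final answer
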
@@ -1,45 +1,3 @@
-"""
-OpenAI LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with openai's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - openai
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.openai import openai_model_complete, openai_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os

@@ -1,43 +1,3 @@
-"""
-SiliconCloud Embedding Interface Module
-==========================
-
-This module provides interfaces for interacting with SiliconCloud system,
-including embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added embedding generation
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.siliconcloud import siliconcloud_model_complete, siliconcloud_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys

 if sys.version_info < (3, 9):

@@ -1,45 +1,3 @@
-"""
-Zhipu LLM Interface Module
-==========================
-
-This module provides interfaces for interacting with LMDeploy's language models,
-including text generation and embedding capabilities.
-
-Author: Lightrag team
-Created: 2024-01-24
-License: MIT License
-
-Copyright (c) 2024 Lightrag
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-Version: 1.0.0
-
-Change Log:
-- 1.0.0 (2024-01-24): Initial release
-    * Added async chat completion support
-    * Added embedding generation
-    * Added stream response capability
-
-Dependencies:
-    - tenacity
-    - numpy
-    - pipmaster
-    - Python >= 3.10
-
-Usage:
-    from llm_interfaces.zhipu import zhipu_model_complete, zhipu_embed
-"""
-
-__version__ = "1.0.0"
-__author__ = "lightrag Team"
-__status__ = "Production"
-
 import sys
 import re
 import json

@@ -3,7 +3,6 @@ from __future__ import annotations
 import asyncio
 import json
 import re
-from tqdm.asyncio import tqdm as tqdm_async
 from typing import Any, AsyncIterator
 from collections import Counter, defaultdict
 from .utils import (
@@ -500,16 +499,8 @@ async def extract_entities(
         )
         return dict(maybe_nodes), dict(maybe_edges)

-    results = []
-    for result in tqdm_async(
-        asyncio.as_completed([_process_single_content(c) for c in ordered_chunks]),
-        total=len(ordered_chunks),
-        desc="Level 2 - Extracting entities and relationships",
-        unit="chunk",
-        position=1,
-        leave=False,
-    ):
-        results.append(await result)
+    tasks = [_process_single_content(c) for c in ordered_chunks]
+    results = await asyncio.gather(*tasks)

     maybe_nodes = defaultdict(list)
     maybe_edges = defaultdict(list)
@@ -518,41 +509,20 @@ async def extract_entities(
             maybe_nodes[k].extend(v)
         for k, v in m_edges.items():
             maybe_edges[tuple(sorted(k))].extend(v)
-    logger.debug("Inserting entities into storage...")
-    all_entities_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
-                for k, v in maybe_nodes.items()
-            ]
-        ),
-        total=len(maybe_nodes),
-        desc="Level 3 - Inserting entities",
-        unit="entity",
-        position=2,
-        leave=False,
-    ):
-        all_entities_data.append(await result)
-
-    logger.debug("Inserting relationships into storage...")
-    all_relationships_data = []
-    for result in tqdm_async(
-        asyncio.as_completed(
-            [
-                _merge_edges_then_upsert(
-                    k[0], k[1], v, knowledge_graph_inst, global_config
-                )
-                for k, v in maybe_edges.items()
-            ]
-        ),
-        total=len(maybe_edges),
-        desc="Level 3 - Inserting relationships",
-        unit="relationship",
-        position=3,
-        leave=False,
-    ):
-        all_relationships_data.append(await result)
+    all_entities_data = await asyncio.gather(
+        *[
+            _merge_nodes_then_upsert(k, v, knowledge_graph_inst, global_config)
+            for k, v in maybe_nodes.items()
+        ]
+    )
+
+    all_relationships_data = await asyncio.gather(
+        *[
+            _merge_edges_then_upsert(k[0], k[1], v, knowledge_graph_inst, global_config)
+            for k, v in maybe_edges.items()
+        ]
+    )

     if not len(all_entities_data) and not len(all_relationships_data):
         logger.warning(

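Note that asyncio.gather launches every merge coroutine at once, just as the old as_completed loop did; neither version bounds concurrency. If storage backpressure ever becomes an issue, a common idiom is a semaphore wrapper such as the sketch below; gather_limited is hypothetical and not part of this patch:

import asyncio


async def gather_limited(coros, limit: int = 16):
    """Like asyncio.gather, but runs at most `limit` coroutines at a time."""
    sem = asyncio.Semaphore(limit)

    async def bounded(coro):
        async with sem:
            return await coro

    return await asyncio.gather(*(bounded(c) for c in coros))
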
@@ -1,8 +1,9 @@
 from __future__ import annotations
+from typing import Any

 GRAPH_FIELD_SEP = "<SEP>"

-PROMPTS = {}
+PROMPTS: dict[str, Any] = {}

 PROMPTS["DEFAULT_LANGUAGE"] = "English"
 PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"

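Annotating the module-level PROMPTS registry lets a type checker validate writes that were previously unchecked. A tiny illustration of what the annotation buys (hypothetical snippet, checked with a tool such as mypy):

from typing import Any

PROMPTS: dict[str, Any] = {}

PROMPTS["DEFAULT_LANGUAGE"] = "English"  # fine: str key, Any value
PROMPTS[42] = "oops"  # a type checker now flags this: key must be str
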
@@ -1,10 +1,3 @@
-"""
-3D GraphML Viewer using Dear ImGui and ModernGL
-Author: ParisNeo, ArnoChen
-Description: An interactive 3D GraphML viewer using imgui_bundle and ModernGL
-Version: 2.0
-"""
-
 from typing import Optional, Tuple, Dict, List
 import numpy as np
 import networkx as nx

@@ -13,13 +13,12 @@ from functools import wraps
 from hashlib import md5
 from typing import Any, Callable
 import xml.etree.ElementTree as ET
-import bs4

 import numpy as np
 import tiktoken

 from lightrag.prompt import PROMPTS


 VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"

@@ -84,13 +83,6 @@ class EmbeddingFunc:
         return await self.func(*args, **kwargs)


-@dataclass
-class ReasoningResponse:
-    reasoning_content: str | None
-    response_content: str
-    tag: str
-
-
 def locate_json_string_body_from_string(content: str) -> str | None:
     """Locate the JSON string body from a string"""
     try:
@@ -721,28 +713,3 @@ def get_conversation_turns(
     )

     return "\n".join(formatted_turns)
-
-
-def extract_reasoning(response: str, tag: str) -> ReasoningResponse:
-    """Extract the reasoning section and the following section from the LLM response.
-
-    Args:
-        response: LLM response
-        tag: Tag to extract
-
-    Returns:
-        ReasoningResponse: Reasoning section and following section
-    """
-    soup = bs4.BeautifulSoup(response, "html.parser")
-
-    reasoning_section = soup.find(tag)
-    if reasoning_section is None:
-        return ReasoningResponse(None, response, tag)
-    reasoning_content = reasoning_section.get_text().strip()
-
-    after_reasoning_section = reasoning_section.next_sibling
-    if after_reasoning_section is None:
-        return ReasoningResponse(reasoning_content, "", tag)
-    after_reasoning_content = after_reasoning_section.get_text().strip()
-
-    return ReasoningResponse(reasoning_content, after_reasoning_content, tag)