diff --git a/examples/.env.oai.example b/examples/.env.oai.example
deleted file mode 100644
index cea86da2..00000000
--- a/examples/.env.oai.example
+++ /dev/null
@@ -1,7 +0,0 @@
-AZURE_OPENAI_API_VERSION=2024-08-01-preview
-AZURE_OPENAI_DEPLOYMENT=gpt-4o
-AZURE_OPENAI_API_KEY=myapikey
-AZURE_OPENAI_ENDPOINT=https://myendpoint.openai.azure.com
-
-AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
-AZURE_EMBEDDING_API_VERSION=2023-05-15
diff --git a/examples/batch_eval.py b/examples/batch_eval.py
deleted file mode 100644
index a85e1ede..00000000
--- a/examples/batch_eval.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import re
-import json
-import jsonlines
-
-from openai import OpenAI
-
-
-def batch_eval(query_file, result1_file, result2_file, output_file_path):
- client = OpenAI()
-
- with open(query_file, "r") as f:
- data = f.read()
-
- queries = re.findall(r"- Question \d+: (.+)", data)
-
- with open(result1_file, "r") as f:
- answers1 = json.load(f)
- answers1 = [i["result"] for i in answers1]
-
- with open(result2_file, "r") as f:
- answers2 = json.load(f)
- answers2 = [i["result"] for i in answers2]
-
- requests = []
- for i, (query, answer1, answer2) in enumerate(zip(queries, answers1, answers2)):
- sys_prompt = """
- ---Role---
- You are an expert tasked with evaluating two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
- """
-
- prompt = f"""
- You will evaluate two answers to the same question based on three criteria: **Comprehensiveness**, **Diversity**, and **Empowerment**.
-
- - **Comprehensiveness**: How much detail does the answer provide to cover all aspects and details of the question?
- - **Diversity**: How varied and rich is the answer in providing different perspectives and insights on the question?
- - **Empowerment**: How well does the answer help the reader understand and make informed judgments about the topic?
-
- For each criterion, choose the better answer (either Answer 1 or Answer 2) and explain why. Then, select an overall winner based on these three categories.
-
- Here is the question:
- {query}
-
- Here are the two answers:
-
- **Answer 1:**
- {answer1}
-
- **Answer 2:**
- {answer2}
-
- Evaluate both answers using the three criteria listed above and provide detailed explanations for each criterion.
-
- Output your evaluation in the following JSON format:
-
- {{
- "Comprehensiveness": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Provide explanation here]"
- }},
- "Diversity": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Provide explanation here]"
- }},
- "Empowerment": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Provide explanation here]"
- }},
- "Overall Winner": {{
- "Winner": "[Answer 1 or Answer 2]",
- "Explanation": "[Summarize why this answer is the overall winner based on the three criteria]"
- }}
- }}
- """
-
- request_data = {
- "custom_id": f"request-{i+1}",
- "method": "POST",
- "url": "/v1/chat/completions",
- "body": {
- "model": "gpt-4o-mini",
- "messages": [
- {"role": "system", "content": sys_prompt},
- {"role": "user", "content": prompt},
- ],
- },
- }
-
- requests.append(request_data)
-
- with jsonlines.open(output_file_path, mode="w") as writer:
- for request in requests:
- writer.write(request)
-
- print(f"Batch API requests written to {output_file_path}")
-
- batch_input_file = client.files.create(
- file=open(output_file_path, "rb"), purpose="batch"
- )
- batch_input_file_id = batch_input_file.id
-
- batch = client.batches.create(
- input_file_id=batch_input_file_id,
- endpoint="/v1/chat/completions",
- completion_window="24h",
- metadata={"description": "nightly eval job"},
- )
-
- print(f"Batch {batch.id} has been created.")
-
-
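-# A minimal sketch of collecting the results once the batch finishes, using the
-# standard OpenAI Python SDK batch/file methods; the helper name and default
-# output path are illustrative placeholders.
-def fetch_batch_results(client, batch_id, output_path="batch_results.jsonl"):
- # Check the batch once; callers can poll in a loop with a delay until it completes.
- batch = client.batches.retrieve(batch_id)
- if batch.status == "completed":
- # Download the JSONL output produced by the Batch API and save it locally.
- content = client.files.content(batch.output_file_id)
- with open(output_path, "w") as f:
- f.write(content.text)
- return batch.status
-
-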
-if __name__ == "__main__":
- # The file paths below are illustrative placeholders; point them at your own query and result files.
- batch_eval("./queries.txt", "./result1.json", "./result2.json", "./batch_requests.jsonl")
diff --git a/examples/generate_query.py b/examples/generate_query.py
deleted file mode 100644
index 705b23d3..00000000
--- a/examples/generate_query.py
+++ /dev/null
@@ -1,55 +0,0 @@
-from openai import OpenAI
-
-# os.environ["OPENAI_API_KEY"] = ""
-
-
-def openai_complete_if_cache(
- model="gpt-4o-mini", prompt=None, system_prompt=None, history_messages=[], **kwargs
-) -> str:
- openai_client = OpenAI()
-
- messages = []
- if system_prompt:
- messages.append({"role": "system", "content": system_prompt})
- messages.extend(history_messages)
- messages.append({"role": "user", "content": prompt})
-
- response = openai_client.chat.completions.create(
- model=model, messages=messages, **kwargs
- )
- return response.choices[0].message.content
-
-
-if __name__ == "__main__":
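- # Provide a short natural-language description of your dataset here; the prompt below is built around it.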
- description = ""
- prompt = f"""
- Given the following description of a dataset:
-
- {description}
-
- Please identify 5 potential users who would engage with this dataset. For each user, list 5 tasks they would perform with this dataset. Then, for each (user, task) combination, generate 5 questions that require a high-level understanding of the entire dataset.
-
- Output the results in the following structure:
- - User 1: [user description]
- - Task 1: [task description]
- - Question 1:
- - Question 2:
- - Question 3:
- - Question 4:
- - Question 5:
- - Task 2: [task description]
- ...
- - Task 5: [task description]
- - User 2: [user description]
- ...
- - User 5: [user description]
- ...
- """
-
- result = openai_complete_if_cache(model="gpt-4o-mini", prompt=prompt)
-
- file_path = "./queries.txt"
- with open(file_path, "w") as file:
- file.write(result)
-
- print(f"Queries written to {file_path}")
diff --git a/examples/get_all_edges_nx.py b/examples/get_all_edges_nx.py
deleted file mode 100644
index b2c1d84e..00000000
--- a/examples/get_all_edges_nx.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import networkx as nx
-
-G = nx.read_graphml("./dickensTestEmbedcall/graph_chunk_entity_relation.graphml")
-
-
-def get_all_edges_and_nodes(G):
- # Get all edges and their properties
- edges_with_properties = []
- for u, v, data in G.edges(data=True):
- edges_with_properties.append(
- {
- "start": u,
- "end": v,
- "label": data.get(
- "label", ""
- ), # Assuming 'label' is used for edge type
- "properties": data,
- "start_node_properties": G.nodes[u],
- "end_node_properties": G.nodes[v],
- }
- )
-
- return edges_with_properties
-
-
-# Example usage
-if __name__ == "__main__":
- # G is the NetworkX graph loaded from the GraphML file above
-
- all_edges = get_all_edges_and_nodes(G)
-
- # Print all edges and node properties
- for edge in all_edges:
- print(f"Edge Label: {edge['label']}")
- print(f"Edge Properties: {edge['properties']}")
- print(f"Start Node: {edge['start']}")
- print(f"Start Node Properties: {edge['start_node_properties']}")
- print(f"End Node: {edge['end']}")
- print(f"End Node Properties: {edge['end_node_properties']}")
- print("---")
diff --git a/examples/openai_README.md b/examples/openai_README.md
deleted file mode 100644
index e2d8d42e..00000000
--- a/examples/openai_README.md
+++ /dev/null
@@ -1,114 +0,0 @@
-
-## API Server Implementation
-
-LightRAG also provides a FastAPI-based server implementation for RESTful API access to RAG operations. This allows you to run LightRAG as a service and interact with it through HTTP requests.
-
-### Setting up the API Server
-
-1. First, ensure you have the required dependencies:
-```bash
-pip install fastapi uvicorn pydantic
-```
-
-2. Set up your environment variables:
-```bash
-export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
-export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
-export OPENAI_API_KEY="Your OpenAI API key" # Required
-export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
-export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
-```
-
-3. Run the API server:
-```bash
-python examples/lightrag_api_openai_compatible_demo.py
-```
-
-The server will start on `http://0.0.0.0:8020`.
-
-
-### API Endpoints
-
-The API server provides the following endpoints:
-
-#### 1. Query Endpoint
-
-- **URL:** `/query`
-- **Method:** POST
-- **Body:**
-```json
-{
- "query": "Your question here",
- "mode": "hybrid", // Can be "naive", "local", "global", or "hybrid"
- "only_need_context": true // Optional: Defaults to false, if true, only the referenced context will be returned, otherwise the llm answer will be returned
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/query" \
- -H "Content-Type: application/json" \
- -d '{"query": "What are the main themes?", "mode": "hybrid"}'
-```
-
-
-#### 2. Insert Text Endpoint
-
-- **URL:** `/insert`
-- **Method:** POST
-- **Body:**
-```json
-{
- "text": "Your text content here"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert" \
- -H "Content-Type: application/json" \
- -d '{"text": "Content to be inserted into RAG"}'
-```
-
-
-#### 3. Insert File Endpoint
-
-- **URL:** `/insert_file`
-- **Method:** POST
-- **Body:**
-```json
-{
- "file_path": "path/to/your/file.txt"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert_file" \
- -H "Content-Type: application/json" \
- -d '{"file_path": "./book.txt"}'
-```
-
-
-#### 4. Health Check Endpoint
-
-- **URL:** `/health`
-- **Method:** GET
-- **Example:**
-```bash
-curl -X GET "http://127.0.0.1:8020/health"
-```
-
-
-### Configuration
-
-The API server can be configured using environment variables:
-- `RAG_DIR`: Directory for storing the RAG index (default: "index_default")
-- API keys and base URLs should be configured in the code for your specific LLM and embedding model providers
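-
-For reference, a minimal sketch of how a script might resolve these variables (the exact variable names used in `lightrag_api_openai_compatible_demo.py` may differ):
-
-```python
-import os
-
-RAG_DIR = os.environ.get("RAG_DIR", "index_default")
-OPENAI_BASE_URL = os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1")
-OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]  # required, no default
-LLM_MODEL = os.environ.get("LLM_MODEL", "gpt-4o-mini")
-EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
-```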
diff --git a/examples/openai_README_zh.md b/examples/openai_README_zh.md
deleted file mode 100644
index 068b2caf..00000000
--- a/examples/openai_README_zh.md
+++ /dev/null
@@ -1,115 +0,0 @@
-
-## API 服务器实现
-
-LightRAG 还提供基于 FastAPI 的服务器实现,用于对 RAG 操作进行 RESTful API 访问。这允许您将 LightRAG 作为服务运行并通过 HTTP 请求与其交互。
-
-### 设置 API 服务器
-
-1. 首先,确保您具有所需的依赖项:
-```bash
-pip install fastapi uvicorn pydantic
-```
-
-2. 设置您的环境变量:
-```bash
-export RAG_DIR="your_index_directory" # Optional: Defaults to "index_default"
-export OPENAI_BASE_URL="Your OpenAI API base URL" # Optional: Defaults to "https://api.openai.com/v1"
-export OPENAI_API_KEY="Your OpenAI API key" # Required
-export LLM_MODEL="Your LLM model" # Optional: Defaults to "gpt-4o-mini"
-export EMBEDDING_MODEL="Your embedding model" # Optional: Defaults to "text-embedding-3-large"
-```
-
-3. 运行API服务器:
-```bash
-python examples/lightrag_api_openai_compatible_demo.py
-```
-
-服务器将启动于 `http://0.0.0.0:8020`。
-
-
-### API端点
-
-API服务器提供以下端点:
-
-#### 1. 查询端点
-
-- **URL:** `/query`
-- **Method:** POST
-- **Body:**
-```json
-{
- "query": "Your question here",
- "mode": "hybrid", // Can be "naive", "local", "global", or "hybrid"
- "only_need_context": true // Optional: Defaults to false, if true, only the referenced context will be returned, otherwise the llm answer will be returned
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/query" \
- -H "Content-Type: application/json" \
- -d '{"query": "What are the main themes?", "mode": "hybrid"}'
-```
-
-
-#### 2. 插入文本端点
-
-- **URL:** `/insert`
-- **Method:** POST
-- **Body:**
-```json
-{
- "text": "Your text content here"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert" \
- -H "Content-Type: application/json" \
- -d '{"text": "Content to be inserted into RAG"}'
-```
-
-
-#### 3. 插入文件端点
-
-- **URL:** `/insert_file`
-- **Method:** POST
-- **Body:**
-```json
-{
- "file_path": "path/to/your/file.txt"
-}
-```
-- **Example:**
-```bash
-curl -X POST "http://127.0.0.1:8020/insert_file" \
- -H "Content-Type: application/json" \
- -d '{"file_path": "./book.txt"}'
-```
-
-
-#### 4. 健康检查端点
-
-- **URL:** `/health`
-- **Method:** GET
-- **Example:**
-```bash
-curl -X GET "http://127.0.0.1:8020/health"
-```
-
-
-### 配置
-
-可以使用环境变量配置API服务器:
-- `RAG_DIR`: 存放RAG索引的目录 (default: "index_default")
-- 应在代码中为您的特定 LLM 和嵌入模型提供商配置 API 密钥和基本 URL
diff --git a/examples/test.py b/examples/test.py
deleted file mode 100644
index f2456436..00000000
--- a/examples/test.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import os
-import asyncio
-from lightrag import LightRAG, QueryParam
-from lightrag.llm.openai import gpt_4o_mini_complete
-from lightrag.kg.shared_storage import initialize_pipeline_status
-#########
-# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
-# import nest_asyncio
-# nest_asyncio.apply()
-#########
-
-WORKING_DIR = "./dickens"
-
-if not os.path.exists(WORKING_DIR):
- os.mkdir(WORKING_DIR)
-
-
-async def initialize_rag():
- rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
- # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
- )
-
- await rag.initialize_storages()
- await initialize_pipeline_status()
-
- return rag
-
-
-def main():
- # Initialize RAG instance
- rag = asyncio.run(initialize_rag())
-
- with open("./book.txt", "r", encoding="utf-8") as f:
- rag.insert(f.read())
-
- # Perform naive search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="naive")
- )
- )
-
- # Perform local search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="local")
- )
- )
-
- # Perform global search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="global")
- )
- )
-
- # Perform hybrid search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="hybrid")
- )
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/test_chromadb.py b/examples/test_chromadb.py
deleted file mode 100644
index e4e9b698..00000000
--- a/examples/test_chromadb.py
+++ /dev/null
@@ -1,158 +0,0 @@
-import os
-import asyncio
-from lightrag import LightRAG, QueryParam
-from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
-from lightrag.utils import EmbeddingFunc
-import numpy as np
-from lightrag.kg.shared_storage import initialize_pipeline_status
-
-#########
-# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
-# import nest_asyncio
-# nest_asyncio.apply()
-#########
-WORKING_DIR = "./chromadb_test_dir"
-if not os.path.exists(WORKING_DIR):
- os.mkdir(WORKING_DIR)
-
-# ChromaDB Configuration
-CHROMADB_USE_LOCAL_PERSISTENT = False
-# Local PersistentClient Configuration
-CHROMADB_LOCAL_PATH = os.environ.get(
- "CHROMADB_LOCAL_PATH", os.path.join(WORKING_DIR, "chroma_data")
-)
-# Remote HttpClient Configuration
-CHROMADB_HOST = os.environ.get("CHROMADB_HOST", "localhost")
-CHROMADB_PORT = int(os.environ.get("CHROMADB_PORT", 8000))
-CHROMADB_AUTH_TOKEN = os.environ.get("CHROMADB_AUTH_TOKEN", "secret-token")
-CHROMADB_AUTH_PROVIDER = os.environ.get(
- "CHROMADB_AUTH_PROVIDER", "chromadb.auth.token_authn.TokenAuthClientProvider"
-)
-CHROMADB_AUTH_HEADER = os.environ.get("CHROMADB_AUTH_HEADER", "X-Chroma-Token")
-
-# Embedding Configuration and Functions
-EMBEDDING_MODEL = os.environ.get("EMBEDDING_MODEL", "text-embedding-3-large")
-EMBEDDING_MAX_TOKEN_SIZE = int(os.environ.get("EMBEDDING_MAX_TOKEN_SIZE", 8192))
-
- # ChromaDB requires knowing the dimension of embeddings upfront when
- # creating a collection. The embedding dimension is model-specific
- # (e.g. text-embedding-3-large uses 3072 dimensions), so we determine it
- # dynamically by running a test embedding and then pass it to the
- # ChromaVectorDBStorage class.
-
-
-async def embedding_func(texts: list[str]) -> np.ndarray:
- return await openai_embed(
- texts,
- model=EMBEDDING_MODEL,
- )
-
-
-async def get_embedding_dimension():
- test_text = ["This is a test sentence."]
- embedding = await embedding_func(test_text)
- return embedding.shape[1]
-
-
-async def create_embedding_function_instance():
- # Get embedding dimension
- embedding_dimension = await get_embedding_dimension()
- # Create embedding function instance
- return EmbeddingFunc(
- embedding_dim=embedding_dimension,
- max_token_size=EMBEDDING_MAX_TOKEN_SIZE,
- func=embedding_func,
- )
-
-
-async def initialize_rag():
- embedding_func_instance = await create_embedding_function_instance()
- if CHROMADB_USE_LOCAL_PERSISTENT:
- rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=gpt_4o_mini_complete,
- embedding_func=embedding_func_instance,
- vector_storage="ChromaVectorDBStorage",
- log_level="DEBUG",
- embedding_batch_num=32,
- vector_db_storage_cls_kwargs={
- "local_path": CHROMADB_LOCAL_PATH,
- "collection_settings": {
- "hnsw:space": "cosine",
- "hnsw:construction_ef": 128,
- "hnsw:search_ef": 128,
- "hnsw:M": 16,
- "hnsw:batch_size": 100,
- "hnsw:sync_threshold": 1000,
- },
- },
- )
- else:
- rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=gpt_4o_mini_complete,
- embedding_func=embedding_func_instance,
- vector_storage="ChromaVectorDBStorage",
- log_level="DEBUG",
- embedding_batch_num=32,
- vector_db_storage_cls_kwargs={
- "host": CHROMADB_HOST,
- "port": CHROMADB_PORT,
- "auth_token": CHROMADB_AUTH_TOKEN,
- "auth_provider": CHROMADB_AUTH_PROVIDER,
- "auth_header_name": CHROMADB_AUTH_HEADER,
- "collection_settings": {
- "hnsw:space": "cosine",
- "hnsw:construction_ef": 128,
- "hnsw:search_ef": 128,
- "hnsw:M": 16,
- "hnsw:batch_size": 100,
- "hnsw:sync_threshold": 1000,
- },
- },
- )
-
- await rag.initialize_storages()
- await initialize_pipeline_status()
-
- return rag
-
-
-def main():
- # Initialize RAG instance
- rag = asyncio.run(initialize_rag())
-
- with open("./book.txt", "r", encoding="utf-8") as f:
- rag.insert(f.read())
-
- # Perform naive search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="naive")
- )
- )
-
- # Perform local search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="local")
- )
- )
-
- # Perform global search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="global")
- )
- )
-
- # Perform hybrid search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="hybrid")
- )
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/test_faiss.py b/examples/test_faiss.py
deleted file mode 100644
index febdce14..00000000
--- a/examples/test_faiss.py
+++ /dev/null
@@ -1,108 +0,0 @@
-import os
-import logging
-import asyncio
-import numpy as np
-
-from dotenv import load_dotenv
-from sentence_transformers import SentenceTransformer
-
-from openai import AzureOpenAI
-from lightrag import LightRAG, QueryParam
-from lightrag.utils import EmbeddingFunc
-from lightrag.kg.shared_storage import initialize_pipeline_status
-
-WORKING_DIR = "./dickens"
-# Configure Logging
-logging.basicConfig(level=logging.INFO)
-
-# Load environment variables from .env file
-load_dotenv()
-AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
-AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
-AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
-AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
-
-
-async def llm_model_func(
- prompt, system_prompt=None, history_messages=[], keyword_extraction=False, **kwargs
-) -> str:
- # Create a client for AzureOpenAI
- client = AzureOpenAI(
- api_key=AZURE_OPENAI_API_KEY,
- api_version=AZURE_OPENAI_API_VERSION,
- azure_endpoint=AZURE_OPENAI_ENDPOINT,
- )
-
- # Build the messages list for the conversation
- messages = []
- if system_prompt:
- messages.append({"role": "system", "content": system_prompt})
- if history_messages:
- messages.extend(history_messages)
- messages.append({"role": "user", "content": prompt})
-
- # Call the LLM
- chat_completion = client.chat.completions.create(
- model=AZURE_OPENAI_DEPLOYMENT,
- messages=messages,
- temperature=kwargs.get("temperature", 0),
- top_p=kwargs.get("top_p", 1),
- n=kwargs.get("n", 1),
- )
-
- return chat_completion.choices[0].message.content
-
-
-async def embedding_func(texts: list[str]) -> np.ndarray:
- model = SentenceTransformer("all-MiniLM-L6-v2")
- embeddings = model.encode(texts, convert_to_numpy=True)
- return embeddings
-
-
-async def initialize_rag():
- rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=llm_model_func,
- embedding_func=EmbeddingFunc(
- embedding_dim=384,
- max_token_size=8192,
- func=embedding_func,
- ),
- vector_storage="FaissVectorDBStorage",
- vector_db_storage_cls_kwargs={
- "cosine_better_than_threshold": 0.2 # Your desired threshold
- },
- )
-
- await rag.initialize_storages()
- await initialize_pipeline_status()
-
- return rag
-
-
-def main():
- # Initialize RAG instance
- rag = asyncio.run(initialize_rag())
- # Insert the two source books into LightRAG (context managers ensure the files are closed)
- with open("./book_1.txt", encoding="utf-8") as book1, open("./book_2.txt", encoding="utf-8") as book2:
- rag.insert([book1.read(), book2.read()])
-
- query_text = "What are the main themes?"
-
- print("Result (Naive):")
- print(rag.query(query_text, param=QueryParam(mode="naive")))
-
- print("\nResult (Local):")
- print(rag.query(query_text, param=QueryParam(mode="local")))
-
- print("\nResult (Global):")
- print(rag.query(query_text, param=QueryParam(mode="global")))
-
- print("\nResult (Hybrid):")
- print(rag.query(query_text, param=QueryParam(mode="hybrid")))
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/test_neo4j.py b/examples/test_neo4j.py
deleted file mode 100644
index 7f620acc..00000000
--- a/examples/test_neo4j.py
+++ /dev/null
@@ -1,71 +0,0 @@
-import os
-import asyncio
-from lightrag import LightRAG, QueryParam
-from lightrag.llm.openai import gpt_4o_mini_complete
-from lightrag.kg.shared_storage import initialize_pipeline_status
-
-#########
-# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
-# import nest_asyncio
-# nest_asyncio.apply()
-#########
-
-WORKING_DIR = "./local_neo4jWorkDir"
-
-if not os.path.exists(WORKING_DIR):
- os.mkdir(WORKING_DIR)
-
-
-async def initialize_rag():
- rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model
- graph_storage="Neo4JStorage",
- log_level="INFO",
- # llm_model_func=gpt_4o_complete # Optionally, use a stronger model
- )
-
- await rag.initialize_storages()
- await initialize_pipeline_status()
-
- return rag
-
-
-def main():
- # Initialize RAG instance
- rag = asyncio.run(initialize_rag())
-
- with open("./book.txt", "r", encoding="utf-8") as f:
- rag.insert(f.read())
-
- # Perform naive search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="naive")
- )
- )
-
- # Perform local search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="local")
- )
- )
-
- # Perform global search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="global")
- )
- )
-
- # Perform hybrid search
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="hybrid")
- )
- )
-
-
-if __name__ == "__main__":
- main()
diff --git a/examples/test_postgres.py b/examples/test_postgres.py
deleted file mode 100644
index e1f796c6..00000000
--- a/examples/test_postgres.py
+++ /dev/null
@@ -1,51 +0,0 @@
-import os
-import asyncio
-from lightrag.kg.postgres_impl import PGGraphStorage
-from lightrag.llm.ollama import ollama_embedding
-from lightrag.utils import EmbeddingFunc
-
-#########
-# Uncomment the below two lines if running in a jupyter notebook to handle the async nature of rag.insert()
-# import nest_asyncio
-# nest_asyncio.apply()
-#########
-
-WORKING_DIR = "./local_neo4jWorkDir"
-
-if not os.path.exists(WORKING_DIR):
- os.mkdir(WORKING_DIR)
-
-# AGE
-os.environ["AGE_GRAPH_NAME"] = "dickens"
-
-os.environ["POSTGRES_HOST"] = "localhost"
-os.environ["POSTGRES_PORT"] = "15432"
-os.environ["POSTGRES_USER"] = "rag"
-os.environ["POSTGRES_PASSWORD"] = "rag"
-os.environ["POSTGRES_DATABASE"] = "rag"
-
-
-async def main():
- graph_db = PGGraphStorage(
- namespace="dickens",
- embedding_func=EmbeddingFunc(
- embedding_dim=1024,
- max_token_size=8192,
- func=lambda texts: ollama_embedding(
- texts, embed_model="bge-m3", host="http://localhost:11434"
- ),
- ),
- global_config={},
- )
- await graph_db.initialize()
- labels = await graph_db.get_all_labels()
- print("all labels", labels)
-
- res = await graph_db.get_knowledge_graph("FEZZIWIG")
- print("knowledge graphs", res)
-
- await graph_db.finalize()
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/examples/vram_management_demo.py b/examples/vram_management_demo.py
deleted file mode 100644
index 36eb5468..00000000
--- a/examples/vram_management_demo.py
+++ /dev/null
@@ -1,121 +0,0 @@
-import os
-import time
-import asyncio
-from lightrag import LightRAG, QueryParam
-from lightrag.llm.ollama import ollama_model_complete, ollama_embed
-from lightrag.utils import EmbeddingFunc
-from lightrag.kg.shared_storage import initialize_pipeline_status
-
-# Working directory and the directory path for text files
-WORKING_DIR = "./dickens"
-TEXT_FILES_DIR = "/llm/mt"
-
-# Create the working directory if it doesn't exist
-if not os.path.exists(WORKING_DIR):
- os.mkdir(WORKING_DIR)
-
-
-async def initialize_rag():
- # Initialize LightRAG
- rag = LightRAG(
- working_dir=WORKING_DIR,
- llm_model_func=ollama_model_complete,
- llm_model_name="qwen2.5:3b-instruct-max-context",
- embedding_func=EmbeddingFunc(
- embedding_dim=768,
- max_token_size=8192,
- func=lambda texts: ollama_embed(texts, embed_model="nomic-embed-text"),
- ),
- )
- await rag.initialize_storages()
- await initialize_pipeline_status()
-
- return rag
-
-
-# Read all .txt files from the TEXT_FILES_DIR directory
-texts = []
-for filename in os.listdir(TEXT_FILES_DIR):
- if filename.endswith(".txt"):
- file_path = os.path.join(TEXT_FILES_DIR, filename)
- with open(file_path, "r", encoding="utf-8") as file:
- texts.append(file.read())
-
-
-# Batch insert texts into LightRAG with a retry mechanism
-def insert_texts_with_retry(rag, texts, retries=3, delay=5):
- for _ in range(retries):
- try:
- rag.insert(texts)
- return
- except Exception as e:
- print(
- f"Error occurred during insertion: {e}. Retrying in {delay} seconds..."
- )
- time.sleep(delay)
- raise RuntimeError("Failed to insert texts after multiple retries.")
-
-
-def main():
- # Initialize RAG instance
- rag = asyncio.run(initialize_rag())
-
- insert_texts_with_retry(rag, texts)
-
- # Perform different types of queries and handle potential errors
- try:
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="naive")
- )
- )
- except Exception as e:
- print(f"Error performing naive search: {e}")
-
- try:
- print(
- rag.query(
- "What are the top themes in this story?", param=QueryParam(mode="local")
- )
- )
- except Exception as e:
- print(f"Error performing local search: {e}")
-
- try:
- print(
- rag.query(
- "What are the top themes in this story?",
- param=QueryParam(mode="global"),
- )
- )
- except Exception as e:
- print(f"Error performing global search: {e}")
-
- try:
- print(
- rag.query(
- "What are the top themes in this story?",
- param=QueryParam(mode="hybrid"),
- )
- )
- except Exception as e:
- print(f"Error performing hybrid search: {e}")
-
- # Function to clear VRAM resources
- def clear_vram():
- os.system("sudo nvidia-smi --gpu-reset")
-
- # Regularly clear VRAM to prevent overflow
- clear_vram_interval = 3600 # Clear once every hour
- start_time = time.time()
-
- while True:
- current_time = time.time()
- if current_time - start_time > clear_vram_interval:
- clear_vram()
- start_time = current_time
- time.sleep(60) # Check the time every minute
-
-
-if __name__ == "__main__":
- main()