Merge branch 'main' into graph-storage-batch-query

This commit is contained in:
yangdx
2025-04-12 22:03:54 +08:00
50 changed files with 1307 additions and 940 deletions

View File

@@ -1,4 +1,5 @@
<center><h2>🚀 LightRAG: Simple and Fast Retrieval-Augmented Generation</h2></center>
<div align="center">
<table border="0" width="100%">
<tr>
@@ -6,6 +7,7 @@
<img src="./assets/logo.png" width="80" height="80" alt="lightrag">
</td>
<td>
<div>
<p>
<a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
@@ -32,9 +34,13 @@
</div>
<div align="center">
<a href="https://trendshift.io/repositories/13043" target="_blank"><img src="https://trendshift.io/api/badge/repositories/13043" alt="HKUDS%2FLightRAG | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>
</div>
## 🎉 News
- [X] [2025.03.18]🎯📢LightRAG now supports citation functionality.
- [X] [2025.03.18]🎯📢LightRAG now supports citation functionality, enabling proper source attribution.
- [X] [2025.02.05]🎯📢Our team has released [VideoRAG](https://github.com/HKUDS/VideoRAG) for understanding extremely long-context videos.
- [X] [2025.01.13]🎯📢Our team has released [MiniRAG](https://github.com/HKUDS/MiniRAG) making RAG simpler with small models.
- [X] [2025.01.06]🎯📢You can now [use PostgreSQL for Storage](#using-postgresql-for-storage).

View File

@@ -40,15 +40,18 @@ WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
# MAX_TOKEN_ENTITY_DESC=4000
### Settings for document indexing
ENABLE_LLM_CACHE_FOR_EXTRACT=true
SUMMARY_LANGUAGE=English
# CHUNK_SIZE=1200
# CHUNK_OVERLAP_SIZE=100
### Max tokens for entity or relations summary
# MAX_TOKEN_SUMMARY=500
### Number of documents to process in parallel in one batch
# MAX_PARALLEL_INSERT=2
### Max tokens for entity/relations description after merge
# MAX_TOKEN_SUMMARY=500
### Number of entities/edges to trigger LLM re-summary on merge (at least 3 is recommended)
# FORCE_LLM_SUMMARY_ON_MERGE=6
### Number of chunks sent to Embedding in a single request
# EMBEDDING_BATCH_NUM=32
### Max concurrent requests for Embedding
@@ -64,6 +67,8 @@ TEMPERATURE=0.5
MAX_ASYNC=4
### Max tokens sent to LLM (less than the context size of the model)
MAX_TOKENS=32768
ENABLE_LLM_CACHE=true
ENABLE_LLM_CACHE_FOR_EXTRACT=true
### Ollama example (For local services installed with docker, you can use host.docker.internal as host)
LLM_BINDING=ollama

View File

@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
__version__ = "1.3.1"
__version__ = "1.3.2"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"

View File

@@ -102,6 +102,10 @@ lightrag-gunicorn --workers 4
- `--log-level`: Logging level (default: INFO)
- --input-dir: directory to scan for documents (default: ./input)
> **The requirement to place the .env file in the startup directory is intentional.** The purpose is to support users in launching multiple LightRAG instances simultaneously, with different .env files configured for different instances.
> **After modifying the .env file, you need to reopen the terminal for the new settings to take effect.** This is because the LightRAG Server loads the environment variables from the .env file into the system environment variables at each startup, and system environment variables take higher priority.
### Auto scan on startup
When starting any of the servers with the `--auto-scan-at-startup` parameter, the system will automatically:
@@ -164,7 +168,7 @@ sudo systemctl enable lightrag.service
### Connect Open WebUI to LightRAG
After starting lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. A model named lightrag:latest will then appear in Open WebUI's model management interface, and users can send queries to LightRAG through the chat interface. It is best to install LightRAG as a service for this use case.
After starting lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. A model named `lightrag:latest` will then appear in Open WebUI's model management interface, and users can send queries to LightRAG through the chat interface. It is best to install LightRAG as a service for this use case.
Open WebUI uses an LLM to perform session title and session keyword generation tasks. Therefore, the Ollama chat completion API detects OpenWebUI session-related requests and forwards them directly to the underlying LLM. Screenshot from Open WebUI:
@@ -172,6 +176,8 @@ Open WebUI 使用 LLM 来执行会话标题和会话关键词生成任务。因
### Choose query mode in chat
If you send a message (query) from LightRAG's Ollama interface, the default query mode is `hybrid`. You can select a query mode by sending a message with a query prefix.
A query prefix in the query string determines which LightRAG query mode is used to generate the response. The supported prefixes include:
```
@@ -180,13 +186,22 @@ Open WebUI 使用 LLM 来执行会话标题和会话关键词生成任务。因
/hybrid
/naive
/mix
/bypass
/context
/localcontext
/globalcontext
/hybridcontext
/naivecontext
/mixcontext
```
For example, the chat message "/mix 唐僧有几个徒弟" will trigger a mix mode query in LightRAG. A chat message without a query prefix will trigger a hybrid mode query by default.
"/bypass" is not a LightRAG query mode; it tells the API server to pass the query, together with the chat history, directly to the underlying LLM. The user can then have the LLM answer questions based on the chat history. If you use Open WebUI as the frontend, you can simply switch to a normal LLM model instead of using the /bypass prefix.
"/context" is also not a LightRAG query mode; it tells LightRAG to return only the context information prepared for the LLM. You can check whether the context meets your needs, or process the context yourself.
## API Key and Authentication
By default, the LightRAG Server can be accessed without any authentication. We can configure the server with an API key or account credentials to secure it.

View File

@@ -1,6 +1,6 @@
# LightRAG Server and WebUI
The LightRAG Server is designed to provide Web UI and API support. The Web UI facilitates document indexing, knowledge graph exploration, and a simple RAG query interface. LightRAG Server also provide an Ollama compatible interfaces, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat bot, such as Open WebUI, to access LightRAG easily.
The LightRAG Server is designed to provide a Web UI and API support. The Web UI facilitates document indexing, knowledge graph exploration, and a simple RAG query interface. LightRAG Server also provides an Ollama-compatible interface, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat bots, such as Open WebUI, to access LightRAG easily.
![image-20250323122538997](./README.assets/image-20250323122538997.png)
@@ -8,17 +8,17 @@ The LightRAG Server is designed to provide Web UI and API support. The Web UI fa
![image-20250323123011220](./README.assets/image-20250323123011220.png)
## Getting Start
## Getting Started
### Installation
* Install from PyPI
* Install from PyPI
```bash
pip install "lightrag-hku[api]"
```
* Installation from Source
* Installation from Source
```bash
# Clone the repository
@@ -27,7 +27,7 @@ git clone https://github.com/HKUDS/lightrag.git
# Change to the repository directory
cd lightrag
# create a Python virtual enviroment if neccesary
# create a Python virtual environment if necessary
# Install in editable mode with API support
pip install -e ".[api]"
```
@@ -36,23 +36,23 @@ pip install -e ".[api]"
LightRAG necessitates the integration of both an LLM (Large Language Model) and an Embedding Model to effectively execute document indexing and querying operations. Prior to the initial deployment of the LightRAG server, it is essential to configure the settings for both the LLM and the Embedding Model. LightRAG supports binding to various LLM/Embedding backends:
* ollama
* lollms
* openai or openai compatible
* azure_openai
* ollama
* lollms
* openai or openai compatible
* azure_openai
It is recommended to use environment variables to configure the LightRAG Server. There is an example environment variable file named `env.example` in the root directory of the project. Please copy this file to the startup directory and rename it to `.env`. After that, you can modify the parameters related to the LLM and Embedding models in the `.env` file. It is important to note that the LightRAG Server will load the environment variables from `.env` into the system environment variables each time it starts. Since the LightRAG Server will prioritize the settings in the system environment variables, if you modify the `.env` file after starting the LightRAG Server via the command line, you need to execute `source .env` to make the new settings take effect.
Here are some examples of common settings for LLM and Embedding models
Here are some examples of common settings for LLM and Embedding models:
* OpenAI LLM + Ollama Embedding
* OpenAI LLM + Ollama Embedding:
```
LLM_BINDING=openai
LLM_MODEL=gpt-4o
LLM_BINDING_HOST=https://api.openai.com/v1
LLM_BINDING_API_KEY=your_api_key
### Max tokens send to LLM (less than model context size)
### Max tokens sent to LLM (less than model context size)
MAX_TOKENS=32768
EMBEDDING_BINDING=ollama
@@ -62,14 +62,14 @@ EMBEDDING_DIM=1024
# EMBEDDING_BINDING_API_KEY=your_api_key
```
* Ollama LLM + Ollama Embedding
* Ollama LLM + Ollama Embedding:
```
LLM_BINDING=ollama
LLM_MODEL=mistral-nemo:latest
LLM_BINDING_HOST=http://localhost:11434
# LLM_BINDING_API_KEY=your_api_key
### Max tokens send to LLM (base on your Ollama Server capacity)
### Max tokens sent to LLM (based on your Ollama Server capacity)
MAX_TOKENS=8192
EMBEDDING_BINDING=ollama
@@ -82,12 +82,12 @@ EMBEDDING_DIM=1024
### Starting LightRAG Server
The LightRAG Server supports two operational modes:
* The simple and efficient Uvicorn mode
* The simple and efficient Uvicorn mode:
```
lightrag-server
```
* The multiprocess Gunicorn + Uvicorn mode (production mode, not supported on Windows environments)
* The multiprocess Gunicorn + Uvicorn mode (production mode, not supported on Windows environments):
```
lightrag-gunicorn --workers 4
@@ -96,44 +96,46 @@ The `.env` file **must be placed in the startup directory**.
Upon launching, the LightRAG Server will create a documents directory (default is `./inputs`) and a data directory (default is `./rag_storage`). This allows you to initiate multiple instances of LightRAG Server from different directories, with each instance configured to listen on a distinct network port.
Here are some common used startup parameters:
Here are some commonly used startup parameters:
- `--host`: Server listening address (default: 0.0.0.0)
- `--port`: Server listening port (default: 9621)
- `--timeout`: LLM request timeout (default: 150 seconds)
- `--log-level`: Logging level (default: INFO)
- --input-dir: specifying the directory to scan for documents (default: ./input)
- `--host`: Server listening address (default: 0.0.0.0)
- `--port`: Server listening port (default: 9621)
- `--timeout`: LLM request timeout (default: 150 seconds)
- `--log-level`: Logging level (default: INFO)
- `--input-dir`: Specifying the directory to scan for documents (default: ./inputs)
> The requirement for the .env file to be in the startup directory is intentionally designed this way. The purpose is to support users in launching multiple LightRAG instances simultaneously. Allow different .env files for different instances.
> The requirement for the .env file to be in the startup directory is intentionally designed this way. The purpose is to support users in launching multiple LightRAG instances simultaneously, allowing different .env files for different instances.
> **After changing the .env file, you need to open a new terminal to make the new settings take effect.** This is because the LightRAG Server will load the environment variables from .env into the system environment variables each time it starts, and the LightRAG Server prioritizes the settings in the system environment variables.
### Auto scan on startup
When starting any of the servers with the `--auto-scan-at-startup` parameter, the system will automatically:
1. Scan for new files in the input directory
2. Indexing new documents that aren't already in the database
3. Make all content immediately available for RAG queries
1. Scan for new files in the input directory
2. Index new documents that aren't already in the database
3. Make all content immediately available for RAG queries
> The `--input-dir` parameter specify the input directory to scan for. You can trigger input diretory scan from webui.
> The `--input-dir` parameter specifies the input directory to scan. You can trigger the input directory scan from the Web UI.
### Multiple workers for Gunicorn + Uvicorn
The LightRAG Server can operate in the `Gunicorn + Uvicorn` preload mode. Gunicorn's Multiple Worker (multiprocess) capability prevents document indexing tasks from blocking RAG queries. Using CPU-exhaustive document extraction tools, such as docling, can lead to the entire system being blocked in pure Uvicorn mode.
The LightRAG Server can operate in the `Gunicorn + Uvicorn` preload mode. Gunicorn's multiple worker (multiprocess) capability prevents document indexing tasks from blocking RAG queries. Using CPU-exhaustive document extraction tools, such as docling, can lead to the entire system being blocked in pure Uvicorn mode.
Though LightRAG Server uses one workers to process the document indexing pipeline, with aysnc task supporting of Uvicorn, multiple files can be processed in parallell. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values:
Though LightRAG Server uses one worker to process the document indexing pipeline, with the async task support of Uvicorn, multiple files can be processed in parallel. The bottleneck of document indexing speed mainly lies with the LLM. If your LLM supports high concurrency, you can accelerate document indexing by increasing the concurrency level of the LLM. Below are several environment variables related to concurrent processing, along with their default values:
```
### Num of worker processes, not greater then (2 x number_of_cores) + 1
### Number of worker processes, not greater than (2 x number_of_cores) + 1
WORKERS=2
### Num of parallel files to process in one batch
### Number of parallel files to process in one batch
MAX_PARALLEL_INSERT=2
### Max concurrency requests of LLM
### Max concurrent requests to the LLM
MAX_ASYNC=4
```
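For intuition, here is a minimal, illustrative sketch (not LightRAG's actual implementation) of how a `MAX_ASYNC`-style limit caps the number of LLM requests in flight using an asyncio semaphore:

```python
# Illustrative only: how a MAX_ASYNC-style cap can bound concurrent LLM calls.
import asyncio
import os

MAX_ASYNC = int(os.getenv("MAX_ASYNC", "4"))
_semaphore = asyncio.Semaphore(MAX_ASYNC)

async def limited_llm_call(prompt: str) -> str:
    async with _semaphore:            # at most MAX_ASYNC calls run concurrently
        await asyncio.sleep(0.1)      # stand-in for a real LLM request
        return f"response to: {prompt}"

async def main() -> None:
    answers = await asyncio.gather(*(limited_llm_call(f"chunk {i}") for i in range(10)))
    print(len(answers), "chunks processed")

if __name__ == "__main__":
    asyncio.run(main())
```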
### Install Lightrag as a Linux Service
### Install LightRAG as a Linux Service
Create a your service file `lightrag.sevice` from the sample file : `lightrag.sevice.example`. Modified the WorkingDirectoryand EexecStart in the service file:
Create your service file `lightrag.service` from the sample file: `lightrag.service.example`. Modify the `WorkingDirectory` and `ExecStart` in the service file:
```text
Description=LightRAG Ollama Service
@@ -141,7 +143,7 @@ WorkingDirectory=<lightrag installed directory>
ExecStart=<lightrag installed directory>/lightrag/api/lightrag-api
```
Modify your service startup script: `lightrag-api`. Change you python virtual environment activation command as needed:
Modify your service startup script: `lightrag-api`. Change your Python virtual environment activation command as needed:
```shell
#!/bin/bash
@@ -164,19 +166,21 @@ sudo systemctl enable lightrag.service
## Ollama Emulation
We provide an Ollama-compatible interfaces for LightRAG, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat frontends supporting Ollama, such as Open WebUI, to access LightRAG easily.
We provide Ollama-compatible interfaces for LightRAG, aiming to emulate LightRAG as an Ollama chat model. This allows AI chat frontends supporting Ollama, such as Open WebUI, to access LightRAG easily.
### Connect Open WebUI to LightRAG
After starting the lightrag-server, you can add an Ollama-type connection in the Open WebUI admin pannel. And then a model named lightrag:latest will appear in Open WebUI's model management interface. Users can then send queries to LightRAG through the chat interface. You'd better install LightRAG as service for this use case.
After starting the lightrag-server, you can add an Ollama-type connection in the Open WebUI admin panel. And then a model named `lightrag:latest` will appear in Open WebUI's model management interface. Users can then send queries to LightRAG through the chat interface. You should install LightRAG as a service for this use case.
Open WebUI's use LLM to do the session title and session keyword generation task. So the Ollama chat chat completion API detects and forwards OpenWebUI session-related requests directly to underlying LLM. Screen shot from Open WebUI:
Open WebUI uses an LLM to do the session title and session keyword generation task. So the Ollama chat completion API detects and forwards OpenWebUI session-related requests directly to the underlying LLM. Screenshot from Open WebUI:
![image-20250323194750379](./README.assets/image-20250323194750379.png)
### Choose Query mode in chat
A query prefix in the query string can determines which LightRAG query mode is used to generate the respond for the query. The supported prefixes include:
The default query mode is `hybrid` if you send a message (query) from the Ollama interface of LightRAG. You can select query mode by sending a message with a query prefix.
A query prefix in the query string can determine which LightRAG query mode is used to generate the response for the query. The supported prefixes include:
```
/local
@@ -184,31 +188,38 @@ A query prefix in the query string can determines which LightRAG query mode is u
/hybrid
/naive
/mix
/bypass
/context
/localcontext
/globalcontext
/hybridcontext
/naivecontext
/mixcontext
```
For example, chat message "/mix 唐僧有几个徒弟" will trigger a mix mode query for LighRAG. A chat message without query prefix will trigger a hybrid mode query by default
For example, the chat message `/mix What's LightRAG?` will trigger a mix mode query for LightRAG. A chat message without a query prefix will trigger a hybrid mode query by default.
"/bypass" is not a LightRAG query mode, it will tell API Server to pass the query directly to the underlying LLM with chat history. So user can use LLM to answer question base on the chat history. If you are using Open WebUI as front end, you can just switch the model to a normal LLM instead of using /bypass prefix.
`/bypass` is not a LightRAG query mode; it will tell the API Server to pass the query directly to the underlying LLM, including the chat history. So the user can use the LLM to answer questions based on the chat history. If you are using Open WebUI as a front end, you can just switch the model to a normal LLM instead of using the `/bypass` prefix.
`/context` is also not a LightRAG query mode; it will tell LightRAG to return only the context information prepared for the LLM. You can check whether the context is what you want, or process the context yourself.
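As a quick sketch (assuming a LightRAG Server on the default port with no API key configured), a `/context` request can be sent through the Ollama-compatible chat endpoint like this; the response shape follows the Ollama chat API:

```python
# Minimal sketch: request retrieval context only, via the Ollama-compatible API.
import requests

resp = requests.post(
    "http://localhost:9621/api/chat",
    json={
        "model": "lightrag:latest",
        "messages": [{"role": "user", "content": "/context What's LightRAG?"}],
        "stream": False,
    },
    timeout=120,
)
resp.raise_for_status()
print(resp.json()["message"]["content"])  # the context prepared for the LLM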
## API Key and Authentication
## API-Key and Authentication
By default, the LightRAG Server can be accessed without any authentication. We can configure the server with an API Key or account credentials to secure it.
By default, the LightRAG Server can be accessed without any authentication. We can configure the server with an API-Key or account credentials to secure it.
* API-KEY
* API Key:
```
LIGHTRAG_API_KEY=your-secure-api-key-here
WHITELIST_PATHS=/health,/api/*
```
> Health check and Ollama emuluation endpoins is exclude from API-KEY check by default.
> Health check and Ollama emulation endpoints are excluded from API Key check by default.
* Account credentials (the web UI requires login before access)
* Account credentials (the Web UI requires login before access can be granted):
LightRAG API Server implements JWT-based authentication using HS256 algorithm. To enable secure access control, the following environment variables are required:
LightRAG API Server implements JWT-based authentication using the HS256 algorithm. To enable secure access control, the following environment variables are required:
```bash
# For jwt auth
@@ -219,16 +230,14 @@ TOKEN_EXPIRE_HOURS=4
> Currently, only the configuration of an administrator account and password is supported. A comprehensive account system is yet to be developed and implemented.
If Account credentials are not configured, the web UI will access the system as a Guest. Therefore, even if only API-KEY is configured, all API can still be accessed through the Guest account, which remains insecure. Hence, to safeguard the API, it is necessary to configure both authentication methods simultaneously.
If Account credentials are not configured, the Web UI will access the system as a Guest. Therefore, even if only an API Key is configured, all APIs can still be accessed through the Guest account, which remains insecure. Hence, to safeguard the API, it is necessary to configure both authentication methods simultaneously.
## For Azure OpenAI Backend
Azure OpenAI API can be created using the following commands in Azure CLI (you need to install Azure CLI first from [https://docs.microsoft.com/en-us/cli/azure/install-azure-cli](https://docs.microsoft.com/en-us/cli/azure/install-azure-cli)):
```bash
# Change the resource group name, location and OpenAI resource name as needed
# Change the resource group name, location, and OpenAI resource name as needed
RESOURCE_GROUP_NAME=LightRAG
LOCATION=swedencentral
RESOURCE_NAME=LightRAG-OpenAI
@@ -246,7 +255,7 @@ az cognitiveservices account keys list --name $RESOURCE_NAME -g $RESOURCE_GROUP_
The output of the last command will give you the endpoint and the key for the OpenAI API. You can use these values to set the environment variables in the `.env` file.
```
# Azure OpenAI Configuration in .env
# Azure OpenAI Configuration in .env:
LLM_BINDING=azure_openai
LLM_BINDING_HOST=your-azure-endpoint
LLM_MODEL=your-model-deployment-name
@@ -254,91 +263,89 @@ LLM_BINDING_API_KEY=your-azure-api-key
### API version is optional, defaults to latest version
AZURE_OPENAI_API_VERSION=2024-08-01-preview
### if using Azure OpenAI for embeddings
### If using Azure OpenAI for embeddings
EMBEDDING_BINDING=azure_openai
EMBEDDING_MODEL=your-embedding-deployment-name
```
## LightRAG Server Configuration in Detail
API Server can be config in three way (highest priority first):
The API Server can be configured in three ways (highest priority first):
* Command line arguments
* Enviroment variables or .env file
* Config.ini (Only for storage configuration)
* Command line arguments
* Environment variables or .env file
* Config.ini (Only for storage configuration)
Most of the configurations come with a default settings, check out details in sample file: `.env.example`. Datastorage configuration can be also set by config.ini. A sample file `config.ini.example` is provided for your convenience.
Most of the configurations come with default settings; check out the details in the sample file: `.env.example`. Data storage configuration can also be set by config.ini. A sample file `config.ini.example` is provided for your convenience.
### LLM and Embedding Backend Supported
LightRAG supports binding to various LLM/Embedding backends:
* ollama
* lollms
* openai & openai compatible
* azure_openai
* ollama
* lollms
* openai & openai compatible
* azure_openai
Use environment variables `LLM_BINDING` or CLI argument `--llm-binding` to select LLM backend type. Use environment variables `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select LLM backend type.
Use environment variables `LLM_BINDING` or CLI argument `--llm-binding` to select the LLM backend type. Use environment variables `EMBEDDING_BINDING` or CLI argument `--embedding-binding` to select the Embedding backend type.
### Entity Extraction Configuration
* ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)
* ENABLE_LLM_CACHE_FOR_EXTRACT: Enable LLM cache for entity extraction (default: true)
It's very common to set `ENABLE_LLM_CACHE_FOR_EXTRACT` to true for test environment to reduce the cost of LLM calls.
It's very common to set `ENABLE_LLM_CACHE_FOR_EXTRACT` to true for a test environment to reduce the cost of LLM calls.
### Storage Types Supported
LightRAG uses 4 types of storage for difference purposes:
LightRAG uses 4 types of storage for different purposes:
* KV_STORAGEllm response cache, text chunks, document information
* VECTOR_STORAGEentities vectors, relation vectors, chunks vectors
* GRAPH_STORAGEentity relation graph
* DOC_STATUS_STORAGEdocuments indexing status
* KV_STORAGE: llm response cache, text chunks, document information
* VECTOR_STORAGE: entities vectors, relation vectors, chunks vectors
* GRAPH_STORAGE: entity relation graph
* DOC_STATUS_STORAGE: document indexing status
Each storage type have servals implementations:
Each storage type has several implementations:
* KV_STORAGE supported implement-name
* KV_STORAGE supported implementations:
```
JsonKVStorage JsonFile(default)
JsonKVStorage JsonFile (default)
PGKVStorage Postgres
RedisKVStorage Redis
MongoKVStorage MogonDB
MongoKVStorage MongoDB
```
* GRAPH_STORAGE supported implement-name
* GRAPH_STORAGE supported implementations:
```
NetworkXStorage NetworkX(defualt)
NetworkXStorage NetworkX (default)
Neo4JStorage Neo4J
PGGraphStorage Postgres
AGEStorage AGE
```
* VECTOR_STORAGE supported implement-name
* VECTOR_STORAGE supported implementations:
```
NanoVectorDBStorage NanoVector(default)
NanoVectorDBStorage NanoVector (default)
PGVectorStorage Postgres
MilvusVectorDBStorge Milvus
MilvusVectorDBStorage Milvus
ChromaVectorDBStorage Chroma
FaissVectorDBStorage Faiss
QdrantVectorDBStorage Qdrant
MongoVectorDBStorage MongoDB
```
* DOC_STATUS_STORAGEsupported implement-name
* DOC_STATUS_STORAGE: supported implementations:
```
JsonDocStatusStorage JsonFile(default)
JsonDocStatusStorage JsonFile (default)
PGDocStatusStorage Postgres
MongoDocStatusStorage MongoDB
```
### How Select Storage Implementation
### How to Select Storage Implementation
You can select storage implementation by environment variables. Your can set the following environmental variables to a specific storage implement-name before the your first start of the API Server:
You can select storage implementation by environment variables. You can set the following environment variables to a specific storage implementation name before the first start of the API Server:
```
LIGHTRAG_KV_STORAGE=PGKVStorage
@@ -347,30 +354,30 @@ LIGHTRAG_GRAPH_STORAGE=PGGraphStorage
LIGHTRAG_DOC_STATUS_STORAGE=PGDocStatusStorage
```
You can not change storage implementation selection after you add documents to LightRAG. Data migration from one storage implementation to anthor is not supported yet. For further information please read the sample env file or config.ini file.
You cannot change storage implementation selection after adding documents to LightRAG. Data migration from one storage implementation to another is not supported yet. For further information, please read the sample env file or config.ini file.
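When LightRAG is embedded as a library rather than run through the API Server, the same storage implementations can typically be selected via constructor parameters. The sketch below is illustrative only; the parameter names are assumptions based on the implementation names listed above, so verify them against your installed version:

```python
# Illustrative sketch; llm_model_func and embedding_func must still be
# configured before indexing or querying.
from lightrag import LightRAG

rag = LightRAG(
    working_dir="./rag_storage",
    kv_storage="PGKVStorage",
    vector_storage="PGVectorStorage",
    graph_storage="PGGraphStorage",
    doc_status_storage="PGDocStatusStorage",
)
```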
### LightRag API Server Comand Line Options
### LightRAG API Server Command Line Options
| Parameter | Default | Description |
|-----------|---------|-------------|
| --host | 0.0.0.0 | Server host |
| --port | 9621 | Server port |
| --working-dir | ./rag_storage | Working directory for RAG storage |
| --input-dir | ./inputs | Directory containing input documents |
| --max-async | 4 | Maximum async operations |
| --max-tokens | 32768 | Maximum token size |
| --timeout | 150 | Timeout in seconds. None for infinite timeout(not recommended) |
| --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
| --verbose | - | Verbose debug output (True, Flase) |
| --key | None | API key for authentication. Protects lightrag server against unauthorized access |
| --ssl | False | Enable HTTPS |
| --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
| --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
| --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. |
| --cosine-threshold | 0.4 | The cossine threshold for nodes and relations retrieval, works with top-k to control the retrieval of nodes and relations. |
| --llm-binding | ollama | LLM binding type (lollms, ollama, openai, openai-ollama, azure_openai) |
| --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
| auto-scan-at-startup | - | Scan input directory for new files and start indexing |
| Parameter | Default | Description |
| --------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------- |
| --host | 0.0.0.0 | Server host |
| --port | 9621 | Server port |
| --working-dir | ./rag_storage | Working directory for RAG storage |
| --input-dir | ./inputs | Directory containing input documents |
| --max-async | 4 | Maximum number of async operations |
| --max-tokens | 32768 | Maximum token size |
| --timeout | 150 | Timeout in seconds. None for infinite timeout (not recommended) |
| --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
| --verbose | - | Verbose debug output (True, False) |
| --key | None | API key for authentication. Protects the LightRAG server against unauthorized access |
| --ssl | False | Enable HTTPS |
| --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
| --ssl-keyfile | None | Path to SSL private key file (required if --ssl is enabled) |
| --top-k | 50 | Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode. |
| --cosine-threshold | 0.4 | The cosine threshold for nodes and relation retrieval, works with top-k to control the retrieval of nodes and relations. |
| --llm-binding | ollama | LLM binding type (lollms, ollama, openai, openai-ollama, azure_openai) |
| --embedding-binding | ollama | Embedding binding type (lollms, ollama, openai, azure_openai) |
| --auto-scan-at-startup| - | Scan input directory for new files and start indexing |
### .env Examples
@@ -416,20 +423,20 @@ EMBEDDING_BINDING_HOST=http://localhost:11434
## API Endpoints
All servers (LoLLMs, Ollama, OpenAI and Azure OpenAI) provide the same REST API endpoints for RAG functionality. When API Server is running, visit:
All servers (LoLLMs, Ollama, OpenAI and Azure OpenAI) provide the same REST API endpoints for RAG functionality. When the API Server is running, visit:
- Swagger UI: http://localhost:9621/docs
- ReDoc: http://localhost:9621/redoc
- Swagger UI: http://localhost:9621/docs
- ReDoc: http://localhost:9621/redoc
You can test the API endpoints using the provided curl commands or through the Swagger UI interface. Make sure to:
1. Start the appropriate backend service (LoLLMs, Ollama, or OpenAI)
2. Start the RAG server
3. Upload some documents using the document management endpoints
4. Query the system using the query endpoints
5. Trigger document scan if new files is put into inputs directory
1. Start the appropriate backend service (LoLLMs, Ollama, or OpenAI)
2. Start the RAG server
3. Upload some documents using the document management endpoints
4. Query the system using the query endpoints
5. Trigger document scan if new files are put into the inputs directory
### Query Endpoints
### Query Endpoints:
#### POST /query
Query the RAG system with options for different search modes.
@@ -437,7 +444,7 @@ Query the RAG system with options for different search modes.
```bash
curl -X POST "http://localhost:9621/query" \
-H "Content-Type: application/json" \
-d '{"query": "Your question here", "mode": "hybrid", ""}'
-d '{"query": "Your question here", "mode": "hybrid"}'
```
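For reference, a minimal Python equivalent of the curl call above (assumes a local, unauthenticated server; add the appropriate API key header if one is configured):

```python
# Minimal sketch of the same query via Python requests.
import requests

resp = requests.post(
    "http://localhost:9621/query",
    json={"query": "Your question here", "mode": "hybrid"},
    timeout=120,
)
resp.raise_for_status()
print(resp.json())
```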
#### POST /query/stream
@@ -449,7 +456,7 @@ curl -X POST "http://localhost:9621/query/stream" \
-d '{"query": "Your question here", "mode": "hybrid"}'
```
### Document Management Endpoints
### Document Management Endpoints:
#### POST /documents/text
Insert text directly into the RAG system.
@@ -480,13 +487,13 @@ curl -X POST "http://localhost:9621/documents/batch" \
#### POST /documents/scan
Trigger document scan for new files in the Input directory.
Trigger document scan for new files in the input directory.
```bash
curl -X POST "http://localhost:9621/documents/scan" --max-time 1800
```
> Ajust max-time according to the estimated index time for all new files.
> Adjust max-time according to the estimated indexing time for all new files.
#### DELETE /documents
@@ -496,7 +503,7 @@ Clear all documents from the RAG system.
curl -X DELETE "http://localhost:9621/documents"
```
### Ollama Emulation Endpoints
### Ollama Emulation Endpoints:
#### GET /api/version
@@ -508,7 +515,7 @@ curl http://localhost:9621/api/version
#### GET /api/tags
Get Ollama available models.
Get available Ollama models.
```bash
curl http://localhost:9621/api/tags
@@ -516,20 +523,20 @@ curl http://localhost:9621/api/tags
#### POST /api/chat
Handle chat completion requests. Routes user queries through LightRAG by selecting query mode based on query prefix. Detects and forwards OpenWebUI session-related requests (for meta data generation task) directly to underlying LLM.
Handle chat completion requests. Routes user queries through LightRAG by selecting query mode based on query prefix. Detects and forwards OpenWebUI session-related requests (for metadata generation task) directly to the underlying LLM.
```shell
curl -N -X POST http://localhost:9621/api/chat -H "Content-Type: application/json" -d \
'{"model":"lightrag:latest","messages":[{"role":"user","content":"猪八戒是谁"}],"stream":true}'
```
> For more information about Ollama API pls. visit : [Ollama API documentation](https://github.com/ollama/ollama/blob/main/docs/api.md)
> For more information about Ollama API, please visit: [Ollama API documentation](https://github.com/ollama/ollama/blob/main/docs/api.md)
#### POST /api/generate
Handle generate completion requests. For compatibility purpose, the request is not processed by LightRAG, and will be handled by underlying LLM model.
Handle generate completion requests. For compatibility purposes, the request is not processed by LightRAG, and will be handled by the underlying LLM model.
### Utility Endpoints
### Utility Endpoints:
#### GET /health
Check server health and configuration.

View File

@@ -1 +1 @@
__api_version__ = "0139"
__api_version__ = "0148"

View File

@@ -297,6 +297,7 @@ def parse_args() -> argparse.Namespace:
args.enable_llm_cache_for_extract = get_env_value(
"ENABLE_LLM_CACHE_FOR_EXTRACT", True, bool
)
args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
# Inject LLM temperature configuration
args.temperature = get_env_value("TEMPERATURE", 0.5, float)

View File

@@ -157,7 +157,6 @@ def create_app(args):
"openapi_url": "/openapi.json", # Explicitly set OpenAPI schema URL
"docs_url": "/docs", # Explicitly set docs URL
"redoc_url": "/redoc", # Explicitly set redoc URL
"openapi_tags": [{"name": "api"}],
"lifespan": lifespan,
}
@@ -317,6 +316,7 @@ def create_app(args):
"cosine_better_than_threshold": args.cosine_threshold
},
enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
enable_llm_cache=args.enable_llm_cache,
embedding_cache_config={
"enabled": True,
"similarity_threshold": 0.95,
@@ -348,6 +348,7 @@ def create_app(args):
"cosine_better_than_threshold": args.cosine_threshold
},
enable_llm_cache_for_entity_extract=args.enable_llm_cache_for_extract,
enable_llm_cache=args.enable_llm_cache,
embedding_cache_config={
"enabled": True,
"similarity_threshold": 0.95,
@@ -470,6 +471,7 @@ def create_app(args):
"graph_storage": args.graph_storage,
"vector_storage": args.vector_storage,
"enable_llm_cache_for_extract": args.enable_llm_cache_for_extract,
"enable_llm_cache": args.enable_llm_cache,
},
"auth_mode": auth_mode,
"pipeline_busy": pipeline_status.get("busy", False),

View File

@@ -499,7 +499,10 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
content = result.document.export_to_markdown()
else:
if not pm.is_installed("python-docx"): # type: ignore
pm.install("docx")
try:
pm.install("python-docx")
except Exception:
pm.install("docx")
from docx import Document # type: ignore
from io import BytesIO
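For context, a minimal sketch of the docx extraction path that these imports feed into (illustrative only; LightRAG's actual handling may differ):

```python
# Illustrative sketch of extracting plain text from a .docx payload.
from io import BytesIO
from docx import Document  # provided by the python-docx package

def docx_bytes_to_text(file_bytes: bytes) -> str:
    doc = Document(BytesIO(file_bytes))
    return "\n".join(p.text for p in doc.paragraphs)
```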

View File

@@ -23,6 +23,7 @@ class SearchMode(str, Enum):
hybrid = "hybrid"
mix = "mix"
bypass = "bypass"
context = "context"
class OllamaMessage(BaseModel):
@@ -100,26 +101,38 @@ def estimate_tokens(text: str) -> int:
return len(tokens)
def parse_query_mode(query: str) -> tuple[str, SearchMode]:
def parse_query_mode(query: str) -> tuple[str, SearchMode, bool]:
"""Parse query prefix to determine search mode
Returns tuple of (cleaned_query, search_mode)
Returns tuple of (cleaned_query, search_mode, only_need_context)
"""
mode_map = {
"/local ": SearchMode.local,
"/global ": SearchMode.global_, # global_ is used because 'global' is a Python keyword
"/naive ": SearchMode.naive,
"/hybrid ": SearchMode.hybrid,
"/mix ": SearchMode.mix,
"/bypass ": SearchMode.bypass,
"/local ": (SearchMode.local, False),
"/global ": (
SearchMode.global_,
False,
), # global_ is used because 'global' is a Python keyword
"/naive ": (SearchMode.naive, False),
"/hybrid ": (SearchMode.hybrid, False),
"/mix ": (SearchMode.mix, False),
"/bypass ": (SearchMode.bypass, False),
"/context": (
SearchMode.hybrid,
True,
),
"/localcontext": (SearchMode.local, True),
"/globalcontext": (SearchMode.global_, True),
"/hybridcontext": (SearchMode.hybrid, True),
"/naivecontext": (SearchMode.naive, True),
"/mixcontext": (SearchMode.mix, True),
}
for prefix, mode in mode_map.items():
for prefix, (mode, only_need_context) in mode_map.items():
if query.startswith(prefix):
# After removing the prefix and leading spaces
cleaned_query = query[len(prefix) :].lstrip()
return cleaned_query, mode
return cleaned_query, mode, only_need_context
return query, SearchMode.hybrid
return query, SearchMode.hybrid, False
class OllamaAPI:
@@ -295,7 +308,7 @@ class OllamaAPI:
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Content-Type": "application/x-ndjson",
"X-Accel-Buffering": "no", # 确保在Nginx代理时正确处理流式响应
"X-Accel-Buffering": "no", # Ensure proper handling of streaming responses in Nginx proxy
},
)
else:
@@ -349,7 +362,7 @@ class OllamaAPI:
]
# Check for query prefix
cleaned_query, mode = parse_query_mode(query)
cleaned_query, mode, only_need_context = parse_query_mode(query)
start_time = time.time_ns()
prompt_tokens = estimate_tokens(cleaned_query)
@@ -357,7 +370,7 @@ class OllamaAPI:
param_dict = {
"mode": mode,
"stream": request.stream,
"only_need_context": False,
"only_need_context": only_need_context,
"conversation_history": conversation_history,
"top_k": self.top_k,
}
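A small illustrative check of the updated `parse_query_mode` behaviour (run inside the module's context, since it relies on `SearchMode` and the parser defined above):

```python
# Expected behaviour of the updated parse_query_mode (illustrative check).
cleaned, mode, only_context = parse_query_mode("/mixcontext What's LightRAG?")
assert cleaned == "What's LightRAG?"
assert mode is SearchMode.mix and only_context is True

# A message without a recognised prefix falls back to a full hybrid-mode answer.
cleaned, mode, only_context = parse_query_mode("What's LightRAG?")
assert mode is SearchMode.hybrid and only_context is False
```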

View File

@@ -22,7 +22,7 @@ class QueryRequest(BaseModel):
description="The query text",
)
mode: Literal["local", "global", "hybrid", "naive", "mix"] = Field(
mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = Field(
default="hybrid",
description="Query mode",
)

View File

@@ -147,8 +147,8 @@ def main():
# Timeout configuration prioritizes command line arguments
gunicorn_config.timeout = (
global_args.timeout
if global_args.timeout * 2
global_args.timeout * 2
if global_args.timeout is not None
else int(os.getenv("TIMEOUT", 150 * 2))
)

View File

@@ -229,8 +229,12 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.max_async}")
ASCIIColors.white(" ├─ Max Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_tokens}")
ASCIIColors.white(" └─ Timeout: ", end="")
ASCIIColors.white(" ├─ Timeout: ", end="")
ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
# Embedding Configuration
ASCIIColors.magenta("\n📊 Embedding Configuration:")
@@ -259,8 +263,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.cosine_threshold}")
ASCIIColors.white(" ├─ Top-K: ", end="")
ASCIIColors.yellow(f"{args.top_k}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache_for_extract}")
ASCIIColors.white(" ├─ Max Token Summary: ", end="")
ASCIIColors.yellow(f"{int(os.getenv('MAX_TOKEN_SUMMARY', 500))}")
ASCIIColors.white(" └─ Force LLM Summary on Merge: ", end="")
ASCIIColors.yellow(f"{int(os.getenv('FORCE_LLM_SUMMARY_ON_MERGE', 6))}")
# System Configuration
ASCIIColors.magenta("\n💾 Storage Configuration:")

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -8,8 +8,8 @@
<link rel="icon" type="image/svg+xml" href="logo.png" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Lightrag</title>
<script type="module" crossorigin src="/webui/assets/index-sivPufd7.js"></script>
<link rel="stylesheet" crossorigin href="/webui/assets/index-CQhBIpFe.css">
<script type="module" crossorigin src="/webui/assets/index-CkwV8nfm.js"></script>
<link rel="stylesheet" crossorigin href="/webui/assets/index-CTB4Vp_z.css">
</head>
<body>
<div id="root"></div>

View File

@@ -12,7 +12,6 @@ from typing import (
TypeVar,
Callable,
)
import numpy as np
from .utils import EmbeddingFunc
from .types import KnowledgeGraph
@@ -36,7 +35,7 @@ T = TypeVar("T")
class QueryParam:
"""Configuration parameters for query execution in LightRAG."""
mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "global"
"""Specifies the retrieval mode:
- "local": Focuses on context-dependent information.
- "global": Utilizes global knowledge.
@@ -281,63 +280,164 @@ class BaseGraphStorage(StorageNameSpace, ABC):
@abstractmethod
async def has_node(self, node_id: str) -> bool:
"""Check if an edge exists in the graph."""
"""Check if a node exists in the graph.
Args:
node_id: The ID of the node to check
Returns:
True if the node exists, False otherwise
"""
@abstractmethod
async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
"""Get the degree of a node."""
"""Check if an edge exists between two nodes.
Args:
source_node_id: The ID of the source node
target_node_id: The ID of the target node
Returns:
True if the edge exists, False otherwise
"""
@abstractmethod
async def node_degree(self, node_id: str) -> int:
"""Get the degree of an edge."""
"""Get the degree (number of connected edges) of a node.
Args:
node_id: The ID of the node
Returns:
The number of edges connected to the node
"""
@abstractmethod
async def edge_degree(self, src_id: str, tgt_id: str) -> int:
"""Get a node by its id."""
"""Get the total degree of an edge (sum of degrees of its source and target nodes).
Args:
src_id: The ID of the source node
tgt_id: The ID of the target node
Returns:
The sum of the degrees of the source and target nodes
"""
@abstractmethod
async def get_node(self, node_id: str) -> dict[str, str] | None:
"""Get node by its label identifier, return only node properties"""
"""Get node by its ID, returning only node properties.
Args:
node_id: The ID of the node to retrieve
Returns:
A dictionary of node properties if found, None otherwise
"""
@abstractmethod
async def get_edge(
self, source_node_id: str, target_node_id: str
) -> dict[str, str] | None:
"""Get edge properties between two nodes"""
"""Get edge properties between two nodes.
Args:
source_node_id: The ID of the source node
target_node_id: The ID of the target node
Returns:
A dictionary of edge properties if found, None otherwise
"""
@abstractmethod
async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
"""Upsert a node into the graph."""
"""Get all edges connected to a node.
Args:
source_node_id: The ID of the node to get edges for
Returns:
A list of (source_id, target_id) tuples representing edges,
or None if the node doesn't exist
"""
@abstractmethod
async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
"""Upsert an edge into the graph."""
"""Insert a new node or update an existing node in the graph.
Important notes for in-memory storage:
1. Changes will be persisted to disk during the next index_done_callback
2. Only one process should be updating the storage at a time before index_done_callback;
KG-storage-log should be used to avoid data corruption
Args:
node_id: The ID of the node to insert or update
node_data: A dictionary of node properties
"""
@abstractmethod
async def upsert_edge(
self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
) -> None:
"""Insert a new edge or update an existing edge in the graph.
Important notes for in-memory storage:
1. Changes will be persisted to disk during the next index_done_callback
2. Only one process should be updating the storage at a time before index_done_callback;
KG-storage-log should be used to avoid data corruption
Args:
source_node_id: The ID of the source node
target_node_id: The ID of the target node
edge_data: A dictionary of edge properties
"""
@abstractmethod
async def delete_node(self, node_id: str) -> None:
"""Delete a node from the graph.
Important notes for in-memory storage:
1. Changes will be persisted to disk during the next index_done_callback
2. Only one process should be updating the storage at a time before index_done_callback;
KG-storage-log should be used to avoid data corruption
Args:
node_id: The ID of the node to delete
"""
@abstractmethod
async def delete_node(self, node_id: str) -> None:
"""Embed nodes using an algorithm."""
async def remove_nodes(self, nodes: list[str]):
"""Delete multiple nodes
Important notes:
1. Changes will be persisted to disk during the next index_done_callback
2. Only one process should be updating the storage at a time before index_done_callback;
KG-storage-log should be used to avoid data corruption
Args:
nodes: List of node IDs to be deleted
"""
@abstractmethod
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
"""Get all labels in the graph."""
async def remove_edges(self, edges: list[tuple[str, str]]):
"""Delete multiple edges
Important notes:
1. Changes will be persisted to disk during the next index_done_callback
2. Only one process should be updating the storage at a time before index_done_callback;
KG-storage-log should be used to avoid data corruption
Args:
edges: List of edges to be deleted, each edge is a (source, target) tuple
"""
@abstractmethod
async def get_all_labels(self) -> list[str]:
"""Get a knowledge graph of a node."""
"""Get all labels in the graph.
Returns:
A list of all node labels in the graph, sorted alphabetically
"""
@abstractmethod
async def get_knowledge_graph(

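For illustration, here is a minimal in-memory sketch of how the new batch-deletion hooks (`remove_nodes` / `remove_edges`) can be satisfied by wrapping single deletions; it mirrors what a NetworkX-backed implementation would do and is not LightRAG's actual code:

```python
# Illustrative in-memory sketch of the new batch-deletion hooks.
import networkx as nx

class InMemoryGraph:
    def __init__(self) -> None:
        self._graph = nx.Graph()

    async def remove_nodes(self, nodes: list[str]) -> None:
        # Delete multiple nodes; unknown IDs are skipped silently.
        for node_id in nodes:
            if self._graph.has_node(node_id):
                self._graph.remove_node(node_id)

    async def remove_edges(self, edges: list[tuple[str, str]]) -> None:
        # Delete multiple edges given as (source, target) tuples.
        for source, target in edges:
            if self._graph.has_edge(source, target):
                self._graph.remove_edge(source, target)
```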
View File

@@ -6,7 +6,6 @@ import sys
from contextlib import asynccontextmanager
from dataclasses import dataclass
from typing import Any, Dict, List, NamedTuple, Optional, Union, final
import numpy as np
import pipmaster as pm
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
@@ -89,11 +88,6 @@ class AGEStorage(BaseGraphStorage):
return None
def __post_init__(self):
self._node_embed_algorithms = {
"node2vec": self._node2vec_embed,
}
async def close(self):
if self._driver:
await self._driver.close()
@@ -593,9 +587,6 @@ class AGEStorage(BaseGraphStorage):
logger.error("Error during edge upsert: {%s}", e)
raise
async def _node2vec_embed(self):
print("Implemented but never called.")
@asynccontextmanager
async def _get_pool_connection(self, timeout: Optional[float] = None):
"""Workaround for a psycopg_pool bug"""
@@ -668,21 +659,6 @@ class AGEStorage(BaseGraphStorage):
logger.error(f"Error during edge deletion: {str(e)}")
raise
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
"""Embed nodes using the specified algorithm
Args:
algorithm: Name of the embedding algorithm
Returns:
tuple: (embedding matrix, list of node identifiers)
"""
if algorithm not in self._node_embed_algorithms:
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
return await self._node_embed_algorithms[algorithm]()
async def get_all_labels(self) -> list[str]:
"""Get all node labels in the database

View File

@@ -6,9 +6,6 @@ import pipmaster as pm
from dataclasses import dataclass
from typing import Any, Dict, List, final
import numpy as np
from tenacity import (
retry,
retry_if_exception_type,
@@ -72,11 +69,6 @@ class GremlinStorage(BaseGraphStorage):
transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
)
def __post_init__(self):
self._node_embed_algorithms = {
"node2vec": self._node2vec_embed,
}
async def close(self):
if self._driver:
self._driver.close()
@@ -392,9 +384,6 @@ class GremlinStorage(BaseGraphStorage):
logger.error("Error during edge upsert: {%s}", e)
raise
async def _node2vec_embed(self):
print("Implemented but never called.")
async def delete_node(self, node_id: str) -> None:
"""Delete a node with the specified entity_name
@@ -419,27 +408,6 @@ class GremlinStorage(BaseGraphStorage):
logger.error(f"Error during node deletion: {str(e)}")
raise
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
"""
Embed nodes using the specified algorithm.
Currently, only node2vec is supported but never called.
Args:
algorithm: The name of the embedding algorithm to use
Returns:
A tuple of (embeddings, node_ids)
Raises:
NotImplementedError: If the specified algorithm is not supported
ValueError: If the algorithm is not supported
"""
if algorithm not in self._node_embed_algorithms:
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
return await self._node_embed_algorithms[algorithm]()
async def get_all_labels(self) -> list[str]:
"""
Get all node entity_names in the graph

View File

@@ -116,7 +116,7 @@ class JsonDocStatusStorage(DocStatusStorage):
"""
if not data:
return
logger.info(f"Inserting {len(data)} records to {self.namespace}")
logger.debug(f"Inserting {len(data)} records to {self.namespace}")
async with self._storage_lock:
self._data.update(data)
await set_all_update_flags(self.namespace)

View File

@@ -121,7 +121,7 @@ class JsonKVStorage(BaseKVStorage):
"""
if not data:
return
logger.info(f"Inserting {len(data)} records to {self.namespace}")
logger.debug(f"Inserting {len(data)} records to {self.namespace}")
async with self._storage_lock:
self._data.update(data)
await set_all_update_flags(self.namespace)

View File

@@ -663,20 +663,6 @@ class MongoGraphStorage(BaseGraphStorage):
# Remove the node doc
await self.collection.delete_one({"_id": node_id})
#
# -------------------------------------------------------------------------
# EMBEDDINGS (NOT IMPLEMENTED)
# -------------------------------------------------------------------------
#
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
"""
Placeholder for demonstration, raises NotImplementedError.
"""
raise NotImplementedError("Node embedding is not used in lightrag.")
#
# -------------------------------------------------------------------------
# QUERY

View File

@@ -85,7 +85,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
KG-storage-log should be used to avoid data corruption
"""
logger.info(f"Inserting {len(data)} to {self.namespace}")
logger.debug(f"Inserting {len(data)} to {self.namespace}")
if not data:
return

View File

@@ -2,8 +2,7 @@ import inspect
import os
import re
from dataclasses import dataclass
from typing import Any, final
import numpy as np
from typing import final
import configparser
@@ -51,11 +50,6 @@ class Neo4JStorage(BaseGraphStorage):
)
self._driver = None
def __post_init__(self):
self._node_embed_algorithms = {
"node2vec": self._node2vec_embed,
}
async def initialize(self):
URI = os.environ.get("NEO4J_URI", config.get("neo4j", "uri", fallback=None))
USERNAME = os.environ.get(
@@ -348,7 +342,7 @@ class Neo4JStorage(BaseGraphStorage):
degree = record["degree"]
logger.debug(
"Neo4j query node degree for {node_id} return: {degree}"
f"Neo4j query node degree for {node_id} return: {degree}"
)
return degree
finally:
@@ -635,9 +629,6 @@ class Neo4JStorage(BaseGraphStorage):
logger.error(f"Error during edge upsert: {str(e)}")
raise
async def _node2vec_embed(self):
print("Implemented but never called.")
async def get_knowledge_graph(
self,
node_label: str,
@@ -1126,11 +1117,6 @@ class Neo4JStorage(BaseGraphStorage):
logger.error(f"Error during edge deletion: {str(e)}")
raise
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
raise NotImplementedError
async def drop(self) -> dict[str, str]:
"""Drop all data from storage and clean up resources

View File

@@ -1,7 +1,6 @@
import os
from dataclasses import dataclass
from typing import Any, final
import numpy as np
from typing import final
from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
from lightrag.utils import logger
@@ -16,7 +15,6 @@ if not pm.is_installed("graspologic"):
pm.install("graspologic")
import networkx as nx
from graspologic import embed
from .shared_storage import (
get_storage_lock,
get_update_flag,
@@ -42,40 +40,6 @@ class NetworkXStorage(BaseGraphStorage):
)
nx.write_graphml(graph, file_name)
# TODOdeprecated, remove later
@staticmethod
def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
"""Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
Ensure an undirected graph with the same relationships will always be read the same way.
"""
fixed_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
sorted_nodes = graph.nodes(data=True)
sorted_nodes = sorted(sorted_nodes, key=lambda x: x[0])
fixed_graph.add_nodes_from(sorted_nodes)
edges = list(graph.edges(data=True))
if not graph.is_directed():
def _sort_source_target(edge):
source, target, edge_data = edge
if source > target:
temp = source
source = target
target = temp
return source, target, edge_data
edges = [_sort_source_target(edge) for edge in edges]
def _get_edge_key(source: Any, target: Any) -> str:
return f"{source} -> {target}"
edges = sorted(edges, key=lambda x: _get_edge_key(x[0], x[1]))
fixed_graph.add_edges_from(edges)
return fixed_graph
def __post_init__(self):
self._graphml_xml_file = os.path.join(
self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
@@ -94,10 +58,6 @@ class NetworkXStorage(BaseGraphStorage):
logger.info("Created new empty graph")
self._graph = preloaded_graph or nx.Graph()
self._node_embed_algorithms = {
"node2vec": self._node2vec_embed,
}
async def initialize(self):
"""Initialize storage data"""
# Get the update flag for cross-process update notification
@@ -191,24 +151,6 @@ class NetworkXStorage(BaseGraphStorage):
else:
logger.warning(f"Node {node_id} not found in the graph for deletion.")
# TODO: NOT USED
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
if algorithm not in self._node_embed_algorithms:
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
return await self._node_embed_algorithms[algorithm]()
# TODO: NOT USED
async def _node2vec_embed(self):
graph = await self._get_graph()
embeddings, nodes = embed.node2vec_embed(
graph,
**self.global_config["node2vec_params"],
)
nodes_ids = [graph.nodes[node_id]["id"] for node_id in nodes]
return embeddings, nodes_ids
async def remove_nodes(self, nodes: list[str]):
"""Delete multiple nodes
@@ -392,7 +334,7 @@ class NetworkXStorage(BaseGraphStorage):
# Check if storage was updated by another process
if self.storage_updated.value:
# Storage was updated by another process, reload data instead of saving
logger.warning(
logger.info(
f"Graph for {self.namespace} was updated by another process, reloading..."
)
self._graph = (

View File

@@ -361,7 +361,7 @@ class PGKVStorage(BaseKVStorage):
################ INSERT METHODS ################
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
logger.debug(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
@@ -560,7 +560,7 @@ class PGVectorStorage(BaseVectorStorage):
return upsert_sql, data
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:
logger.info(f"Inserting {len(data)} to {self.namespace}")
logger.debug(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
@@ -949,7 +949,7 @@ class PGDocStatusStorage(DocStatusStorage):
Args:
data: dictionary of document IDs and their status data
"""
logger.info(f"Inserting {len(data)} to {self.namespace}")
logger.debug(f"Inserting {len(data)} to {self.namespace}")
if not data:
return
@@ -1021,9 +1021,6 @@ class PGGraphQueryException(Exception):
class PGGraphStorage(BaseGraphStorage):
def __post_init__(self):
self.graph_name = self.namespace or os.environ.get("AGE_GRAPH_NAME", "lightrag")
self._node_embed_algorithms = {
"node2vec": self._node2vec_embed,
}
self.db: PostgreSQLDB | None = None
async def initialize(self):
@@ -1396,9 +1393,6 @@ class PGGraphStorage(BaseGraphStorage):
)
raise
async def _node2vec_embed(self):
print("Implemented but never called.")
async def delete_node(self, node_id: str) -> None:
"""
Delete a node from the graph.
@@ -1485,24 +1479,6 @@ class PGGraphStorage(BaseGraphStorage):
labels = [result["label"] for result in results]
return labels
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
"""
Generate node embeddings using the specified algorithm.
Args:
algorithm (str): The name of the embedding algorithm to use.
Returns:
tuple[np.ndarray[Any, Any], list[str]]: A tuple containing the embeddings and the corresponding node IDs.
"""
if algorithm not in self._node_embed_algorithms:
raise ValueError(f"Unsupported embedding algorithm: {algorithm}")
embed_func = self._node_embed_algorithms[algorithm]
return await embed_func()
async def get_knowledge_graph(
self,
node_label: str,

View File

@@ -800,13 +800,6 @@ class TiDBGraphStorage(BaseGraphStorage):
}
await self.db.execute(merge_sql, data)
async def embed_nodes(
self, algorithm: str
) -> tuple[np.ndarray[Any, Any], list[str]]:
if algorithm not in self._node_embed_algorithms:
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
return await self._node_embed_algorithms[algorithm]()
# Query
async def has_node(self, node_id: str) -> bool:

View File

@@ -103,8 +103,10 @@ class LightRAG:
entity_extract_max_gleaning: int = field(default=1)
"""Maximum number of entity extraction attempts for ambiguous content."""
entity_summary_to_max_tokens: int = field(
default=int(os.getenv("MAX_TOKEN_SUMMARY", 500))
summary_to_max_tokens: int = field(default=int(os.getenv("MAX_TOKEN_SUMMARY", 500)))
force_llm_summary_on_merge: int = field(
default=int(os.getenv("FORCE_LLM_SUMMARY_ON_MERGE", 6))
)
# Text chunking
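The two new fields above (`summary_to_max_tokens` and `force_llm_summary_on_merge`) are ordinary dataclass fields, so besides the `MAX_TOKEN_SUMMARY` / `FORCE_LLM_SUMMARY_ON_MERGE` environment variables they can also be set directly when constructing `LightRAG`. A minimal sketch, assuming the LLM and embedding functions are configured elsewhere and the working directory is a placeholder:

```python
from lightrag import LightRAG

# Sketch only: LLM/embedding setup omitted; the values mirror the defaults above.
rag = LightRAG(
    working_dir="./rag_storage",       # placeholder directory
    summary_to_max_tokens=500,         # token budget for merged entity/relation descriptions
    force_llm_summary_on_merge=6,      # LLM re-summary kicks in at 6+ description fragments
)
```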
@@ -153,31 +155,6 @@ class LightRAG:
Defaults to `chunking_by_token_size` if not specified.
"""
# Node embedding
# ---
node_embedding_algorithm: str = field(default="node2vec")
"""Algorithm used for node embedding in knowledge graphs."""
node2vec_params: dict[str, int] = field(
default_factory=lambda: {
"dimensions": 1536,
"num_walks": 10,
"walk_length": 40,
"window_size": 2,
"iterations": 3,
"random_seed": 3,
}
)
"""Configuration for the node2vec embedding algorithm:
- dimensions: Number of dimensions for embeddings.
- num_walks: Number of random walks per node.
- walk_length: Number of steps per random walk.
- window_size: Context window size for training.
- iterations: Number of iterations for training.
- random_seed: Seed value for reproducibility.
"""
# Embedding
# ---
@@ -900,6 +877,15 @@ class LightRAG:
# Get file path from status document
file_path = getattr(status_doc, "file_path", "unknown_source")
async with pipeline_status_lock:
log_message = f"Processing file: {file_path}"
logger.info(log_message)
pipeline_status["history_messages"].append(log_message)
log_message = f"Processing d-id: {doc_id}"
logger.info(log_message)
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)
# Generate chunks from document
chunks: dict[str, Any] = {
compute_mdhash_id(dp["content"], prefix="chunk-"): {
@@ -1372,6 +1358,16 @@ class LightRAG:
hashing_kv=self.llm_response_cache, # Directly use llm_response_cache
system_prompt=system_prompt,
)
elif param.mode == "bypass":
# Bypass mode: directly use LLM without knowledge retrieval
use_llm_func = param.model_func or global_config["llm_model_func"]
param.stream = True if param.stream is None else param.stream
response = await use_llm_func(
query.strip(),
system_prompt=system_prompt,
history_messages=param.conversation_history,
stream=param.stream,
)
else:
raise ValueError(f"Unknown mode {param.mode}")
await self._query_done()
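The new `bypass` branch skips retrieval entirely and forwards the query, system prompt, and conversation history straight to the configured LLM, defaulting to streaming when `stream` is left unset. A usage sketch, assuming `rag` is an already-initialized `LightRAG` instance:

```python
from lightrag import QueryParam

async def ask_directly(rag):
    # Sketch only: no knowledge-graph or vector retrieval happens in bypass mode.
    return await rag.aquery(
        "Rewrite my last message more formally.",
        param=QueryParam(
            mode="bypass",
            stream=False,  # leave unset to get the streaming default shown above
            conversation_history=[
                {"role": "user", "content": "hey, can u fix the doc?"},
                {"role": "assistant", "content": "Sure, which section?"},
            ],
        ),
    )
```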

View File

@@ -81,6 +81,8 @@ def create_openai_async_client(
if base_url is not None:
merged_configs["base_url"] = base_url
else:
merged_configs["base_url"] = os.environ["OPENAI_API_BASE"]
return AsyncOpenAI(**merged_configs)

View File

@@ -16,6 +16,7 @@ from .utils import (
encode_string_by_tiktoken,
is_float_regex,
list_of_list_to_csv,
normalize_extracted_info,
pack_user_ass_to_openai_messages,
split_string_by_multi_markers,
truncate_list_by_token_size,
@@ -24,8 +25,8 @@ from .utils import (
handle_cache,
save_to_cache,
CacheData,
statistic_data,
get_conversation_turns,
use_llm_func_with_cache,
)
from .base import (
BaseGraphStorage,
@@ -106,6 +107,9 @@ async def _handle_entity_relation_summary(
entity_or_relation_name: str,
description: str,
global_config: dict,
pipeline_status: dict = None,
pipeline_status_lock=None,
llm_response_cache: BaseKVStorage | None = None,
) -> str:
"""Handle entity relation summary
For each entity or relation, input is the combined description of already existing description and new description.
@@ -114,14 +118,13 @@ async def _handle_entity_relation_summary(
use_llm_func: callable = global_config["llm_model_func"]
llm_max_tokens = global_config["llm_model_max_token_size"]
tiktoken_model_name = global_config["tiktoken_model_name"]
summary_max_tokens = global_config["entity_summary_to_max_tokens"]
summary_max_tokens = global_config["summary_to_max_tokens"]
language = global_config["addon_params"].get(
"language", PROMPTS["DEFAULT_LANGUAGE"]
)
tokens = encode_string_by_tiktoken(description, model_name=tiktoken_model_name)
if len(tokens) < summary_max_tokens: # No need for summary
return description
prompt_template = PROMPTS["summarize_entity_descriptions"]
use_description = decode_tokens_by_tiktoken(
tokens[:llm_max_tokens], model_name=tiktoken_model_name
@@ -133,7 +136,15 @@ async def _handle_entity_relation_summary(
)
use_prompt = prompt_template.format(**context_base)
logger.debug(f"Trigger summary: {entity_or_relation_name}")
summary = await use_llm_func(use_prompt, max_tokens=summary_max_tokens)
# Use LLM function with cache
summary = await use_llm_func_with_cache(
use_prompt,
use_llm_func,
llm_response_cache=llm_response_cache,
max_tokens=summary_max_tokens,
cache_type="extract",
)
return summary
@@ -153,6 +164,9 @@ async def _handle_single_entity_extraction(
)
return None
# Normalize entity name
entity_name = normalize_extracted_info(entity_name, is_entity=True)
# Clean and validate entity type
entity_type = clean_str(record_attributes[2]).strip('"')
if not entity_type.strip() or entity_type.startswith('("'):
@@ -162,7 +176,9 @@ async def _handle_single_entity_extraction(
return None
# Clean and validate description
entity_description = clean_str(record_attributes[3]).strip('"')
entity_description = clean_str(record_attributes[3])
entity_description = normalize_extracted_info(entity_description)
if not entity_description.strip():
logger.warning(
f"Entity extraction error: empty description for entity '{entity_name}' of type '{entity_type}'"
@@ -186,13 +202,20 @@ async def _handle_single_relationship_extraction(
if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
return None
# add this record as edge
source = clean_str(record_attributes[1]).strip('"')
target = clean_str(record_attributes[2]).strip('"')
edge_description = clean_str(record_attributes[3]).strip('"')
edge_keywords = clean_str(record_attributes[4]).strip('"')
source = clean_str(record_attributes[1])
target = clean_str(record_attributes[2])
# Normalize source and target entity names
source = normalize_extracted_info(source, is_entity=True)
target = normalize_extracted_info(target, is_entity=True)
edge_description = clean_str(record_attributes[3])
edge_description = normalize_extracted_info(edge_description)
edge_keywords = clean_str(record_attributes[4]).strip('"').strip("'")
edge_source_id = chunk_key
weight = (
float(record_attributes[-1].strip('"'))
float(record_attributes[-1].strip('"').strip("'"))
if is_float_regex(record_attributes[-1])
else 1.0
)
@@ -212,6 +235,9 @@ async def _merge_nodes_then_upsert(
nodes_data: list[dict],
knowledge_graph_inst: BaseGraphStorage,
global_config: dict,
pipeline_status: dict = None,
pipeline_status_lock=None,
llm_response_cache: BaseKVStorage | None = None,
):
"""Get existing nodes from knowledge graph use name,if exists, merge data, else create, then upsert."""
already_entity_types = []
@@ -247,10 +273,35 @@ async def _merge_nodes_then_upsert(
set([dp["file_path"] for dp in nodes_data] + already_file_paths)
)
logger.debug(f"file_path: {file_path}")
description = await _handle_entity_relation_summary(
entity_name, description, global_config
)
force_llm_summary_on_merge = global_config["force_llm_summary_on_merge"]
num_fragment = description.count(GRAPH_FIELD_SEP) + 1
num_new_fragment = len(set([dp["description"] for dp in nodes_data]))
if num_fragment > 1:
if num_fragment >= force_llm_summary_on_merge:
status_message = f"LLM merge N: {entity_name} | {num_new_fragment}+{num_fragment-num_new_fragment}"
logger.info(status_message)
if pipeline_status is not None and pipeline_status_lock is not None:
async with pipeline_status_lock:
pipeline_status["latest_message"] = status_message
pipeline_status["history_messages"].append(status_message)
description = await _handle_entity_relation_summary(
entity_name,
description,
global_config,
pipeline_status,
pipeline_status_lock,
llm_response_cache,
)
else:
status_message = f"Merge N: {entity_name} | {num_new_fragment}+{num_fragment-num_new_fragment}"
logger.info(status_message)
if pipeline_status is not None and pipeline_status_lock is not None:
async with pipeline_status_lock:
pipeline_status["latest_message"] = status_message
pipeline_status["history_messages"].append(status_message)
node_data = dict(
entity_id=entity_name,
entity_type=entity_type,
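Both merge paths now count description fragments before deciding whether to call the LLM: descriptions are joined with `GRAPH_FIELD_SEP`, so the fragment count is the separator count plus one, and only counts at or above `force_llm_summary_on_merge` trigger a re-summary. A small sketch of that heuristic, assuming `GRAPH_FIELD_SEP` is the `<SEP>` constant used elsewhere in the codebase:

```python
GRAPH_FIELD_SEP = "<SEP>"  # assumption: LightRAG's field separator constant

def needs_llm_summary(description: str, force_llm_summary_on_merge: int = 6) -> bool:
    """Mirror of the merge heuristic above: one fragment per separator-delimited piece."""
    num_fragment = description.count(GRAPH_FIELD_SEP) + 1
    return num_fragment >= force_llm_summary_on_merge

merged = GRAPH_FIELD_SEP.join(["first description", "second", "third"])
print(needs_llm_summary(merged))      # False: 3 fragments < default threshold of 6
print(needs_llm_summary(merged, 3))   # True: threshold reached, LLM summary would run
```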
@@ -272,6 +323,9 @@ async def _merge_edges_then_upsert(
edges_data: list[dict],
knowledge_graph_inst: BaseGraphStorage,
global_config: dict,
pipeline_status: dict = None,
pipeline_status_lock=None,
llm_response_cache: BaseKVStorage | None = None,
):
already_weights = []
already_source_ids = []
@@ -357,9 +411,38 @@ async def _merge_edges_then_upsert(
"file_path": file_path,
},
)
description = await _handle_entity_relation_summary(
f"({src_id}, {tgt_id})", description, global_config
force_llm_summary_on_merge = global_config["force_llm_summary_on_merge"]
num_fragment = description.count(GRAPH_FIELD_SEP) + 1
num_new_fragment = len(
set([dp["description"] for dp in edges_data if dp.get("description")])
)
if num_fragment > 1:
if num_fragment >= force_llm_summary_on_merge:
status_message = f"LLM merge E: {src_id} - {tgt_id} | {num_new_fragment}+{num_fragment-num_new_fragment}"
logger.info(status_message)
if pipeline_status is not None and pipeline_status_lock is not None:
async with pipeline_status_lock:
pipeline_status["latest_message"] = status_message
pipeline_status["history_messages"].append(status_message)
description = await _handle_entity_relation_summary(
f"({src_id}, {tgt_id})",
description,
global_config,
pipeline_status,
pipeline_status_lock,
llm_response_cache,
)
else:
status_message = f"Merge E: {src_id} - {tgt_id} | {num_new_fragment}+{num_fragment-num_new_fragment}"
logger.info(status_message)
if pipeline_status is not None and pipeline_status_lock is not None:
async with pipeline_status_lock:
pipeline_status["latest_message"] = status_message
pipeline_status["history_messages"].append(status_message)
await knowledge_graph_inst.upsert_edge(
src_id,
tgt_id,
@@ -396,9 +479,6 @@ async def extract_entities(
) -> None:
use_llm_func: callable = global_config["llm_model_func"]
entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]
enable_llm_cache_for_entity_extract: bool = global_config[
"enable_llm_cache_for_entity_extract"
]
ordered_chunks = list(chunks.items())
# add language and example number params to prompt
@@ -449,51 +529,7 @@ async def extract_entities(
graph_db_lock = get_graph_db_lock(enable_logging=False)
async def _user_llm_func_with_cache(
input_text: str, history_messages: list[dict[str, str]] = None
) -> str:
if enable_llm_cache_for_entity_extract and llm_response_cache:
if history_messages:
history = json.dumps(history_messages, ensure_ascii=False)
_prompt = history + "\n" + input_text
else:
_prompt = input_text
# TODO add cache_type="extract"
arg_hash = compute_args_hash(_prompt)
cached_return, _1, _2, _3 = await handle_cache(
llm_response_cache,
arg_hash,
_prompt,
"default",
cache_type="extract",
)
if cached_return:
logger.debug(f"Found cache for {arg_hash}")
statistic_data["llm_cache"] += 1
return cached_return
statistic_data["llm_call"] += 1
if history_messages:
res: str = await use_llm_func(
input_text, history_messages=history_messages
)
else:
res: str = await use_llm_func(input_text)
await save_to_cache(
llm_response_cache,
CacheData(
args_hash=arg_hash,
content=res,
prompt=_prompt,
cache_type="extract",
),
)
return res
if history_messages:
return await use_llm_func(input_text, history_messages=history_messages)
else:
return await use_llm_func(input_text)
# Use the global use_llm_func_with_cache function from utils.py
async def _process_extraction_result(
result: str, chunk_key: str, file_path: str = "unknown_source"
@@ -545,8 +581,10 @@ async def extract_entities(
Args:
chunk_key_dp (tuple[str, TextChunkSchema]):
("chunk-xxxxxx", {"tokens": int, "content": str, "full_doc_id": str, "chunk_order_index": int})
Returns:
tuple: (maybe_nodes, maybe_edges) containing extracted entities and relationships
"""
nonlocal processed_chunks, total_entities_count, total_relations_count
nonlocal processed_chunks
chunk_key = chunk_key_dp[0]
chunk_dp = chunk_key_dp[1]
content = chunk_dp["content"]
@@ -558,7 +596,12 @@ async def extract_entities(
**context_base, input_text="{input_text}"
).format(**context_base, input_text=content)
final_result = await _user_llm_func_with_cache(hint_prompt)
final_result = await use_llm_func_with_cache(
hint_prompt,
use_llm_func,
llm_response_cache=llm_response_cache,
cache_type="extract",
)
history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
# Process initial extraction with file path
@@ -568,8 +611,12 @@ async def extract_entities(
# Process additional gleaning results
for now_glean_index in range(entity_extract_max_gleaning):
glean_result = await _user_llm_func_with_cache(
continue_prompt, history_messages=history
glean_result = await use_llm_func_with_cache(
continue_prompt,
use_llm_func,
llm_response_cache=llm_response_cache,
history_messages=history,
cache_type="extract",
)
history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
@@ -579,17 +626,27 @@ async def extract_entities(
glean_result, chunk_key, file_path
)
# Merge results
# Merge results - only add entities and edges with new names
for entity_name, entities in glean_nodes.items():
maybe_nodes[entity_name].extend(entities)
if (
entity_name not in maybe_nodes
): # Only accept entities with new names in the gleaning stage
maybe_nodes[entity_name].extend(entities)
for edge_key, edges in glean_edges.items():
maybe_edges[edge_key].extend(edges)
if (
edge_key not in maybe_edges
): # Only accept edges with new names in the gleaning stage
maybe_edges[edge_key].extend(edges)
if now_glean_index == entity_extract_max_gleaning - 1:
break
if_loop_result: str = await _user_llm_func_with_cache(
if_loop_prompt, history_messages=history
if_loop_result: str = await use_llm_func_with_cache(
if_loop_prompt,
use_llm_func,
llm_response_cache=llm_response_cache,
history_messages=history,
cache_type="extract",
)
if_loop_result = if_loop_result.strip().strip('"').strip("'").lower()
if if_loop_result != "yes":
@@ -598,73 +655,98 @@ async def extract_entities(
processed_chunks += 1
entities_count = len(maybe_nodes)
relations_count = len(maybe_edges)
log_message = f" Chk {processed_chunks}/{total_chunks}: extracted {entities_count} Ent + {relations_count} Rel (deduplicated)"
log_message = f"Chk {processed_chunks}/{total_chunks}: extracted {entities_count} Ent + {relations_count} Rel"
logger.info(log_message)
if pipeline_status is not None:
async with pipeline_status_lock:
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)
# Use graph database lock to ensure atomic merges and updates
chunk_entities_data = []
chunk_relationships_data = []
# Return the extracted nodes and edges for centralized processing
return maybe_nodes, maybe_edges
async with graph_db_lock:
# Process and update entities
for entity_name, entities in maybe_nodes.items():
entity_data = await _merge_nodes_then_upsert(
entity_name, entities, knowledge_graph_inst, global_config
)
chunk_entities_data.append(entity_data)
# Process and update relationships
for edge_key, edges in maybe_edges.items():
# Ensure edge direction consistency
sorted_edge_key = tuple(sorted(edge_key))
edge_data = await _merge_edges_then_upsert(
sorted_edge_key[0],
sorted_edge_key[1],
edges,
knowledge_graph_inst,
global_config,
)
chunk_relationships_data.append(edge_data)
# Update vector database (within the same lock to ensure atomicity)
if entity_vdb is not None and chunk_entities_data:
data_for_vdb = {
compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
"entity_name": dp["entity_name"],
"entity_type": dp["entity_type"],
"content": f"{dp['entity_name']}\n{dp['description']}",
"source_id": dp["source_id"],
"file_path": dp.get("file_path", "unknown_source"),
}
for dp in chunk_entities_data
}
await entity_vdb.upsert(data_for_vdb)
if relationships_vdb is not None and chunk_relationships_data:
data_for_vdb = {
compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
"src_id": dp["src_id"],
"tgt_id": dp["tgt_id"],
"keywords": dp["keywords"],
"content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
"source_id": dp["source_id"],
"file_path": dp.get("file_path", "unknown_source"),
}
for dp in chunk_relationships_data
}
await relationships_vdb.upsert(data_for_vdb)
# Update counters
total_entities_count += len(chunk_entities_data)
total_relations_count += len(chunk_relationships_data)
# Handle all chunks in parallel
# Handle all chunks in parallel and collect results
tasks = [_process_single_content(c) for c in ordered_chunks]
await asyncio.gather(*tasks)
chunk_results = await asyncio.gather(*tasks)
# Collect all nodes and edges from all chunks
all_nodes = defaultdict(list)
all_edges = defaultdict(list)
for maybe_nodes, maybe_edges in chunk_results:
# Collect nodes
for entity_name, entities in maybe_nodes.items():
all_nodes[entity_name].extend(entities)
# Collect edges with sorted keys for undirected graph
for edge_key, edges in maybe_edges.items():
sorted_edge_key = tuple(sorted(edge_key))
all_edges[sorted_edge_key].extend(edges)
# Centralized processing of all nodes and edges
entities_data = []
relationships_data = []
# Use graph database lock to ensure atomic merges and updates
async with graph_db_lock:
# Process and update all entities at once
for entity_name, entities in all_nodes.items():
entity_data = await _merge_nodes_then_upsert(
entity_name,
entities,
knowledge_graph_inst,
global_config,
pipeline_status,
pipeline_status_lock,
llm_response_cache,
)
entities_data.append(entity_data)
# Process and update all relationships at once
for edge_key, edges in all_edges.items():
edge_data = await _merge_edges_then_upsert(
edge_key[0],
edge_key[1],
edges,
knowledge_graph_inst,
global_config,
pipeline_status,
pipeline_status_lock,
llm_response_cache,
)
relationships_data.append(edge_data)
# Update vector databases with all collected data
if entity_vdb is not None and entities_data:
data_for_vdb = {
compute_mdhash_id(dp["entity_name"], prefix="ent-"): {
"entity_name": dp["entity_name"],
"entity_type": dp["entity_type"],
"content": f"{dp['entity_name']}\n{dp['description']}",
"source_id": dp["source_id"],
"file_path": dp.get("file_path", "unknown_source"),
}
for dp in entities_data
}
await entity_vdb.upsert(data_for_vdb)
if relationships_vdb is not None and relationships_data:
data_for_vdb = {
compute_mdhash_id(dp["src_id"] + dp["tgt_id"], prefix="rel-"): {
"src_id": dp["src_id"],
"tgt_id": dp["tgt_id"],
"keywords": dp["keywords"],
"content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
"source_id": dp["source_id"],
"file_path": dp.get("file_path", "unknown_source"),
}
for dp in relationships_data
}
await relationships_vdb.upsert(data_for_vdb)
# Update total counts
total_entities_count = len(entities_data)
total_relations_count = len(relationships_data)
log_message = f"Extracted {total_entities_count} entities + {total_relations_count} relationships (total)"
logger.info(log_message)
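With this refactor each chunk task only extracts and returns its `(maybe_nodes, maybe_edges)` pair; all merging and vector-database updates happen once, under a single `graph_db_lock`, after `asyncio.gather` completes. A self-contained sketch of that gather-then-merge pattern, with made-up chunk results standing in for the real extraction output:

```python
import asyncio
from collections import defaultdict

async def fake_chunk_extraction(i: int):
    # Stand-in for _process_single_content: each chunk returns its own nodes/edges.
    nodes = {f"Entity{i % 2}": [{"description": f"desc from chunk {i}"}]}
    edges = {(f"Entity{i % 2}", "Hub"): [{"description": f"edge from chunk {i}"}]}
    return nodes, edges

async def main():
    chunk_results = await asyncio.gather(*(fake_chunk_extraction(i) for i in range(4)))

    all_nodes, all_edges = defaultdict(list), defaultdict(list)
    for maybe_nodes, maybe_edges in chunk_results:
        for name, items in maybe_nodes.items():
            all_nodes[name].extend(items)
        for edge_key, items in maybe_edges.items():
            all_edges[tuple(sorted(edge_key))].extend(items)  # undirected edges: sort the key

    # Only at this point would the real code take graph_db_lock and upsert each entity/edge once.
    print({k: len(v) for k, v in all_nodes.items()})  # {'Entity0': 2, 'Entity1': 2}

asyncio.run(main())
```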
@@ -1031,6 +1113,7 @@ async def mix_kg_vector_query(
chunk_with_time = {
"content": chunk["content"],
"created_at": result.get("created_at", None),
"file_path": result.get("file_path", None),
}
valid_chunks.append(chunk_with_time)
@@ -1072,7 +1155,14 @@ async def mix_kg_vector_query(
return PROMPTS["fail_response"]
if query_param.only_need_context:
return {"kg_context": kg_context, "vector_context": vector_context}
context_str = f"""
-----Knowledge Graph Context-----
{kg_context if kg_context else "No relevant knowledge graph information found"}
-----Vector Context-----
{vector_context if vector_context else "No relevant text information found"}
""".strip()
return context_str
# 5. Construct hybrid prompt
sys_prompt = (
@@ -1363,6 +1453,7 @@ async def _find_most_related_text_unit_from_entities(
text_units = [
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
for dp in node_datas
if dp["source_id"] is not None
]
edges = await asyncio.gather(
*[knowledge_graph_inst.get_node_edges(dp["entity_name"]) for dp in node_datas]
@@ -1657,6 +1748,7 @@ async def _find_most_related_entities_from_relationships(
node_datas = [
{**n, "entity_name": k, "rank": d}
for k, n, d in zip(entity_names, node_datas, node_degrees)
if n is not None
]
len_node_datas = len(node_datas)
@@ -1681,6 +1773,7 @@ async def _find_related_text_unit_from_relationships(
text_units = [
split_string_by_multi_markers(dp["source_id"], [GRAPH_FIELD_SEP])
for dp in edge_datas
if dp["source_id"] is not None
]
all_text_units_lookup = {}

View File

@@ -12,13 +12,17 @@ import re
from dataclasses import dataclass
from functools import wraps
from hashlib import md5
from typing import Any, Callable
from typing import Any, Callable, TYPE_CHECKING
import xml.etree.ElementTree as ET
import numpy as np
import tiktoken
from lightrag.prompt import PROMPTS
from dotenv import load_dotenv
# Use TYPE_CHECKING to avoid circular imports
if TYPE_CHECKING:
from lightrag.base import BaseKVStorage
# use the .env that is inside the current folder
# allows to use different .env file for each lightrag instance
# the OS environment variables take precedence over the .env file
@@ -334,6 +338,7 @@ def split_string_by_multi_markers(content: str, markers: list[str]) -> list[str]
"""Split a string by multiple markers"""
if not markers:
return [content]
content = content if content is not None else ""
results = re.split("|".join(re.escape(marker) for marker in markers), content)
return [r.strip() for r in results if r.strip()]
@@ -907,6 +912,84 @@ def lazy_external_import(module_name: str, class_name: str) -> Callable[..., Any
return import_class
async def use_llm_func_with_cache(
input_text: str,
use_llm_func: callable,
llm_response_cache: "BaseKVStorage | None" = None,
max_tokens: int = None,
history_messages: list[dict[str, str]] = None,
cache_type: str = "extract",
) -> str:
"""Call LLM function with cache support
If cache is available and enabled (determined by handle_cache based on mode),
retrieve result from cache; otherwise call LLM function and save result to cache.
Args:
input_text: Input text to send to LLM
use_llm_func: LLM function to call
llm_response_cache: Cache storage instance
max_tokens: Maximum tokens for generation
history_messages: History messages list
cache_type: Type of cache
Returns:
LLM response text
"""
if llm_response_cache:
if history_messages:
history = json.dumps(history_messages, ensure_ascii=False)
_prompt = history + "\n" + input_text
else:
_prompt = input_text
arg_hash = compute_args_hash(_prompt)
cached_return, _1, _2, _3 = await handle_cache(
llm_response_cache,
arg_hash,
_prompt,
"default",
cache_type=cache_type,
)
if cached_return:
logger.debug(f"Found cache for {arg_hash}")
statistic_data["llm_cache"] += 1
return cached_return
statistic_data["llm_call"] += 1
# Call LLM
kwargs = {}
if history_messages:
kwargs["history_messages"] = history_messages
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
res: str = await use_llm_func(input_text, **kwargs)
# Save to cache
logger.info(f" == LLM cache == saving {arg_hash}")
await save_to_cache(
llm_response_cache,
CacheData(
args_hash=arg_hash,
content=res,
prompt=_prompt,
cache_type=cache_type,
),
)
return res
# When cache is disabled, directly call LLM
kwargs = {}
if history_messages:
kwargs["history_messages"] = history_messages
if max_tokens is not None:
kwargs["max_tokens"] = max_tokens
logger.info(f"Call LLM function with query text lenght: {len(input_text)}")
return await use_llm_func(input_text, **kwargs)
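A usage sketch of the helper above on its cache-disabled path: passing `llm_response_cache=None` simply forwards to the supplied LLM function, so a dummy coroutine can stand in for a real model call.

```python
import asyncio
from lightrag.utils import use_llm_func_with_cache  # the helper defined above

async def dummy_llm(prompt: str, **kwargs) -> str:
    # Stand-in model function; max_tokens / history_messages arrive via kwargs when given.
    return f"echo: {prompt[:40]}"

async def main():
    result = await use_llm_func_with_cache(
        "Summarize the combined entity descriptions.",
        dummy_llm,
        llm_response_cache=None,  # cache disabled: direct LLM call
        max_tokens=256,
        cache_type="extract",
    )
    print(result)

asyncio.run(main())
```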
def get_content_summary(content: str, max_length: int = 250) -> str:
"""Get summary of document content
@@ -923,6 +1006,50 @@ def get_content_summary(content: str, max_length: int = 250) -> str:
return content[:max_length] + "..."
def normalize_extracted_info(name: str, is_entity=False) -> str:
"""Normalize entity/relation names and description with the following rules:
1. Remove spaces between Chinese characters
2. Remove spaces between Chinese characters and English letters/numbers
3. Preserve spaces within English text and numbers
4. Replace Chinese parentheses with English parentheses
5. Replace Chinese dash with English dash
Args:
name: Entity name to normalize
Returns:
Normalized entity name
"""
# Replace Chinese parentheses with English parentheses
name = name.replace("", "(").replace("", ")")
# Replace Chinese dash with English dash
name = name.replace("", "-").replace("", "-")
# Use regex to remove spaces between Chinese characters
# Regex explanation:
# (?<=[\u4e00-\u9fa5]): Positive lookbehind for Chinese character
# \s+: One or more whitespace characters
# (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
# Remove spaces between Chinese and English/numbers
name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name)
name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
# Remove English quotation marks from the beginning and end
name = name.strip('"').strip("'")
if is_entity:
# remove Chinese quotes
name = name.replace("", "").replace("", "").replace("", "").replace("", "")
# remove English quotes in and around Chinese text
name = re.sub(r"['\"]+(?=[\u4e00-\u9fa5])", "", name)
name = re.sub(r"(?<=[\u4e00-\u9fa5])['\"]+", "", name)
return name
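A few illustrative calls for the normalization rules above (expected outputs shown as comments):

```python
from lightrag.utils import normalize_extracted_info  # the function defined above

print(normalize_extracted_info('"Apple 公司"', is_entity=True))  # Apple公司  (quotes stripped, CJK/ASCII gap removed)
print(normalize_extracted_info("知识 图谱"))                      # 知识图谱   (space between Chinese characters removed)
print(normalize_extracted_info("LightRAG is fast"))              # LightRAG is fast  (English spacing preserved)
```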
def clean_text(text: str) -> str:
"""Clean text by removing null bytes (0x00) and whitespace

View File

@@ -65,8 +65,9 @@ export type LightragDocumentsScanProgress = {
* - "global": Utilizes global knowledge.
* - "hybrid": Combines local and global retrieval methods.
* - "mix": Integrates knowledge graph and vector retrieval.
* - "bypass": Bypasses knowledge retrieval and directly uses the LLM.
*/
export type QueryMode = 'naive' | 'local' | 'global' | 'hybrid' | 'mix'
export type QueryMode = 'naive' | 'local' | 'global' | 'hybrid' | 'mix' | 'bypass'
export type Message = {
role: 'user' | 'assistant' | 'system'
@@ -244,10 +245,10 @@ export const checkHealth = async (): Promise<
try {
const response = await axiosInstance.get('/health')
return response.data
} catch (e) {
} catch (error) {
return {
status: 'error',
message: errorMessage(e)
message: errorMessage(error)
}
}
}
@@ -277,65 +278,100 @@ export const queryTextStream = async (
onChunk: (chunk: string) => void,
onError?: (error: string) => void
) => {
const apiKey = useSettingsStore.getState().apiKey;
const token = localStorage.getItem('LIGHTRAG-API-TOKEN');
const headers: HeadersInit = {
'Content-Type': 'application/json',
'Accept': 'application/x-ndjson',
};
if (token) {
headers['Authorization'] = `Bearer ${token}`;
}
if (apiKey) {
headers['X-API-Key'] = apiKey;
}
try {
let buffer = ''
await axiosInstance
.post('/query/stream', request, {
responseType: 'text',
headers: {
Accept: 'application/x-ndjson'
},
transformResponse: [
(data: string) => {
// Accumulate the data and process complete lines
buffer += data
const lines = buffer.split('\n')
// Keep the last potentially incomplete line in the buffer
buffer = lines.pop() || ''
const response = await fetch(`${backendBaseUrl}/query/stream`, {
method: 'POST',
headers: headers,
body: JSON.stringify(request),
});
for (const line of lines) {
if (line.trim()) {
try {
const parsed = JSON.parse(line)
if (parsed.response) {
onChunk(parsed.response)
} else if (parsed.error && onError) {
onError(parsed.error)
}
} catch (e) {
console.error('Error parsing stream chunk:', e)
if (onError) onError('Error parsing server response')
}
}
}
return data
}
]
})
.catch((error) => {
if (onError) onError(errorMessage(error))
})
// Process any remaining data in the buffer
if (buffer.trim()) {
if (!response.ok) {
// Handle HTTP errors (e.g., 4xx, 5xx)
let errorBody = 'Unknown error';
try {
const parsed = JSON.parse(buffer)
if (parsed.response) {
onChunk(parsed.response)
} else if (parsed.error && onError) {
onError(parsed.error)
errorBody = await response.text(); // Try to get error details from body
} catch { /* ignore */ }
throw new Error(`HTTP error ${response.status}: ${response.statusText}\n${errorBody}`);
}
if (!response.body) {
throw new Error('Response body is null');
}
const reader = response.body.getReader();
const decoder = new TextDecoder();
let buffer = '';
while (true) {
const { done, value } = await reader.read();
if (done) {
break; // Stream finished
}
// Decode the chunk and add to buffer
buffer += decoder.decode(value, { stream: true }); // stream: true handles multi-byte chars split across chunks
// Process complete lines (NDJSON)
const lines = buffer.split('\n');
buffer = lines.pop() || ''; // Keep potentially incomplete line in buffer
for (const line of lines) {
if (line.trim()) {
try {
const parsed = JSON.parse(line);
if (parsed.response) {
console.log('Received chunk:', parsed.response); // Log for debugging
onChunk(parsed.response);
} else if (parsed.error && onError) {
onError(parsed.error);
}
} catch (error) {
console.error('Error parsing stream chunk:', line, error);
if (onError) onError(`Error parsing server response: ${line}`);
}
}
} catch (e) {
console.error('Error parsing final chunk:', e)
if (onError) onError('Error parsing server response')
}
}
// Process any remaining data in the buffer after the stream ends
if (buffer.trim()) {
try {
const parsed = JSON.parse(buffer);
if (parsed.response) {
onChunk(parsed.response);
} else if (parsed.error && onError) {
onError(parsed.error);
}
} catch (error) {
console.error('Error parsing final chunk:', buffer, error);
if (onError) onError(`Error parsing final server response: ${buffer}`);
}
}
} catch (error) {
const message = errorMessage(error)
console.error('Stream request failed:', message)
if (onError) onError(message)
const message = errorMessage(error);
console.error('Stream request failed:', message);
if (onError) {
onError(message);
} else {
// If no specific onError handler, maybe throw or log more prominently
console.error('Unhandled stream error:', message);
}
}
}
};
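For reference, the same NDJSON contract can be consumed from any HTTP client: POST the query, read line-delimited JSON, and pick out the `response` / `error` fields per line. A hedged Python sketch; the server URL, port, and payload fields are assumptions based on the request shape above.

```python
import json
import requests  # assumption: requests is available; any streaming HTTP client works

def stream_query(base_url: str, query: str, api_key: str | None = None):
    headers = {"Content-Type": "application/json", "Accept": "application/x-ndjson"}
    if api_key:
        headers["X-API-Key"] = api_key
    with requests.post(
        f"{base_url}/query/stream",
        json={"query": query, "mode": "mix", "stream": True},  # assumed request body
        headers=headers,
        stream=True,
    ) as resp:
        resp.raise_for_status()
        for line in resp.iter_lines(decode_unicode=True):  # one JSON object per line
            if not line:
                continue
            parsed = json.loads(line)
            if "response" in parsed:
                yield parsed["response"]
            elif "error" in parsed:
                raise RuntimeError(parsed["error"])

# Example (assumed default server address):
# for chunk in stream_query("http://localhost:9621", "What is LightRAG?"):
#     print(chunk, end="", flush=True)
```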
export const insertText = async (text: string): Promise<DocActionResponse> => {
const response = await axiosInstance.post('/documents/text', { text })

View File

@@ -166,7 +166,7 @@ export default function PipelineStatusDialog({
{/* Latest Message */}
<div className="space-y-2">
<div className="text-sm font-medium">{t('documentPanel.pipelineStatus.latestMessage')}:</div>
<div className="font-mono text-sm rounded-md bg-zinc-800 text-zinc-100 p-3">
<div className="font-mono text-xs rounded-md bg-zinc-800 text-zinc-100 p-3">
{status?.latest_message || '-'}
</div>
</div>
@@ -177,7 +177,7 @@ export default function PipelineStatusDialog({
<div
ref={historyRef}
onScroll={handleScroll}
className="font-mono text-sm rounded-md bg-zinc-800 text-zinc-100 p-3 overflow-y-auto min-h-[7.5em] max-h-[40vh]"
className="font-mono text-xs rounded-md bg-zinc-800 text-zinc-100 p-3 overflow-y-auto min-h-[7.5em] max-h-[40vh]"
>
{status?.history_messages?.length ? (
status.history_messages.map((msg, idx) => (

View File

@@ -46,8 +46,30 @@ const GraphLabels = () => {
let result: string[] = labels
if (query) {
// Search labels
// Search labels using MiniSearch
result = searchEngine.search(query).map((r: { id: number }) => labels[r.id])
// Add middle-content matching if results are few
// This enables matching content in the middle of text, not just from the beginning
if (result.length < 5) {
// Get already matched labels to avoid duplicates
const matchedLabels = new Set(result)
// Perform middle-content matching on all labels
const middleMatchResults = labels.filter(label => {
// Skip already matched labels
if (matchedLabels.has(label)) return false
// Match if label contains query string but doesn't start with it
return label &&
typeof label === 'string' &&
!label.toLowerCase().startsWith(query.toLowerCase()) &&
label.toLowerCase().includes(query.toLowerCase())
})
// Merge results
result = [...result, ...middleMatchResults]
}
}
return result.length <= labelListLimit
@@ -108,12 +130,12 @@ const GraphLabels = () => {
variant={controlButtonVariant}
onClick={handleRefresh}
tooltip={t('graphPanel.graphLabels.refreshTooltip')}
className="mr-1"
className="mr-2"
>
<RefreshCw className="h-4 w-4" />
</Button>
<AsyncSelect<string>
className="ml-2"
className="min-w-[300px]"
triggerClassName="max-h-8"
searchInputClassName="max-h-8"
triggerTooltip={t('graphPanel.graphLabels.selectTooltip')}

View File

@@ -123,13 +123,42 @@ export const GraphSearchInput = ({
}
// If has query, search nodes and verify they still exist
const result: OptionItem[] = searchEngine.search(query)
let result: OptionItem[] = searchEngine.search(query)
.filter((r: { id: string }) => graph.hasNode(r.id))
.map((r: { id: string }) => ({
id: r.id,
type: 'nodes'
}))
// Add middle-content matching if results are few
// This enables matching content in the middle of text, not just from the beginning
if (result.length < 5) {
// Get already matched IDs to avoid duplicates
const matchedIds = new Set(result.map(item => item.id))
// Perform middle-content matching on all nodes
const middleMatchResults = graph.nodes()
.filter(id => {
// Skip already matched nodes
if (matchedIds.has(id)) return false
// Get node label
const label = graph.getNodeAttribute(id, 'label')
// Match if label contains query string but doesn't start with it
return label &&
typeof label === 'string' &&
!label.toLowerCase().startsWith(query.toLowerCase()) &&
label.toLowerCase().includes(query.toLowerCase())
})
.map(id => ({
id,
type: 'nodes' as const
}))
// Merge results
result = [...result, ...middleMatchResults]
}
// prettier-ignore
return result.length <= searchResultLimit
? result

View File

@@ -289,7 +289,7 @@ const LayoutsControl = () => {
)
return (
<>
<div>
<div>
{layouts[layout] && 'worker' in layouts[layout] && (
<WorkerLayoutControl
@@ -310,7 +310,14 @@ const LayoutsControl = () => {
<GripIcon />
</Button>
</PopoverTrigger>
<PopoverContent side="right" align="center" className="p-1">
<PopoverContent
side="right"
align="start"
sideOffset={8}
collisionPadding={5}
sticky="always"
className="p-1 min-w-auto"
>
<Command>
<CommandList>
<CommandGroup>
@@ -331,7 +338,7 @@ const LayoutsControl = () => {
</PopoverContent>
</Popover>
</div>
</>
</div>
)
}

View File

@@ -4,21 +4,6 @@ import { useGraphStore } from '@/stores/graph'
import { Card } from '@/components/ui/Card'
import { ScrollArea } from '@/components/ui/ScrollArea'
// Predefined node types list
const PREDEFINED_TYPES = [
'person',
'category',
'geo',
'location',
'organization',
'event',
'equipment',
'weapon',
'animal',
'unknown',
'technology'
]
interface LegendProps {
className?: string
}
@@ -34,7 +19,7 @@ const Legend: React.FC<LegendProps> = ({ className }) => {
return (
<Card className={`p-2 max-w-xs ${className}`}>
<h3 className="text-sm font-medium mb-2">{t('graphPanel.legend')}</h3>
<ScrollArea className="max-h-40">
<ScrollArea className="max-h-80">
<div className="flex flex-col gap-1">
{Array.from(typeColorMap.entries()).map(([type, color]) => (
<div key={type} className="flex items-center gap-2">
@@ -43,9 +28,7 @@ const Legend: React.FC<LegendProps> = ({ className }) => {
style={{ backgroundColor: color }}
/>
<span className="text-xs truncate" title={type}>
{PREDEFINED_TYPES.includes(type.toLowerCase())
? t(`graphPanel.nodeTypes.${type.toLowerCase()}`)
: type}
{t(`graphPanel.nodeTypes.${type.toLowerCase()}`, type)}
</span>
</div>
))}

View File

@@ -245,8 +245,10 @@ export default function Settings() {
</PopoverTrigger>
<PopoverContent
side="right"
align="start"
className="mb-2 p-2"
align="end"
sideOffset={8}
collisionPadding={5}
className="p-2 max-w-[200px]"
onCloseAutoFocus={(e) => e.preventDefault()}
>
<div className="flex flex-col gap-2">

View File

@@ -55,6 +55,7 @@ export default function QuerySettings() {
<SelectItem value="global">{t('retrievePanel.querySettings.queryModeOptions.global')}</SelectItem>
<SelectItem value="hybrid">{t('retrievePanel.querySettings.queryModeOptions.hybrid')}</SelectItem>
<SelectItem value="mix">{t('retrievePanel.querySettings.queryModeOptions.mix')}</SelectItem>
<SelectItem value="bypass">{t('retrievePanel.querySettings.queryModeOptions.bypass')}</SelectItem>
</SelectGroup>
</SelectContent>
</Select>

View File

@@ -210,7 +210,13 @@ export function AsyncSelect<T>({
<ChevronsUpDown className="opacity-50" size={10} />
</Button>
</PopoverTrigger>
<PopoverContent className={cn('p-0', className)} onCloseAutoFocus={(e) => e.preventDefault()}>
<PopoverContent
className={cn('p-0', className)}
onCloseAutoFocus={(e) => e.preventDefault()}
align="start"
sideOffset={8}
collisionPadding={5}
>
<Command shouldFilter={false}>
<div className="relative w-full border-b">
<CommandInput

View File

@@ -7,20 +7,32 @@ const Popover = PopoverPrimitive.Root
const PopoverTrigger = PopoverPrimitive.Trigger
// Define the props type to include positioning props
type PopoverContentProps = React.ComponentPropsWithoutRef<typeof PopoverPrimitive.Content> & {
collisionPadding?: number | Partial<Record<'top' | 'right' | 'bottom' | 'left', number>>;
sticky?: 'partial' | 'always';
avoidCollisions?: boolean;
};
const PopoverContent = React.forwardRef<
React.ComponentRef<typeof PopoverPrimitive.Content>,
React.ComponentPropsWithoutRef<typeof PopoverPrimitive.Content>
>(({ className, align = 'center', sideOffset = 4, ...props }, ref) => (
<PopoverPrimitive.Content
ref={ref}
align={align}
sideOffset={sideOffset}
className={cn(
'bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 rounded-md border p-4 shadow-md outline-none',
className
)}
{...props}
/>
PopoverContentProps
>(({ className, align = 'center', sideOffset = 4, collisionPadding, sticky, avoidCollisions = false, ...props }, ref) => (
<PopoverPrimitive.Portal>
<PopoverPrimitive.Content
ref={ref}
align={align}
sideOffset={sideOffset}
collisionPadding={collisionPadding}
sticky={sticky}
avoidCollisions={avoidCollisions}
className={cn(
'bg-popover text-popover-foreground data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2 z-50 rounded-md border p-4 shadow-md outline-none',
className
)}
{...props}
/>
</PopoverPrimitive.Portal>
))
PopoverContent.displayName = PopoverPrimitive.Content.displayName

View File

@@ -158,7 +158,7 @@ export default function DocumentManager() {
}, []);
const [showPipelineStatus, setShowPipelineStatus] = useState(false)
const { t } = useTranslation()
const { t, i18n } = useTranslation()
const health = useBackendState.use.health()
const pipelineBusy = useBackendState.use.pipelineBusy()
const [docs, setDocs] = useState<DocsStatusesResponse | null>(null)
@@ -216,33 +216,42 @@ export default function DocumentManager() {
});
}, [sortField, sortDirection, showFileName]);
// Define a new type that includes status information
type DocStatusWithStatus = DocStatusResponse & { status: DocStatus };
const filteredAndSortedDocs = useMemo(() => {
if (!docs) return null;
let filteredDocs = { ...docs };
// Create a flat array of documents with status information
const allDocuments: DocStatusWithStatus[] = [];
if (statusFilter !== 'all') {
filteredDocs = {
...docs,
statuses: {
pending: [],
processing: [],
processed: [],
failed: [],
[statusFilter]: docs.statuses[statusFilter] || []
}
};
if (statusFilter === 'all') {
// When filter is 'all', include documents from all statuses
Object.entries(docs.statuses).forEach(([status, documents]) => {
documents.forEach(doc => {
allDocuments.push({
...doc,
status: status as DocStatus
});
});
});
} else {
// When filter is specific status, only include documents from that status
const documents = docs.statuses[statusFilter] || [];
documents.forEach(doc => {
allDocuments.push({
...doc,
status: statusFilter
});
});
}
if (!sortField || !sortDirection) return filteredDocs;
// Sort all documents together if sort field and direction are specified
if (sortField && sortDirection) {
return sortDocuments(allDocuments);
}
const sortedStatuses = Object.entries(filteredDocs.statuses).reduce((acc, [status, documents]) => {
const sortedDocuments = sortDocuments(documents);
acc[status as DocStatus] = sortedDocuments;
return acc;
}, {} as DocsStatusesResponse['statuses']);
return { ...filteredDocs, statuses: sortedStatuses };
return allDocuments;
}, [docs, sortField, sortDirection, statusFilter, sortDocuments]);
// Calculate document counts for each status
@@ -499,7 +508,7 @@ export default function DocumentManager() {
<CardTitle>{t('documentPanel.documentManager.uploadedTitle')}</CardTitle>
<div className="flex items-center gap-2">
<FilterIcon className="h-4 w-4" />
<div className="flex gap-1">
<div className="flex gap-1" dir={i18n.dir()}>
<Button
size="sm"
variant={statusFilter === 'all' ? 'secondary' : 'outline'}
@@ -641,69 +650,71 @@ export default function DocumentManager() {
</TableRow>
</TableHeader>
<TableBody className="text-sm overflow-auto">
{filteredAndSortedDocs?.statuses && Object.entries(filteredAndSortedDocs.statuses).flatMap(([status, documents]) =>
documents.map((doc) => (
<TableRow key={doc.id}>
<TableCell className="truncate font-mono overflow-visible max-w-[250px]">
{showFileName ? (
<>
<div className="group relative overflow-visible tooltip-container">
<div className="truncate">
{getDisplayFileName(doc, 30)}
</div>
<div className="invisible group-hover:visible tooltip">
{doc.file_path}
</div>
</div>
<div className="text-xs text-gray-500">{doc.id}</div>
</>
) : (
{filteredAndSortedDocs && filteredAndSortedDocs.map((doc) => (
<TableRow key={doc.id}>
<TableCell className="truncate font-mono overflow-visible max-w-[250px]">
{showFileName ? (
<>
<div className="group relative overflow-visible tooltip-container">
<div className="truncate">
{doc.id}
{getDisplayFileName(doc, 30)}
</div>
<div className="invisible group-hover:visible tooltip">
{doc.file_path}
</div>
</div>
)}
</TableCell>
<TableCell className="max-w-xs min-w-45 truncate overflow-visible">
<div className="text-xs text-gray-500">{doc.id}</div>
</>
) : (
<div className="group relative overflow-visible tooltip-container">
<div className="truncate">
{doc.content_summary}
{doc.id}
</div>
<div className="invisible group-hover:visible tooltip">
{doc.content_summary}
{doc.file_path}
</div>
</div>
</TableCell>
<TableCell>
{status === 'processed' && (
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
)}
{status === 'processing' && (
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
)}
{status === 'pending' && <span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>}
{status === 'failed' && <span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>}
{doc.error && (
<span className="ml-2 text-red-500" title={doc.error}>
</span>
)}
</TableCell>
<TableCell>{doc.content_length ?? '-'}</TableCell>
<TableCell>{doc.chunks_count ?? '-'}</TableCell>
<TableCell className="truncate">
{new Date(doc.created_at).toLocaleString()}
</TableCell>
<TableCell className="truncate">
{new Date(doc.updated_at).toLocaleString()}
</TableCell>
</TableRow>
)))
}
)}
</TableCell>
<TableCell className="max-w-xs min-w-45 truncate overflow-visible">
<div className="group relative overflow-visible tooltip-container">
<div className="truncate">
{doc.content_summary}
</div>
<div className="invisible group-hover:visible tooltip">
{doc.content_summary}
</div>
</div>
</TableCell>
<TableCell>
{doc.status === 'processed' && (
<span className="text-green-600">{t('documentPanel.documentManager.status.completed')}</span>
)}
{doc.status === 'processing' && (
<span className="text-blue-600">{t('documentPanel.documentManager.status.processing')}</span>
)}
{doc.status === 'pending' && (
<span className="text-yellow-600">{t('documentPanel.documentManager.status.pending')}</span>
)}
{doc.status === 'failed' && (
<span className="text-red-600">{t('documentPanel.documentManager.status.failed')}</span>
)}
{doc.error && (
<span className="ml-2 text-red-500" title={doc.error}>
</span>
)}
</TableCell>
<TableCell>{doc.content_length ?? '-'}</TableCell>
<TableCell>{doc.chunks_count ?? '-'}</TableCell>
<TableCell className="truncate">
{new Date(doc.created_at).toLocaleString()}
</TableCell>
<TableCell className="truncate">
{new Date(doc.updated_at).toLocaleString()}
</TableCell>
</TableRow>
))}
</TableBody>
</Table>
</div>

View File

@@ -60,6 +60,7 @@ export default function RetrievalTesting() {
}
return newMessages
})
scrollToBottom()
}
// Prepare query parameters
@@ -69,6 +70,7 @@ export default function RetrievalTesting() {
query: userMessage.content,
conversation_history: prevMessages
.filter((m) => m.isError !== true)
.slice(-(state.querySettings.history_turns || 0) * 2)
.map((m) => ({ role: m.role, content: m.content }))
}

View File

@@ -11,112 +11,146 @@ import { useSettingsStore } from '@/stores/settings'
import seedrandom from 'seedrandom'
// Predefined node colors - Primary colors
const NODE_COLORS = [
'#fdd868', // Yellow - UNKNOWN
'#e3493b', // Google Red - geo
'#1212a1', // Deep Cyan - weapon
'#0f705d', // Green - organization
'#a64dff', // Purple - technology
'#f46a9b', // Magenta
'#00bfa0', // Turquoise
'#fdcce5', // Pale Pink
'#0f558a', // Blue - location
'#b2e061', // Yellow Green
'#bd7ebe', // Light Violet - event
'#439bd6', // Cyan - person
'#094338', // Deep Green
'#dc0ab4', // Pink Red
'#fd7f6f', // Light Red - category
'#b04238', // Brown
];
const TYPE_SYNONYMS: Record<string, string> = {
'unknown': 'unknown',
'未知': 'unknown',
'other': 'unknown',
// Extended colors - Used when node types exceed primary colors
'category': 'category',
'类别': 'category',
'type': 'category',
'分类': 'category',
'organization': 'organization',
'组织': 'organization',
'org': 'organization',
'company': 'organization',
'公司': 'organization',
'机构': 'organization',
'event': 'event',
'事件': 'event',
'activity': 'event',
'活动': 'event',
'person': 'person',
'人物': 'person',
'people': 'person',
'human': 'person',
'人': 'person',
'animal': 'animal',
'动物': 'animal',
'creature': 'animal',
'生物': 'animal',
'geo': 'geo',
'地理': 'geo',
'geography': 'geo',
'地域': 'geo',
'location': 'location',
'地点': 'location',
'place': 'location',
'address': 'location',
'位置': 'location',
'地址': 'location',
'technology': 'technology',
'技术': 'technology',
'tech': 'technology',
'科技': 'technology',
'equipment': 'equipment',
'设备': 'equipment',
'device': 'equipment',
'装备': 'equipment',
'weapon': 'weapon',
'武器': 'weapon',
'arms': 'weapon',
'军火': 'weapon',
'object': 'object',
'物品': 'object',
'stuff': 'object',
'物体': 'object',
'group': 'group',
'群组': 'group',
'community': 'group',
'社区': 'group'
};
// Map from node type to color
const NODE_TYPE_COLORS: Record<string, string> = {
'unknown': '#f4d371', // Yellow
'category': '#e3493b', // GoogleRed
'organization': '#0f705d', // Green
'event': '#00bfa0', // Turquoise
'person': '#4169E1', // RoyalBlue
'animal': '#84a3e1', // SkyBlue
'geo': '#ff99cc', // Pale Pink
'location': '#cf6d17', // Carrot
'technology': '#b300b3', // Purple
'equipment': '#2F4F4F', // DarkSlateGray
'weapon': '#4421af', // DeepPurple
'object': '#00cc00', // Green
'group': '#0f558a', // NavyBlue
};
// Extended colors pool - Used for unknown node types
const EXTENDED_COLORS = [
'#5ad45a', // Light Green
'#5a2c6d', // Deep Violet
'#6c1313', // Dark Red
'#184868', // Dark Cyan
'#996600', // Yellow Brown
'#4421af', // Deep Purple
'#E67E22', // Carrot
'#ff1a1a', // Pure Red
'#5a2c6d', // DeepViolet
'#0000ff', // Blue
'#cd071e', // ChinaRed
'#00CED1', // DarkTurquoise
'#9b3a31', // DarkBrown
'#b2e061', // YellowGreen
'#bd7ebe', // LightViolet
'#6ef7b3', // LightGreen
'#003366', // DarkBlue
'#DEB887', // BurlyWood
];
// All available colors combined
const ALL_COLORS = [...NODE_COLORS, ...EXTENDED_COLORS];
// Helper function to get color based on node type
// Select color based on node type
const getNodeColorByType = (nodeType: string | undefined): string => {
const defaultColor = '#5D6D7E'; // Default color for nodes without a type or undefined type
// Return default color if node type is undefined
if (!nodeType) {
return defaultColor;
}
const defaultColor = '#5D6D7E';
// Get type color map from store
const normalizedType = nodeType ? nodeType.toLowerCase() : 'unknown';
const typeColorMap = useGraphStore.getState().typeColorMap;
// If this type already has an assigned color, return it
if (typeColorMap.has(nodeType)) {
return typeColorMap.get(nodeType) || defaultColor;
// Return previous color if already mapped
if (typeColorMap.has(normalizedType)) {
return typeColorMap.get(normalizedType) || defaultColor;
}
// Get all currently used colors
const usedColors = new Set<string>();
typeColorMap.forEach(color => {
usedColors.add(color);
});
// Assign color for new node type
// Use a simple hash function to map node type to color index
const getColorIndex = (str: string): number => {
let hash = 0;
for (let i = 0; i < str.length; i++) {
hash = ((hash << 5) - hash) + str.charCodeAt(i);
hash |= 0; // Convert to 32bit integer
}
// Ensure result is positive and within NODE_COLORS range only
return Math.abs(hash) % NODE_COLORS.length;
};
// Get initial color index from hash
const colorIndex = getColorIndex(nodeType);
let newColor = NODE_COLORS[colorIndex];
// If the color is already used, find the next available color
if (usedColors.has(newColor) && usedColors.size < ALL_COLORS.length) {
// First try to find an unused color in NODE_COLORS
let foundUnused = false;
for (let i = 0; i < NODE_COLORS.length; i++) {
const candidateColor = NODE_COLORS[i];
if (!usedColors.has(candidateColor)) {
newColor = candidateColor;
foundUnused = true;
break;
}
}
// If all NODE_COLORS are used, then try EXTENDED_COLORS
if (!foundUnused) {
newColor = defaultColor;
for (let i = 0; i < EXTENDED_COLORS.length; i++) {
const candidateColor = EXTENDED_COLORS[i];
if (!usedColors.has(candidateColor)) {
newColor = candidateColor;
break;
}
}
}
const standardType = TYPE_SYNONYMS[normalizedType];
if (standardType) {
const color = NODE_TYPE_COLORS[standardType];
// Update color mapping
const newMap = new Map(typeColorMap);
newMap.set(normalizedType, color);
useGraphStore.setState({ typeColorMap: newMap });
return color;
}
// If all colors are used, we'll still use the hashed color
// In a more advanced implementation, we could create color variants here
// For node types without a predefined color, use extended colors
// Find used extended colors
const usedExtendedColors = new Set(
Array.from(typeColorMap.entries())
.filter(([, color]) => !Object.values(NODE_TYPE_COLORS).includes(color))
.map(([, color]) => color)
);
// Find and use the first unused extended color
const unusedColor = EXTENDED_COLORS.find(color => !usedExtendedColors.has(color));
const newColor = unusedColor || defaultColor;
// Update color mapping
const newMap = new Map(typeColorMap);
newMap.set(nodeType, newColor);
newMap.set(normalizedType, newColor);
useGraphStore.setState({ typeColorMap: newMap });
return newColor;

View File

@@ -29,11 +29,14 @@ i18n
fr: { translation: fr },
ar: { translation: ar }
},
lng: getStoredLanguage(), // 使用存储的语言设置
lng: getStoredLanguage(), // Use stored language settings
fallbackLng: 'en',
interpolation: {
escapeValue: false
}
},
// Configuration to handle missing translations
returnEmptyString: false,
returnNull: false,
})
// Subscribe to language changes

View File

@@ -155,6 +155,8 @@
"weapon": "سلاح",
"animal": "حيوان",
"unknown": "غير معروف",
"object": "مصنوع",
"group": "مجموعة",
"technology": "العلوم"
},
"sideBar": {
@@ -294,13 +296,14 @@
"parametersTitle": "المعلمات",
"parametersDescription": "تكوين معلمات الاستعلام الخاص بك",
"queryMode": "وضع الاستعلام",
"queryModeTooltip": "حدد استراتيجية الاسترجاع:\n• ساذج: بحث أساسي بدون تقنيات متقدمة\n• محلي: استرجاع معلومات يعتمد على السياق\n• عالمي: يستخدم قاعدة المعرفة العالمية\n• مختلط: يجمع بين الاسترجاع المحلي والعالمي\n• مزيج: يدمج شبكة المعرفة مع الاسترجاع المتجهي",
"queryModeTooltip": "حدد استراتيجية الاسترجاع:\n• ساذج: بحث أساسي بدون تقنيات متقدمة\n• محلي: استرجاع معلومات يعتمد على السياق\n• عالمي: يستخدم قاعدة المعرفة العالمية\n• مختلط: يجمع بين الاسترجاع المحلي والعالمي\n• مزيج: يدمج شبكة المعرفة مع الاسترجاع المتجهي\n• تجاوز: يمرر الاستعلام مباشرة إلى LLM بدون استرجاع",
"queryModeOptions": {
"naive": "ساذج",
"local": "محلي",
"global": "عالمي",
"hybrid": "مختلط",
"mix": "مزيج"
"mix": "مزيج",
"bypass": "تجاوز"
},
"responseFormat": "تنسيق الرد",
"responseFormatTooltip": "يحدد تنسيق الرد. أمثلة:\n• فقرات متعددة\n• فقرة واحدة\n• نقاط نقطية",

View File

@@ -155,6 +155,7 @@
"weapon": "Weapon",
"animal": "Animal",
"unknown": "Unknown",
"object": "Object",
"technology": "Technology"
},
"sideBar": {
@@ -294,13 +295,14 @@
"parametersTitle": "Parameters",
"parametersDescription": "Configure your query parameters",
"queryMode": "Query Mode",
"queryModeTooltip": "Select the retrieval strategy:\n• Naive: Basic search without advanced techniques\n• Local: Context-dependent information retrieval\n• Global: Utilizes global knowledge base\n• Hybrid: Combines local and global retrieval\n• Mix: Integrates knowledge graph with vector retrieval",
"queryModeTooltip": "Select the retrieval strategy:\n• Naive: Basic search without advanced techniques\n• Local: Context-dependent information retrieval\n• Global: Utilizes global knowledge base\n• Hybrid: Combines local and global retrieval\n• Mix: Integrates knowledge graph with vector retrieval\n• Bypass: Passes query directly to LLM without retrieval",
"queryModeOptions": {
"naive": "Naive",
"local": "Local",
"global": "Global",
"hybrid": "Hybrid",
"mix": "Mix"
"mix": "Mix",
"bypass": "Bypass"
},
"responseFormat": "Response Format",
"responseFormatTooltip": "Defines the response format. Examples:\n• Multiple Paragraphs\n• Single Paragraph\n• Bullet Points",

View File

@@ -155,6 +155,8 @@
"weapon": "Arme",
"animal": "Animal",
"unknown": "Inconnu",
"object": "Objet",
"group": "Groupe",
"technology": "Technologie"
},
"sideBar": {
@@ -294,13 +296,14 @@
"parametersTitle": "Paramètres",
"parametersDescription": "Configurez vos paramètres de requête",
"queryMode": "Mode de requête",
"queryModeTooltip": "Sélectionnez la stratégie de récupération :\n• Naïf : Recherche de base sans techniques avancées\n• Local : Récupération d'informations dépendante du contexte\n• Global : Utilise une base de connaissances globale\n• Hybride : Combine récupération locale et globale\n• Mixte : Intègre le graphe de connaissances avec la récupération vectorielle",
"queryModeTooltip": "Sélectionnez la stratégie de récupération :\n• Naïf : Recherche de base sans techniques avancées\n• Local : Récupération d'informations dépendante du contexte\n• Global : Utilise une base de connaissances globale\n• Hybride : Combine récupération locale et globale\n• Mixte : Intègre le graphe de connaissances avec la récupération vectorielle\n• Bypass : Transmet directement la requête au LLM sans récupération",
"queryModeOptions": {
"naive": "Naïf",
"local": "Local",
"global": "Global",
"hybrid": "Hybride",
"mix": "Mixte"
"mix": "Mixte",
"bypass": "Bypass"
},
"responseFormat": "Format de réponse",
"responseFormatTooltip": "Définit le format de la réponse. Exemples :\n• Plusieurs paragraphes\n• Paragraphe unique\n• Points à puces",

View File

@@ -145,8 +145,8 @@
},
"legend": "图例",
"nodeTypes": {
"person": "人物",
"category": "类",
"person": "人物角色",
"category": "类",
"geo": "地理名称",
"location": "位置",
"organization": "组织机构",
@@ -155,6 +155,8 @@
"weapon": "武器",
"animal": "动物",
"unknown": "未知",
"object": "物品",
"group": "群组",
"technology": "技术"
},
"sideBar": {
@@ -294,13 +296,14 @@
"parametersTitle": "参数",
"parametersDescription": "配置查询参数",
"queryMode": "查询模式",
"queryModeTooltip": "选择检索策略:\n• Naive基础搜索无高级技术\n• Local上下文相关信息检索\n• Global利用全局知识库\n• Hybrid结合本地和全局检索\n• Mix整合知识图谱和向量检索",
"queryModeTooltip": "选择检索策略:\n• Naive基础搜索无高级技术\n• Local上下文相关信息检索\n• Global利用全局知识库\n• Hybrid结合本地和全局检索\n• Mix整合知识图谱和向量检索\n• Bypass直接传递查询到LLM不进行检索",
"queryModeOptions": {
"naive": "Naive",
"local": "Local",
"global": "Global",
"hybrid": "Hybrid",
"mix": "Mix"
"mix": "Mix",
"bypass": "Bypass"
},
"responseFormat": "响应格式",
"responseFormatTooltip": "定义响应格式。例如:\n• 多段落\n• 单段落\n• 要点",