Merge branch 'main' into edit-node

2025-04-13 11:51:55 +08:00
parent 53f18b6c84 ecfe4209c3
commit 4900cf07d9
25 changed files with 241 additions and 251 deletions
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -102,6 +102,10 @@ lightrag-gunicorn --workers 4
 - `--log-level`：日志级别（默认：INFO）
 - --input-dir：指定要扫描文档的目录（默认：./input）
 > ** 要求将.env文件置于启动目录中是经过特意设计的**。 这样做的目的是支持用户同时启动多个LightRAG实例，并为不同实例配置不同的.env文件。
 > **修改.env文件后，您需要重新打开终端以使新设置生效**。 这是因为每次启动时，LightRAG Server会将.env文件中的环境变量加载至系统环境变量，且系统环境变量的设置具有更高优先级。
 ### 启动时自动扫描
 当使用 `--auto-scan-at-startup` 参数启动任何服务器时，系统将自动：
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -106,6 +106,8 @@ Here are some commonly used startup parameters:
 > The requirement for the .env file to be in the startup directory is intentionally designed this way. The purpose is to support users in launching multiple LightRAG instances simultaneously, allowing different .env files for different instances.
 > **After changing the .env file, you need to open a new terminal to make  the new settings take effect.** This because the LightRAG Server will load the environment variables from .env into the system environment variables each time it starts, and LightRAG Server will prioritize the settings in the system environment variables.
 ### Auto scan on startup
 When starting any of the servers with the `--auto-scan-at-startup` parameter, the system will automatically:
--- a/lightrag/api/init.py
+++ b/lightrag/api/init.py
@@ -1 +1 @@
-__api_version__ = "0146"
+__api_version__ = "0148"
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -499,6 +499,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
                    content = result.document.export_to_markdown()
                else:
                    if not pm.is_installed("python-docx"):  # type: ignore
                        try:
                            pm.install("python-docx")
                        except Exception:
                            pm.install("docx")
                    from docx import Document  # type: ignore
                    from io import BytesIO
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -308,7 +308,7 @@ class OllamaAPI:
                            "Cache-Control": "no-cache",
                            "Connection": "keep-alive",
                            "Content-Type": "application/x-ndjson",
-                            "X-Accel-Buffering": "no",  # 确保在Nginx代理时正确处理流式响应
+                            "X-Accel-Buffering": "no",  # Ensure proper handling of streaming responses in Nginx proxy
                        },
                    )
                else:
--- a/lightrag/api/routers/query_routes.py
+++ b/lightrag/api/routers/query_routes.py
@@ -22,7 +22,7 @@ class QueryRequest(BaseModel):
        description="The query text",
    )
-    mode: Literal["local", "global", "hybrid", "naive", "mix"] = Field(
+    mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = Field(
        default="hybrid",
        description="Query mode",
    )
--- a/lightrag/api/webui/assets/index-CkwV8nfm.js
+++ b/lightrag/api/webui/assets/index-CkwV8nfm.js
--- a/lightrag/api/webui/index.html
+++ b/lightrag/api/webui/index.html
@@ -8,7 +8,7 @@
    <link rel="icon" type="image/svg+xml" href="logo.png" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>Lightrag</title>
-    <script type="module" crossorigin src="/webui/assets/index-DSVCuARS.js"></script>
+    <script type="module" crossorigin src="/webui/assets/index-CkwV8nfm.js"></script>
    <link rel="stylesheet" crossorigin href="/webui/assets/index-CTB4Vp_z.css">
  </head>
  <body>
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -12,7 +12,6 @@ from typing import (
    TypeVar,
    Callable,
 )
 import numpy as np
 from .utils import EmbeddingFunc
 from .types import KnowledgeGraph
@@ -36,7 +35,7 @@ T = TypeVar("T")
 class QueryParam:
    """Configuration parameters for query execution in LightRAG."""
-    mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
+    mode: Literal["local", "global", "hybrid", "naive", "mix", "bypass"] = "global"
    """Specifies the retrieval mode:
    - "local": Focuses on context-dependent information.
    - "global": Utilizes global knowledge.
@@ -281,63 +280,164 @@ class BaseGraphStorage(StorageNameSpace, ABC):
    @abstractmethod
    async def has_node(self, node_id: str) -> bool:
-        """Check if an edge exists in the graph."""
+        """Check if a node exists in the graph.
        Args:
            node_id: The ID of the node to check
        Returns:
            True if the node exists, False otherwise
        """
    @abstractmethod
    async def has_edge(self, source_node_id: str, target_node_id: str) -> bool:
-        """Get the degree of a node."""
+        """Check if an edge exists between two nodes.
        Args:
            source_node_id: The ID of the source node
            target_node_id: The ID of the target node
        Returns:
            True if the edge exists, False otherwise
        """
    @abstractmethod
    async def node_degree(self, node_id: str) -> int:
-        """Get the degree of an edge."""
+        """Get the degree (number of connected edges) of a node.
        Args:
            node_id: The ID of the node
        Returns:
            The number of edges connected to the node
        """
    @abstractmethod
    async def edge_degree(self, src_id: str, tgt_id: str) -> int:
-        """Get a node by its id."""
+        """Get the total degree of an edge (sum of degrees of its source and target nodes).
        Args:
            src_id: The ID of the source node
            tgt_id: The ID of the target node
        Returns:
            The sum of the degrees of the source and target nodes
        """
    @abstractmethod
    async def get_node(self, node_id: str) -> dict[str, str] | None:
-        """Get node by its label identifier, return only node properties"""
+        """Get node by its ID, returning only node properties.
        Args:
            node_id: The ID of the node to retrieve
        Returns:
            A dictionary of node properties if found, None otherwise
        """
    @abstractmethod
    async def get_edge(
        self, source_node_id: str, target_node_id: str
    ) -> dict[str, str] | None:
-        """Get edge properties between two nodes"""
+        """Get edge properties between two nodes.
        Args:
            source_node_id: The ID of the source node
            target_node_id: The ID of the target node
        Returns:
            A dictionary of edge properties if found, None otherwise
        """
    @abstractmethod
    async def get_node_edges(self, source_node_id: str) -> list[tuple[str, str]] | None:
-        """Upsert a node into the graph."""
+        """Get all edges connected to a node.
        Args:
            source_node_id: The ID of the node to get edges for
        Returns:
            A list of (source_id, target_id) tuples representing edges,
            or None if the node doesn't exist
        """
    @abstractmethod
    async def upsert_node(self, node_id: str, node_data: dict[str, str]) -> None:
-        """Upsert an edge into the graph."""
+        """Insert a new node or update an existing node in the graph.
        Importance notes for in-memory storage:
        1. Changes will be persisted to disk during the next index_done_callback
        2. Only one process should updating the storage at a time before index_done_callback,
           KG-storage-log should be used to avoid data corruption
        Args:
            node_id: The ID of the node to insert or update
            node_data: A dictionary of node properties
        """
    @abstractmethod
    async def upsert_edge(
        self, source_node_id: str, target_node_id: str, edge_data: dict[str, str]
    ) -> None:
        """Insert a new edge or update an existing edge in the graph.
        Importance notes for in-memory storage:
        1. Changes will be persisted to disk during the next index_done_callback
        2. Only one process should updating the storage at a time before index_done_callback,
           KG-storage-log should be used to avoid data corruption
        Args:
            source_node_id: The ID of the source node
            target_node_id: The ID of the target node
            edge_data: A dictionary of edge properties
        """
    @abstractmethod
    async def delete_node(self, node_id: str) -> None:
        """Delete a node from the graph.
        Importance notes for in-memory storage:
        1. Changes will be persisted to disk during the next index_done_callback
        2. Only one process should updating the storage at a time before index_done_callback,
           KG-storage-log should be used to avoid data corruption
        Args:
            node_id: The ID of the node to delete
        """
    @abstractmethod
-    async def delete_node(self, node_id: str) -> None:
+    async def remove_nodes(self, nodes: list[str]):
-        """Embed nodes using an algorithm."""
+        """Delete multiple nodes
        Importance notes:
        1. Changes will be persisted to disk during the next index_done_callback
        2. Only one process should updating the storage at a time before index_done_callback,
           KG-storage-log should be used to avoid data corruption
        Args:
            nodes: List of node IDs to be deleted
        """
    @abstractmethod
-    async def embed_nodes(
+    async def remove_edges(self, edges: list[tuple[str, str]]):
-        self, algorithm: str
+        """Delete multiple edges
-    ) -> tuple[np.ndarray[Any, Any], list[str]]:
+
-        """Get all labels in the graph."""
+        Importance notes:
        1. Changes will be persisted to disk during the next index_done_callback
        2. Only one process should updating the storage at a time before index_done_callback,
           KG-storage-log should be used to avoid data corruption
        Args:
            edges: List of edges to be deleted, each edge is a (source, target) tuple
        """
    @abstractmethod
    async def get_all_labels(self) -> list[str]:
-        """Get a knowledge graph of a node."""
+        """Get all labels in the graph.
        Returns:
            A list of all node labels in the graph, sorted alphabetically
        """
    @abstractmethod
    async def get_knowledge_graph(
--- a/lightrag/kg/age_impl.py
+++ b/lightrag/kg/age_impl.py
@@ -6,7 +6,6 @@ import sys
 from contextlib import asynccontextmanager
 from dataclasses import dataclass
 from typing import Any, Dict, List, NamedTuple, Optional, Union, final
 import numpy as np
 import pipmaster as pm
 from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
@@ -89,11 +88,6 @@ class AGEStorage(BaseGraphStorage):
        return None
    def __post_init__(self):
        self._node_embed_algorithms = {
            "node2vec": self._node2vec_embed,
        }
    async def close(self):
        if self._driver:
            await self._driver.close()
@@ -593,9 +587,6 @@ class AGEStorage(BaseGraphStorage):
            logger.error("Error during edge upsert: {%s}", e)
            raise
    async def _node2vec_embed(self):
        print("Implemented but never called.")
    @asynccontextmanager
    async def _get_pool_connection(self, timeout: Optional[float] = None):
        """Workaround for a psycopg_pool bug"""
@@ -668,21 +659,6 @@ class AGEStorage(BaseGraphStorage):
                logger.error(f"Error during edge deletion: {str(e)}")
                raise
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
        """Embed nodes using the specified algorithm
        Args:
            algorithm: Name of the embedding algorithm
        Returns:
            tuple: (embedding matrix, list of node identifiers)
        """
        if algorithm not in self._node_embed_algorithms:
            raise ValueError(f"Node embedding algorithm {algorithm} not supported")
        return await self._node_embed_algorithms[algorithm]()
    async def get_all_labels(self) -> list[str]:
        """Get all node labels in the database
--- a/lightrag/kg/gremlin_impl.py
+++ b/lightrag/kg/gremlin_impl.py
@@ -6,9 +6,6 @@ import pipmaster as pm
 from dataclasses import dataclass
 from typing import Any, Dict, List, final
 import numpy as np
 from tenacity import (
    retry,
    retry_if_exception_type,
@@ -72,11 +69,6 @@ class GremlinStorage(BaseGraphStorage):
            transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
        )
    def __post_init__(self):
        self._node_embed_algorithms = {
            "node2vec": self._node2vec_embed,
        }
    async def close(self):
        if self._driver:
            self._driver.close()
@@ -392,9 +384,6 @@ class GremlinStorage(BaseGraphStorage):
            logger.error("Error during edge upsert: {%s}", e)
            raise
    async def _node2vec_embed(self):
        print("Implemented but never called.")
    async def delete_node(self, node_id: str) -> None:
        """Delete a node with the specified entity_name
@@ -419,27 +408,6 @@ class GremlinStorage(BaseGraphStorage):
            logger.error(f"Error during node deletion: {str(e)}")
            raise
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
        """
        Embed nodes using the specified algorithm.
        Currently, only node2vec is supported but never called.
        Args:
            algorithm: The name of the embedding algorithm to use
        Returns:
            A tuple of (embeddings, node_ids)
        Raises:
            NotImplementedError: If the specified algorithm is not supported
            ValueError: If the algorithm is not supported
        """
        if algorithm not in self._node_embed_algorithms:
            raise ValueError(f"Node embedding algorithm {algorithm} not supported")
        return await self._node_embed_algorithms[algorithm]()
    async def get_all_labels(self) -> list[str]:
        """
        Get all node entity_names in the graph
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -663,20 +663,6 @@ class MongoGraphStorage(BaseGraphStorage):
        # Remove the node doc
        await self.collection.delete_one({"_id": node_id})
    #
    # -------------------------------------------------------------------------
    # EMBEDDINGS (NOT IMPLEMENTED)
    # -------------------------------------------------------------------------
    #
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
        """
        Placeholder for demonstration, raises NotImplementedError.
        """
        raise NotImplementedError("Node embedding is not used in lightrag.")
    #
    # -------------------------------------------------------------------------
    # QUERY
--- a/lightrag/kg/neo4j_impl.py
+++ b/lightrag/kg/neo4j_impl.py
@@ -2,8 +2,7 @@ import inspect
 import os
 import re
 from dataclasses import dataclass
-from typing import Any, final
+from typing import final
 import numpy as np
 import configparser
@@ -51,11 +50,6 @@ class Neo4JStorage(BaseGraphStorage):
        )
        self._driver = None
    def __post_init__(self):
        self._node_embed_algorithms = {
            "node2vec": self._node2vec_embed,
        }
    async def initialize(self):
        URI = os.environ.get("NEO4J_URI", config.get("neo4j", "uri", fallback=None))
        USERNAME = os.environ.get(
@@ -635,9 +629,6 @@ class Neo4JStorage(BaseGraphStorage):
            logger.error(f"Error during edge upsert: {str(e)}")
            raise
    async def _node2vec_embed(self):
        print("Implemented but never called.")
    async def get_knowledge_graph(
        self,
        node_label: str,
@@ -1126,11 +1117,6 @@ class Neo4JStorage(BaseGraphStorage):
                logger.error(f"Error during edge deletion: {str(e)}")
                raise
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
        raise NotImplementedError
    async def drop(self) -> dict[str, str]:
        """Drop all data from storage and clean up resources
--- a/lightrag/kg/networkx_impl.py
+++ b/lightrag/kg/networkx_impl.py
@@ -1,7 +1,6 @@
 import os
 from dataclasses import dataclass
-from typing import Any, final
+from typing import final
 import numpy as np
 from lightrag.types import KnowledgeGraph, KnowledgeGraphNode, KnowledgeGraphEdge
 from lightrag.utils import logger
@@ -16,7 +15,6 @@ if not pm.is_installed("graspologic"):
    pm.install("graspologic")
 import networkx as nx
 from graspologic import embed
 from .shared_storage import (
    get_storage_lock,
    get_update_flag,
@@ -42,40 +40,6 @@ class NetworkXStorage(BaseGraphStorage):
        )
        nx.write_graphml(graph, file_name)
    # TODO：deprecated, remove later
    @staticmethod
    def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
        """Refer to https://github.com/microsoft/graphrag/index/graph/utils/stable_lcc.py
        Ensure an undirected graph with the same relationships will always be read the same way.
        """
        fixed_graph = nx.DiGraph() if graph.is_directed() else nx.Graph()
        sorted_nodes = graph.nodes(data=True)
        sorted_nodes = sorted(sorted_nodes, key=lambda x: x[0])
        fixed_graph.add_nodes_from(sorted_nodes)
        edges = list(graph.edges(data=True))
        if not graph.is_directed():
            def _sort_source_target(edge):
                source, target, edge_data = edge
                if source > target:
                    temp = source
                    source = target
                    target = temp
                return source, target, edge_data
            edges = [_sort_source_target(edge) for edge in edges]
        def _get_edge_key(source: Any, target: Any) -> str:
            return f"{source} -> {target}"
        edges = sorted(edges, key=lambda x: _get_edge_key(x[0], x[1]))
        fixed_graph.add_edges_from(edges)
        return fixed_graph
    def __post_init__(self):
        self._graphml_xml_file = os.path.join(
            self.global_config["working_dir"], f"graph_{self.namespace}.graphml"
@@ -94,10 +58,6 @@ class NetworkXStorage(BaseGraphStorage):
            logger.info("Created new empty graph")
        self._graph = preloaded_graph or nx.Graph()
        self._node_embed_algorithms = {
            "node2vec": self._node2vec_embed,
        }
    async def initialize(self):
        """Initialize storage data"""
        # Get the update flag for cross-process update notification
@@ -191,24 +151,6 @@ class NetworkXStorage(BaseGraphStorage):
        else:
            logger.warning(f"Node {node_id} not found in the graph for deletion.")
    # TODO: NOT USED
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
        if algorithm not in self._node_embed_algorithms:
            raise ValueError(f"Node embedding algorithm {algorithm} not supported")
        return await self._node_embed_algorithms[algorithm]()
    # TODO: NOT USED
    async def _node2vec_embed(self):
        graph = await self._get_graph()
        embeddings, nodes = embed.node2vec_embed(
            graph,
            **self.global_config["node2vec_params"],
        )
        nodes_ids = [graph.nodes[node_id]["id"] for node_id in nodes]
        return embeddings, nodes_ids
    async def remove_nodes(self, nodes: list[str]):
        """Delete multiple nodes
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -1021,9 +1021,6 @@ class PGGraphQueryException(Exception):
 class PGGraphStorage(BaseGraphStorage):
    def __post_init__(self):
        self.graph_name = self.namespace or os.environ.get("AGE_GRAPH_NAME", "lightrag")
        self._node_embed_algorithms = {
            "node2vec": self._node2vec_embed,
        }
        self.db: PostgreSQLDB | None = None
    async def initialize(self):
@@ -1396,9 +1393,6 @@ class PGGraphStorage(BaseGraphStorage):
            )
            raise
    async def _node2vec_embed(self):
        print("Implemented but never called.")
    async def delete_node(self, node_id: str) -> None:
        """
        Delete a node from the graph.
@@ -1485,24 +1479,6 @@ class PGGraphStorage(BaseGraphStorage):
        labels = [result["label"] for result in results]
        return labels
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
        """
        Generate node embeddings using the specified algorithm.
        Args:
            algorithm (str): The name of the embedding algorithm to use.
        Returns:
            tuple[np.ndarray[Any, Any], list[str]]: A tuple containing the embeddings and the corresponding node IDs.
        """
        if algorithm not in self._node_embed_algorithms:
            raise ValueError(f"Unsupported embedding algorithm: {algorithm}")
        embed_func = self._node_embed_algorithms[algorithm]
        return await embed_func()
    async def get_knowledge_graph(
        self,
        node_label: str,
--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -800,13 +800,6 @@ class TiDBGraphStorage(BaseGraphStorage):
        }
        await self.db.execute(merge_sql, data)
    async def embed_nodes(
        self, algorithm: str
    ) -> tuple[np.ndarray[Any, Any], list[str]]:
        if algorithm not in self._node_embed_algorithms:
            raise ValueError(f"Node embedding algorithm {algorithm} not supported")
        return await self._node_embed_algorithms[algorithm]()
    # Query
    async def has_node(self, node_id: str) -> bool:
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -155,31 +155,6 @@ class LightRAG:
    Defaults to `chunking_by_token_size` if not specified.
    """
    # Node embedding
    # ---
    node_embedding_algorithm: str = field(default="node2vec")
    """Algorithm used for node embedding in knowledge graphs."""
    node2vec_params: dict[str, int] = field(
        default_factory=lambda: {
            "dimensions": 1536,
            "num_walks": 10,
            "walk_length": 40,
            "window_size": 2,
            "iterations": 3,
            "random_seed": 3,
        }
    )
    """Configuration for the node2vec embedding algorithm:
    - dimensions: Number of dimensions for embeddings.
    - num_walks: Number of random walks per node.
    - walk_length: Number of steps per random walk.
    - window_size: Context window size for training.
    - iterations: Number of iterations for training.
    - random_seed: Seed value for reproducibility.
    """
    # Embedding
    # ---
@@ -904,8 +879,10 @@ class LightRAG:
                        async with pipeline_status_lock:
                            log_message = f"Processing file: {file_path}"
                            logger.info(log_message)
                            pipeline_status["history_messages"].append(log_message)
                            log_message = f"Processing d-id: {doc_id}"
                            logger.info(log_message)
                            pipeline_status["latest_message"] = log_message
                            pipeline_status["history_messages"].append(log_message)
@@ -1381,6 +1358,16 @@ class LightRAG:
                hashing_kv=self.llm_response_cache,  # Directly use llm_response_cache
                system_prompt=system_prompt,
            )
        elif param.mode == "bypass":
            # Bypass mode: directly use LLM without knowledge retrieval
            use_llm_func = param.model_func or global_config["llm_model_func"]
            param.stream = True if param.stream is None else param.stream
            response = await use_llm_func(
                query.strip(),
                system_prompt=system_prompt,
                history_messages=param.conversation_history,
                stream=param.stream,
            )
        else:
            raise ValueError(f"Unknown mode {param.mode}")
        await self._query_done()
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -16,6 +16,7 @@ from .utils import (
    encode_string_by_tiktoken,
    is_float_regex,
    list_of_list_to_csv,
    normalize_extracted_info,
    pack_user_ass_to_openai_messages,
    split_string_by_multi_markers,
    truncate_list_by_token_size,
@@ -163,6 +164,9 @@ async def _handle_single_entity_extraction(
        )
        return None
    # Normalize entity name
    entity_name = normalize_extracted_info(entity_name, is_entity=True)
    # Clean and validate entity type
    entity_type = clean_str(record_attributes[2]).strip('"')
    if not entity_type.strip() or entity_type.startswith('("'):
@@ -172,7 +176,9 @@ async def _handle_single_entity_extraction(
        return None
    # Clean and validate description
-    entity_description = clean_str(record_attributes[3]).strip('"')
+    entity_description = clean_str(record_attributes[3])
    entity_description = normalize_extracted_info(entity_description)
    if not entity_description.strip():
        logger.warning(
            f"Entity extraction error: empty description for entity '{entity_name}' of type '{entity_type}'"
@@ -196,13 +202,20 @@ async def _handle_single_relationship_extraction(
    if len(record_attributes) < 5 or record_attributes[0] != '"relationship"':
        return None
    # add this record as edge
-    source = clean_str(record_attributes[1]).strip('"')
+    source = clean_str(record_attributes[1])
-    target = clean_str(record_attributes[2]).strip('"')
+    target = clean_str(record_attributes[2])
-    edge_description = clean_str(record_attributes[3]).strip('"')
+
-    edge_keywords = clean_str(record_attributes[4]).strip('"')
+    # Normalize source and target entity names
    source = normalize_extracted_info(source, is_entity=True)
    target = normalize_extracted_info(target, is_entity=True)
    edge_description = clean_str(record_attributes[3])
    edge_description = normalize_extracted_info(edge_description)
    edge_keywords = clean_str(record_attributes[4]).strip('"').strip("'")
    edge_source_id = chunk_key
    weight = (
-        float(record_attributes[-1].strip('"'))
+        float(record_attributes[-1].strip('"').strip("'"))
        if is_float_regex(record_attributes[-1])
        else 1.0
    )
@@ -642,7 +655,7 @@ async def extract_entities(
        processed_chunks += 1
        entities_count = len(maybe_nodes)
        relations_count = len(maybe_edges)
-        log_message = f"Chk {processed_chunks}/{total_chunks}: extracted {entities_count} Ent + {relations_count} Rel (deduplicated)"
+        log_message = f"Chk {processed_chunks}/{total_chunks}: extracted {entities_count} Ent + {relations_count} Rel"
        logger.info(log_message)
        if pipeline_status is not None:
            async with pipeline_status_lock:
--- a/lightrag/utils.py
+++ b/lightrag/utils.py
@@ -1006,6 +1006,50 @@ def get_content_summary(content: str, max_length: int = 250) -> str:
    return content[:max_length] + "..."
 def normalize_extracted_info(name: str, is_entity=False) -> str:
    """Normalize entity/relation names and description with the following rules:
    1. Remove spaces between Chinese characters
    2. Remove spaces between Chinese characters and English letters/numbers
    3. Preserve spaces within English text and numbers
    4. Replace Chinese parentheses with English parentheses
    5. Replace Chinese dash with English dash
    Args:
        name: Entity name to normalize
    Returns:
        Normalized entity name
    """
    # Replace Chinese parentheses with English parentheses
    name = name.replace("（", "(").replace("）", ")")
    # Replace Chinese dash with English dash
    name = name.replace("—", "-").replace("－", "-")
    # Use regex to remove spaces between Chinese characters
    # Regex explanation:
    # (?<=[\u4e00-\u9fa5]): Positive lookbehind for Chinese character
    # \s+: One or more whitespace characters
    # (?=[\u4e00-\u9fa5]): Positive lookahead for Chinese character
    name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[\u4e00-\u9fa5])", "", name)
    # Remove spaces between Chinese and English/numbers
    name = re.sub(r"(?<=[\u4e00-\u9fa5])\s+(?=[a-zA-Z0-9])", "", name)
    name = re.sub(r"(?<=[a-zA-Z0-9])\s+(?=[\u4e00-\u9fa5])", "", name)
    # Remove English quotation marks from the beginning and end
    name = name.strip('"').strip("'")
    if is_entity:
        # remove Chinese quotes
        name = name.replace("“", "").replace("”", "").replace("‘", "").replace("’", "")
        # remove English queotes in and around chinese
        name = re.sub(r"['\"]+(?=[\u4e00-\u9fa5])", "", name)
        name = re.sub(r"(?<=[\u4e00-\u9fa5])['\"]+", "", name)
    return name
 def clean_text(text: str) -> str:
    """Clean text by removing null bytes (0x00) and whitespace
--- a/lightrag_webui/src/api/lightrag.ts
+++ b/lightrag_webui/src/api/lightrag.ts
@@ -65,8 +65,9 @@ export type LightragDocumentsScanProgress = {
 * - "global": Utilizes global knowledge.
 * - "hybrid": Combines local and global retrieval methods.
 * - "mix": Integrates knowledge graph and vector retrieval.
 * - "bypass": Bypasses knowledge retrieval and directly uses the LLM.
 */
-export type QueryMode = 'naive' | 'local' | 'global' | 'hybrid' | 'mix'
+export type QueryMode = 'naive' | 'local' | 'global' | 'hybrid' | 'mix' | 'bypass'
 export type Message = {
  role: 'user' | 'assistant' | 'system'
--- a/lightrag_webui/src/components/retrieval/QuerySettings.tsx
+++ b/lightrag_webui/src/components/retrieval/QuerySettings.tsx
@@ -55,6 +55,7 @@ export default function QuerySettings() {
                    <SelectItem value="global">{t('retrievePanel.querySettings.queryModeOptions.global')}</SelectItem>
                    <SelectItem value="hybrid">{t('retrievePanel.querySettings.queryModeOptions.hybrid')}</SelectItem>
                    <SelectItem value="mix">{t('retrievePanel.querySettings.queryModeOptions.mix')}</SelectItem>
                    <SelectItem value="bypass">{t('retrievePanel.querySettings.queryModeOptions.bypass')}</SelectItem>
                  </SelectGroup>
                </SelectContent>
              </Select>
--- a/lightrag_webui/src/locales/ar.json
+++ b/lightrag_webui/src/locales/ar.json
@@ -306,13 +306,14 @@
      "parametersTitle": "المعلمات",
      "parametersDescription": "تكوين معلمات الاستعلام الخاص بك",
      "queryMode": "وضع الاستعلام",
-      "queryModeTooltip": "حدد استراتيجية الاسترجاع:\n• ساذج: بحث أساسي بدون تقنيات متقدمة\n• محلي: استرجاع معلومات يعتمد على السياق\n• عالمي: يستخدم قاعدة المعرفة العالمية\n• مختلط: يجمع بين الاسترجاع المحلي والعالمي\n• مزيج: يدمج شبكة المعرفة مع الاسترجاع المتجهي",
+      "queryModeTooltip": "حدد استراتيجية الاسترجاع:\n• ساذج: بحث أساسي بدون تقنيات متقدمة\n• محلي: استرجاع معلومات يعتمد على السياق\n• عالمي: يستخدم قاعدة المعرفة العالمية\n• مختلط: يجمع بين الاسترجاع المحلي والعالمي\n• مزيج: يدمج شبكة المعرفة مع الاسترجاع المتجهي\n• تجاوز: يمرر الاستعلام مباشرة إلى LLM بدون استرجاع",
      "queryModeOptions": {
        "naive": "ساذج",
        "local": "محلي",
        "global": "عالمي",
        "hybrid": "مختلط",
-        "mix": "مزيج"
+        "mix": "مزيج",
        "bypass": "تجاوز"
      },
      "responseFormat": "تنسيق الرد",
      "responseFormatTooltip": "يحدد تنسيق الرد. أمثلة:\n• فقرات متعددة\n• فقرة واحدة\n• نقاط نقطية",
--- a/lightrag_webui/src/locales/en.json
+++ b/lightrag_webui/src/locales/en.json
@@ -305,13 +305,14 @@
      "parametersTitle": "Parameters",
      "parametersDescription": "Configure your query parameters",
      "queryMode": "Query Mode",
-      "queryModeTooltip": "Select the retrieval strategy:\n• Naive: Basic search without advanced techniques\n• Local: Context-dependent information retrieval\n• Global: Utilizes global knowledge base\n• Hybrid: Combines local and global retrieval\n• Mix: Integrates knowledge graph with vector retrieval",
+      "queryModeTooltip": "Select the retrieval strategy:\n• Naive: Basic search without advanced techniques\n• Local: Context-dependent information retrieval\n• Global: Utilizes global knowledge base\n• Hybrid: Combines local and global retrieval\n• Mix: Integrates knowledge graph with vector retrieval\n• Bypass: Passes query directly to LLM without retrieval",
      "queryModeOptions": {
        "naive": "Naive",
        "local": "Local",
        "global": "Global",
        "hybrid": "Hybrid",
-        "mix": "Mix"
+        "mix": "Mix",
        "bypass": "Bypass"
      },
      "responseFormat": "Response Format",
      "responseFormatTooltip": "Defines the response format. Examples:\n• Multiple Paragraphs\n• Single Paragraph\n• Bullet Points",
--- a/lightrag_webui/src/locales/fr.json
+++ b/lightrag_webui/src/locales/fr.json
@@ -306,13 +306,14 @@
      "parametersTitle": "Paramètres",
      "parametersDescription": "Configurez vos paramètres de requête",
      "queryMode": "Mode de requête",
-      "queryModeTooltip": "Sélectionnez la stratégie de récupération :\n• Naïf : Recherche de base sans techniques avancées\n• Local : Récupération d'informations dépendante du contexte\n• Global : Utilise une base de connaissances globale\n• Hybride : Combine récupération locale et globale\n• Mixte : Intègre le graphe de connaissances avec la récupération vectorielle",
+      "queryModeTooltip": "Sélectionnez la stratégie de récupération :\n• Naïf : Recherche de base sans techniques avancées\n• Local : Récupération d'informations dépendante du contexte\n• Global : Utilise une base de connaissances globale\n• Hybride : Combine récupération locale et globale\n• Mixte : Intègre le graphe de connaissances avec la récupération vectorielle\n• Bypass : Transmet directement la requête au LLM sans récupération",
      "queryModeOptions": {
        "naive": "Naïf",
        "local": "Local",
        "global": "Global",
        "hybrid": "Hybride",
-        "mix": "Mixte"
+        "mix": "Mixte",
        "bypass": "Bypass"
      },
      "responseFormat": "Format de réponse",
      "responseFormatTooltip": "Définit le format de la réponse. Exemples :\n• Plusieurs paragraphes\n• Paragraphe unique\n• Points à puces",
--- a/lightrag_webui/src/locales/zh.json
+++ b/lightrag_webui/src/locales/zh.json
@@ -306,13 +306,14 @@
      "parametersTitle": "参数",
      "parametersDescription": "配置查询参数",
      "queryMode": "查询模式",
-      "queryModeTooltip": "选择检索策略：\n• Naive：基础搜索，无高级技术\n• Local：上下文相关信息检索\n• Global：利用全局知识库\n• Hybrid：结合本地和全局检索\n• Mix：整合知识图谱和向量检索",
+      "queryModeTooltip": "选择检索策略：\n• Naive：基础搜索，无高级技术\n• Local：上下文相关信息检索\n• Global：利用全局知识库\n• Hybrid：结合本地和全局检索\n• Mix：整合知识图谱和向量检索\n• Bypass：直接传递查询到LLM，不进行检索",
      "queryModeOptions": {
        "naive": "Naive",
        "local": "Local",
        "global": "Global",
        "hybrid": "Hybrid",
-        "mix": "Mix"
+        "mix": "Mix",
        "bypass": "Bypass"
      },
      "responseFormat": "响应格式",
      "responseFormatTooltip": "定义响应格式。例如：\n• 多段落\n• 单段落\n• 要点",
`@@ -1 +1 @@`
	`__api_version__ = "0146"`	`__api_version__ = "0148"`