Update README.md

2025-05-08 05:26:59 +08:00
parent f9d2415108
commit 4a03218450
2 changed files with 303 additions and 387 deletions
--- a/README-zh.md
+++ b/README-zh.md
@@ -260,6 +260,11 @@ class QueryParam:
    If provided, this will be used instead of the global model function.
    This allows using different models for different query modes.
    """
    user_prompt: str | None = None
    """User-provided prompt for the query.
    If provided, this will be used instead of the default value from the prompt template.
    """
 ```
 > top_k的默认值可以通过环境变量TOP_K更改。
@@ -527,128 +532,23 @@ response = rag.query(
 )
 ```
-### 自定义提示词
+### 自定义用户提示词
-LightRAG现在支持自定义提示，以便对系统行为进行精细控制。以下是使用方法：
+自定义用户提示词不影响查询内容，仅仅用于向LLM指示如何处理查询结果。以下是使用方法：
 ```python
 # 创建查询参数
 query_param = QueryParam(
-    mode="hybrid",  # 或其他模式："local"、"global"、"hybrid"、"mix"和"naive"
+    mode = "hybrid",  # 或其他模式："local"、"global"、"hybrid"、"mix"和"naive"
    user_prompt = "Please create the diagram using the Mermaid syntax"
 )
-# 示例1：使用默认系统提示
+# 查询和处理
 response_default = rag.query(
-    "可再生能源的主要好处是什么？",
+    "Please draw a character relationship diagram for Scrooge",
    param=query_param
 )
 print(response_default)
 # 示例2：使用自定义提示
 custom_prompt = """
 您是环境科学领域的专家助手。请提供详细且结构化的答案，并附带示例。
 ---对话历史---
 {history}
 ---知识库---
 {context_data}
 ---响应规则---
 - 目标格式和长度：{response_type}
 """
 response_custom = rag.query(
    "可再生能源的主要好处是什么？",
    param=query_param,
    system_prompt=custom_prompt  # 传递自定义提示
 )
 print(response_custom)
 ```
 ### 关键词提取
 我们引入了新函数`query_with_separate_keyword_extraction`来增强关键词提取功能。该函数将关键词提取过程与用户提示分开，专注于查询以提高提取关键词的相关性。
 * 工作原理
 该函数将输入分为两部分：
 - `用户查询`
 - `提示`
 然后仅对`用户查询`执行关键词提取。这种分离确保提取过程是集中和相关的，不受`提示`中任何额外语言的影响。它还允许`提示`纯粹用于响应格式化，保持用户原始问题的意图和清晰度。
 * 使用示例
 这个`示例`展示了如何为教育内容定制函数，专注于为高年级学生提供详细解释。
 ```python
 rag.query_with_separate_keyword_extraction(
    query="解释重力定律",
    prompt="提供适合学习物理的高中生的详细解释。",
    param=QueryParam(mode="hybrid")
 )
 ```
 ### 插入自定义知识
 ```python
 custom_kg = {
    "chunks": [
        {
            "content": "Alice和Bob正在合作进行量子计算研究。",
            "source_id": "doc-1"
        }
    ],
    "entities": [
        {
            "entity_name": "Alice",
            "entity_type": "person",
            "description": "Alice是一位专门研究量子物理的研究员。",
            "source_id": "doc-1"
        },
        {
            "entity_name": "Bob",
            "entity_type": "person",
            "description": "Bob是一位数学家。",
            "source_id": "doc-1"
        },
        {
            "entity_name": "量子计算",
            "entity_type": "technology",
            "description": "量子计算利用量子力学现象进行计算。",
            "source_id": "doc-1"
        }
    ],
    "relationships": [
        {
            "src_id": "Alice",
            "tgt_id": "Bob",
            "description": "Alice和Bob是研究伙伴。",
            "keywords": "合作 研究",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Alice",
            "tgt_id": "量子计算",
            "description": "Alice进行量子计算研究。",
            "keywords": "研究 专业",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Bob",
            "tgt_id": "量子计算",
            "description": "Bob研究量子计算。",
            "keywords": "研究 应用",
            "weight": 1.0,
            "source_id": "doc-1"
        }
    ]
 }
 rag.insert_custom_kg(custom_kg)
 ```
 ### 插入
@@ -934,23 +834,160 @@ updated_relation = rag.edit_relation("Google", "Google Mail", {
 })
 ```
 </details>
 所有操作都有同步和异步版本。异步版本带有前缀"a"（例如，`acreate_entity`，`aedit_relation`）。
-#### 实体操作
+</details>
 <details>
 <summary> <b>插入自定义知识</b> </summary>
 ```python
 custom_kg = {
    "chunks": [
        {
            "content": "Alice和Bob正在合作进行量子计算研究。",
            "source_id": "doc-1"
        }
    ],
    "entities": [
        {
            "entity_name": "Alice",
            "entity_type": "person",
            "description": "Alice是一位专门研究量子物理的研究员。",
            "source_id": "doc-1"
        },
        {
            "entity_name": "Bob",
            "entity_type": "person",
            "description": "Bob是一位数学家。",
            "source_id": "doc-1"
        },
        {
            "entity_name": "量子计算",
            "entity_type": "technology",
            "description": "量子计算利用量子力学现象进行计算。",
            "source_id": "doc-1"
        }
    ],
    "relationships": [
        {
            "src_id": "Alice",
            "tgt_id": "Bob",
            "description": "Alice和Bob是研究伙伴。",
            "keywords": "合作 研究",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Alice",
            "tgt_id": "量子计算",
            "description": "Alice进行量子计算研究。",
            "keywords": "研究 专业",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Bob",
            "tgt_id": "量子计算",
            "description": "Bob研究量子计算。",
            "keywords": "研究 应用",
            "weight": 1.0,
            "source_id": "doc-1"
        }
    ]
 }
 rag.insert_custom_kg(custom_kg)
 ```
 </details>
 <details>
 <summary> <b>其它实体与关系操作</b> </summary>
 - **create_entity**：创建具有指定属性的新实体
 - **edit_entity**：更新现有实体的属性或重命名它
 #### 关系操作
 - **create_relation**：在现有实体之间创建新关系
 - **edit_relation**：更新现有关系的属性
 这些操作在图数据库和向量数据库组件之间保持数据一致性，确保您的知识图谱保持连贯。
 </details>
 ## 实体合并
 <details>
 <summary> <b>合并实体及其关系</b> </summary>
 LightRAG现在支持将多个实体合并为单个实体，自动处理所有关系：
 ```python
 # 基本实体合并
 rag.merge_entities(
    source_entities=["人工智能", "AI", "机器智能"],
    target_entity="AI技术"
 )
 ```
 使用自定义合并策略：
 ```python
 # 为不同字段定义自定义合并策略
 rag.merge_entities(
    source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"],
    target_entity="约翰·史密斯",
    merge_strategy={
        "description": "concatenate",  # 组合所有描述
        "entity_type": "keep_first",   # 保留第一个实体的类型
        "source_id": "join_unique"     # 组合所有唯一的源ID
    }
 )
 ```
 使用自定义目标实体数据：
 ```python
 # 为合并后的实体指定确切值
 rag.merge_entities(
    source_entities=["纽约", "NYC", "大苹果"],
    target_entity="纽约市",
    target_entity_data={
        "entity_type": "LOCATION",
        "description": "纽约市是美国人口最多的城市。",
    }
 )
 ```
 结合两种方法的高级用法：
 ```python
 # 使用策略和自定义数据合并公司实体
 rag.merge_entities(
    source_entities=["微软公司", "Microsoft Corporation", "MSFT"],
    target_entity="微软",
    merge_strategy={
        "description": "concatenate",  # 组合所有描述
        "source_id": "join_unique"     # 组合源ID
    },
    target_entity_data={
        "entity_type": "ORGANIZATION",
    }
 )
 ```
 合并实体时：
 * 所有来自源实体的关系都会重定向到目标实体
 * 重复的关系会被智能合并
 * 防止自我关系（循环）
 * 合并后删除源实体
 * 保留关系权重和属性
 </details>
 ## Token统计功能
 <details>
 <summary> <b>概述和使用</b> </summary>
@@ -1048,77 +1085,6 @@ rag.export_data("complete_data.csv", include_vector_data=True)
 * 关系数据（实体之间的连接）
 * 来自向量数据库的关系信息
 ## 实体合并
 <details>
 <summary> <b>合并实体及其关系</b> </summary>
 LightRAG现在支持将多个实体合并为单个实体，自动处理所有关系：
 ```python
 # 基本实体合并
 rag.merge_entities(
    source_entities=["人工智能", "AI", "机器智能"],
    target_entity="AI技术"
 )
 ```
 使用自定义合并策略：
 ```python
 # 为不同字段定义自定义合并策略
 rag.merge_entities(
    source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"],
    target_entity="约翰·史密斯",
    merge_strategy={
        "description": "concatenate",  # 组合所有描述
        "entity_type": "keep_first",   # 保留第一个实体的类型
        "source_id": "join_unique"     # 组合所有唯一的源ID
    }
 )
 ```
 使用自定义目标实体数据：
 ```python
 # 为合并后的实体指定确切值
 rag.merge_entities(
    source_entities=["纽约", "NYC", "大苹果"],
    target_entity="纽约市",
    target_entity_data={
        "entity_type": "LOCATION",
        "description": "纽约市是美国人口最多的城市。",
    }
 )
 ```
 结合两种方法的高级用法：
 ```python
 # 使用策略和自定义数据合并公司实体
 rag.merge_entities(
    source_entities=["微软公司", "Microsoft Corporation", "MSFT"],
    target_entity="微软",
    merge_strategy={
        "description": "concatenate",  # 组合所有描述
        "source_id": "join_unique"     # 组合源ID
    },
    target_entity_data={
        "entity_type": "ORGANIZATION",
    }
 )
 ```
 合并实体时：
 * 所有来自源实体的关系都会重定向到目标实体
 * 重复的关系会被智能合并
 * 防止自我关系（循环）
 * 合并后删除源实体
 * 保留关系权重和属性
 </details>
 ## 缓存
 <details>
--- a/README.md
+++ b/README.md
@@ -274,12 +274,6 @@ class QueryParam:
    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
    hl_keywords: list[str] = field(default_factory=list)
    """List of high-level keywords to prioritize in retrieval."""
    ll_keywords: list[str] = field(default_factory=list)
    """List of low-level keywords to refine retrieval focus."""
    conversation_history: list[dict[str, str]] = field(default_factory=list)
    """Stores past conversation history to maintain context.
    Format: [{"role": "user/assistant", "content": "message"}].
@@ -296,6 +290,11 @@ class QueryParam:
    If provided, this will be used instead of the global model function.
    This allows using different models for different query modes.
    """
    user_prompt: str | None = None
    """User-provided prompt for the query.
    If provided, this will be used instead of the default value from the prompt template.
    """
 ```
 > The default value of top_k can be changed via the environment variable TOP_K.
@@ -571,76 +570,26 @@ response = rag.query(
 </details>
-### Custom Prompt Support
+### Custom User Prompt Support
-LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it:
+Custom user prompts do not affect the query content; they are only used to instruct the LLM on how to handle the query results.  Here's how to use it:
 <details>
  <summary> <b> Usage Example </b></summary>
 ```python
 # Create query parameters
 query_param = QueryParam(
-    mode="hybrid",  # or other mode: "local", "global", "hybrid", "mix" and "naive"
+    mode = "hybrid",  # or other mode: "local", "global", "hybrid", "mix" and "naive"
    user_prompt = "Please create the diagram using the Mermaid syntax"
 )
-# Example 1: Using the default system prompt
+# Query and process
 response_default = rag.query(
-    "What are the primary benefits of renewable energy?",
+    "Please draw a character relationship diagram for Scrooge",
    param=query_param
 )
 print(response_default)
 # Example 2: Using a custom prompt
 custom_prompt = """
 You are an expert assistant in environmental science. Provide detailed and structured answers with examples.
 ---Conversation History---
 {history}
 ---Knowledge Base---
 {context_data}
 ---Response Rules---
 - Target format and length: {response_type}
 """
 response_custom = rag.query(
    "What are the primary benefits of renewable energy?",
    param=query_param,
    system_prompt=custom_prompt  # Pass the custom prompt
 )
 print(response_custom)
 ```
 </details>
 ### Separate Keyword Extraction
 We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords.
 **How It Works**
 The function operates by dividing the input into two parts:
 - `User Query`
 - `Prompt`
 It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question.
 <details>
  <summary> <b> Usage Example </b></summary>
 This `example` shows how to tailor the function for educational content, focusing on detailed explanations for older students.
 ```python
 rag.query_with_separate_keyword_extraction(
    query="Explain the law of gravity",
    prompt="Provide a detailed explanation suitable for high school students studying physics.",
    param=QueryParam(mode="hybrid")
 )
 ```
 </details>
 ### Insert
@@ -725,70 +674,6 @@ rag.insert(text_content.decode('utf-8'))
 </details>
 <details>
  <summary> <b> Insert Custom KG </b></summary>
 ```python
 custom_kg = {
    "chunks": [
        {
            "content": "Alice and Bob are collaborating on quantum computing research.",
            "source_id": "doc-1"
        }
    ],
    "entities": [
        {
            "entity_name": "Alice",
            "entity_type": "person",
            "description": "Alice is a researcher specializing in quantum physics.",
            "source_id": "doc-1"
        },
        {
            "entity_name": "Bob",
            "entity_type": "person",
            "description": "Bob is a mathematician.",
            "source_id": "doc-1"
        },
        {
            "entity_name": "Quantum Computing",
            "entity_type": "technology",
            "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
            "source_id": "doc-1"
        }
    ],
    "relationships": [
        {
            "src_id": "Alice",
            "tgt_id": "Bob",
            "description": "Alice and Bob are research partners.",
            "keywords": "collaboration research",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Alice",
            "tgt_id": "Quantum Computing",
            "description": "Alice conducts research on quantum computing.",
            "keywords": "research expertise",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Bob",
            "tgt_id": "Quantum Computing",
            "description": "Bob researches quantum computing.",
            "keywords": "research application",
            "weight": 1.0,
            "source_id": "doc-1"
        }
    ]
 }
 rag.insert_custom_kg(custom_kg)
 ```
 </details>
 <details>
  <summary><b>Citation Functionality</b></summary>
@@ -992,12 +877,78 @@ updated_relation = rag.edit_relation("Google", "Google Mail", {
 All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).
-#### Entity Operations
+</details>
 <details>
  <summary> <b> Insert Custom KG </b></summary>
 ```python
 custom_kg = {
    "chunks": [
        {
            "content": "Alice and Bob are collaborating on quantum computing research.",
            "source_id": "doc-1"
        }
    ],
    "entities": [
        {
            "entity_name": "Alice",
            "entity_type": "person",
            "description": "Alice is a researcher specializing in quantum physics.",
            "source_id": "doc-1"
        },
        {
            "entity_name": "Bob",
            "entity_type": "person",
            "description": "Bob is a mathematician.",
            "source_id": "doc-1"
        },
        {
            "entity_name": "Quantum Computing",
            "entity_type": "technology",
            "description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
            "source_id": "doc-1"
        }
    ],
    "relationships": [
        {
            "src_id": "Alice",
            "tgt_id": "Bob",
            "description": "Alice and Bob are research partners.",
            "keywords": "collaboration research",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Alice",
            "tgt_id": "Quantum Computing",
            "description": "Alice conducts research on quantum computing.",
            "keywords": "research expertise",
            "weight": 1.0,
            "source_id": "doc-1"
        },
        {
            "src_id": "Bob",
            "tgt_id": "Quantum Computing",
            "description": "Bob researches quantum computing.",
            "keywords": "research application",
            "weight": 1.0,
            "source_id": "doc-1"
        }
    ]
 }
 rag.insert_custom_kg(custom_kg)
 ```
 </details>
 <details>
  <summary> <b>Other Entity and Relation Operations</b></summary>
 - **create_entity**: Creates a new entity with specified attributes
 - **edit_entity**: Updates an existing entity's attributes or renames it
 #### Relation Operations
 - **create_relation**: Creates a new relation between existing entities
 - **edit_relation**: Updates an existing relation's attributes
@@ -1006,6 +957,77 @@ These operations maintain data consistency across both the graph database and ve
 </details>
 ## Entity Merging
 <details>
 <summary> <b>Merge Entities and Their Relationships</b> </summary>
 LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
 ```python
 # Basic entity merging
 rag.merge_entities(
    source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
    target_entity="AI Technology"
 )
 ```
 With custom merge strategy:
 ```python
 # Define custom merge strategy for different fields
 rag.merge_entities(
    source_entities=["John Smith", "Dr. Smith", "J. Smith"],
    target_entity="John Smith",
    merge_strategy={
        "description": "concatenate",  # Combine all descriptions
        "entity_type": "keep_first",   # Keep the entity type from the first entity
        "source_id": "join_unique"     # Combine all unique source IDs
    }
 )
 ```
 With custom target entity data:
 ```python
 # Specify exact values for the merged entity
 rag.merge_entities(
    source_entities=["New York", "NYC", "Big Apple"],
    target_entity="New York City",
    target_entity_data={
        "entity_type": "LOCATION",
        "description": "New York City is the most populous city in the United States.",
    }
 )
 ```
 Advanced usage combining both approaches:
 ```python
 # Merge company entities with both strategy and custom data
 rag.merge_entities(
    source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
    target_entity="Microsoft",
    merge_strategy={
        "description": "concatenate",  # Combine all descriptions
        "source_id": "join_unique"     # Combine source IDs
    },
    target_entity_data={
        "entity_type": "ORGANIZATION",
    }
 )
 ```
 When merging entities:
 * All relationships from source entities are redirected to the target entity
 * Duplicate relationships are intelligently merged
 * Self-relationships (loops) are prevented
 * Source entities are removed after merging
 * Relationship weights and attributes are preserved
 </details>
 ## Token Usage Tracking
 <details>
@@ -1112,78 +1134,6 @@ All exports include:
 * Relation data (connections between entities)
 * Relationship information from vector database
 ## Entity Merging
 <details>
 <summary> <b>Merge Entities and Their Relationships</b> </summary>
 LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
 ```python
 # Basic entity merging
 rag.merge_entities(
    source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
    target_entity="AI Technology"
 )
 ```
 With custom merge strategy:
 ```python
 # Define custom merge strategy for different fields
 rag.merge_entities(
    source_entities=["John Smith", "Dr. Smith", "J. Smith"],
    target_entity="John Smith",
    merge_strategy={
        "description": "concatenate",  # Combine all descriptions
        "entity_type": "keep_first",   # Keep the entity type from the first entity
        "source_id": "join_unique"     # Combine all unique source IDs
    }
 )
 ```
 With custom target entity data:
 ```python
 # Specify exact values for the merged entity
 rag.merge_entities(
    source_entities=["New York", "NYC", "Big Apple"],
    target_entity="New York City",
    target_entity_data={
        "entity_type": "LOCATION",
        "description": "New York City is the most populous city in the United States.",
    }
 )
 ```
 Advanced usage combining both approaches:
 ```python
 # Merge company entities with both strategy and custom data
 rag.merge_entities(
    source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
    target_entity="Microsoft",
    merge_strategy={
        "description": "concatenate",  # Combine all descriptions
        "source_id": "join_unique"     # Combine source IDs
    },
    target_entity_data={
        "entity_type": "ORGANIZATION",
    }
 )
 ```
 When merging entities:
 * All relationships from source entities are redirected to the target entity
 * Duplicate relationships are intelligently merged
 * Self-relationships (loops) are prevented
 * Source entities are removed after merging
 * Relationship weights and attributes are preserved
 </details>
 ## Cache
 <details>