From 4a0321845058a53d29f69cce3cbb6ef3d06fdad8 Mon Sep 17 00:00:00 2001 From: yangdx Date: Thu, 8 May 2025 05:26:59 +0800 Subject: [PATCH] Update README.md --- README-zh.md | 340 ++++++++++++++++++++++--------------------------- README.md | 350 ++++++++++++++++++++++----------------------------- 2 files changed, 303 insertions(+), 387 deletions(-) diff --git a/README-zh.md b/README-zh.md index 6ca44cd5..9f16dd7c 100644 --- a/README-zh.md +++ b/README-zh.md @@ -260,6 +260,11 @@ class QueryParam: If provided, this will be used instead of the global model function. This allows using different models for different query modes. """ + + user_prompt: str | None = None + """User-provided prompt for the query. + If provided, this will be used instead of the default value from the prompt template. + """ ``` > top_k的默认值可以通过环境变量TOP_K更改。 @@ -527,128 +532,23 @@ response = rag.query( ) ``` -### 自定义提示词 +### 自定义用户提示词 -LightRAG现在支持自定义提示,以便对系统行为进行精细控制。以下是使用方法: +自定义用户提示词不影响查询内容,仅仅用于向LLM指示如何处理查询结果。以下是使用方法: ```python # 创建查询参数 query_param = QueryParam( - mode="hybrid", # 或其他模式:"local"、"global"、"hybrid"、"mix"和"naive" + mode = "hybrid", # 或其他模式:"local"、"global"、"hybrid"、"mix"和"naive" + user_prompt = "Please create the diagram using the Mermaid syntax" ) -# 示例1:使用默认系统提示 +# 查询和处理 response_default = rag.query( - "可再生能源的主要好处是什么?", + "Please draw a character relationship diagram for Scrooge", param=query_param ) print(response_default) - -# 示例2:使用自定义提示 -custom_prompt = """ -您是环境科学领域的专家助手。请提供详细且结构化的答案,并附带示例。 ----对话历史--- -{history} - ----知识库--- -{context_data} - ----响应规则--- - -- 目标格式和长度:{response_type} -""" -response_custom = rag.query( - "可再生能源的主要好处是什么?", - param=query_param, - system_prompt=custom_prompt # 传递自定义提示 -) -print(response_custom) -``` - -### 关键词提取 - -我们引入了新函数`query_with_separate_keyword_extraction`来增强关键词提取功能。该函数将关键词提取过程与用户提示分开,专注于查询以提高提取关键词的相关性。 - -* 工作原理 - -该函数将输入分为两部分: - -- `用户查询` -- `提示` - -然后仅对`用户查询`执行关键词提取。这种分离确保提取过程是集中和相关的,不受`提示`中任何额外语言的影响。它还允许`提示`纯粹用于响应格式化,保持用户原始问题的意图和清晰度。 - -* 
使用示例 - -这个`示例`展示了如何为教育内容定制函数,专注于为高年级学生提供详细解释。 - -```python -rag.query_with_separate_keyword_extraction( - query="解释重力定律", - prompt="提供适合学习物理的高中生的详细解释。", - param=QueryParam(mode="hybrid") -) -``` - -### 插入自定义知识 - -```python -custom_kg = { - "chunks": [ - { - "content": "Alice和Bob正在合作进行量子计算研究。", - "source_id": "doc-1" - } - ], - "entities": [ - { - "entity_name": "Alice", - "entity_type": "person", - "description": "Alice是一位专门研究量子物理的研究员。", - "source_id": "doc-1" - }, - { - "entity_name": "Bob", - "entity_type": "person", - "description": "Bob是一位数学家。", - "source_id": "doc-1" - }, - { - "entity_name": "量子计算", - "entity_type": "technology", - "description": "量子计算利用量子力学现象进行计算。", - "source_id": "doc-1" - } - ], - "relationships": [ - { - "src_id": "Alice", - "tgt_id": "Bob", - "description": "Alice和Bob是研究伙伴。", - "keywords": "合作 研究", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Alice", - "tgt_id": "量子计算", - "description": "Alice进行量子计算研究。", - "keywords": "研究 专业", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Bob", - "tgt_id": "量子计算", - "description": "Bob研究量子计算。", - "keywords": "研究 应用", - "weight": 1.0, - "source_id": "doc-1" - } - ] -} - -rag.insert_custom_kg(custom_kg) ``` ### 插入 @@ -934,23 +834,160 @@ updated_relation = rag.edit_relation("Google", "Google Mail", { }) ``` - - 所有操作都有同步和异步版本。异步版本带有前缀"a"(例如,`acreate_entity`,`aedit_relation`)。 -#### 实体操作 + + +
+ 插入自定义知识 + +```python +custom_kg = { + "chunks": [ + { + "content": "Alice和Bob正在合作进行量子计算研究。", + "source_id": "doc-1" + } + ], + "entities": [ + { + "entity_name": "Alice", + "entity_type": "person", + "description": "Alice是一位专门研究量子物理的研究员。", + "source_id": "doc-1" + }, + { + "entity_name": "Bob", + "entity_type": "person", + "description": "Bob是一位数学家。", + "source_id": "doc-1" + }, + { + "entity_name": "量子计算", + "entity_type": "technology", + "description": "量子计算利用量子力学现象进行计算。", + "source_id": "doc-1" + } + ], + "relationships": [ + { + "src_id": "Alice", + "tgt_id": "Bob", + "description": "Alice和Bob是研究伙伴。", + "keywords": "合作 研究", + "weight": 1.0, + "source_id": "doc-1" + }, + { + "src_id": "Alice", + "tgt_id": "量子计算", + "description": "Alice进行量子计算研究。", + "keywords": "研究 专业", + "weight": 1.0, + "source_id": "doc-1" + }, + { + "src_id": "Bob", + "tgt_id": "量子计算", + "description": "Bob研究量子计算。", + "keywords": "研究 应用", + "weight": 1.0, + "source_id": "doc-1" + } + ] +} + +rag.insert_custom_kg(custom_kg) +``` + +
+ +
+ 其它实体与关系操作 - **create_entity**:创建具有指定属性的新实体 - **edit_entity**:更新现有实体的属性或重命名它 -#### 关系操作 - - **create_relation**:在现有实体之间创建新关系 - **edit_relation**:更新现有关系的属性 这些操作在图数据库和向量数据库组件之间保持数据一致性,确保您的知识图谱保持连贯。 +
+ +## 实体合并 + +
+ 合并实体及其关系 + +LightRAG现在支持将多个实体合并为单个实体,自动处理所有关系: + +```python +# 基本实体合并 +rag.merge_entities( + source_entities=["人工智能", "AI", "机器智能"], + target_entity="AI技术" +) +``` + +使用自定义合并策略: + +```python +# 为不同字段定义自定义合并策略 +rag.merge_entities( + source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"], + target_entity="约翰·史密斯", + merge_strategy={ + "description": "concatenate", # 组合所有描述 + "entity_type": "keep_first", # 保留第一个实体的类型 + "source_id": "join_unique" # 组合所有唯一的源ID + } +) +``` + +使用自定义目标实体数据: + +```python +# 为合并后的实体指定确切值 +rag.merge_entities( + source_entities=["纽约", "NYC", "大苹果"], + target_entity="纽约市", + target_entity_data={ + "entity_type": "LOCATION", + "description": "纽约市是美国人口最多的城市。", + } +) +``` + +结合两种方法的高级用法: + +```python +# 使用策略和自定义数据合并公司实体 +rag.merge_entities( + source_entities=["微软公司", "Microsoft Corporation", "MSFT"], + target_entity="微软", + merge_strategy={ + "description": "concatenate", # 组合所有描述 + "source_id": "join_unique" # 组合源ID + }, + target_entity_data={ + "entity_type": "ORGANIZATION", + } +) +``` + +合并实体时: + +* 所有来自源实体的关系都会重定向到目标实体 +* 重复的关系会被智能合并 +* 防止自我关系(循环) +* 合并后删除源实体 +* 保留关系权重和属性 + +
+ ## Token统计功能 +
概述和使用 @@ -1048,77 +1085,6 @@ rag.export_data("complete_data.csv", include_vector_data=True) * 关系数据(实体之间的连接) * 来自向量数据库的关系信息 -## 实体合并 - -
- 合并实体及其关系 - -LightRAG现在支持将多个实体合并为单个实体,自动处理所有关系: - -```python -# 基本实体合并 -rag.merge_entities( - source_entities=["人工智能", "AI", "机器智能"], - target_entity="AI技术" -) -``` - -使用自定义合并策略: - -```python -# 为不同字段定义自定义合并策略 -rag.merge_entities( - source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"], - target_entity="约翰·史密斯", - merge_strategy={ - "description": "concatenate", # 组合所有描述 - "entity_type": "keep_first", # 保留第一个实体的类型 - "source_id": "join_unique" # 组合所有唯一的源ID - } -) -``` - -使用自定义目标实体数据: - -```python -# 为合并后的实体指定确切值 -rag.merge_entities( - source_entities=["纽约", "NYC", "大苹果"], - target_entity="纽约市", - target_entity_data={ - "entity_type": "LOCATION", - "description": "纽约市是美国人口最多的城市。", - } -) -``` - -结合两种方法的高级用法: - -```python -# 使用策略和自定义数据合并公司实体 -rag.merge_entities( - source_entities=["微软公司", "Microsoft Corporation", "MSFT"], - target_entity="微软", - merge_strategy={ - "description": "concatenate", # 组合所有描述 - "source_id": "join_unique" # 组合源ID - }, - target_entity_data={ - "entity_type": "ORGANIZATION", - } -) -``` - -合并实体时: - -* 所有来自源实体的关系都会重定向到目标实体 -* 重复的关系会被智能合并 -* 防止自我关系(循环) -* 合并后删除源实体 -* 保留关系权重和属性 - -
- ## 缓存
diff --git a/README.md b/README.md index 66da4375..e060a0b4 100644 --- a/README.md +++ b/README.md @@ -274,12 +274,6 @@ class QueryParam: max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) """Maximum number of tokens allocated for entity descriptions in local retrieval.""" - hl_keywords: list[str] = field(default_factory=list) - """List of high-level keywords to prioritize in retrieval.""" - - ll_keywords: list[str] = field(default_factory=list) - """List of low-level keywords to refine retrieval focus.""" - conversation_history: list[dict[str, str]] = field(default_factory=list) """Stores past conversation history to maintain context. Format: [{"role": "user/assistant", "content": "message"}]. @@ -296,6 +290,11 @@ class QueryParam: If provided, this will be used instead of the global model function. This allows using different models for different query modes. """ + + user_prompt: str | None = None + """User-provided prompt for the query. + If provided, this will be used instead of the default value from the prompt template. + """ ``` > default value of Top_k can be change by environment variables TOP_K. @@ -571,76 +570,26 @@ response = rag.query(
-### Custom Prompt Support +### Custom User Prompt Support -LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it: - -
- Usage Example +Custom user prompts do not affect the query content; they are only used to instruct the LLM on how to handle the query results. Here's how to use it: ```python # Create query parameters query_param = QueryParam( - mode="hybrid", # or other mode: "local", "global", "hybrid", "mix" and "naive" + mode = "hybrid", # or other mode: "local", "global", "hybrid", "mix" and "naive" + user_prompt = "Please create the diagram using the Mermaid syntax" ) -# Example 1: Using the default system prompt +# Query and process response_default = rag.query( - "What are the primary benefits of renewable energy?", + "Please draw a character relationship diagram for Scrooge", param=query_param ) print(response_default) - -# Example 2: Using a custom prompt -custom_prompt = """ -You are an expert assistant in environmental science. Provide detailed and structured answers with examples. ----Conversation History--- -{history} - ----Knowledge Base--- -{context_data} - ----Response Rules--- - -- Target format and length: {response_type} -""" -response_custom = rag.query( - "What are the primary benefits of renewable energy?", - param=query_param, - system_prompt=custom_prompt # Pass the custom prompt -) -print(response_custom) ``` -
-### Separate Keyword Extraction - -We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords. - -**How It Works?** - -The function operates by dividing the input into two parts: - -- `User Query` -- `Prompt` - -It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question. - -
- Usage Example - -This `example` shows how to tailor the function for educational content, focusing on detailed explanations for older students. - -```python -rag.query_with_separate_keyword_extraction( - query="Explain the law of gravity", - prompt="Provide a detailed explanation suitable for high school students studying physics.", - param=QueryParam(mode="hybrid") -) -``` - -
### Insert @@ -725,70 +674,6 @@ rag.insert(text_content.decode('utf-8'))
-
- Insert Custom KG - -```python -custom_kg = { - "chunks": [ - { - "content": "Alice and Bob are collaborating on quantum computing research.", - "source_id": "doc-1" - } - ], - "entities": [ - { - "entity_name": "Alice", - "entity_type": "person", - "description": "Alice is a researcher specializing in quantum physics.", - "source_id": "doc-1" - }, - { - "entity_name": "Bob", - "entity_type": "person", - "description": "Bob is a mathematician.", - "source_id": "doc-1" - }, - { - "entity_name": "Quantum Computing", - "entity_type": "technology", - "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", - "source_id": "doc-1" - } - ], - "relationships": [ - { - "src_id": "Alice", - "tgt_id": "Bob", - "description": "Alice and Bob are research partners.", - "keywords": "collaboration research", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Alice", - "tgt_id": "Quantum Computing", - "description": "Alice conducts research on quantum computing.", - "keywords": "research expertise", - "weight": 1.0, - "source_id": "doc-1" - }, - { - "src_id": "Bob", - "tgt_id": "Quantum Computing", - "description": "Bob researches quantum computing.", - "keywords": "research application", - "weight": 1.0, - "source_id": "doc-1" - } - ] -} - -rag.insert_custom_kg(custom_kg) -``` - -
-
Citation Functionality @@ -992,12 +877,78 @@ updated_relation = rag.edit_relation("Google", "Google Mail", { All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`). -#### Entity Operations +
+ +
+ Insert Custom KG + +```python +custom_kg = { + "chunks": [ + { + "content": "Alice and Bob are collaborating on quantum computing research.", + "source_id": "doc-1" + } + ], + "entities": [ + { + "entity_name": "Alice", + "entity_type": "person", + "description": "Alice is a researcher specializing in quantum physics.", + "source_id": "doc-1" + }, + { + "entity_name": "Bob", + "entity_type": "person", + "description": "Bob is a mathematician.", + "source_id": "doc-1" + }, + { + "entity_name": "Quantum Computing", + "entity_type": "technology", + "description": "Quantum computing utilizes quantum mechanical phenomena for computation.", + "source_id": "doc-1" + } + ], + "relationships": [ + { + "src_id": "Alice", + "tgt_id": "Bob", + "description": "Alice and Bob are research partners.", + "keywords": "collaboration research", + "weight": 1.0, + "source_id": "doc-1" + }, + { + "src_id": "Alice", + "tgt_id": "Quantum Computing", + "description": "Alice conducts research on quantum computing.", + "keywords": "research expertise", + "weight": 1.0, + "source_id": "doc-1" + }, + { + "src_id": "Bob", + "tgt_id": "Quantum Computing", + "description": "Bob researches quantum computing.", + "keywords": "research application", + "weight": 1.0, + "source_id": "doc-1" + } + ] +} + +rag.insert_custom_kg(custom_kg) +``` + +
+ +
+ Other Entity and Relation Operations - **create_entity**: Creates a new entity with specified attributes - **edit_entity**: Updates an existing entity's attributes or renames it -#### Relation Operations - **create_relation**: Creates a new relation between existing entities - **edit_relation**: Updates an existing relation's attributes @@ -1006,6 +957,77 @@ These operations maintain data consistency across both the graph database and ve
+## Entity Merging + +
+ Merge Entities and Their Relationships + +LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships: + +```python +# Basic entity merging +rag.merge_entities( + source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"], + target_entity="AI Technology" +) +``` + +With custom merge strategy: + +```python +# Define custom merge strategy for different fields +rag.merge_entities( + source_entities=["John Smith", "Dr. Smith", "J. Smith"], + target_entity="John Smith", + merge_strategy={ + "description": "concatenate", # Combine all descriptions + "entity_type": "keep_first", # Keep the entity type from the first entity + "source_id": "join_unique" # Combine all unique source IDs + } +) +``` + +With custom target entity data: + +```python +# Specify exact values for the merged entity +rag.merge_entities( + source_entities=["New York", "NYC", "Big Apple"], + target_entity="New York City", + target_entity_data={ + "entity_type": "LOCATION", + "description": "New York City is the most populous city in the United States.", + } +) +``` + +Advanced usage combining both approaches: + +```python +# Merge company entities with both strategy and custom data +rag.merge_entities( + source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"], + target_entity="Microsoft", + merge_strategy={ + "description": "concatenate", # Combine all descriptions + "source_id": "join_unique" # Combine source IDs + }, + target_entity_data={ + "entity_type": "ORGANIZATION", + } +) +``` + +When merging entities: + +* All relationships from source entities are redirected to the target entity +* Duplicate relationships are intelligently merged +* Self-relationships (loops) are prevented +* Source entities are removed after merging +* Relationship weights and attributes are preserved + +
+ ## Token Usage Tracking
@@ -1112,78 +1134,6 @@ All exports include: * Relation data (connections between entities) * Relationship information from vector database - -## Entity Merging - -
- Merge Entities and Their Relationships - -LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships: - -```python -# Basic entity merging -rag.merge_entities( - source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"], - target_entity="AI Technology" -) -``` - -With custom merge strategy: - -```python -# Define custom merge strategy for different fields -rag.merge_entities( - source_entities=["John Smith", "Dr. Smith", "J. Smith"], - target_entity="John Smith", - merge_strategy={ - "description": "concatenate", # Combine all descriptions - "entity_type": "keep_first", # Keep the entity type from the first entity - "source_id": "join_unique" # Combine all unique source IDs - } -) -``` - -With custom target entity data: - -```python -# Specify exact values for the merged entity -rag.merge_entities( - source_entities=["New York", "NYC", "Big Apple"], - target_entity="New York City", - target_entity_data={ - "entity_type": "LOCATION", - "description": "New York City is the most populous city in the United States.", - } -) -``` - -Advanced usage combining both approaches: - -```python -# Merge company entities with both strategy and custom data -rag.merge_entities( - source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"], - target_entity="Microsoft", - merge_strategy={ - "description": "concatenate", # Combine all descriptions - "source_id": "join_unique" # Combine source IDs - }, - target_entity_data={ - "entity_type": "ORGANIZATION", - } -) -``` - -When merging entities: - -* All relationships from source entities are redirected to the target entity -* Duplicate relationships are intelligently merged -* Self-relationships (loops) are prevented -* Source entities are removed after merging -* Relationship weights and attributes are preserved - -
- ## Cache