Update README.md

This commit is contained in:
yangdx
2025-05-08 05:26:59 +08:00
parent f9d2415108
commit 4a03218450
2 changed files with 303 additions and 387 deletions

View File

@@ -260,6 +260,11 @@ class QueryParam:
If provided, this will be used instead of the global model function. If provided, this will be used instead of the global model function.
This allows using different models for different query modes. This allows using different models for different query modes.
""" """
user_prompt: str | None = None
"""User-provided prompt for the query.
If provided, this will be used instead of the default value from the prompt template.
"""
``` ```
> top_k的默认值可以通过环境变量TOP_K更改。 > top_k的默认值可以通过环境变量TOP_K更改。
@@ -527,128 +532,23 @@ response = rag.query(
) )
``` ```
### 自定义提示词 ### 自定义用户提示词
LightRAG现在支持自定义提示,以便对系统行为进行精细控制。以下是使用方法: 自定义用户提示词不影响查询内容,仅仅用于向LLM指示如何处理查询结果。以下是使用方法:
```python ```python
# 创建查询参数 # 创建查询参数
query_param = QueryParam( query_param = QueryParam(
mode="hybrid", # 或其他模式:"local"、"global"、"hybrid"、"mix"和"naive" mode = "hybrid", # 或其他模式:"local"、"global"、"hybrid"、"mix"和"naive"
user_prompt = "Please create the diagram using the Mermaid syntax"
) )
# 示例1使用默认系统提示 # 查询和处理
response_default = rag.query( response_default = rag.query(
"可再生能源的主要好处是什么?", "Please draw a character relationship diagram for Scrooge",
param=query_param param=query_param
) )
print(response_default) print(response_default)
# 示例2使用自定义提示
custom_prompt = """
您是环境科学领域的专家助手。请提供详细且结构化的答案,并附带示例。
---对话历史---
{history}
---知识库---
{context_data}
---响应规则---
- 目标格式和长度:{response_type}
"""
response_custom = rag.query(
"可再生能源的主要好处是什么?",
param=query_param,
system_prompt=custom_prompt # 传递自定义提示
)
print(response_custom)
```
### 关键词提取
我们引入了新函数`query_with_separate_keyword_extraction`来增强关键词提取功能。该函数将关键词提取过程与用户提示分开,专注于查询以提高提取关键词的相关性。
* 工作原理
该函数将输入分为两部分:
- `用户查询`
- `提示`
然后仅对`用户查询`执行关键词提取。这种分离确保提取过程是集中和相关的,不受`提示`中任何额外语言的影响。它还允许`提示`纯粹用于响应格式化,保持用户原始问题的意图和清晰度。
* 使用示例
这个`示例`展示了如何为教育内容定制函数,专注于为高年级学生提供详细解释。
```python
rag.query_with_separate_keyword_extraction(
query="解释重力定律",
prompt="提供适合学习物理的高中生的详细解释。",
param=QueryParam(mode="hybrid")
)
```
### 插入自定义知识
```python
custom_kg = {
"chunks": [
{
"content": "Alice和Bob正在合作进行量子计算研究。",
"source_id": "doc-1"
}
],
"entities": [
{
"entity_name": "Alice",
"entity_type": "person",
"description": "Alice是一位专门研究量子物理的研究员。",
"source_id": "doc-1"
},
{
"entity_name": "Bob",
"entity_type": "person",
"description": "Bob是一位数学家。",
"source_id": "doc-1"
},
{
"entity_name": "量子计算",
"entity_type": "technology",
"description": "量子计算利用量子力学现象进行计算。",
"source_id": "doc-1"
}
],
"relationships": [
{
"src_id": "Alice",
"tgt_id": "Bob",
"description": "Alice和Bob是研究伙伴。",
"keywords": "合作 研究",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Alice",
"tgt_id": "量子计算",
"description": "Alice进行量子计算研究。",
"keywords": "研究 专业",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Bob",
"tgt_id": "量子计算",
"description": "Bob研究量子计算。",
"keywords": "研究 应用",
"weight": 1.0,
"source_id": "doc-1"
}
]
}
rag.insert_custom_kg(custom_kg)
``` ```
### 插入 ### 插入
@@ -934,23 +834,160 @@ updated_relation = rag.edit_relation("Google", "Google Mail", {
}) })
``` ```
</details>
所有操作都有同步和异步版本。异步版本带有前缀"a"(例如,`acreate_entity`、`aedit_relation`)。 所有操作都有同步和异步版本。异步版本带有前缀"a"(例如,`acreate_entity`、`aedit_relation`)。
#### 实体操作 </details>
<details>
<summary> <b>插入自定义知识</b> </summary>
```python
custom_kg = {
"chunks": [
{
"content": "Alice和Bob正在合作进行量子计算研究。",
"source_id": "doc-1"
}
],
"entities": [
{
"entity_name": "Alice",
"entity_type": "person",
"description": "Alice是一位专门研究量子物理的研究员。",
"source_id": "doc-1"
},
{
"entity_name": "Bob",
"entity_type": "person",
"description": "Bob是一位数学家。",
"source_id": "doc-1"
},
{
"entity_name": "量子计算",
"entity_type": "technology",
"description": "量子计算利用量子力学现象进行计算。",
"source_id": "doc-1"
}
],
"relationships": [
{
"src_id": "Alice",
"tgt_id": "Bob",
"description": "Alice和Bob是研究伙伴。",
"keywords": "合作 研究",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Alice",
"tgt_id": "量子计算",
"description": "Alice进行量子计算研究。",
"keywords": "研究 专业",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Bob",
"tgt_id": "量子计算",
"description": "Bob研究量子计算。",
"keywords": "研究 应用",
"weight": 1.0,
"source_id": "doc-1"
}
]
}
rag.insert_custom_kg(custom_kg)
```
</details>
<details>
<summary> <b>其它实体与关系操作</b> </summary>
- **create_entity**:创建具有指定属性的新实体 - **create_entity**:创建具有指定属性的新实体
- **edit_entity**:更新现有实体的属性或重命名它 - **edit_entity**:更新现有实体的属性或重命名它
#### 关系操作
- **create_relation**:在现有实体之间创建新关系 - **create_relation**:在现有实体之间创建新关系
- **edit_relation**:更新现有关系的属性 - **edit_relation**:更新现有关系的属性
这些操作在图数据库和向量数据库组件之间保持数据一致性,确保您的知识图谱保持连贯。 这些操作在图数据库和向量数据库组件之间保持数据一致性,确保您的知识图谱保持连贯。
</details>
## 实体合并
<details>
<summary> <b>合并实体及其关系</b> </summary>
LightRAG现在支持将多个实体合并为单个实体,自动处理所有关系:
```python
# 基本实体合并
rag.merge_entities(
source_entities=["人工智能", "AI", "机器智能"],
target_entity="AI技术"
)
```
使用自定义合并策略:
```python
# 为不同字段定义自定义合并策略
rag.merge_entities(
source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"],
target_entity="约翰·史密斯",
merge_strategy={
"description": "concatenate", # 组合所有描述
"entity_type": "keep_first", # 保留第一个实体的类型
"source_id": "join_unique" # 组合所有唯一的源ID
}
)
```
使用自定义目标实体数据:
```python
# 为合并后的实体指定确切值
rag.merge_entities(
source_entities=["纽约", "NYC", "大苹果"],
target_entity="纽约市",
target_entity_data={
"entity_type": "LOCATION",
"description": "纽约市是美国人口最多的城市。",
}
)
```
结合两种方法的高级用法:
```python
# 使用策略和自定义数据合并公司实体
rag.merge_entities(
source_entities=["微软公司", "Microsoft Corporation", "MSFT"],
target_entity="微软",
merge_strategy={
"description": "concatenate", # 组合所有描述
"source_id": "join_unique" # 组合源ID
},
target_entity_data={
"entity_type": "ORGANIZATION",
}
)
```
合并实体时:
* 所有来自源实体的关系都会重定向到目标实体
* 重复的关系会被智能合并
* 防止自我关系(循环)
* 合并后删除源实体
* 保留关系权重和属性
</details>
## Token统计功能 ## Token统计功能
<details> <details>
<summary> <b>概述和使用</b> </summary> <summary> <b>概述和使用</b> </summary>
@@ -1048,77 +1085,6 @@ rag.export_data("complete_data.csv", include_vector_data=True)
* 关系数据(实体之间的连接) * 关系数据(实体之间的连接)
* 来自向量数据库的关系信息 * 来自向量数据库的关系信息
## 实体合并
<details>
<summary> <b>合并实体及其关系</b> </summary>
LightRAG现在支持将多个实体合并为单个实体,自动处理所有关系:
```python
# 基本实体合并
rag.merge_entities(
source_entities=["人工智能", "AI", "机器智能"],
target_entity="AI技术"
)
```
使用自定义合并策略:
```python
# 为不同字段定义自定义合并策略
rag.merge_entities(
source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"],
target_entity="约翰·史密斯",
merge_strategy={
"description": "concatenate", # 组合所有描述
"entity_type": "keep_first", # 保留第一个实体的类型
"source_id": "join_unique" # 组合所有唯一的源ID
}
)
```
使用自定义目标实体数据:
```python
# 为合并后的实体指定确切值
rag.merge_entities(
source_entities=["纽约", "NYC", "大苹果"],
target_entity="纽约市",
target_entity_data={
"entity_type": "LOCATION",
"description": "纽约市是美国人口最多的城市。",
}
)
```
结合两种方法的高级用法:
```python
# 使用策略和自定义数据合并公司实体
rag.merge_entities(
source_entities=["微软公司", "Microsoft Corporation", "MSFT"],
target_entity="微软",
merge_strategy={
"description": "concatenate", # 组合所有描述
"source_id": "join_unique" # 组合源ID
},
target_entity_data={
"entity_type": "ORGANIZATION",
}
)
```
合并实体时:
* 所有来自源实体的关系都会重定向到目标实体
* 重复的关系会被智能合并
* 防止自我关系(循环)
* 合并后删除源实体
* 保留关系权重和属性
</details>
## 缓存 ## 缓存
<details> <details>

350
README.md
View File

@@ -274,12 +274,6 @@ class QueryParam:
max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000")) max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
"""Maximum number of tokens allocated for entity descriptions in local retrieval.""" """Maximum number of tokens allocated for entity descriptions in local retrieval."""
hl_keywords: list[str] = field(default_factory=list)
"""List of high-level keywords to prioritize in retrieval."""
ll_keywords: list[str] = field(default_factory=list)
"""List of low-level keywords to refine retrieval focus."""
conversation_history: list[dict[str, str]] = field(default_factory=list) conversation_history: list[dict[str, str]] = field(default_factory=list)
"""Stores past conversation history to maintain context. """Stores past conversation history to maintain context.
Format: [{"role": "user/assistant", "content": "message"}]. Format: [{"role": "user/assistant", "content": "message"}].
@@ -296,6 +290,11 @@ class QueryParam:
If provided, this will be used instead of the global model function. If provided, this will be used instead of the global model function.
This allows using different models for different query modes. This allows using different models for different query modes.
""" """
user_prompt: str | None = None
"""User-provided prompt for the query.
If provided, this will be used instead of the default value from the prompt template.
"""
``` ```
> The default value of top_k can be changed via the environment variable TOP_K. > The default value of top_k can be changed via the environment variable TOP_K.
@@ -571,76 +570,26 @@ response = rag.query(
</details> </details>
### Custom Prompt Support ### Custom User Prompt Support
LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it: Custom user prompts do not affect the query content; they are only used to instruct the LLM on how to handle the query results. Here's how to use it:
<details>
<summary> <b> Usage Example </b></summary>
```python ```python
# Create query parameters # Create query parameters
query_param = QueryParam( query_param = QueryParam(
mode="hybrid", # or other mode: "local", "global", "hybrid", "mix" and "naive" mode = "hybrid", # or other mode: "local", "global", "hybrid", "mix" and "naive"
user_prompt = "Please create the diagram using the Mermaid syntax"
) )
# Example 1: Using the default system prompt # Query and process
response_default = rag.query( response_default = rag.query(
"What are the primary benefits of renewable energy?", "Please draw a character relationship diagram for Scrooge",
param=query_param param=query_param
) )
print(response_default) print(response_default)
# Example 2: Using a custom prompt
custom_prompt = """
You are an expert assistant in environmental science. Provide detailed and structured answers with examples.
---Conversation History---
{history}
---Knowledge Base---
{context_data}
---Response Rules---
- Target format and length: {response_type}
"""
response_custom = rag.query(
"What are the primary benefits of renewable energy?",
param=query_param,
system_prompt=custom_prompt # Pass the custom prompt
)
print(response_custom)
``` ```
</details>
### Separate Keyword Extraction
We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords.
**How It Works?**
The function operates by dividing the input into two parts:
- `User Query`
- `Prompt`
It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question.
<details>
<summary> <b> Usage Example </b></summary>
This `example` shows how to tailor the function for educational content, focusing on detailed explanations for older students.
```python
rag.query_with_separate_keyword_extraction(
query="Explain the law of gravity",
prompt="Provide a detailed explanation suitable for high school students studying physics.",
param=QueryParam(mode="hybrid")
)
```
</details>
### Insert ### Insert
@@ -725,70 +674,6 @@ rag.insert(text_content.decode('utf-8'))
</details> </details>
<details>
<summary> <b> Insert Custom KG </b></summary>
```python
custom_kg = {
"chunks": [
{
"content": "Alice and Bob are collaborating on quantum computing research.",
"source_id": "doc-1"
}
],
"entities": [
{
"entity_name": "Alice",
"entity_type": "person",
"description": "Alice is a researcher specializing in quantum physics.",
"source_id": "doc-1"
},
{
"entity_name": "Bob",
"entity_type": "person",
"description": "Bob is a mathematician.",
"source_id": "doc-1"
},
{
"entity_name": "Quantum Computing",
"entity_type": "technology",
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
"source_id": "doc-1"
}
],
"relationships": [
{
"src_id": "Alice",
"tgt_id": "Bob",
"description": "Alice and Bob are research partners.",
"keywords": "collaboration research",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Alice",
"tgt_id": "Quantum Computing",
"description": "Alice conducts research on quantum computing.",
"keywords": "research expertise",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Bob",
"tgt_id": "Quantum Computing",
"description": "Bob researches quantum computing.",
"keywords": "research application",
"weight": 1.0,
"source_id": "doc-1"
}
]
}
rag.insert_custom_kg(custom_kg)
```
</details>
<details> <details>
<summary><b>Citation Functionality</b></summary> <summary><b>Citation Functionality</b></summary>
@@ -992,12 +877,78 @@ updated_relation = rag.edit_relation("Google", "Google Mail", {
All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`). All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).
#### Entity Operations </details>
<details>
<summary> <b> Insert Custom KG </b></summary>
```python
custom_kg = {
"chunks": [
{
"content": "Alice and Bob are collaborating on quantum computing research.",
"source_id": "doc-1"
}
],
"entities": [
{
"entity_name": "Alice",
"entity_type": "person",
"description": "Alice is a researcher specializing in quantum physics.",
"source_id": "doc-1"
},
{
"entity_name": "Bob",
"entity_type": "person",
"description": "Bob is a mathematician.",
"source_id": "doc-1"
},
{
"entity_name": "Quantum Computing",
"entity_type": "technology",
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
"source_id": "doc-1"
}
],
"relationships": [
{
"src_id": "Alice",
"tgt_id": "Bob",
"description": "Alice and Bob are research partners.",
"keywords": "collaboration research",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Alice",
"tgt_id": "Quantum Computing",
"description": "Alice conducts research on quantum computing.",
"keywords": "research expertise",
"weight": 1.0,
"source_id": "doc-1"
},
{
"src_id": "Bob",
"tgt_id": "Quantum Computing",
"description": "Bob researches quantum computing.",
"keywords": "research application",
"weight": 1.0,
"source_id": "doc-1"
}
]
}
rag.insert_custom_kg(custom_kg)
```
</details>
<details>
<summary> <b>Other Entity and Relation Operations</b></summary>
- **create_entity**: Creates a new entity with specified attributes - **create_entity**: Creates a new entity with specified attributes
- **edit_entity**: Updates an existing entity's attributes or renames it - **edit_entity**: Updates an existing entity's attributes or renames it
#### Relation Operations
- **create_relation**: Creates a new relation between existing entities - **create_relation**: Creates a new relation between existing entities
- **edit_relation**: Updates an existing relation's attributes - **edit_relation**: Updates an existing relation's attributes
@@ -1006,6 +957,77 @@ These operations maintain data consistency across both the graph database and ve
</details> </details>
## Entity Merging
<details>
<summary> <b>Merge Entities and Their Relationships</b> </summary>
LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
```python
# Basic entity merging
rag.merge_entities(
source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
target_entity="AI Technology"
)
```
With custom merge strategy:
```python
# Define custom merge strategy for different fields
rag.merge_entities(
source_entities=["John Smith", "Dr. Smith", "J. Smith"],
target_entity="John Smith",
merge_strategy={
"description": "concatenate", # Combine all descriptions
"entity_type": "keep_first", # Keep the entity type from the first entity
"source_id": "join_unique" # Combine all unique source IDs
}
)
```
With custom target entity data:
```python
# Specify exact values for the merged entity
rag.merge_entities(
source_entities=["New York", "NYC", "Big Apple"],
target_entity="New York City",
target_entity_data={
"entity_type": "LOCATION",
"description": "New York City is the most populous city in the United States.",
}
)
```
Advanced usage combining both approaches:
```python
# Merge company entities with both strategy and custom data
rag.merge_entities(
source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
target_entity="Microsoft",
merge_strategy={
"description": "concatenate", # Combine all descriptions
"source_id": "join_unique" # Combine source IDs
},
target_entity_data={
"entity_type": "ORGANIZATION",
}
)
```
When merging entities:
* All relationships from source entities are redirected to the target entity
* Duplicate relationships are intelligently merged
* Self-relationships (loops) are prevented
* Source entities are removed after merging
* Relationship weights and attributes are preserved
</details>
## Token Usage Tracking ## Token Usage Tracking
<details> <details>
@@ -1112,78 +1134,6 @@ All exports include:
* Relation data (connections between entities) * Relation data (connections between entities)
* Relationship information from vector database * Relationship information from vector database
## Entity Merging
<details>
<summary> <b>Merge Entities and Their Relationships</b> </summary>
LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
```python
# Basic entity merging
rag.merge_entities(
source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
target_entity="AI Technology"
)
```
With custom merge strategy:
```python
# Define custom merge strategy for different fields
rag.merge_entities(
source_entities=["John Smith", "Dr. Smith", "J. Smith"],
target_entity="John Smith",
merge_strategy={
"description": "concatenate", # Combine all descriptions
"entity_type": "keep_first", # Keep the entity type from the first entity
"source_id": "join_unique" # Combine all unique source IDs
}
)
```
With custom target entity data:
```python
# Specify exact values for the merged entity
rag.merge_entities(
source_entities=["New York", "NYC", "Big Apple"],
target_entity="New York City",
target_entity_data={
"entity_type": "LOCATION",
"description": "New York City is the most populous city in the United States.",
}
)
```
Advanced usage combining both approaches:
```python
# Merge company entities with both strategy and custom data
rag.merge_entities(
source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
target_entity="Microsoft",
merge_strategy={
"description": "concatenate", # Combine all descriptions
"source_id": "join_unique" # Combine source IDs
},
target_entity_data={
"entity_type": "ORGANIZATION",
}
)
```
When merging entities:
* All relationships from source entities are redirected to the target entity
* Duplicate relationships are intelligently merged
* Self-relationships (loops) are prevented
* Source entities are removed after merging
* Relationship weights and attributes are preserved
</details>
## Cache ## Cache
<details> <details>