Update README.md
This commit is contained in:
340
README-zh.md
340
README-zh.md
@@ -260,6 +260,11 @@ class QueryParam:
|
|||||||
If provided, this will be used instead of the global model function.
|
If provided, this will be used instead of the global model function.
|
||||||
This allows using different models for different query modes.
|
This allows using different models for different query modes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
user_prompt: str | None = None
|
||||||
|
"""User-provided prompt for the query.
|
||||||
|
If provided, this will be used instead of the default value from the prompt template.
|
||||||
|
"""
|
||||||
```
|
```
|
||||||
|
|
||||||
> top_k的默认值可以通过环境变量TOP_K更改。
|
> top_k的默认值可以通过环境变量TOP_K更改。
|
||||||
@@ -527,128 +532,23 @@ response = rag.query(
|
|||||||
)
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
### 自定义提示词
|
### 自定义用户提示词
|
||||||
|
|
||||||
LightRAG现在支持自定义提示,以便对系统行为进行精细控制。以下是使用方法:
|
自定义用户提示词不影响查询内容,仅仅用于向LLM指示如何处理查询结果。以下是使用方法:
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# 创建查询参数
|
# 创建查询参数
|
||||||
query_param = QueryParam(
|
query_param = QueryParam(
|
||||||
mode="hybrid", # 或其他模式:"local"、"global"、"hybrid"、"mix"和"naive"
|
mode = "hybrid", # 或其他模式:"local"、"global"、"hybrid"、"mix"和"naive"
|
||||||
|
user_prompt = "Please create the diagram using the Mermaid syntax"
|
||||||
)
|
)
|
||||||
|
|
||||||
# 示例1:使用默认系统提示
|
# 查询和处理
|
||||||
response_default = rag.query(
|
response_default = rag.query(
|
||||||
"可再生能源的主要好处是什么?",
|
"Please draw a character relationship diagram for Scrooge",
|
||||||
param=query_param
|
param=query_param
|
||||||
)
|
)
|
||||||
print(response_default)
|
print(response_default)
|
||||||
|
|
||||||
# 示例2:使用自定义提示
|
|
||||||
custom_prompt = """
|
|
||||||
您是环境科学领域的专家助手。请提供详细且结构化的答案,并附带示例。
|
|
||||||
---对话历史---
|
|
||||||
{history}
|
|
||||||
|
|
||||||
---知识库---
|
|
||||||
{context_data}
|
|
||||||
|
|
||||||
---响应规则---
|
|
||||||
|
|
||||||
- 目标格式和长度:{response_type}
|
|
||||||
"""
|
|
||||||
response_custom = rag.query(
|
|
||||||
"可再生能源的主要好处是什么?",
|
|
||||||
param=query_param,
|
|
||||||
system_prompt=custom_prompt # 传递自定义提示
|
|
||||||
)
|
|
||||||
print(response_custom)
|
|
||||||
```
|
|
||||||
|
|
||||||
### 关键词提取
|
|
||||||
|
|
||||||
我们引入了新函数`query_with_separate_keyword_extraction`来增强关键词提取功能。该函数将关键词提取过程与用户提示分开,专注于查询以提高提取关键词的相关性。
|
|
||||||
|
|
||||||
* 工作原理
|
|
||||||
|
|
||||||
该函数将输入分为两部分:
|
|
||||||
|
|
||||||
- `用户查询`
|
|
||||||
- `提示`
|
|
||||||
|
|
||||||
然后仅对`用户查询`执行关键词提取。这种分离确保提取过程是集中和相关的,不受`提示`中任何额外语言的影响。它还允许`提示`纯粹用于响应格式化,保持用户原始问题的意图和清晰度。
|
|
||||||
|
|
||||||
* 使用示例
|
|
||||||
|
|
||||||
这个`示例`展示了如何为教育内容定制函数,专注于为高年级学生提供详细解释。
|
|
||||||
|
|
||||||
```python
|
|
||||||
rag.query_with_separate_keyword_extraction(
|
|
||||||
query="解释重力定律",
|
|
||||||
prompt="提供适合学习物理的高中生的详细解释。",
|
|
||||||
param=QueryParam(mode="hybrid")
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
### 插入自定义知识
|
|
||||||
|
|
||||||
```python
|
|
||||||
custom_kg = {
|
|
||||||
"chunks": [
|
|
||||||
{
|
|
||||||
"content": "Alice和Bob正在合作进行量子计算研究。",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"entities": [
|
|
||||||
{
|
|
||||||
"entity_name": "Alice",
|
|
||||||
"entity_type": "person",
|
|
||||||
"description": "Alice是一位专门研究量子物理的研究员。",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"entity_name": "Bob",
|
|
||||||
"entity_type": "person",
|
|
||||||
"description": "Bob是一位数学家。",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"entity_name": "量子计算",
|
|
||||||
"entity_type": "technology",
|
|
||||||
"description": "量子计算利用量子力学现象进行计算。",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"relationships": [
|
|
||||||
{
|
|
||||||
"src_id": "Alice",
|
|
||||||
"tgt_id": "Bob",
|
|
||||||
"description": "Alice和Bob是研究伙伴。",
|
|
||||||
"keywords": "合作 研究",
|
|
||||||
"weight": 1.0,
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"src_id": "Alice",
|
|
||||||
"tgt_id": "量子计算",
|
|
||||||
"description": "Alice进行量子计算研究。",
|
|
||||||
"keywords": "研究 专业",
|
|
||||||
"weight": 1.0,
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"src_id": "Bob",
|
|
||||||
"tgt_id": "量子计算",
|
|
||||||
"description": "Bob研究量子计算。",
|
|
||||||
"keywords": "研究 应用",
|
|
||||||
"weight": 1.0,
|
|
||||||
"source_id": "doc-1"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
rag.insert_custom_kg(custom_kg)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
### 插入
|
### 插入
|
||||||
@@ -934,23 +834,160 @@ updated_relation = rag.edit_relation("Google", "Google Mail", {
|
|||||||
})
|
})
|
||||||
```
|
```
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
所有操作都有同步和异步版本。异步版本带有前缀"a"(例如,`acreate_entity`,`aedit_relation`)。
|
所有操作都有同步和异步版本。异步版本带有前缀"a"(例如,`acreate_entity`,`aedit_relation`)。
|
||||||
|
|
||||||
#### 实体操作
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary> <b>插入自定义知识</b> </summary>
|
||||||
|
|
||||||
|
```python
|
||||||
|
custom_kg = {
|
||||||
|
"chunks": [
|
||||||
|
{
|
||||||
|
"content": "Alice和Bob正在合作进行量子计算研究。",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"entities": [
|
||||||
|
{
|
||||||
|
"entity_name": "Alice",
|
||||||
|
"entity_type": "person",
|
||||||
|
"description": "Alice是一位专门研究量子物理的研究员。",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entity_name": "Bob",
|
||||||
|
"entity_type": "person",
|
||||||
|
"description": "Bob是一位数学家。",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entity_name": "量子计算",
|
||||||
|
"entity_type": "technology",
|
||||||
|
"description": "量子计算利用量子力学现象进行计算。",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"relationships": [
|
||||||
|
{
|
||||||
|
"src_id": "Alice",
|
||||||
|
"tgt_id": "Bob",
|
||||||
|
"description": "Alice和Bob是研究伙伴。",
|
||||||
|
"keywords": "合作 研究",
|
||||||
|
"weight": 1.0,
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"src_id": "Alice",
|
||||||
|
"tgt_id": "量子计算",
|
||||||
|
"description": "Alice进行量子计算研究。",
|
||||||
|
"keywords": "研究 专业",
|
||||||
|
"weight": 1.0,
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"src_id": "Bob",
|
||||||
|
"tgt_id": "量子计算",
|
||||||
|
"description": "Bob研究量子计算。",
|
||||||
|
"keywords": "研究 应用",
|
||||||
|
"weight": 1.0,
|
||||||
|
"source_id": "doc-1"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
rag.insert_custom_kg(custom_kg)
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary> <b>其它实体与关系操作</b> </summary>
|
||||||
|
|
||||||
- **create_entity**:创建具有指定属性的新实体
|
- **create_entity**:创建具有指定属性的新实体
|
||||||
- **edit_entity**:更新现有实体的属性或重命名它
|
- **edit_entity**:更新现有实体的属性或重命名它
|
||||||
|
|
||||||
#### 关系操作
|
|
||||||
|
|
||||||
- **create_relation**:在现有实体之间创建新关系
|
- **create_relation**:在现有实体之间创建新关系
|
||||||
- **edit_relation**:更新现有关系的属性
|
- **edit_relation**:更新现有关系的属性
|
||||||
|
|
||||||
这些操作在图数据库和向量数据库组件之间保持数据一致性,确保您的知识图谱保持连贯。
|
这些操作在图数据库和向量数据库组件之间保持数据一致性,确保您的知识图谱保持连贯。
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
## 实体合并
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary> <b>合并实体及其关系</b> </summary>
|
||||||
|
|
||||||
|
LightRAG现在支持将多个实体合并为单个实体,自动处理所有关系:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 基本实体合并
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["人工智能", "AI", "机器智能"],
|
||||||
|
target_entity="AI技术"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
使用自定义合并策略:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 为不同字段定义自定义合并策略
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"],
|
||||||
|
target_entity="约翰·史密斯",
|
||||||
|
merge_strategy={
|
||||||
|
"description": "concatenate", # 组合所有描述
|
||||||
|
"entity_type": "keep_first", # 保留第一个实体的类型
|
||||||
|
"source_id": "join_unique" # 组合所有唯一的源ID
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
使用自定义目标实体数据:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 为合并后的实体指定确切值
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["纽约", "NYC", "大苹果"],
|
||||||
|
target_entity="纽约市",
|
||||||
|
target_entity_data={
|
||||||
|
"entity_type": "LOCATION",
|
||||||
|
"description": "纽约市是美国人口最多的城市。",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
结合两种方法的高级用法:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# 使用策略和自定义数据合并公司实体
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["微软公司", "Microsoft Corporation", "MSFT"],
|
||||||
|
target_entity="微软",
|
||||||
|
merge_strategy={
|
||||||
|
"description": "concatenate", # 组合所有描述
|
||||||
|
"source_id": "join_unique" # 组合源ID
|
||||||
|
},
|
||||||
|
target_entity_data={
|
||||||
|
"entity_type": "ORGANIZATION",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
合并实体时:
|
||||||
|
|
||||||
|
* 所有来自源实体的关系都会重定向到目标实体
|
||||||
|
* 重复的关系会被智能合并
|
||||||
|
* 防止自我关系(循环)
|
||||||
|
* 合并后删除源实体
|
||||||
|
* 保留关系权重和属性
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
## Token统计功能
|
## Token统计功能
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary> <b>概述和使用</b> </summary>
|
<summary> <b>概述和使用</b> </summary>
|
||||||
|
|
||||||
@@ -1048,77 +1085,6 @@ rag.export_data("complete_data.csv", include_vector_data=True)
|
|||||||
* 关系数据(实体之间的连接)
|
* 关系数据(实体之间的连接)
|
||||||
* 来自向量数据库的关系信息
|
* 来自向量数据库的关系信息
|
||||||
|
|
||||||
## 实体合并
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary> <b>合并实体及其关系</b> </summary>
|
|
||||||
|
|
||||||
LightRAG现在支持将多个实体合并为单个实体,自动处理所有关系:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# 基本实体合并
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["人工智能", "AI", "机器智能"],
|
|
||||||
target_entity="AI技术"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
使用自定义合并策略:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# 为不同字段定义自定义合并策略
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["约翰·史密斯", "史密斯博士", "J·史密斯"],
|
|
||||||
target_entity="约翰·史密斯",
|
|
||||||
merge_strategy={
|
|
||||||
"description": "concatenate", # 组合所有描述
|
|
||||||
"entity_type": "keep_first", # 保留第一个实体的类型
|
|
||||||
"source_id": "join_unique" # 组合所有唯一的源ID
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
使用自定义目标实体数据:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# 为合并后的实体指定确切值
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["纽约", "NYC", "大苹果"],
|
|
||||||
target_entity="纽约市",
|
|
||||||
target_entity_data={
|
|
||||||
"entity_type": "LOCATION",
|
|
||||||
"description": "纽约市是美国人口最多的城市。",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
结合两种方法的高级用法:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# 使用策略和自定义数据合并公司实体
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["微软公司", "Microsoft Corporation", "MSFT"],
|
|
||||||
target_entity="微软",
|
|
||||||
merge_strategy={
|
|
||||||
"description": "concatenate", # 组合所有描述
|
|
||||||
"source_id": "join_unique" # 组合源ID
|
|
||||||
},
|
|
||||||
target_entity_data={
|
|
||||||
"entity_type": "ORGANIZATION",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
合并实体时:
|
|
||||||
|
|
||||||
* 所有来自源实体的关系都会重定向到目标实体
|
|
||||||
* 重复的关系会被智能合并
|
|
||||||
* 防止自我关系(循环)
|
|
||||||
* 合并后删除源实体
|
|
||||||
* 保留关系权重和属性
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
## 缓存
|
## 缓存
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
|
350
README.md
350
README.md
@@ -274,12 +274,6 @@ class QueryParam:
|
|||||||
max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
|
max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
|
||||||
"""Maximum number of tokens allocated for entity descriptions in local retrieval."""
|
"""Maximum number of tokens allocated for entity descriptions in local retrieval."""
|
||||||
|
|
||||||
hl_keywords: list[str] = field(default_factory=list)
|
|
||||||
"""List of high-level keywords to prioritize in retrieval."""
|
|
||||||
|
|
||||||
ll_keywords: list[str] = field(default_factory=list)
|
|
||||||
"""List of low-level keywords to refine retrieval focus."""
|
|
||||||
|
|
||||||
conversation_history: list[dict[str, str]] = field(default_factory=list)
|
conversation_history: list[dict[str, str]] = field(default_factory=list)
|
||||||
"""Stores past conversation history to maintain context.
|
"""Stores past conversation history to maintain context.
|
||||||
Format: [{"role": "user/assistant", "content": "message"}].
|
Format: [{"role": "user/assistant", "content": "message"}].
|
||||||
@@ -296,6 +290,11 @@ class QueryParam:
|
|||||||
If provided, this will be used instead of the global model function.
|
If provided, this will be used instead of the global model function.
|
||||||
This allows using different models for different query modes.
|
This allows using different models for different query modes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
user_prompt: str | None = None
|
||||||
|
"""User-provided prompt for the query.
|
||||||
|
If provided, this will be used instead of the default value from the prompt template.
|
||||||
|
"""
|
||||||
```
|
```
|
||||||
|
|
||||||
> The default value of top_k can be changed via the TOP_K environment variable.
|
> The default value of top_k can be changed via the TOP_K environment variable.
|
||||||
@@ -571,76 +570,26 @@ response = rag.query(
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
### Custom Prompt Support
|
### Custom User Prompt Support
|
||||||
|
|
||||||
LightRAG now supports custom prompts for fine-tuned control over the system's behavior. Here's how to use it:
|
Custom user prompts do not affect the query content; they are only used to instruct the LLM on how to handle the query results. Here's how to use them:
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary> <b> Usage Example </b></summary>
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
# Create query parameters
|
# Create query parameters
|
||||||
query_param = QueryParam(
|
query_param = QueryParam(
|
||||||
mode="hybrid", # or other mode: "local", "global", "hybrid", "mix" and "naive"
|
mode = "hybrid", # or other mode: "local", "global", "hybrid", "mix" and "naive"
|
||||||
|
user_prompt = "Please create the diagram using the Mermaid syntax"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Example 1: Using the default system prompt
|
# Query and process
|
||||||
response_default = rag.query(
|
response_default = rag.query(
|
||||||
"What are the primary benefits of renewable energy?",
|
"Please draw a character relationship diagram for Scrooge",
|
||||||
param=query_param
|
param=query_param
|
||||||
)
|
)
|
||||||
print(response_default)
|
print(response_default)
|
||||||
|
|
||||||
# Example 2: Using a custom prompt
|
|
||||||
custom_prompt = """
|
|
||||||
You are an expert assistant in environmental science. Provide detailed and structured answers with examples.
|
|
||||||
---Conversation History---
|
|
||||||
{history}
|
|
||||||
|
|
||||||
---Knowledge Base---
|
|
||||||
{context_data}
|
|
||||||
|
|
||||||
---Response Rules---
|
|
||||||
|
|
||||||
- Target format and length: {response_type}
|
|
||||||
"""
|
|
||||||
response_custom = rag.query(
|
|
||||||
"What are the primary benefits of renewable energy?",
|
|
||||||
param=query_param,
|
|
||||||
system_prompt=custom_prompt # Pass the custom prompt
|
|
||||||
)
|
|
||||||
print(response_custom)
|
|
||||||
```
|
```
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
### Separate Keyword Extraction
|
|
||||||
|
|
||||||
We've introduced a new function `query_with_separate_keyword_extraction` to enhance the keyword extraction capabilities. This function separates the keyword extraction process from the user's prompt, focusing solely on the query to improve the relevance of extracted keywords.
|
|
||||||
|
|
||||||
**How It Works?**
|
|
||||||
|
|
||||||
The function operates by dividing the input into two parts:
|
|
||||||
|
|
||||||
- `User Query`
|
|
||||||
- `Prompt`
|
|
||||||
|
|
||||||
It then performs keyword extraction exclusively on the `user query`. This separation ensures that the extraction process is focused and relevant, unaffected by any additional language in the `prompt`. It also allows the `prompt` to serve purely for response formatting, maintaining the intent and clarity of the user's original question.
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary> <b> Usage Example </b></summary>
|
|
||||||
|
|
||||||
This `example` shows how to tailor the function for educational content, focusing on detailed explanations for older students.
|
|
||||||
|
|
||||||
```python
|
|
||||||
rag.query_with_separate_keyword_extraction(
|
|
||||||
query="Explain the law of gravity",
|
|
||||||
prompt="Provide a detailed explanation suitable for high school students studying physics.",
|
|
||||||
param=QueryParam(mode="hybrid")
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
### Insert
|
### Insert
|
||||||
|
|
||||||
@@ -725,70 +674,6 @@ rag.insert(text_content.decode('utf-8'))
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary> <b> Insert Custom KG </b></summary>
|
|
||||||
|
|
||||||
```python
|
|
||||||
custom_kg = {
|
|
||||||
"chunks": [
|
|
||||||
{
|
|
||||||
"content": "Alice and Bob are collaborating on quantum computing research.",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"entities": [
|
|
||||||
{
|
|
||||||
"entity_name": "Alice",
|
|
||||||
"entity_type": "person",
|
|
||||||
"description": "Alice is a researcher specializing in quantum physics.",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"entity_name": "Bob",
|
|
||||||
"entity_type": "person",
|
|
||||||
"description": "Bob is a mathematician.",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"entity_name": "Quantum Computing",
|
|
||||||
"entity_type": "technology",
|
|
||||||
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
|
|
||||||
"source_id": "doc-1"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"relationships": [
|
|
||||||
{
|
|
||||||
"src_id": "Alice",
|
|
||||||
"tgt_id": "Bob",
|
|
||||||
"description": "Alice and Bob are research partners.",
|
|
||||||
"keywords": "collaboration research",
|
|
||||||
"weight": 1.0,
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"src_id": "Alice",
|
|
||||||
"tgt_id": "Quantum Computing",
|
|
||||||
"description": "Alice conducts research on quantum computing.",
|
|
||||||
"keywords": "research expertise",
|
|
||||||
"weight": 1.0,
|
|
||||||
"source_id": "doc-1"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"src_id": "Bob",
|
|
||||||
"tgt_id": "Quantum Computing",
|
|
||||||
"description": "Bob researches quantum computing.",
|
|
||||||
"keywords": "research application",
|
|
||||||
"weight": 1.0,
|
|
||||||
"source_id": "doc-1"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
rag.insert_custom_kg(custom_kg)
|
|
||||||
```
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary><b>Citation Functionality</b></summary>
|
<summary><b>Citation Functionality</b></summary>
|
||||||
|
|
||||||
@@ -992,12 +877,78 @@ updated_relation = rag.edit_relation("Google", "Google Mail", {
|
|||||||
|
|
||||||
All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).
|
All operations are available in both synchronous and asynchronous versions. The asynchronous versions have the prefix "a" (e.g., `acreate_entity`, `aedit_relation`).
|
||||||
|
|
||||||
#### Entity Operations
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary> <b> Insert Custom KG </b></summary>
|
||||||
|
|
||||||
|
```python
|
||||||
|
custom_kg = {
|
||||||
|
"chunks": [
|
||||||
|
{
|
||||||
|
"content": "Alice and Bob are collaborating on quantum computing research.",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"entities": [
|
||||||
|
{
|
||||||
|
"entity_name": "Alice",
|
||||||
|
"entity_type": "person",
|
||||||
|
"description": "Alice is a researcher specializing in quantum physics.",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entity_name": "Bob",
|
||||||
|
"entity_type": "person",
|
||||||
|
"description": "Bob is a mathematician.",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"entity_name": "Quantum Computing",
|
||||||
|
"entity_type": "technology",
|
||||||
|
"description": "Quantum computing utilizes quantum mechanical phenomena for computation.",
|
||||||
|
"source_id": "doc-1"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"relationships": [
|
||||||
|
{
|
||||||
|
"src_id": "Alice",
|
||||||
|
"tgt_id": "Bob",
|
||||||
|
"description": "Alice and Bob are research partners.",
|
||||||
|
"keywords": "collaboration research",
|
||||||
|
"weight": 1.0,
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"src_id": "Alice",
|
||||||
|
"tgt_id": "Quantum Computing",
|
||||||
|
"description": "Alice conducts research on quantum computing.",
|
||||||
|
"keywords": "research expertise",
|
||||||
|
"weight": 1.0,
|
||||||
|
"source_id": "doc-1"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"src_id": "Bob",
|
||||||
|
"tgt_id": "Quantum Computing",
|
||||||
|
"description": "Bob researches quantum computing.",
|
||||||
|
"keywords": "research application",
|
||||||
|
"weight": 1.0,
|
||||||
|
"source_id": "doc-1"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
rag.insert_custom_kg(custom_kg)
|
||||||
|
```
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary> <b>Other Entity and Relation Operations</b></summary>
|
||||||
|
|
||||||
- **create_entity**: Creates a new entity with specified attributes
|
- **create_entity**: Creates a new entity with specified attributes
|
||||||
- **edit_entity**: Updates an existing entity's attributes or renames it
|
- **edit_entity**: Updates an existing entity's attributes or renames it
|
||||||
|
|
||||||
#### Relation Operations
|
|
||||||
|
|
||||||
- **create_relation**: Creates a new relation between existing entities
|
- **create_relation**: Creates a new relation between existing entities
|
||||||
- **edit_relation**: Updates an existing relation's attributes
|
- **edit_relation**: Updates an existing relation's attributes
|
||||||
@@ -1006,6 +957,77 @@ These operations maintain data consistency across both the graph database and ve
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
|
## Entity Merging
|
||||||
|
|
||||||
|
<details>
|
||||||
|
<summary> <b>Merge Entities and Their Relationships</b> </summary>
|
||||||
|
|
||||||
|
LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Basic entity merging
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
|
||||||
|
target_entity="AI Technology"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
With custom merge strategy:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Define custom merge strategy for different fields
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["John Smith", "Dr. Smith", "J. Smith"],
|
||||||
|
target_entity="John Smith",
|
||||||
|
merge_strategy={
|
||||||
|
"description": "concatenate", # Combine all descriptions
|
||||||
|
"entity_type": "keep_first", # Keep the entity type from the first entity
|
||||||
|
"source_id": "join_unique" # Combine all unique source IDs
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
With custom target entity data:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Specify exact values for the merged entity
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["New York", "NYC", "Big Apple"],
|
||||||
|
target_entity="New York City",
|
||||||
|
target_entity_data={
|
||||||
|
"entity_type": "LOCATION",
|
||||||
|
"description": "New York City is the most populous city in the United States.",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
Advanced usage combining both approaches:
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Merge company entities with both strategy and custom data
|
||||||
|
rag.merge_entities(
|
||||||
|
source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
|
||||||
|
target_entity="Microsoft",
|
||||||
|
merge_strategy={
|
||||||
|
"description": "concatenate", # Combine all descriptions
|
||||||
|
"source_id": "join_unique" # Combine source IDs
|
||||||
|
},
|
||||||
|
target_entity_data={
|
||||||
|
"entity_type": "ORGANIZATION",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
When merging entities:
|
||||||
|
|
||||||
|
* All relationships from source entities are redirected to the target entity
|
||||||
|
* Duplicate relationships are intelligently merged
|
||||||
|
* Self-relationships (loops) are prevented
|
||||||
|
* Source entities are removed after merging
|
||||||
|
* Relationship weights and attributes are preserved
|
||||||
|
|
||||||
|
</details>
|
||||||
|
|
||||||
## Token Usage Tracking
|
## Token Usage Tracking
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
@@ -1112,78 +1134,6 @@ All exports include:
|
|||||||
* Relation data (connections between entities)
|
* Relation data (connections between entities)
|
||||||
* Relationship information from vector database
|
* Relationship information from vector database
|
||||||
|
|
||||||
|
|
||||||
## Entity Merging
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary> <b>Merge Entities and Their Relationships</b> </summary>
|
|
||||||
|
|
||||||
LightRAG now supports merging multiple entities into a single entity, automatically handling all relationships:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Basic entity merging
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["Artificial Intelligence", "AI", "Machine Intelligence"],
|
|
||||||
target_entity="AI Technology"
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
With custom merge strategy:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Define custom merge strategy for different fields
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["John Smith", "Dr. Smith", "J. Smith"],
|
|
||||||
target_entity="John Smith",
|
|
||||||
merge_strategy={
|
|
||||||
"description": "concatenate", # Combine all descriptions
|
|
||||||
"entity_type": "keep_first", # Keep the entity type from the first entity
|
|
||||||
"source_id": "join_unique" # Combine all unique source IDs
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
With custom target entity data:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Specify exact values for the merged entity
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["New York", "NYC", "Big Apple"],
|
|
||||||
target_entity="New York City",
|
|
||||||
target_entity_data={
|
|
||||||
"entity_type": "LOCATION",
|
|
||||||
"description": "New York City is the most populous city in the United States.",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
Advanced usage combining both approaches:
|
|
||||||
|
|
||||||
```python
|
|
||||||
# Merge company entities with both strategy and custom data
|
|
||||||
rag.merge_entities(
|
|
||||||
source_entities=["Microsoft Corp", "Microsoft Corporation", "MSFT"],
|
|
||||||
target_entity="Microsoft",
|
|
||||||
merge_strategy={
|
|
||||||
"description": "concatenate", # Combine all descriptions
|
|
||||||
"source_id": "join_unique" # Combine source IDs
|
|
||||||
},
|
|
||||||
target_entity_data={
|
|
||||||
"entity_type": "ORGANIZATION",
|
|
||||||
}
|
|
||||||
)
|
|
||||||
```
|
|
||||||
|
|
||||||
When merging entities:
|
|
||||||
|
|
||||||
* All relationships from source entities are redirected to the target entity
|
|
||||||
* Duplicate relationships are intelligently merged
|
|
||||||
* Self-relationships (loops) are prevented
|
|
||||||
* Source entities are removed after merging
|
|
||||||
* Relationship weights and attributes are preserved
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
## Cache
|
## Cache
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
|
Reference in New Issue
Block a user