Merge remote-tracking branch 'origin/main'
# Conflicts:
#   README.md
LICENSE (2 changed lines)
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2025 Gustavo Ye
+Copyright (c) 2025 LarFii
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
README.md (67 changed lines)
@@ -12,7 +12,7 @@
 </p>
 <p>
 <img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
-<img src="https://img.shields.io/badge/python->=3.10-blue">
+<img src="https://img.shields.io/badge/python-3.10-blue">
 <a href="https://pypi.org/project/lightrag-hku/"><img src="https://img.shields.io/pypi/v/lightrag-hku.svg"></a>
 <a href="https://pepy.tech/project/lightrag-hku"><img src="https://static.pepy.tech/badge/lightrag-hku/month"></a>
 </p>
@@ -637,7 +637,7 @@ if __name__ == "__main__":
 | **llm\_model\_kwargs** | `dict` | Additional parameters for LLM generation | |
 | **vector\_db\_storage\_cls\_kwargs** | `dict` | Additional parameters for vector database (currently not used) | |
 | **enable\_llm\_cache** | `bool` | If `TRUE`, stores LLM results in cache; repeated prompts return cached responses | `TRUE` |
-| **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `FALSE` |
+| **enable\_llm\_cache\_for\_entity\_extract** | `bool` | If `TRUE`, stores LLM results in cache for entity extraction; Good for beginners to debug your application | `TRUE` |
 | **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
 | **convert\_response\_to\_json\_func** | `callable` | Not used | `convert_response_to_json` |
 | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
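For reference, a minimal sketch of how the options in this table might be passed when constructing `LightRAG`. The working directory and model choice are placeholder assumptions; the cache settings mirror the defaults documented above.

```python
from lightrag import LightRAG
from lightrag.llm import gpt_4o_mini_complete

# Illustrative configuration only; paths and values are examples, not recommendations.
rag = LightRAG(
    working_dir="./lightrag_cache",            # placeholder directory
    llm_model_func=gpt_4o_mini_complete,       # default LLM shown elsewhere in this diff
    enable_llm_cache=True,
    enable_llm_cache_for_entity_extract=True,  # new default after this change
    embedding_cache_config={
        "enabled": True,
        "similarity_threshold": 0.95,
        "use_llm_check": False,
    },
    addon_params={"language": "English", "insert_batch_size": 10},
)
```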
@@ -892,69 +892,6 @@ def extract_queries(file_path):
 ```
 </details>
 
-## Code Structure
-
-```python
-.
-├── .github/
-│   ├── workflows/
-│   │   └── linting.yaml
-├── examples/
-│   ├── batch_eval.py
-│   ├── generate_query.py
-│   ├── graph_visual_with_html.py
-│   ├── graph_visual_with_neo4j.py
-│   ├── insert_custom_kg.py
-│   ├── lightrag_api_openai_compatible_demo.py
-│   ├── lightrag_api_oracle_demo..py
-│   ├── lightrag_azure_openai_demo.py
-│   ├── lightrag_bedrock_demo.py
-│   ├── lightrag_hf_demo.py
-│   ├── lightrag_lmdeploy_demo.py
-│   ├── lightrag_nvidia_demo.py
-│   ├── lightrag_ollama_demo.py
-│   ├── lightrag_openai_compatible_demo.py
-│   ├── lightrag_openai_demo.py
-│   ├── lightrag_oracle_demo.py
-│   ├── lightrag_siliconcloud_demo.py
-│   └── vram_management_demo.py
-├── lightrag/
-│   ├── api/
-│   │   ├── lollms_lightrag_server.py
-│   │   ├── ollama_lightrag_server.py
-│   │   ├── openai_lightrag_server.py
-│   │   ├── azure_openai_lightrag_server.py
-│   │   └── requirements.txt
-│   ├── kg/
-│   │   ├── __init__.py
-│   │   ├── oracle_impl.py
-│   │   └── neo4j_impl.py
-│   ├── __init__.py
-│   ├── base.py
-│   ├── lightrag.py
-│   ├── llm.py
-│   ├── operate.py
-│   ├── prompt.py
-│   ├── storage.py
-│   └── utils.py
-├── reproduce/
-│   ├── Step_0.py
-│   ├── Step_1_openai_compatible.py
-│   ├── Step_1.py
-│   ├── Step_2.py
-│   ├── Step_3_openai_compatible.py
-│   └── Step_3.py
-├── .gitignore
-├── .pre-commit-config.yaml
-├── get_all_edges_nx.py
-├── LICENSE
-├── README.md
-├── requirements.txt
-├── setup.py
-├── test_neo4j.py
-└── test.py
-```
-
 ## Install with API Support
 
 LightRAG provides optional API support through FastAPI servers that add RAG capabilities to existing LLM services. You can install LightRAG with API support in two ways:
examples/test_split_by_character.ipynb (new file, 1296 lines)
File diff suppressed because it is too large.
lightrag/__init__.py
@@ -1,5 +1,5 @@
 from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
 
-__version__ = "1.0.9"
+__version__ = "1.1.0"
 __author__ = "Zirui Guo"
 __url__ = "https://github.com/HKUDS/LightRAG"
lightrag/lightrag.py
@@ -45,6 +45,7 @@ from .storage import (
 
 from .prompt import GRAPH_FIELD_SEP
 
+
 # future KG integrations
 
 # from .kg.ArangoDB_impl import (
@@ -167,7 +168,7 @@ class LightRAG:
 
     # LLM
     llm_model_func: callable = gpt_4o_mini_complete  # hf_model_complete#
-    llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct"  #'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
+    llm_model_name: str = "meta-llama/Llama-3.2-1B-Instruct"  # 'meta-llama/Llama-3.2-1B'#'google/gemma-2-2b-it'
     llm_model_max_token_size: int = 32768
     llm_model_max_async: int = 16
     llm_model_kwargs: dict = field(default_factory=dict)
@@ -177,7 +178,7 @@ class LightRAG:
 
     enable_llm_cache: bool = True
     # Sometimes there are some reason the LLM failed at Extracting Entities, and we want to continue without LLM cost, we can use this flag
-    enable_llm_cache_for_entity_extract: bool = False
+    enable_llm_cache_for_entity_extract: bool = True
 
     # extension
     addon_params: dict = field(default_factory=dict)
@@ -186,6 +187,10 @@ class LightRAG:
     # Add new field for document status storage type
     doc_status_storage: str = field(default="JsonDocStatusStorage")
 
+    # Custom Chunking Function
+    chunking_func: callable = chunking_by_token_size
+    chunking_func_kwargs: dict = field(default_factory=dict)
+
     def __post_init__(self):
         log_file = os.path.join("lightrag.log")
         set_logger(log_file)
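The two new fields above let callers swap in their own chunking routine. Below is a hedged sketch of what such a plug-in might look like: `chunk_on_headings`, its `heading_prefix` keyword, and the word-count token estimate are all illustrative inventions, but the accepted keyword arguments and the returned record shape (`tokens`, `content`, `chunk_order_index`) follow the default `chunking_by_token_size` further down in this diff.

```python
from lightrag import LightRAG


# Hypothetical custom chunker: splits a document at heading lines. It accepts
# the same keyword arguments as the default chunking_by_token_size (plus any
# extras supplied via chunking_func_kwargs) and returns records with the same
# keys the rest of the pipeline expects.
def chunk_on_headings(
    content: str,
    split_by_character=None,
    split_by_character_only=False,
    overlap_token_size=128,
    max_token_size=1024,
    tiktoken_model="gpt-4o",
    heading_prefix="#",
    **kwargs,
):
    chunks, current = [], []
    for line in content.splitlines():
        if line.startswith(heading_prefix) and current:
            chunks.append("\n".join(current))
            current = []
        current.append(line)
    if current:
        chunks.append("\n".join(current))
    return [
        {
            "tokens": len(chunk.split()),  # rough stand-in for a tiktoken count
            "content": chunk.strip(),
            "chunk_order_index": index,
        }
        for index, chunk in enumerate(chunks)
    ]


rag = LightRAG(
    working_dir="./lightrag_cache",                 # placeholder directory
    chunking_func=chunk_on_headings,
    chunking_func_kwargs={"heading_prefix": "##"},  # forwarded to the chunker
)
```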
@@ -313,15 +318,25 @@ class LightRAG:
             "JsonDocStatusStorage": JsonDocStatusStorage,
         }
 
-    def insert(self, string_or_strings):
+    def insert(
+        self, string_or_strings, split_by_character=None, split_by_character_only=False
+    ):
         loop = always_get_an_event_loop()
-        return loop.run_until_complete(self.ainsert(string_or_strings))
+        return loop.run_until_complete(
+            self.ainsert(string_or_strings, split_by_character, split_by_character_only)
+        )
 
-    async def ainsert(self, string_or_strings):
+    async def ainsert(
+        self, string_or_strings, split_by_character=None, split_by_character_only=False
+    ):
         """Insert documents with checkpoint support
 
         Args:
             string_or_strings: Single document string or list of document strings
+            split_by_character: if split_by_character is not None, split the string by character, if chunk longer than
+            chunk_size, split the sub chunk by token size.
+            split_by_character_only: if split_by_character_only is True, split the string by character only, when
+            split_by_character is None, this parameter is ignored.
         """
         if isinstance(string_or_strings, str):
             string_or_strings = [string_or_strings]
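A small usage sketch of the new `insert` signature, assuming a `LightRAG` instance with default settings and a placeholder sample string:

```python
from lightrag import LightRAG

rag = LightRAG(working_dir="./lightrag_cache")  # placeholder directory

sample = "First paragraph...\n\nSecond paragraph...\n\nThird paragraph..."

# Split on blank lines first; over-long pieces are then re-split by token size.
rag.insert(sample, split_by_character="\n\n")

# Split on blank lines only; pieces are never re-split by token size.
rag.insert(sample, split_by_character="\n\n", split_by_character_only=True)

# Default behaviour is unchanged when no separator is given.
rag.insert(sample)
```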
@@ -358,7 +373,7 @@ class LightRAG:
             batch_docs = dict(list(new_docs.items())[i : i + batch_size])
 
             for doc_id, doc in tqdm_async(
-                batch_docs.items(), desc=f"Processing batch {i//batch_size + 1}"
+                batch_docs.items(), desc=f"Processing batch {i // batch_size + 1}"
             ):
                 try:
                     # Update status to processing
@@ -377,11 +392,14 @@ class LightRAG:
                             **dp,
                             "full_doc_id": doc_id,
                         }
-                        for dp in chunking_by_token_size(
+                        for dp in self.chunking_func(
                             doc["content"],
+                            split_by_character=split_by_character,
+                            split_by_character_only=split_by_character_only,
                             overlap_token_size=self.chunk_overlap_token_size,
                             max_token_size=self.chunk_token_size,
                             tiktoken_model=self.tiktoken_model_name,
+                            **self.chunking_func_kwargs,
                         )
                     }
 
@@ -453,6 +471,73 @@ class LightRAG:
                     # Ensure all indexes are updated after each document
                     await self._insert_done()
 
+    def insert_custom_chunks(self, full_text: str, text_chunks: list[str]):
+        loop = always_get_an_event_loop()
+        return loop.run_until_complete(
+            self.ainsert_custom_chunks(full_text, text_chunks)
+        )
+
+    async def ainsert_custom_chunks(self, full_text: str, text_chunks: list[str]):
+        update_storage = False
+        try:
+            doc_key = compute_mdhash_id(full_text.strip(), prefix="doc-")
+            new_docs = {doc_key: {"content": full_text.strip()}}
+
+            _add_doc_keys = await self.full_docs.filter_keys([doc_key])
+            new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
+            if not len(new_docs):
+                logger.warning("This document is already in the storage.")
+                return
+
+            update_storage = True
+            logger.info(f"[New Docs] inserting {len(new_docs)} docs")
+
+            inserting_chunks = {}
+            for chunk_text in text_chunks:
+                chunk_text_stripped = chunk_text.strip()
+                chunk_key = compute_mdhash_id(chunk_text_stripped, prefix="chunk-")
+
+                inserting_chunks[chunk_key] = {
+                    "content": chunk_text_stripped,
+                    "full_doc_id": doc_key,
+                }
+
+            _add_chunk_keys = await self.text_chunks.filter_keys(
+                list(inserting_chunks.keys())
+            )
+            inserting_chunks = {
+                k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
+            }
+            if not len(inserting_chunks):
+                logger.warning("All chunks are already in the storage.")
+                return
+
+            logger.info(f"[New Chunks] inserting {len(inserting_chunks)} chunks")
+
+            await self.chunks_vdb.upsert(inserting_chunks)
+
+            logger.info("[Entity Extraction]...")
+            maybe_new_kg = await extract_entities(
+                inserting_chunks,
+                knowledge_graph_inst=self.chunk_entity_relation_graph,
+                entity_vdb=self.entities_vdb,
+                relationships_vdb=self.relationships_vdb,
+                global_config=asdict(self),
+            )
+
+            if maybe_new_kg is None:
+                logger.warning("No new entities and relationships found")
+                return
+            else:
+                self.chunk_entity_relation_graph = maybe_new_kg
+
+            await self.full_docs.upsert(new_docs)
+            await self.text_chunks.upsert(inserting_chunks)
+
+        finally:
+            if update_storage:
+                await self._insert_done()
+
     async def _insert_done(self):
         tasks = []
         for storage_inst in [
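A possible way to drive the new `insert_custom_chunks` path with pre-chunked text; the document and chunk strings below are placeholders:

```python
from lightrag import LightRAG

rag = LightRAG(working_dir="./lightrag_cache")  # placeholder directory

full_text = "Complete document text..."  # placeholder document
text_chunks = [
    "Complete document",  # placeholder, hand-made chunks
    "document text...",
]

# Bypasses the built-in chunker: each string becomes one chunk keyed by its own
# hash, and entity extraction then runs over exactly these chunks.
rag.insert_custom_chunks(full_text, text_chunks)
```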
lightrag/operate.py
@@ -4,7 +4,6 @@ import re
 from tqdm.asyncio import tqdm as tqdm_async
 from typing import Union
 from collections import Counter, defaultdict
-import warnings
 from .utils import (
     logger,
     clean_str,
@@ -34,10 +33,48 @@ import time
 
 
 def chunking_by_token_size(
-    content: str, overlap_token_size=128, max_token_size=1024, tiktoken_model="gpt-4o"
+    content: str,
+    split_by_character=None,
+    split_by_character_only=False,
+    overlap_token_size=128,
+    max_token_size=1024,
+    tiktoken_model="gpt-4o",
+    **kwargs,
 ):
     tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
     results = []
+    if split_by_character:
+        raw_chunks = content.split(split_by_character)
+        new_chunks = []
+        if split_by_character_only:
+            for chunk in raw_chunks:
+                _tokens = encode_string_by_tiktoken(chunk, model_name=tiktoken_model)
+                new_chunks.append((len(_tokens), chunk))
+        else:
+            for chunk in raw_chunks:
+                _tokens = encode_string_by_tiktoken(chunk, model_name=tiktoken_model)
+                if len(_tokens) > max_token_size:
+                    for start in range(
+                        0, len(_tokens), max_token_size - overlap_token_size
+                    ):
+                        chunk_content = decode_tokens_by_tiktoken(
+                            _tokens[start : start + max_token_size],
+                            model_name=tiktoken_model,
+                        )
+                        new_chunks.append(
+                            (min(max_token_size, len(_tokens) - start), chunk_content)
+                        )
+                else:
+                    new_chunks.append((len(_tokens), chunk))
+        for index, (_len, chunk) in enumerate(new_chunks):
+            results.append(
+                {
+                    "tokens": _len,
+                    "content": chunk.strip(),
+                    "chunk_order_index": index,
+                }
+            )
+    else:
     for index, start in enumerate(
         range(0, len(tokens), max_token_size - overlap_token_size)
     ):
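To see what the extended chunker produces, one can call it directly; the sample string is a placeholder and the printed fields follow the record structure built above:

```python
from lightrag.operate import chunking_by_token_size

sample = "alpha alpha alpha\n\nbeta beta\n\ngamma"  # placeholder text

# Character-first chunking: each blank-line-separated block becomes one record.
records = chunking_by_token_size(
    sample,
    split_by_character="\n\n",
    split_by_character_only=True,
)

for record in records:
    print(record["chunk_order_index"], record["tokens"], record["content"])
```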
@@ -574,15 +611,22 @@ async def kg_query(
         logger.warning("low_level_keywords and high_level_keywords is empty")
         return PROMPTS["fail_response"]
     if ll_keywords == [] and query_param.mode in ["local", "hybrid"]:
-        logger.warning("low_level_keywords is empty")
-        return PROMPTS["fail_response"]
-    else:
-        ll_keywords = ", ".join(ll_keywords)
+        logger.warning(
+            "low_level_keywords is empty, switching from %s mode to global mode",
+            query_param.mode,
+        )
+        query_param.mode = "global"
     if hl_keywords == [] and query_param.mode in ["global", "hybrid"]:
-        logger.warning("high_level_keywords is empty")
-        return PROMPTS["fail_response"]
-    else:
-        hl_keywords = ", ".join(hl_keywords)
+        logger.warning(
+            "high_level_keywords is empty, switching from %s mode to local mode",
+            query_param.mode,
+        )
+        query_param.mode = "local"
+
+    ll_keywords = ", ".join(ll_keywords) if ll_keywords else ""
+    hl_keywords = ", ".join(hl_keywords) if hl_keywords else ""
+
+    logger.info("Using %s mode for query processing", query_param.mode)
 
     # Build context
     keywords = [ll_keywords, hl_keywords]
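Under the new behaviour, a hybrid query whose low-level or high-level keywords come back empty is downgraded to global or local mode instead of returning the fail response. A hedged sketch of issuing such a query (the question text and working directory are placeholders):

```python
from lightrag import LightRAG, QueryParam

rag = LightRAG(working_dir="./lightrag_cache")  # placeholder directory

# A hybrid query now falls back to global mode when low-level keywords are
# empty (and to local mode when high-level keywords are empty) rather than
# failing outright.
answer = rag.query(
    "How do the main characters relate to each other?",  # placeholder question
    param=QueryParam(mode="hybrid"),
)
print(answer)
```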
@@ -648,78 +692,52 @@ async def _build_query_context(
     # ll_entities_context, ll_relations_context, ll_text_units_context = "", "", ""
     # hl_entities_context, hl_relations_context, hl_text_units_context = "", "", ""
 
-    ll_kewwords, hl_keywrds = query[0], query[1]
-    if query_param.mode in ["local", "hybrid"]:
-        if ll_kewwords == "":
-            ll_entities_context, ll_relations_context, ll_text_units_context = (
-                "",
-                "",
-                "",
-            )
-            warnings.warn(
-                "Low Level context is None. Return empty Low entity/relationship/source"
-            )
-            query_param.mode = "global"
-        else:
-            (
-                ll_entities_context,
-                ll_relations_context,
-                ll_text_units_context,
-            ) = await _get_node_data(
-                ll_kewwords,
-                knowledge_graph_inst,
-                entities_vdb,
-                text_chunks_db,
-                query_param,
-            )
-    if query_param.mode in ["global", "hybrid"]:
-        if hl_keywrds == "":
-            hl_entities_context, hl_relations_context, hl_text_units_context = (
-                "",
-                "",
-                "",
-            )
-            warnings.warn(
-                "High Level context is None. Return empty High entity/relationship/source"
-            )
-            query_param.mode = "local"
-        else:
-            (
-                hl_entities_context,
-                hl_relations_context,
-                hl_text_units_context,
-            ) = await _get_edge_data(
-                hl_keywrds,
-                knowledge_graph_inst,
-                relationships_vdb,
-                text_chunks_db,
-                query_param,
-            )
-            if (
-                hl_entities_context == ""
-                and hl_relations_context == ""
-                and hl_text_units_context == ""
-            ):
-                logger.warn("No high level context found. Switching to local mode.")
-                query_param.mode = "local"
-    if query_param.mode == "hybrid":
-        entities_context, relations_context, text_units_context = combine_contexts(
-            [hl_entities_context, ll_entities_context],
-            [hl_relations_context, ll_relations_context],
-            [hl_text_units_context, ll_text_units_context],
-        )
-    elif query_param.mode == "local":
-        entities_context, relations_context, text_units_context = (
-            ll_entities_context,
-            ll_relations_context,
-            ll_text_units_context,
-        )
-    elif query_param.mode == "global":
-        entities_context, relations_context, text_units_context = (
-            hl_entities_context,
-            hl_relations_context,
-            hl_text_units_context,
-        )
+    ll_keywords, hl_keywords = query[0], query[1]
+
+    if query_param.mode == "local":
+        entities_context, relations_context, text_units_context = await _get_node_data(
+            ll_keywords,
+            knowledge_graph_inst,
+            entities_vdb,
+            text_chunks_db,
+            query_param,
+        )
+    elif query_param.mode == "global":
+        entities_context, relations_context, text_units_context = await _get_edge_data(
+            hl_keywords,
+            knowledge_graph_inst,
+            relationships_vdb,
+            text_chunks_db,
+            query_param,
+        )
+    else:  # hybrid mode
+        (
+            ll_entities_context,
+            ll_relations_context,
+            ll_text_units_context,
+        ) = await _get_node_data(
+            ll_keywords,
+            knowledge_graph_inst,
+            entities_vdb,
+            text_chunks_db,
+            query_param,
+        )
+        (
+            hl_entities_context,
+            hl_relations_context,
+            hl_text_units_context,
+        ) = await _get_edge_data(
+            hl_keywords,
+            knowledge_graph_inst,
+            relationships_vdb,
+            text_chunks_db,
+            query_param,
+        )
+        entities_context, relations_context, text_units_context = combine_contexts(
+            [hl_entities_context, ll_entities_context],
+            [hl_relations_context, ll_relations_context],
+            [hl_text_units_context, ll_text_units_context],
+        )
     return f"""
 -----Entities-----
 ```csv
requirements.txt
@@ -1,38 +1,38 @@
 accelerate
-aioboto3~=13.3.0
-aiofiles~=24.1.0
-aiohttp~=3.11.11
-asyncpg~=0.30.0
+aioboto3
+aiofiles
+aiohttp
+asyncpg
 
 # database packages
 graspologic
 gremlinpython
 hnswlib
 nano-vectordb
-neo4j~=5.27.0
-networkx~=3.2.1
+neo4j
+networkx
 
-numpy~=2.2.0
-ollama~=0.4.4
-openai~=1.58.1
+numpy
+ollama
+openai
 oracledb
-psycopg-pool~=3.2.4
-psycopg[binary,pool]~=3.2.3
-pydantic~=2.10.4
+psycopg-pool
+psycopg[binary,pool]
+pydantic
 pymilvus
 pymongo
 pymysql
-python-dotenv~=1.0.1
-pyvis~=0.3.2
-setuptools~=70.0.0
+python-dotenv
+pyvis
+setuptools
 # lmdeploy[all]
-sqlalchemy~=2.0.36
-tenacity~=9.0.0
+sqlalchemy
+tenacity
 
 
 # LLM packages
-tiktoken~=0.8.0
-torch~=2.5.1+cu121
-tqdm~=4.67.1
-transformers~=4.47.1
+tiktoken
+torch
+tqdm
+transformers
 xxhash