Merge branch 'main' into main
This commit is contained in:
@@ -12,7 +12,7 @@
|
|||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
<img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
|
<img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
|
||||||
<img src="https://img.shields.io/badge/python->=3.10-blue">
|
<img src="https://img.shields.io/badge/python-3.10-blue">
|
||||||
<a href="https://pypi.org/project/lightrag-hku/"><img src="https://img.shields.io/pypi/v/lightrag-hku.svg"></a>
|
<a href="https://pypi.org/project/lightrag-hku/"><img src="https://img.shields.io/pypi/v/lightrag-hku.svg"></a>
|
||||||
<a href="https://pepy.tech/project/lightrag-hku"><img src="https://static.pepy.tech/badge/lightrag-hku/month"></a>
|
<a href="https://pepy.tech/project/lightrag-hku"><img src="https://static.pepy.tech/badge/lightrag-hku/month"></a>
|
||||||
</p>
|
</p>
|
||||||
|
@@ -323,7 +323,7 @@ class LightRAG:
|
|||||||
)
|
)
|
||||||
|
|
||||||
async def ainsert(
|
async def ainsert(
|
||||||
self, string_or_strings, split_by_character, split_by_character_only
|
self, string_or_strings, split_by_character=None, split_by_character_only=False
|
||||||
):
|
):
|
||||||
"""Insert documents with checkpoint support
|
"""Insert documents with checkpoint support
|
||||||
|
|
||||||
@@ -466,6 +466,73 @@ class LightRAG:
|
|||||||
# Ensure all indexes are updated after each document
|
# Ensure all indexes are updated after each document
|
||||||
await self._insert_done()
|
await self._insert_done()
|
||||||
|
|
||||||
|
def insert_custom_chunks(self, full_text: str, text_chunks: list[str]):
|
||||||
|
loop = always_get_an_event_loop()
|
||||||
|
return loop.run_until_complete(
|
||||||
|
self.ainsert_custom_chunks(full_text, text_chunks)
|
||||||
|
)
|
||||||
|
|
||||||
|
async def ainsert_custom_chunks(self, full_text: str, text_chunks: list[str]):
|
||||||
|
update_storage = False
|
||||||
|
try:
|
||||||
|
doc_key = compute_mdhash_id(full_text.strip(), prefix="doc-")
|
||||||
|
new_docs = {doc_key: {"content": full_text.strip()}}
|
||||||
|
|
||||||
|
_add_doc_keys = await self.full_docs.filter_keys([doc_key])
|
||||||
|
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
|
||||||
|
if not len(new_docs):
|
||||||
|
logger.warning("This document is already in the storage.")
|
||||||
|
return
|
||||||
|
|
||||||
|
update_storage = True
|
||||||
|
logger.info(f"[New Docs] inserting {len(new_docs)} docs")
|
||||||
|
|
||||||
|
inserting_chunks = {}
|
||||||
|
for chunk_text in text_chunks:
|
||||||
|
chunk_text_stripped = chunk_text.strip()
|
||||||
|
chunk_key = compute_mdhash_id(chunk_text_stripped, prefix="chunk-")
|
||||||
|
|
||||||
|
inserting_chunks[chunk_key] = {
|
||||||
|
"content": chunk_text_stripped,
|
||||||
|
"full_doc_id": doc_key,
|
||||||
|
}
|
||||||
|
|
||||||
|
_add_chunk_keys = await self.text_chunks.filter_keys(
|
||||||
|
list(inserting_chunks.keys())
|
||||||
|
)
|
||||||
|
inserting_chunks = {
|
||||||
|
k: v for k, v in inserting_chunks.items() if k in _add_chunk_keys
|
||||||
|
}
|
||||||
|
if not len(inserting_chunks):
|
||||||
|
logger.warning("All chunks are already in the storage.")
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.info(f"[New Chunks] inserting {len(inserting_chunks)} chunks")
|
||||||
|
|
||||||
|
await self.chunks_vdb.upsert(inserting_chunks)
|
||||||
|
|
||||||
|
logger.info("[Entity Extraction]...")
|
||||||
|
maybe_new_kg = await extract_entities(
|
||||||
|
inserting_chunks,
|
||||||
|
knowledge_graph_inst=self.chunk_entity_relation_graph,
|
||||||
|
entity_vdb=self.entities_vdb,
|
||||||
|
relationships_vdb=self.relationships_vdb,
|
||||||
|
global_config=asdict(self),
|
||||||
|
)
|
||||||
|
|
||||||
|
if maybe_new_kg is None:
|
||||||
|
logger.warning("No new entities and relationships found")
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
self.chunk_entity_relation_graph = maybe_new_kg
|
||||||
|
|
||||||
|
await self.full_docs.upsert(new_docs)
|
||||||
|
await self.text_chunks.upsert(inserting_chunks)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
if update_storage:
|
||||||
|
await self._insert_done()
|
||||||
|
|
||||||
async def _insert_done(self):
|
async def _insert_done(self):
|
||||||
tasks = []
|
tasks = []
|
||||||
for storage_inst in [
|
for storage_inst in [
|
||||||
|
@@ -4,7 +4,6 @@ import re
|
|||||||
from tqdm.asyncio import tqdm as tqdm_async
|
from tqdm.asyncio import tqdm as tqdm_async
|
||||||
from typing import Union
|
from typing import Union
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter, defaultdict
|
||||||
import warnings
|
|
||||||
from .utils import (
|
from .utils import (
|
||||||
logger,
|
logger,
|
||||||
clean_str,
|
clean_str,
|
||||||
@@ -611,15 +610,22 @@ async def kg_query(
|
|||||||
logger.warning("low_level_keywords and high_level_keywords is empty")
|
logger.warning("low_level_keywords and high_level_keywords is empty")
|
||||||
return PROMPTS["fail_response"]
|
return PROMPTS["fail_response"]
|
||||||
if ll_keywords == [] and query_param.mode in ["local", "hybrid"]:
|
if ll_keywords == [] and query_param.mode in ["local", "hybrid"]:
|
||||||
logger.warning("low_level_keywords is empty")
|
logger.warning(
|
||||||
return PROMPTS["fail_response"]
|
"low_level_keywords is empty, switching from %s mode to global mode",
|
||||||
else:
|
query_param.mode,
|
||||||
ll_keywords = ", ".join(ll_keywords)
|
)
|
||||||
|
query_param.mode = "global"
|
||||||
if hl_keywords == [] and query_param.mode in ["global", "hybrid"]:
|
if hl_keywords == [] and query_param.mode in ["global", "hybrid"]:
|
||||||
logger.warning("high_level_keywords is empty")
|
logger.warning(
|
||||||
return PROMPTS["fail_response"]
|
"high_level_keywords is empty, switching from %s mode to local mode",
|
||||||
else:
|
query_param.mode,
|
||||||
hl_keywords = ", ".join(hl_keywords)
|
)
|
||||||
|
query_param.mode = "local"
|
||||||
|
|
||||||
|
ll_keywords = ", ".join(ll_keywords) if ll_keywords else ""
|
||||||
|
hl_keywords = ", ".join(hl_keywords) if hl_keywords else ""
|
||||||
|
|
||||||
|
logger.info("Using %s mode for query processing", query_param.mode)
|
||||||
|
|
||||||
# Build context
|
# Build context
|
||||||
keywords = [ll_keywords, hl_keywords]
|
keywords = [ll_keywords, hl_keywords]
|
||||||
@@ -685,78 +691,52 @@ async def _build_query_context(
|
|||||||
# ll_entities_context, ll_relations_context, ll_text_units_context = "", "", ""
|
# ll_entities_context, ll_relations_context, ll_text_units_context = "", "", ""
|
||||||
# hl_entities_context, hl_relations_context, hl_text_units_context = "", "", ""
|
# hl_entities_context, hl_relations_context, hl_text_units_context = "", "", ""
|
||||||
|
|
||||||
ll_kewwords, hl_keywrds = query[0], query[1]
|
ll_keywords, hl_keywords = query[0], query[1]
|
||||||
if query_param.mode in ["local", "hybrid"]:
|
|
||||||
if ll_kewwords == "":
|
if query_param.mode == "local":
|
||||||
ll_entities_context, ll_relations_context, ll_text_units_context = (
|
entities_context, relations_context, text_units_context = await _get_node_data(
|
||||||
"",
|
ll_keywords,
|
||||||
"",
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
warnings.warn(
|
|
||||||
"Low Level context is None. Return empty Low entity/relationship/source"
|
|
||||||
)
|
|
||||||
query_param.mode = "global"
|
|
||||||
else:
|
|
||||||
(
|
|
||||||
ll_entities_context,
|
|
||||||
ll_relations_context,
|
|
||||||
ll_text_units_context,
|
|
||||||
) = await _get_node_data(
|
|
||||||
ll_kewwords,
|
|
||||||
knowledge_graph_inst,
|
knowledge_graph_inst,
|
||||||
entities_vdb,
|
entities_vdb,
|
||||||
text_chunks_db,
|
text_chunks_db,
|
||||||
query_param,
|
query_param,
|
||||||
)
|
)
|
||||||
if query_param.mode in ["global", "hybrid"]:
|
elif query_param.mode == "global":
|
||||||
if hl_keywrds == "":
|
entities_context, relations_context, text_units_context = await _get_edge_data(
|
||||||
hl_entities_context, hl_relations_context, hl_text_units_context = (
|
hl_keywords,
|
||||||
"",
|
knowledge_graph_inst,
|
||||||
"",
|
relationships_vdb,
|
||||||
"",
|
text_chunks_db,
|
||||||
)
|
query_param,
|
||||||
warnings.warn(
|
)
|
||||||
"High Level context is None. Return empty High entity/relationship/source"
|
else: # hybrid mode
|
||||||
)
|
(
|
||||||
query_param.mode = "local"
|
ll_entities_context,
|
||||||
else:
|
ll_relations_context,
|
||||||
(
|
ll_text_units_context,
|
||||||
hl_entities_context,
|
) = await _get_node_data(
|
||||||
hl_relations_context,
|
ll_keywords,
|
||||||
hl_text_units_context,
|
knowledge_graph_inst,
|
||||||
) = await _get_edge_data(
|
entities_vdb,
|
||||||
hl_keywrds,
|
text_chunks_db,
|
||||||
|
query_param,
|
||||||
|
)
|
||||||
|
(
|
||||||
|
hl_entities_context,
|
||||||
|
hl_relations_context,
|
||||||
|
hl_text_units_context,
|
||||||
|
) = await _get_edge_data(
|
||||||
|
hl_keywords,
|
||||||
knowledge_graph_inst,
|
knowledge_graph_inst,
|
||||||
relationships_vdb,
|
relationships_vdb,
|
||||||
text_chunks_db,
|
text_chunks_db,
|
||||||
query_param,
|
query_param,
|
||||||
)
|
)
|
||||||
if (
|
|
||||||
hl_entities_context == ""
|
|
||||||
and hl_relations_context == ""
|
|
||||||
and hl_text_units_context == ""
|
|
||||||
):
|
|
||||||
logger.warn("No high level context found. Switching to local mode.")
|
|
||||||
query_param.mode = "local"
|
|
||||||
if query_param.mode == "hybrid":
|
|
||||||
entities_context, relations_context, text_units_context = combine_contexts(
|
entities_context, relations_context, text_units_context = combine_contexts(
|
||||||
[hl_entities_context, ll_entities_context],
|
[hl_entities_context, ll_entities_context],
|
||||||
[hl_relations_context, ll_relations_context],
|
[hl_relations_context, ll_relations_context],
|
||||||
[hl_text_units_context, ll_text_units_context],
|
[hl_text_units_context, ll_text_units_context],
|
||||||
)
|
)
|
||||||
elif query_param.mode == "local":
|
|
||||||
entities_context, relations_context, text_units_context = (
|
|
||||||
ll_entities_context,
|
|
||||||
ll_relations_context,
|
|
||||||
ll_text_units_context,
|
|
||||||
)
|
|
||||||
elif query_param.mode == "global":
|
|
||||||
entities_context, relations_context, text_units_context = (
|
|
||||||
hl_entities_context,
|
|
||||||
hl_relations_context,
|
|
||||||
hl_text_units_context,
|
|
||||||
)
|
|
||||||
return f"""
|
return f"""
|
||||||
-----Entities-----
|
-----Entities-----
|
||||||
```csv
|
```csv
|
||||||
|
Reference in New Issue
Block a user