Merge branch 'main' of https://github.com/jin38324/LightRAG
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
||||
|
||||
__version__ = "0.0.8"
|
||||
__version__ = "0.0.9"
|
||||
__author__ = "Zirui Guo"
|
||||
__url__ = "https://github.com/HKUDS/LightRAG"
|
||||
|
@@ -1,3 +1 @@
|
||||
# print ("init package vars here. ......")
|
||||
|
||||
|
||||
|
@@ -146,11 +146,11 @@ class Neo4JStorage(BaseGraphStorage):
|
||||
entity_name_label_target = target_node_id.strip('"')
|
||||
"""
|
||||
Find all edges between nodes of two given labels
|
||||
|
||||
|
||||
Args:
|
||||
source_node_label (str): Label of the source nodes
|
||||
target_node_label (str): Label of the target nodes
|
||||
|
||||
|
||||
Returns:
|
||||
list: List of all relationships/edges found
|
||||
"""
|
||||
|
@@ -66,7 +66,6 @@ def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
|
||||
return loop
|
||||
|
||||
|
||||
|
||||
@dataclass
|
||||
class LightRAG:
|
||||
working_dir: str = field(
|
||||
|
@@ -562,19 +562,19 @@ async def _find_most_related_text_unit_from_entities(
|
||||
if not this_edges:
|
||||
continue
|
||||
all_one_hop_nodes.update([e[1] for e in this_edges])
|
||||
|
||||
|
||||
all_one_hop_nodes = list(all_one_hop_nodes)
|
||||
all_one_hop_nodes_data = await asyncio.gather(
|
||||
*[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
|
||||
)
|
||||
|
||||
|
||||
# Add null check for node data
|
||||
all_one_hop_text_units_lookup = {
|
||||
k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
|
||||
for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
|
||||
if v is not None and "source_id" in v # Add source_id check
|
||||
}
|
||||
|
||||
|
||||
all_text_units_lookup = {}
|
||||
for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
|
||||
for c_id in this_text_units:
|
||||
@@ -588,7 +588,7 @@ async def _find_most_related_text_unit_from_entities(
|
||||
and c_id in all_one_hop_text_units_lookup[e[1]]
|
||||
):
|
||||
relation_counts += 1
|
||||
|
||||
|
||||
chunk_data = await text_chunks_db.get_by_id(c_id)
|
||||
if chunk_data is not None and "content" in chunk_data: # Add content check
|
||||
all_text_units_lookup[c_id] = {
|
||||
@@ -596,29 +596,28 @@ async def _find_most_related_text_unit_from_entities(
|
||||
"order": index,
|
||||
"relation_counts": relation_counts,
|
||||
}
|
||||
|
||||
|
||||
# Filter out None values and ensure data has content
|
||||
all_text_units = [
|
||||
{"id": k, **v}
|
||||
for k, v in all_text_units_lookup.items()
|
||||
{"id": k, **v}
|
||||
for k, v in all_text_units_lookup.items()
|
||||
if v is not None and v.get("data") is not None and "content" in v["data"]
|
||||
]
|
||||
|
||||
|
||||
if not all_text_units:
|
||||
logger.warning("No valid text units found")
|
||||
return []
|
||||
|
||||
|
||||
all_text_units = sorted(
|
||||
all_text_units,
|
||||
key=lambda x: (x["order"], -x["relation_counts"])
|
||||
all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
|
||||
)
|
||||
|
||||
|
||||
all_text_units = truncate_list_by_token_size(
|
||||
all_text_units,
|
||||
key=lambda x: x["data"]["content"],
|
||||
max_token_size=query_param.max_token_for_text_unit,
|
||||
)
|
||||
|
||||
|
||||
all_text_units = [t["data"] for t in all_text_units]
|
||||
return all_text_units
|
||||
|
||||
|
Reference in New Issue
Block a user