Bug fix for issue #95
@@ -55,12 +55,11 @@ from .base import (
 def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
     try:
-        loop = asyncio.get_running_loop()
+        return asyncio.get_event_loop()
     except RuntimeError:
         logger.info("Creating a new event loop in main thread.")
-        # loop = asyncio.new_event_loop()
-        # asyncio.set_event_loop(loop)
-        loop = asyncio.get_event_loop()
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
     return loop
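The change above makes the helper return the current event loop directly and, when asyncio.get_event_loop() raises RuntimeError, actually create and register a new loop instead of leaving those calls commented out. A minimal usage sketch from synchronous code (run_query is a made-up stand-in for the real async entry points, not part of this commit):

import asyncio

async def run_query() -> str:
    # hypothetical coroutine standing in for the real async pipeline
    await asyncio.sleep(0)
    return "done"

loop = always_get_an_event_loop()            # current loop, or a freshly created and registered one
print(loop.run_until_complete(run_query()))  # -> done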
@@ -561,46 +561,64 @@ async def _find_most_related_text_unit_from_entities(
         if not this_edges:
             continue
         all_one_hop_nodes.update([e[1] for e in this_edges])

     all_one_hop_nodes = list(all_one_hop_nodes)
     all_one_hop_nodes_data = await asyncio.gather(
         *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
     )

+    # Add null check for node data
     all_one_hop_text_units_lookup = {
         k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
         for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
-        if v is not None
+        if v is not None and "source_id" in v  # Add source_id check
     }

     all_text_units_lookup = {}
     for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
         for c_id in this_text_units:
             if c_id in all_text_units_lookup:
                 continue
             relation_counts = 0
-            for e in this_edges:
-                if (
-                    e[1] in all_one_hop_text_units_lookup
-                    and c_id in all_one_hop_text_units_lookup[e[1]]
-                ):
-                    relation_counts += 1
+            if this_edges:  # Add check for None edges
+                for e in this_edges:
+                    if (
+                        e[1] in all_one_hop_text_units_lookup
+                        and c_id in all_one_hop_text_units_lookup[e[1]]
+                    ):
+                        relation_counts += 1

-            all_text_units_lookup[c_id] = {
-                "data": await text_chunks_db.get_by_id(c_id),
-                "order": index,
-                "relation_counts": relation_counts,
-            }
+            chunk_data = await text_chunks_db.get_by_id(c_id)
+            if chunk_data is not None and "content" in chunk_data:  # Add content check
+                all_text_units_lookup[c_id] = {
+                    "data": chunk_data,
+                    "order": index,
+                    "relation_counts": relation_counts,
+                }

-    if any([v is None for v in all_text_units_lookup.values()]):
-        logger.warning("Text chunks are missing, maybe the storage is damaged")
+    # Filter out None values and ensure data has content
     all_text_units = [
-        {"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
+        {"id": k, **v}
+        for k, v in all_text_units_lookup.items()
+        if v is not None and v.get("data") is not None and "content" in v["data"]
     ]

+    if not all_text_units:
+        logger.warning("No valid text units found")
+        return []

     all_text_units = sorted(
-        all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
+        all_text_units,
+        key=lambda x: (x["order"], -x["relation_counts"])
     )

     all_text_units = truncate_list_by_token_size(
         all_text_units,
         key=lambda x: x["data"]["content"],
         max_token_size=query_param.max_token_for_text_unit,
     )
-    all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
+    all_text_units = [t["data"] for t in all_text_units]
     return all_text_units
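Every change in this hunk applies the same defensive pattern: if a graph node or text chunk is missing from storage, or lacks the field the code needs ("source_id" on nodes, "content" on chunks), skip it instead of failing later with a KeyError or TypeError, and warn plus return early when nothing valid remains. A self-contained sketch of that guard with made-up data ("<SEP>" stands in for GRAPH_FIELD_SEP; the real code reads records via knowledge_graph_inst.get_node and text_chunks_db.get_by_id):

nodes = ["alpha", "beta", "gamma"]
node_data = [{"source_id": "chunk-1<SEP>chunk-2"}, None, {"description": "no source_id"}]

# same guard as the hunk: the record must exist and carry the expected field
lookup = {
    k: set(v["source_id"].split("<SEP>"))
    for k, v in zip(nodes, node_data)
    if v is not None and "source_id" in v
}
print(lookup)  # {'alpha': {'chunk-1', 'chunk-2'}} (set order may vary); beta and gamma are dropped, not crashed on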
@@ -1028,7 +1046,7 @@ def combine_contexts(high_level_context, low_level_context):
 -----Sources-----
 ```csv
 {combined_sources}
-``
+```
 """
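The one-character change above closes the csv code fence in the combined-context prompt; with only two backticks the Sources block is left unterminated when the context is rendered. A hedged sketch of the fragment being patched, with a made-up value for combined_sources (the real value is assembled earlier in combine_contexts):

combined_sources = "id,content\n1,example chunk"

prompt_tail = f"""
-----Sources-----
```csv
{combined_sources}
```
"""

print(prompt_tail)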