Merge pull request #215 from benx13/main

Bug fix for issue #95
Authored by zrguo on 2024-11-07 14:54:59 +08:00, committed by GitHub
2 changed files with 41 additions and 20 deletions


@@ -51,12 +51,15 @@ from .base import (
 def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
     try:
-        loop = asyncio.get_event_loop()
+        return asyncio.get_event_loop()
     except RuntimeError:
         logger.info("Creating a new event loop in main thread.")
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
-    return loop
+        return loop


 @dataclass
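For context on this first fix: asyncio.get_event_loop() raises RuntimeError when called from a thread that has no current event loop, so the helper now returns the running thread's loop directly and, when it has to create a fresh loop, returns it from inside the except branch. Below is a minimal sketch of the patched helper called from a worker thread; the worker function and the threading demo are illustrative additions, not part of the commit.

import asyncio
import logging
import threading

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def always_get_an_event_loop() -> asyncio.AbstractEventLoop:
    try:
        # Reuse the current thread's loop if one is already registered.
        return asyncio.get_event_loop()
    except RuntimeError:
        # No loop in this thread (e.g. a worker thread): create and register one.
        logger.info("Creating a new event loop in main thread.")
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        return loop


def worker():
    # In a non-main thread, get_event_loop() raises RuntimeError,
    # so the except branch builds a fresh loop and returns it.
    loop = always_get_an_event_loop()
    print(loop.run_until_complete(asyncio.sleep(0, result="ok")))


t = threading.Thread(target=worker)
t.start()
t.join()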


@@ -560,46 +560,64 @@ async def _find_most_related_text_unit_from_entities(
         if not this_edges:
             continue
         all_one_hop_nodes.update([e[1] for e in this_edges])
     all_one_hop_nodes = list(all_one_hop_nodes)
     all_one_hop_nodes_data = await asyncio.gather(
         *[knowledge_graph_inst.get_node(e) for e in all_one_hop_nodes]
     )
+
+    # Add null check for node data
     all_one_hop_text_units_lookup = {
         k: set(split_string_by_multi_markers(v["source_id"], [GRAPH_FIELD_SEP]))
         for k, v in zip(all_one_hop_nodes, all_one_hop_nodes_data)
-        if v is not None
+        if v is not None and "source_id" in v  # Add source_id check
     }
+
     all_text_units_lookup = {}
     for index, (this_text_units, this_edges) in enumerate(zip(text_units, edges)):
         for c_id in this_text_units:
             if c_id in all_text_units_lookup:
                 continue
+
             relation_counts = 0
-            for e in this_edges:
-                if (
-                    e[1] in all_one_hop_text_units_lookup
-                    and c_id in all_one_hop_text_units_lookup[e[1]]
-                ):
-                    relation_counts += 1
-            all_text_units_lookup[c_id] = {
-                "data": await text_chunks_db.get_by_id(c_id),
-                "order": index,
-                "relation_counts": relation_counts,
-            }
-    if any([v is None for v in all_text_units_lookup.values()]):
-        logger.warning("Text chunks are missing, maybe the storage is damaged")
+            if this_edges:  # Add check for None edges
+                for e in this_edges:
+                    if (
+                        e[1] in all_one_hop_text_units_lookup
+                        and c_id in all_one_hop_text_units_lookup[e[1]]
+                    ):
+                        relation_counts += 1
+
+            chunk_data = await text_chunks_db.get_by_id(c_id)
+            if chunk_data is not None and "content" in chunk_data:  # Add content check
+                all_text_units_lookup[c_id] = {
+                    "data": chunk_data,
+                    "order": index,
+                    "relation_counts": relation_counts,
+                }
+
+    # Filter out None values and ensure data has content
     all_text_units = [
-        {"id": k, **v} for k, v in all_text_units_lookup.items() if v is not None
+        {"id": k, **v}
+        for k, v in all_text_units_lookup.items()
+        if v is not None and v.get("data") is not None and "content" in v["data"]
     ]
+
+    if not all_text_units:
+        logger.warning("No valid text units found")
+        return []
+
     all_text_units = sorted(
-        all_text_units, key=lambda x: (x["order"], -x["relation_counts"])
+        all_text_units,
+        key=lambda x: (x["order"], -x["relation_counts"])
     )
     all_text_units = truncate_list_by_token_size(
         all_text_units,
         key=lambda x: x["data"]["content"],
         max_token_size=query_param.max_token_for_text_unit,
     )
-    all_text_units: list[TextChunkSchema] = [t["data"] for t in all_text_units]
+
+    all_text_units = [t["data"] for t in all_text_units]
     return all_text_units
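This hunk replaces a single post-hoc scan for None values with guards at each lookup: node records missing "source_id", None edge lists, and chunks that are absent or lack a "content" field are all skipped before reaching truncate_list_by_token_size, whose key=lambda x: x["data"]["content"] would otherwise raise a TypeError on None data. Below is a minimal sketch of the guard pattern against a stub store; FakeKVStore, collect_valid_chunks, and the sample ids are assumptions for illustration, not repository code.

import asyncio
from typing import Any, Optional


class FakeKVStore:
    """Stub standing in for text_chunks_db; returns None for missing ids."""

    def __init__(self, data: dict[str, dict[str, Any]]):
        self._data = data

    async def get_by_id(self, key: str) -> Optional[dict[str, Any]]:
        return self._data.get(key)


async def collect_valid_chunks(store: FakeKVStore, chunk_ids: list[str]) -> list[dict]:
    lookup: dict[str, dict[str, Any]] = {}
    for index, c_id in enumerate(chunk_ids):
        chunk_data = await store.get_by_id(c_id)
        # Same guard as the patch: skip missing chunks and chunks without
        # "content" instead of storing None and failing later on
        # x["data"]["content"].
        if chunk_data is not None and "content" in chunk_data:
            lookup[c_id] = {"data": chunk_data, "order": index}
    return [{"id": k, **v} for k, v in lookup.items()]


store = FakeKVStore({"chunk-1": {"content": "hello"}, "chunk-2": {"tokens": 42}})
print(asyncio.run(collect_valid_chunks(store, ["chunk-1", "chunk-2", "chunk-3"])))
# Only chunk-1 survives: chunk-2 lacks "content", chunk-3 is absent entirely.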
@@ -1027,7 +1045,7 @@ def combine_contexts(high_level_context, low_level_context):
 -----Sources-----
 ```csv
 {combined_sources}
-``
+```
 """