Fix linting
This commit is contained in:
@@ -466,12 +466,12 @@ async def extract_entities(
|
||||
"""
|
||||
maybe_nodes = defaultdict(list)
|
||||
maybe_edges = defaultdict(list)
|
||||
|
||||
|
||||
records = split_string_by_multi_markers(
|
||||
result,
|
||||
[context_base["record_delimiter"], context_base["completion_delimiter"]],
|
||||
)
|
||||
|
||||
|
||||
for record in records:
|
||||
record = re.search(r"\((.*)\)", record)
|
||||
if record is None:
|
||||
@@ -480,14 +480,14 @@ async def extract_entities(
|
||||
record_attributes = split_string_by_multi_markers(
|
||||
record, [context_base["tuple_delimiter"]]
|
||||
)
|
||||
|
||||
|
||||
if_entities = await _handle_single_entity_extraction(
|
||||
record_attributes, chunk_key
|
||||
)
|
||||
if if_entities is not None:
|
||||
maybe_nodes[if_entities["entity_name"]].append(if_entities)
|
||||
continue
|
||||
|
||||
|
||||
if_relation = await _handle_single_relationship_extraction(
|
||||
record_attributes, chunk_key
|
||||
)
|
||||
@@ -495,7 +495,7 @@ async def extract_entities(
|
||||
maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
|
||||
if_relation
|
||||
)
|
||||
|
||||
|
||||
return maybe_nodes, maybe_edges
|
||||
|
||||
async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
|
||||
@@ -508,45 +508,49 @@ async def extract_entities(
|
||||
chunk_key = chunk_key_dp[0]
|
||||
chunk_dp = chunk_key_dp[1]
|
||||
content = chunk_dp["content"]
|
||||
|
||||
|
||||
# Get initial extraction
|
||||
hint_prompt = entity_extract_prompt.format(
|
||||
**context_base, input_text="{input_text}"
|
||||
).format(**context_base, input_text=content)
|
||||
|
||||
|
||||
final_result = await _user_llm_func_with_cache(hint_prompt)
|
||||
history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
|
||||
|
||||
|
||||
# Process initial extraction
|
||||
maybe_nodes, maybe_edges = await _process_extraction_result(final_result, chunk_key)
|
||||
|
||||
maybe_nodes, maybe_edges = await _process_extraction_result(
|
||||
final_result, chunk_key
|
||||
)
|
||||
|
||||
# Process additional gleaning results
|
||||
for now_glean_index in range(entity_extract_max_gleaning):
|
||||
glean_result = await _user_llm_func_with_cache(
|
||||
continue_prompt, history_messages=history
|
||||
)
|
||||
|
||||
|
||||
history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
|
||||
|
||||
|
||||
# Process gleaning result separately
|
||||
glean_nodes, glean_edges = await _process_extraction_result(glean_result, chunk_key)
|
||||
|
||||
glean_nodes, glean_edges = await _process_extraction_result(
|
||||
glean_result, chunk_key
|
||||
)
|
||||
|
||||
# Merge results
|
||||
for entity_name, entities in glean_nodes.items():
|
||||
maybe_nodes[entity_name].extend(entities)
|
||||
for edge_key, edges in glean_edges.items():
|
||||
maybe_edges[edge_key].extend(edges)
|
||||
|
||||
|
||||
if now_glean_index == entity_extract_max_gleaning - 1:
|
||||
break
|
||||
|
||||
|
||||
if_loop_result: str = await _user_llm_func_with_cache(
|
||||
if_loop_prompt, history_messages=history
|
||||
)
|
||||
if_loop_result = if_loop_result.strip().strip('"').strip("'").lower()
|
||||
if if_loop_result != "yes":
|
||||
break
|
||||
|
||||
|
||||
processed_chunks += 1
|
||||
entities_count = len(maybe_nodes)
|
||||
relations_count = len(maybe_edges)
|
||||
|
Reference in New Issue
Block a user