Fix linting

This commit is contained in:
yangdx
2025-03-11 12:23:51 +08:00
parent 061350b2bf
commit 9d1dc2c9c3

View File

@@ -466,12 +466,12 @@ async def extract_entities(
""" """
maybe_nodes = defaultdict(list) maybe_nodes = defaultdict(list)
maybe_edges = defaultdict(list) maybe_edges = defaultdict(list)
records = split_string_by_multi_markers( records = split_string_by_multi_markers(
result, result,
[context_base["record_delimiter"], context_base["completion_delimiter"]], [context_base["record_delimiter"], context_base["completion_delimiter"]],
) )
for record in records: for record in records:
record = re.search(r"\((.*)\)", record) record = re.search(r"\((.*)\)", record)
if record is None: if record is None:
@@ -480,14 +480,14 @@ async def extract_entities(
record_attributes = split_string_by_multi_markers( record_attributes = split_string_by_multi_markers(
record, [context_base["tuple_delimiter"]] record, [context_base["tuple_delimiter"]]
) )
if_entities = await _handle_single_entity_extraction( if_entities = await _handle_single_entity_extraction(
record_attributes, chunk_key record_attributes, chunk_key
) )
if if_entities is not None: if if_entities is not None:
maybe_nodes[if_entities["entity_name"]].append(if_entities) maybe_nodes[if_entities["entity_name"]].append(if_entities)
continue continue
if_relation = await _handle_single_relationship_extraction( if_relation = await _handle_single_relationship_extraction(
record_attributes, chunk_key record_attributes, chunk_key
) )
@@ -495,7 +495,7 @@ async def extract_entities(
maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append( maybe_edges[(if_relation["src_id"], if_relation["tgt_id"])].append(
if_relation if_relation
) )
return maybe_nodes, maybe_edges return maybe_nodes, maybe_edges
async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]): async def _process_single_content(chunk_key_dp: tuple[str, TextChunkSchema]):
@@ -508,45 +508,49 @@ async def extract_entities(
chunk_key = chunk_key_dp[0] chunk_key = chunk_key_dp[0]
chunk_dp = chunk_key_dp[1] chunk_dp = chunk_key_dp[1]
content = chunk_dp["content"] content = chunk_dp["content"]
# Get initial extraction # Get initial extraction
hint_prompt = entity_extract_prompt.format( hint_prompt = entity_extract_prompt.format(
**context_base, input_text="{input_text}" **context_base, input_text="{input_text}"
).format(**context_base, input_text=content) ).format(**context_base, input_text=content)
final_result = await _user_llm_func_with_cache(hint_prompt) final_result = await _user_llm_func_with_cache(hint_prompt)
history = pack_user_ass_to_openai_messages(hint_prompt, final_result) history = pack_user_ass_to_openai_messages(hint_prompt, final_result)
# Process initial extraction # Process initial extraction
maybe_nodes, maybe_edges = await _process_extraction_result(final_result, chunk_key) maybe_nodes, maybe_edges = await _process_extraction_result(
final_result, chunk_key
)
# Process additional gleaning results # Process additional gleaning results
for now_glean_index in range(entity_extract_max_gleaning): for now_glean_index in range(entity_extract_max_gleaning):
glean_result = await _user_llm_func_with_cache( glean_result = await _user_llm_func_with_cache(
continue_prompt, history_messages=history continue_prompt, history_messages=history
) )
history += pack_user_ass_to_openai_messages(continue_prompt, glean_result) history += pack_user_ass_to_openai_messages(continue_prompt, glean_result)
# Process gleaning result separately # Process gleaning result separately
glean_nodes, glean_edges = await _process_extraction_result(glean_result, chunk_key) glean_nodes, glean_edges = await _process_extraction_result(
glean_result, chunk_key
)
# Merge results # Merge results
for entity_name, entities in glean_nodes.items(): for entity_name, entities in glean_nodes.items():
maybe_nodes[entity_name].extend(entities) maybe_nodes[entity_name].extend(entities)
for edge_key, edges in glean_edges.items(): for edge_key, edges in glean_edges.items():
maybe_edges[edge_key].extend(edges) maybe_edges[edge_key].extend(edges)
if now_glean_index == entity_extract_max_gleaning - 1: if now_glean_index == entity_extract_max_gleaning - 1:
break break
if_loop_result: str = await _user_llm_func_with_cache( if_loop_result: str = await _user_llm_func_with_cache(
if_loop_prompt, history_messages=history if_loop_prompt, history_messages=history
) )
if_loop_result = if_loop_result.strip().strip('"').strip("'").lower() if_loop_result = if_loop_result.strip().strip('"').strip("'").lower()
if if_loop_result != "yes": if if_loop_result != "yes":
break break
processed_chunks += 1 processed_chunks += 1
entities_count = len(maybe_nodes) entities_count = len(maybe_nodes)
relations_count = len(maybe_edges) relations_count = len(maybe_edges)