Only merge new entities/edges during gleaning
- Restrict gleaning to new entity names - Only add edges with new keys - Prevent similar descriptions of the same entity or edge
This commit is contained in:
@@ -613,11 +613,13 @@ async def extract_entities(
|
|||||||
glean_result, chunk_key, file_path
|
glean_result, chunk_key, file_path
|
||||||
)
|
)
|
||||||
|
|
||||||
# Merge results
|
# Merge results - only add entities and edges with new names
|
||||||
for entity_name, entities in glean_nodes.items():
|
for entity_name, entities in glean_nodes.items():
|
||||||
maybe_nodes[entity_name].extend(entities)
|
if entity_name not in maybe_nodes: # Only accetp entities with new name in gleaning stage
|
||||||
|
maybe_nodes[entity_name].extend(entities)
|
||||||
for edge_key, edges in glean_edges.items():
|
for edge_key, edges in glean_edges.items():
|
||||||
maybe_edges[edge_key].extend(edges)
|
if edge_key not in maybe_edges: # Only accetp edges with new name in gleaning stage
|
||||||
|
maybe_edges[edge_key].extend(edges)
|
||||||
|
|
||||||
if now_glean_index == entity_extract_max_gleaning - 1:
|
if now_glean_index == entity_extract_max_gleaning - 1:
|
||||||
break
|
break
|
||||||
|
Reference in New Issue
Block a user