Merge pull request #1337 from danielaskdd/main

Only merge new entities/edges during gleaning stage
This commit is contained in:
Daniel.y
2025-04-10 21:45:58 +08:00
committed by GitHub
2 changed files with 17 additions and 4 deletions

View File

@@ -902,6 +902,13 @@ class LightRAG:
# Get file path from status document
file_path = getattr(status_doc, "file_path", "unknown_source")
async with pipeline_status_lock:
log_message = f"Processing file: {file_path}"
pipeline_status["history_messages"].append(log_message)
log_message = f"Processing d-id: {doc_id}"
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)
# Generate chunks from document
chunks: dict[str, Any] = {
compute_mdhash_id(dp["content"], prefix="chunk-"): {

View File

@@ -613,10 +613,16 @@ async def extract_entities(
glean_result, chunk_key, file_path
)
# Merge results
# Merge results - only add entities and edges with new names
for entity_name, entities in glean_nodes.items():
if (
entity_name not in maybe_nodes
): # Only accetp entities with new name in gleaning stage
maybe_nodes[entity_name].extend(entities)
for edge_key, edges in glean_edges.items():
if (
edge_key not in maybe_edges
): # Only accetp edges with new name in gleaning stage
maybe_edges[edge_key].extend(edges)
if now_glean_index == entity_extract_max_gleaning - 1: