Enhance Neo4j graph storage with error handling and label validation

- Add label existence check and validation methods in Neo4j implementation - Improve error handling in get_node, get_edge, and upsert methods - Add default values and logging for missing edge properties - Ensure consistent label processing across graph storage methods
2025-02-14 16:04:06 +01:00
parent 4d58ff8bb4
commit cd81312659
2 changed files with 150 additions and 46 deletions
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -237,25 +237,65 @@ async def _merge_edges_then_upsert(

    if await knowledge_graph_inst.has_edge(src_id, tgt_id):
        already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
-        already_weights.append(already_edge["weight"])
-        already_source_ids.extend(
-            split_string_by_multi_markers(already_edge["source_id"], [GRAPH_FIELD_SEP])
-        )
-        already_description.append(already_edge["description"])
-        already_keywords.extend(
-            split_string_by_multi_markers(already_edge["keywords"], [GRAPH_FIELD_SEP])
-        )
+        # Handle the case where get_edge returns None or missing fields
+        if already_edge:
+            # Get weight with default 0.0 if missing
+            if "weight" in already_edge:
+                already_weights.append(already_edge["weight"])
+            else:
+                logger.warning(
+                    f"Edge between {src_id} and {tgt_id} missing weight field"
+                )
+                already_weights.append(0.0)

+            # Get source_id with empty string default if missing or None
+            if "source_id" in already_edge and already_edge["source_id"] is not None:
+                already_source_ids.extend(
+                    split_string_by_multi_markers(
+                        already_edge["source_id"], [GRAPH_FIELD_SEP]
+                    )
+                )
+
+            # Get description with empty string default if missing or None
+            if (
+                "description" in already_edge
+                and already_edge["description"] is not None
+            ):
+                already_description.append(already_edge["description"])
+
+            # Get keywords with empty string default if missing or None
+            if "keywords" in already_edge and already_edge["keywords"] is not None:
+                already_keywords.extend(
+                    split_string_by_multi_markers(
+                        already_edge["keywords"], [GRAPH_FIELD_SEP]
+                    )
+                )
+
+    # Process edges_data with None checks
    weight = sum([dp["weight"] for dp in edges_data] + already_weights)
    description = GRAPH_FIELD_SEP.join(
-        sorted(set([dp["description"] for dp in edges_data] + already_description))
+        sorted(
+            set(
+                [dp["description"] for dp in edges_data if dp.get("description")]
+                + already_description
+            )
+        )
    )
    keywords = GRAPH_FIELD_SEP.join(
-        sorted(set([dp["keywords"] for dp in edges_data] + already_keywords))
+        sorted(
+            set(
+                [dp["keywords"] for dp in edges_data if dp.get("keywords")]
+                + already_keywords
+            )
+        )
    )
    source_id = GRAPH_FIELD_SEP.join(
-        set([dp["source_id"] for dp in edges_data] + already_source_ids)
+        set(
+            [dp["source_id"] for dp in edges_data if dp.get("source_id")]
+            + already_source_ids
+        )
    )
+
    for need_insert_id in [src_id, tgt_id]:
        if not (await knowledge_graph_inst.has_node(need_insert_id)):
            await knowledge_graph_inst.upsert_node(