From 66c4b01fdd7d57b0e8853151bdac378494b37484 Mon Sep 17 00:00:00 2001
From: Yannick Stephan <stephan.yannick@me.com>
Date: Mon, 17 Feb 2025 23:16:17 +0100
Subject: [PATCH 1/5] remove drops unused

---
 lightrag/kg/json_doc_status_impl.py | 5 +----
 lightrag/kg/json_kv_impl.py         | 3 ---
 lightrag/kg/mongo_impl.py           | 9 ---------
 lightrag/kg/oracle_impl.py          | 4 ----
 lightrag/kg/postgres_impl.py        | 9 +--------
 lightrag/kg/redis_impl.py           | 5 -----
 lightrag/kg/tidb_impl.py            | 4 ----
 7 files changed, 2 insertions(+), 37 deletions(-)

diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py
index 6c667891..050752eb 100644
--- a/lightrag/kg/json_doc_status_impl.py
+++ b/lightrag/kg/json_doc_status_impl.py
@@ -67,7 +67,4 @@ class JsonDocStatusStorage(DocStatusStorage):
     async def delete(self, doc_ids: list[str]):
         for doc_id in doc_ids:
             self._data.pop(doc_id, None)
-        await self.index_done_callback()
-
-    async def drop(self) -> None:
-        raise NotImplementedError
+        await self.index_done_callback()
\ No newline at end of file
diff --git a/lightrag/kg/json_kv_impl.py b/lightrag/kg/json_kv_impl.py
index 658e1239..7e13dea7 100644
--- a/lightrag/kg/json_kv_impl.py
+++ b/lightrag/kg/json_kv_impl.py
@@ -46,9 +46,6 @@ class JsonKVStorage(BaseKVStorage):
         left_data = {k: v for k, v in data.items() if k not in self._data}
         self._data.update(left_data)
 
-    async def drop(self) -> None:
-        self._data = {}
-
     async def delete(self, ids: list[str]) -> None:
         for doc_id in ids:
             self._data.pop(doc_id, None)
diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py
index 4eb968cf..384bb604 100644
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -117,11 +117,6 @@ class MongoKVStorage(BaseKVStorage):
         # Mongo handles persistence automatically
         pass
 
-    async def drop(self) -> None:
-        """Drop the collection"""
-        await self._data.drop()
-
-
 @final
 @dataclass
 class MongoDocStatusStorage(DocStatusStorage):
@@ -169,10 +164,6 @@ class MongoDocStatusStorage(DocStatusStorage):
             )
         await asyncio.gather(*update_tasks)
 
-    async def drop(self) -> None:
-        """Drop the collection"""
-        await self._data.drop()
-
     async def get_status_counts(self) -> dict[str, int]:
         """Get counts of documents in each status"""
         pipeline = [{"$group": {"_id": "$status", "count": {"$sum": 1}}}]
diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py
index c9d8d1b5..16013fe8 100644
--- a/lightrag/kg/oracle_impl.py
+++ b/lightrag/kg/oracle_impl.py
@@ -320,10 +320,6 @@ class OracleKVStorage(BaseKVStorage):
         # Oracle handles persistence automatically
         pass
 
-    async def drop(self) -> None:
-        raise NotImplementedError
-
-
 @final
 @dataclass
 class OracleVectorDBStorage(BaseVectorStorage):
diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py
index 5f845894..34c7c54d 100644
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -300,11 +300,7 @@ class PGKVStorage(BaseKVStorage):
     async def index_done_callback(self) -> None:
         # PG handles persistence automatically
         pass
-
-    async def drop(self) -> None:
-        raise NotImplementedError
-
-
+    
 @final
 @dataclass
 class PGVectorStorage(BaseVectorStorage):
@@ -534,9 +530,6 @@ class PGDocStatusStorage(DocStatusStorage):
             )
         return data
 
-    async def drop(self) -> None:
-        raise NotImplementedError
-
 
 class PGGraphQueryException(Exception):
     """Exception for the AGE queries."""
diff --git a/lightrag/kg/redis_impl.py b/lightrag/kg/redis_impl.py
index 2d5c94ce..056fbc9e 100644
--- a/lightrag/kg/redis_impl.py
+++ b/lightrag/kg/redis_impl.py
@@ -58,11 +58,6 @@ class RedisKVStorage(BaseKVStorage):
         for k in data:
             data[k]["_id"] = k
 
-    async def drop(self) -> None:
-        keys = await self._redis.keys(f"{self.namespace}:*")
-        if keys:
-            await self._redis.delete(*keys)
-
     async def index_done_callback(self) -> None:
         # Redis handles persistence automatically
         pass
diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py
index 6dbfb934..1cff0bd9 100644
--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -214,10 +214,6 @@ class TiDBKVStorage(BaseKVStorage):
         # Ti handles persistence automatically
         pass
 
-    async def drop(self) -> None:
-        raise NotImplementedError
-
-
 @final
 @dataclass
 class TiDBVectorDBStorage(BaseVectorStorage):

From efb28c8003dbefd4ab90c22b86d4906f4f20534b Mon Sep 17 00:00:00 2001
From: Yannick Stephan <stephan.yannick@me.com>
Date: Mon, 17 Feb 2025 23:16:37 +0100
Subject: [PATCH 2/5] fixed already edge

---
 lightrag/operate.py | 45 +++++++++++++++++++++------------------------
 1 file changed, 21 insertions(+), 24 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 727c65f7..0a8003b8 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -239,39 +239,36 @@ async def _merge_edges_then_upsert(
 
     if await knowledge_graph_inst.has_edge(src_id, tgt_id):
         already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
+        if not already_edge:
+            return
         # Handle the case where get_edge returns None or missing fields
-        if already_edge:
-            # Get weight with default 0.0 if missing
-            if "weight" in already_edge:
-                already_weights.append(already_edge["weight"])
-            else:
-                logger.warning(
-                    f"Edge between {src_id} and {tgt_id} missing weight field"
-                )
-                already_weights.append(0.0)
+
+        # Get weight with default 0.0 if missing
+        if "weight" in already_edge:
+            already_weights.append(already_edge["weight"])
+        else:
+            logger.warning(f"Edge between {src_id} and {tgt_id} missing weight field")
+            already_weights.append(0.0)
 
             # Get source_id with empty string default if missing or None
-            if "source_id" in already_edge and already_edge["source_id"] is not None:
-                already_source_ids.extend(
-                    split_string_by_multi_markers(
-                        already_edge["source_id"], [GRAPH_FIELD_SEP]
-                    )
+        if "source_id" in already_edge and already_edge["source_id"] is not None:
+            already_source_ids.extend(
+                split_string_by_multi_markers(
+                    already_edge["source_id"], [GRAPH_FIELD_SEP]
                 )
+            )
 
             # Get description with empty string default if missing or None
-            if (
-                "description" in already_edge
-                and already_edge["description"] is not None
-            ):
-                already_description.append(already_edge["description"])
+        if "description" in already_edge and already_edge["description"] is not None:
+            already_description.append(already_edge["description"])
 
             # Get keywords with empty string default if missing or None
-            if "keywords" in already_edge and already_edge["keywords"] is not None:
-                already_keywords.extend(
-                    split_string_by_multi_markers(
-                        already_edge["keywords"], [GRAPH_FIELD_SEP]
-                    )
+        if "keywords" in already_edge and already_edge["keywords"] is not None:
+            already_keywords.extend(
+                split_string_by_multi_markers(
+                    already_edge["keywords"], [GRAPH_FIELD_SEP]
                 )
+            )
 
     # Process edges_data with None checks
     weight = sum([dp["weight"] for dp in edges_data] + already_weights)

From 80272cbf16f34fc6511e50e022113191a2bfa6d2 Mon Sep 17 00:00:00 2001
From: Yannick Stephan <stephan.yannick@me.com>
Date: Mon, 17 Feb 2025 23:20:10 +0100
Subject: [PATCH 3/5] fixed edge

---
 lightrag/kg/json_doc_status_impl.py |  2 +-
 lightrag/kg/mongo_impl.py           |  1 +
 lightrag/kg/oracle_impl.py          |  1 +
 lightrag/kg/postgres_impl.py        |  3 ++-
 lightrag/kg/tidb_impl.py            |  1 +
 lightrag/operate.py                 | 22 +++++++++-------------
 6 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py
index 050752eb..e69352f3 100644
--- a/lightrag/kg/json_doc_status_impl.py
+++ b/lightrag/kg/json_doc_status_impl.py
@@ -67,4 +67,4 @@ class JsonDocStatusStorage(DocStatusStorage):
     async def delete(self, doc_ids: list[str]):
         for doc_id in doc_ids:
             self._data.pop(doc_id, None)
-        await self.index_done_callback()
\ No newline at end of file
+        await self.index_done_callback()
diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py
index 384bb604..8cfc84b9 100644
--- a/lightrag/kg/mongo_impl.py
+++ b/lightrag/kg/mongo_impl.py
@@ -117,6 +117,7 @@ class MongoKVStorage(BaseKVStorage):
         # Mongo handles persistence automatically
         pass
 
+
 @final
 @dataclass
 class MongoDocStatusStorage(DocStatusStorage):
diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py
index 16013fe8..63c43ce0 100644
--- a/lightrag/kg/oracle_impl.py
+++ b/lightrag/kg/oracle_impl.py
@@ -320,6 +320,7 @@ class OracleKVStorage(BaseKVStorage):
         # Oracle handles persistence automatically
         pass
 
+
 @final
 @dataclass
 class OracleVectorDBStorage(BaseVectorStorage):
diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py
index 34c7c54d..16aee8b8 100644
--- a/lightrag/kg/postgres_impl.py
+++ b/lightrag/kg/postgres_impl.py
@@ -300,7 +300,8 @@ class PGKVStorage(BaseKVStorage):
     async def index_done_callback(self) -> None:
         # PG handles persistence automatically
         pass
-    
+
+
 @final
 @dataclass
 class PGVectorStorage(BaseVectorStorage):
diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py
index 1cff0bd9..f0e5a45b 100644
--- a/lightrag/kg/tidb_impl.py
+++ b/lightrag/kg/tidb_impl.py
@@ -214,6 +214,7 @@ class TiDBKVStorage(BaseKVStorage):
         # Ti handles persistence automatically
         pass
 
+
 @final
 @dataclass
 class TiDBVectorDBStorage(BaseVectorStorage):
diff --git a/lightrag/operate.py b/lightrag/operate.py
index 0a8003b8..2264e7f6 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -228,29 +228,25 @@ async def _merge_nodes_then_upsert(
 async def _merge_edges_then_upsert(
     src_id: str,
     tgt_id: str,
-    edges_data: list[dict],
+    edges_data: list[dict[str, Any]],
     knowledge_graph_inst: BaseGraphStorage,
-    global_config: dict,
+    global_config: dict[str, str],
 ):
-    already_weights = []
-    already_source_ids = []
-    already_description = []
-    already_keywords = []
+    already_weights: list[float] = []
+    already_source_ids: list[str] = []
+    already_description: list[str] = []
+    already_keywords: list[str] = []
 
     if await knowledge_graph_inst.has_edge(src_id, tgt_id):
         already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
+        # Handle the case where get_edge returns None or missing fields
         if not already_edge:
             return
-        # Handle the case where get_edge returns None or missing fields
 
         # Get weight with default 0.0 if missing
-        if "weight" in already_edge:
-            already_weights.append(already_edge["weight"])
-        else:
-            logger.warning(f"Edge between {src_id} and {tgt_id} missing weight field")
-            already_weights.append(0.0)
+        already_weights.append(already_edge.get("weight", 0.0))
 
-            # Get source_id with empty string default if missing or None
+        # Get source_id with empty string default if missing or None
         if "source_id" in already_edge and already_edge["source_id"] is not None:
             already_source_ids.extend(
                 split_string_by_multi_markers(

From ba65329898609849ded942c320873a51175422dc Mon Sep 17 00:00:00 2001
From: Yannick Stephan <stephan.yannick@me.com>
Date: Mon, 17 Feb 2025 23:21:14 +0100
Subject: [PATCH 4/5] cleanup

---
 lightrag/operate.py | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index 2264e7f6..e872c57c 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -228,21 +228,21 @@ async def _merge_nodes_then_upsert(
 async def _merge_edges_then_upsert(
     src_id: str,
     tgt_id: str,
-    edges_data: list[dict[str, Any]],
+    edges_data: list[dict],
     knowledge_graph_inst: BaseGraphStorage,
-    global_config: dict[str, str],
+    global_config: dict,
 ):
-    already_weights: list[float] = []
-    already_source_ids: list[str] = []
-    already_description: list[str] = []
-    already_keywords: list[str] = []
+    already_weights = []
+    already_source_ids = []
+    already_description = []
+    already_keywords = []
 
     if await knowledge_graph_inst.has_edge(src_id, tgt_id):
         already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
         # Handle the case where get_edge returns None or missing fields
         if not already_edge:
             return
-
+        
         # Get weight with default 0.0 if missing
         already_weights.append(already_edge.get("weight", 0.0))
 
@@ -254,11 +254,11 @@ async def _merge_edges_then_upsert(
                 )
             )
 
-            # Get description with empty string default if missing or None
+        # Get description with empty string default if missing or None
         if "description" in already_edge and already_edge["description"] is not None:
             already_description.append(already_edge["description"])
 
-            # Get keywords with empty string default if missing or None
+        # Get keywords with empty string default if missing or None
         if "keywords" in already_edge and already_edge["keywords"] is not None:
             already_keywords.extend(
                 split_string_by_multi_markers(

From 0ab8eeed14be98a3b739dc3768247cd281721234 Mon Sep 17 00:00:00 2001
From: Yannick Stephan <stephan.yannick@me.com>
Date: Mon, 17 Feb 2025 23:26:51 +0100
Subject: [PATCH 5/5] cleaned code

---
 lightrag/operate.py | 38 ++++++++++++++++++--------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/lightrag/operate.py b/lightrag/operate.py
index e872c57c..08fc8ca1 100644
--- a/lightrag/operate.py
+++ b/lightrag/operate.py
@@ -240,31 +240,29 @@ async def _merge_edges_then_upsert(
     if await knowledge_graph_inst.has_edge(src_id, tgt_id):
         already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id)
         # Handle the case where get_edge returns None or missing fields
-        if not already_edge:
-            return
-        
-        # Get weight with default 0.0 if missing
-        already_weights.append(already_edge.get("weight", 0.0))
+        if already_edge:
+            # Get weight with default 0.0 if missing
+            already_weights.append(already_edge.get("weight", 0.0))
 
-        # Get source_id with empty string default if missing or None
-        if "source_id" in already_edge and already_edge["source_id"] is not None:
-            already_source_ids.extend(
-                split_string_by_multi_markers(
-                    already_edge["source_id"], [GRAPH_FIELD_SEP]
+            # Get source_id with empty string default if missing or None
+            if already_edge.get("source_id") is not None:
+                already_source_ids.extend(
+                    split_string_by_multi_markers(
+                        already_edge["source_id"], [GRAPH_FIELD_SEP]
+                    )
                 )
-            )
 
-        # Get description with empty string default if missing or None
-        if "description" in already_edge and already_edge["description"] is not None:
-            already_description.append(already_edge["description"])
+            # Get description with empty string default if missing or None
+            if already_edge.get("description") is not None:
+                already_description.append(already_edge["description"])
 
-        # Get keywords with empty string default if missing or None
-        if "keywords" in already_edge and already_edge["keywords"] is not None:
-            already_keywords.extend(
-                split_string_by_multi_markers(
-                    already_edge["keywords"], [GRAPH_FIELD_SEP]
+            # Get keywords with empty string default if missing or None
+            if already_edge.get("keywords") is not None:
+                already_keywords.extend(
+                    split_string_by_multi_markers(
+                        already_edge["keywords"], [GRAPH_FIELD_SEP]
+                    )
                 )
-            )
 
     # Process edges_data with None checks
     weight = sum([dp["weight"] for dp in edges_data] + already_weights)