From 66c4b01fdd7d57b0e8853151bdac378494b37484 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Mon, 17 Feb 2025 23:16:17 +0100 Subject: [PATCH 1/5] remove drops unused --- lightrag/kg/json_doc_status_impl.py | 5 +---- lightrag/kg/json_kv_impl.py | 3 --- lightrag/kg/mongo_impl.py | 9 --------- lightrag/kg/oracle_impl.py | 4 ---- lightrag/kg/postgres_impl.py | 9 +-------- lightrag/kg/redis_impl.py | 5 ----- lightrag/kg/tidb_impl.py | 4 ---- 7 files changed, 2 insertions(+), 37 deletions(-) diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py index 6c667891..050752eb 100644 --- a/lightrag/kg/json_doc_status_impl.py +++ b/lightrag/kg/json_doc_status_impl.py @@ -67,7 +67,4 @@ class JsonDocStatusStorage(DocStatusStorage): async def delete(self, doc_ids: list[str]): for doc_id in doc_ids: self._data.pop(doc_id, None) - await self.index_done_callback() - - async def drop(self) -> None: - raise NotImplementedError + await self.index_done_callback() \ No newline at end of file diff --git a/lightrag/kg/json_kv_impl.py b/lightrag/kg/json_kv_impl.py index 658e1239..7e13dea7 100644 --- a/lightrag/kg/json_kv_impl.py +++ b/lightrag/kg/json_kv_impl.py @@ -46,9 +46,6 @@ class JsonKVStorage(BaseKVStorage): left_data = {k: v for k, v in data.items() if k not in self._data} self._data.update(left_data) - async def drop(self) -> None: - self._data = {} - async def delete(self, ids: list[str]) -> None: for doc_id in ids: self._data.pop(doc_id, None) diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index 4eb968cf..384bb604 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -117,11 +117,6 @@ class MongoKVStorage(BaseKVStorage): # Mongo handles persistence automatically pass - async def drop(self) -> None: - """Drop the collection""" - await self._data.drop() - - @final @dataclass class MongoDocStatusStorage(DocStatusStorage): @@ -169,10 +164,6 @@ class MongoDocStatusStorage(DocStatusStorage): ) await asyncio.gather(*update_tasks) - async def drop(self) -> None: - """Drop the collection""" - await self._data.drop() - async def get_status_counts(self) -> dict[str, int]: """Get counts of documents in each status""" pipeline = [{"$group": {"_id": "$status", "count": {"$sum": 1}}}] diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py index c9d8d1b5..16013fe8 100644 --- a/lightrag/kg/oracle_impl.py +++ b/lightrag/kg/oracle_impl.py @@ -320,10 +320,6 @@ class OracleKVStorage(BaseKVStorage): # Oracle handles persistence automatically pass - async def drop(self) -> None: - raise NotImplementedError - - @final @dataclass class OracleVectorDBStorage(BaseVectorStorage): diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 5f845894..34c7c54d 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -300,11 +300,7 @@ class PGKVStorage(BaseKVStorage): async def index_done_callback(self) -> None: # PG handles persistence automatically pass - - async def drop(self) -> None: - raise NotImplementedError - - + @final @dataclass class PGVectorStorage(BaseVectorStorage): @@ -534,9 +530,6 @@ class PGDocStatusStorage(DocStatusStorage): ) return data - async def drop(self) -> None: - raise NotImplementedError - class PGGraphQueryException(Exception): """Exception for the AGE queries.""" diff --git a/lightrag/kg/redis_impl.py b/lightrag/kg/redis_impl.py index 2d5c94ce..056fbc9e 100644 --- a/lightrag/kg/redis_impl.py +++ b/lightrag/kg/redis_impl.py @@ -58,11 +58,6 @@ class RedisKVStorage(BaseKVStorage): for k in data: data[k]["_id"] = k - async def drop(self) -> None: - keys = await self._redis.keys(f"{self.namespace}:*") - if keys: - await self._redis.delete(*keys) - async def index_done_callback(self) -> None: # Redis handles persistence automatically pass diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py index 6dbfb934..1cff0bd9 100644 --- a/lightrag/kg/tidb_impl.py +++ b/lightrag/kg/tidb_impl.py @@ -214,10 +214,6 @@ class TiDBKVStorage(BaseKVStorage): # Ti handles persistence automatically pass - async def drop(self) -> None: - raise NotImplementedError - - @final @dataclass class TiDBVectorDBStorage(BaseVectorStorage): From efb28c8003dbefd4ab90c22b86d4906f4f20534b Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Mon, 17 Feb 2025 23:16:37 +0100 Subject: [PATCH 2/5] fixed already edge --- lightrag/operate.py | 45 +++++++++++++++++++++------------------------ 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 727c65f7..0a8003b8 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -239,39 +239,36 @@ async def _merge_edges_then_upsert( if await knowledge_graph_inst.has_edge(src_id, tgt_id): already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id) + if not already_edge: + return # Handle the case where get_edge returns None or missing fields - if already_edge: - # Get weight with default 0.0 if missing - if "weight" in already_edge: - already_weights.append(already_edge["weight"]) - else: - logger.warning( - f"Edge between {src_id} and {tgt_id} missing weight field" - ) - already_weights.append(0.0) + + # Get weight with default 0.0 if missing + if "weight" in already_edge: + already_weights.append(already_edge["weight"]) + else: + logger.warning(f"Edge between {src_id} and {tgt_id} missing weight field") + already_weights.append(0.0) # Get source_id with empty string default if missing or None - if "source_id" in already_edge and already_edge["source_id"] is not None: - already_source_ids.extend( - split_string_by_multi_markers( - already_edge["source_id"], [GRAPH_FIELD_SEP] - ) + if "source_id" in already_edge and already_edge["source_id"] is not None: + already_source_ids.extend( + split_string_by_multi_markers( + already_edge["source_id"], [GRAPH_FIELD_SEP] ) + ) # Get description with empty string default if missing or None - if ( - "description" in already_edge - and already_edge["description"] is not None - ): - already_description.append(already_edge["description"]) + if "description" in already_edge and already_edge["description"] is not None: + already_description.append(already_edge["description"]) # Get keywords with empty string default if missing or None - if "keywords" in already_edge and already_edge["keywords"] is not None: - already_keywords.extend( - split_string_by_multi_markers( - already_edge["keywords"], [GRAPH_FIELD_SEP] - ) + if "keywords" in already_edge and already_edge["keywords"] is not None: + already_keywords.extend( + split_string_by_multi_markers( + already_edge["keywords"], [GRAPH_FIELD_SEP] ) + ) # Process edges_data with None checks weight = sum([dp["weight"] for dp in edges_data] + already_weights) From 80272cbf16f34fc6511e50e022113191a2bfa6d2 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Mon, 17 Feb 2025 23:20:10 +0100 Subject: [PATCH 3/5] fixed edge --- lightrag/kg/json_doc_status_impl.py | 2 +- lightrag/kg/mongo_impl.py | 1 + lightrag/kg/oracle_impl.py | 1 + lightrag/kg/postgres_impl.py | 3 ++- lightrag/kg/tidb_impl.py | 1 + lightrag/operate.py | 22 +++++++++------------- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py index 050752eb..e69352f3 100644 --- a/lightrag/kg/json_doc_status_impl.py +++ b/lightrag/kg/json_doc_status_impl.py @@ -67,4 +67,4 @@ class JsonDocStatusStorage(DocStatusStorage): async def delete(self, doc_ids: list[str]): for doc_id in doc_ids: self._data.pop(doc_id, None) - await self.index_done_callback() \ No newline at end of file + await self.index_done_callback() diff --git a/lightrag/kg/mongo_impl.py b/lightrag/kg/mongo_impl.py index 384bb604..8cfc84b9 100644 --- a/lightrag/kg/mongo_impl.py +++ b/lightrag/kg/mongo_impl.py @@ -117,6 +117,7 @@ class MongoKVStorage(BaseKVStorage): # Mongo handles persistence automatically pass + @final @dataclass class MongoDocStatusStorage(DocStatusStorage): diff --git a/lightrag/kg/oracle_impl.py b/lightrag/kg/oracle_impl.py index 16013fe8..63c43ce0 100644 --- a/lightrag/kg/oracle_impl.py +++ b/lightrag/kg/oracle_impl.py @@ -320,6 +320,7 @@ class OracleKVStorage(BaseKVStorage): # Oracle handles persistence automatically pass + @final @dataclass class OracleVectorDBStorage(BaseVectorStorage): diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 34c7c54d..16aee8b8 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -300,7 +300,8 @@ class PGKVStorage(BaseKVStorage): async def index_done_callback(self) -> None: # PG handles persistence automatically pass - + + @final @dataclass class PGVectorStorage(BaseVectorStorage): diff --git a/lightrag/kg/tidb_impl.py b/lightrag/kg/tidb_impl.py index 1cff0bd9..f0e5a45b 100644 --- a/lightrag/kg/tidb_impl.py +++ b/lightrag/kg/tidb_impl.py @@ -214,6 +214,7 @@ class TiDBKVStorage(BaseKVStorage): # Ti handles persistence automatically pass + @final @dataclass class TiDBVectorDBStorage(BaseVectorStorage): diff --git a/lightrag/operate.py b/lightrag/operate.py index 0a8003b8..2264e7f6 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -228,29 +228,25 @@ async def _merge_nodes_then_upsert( async def _merge_edges_then_upsert( src_id: str, tgt_id: str, - edges_data: list[dict], + edges_data: list[dict[str, Any]], knowledge_graph_inst: BaseGraphStorage, - global_config: dict, + global_config: dict[str, str], ): - already_weights = [] - already_source_ids = [] - already_description = [] - already_keywords = [] + already_weights: list[float] = [] + already_source_ids: list[str] = [] + already_description: list[str] = [] + already_keywords: list[str] = [] if await knowledge_graph_inst.has_edge(src_id, tgt_id): already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id) + # Handle the case where get_edge returns None or missing fields if not already_edge: return - # Handle the case where get_edge returns None or missing fields # Get weight with default 0.0 if missing - if "weight" in already_edge: - already_weights.append(already_edge["weight"]) - else: - logger.warning(f"Edge between {src_id} and {tgt_id} missing weight field") - already_weights.append(0.0) + already_weights.append(already_edge.get("weight", 0.0)) - # Get source_id with empty string default if missing or None + # Get source_id with empty string default if missing or None if "source_id" in already_edge and already_edge["source_id"] is not None: already_source_ids.extend( split_string_by_multi_markers( From ba65329898609849ded942c320873a51175422dc Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Mon, 17 Feb 2025 23:21:14 +0100 Subject: [PATCH 4/5] cleanup --- lightrag/operate.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index 2264e7f6..e872c57c 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -228,21 +228,21 @@ async def _merge_nodes_then_upsert( async def _merge_edges_then_upsert( src_id: str, tgt_id: str, - edges_data: list[dict[str, Any]], + edges_data: list[dict], knowledge_graph_inst: BaseGraphStorage, - global_config: dict[str, str], + global_config: dict, ): - already_weights: list[float] = [] - already_source_ids: list[str] = [] - already_description: list[str] = [] - already_keywords: list[str] = [] + already_weights = [] + already_source_ids = [] + already_description = [] + already_keywords = [] if await knowledge_graph_inst.has_edge(src_id, tgt_id): already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id) # Handle the case where get_edge returns None or missing fields if not already_edge: return - + # Get weight with default 0.0 if missing already_weights.append(already_edge.get("weight", 0.0)) @@ -254,11 +254,11 @@ async def _merge_edges_then_upsert( ) ) - # Get description with empty string default if missing or None + # Get description with empty string default if missing or None if "description" in already_edge and already_edge["description"] is not None: already_description.append(already_edge["description"]) - # Get keywords with empty string default if missing or None + # Get keywords with empty string default if missing or None if "keywords" in already_edge and already_edge["keywords"] is not None: already_keywords.extend( split_string_by_multi_markers( From 0ab8eeed14be98a3b739dc3768247cd281721234 Mon Sep 17 00:00:00 2001 From: Yannick Stephan Date: Mon, 17 Feb 2025 23:26:51 +0100 Subject: [PATCH 5/5] cleaned code --- lightrag/operate.py | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/lightrag/operate.py b/lightrag/operate.py index e872c57c..08fc8ca1 100644 --- a/lightrag/operate.py +++ b/lightrag/operate.py @@ -240,31 +240,29 @@ async def _merge_edges_then_upsert( if await knowledge_graph_inst.has_edge(src_id, tgt_id): already_edge = await knowledge_graph_inst.get_edge(src_id, tgt_id) # Handle the case where get_edge returns None or missing fields - if not already_edge: - return - - # Get weight with default 0.0 if missing - already_weights.append(already_edge.get("weight", 0.0)) + if already_edge: + # Get weight with default 0.0 if missing + already_weights.append(already_edge.get("weight", 0.0)) - # Get source_id with empty string default if missing or None - if "source_id" in already_edge and already_edge["source_id"] is not None: - already_source_ids.extend( - split_string_by_multi_markers( - already_edge["source_id"], [GRAPH_FIELD_SEP] + # Get source_id with empty string default if missing or None + if already_edge.get("source_id") is not None: + already_source_ids.extend( + split_string_by_multi_markers( + already_edge["source_id"], [GRAPH_FIELD_SEP] + ) ) - ) - # Get description with empty string default if missing or None - if "description" in already_edge and already_edge["description"] is not None: - already_description.append(already_edge["description"]) + # Get description with empty string default if missing or None + if already_edge.get("description") is not None: + already_description.append(already_edge["description"]) - # Get keywords with empty string default if missing or None - if "keywords" in already_edge and already_edge["keywords"] is not None: - already_keywords.extend( - split_string_by_multi_markers( - already_edge["keywords"], [GRAPH_FIELD_SEP] + # Get keywords with empty string default if missing or None + if already_edge.get("keywords") is not None: + already_keywords.extend( + split_string_by_multi_markers( + already_edge["keywords"], [GRAPH_FIELD_SEP] + ) ) - ) # Process edges_data with None checks weight = sum([dp["weight"] for dp in edges_data] + already_weights)