From 4c0352ee2b6dd89b34e39cd1e2483038c447ff60 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Mon, 11 Nov 2024 17:48:40 +0800
Subject: [PATCH 1/3] Add delete method
---
.gitignore | 1 +
README.md | 18 ++++++++++++++++
lightrag/__init__.py | 2 +-
lightrag/base.py | 2 +-
lightrag/lightrag.py | 30 +++++++++++++++++++++++++-
lightrag/storage.py | 51 +++++++++++++++++++++++++++++++++++++++++++-
6 files changed, 100 insertions(+), 4 deletions(-)
diff --git a/.gitignore b/.gitignore
index def738b2..b5c4bb11 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,3 +10,4 @@ local_neo4jWorkDir/
neo4jWorkDir/
ignore_this.txt
.venv/
+.ruff_cache/
\ No newline at end of file
diff --git a/README.md b/README.md
index 59245c44..d32458a8 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ This repository hosts the code of LightRAG. The structure of this code is based
## 🎉 News
+- [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity).
- [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
- [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
- [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
@@ -318,6 +319,23 @@ with open("./newText.txt") as f:
rag.insert(f.read())
```
+### Delete Entity
+
+```python
+# Delete an entity, together with its relationships, by name
+rag = LightRAG(
+ working_dir=WORKING_DIR,
+ llm_model_func=llm_model_func,
+ embedding_func=EmbeddingFunc(
+ embedding_dim=embedding_dimension,
+ max_token_size=8192,
+ func=embedding_func,
+ ),
+)
+
+rag.delete_by_entity("Project Gutenberg")
+```
+
### Multi-file Type Support
The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
diff --git a/lightrag/__init__.py b/lightrag/__init__.py
index b73db1b9..6d9003ff 100644
--- a/lightrag/__init__.py
+++ b/lightrag/__init__.py
@@ -1,5 +1,5 @@
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
-__version__ = "0.0.9"
+__version__ = "1.0.0"
__author__ = "Zirui Guo"
__url__ = "https://github.com/HKUDS/LightRAG"
diff --git a/lightrag/base.py b/lightrag/base.py
index cecd5edd..bd472570 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -116,7 +116,7 @@ class BaseGraphStorage(StorageNameSpace):
):
raise NotImplementedError
- async def clustering(self, algorithm: str):
+ async def delete_node(self, node_id: str):
raise NotImplementedError
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 3abe9185..b4e4886d 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -188,7 +188,6 @@ class LightRAG:
return {
"Neo4JStorage": Neo4JStorage,
"NetworkXStorage": NetworkXStorage,
- # "ArangoDBStorage": ArangoDBStorage
}
def insert(self, string_or_strings):
@@ -328,3 +327,32 @@ class LightRAG:
continue
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
await asyncio.gather(*tasks)
+
+ def delete_by_entity(self, entity_name: str):
+ loop = always_get_an_event_loop()
+ return loop.run_until_complete(self.adelete_by_entity(entity_name))
+
+ async def adelete_by_entity(self, entity_name: str):
+ entity_name = f"\"{entity_name.upper()}\""
+
+ try:
+ await self.entities_vdb.delete_entity(entity_name)
+ await self.relationships_vdb.delete_relation(entity_name)
+ await self.chunk_entity_relation_graph.delete_node(entity_name)
+
+ logger.info(f"Entity '{entity_name}' and its relationships have been deleted.")
+ await self._delete_by_entity_done()
+ except Exception as e:
+ logger.error(f"Error while deleting entity '{entity_name}': {e}")
+
+ async def _delete_by_entity_done(self):
+ tasks = []
+ for storage_inst in [
+ self.entities_vdb,
+ self.relationships_vdb,
+ self.chunk_entity_relation_graph,
+ ]:
+ if storage_inst is None:
+ continue
+ tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
+ await asyncio.gather(*tasks)
\ No newline at end of file
diff --git a/lightrag/storage.py b/lightrag/storage.py
index 61bebf2d..080562df 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -7,7 +7,13 @@ import networkx as nx
import numpy as np
from nano_vectordb import NanoVectorDB
-from .utils import load_json, logger, write_json
+from .utils import (
+ logger,
+ load_json,
+ write_json,
+ compute_mdhash_id,
+)
+
from .base import (
BaseGraphStorage,
BaseKVStorage,
@@ -110,6 +116,37 @@ class NanoVectorDBStorage(BaseVectorStorage):
{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
]
return results
+
+ @property
+ def client_storage(self):
+ return getattr(self._client, "_NanoVectorDB__storage")
+
+ async def delete_entity(self, entity_name: str):
+ try:
+ entity_id = [compute_mdhash_id(entity_name, prefix="ent-")]
+
+ if self._client.get(entity_id):
+ self._client.delete(entity_id)
+ logger.info(f"Entity {entity_name} have been deleted.")
+ else:
+ logger.info(f"No entity found with name {entity_name}.")
+ except Exception as e:
+ logger.error(f"Error while deleting entity {entity_name}: {e}")
+
+ async def delete_relation(self, entity_name: str):
+ try:
+ relations = [
+ dp for dp in self.client_storage["data"] if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
+ ]
+ ids_to_delete = [relation["__id__"] for relation in relations]
+
+ if ids_to_delete:
+ self._client.delete(ids_to_delete)
+ logger.info(f"All relations related to entity {entity_name} have been deleted.")
+ else:
+ logger.info(f"No relations found for entity {entity_name}.")
+ except Exception as e:
+ logger.error(f"Error while deleting relations for entity {entity_name}: {e}")
async def index_done_callback(self):
self._client.save()
@@ -228,6 +265,18 @@ class NetworkXStorage(BaseGraphStorage):
):
self._graph.add_edge(source_node_id, target_node_id, **edge_data)
+ async def delete_node(self, node_id: str):
+ """
+ Delete a node from the graph based on the specified node_id.
+
+ :param node_id: The node_id to delete
+ """
+ if self._graph.has_node(node_id):
+ self._graph.remove_node(node_id)
+ logger.info(f"Node {node_id} deleted from the graph.")
+ else:
+ logger.warning(f"Node {node_id} not found in the graph for deletion.")
+
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
if algorithm not in self._node_embed_algorithms:
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
From b49f73181c77c8a1fd5446677dd9b3b892979f38 Mon Sep 17 00:00:00 2001
From: LarFii <834462287@qq.com>
Date: Mon, 11 Nov 2024 17:54:22 +0800
Subject: [PATCH 2/3] Reformat delete methods and fix trailing whitespace
---
.gitignore | 2 +-
lightrag/lightrag.py | 12 +++++++-----
lightrag/storage.py | 24 +++++++++++++++---------
3 files changed, 23 insertions(+), 15 deletions(-)
diff --git a/.gitignore b/.gitignore
index b5c4bb11..3a6c7858 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,4 @@ local_neo4jWorkDir/
neo4jWorkDir/
ignore_this.txt
.venv/
-.ruff_cache/
\ No newline at end of file
+.ruff_cache/
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index b4e4886d..8b299749 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -331,20 +331,22 @@ class LightRAG:
def delete_by_entity(self, entity_name: str):
loop = always_get_an_event_loop()
return loop.run_until_complete(self.adelete_by_entity(entity_name))
-
+
async def adelete_by_entity(self, entity_name: str):
- entity_name = f"\"{entity_name.upper()}\""
+ entity_name = f'"{entity_name.upper()}"'
try:
await self.entities_vdb.delete_entity(entity_name)
await self.relationships_vdb.delete_relation(entity_name)
await self.chunk_entity_relation_graph.delete_node(entity_name)
- logger.info(f"Entity '{entity_name}' and its relationships have been deleted.")
+ logger.info(
+ f"Entity '{entity_name}' and its relationships have been deleted."
+ )
await self._delete_by_entity_done()
except Exception as e:
logger.error(f"Error while deleting entity '{entity_name}': {e}")
-
+
async def _delete_by_entity_done(self):
tasks = []
for storage_inst in [
@@ -355,4 +357,4 @@ class LightRAG:
if storage_inst is None:
continue
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
- await asyncio.gather(*tasks)
\ No newline at end of file
+ await asyncio.gather(*tasks)
diff --git a/lightrag/storage.py b/lightrag/storage.py
index 080562df..9a4c3d4c 100644
--- a/lightrag/storage.py
+++ b/lightrag/storage.py
@@ -8,8 +8,8 @@ import numpy as np
from nano_vectordb import NanoVectorDB
from .utils import (
- logger,
- load_json,
+ logger,
+ load_json,
write_json,
compute_mdhash_id,
)
@@ -116,7 +116,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
{**dp, "id": dp["__id__"], "distance": dp["__metrics__"]} for dp in results
]
return results
-
+
@property
def client_storage(self):
return getattr(self._client, "_NanoVectorDB__storage")
@@ -124,7 +124,7 @@ class NanoVectorDBStorage(BaseVectorStorage):
async def delete_entity(self, entity_name: str):
try:
entity_id = [compute_mdhash_id(entity_name, prefix="ent-")]
-
+
if self._client.get(entity_id):
self._client.delete(entity_id)
logger.info(f"Entity {entity_name} have been deleted.")
@@ -132,21 +132,27 @@ class NanoVectorDBStorage(BaseVectorStorage):
logger.info(f"No entity found with name {entity_name}.")
except Exception as e:
logger.error(f"Error while deleting entity {entity_name}: {e}")
-
+
async def delete_relation(self, entity_name: str):
try:
relations = [
- dp for dp in self.client_storage["data"] if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
+ dp
+ for dp in self.client_storage["data"]
+ if dp["src_id"] == entity_name or dp["tgt_id"] == entity_name
]
ids_to_delete = [relation["__id__"] for relation in relations]
if ids_to_delete:
self._client.delete(ids_to_delete)
- logger.info(f"All relations related to entity {entity_name} have been deleted.")
+ logger.info(
+ f"All relations related to entity {entity_name} have been deleted."
+ )
else:
logger.info(f"No relations found for entity {entity_name}.")
except Exception as e:
- logger.error(f"Error while deleting relations for entity {entity_name}: {e}")
+ logger.error(
+ f"Error while deleting relations for entity {entity_name}: {e}"
+ )
async def index_done_callback(self):
self._client.save()
@@ -268,7 +274,7 @@ class NetworkXStorage(BaseGraphStorage):
async def delete_node(self, node_id: str):
"""
Delete a node from the graph based on the specified node_id.
-
+
:param node_id: The node_id to delete
"""
if self._graph.has_node(node_id):
From c3ffce47295b358354929a82954128293107352f Mon Sep 17 00:00:00 2001
From: zrguo <49157727+LarFii@users.noreply.github.com>
Date: Tue, 12 Nov 2024 07:51:21 +0800
Subject: [PATCH 3/3] Update Discord link
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index d32458a8..1e970c8c 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@
-
+