add Oracle support
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,3 +11,4 @@ neo4jWorkDir/
|
|||||||
ignore_this.txt
|
ignore_this.txt
|
||||||
.venv/
|
.venv/
|
||||||
*.ignore.*
|
*.ignore.*
|
||||||
|
.ruff_cache/
|
||||||
|
22
README.md
22
README.md
@@ -8,7 +8,7 @@
|
|||||||
<a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
|
<a href='https://lightrag.github.io'><img src='https://img.shields.io/badge/Project-Page-Green'></a>
|
||||||
<a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
|
<a href='https://youtu.be/oageL-1I0GE'><img src='https://badges.aleen42.com/src/youtube.svg'></a>
|
||||||
<a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
|
<a href='https://arxiv.org/abs/2410.05779'><img src='https://img.shields.io/badge/arXiv-2410.05779-b31b1b'></a>
|
||||||
<a href='https://discord.gg/rdE8YVPm'><img src='https://discordapp.com/api/guilds/1296348098003734629/widget.png?style=shield'></a>
|
<a href='https://discord.gg/yF2MmDJyGJ'><img src='https://discordapp.com/api/guilds/1296348098003734629/widget.png?style=shield'></a>
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
<img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
|
<img src='https://img.shields.io/github/stars/hkuds/lightrag?color=green&style=social' />
|
||||||
@@ -22,7 +22,8 @@ This repository hosts the code of LightRAG. The structure of this code is based
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
## 🎉 News
|
## 🎉 News
|
||||||
- [x] [2024.11.11]🎯📢You can [use Oracle Database 23ai for all storage types (kv/vector/graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py) now.
|
- [x] [2024.11.12]🎯📢You can [use Oracle Database 23ai for all storage types (kv/vector/graph)](https://github.com/HKUDS/LightRAG/blob/main/examples/lightrag_oracle_demo.py) now.
|
||||||
|
- [x] [2024.11.11]🎯📢LightRAG now supports [deleting entities by their names](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#delete-entity).
|
||||||
- [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
|
- [x] [2024.11.09]🎯📢Now comes [LightRAG Gui](https://lightrag-gui.streamlit.app) that lets you insert, query, visualize, and download LightRAG knowledge.
|
||||||
- [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
|
- [x] [2024.11.04]🎯📢You can [use Neo4J for Storage](https://github.com/HKUDS/LightRAG?tab=readme-ov-file#using-neo4j-for-storage) now.
|
||||||
- [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
|
- [x] [2024.10.29]🎯📢LightRAG now supports multiple file types, including PDF, DOC, PPT, and CSV via `textract`.
|
||||||
@@ -319,6 +320,23 @@ with open("./newText.txt") as f:
|
|||||||
rag.insert(f.read())
|
rag.insert(f.read())
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Delete Entity
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Delete Entity: Deleting entities by their names
|
||||||
|
rag = LightRAG(
|
||||||
|
working_dir=WORKING_DIR,
|
||||||
|
llm_model_func=llm_model_func,
|
||||||
|
embedding_func=EmbeddingFunc(
|
||||||
|
embedding_dim=embedding_dimension,
|
||||||
|
max_token_size=8192,
|
||||||
|
func=embedding_func,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
rag.delete_by_entity("Project Gutenberg")
|
||||||
|
```
|
||||||
|
|
||||||
### Multi-file Type Support
|
### Multi-file Type Support
|
||||||
|
|
||||||
The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
|
The `textract` supports reading file types such as TXT, DOCX, PPTX, CSV, and PDF.
|
||||||
|
@@ -1,5 +1,5 @@
|
|||||||
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
from .lightrag import LightRAG as LightRAG, QueryParam as QueryParam
|
||||||
|
|
||||||
__version__ = "0.0.9"
|
__version__ = "1.0.0"
|
||||||
__author__ = "Zirui Guo"
|
__author__ = "Zirui Guo"
|
||||||
__url__ = "https://github.com/HKUDS/LightRAG"
|
__url__ = "https://github.com/HKUDS/LightRAG"
|
||||||
|
@@ -118,7 +118,7 @@ class BaseGraphStorage(StorageNameSpace):
|
|||||||
):
|
):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
async def clustering(self, algorithm: str):
|
async def delete_node(self, node_id: str):
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
||||||
|
@@ -592,7 +592,9 @@ TABLES = {
|
|||||||
workspace varchar(1024),
|
workspace varchar(1024),
|
||||||
doc_name varchar(1024),
|
doc_name varchar(1024),
|
||||||
content CLOB,
|
content CLOB,
|
||||||
meta JSON
|
meta JSON,
|
||||||
|
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updatetime TIMESTAMP DEFAULT NULL
|
||||||
)"""},
|
)"""},
|
||||||
|
|
||||||
"LIGHTRAG_DOC_CHUNKS":
|
"LIGHTRAG_DOC_CHUNKS":
|
||||||
@@ -603,7 +605,9 @@ TABLES = {
|
|||||||
chunk_order_index NUMBER,
|
chunk_order_index NUMBER,
|
||||||
tokens NUMBER,
|
tokens NUMBER,
|
||||||
content CLOB,
|
content CLOB,
|
||||||
content_vector VECTOR
|
content_vector VECTOR,
|
||||||
|
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updatetime TIMESTAMP DEFAULT NULL
|
||||||
)"""},
|
)"""},
|
||||||
|
|
||||||
"LIGHTRAG_GRAPH_NODES":
|
"LIGHTRAG_GRAPH_NODES":
|
||||||
@@ -615,7 +619,9 @@ TABLES = {
|
|||||||
description CLOB,
|
description CLOB,
|
||||||
source_chunk_id varchar(256),
|
source_chunk_id varchar(256),
|
||||||
content CLOB,
|
content CLOB,
|
||||||
content_vector VECTOR
|
content_vector VECTOR,
|
||||||
|
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updatetime TIMESTAMP DEFAULT NULL
|
||||||
)"""},
|
)"""},
|
||||||
"LIGHTRAG_GRAPH_EDGES":
|
"LIGHTRAG_GRAPH_EDGES":
|
||||||
{"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
|
{"ddl":"""CREATE TABLE LIGHTRAG_GRAPH_EDGES (
|
||||||
@@ -628,13 +634,18 @@ TABLES = {
|
|||||||
description CLOB,
|
description CLOB,
|
||||||
source_chunk_id varchar(256),
|
source_chunk_id varchar(256),
|
||||||
content CLOB,
|
content CLOB,
|
||||||
content_vector VECTOR
|
content_vector VECTOR,
|
||||||
|
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updatetime TIMESTAMP DEFAULT NULL
|
||||||
)"""},
|
)"""},
|
||||||
"LIGHTRAG_LLM_CACHE":
|
"LIGHTRAG_LLM_CACHE":
|
||||||
{"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
|
{"ddl":"""CREATE TABLE LIGHTRAG_LLM_CACHE (
|
||||||
id varchar(256) PRIMARY KEY,
|
id varchar(256) PRIMARY KEY,
|
||||||
|
send clob,
|
||||||
return clob,
|
return clob,
|
||||||
model varchar(1024)
|
model varchar(1024),
|
||||||
|
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
updatetime TIMESTAMP DEFAULT NULL
|
||||||
)"""},
|
)"""},
|
||||||
|
|
||||||
"LIGHTRAG_GRAPH":
|
"LIGHTRAG_GRAPH":
|
||||||
|
@@ -351,3 +351,34 @@ class LightRAG:
|
|||||||
continue
|
continue
|
||||||
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
|
tasks.append(cast(StorageNameSpace, storage_inst).index_done_callback())
|
||||||
await asyncio.gather(*tasks)
|
await asyncio.gather(*tasks)
|
||||||
|
|
||||||
|
def delete_by_entity(self, entity_name: str):
    """Synchronous entry point for deleting an entity by name.

    Obtains an event loop from ``always_get_an_event_loop`` and drives
    ``adelete_by_entity`` on it until the coroutine finishes, returning
    whatever that coroutine returns.

    :param entity_name: Name of the entity to delete.
    """
    event_loop = always_get_an_event_loop()
    pending = self.adelete_by_entity(entity_name)
    return event_loop.run_until_complete(pending)
|
||||||
|
|
||||||
|
async def adelete_by_entity(self, entity_name: str):
    """Delete an entity, its relations and its graph node by name.

    The name is upper-cased and wrapped in double quotes before it is handed
    to the storages. Any exception raised by the deletion steps, the success
    log or the follow-up callbacks is logged and not propagated.

    :param entity_name: Name of the entity to delete.
    """
    # The storages are addressed with the upper-cased, double-quoted form.
    entity_name = '"' + entity_name.upper() + '"'

    try:
        # Order matters: vector entries first, then relations, then the node.
        await self.entities_vdb.delete_entity(entity_name)
        await self.relationships_vdb.delete_relation(entity_name)
        await self.chunk_entity_relation_graph.delete_node(entity_name)

        success_message = (
            f"Entity '{entity_name}' and its relationships have been deleted."
        )
        logger.info(success_message)

        # Let the affected storages run their index-done callbacks.
        await self._delete_by_entity_done()
    except Exception as err:
        logger.error(f"Error while deleting entity '{entity_name}': {err}")
|
||||||
|
|
||||||
|
async def _delete_by_entity_done(self):
    """Run ``index_done_callback`` on the storages used by entity deletion.

    Storages that are ``None`` are skipped; the remaining callbacks are
    awaited together through ``asyncio.gather``.
    """
    storages = (
        self.entities_vdb,
        self.relationships_vdb,
        self.chunk_entity_relation_graph,
    )
    callbacks = [
        cast(StorageNameSpace, storage).index_done_callback()
        for storage in storages
        if storage is not None
    ]
    await asyncio.gather(*callbacks)
|
||||||
|
@@ -7,7 +7,13 @@ import networkx as nx
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from nano_vectordb import NanoVectorDB
|
from nano_vectordb import NanoVectorDB
|
||||||
|
|
||||||
from .utils import load_json, logger, write_json
|
from .utils import (
|
||||||
|
logger,
|
||||||
|
load_json,
|
||||||
|
write_json,
|
||||||
|
compute_mdhash_id,
|
||||||
|
)
|
||||||
|
|
||||||
from .base import (
|
from .base import (
|
||||||
BaseGraphStorage,
|
BaseGraphStorage,
|
||||||
BaseKVStorage,
|
BaseKVStorage,
|
||||||
@@ -111,6 +117,43 @@ class NanoVectorDBStorage(BaseVectorStorage):
|
|||||||
]
|
]
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@property
def client_storage(self):
    """Return the ``_NanoVectorDB__storage`` attribute of ``self._client``."""
    # The attribute is looked up by its already-mangled name.
    storage_attr = "_NanoVectorDB__storage"
    return getattr(self._client, storage_attr)
|
||||||
|
|
||||||
|
async def delete_entity(self, entity_name: str):
    """Delete the vector entry for ``entity_name`` if the client has one.

    The record id is computed with ``compute_mdhash_id`` using the ``ent-``
    prefix. Any exception is logged and not propagated.

    :param entity_name: Entity name whose vector entry should be removed.
    """
    try:
        # The client calls below receive a one-element list of ids.
        target_ids = [compute_mdhash_id(entity_name, prefix="ent-")]

        if not self._client.get(target_ids):
            logger.info(f"No entity found with name {entity_name}.")
            return

        self._client.delete(target_ids)
        logger.info(f"Entity {entity_name} have been deleted.")
    except Exception as err:
        logger.error(f"Error while deleting entity {entity_name}: {err}")
|
||||||
|
|
||||||
|
async def delete_relation(self, entity_name: str):
    """Delete every stored relation whose source or target is ``entity_name``.

    Scans ``self.client_storage["data"]`` for records whose ``src_id`` or
    ``tgt_id`` equals the name and deletes them by their ``__id__``. Any
    exception is logged and not propagated.

    :param entity_name: Entity name to match against relation endpoints.
    """

    def _touches_entity(record):
        # A relation qualifies when either endpoint is the entity.
        return record["src_id"] == entity_name or record["tgt_id"] == entity_name

    try:
        # Filter fully before extracting ids, then delete in a single call.
        matching = list(filter(_touches_entity, self.client_storage["data"]))
        ids_to_delete = [record["__id__"] for record in matching]

        if not ids_to_delete:
            logger.info(f"No relations found for entity {entity_name}.")
            return

        self._client.delete(ids_to_delete)
        logger.info(
            f"All relations related to entity {entity_name} have been deleted."
        )
    except Exception as err:
        logger.error(
            f"Error while deleting relations for entity {entity_name}: {err}"
        )
|
||||||
|
|
||||||
async def index_done_callback(self):
|
async def index_done_callback(self):
|
||||||
self._client.save()
|
self._client.save()
|
||||||
|
|
||||||
@@ -228,6 +271,18 @@ class NetworkXStorage(BaseGraphStorage):
|
|||||||
):
|
):
|
||||||
self._graph.add_edge(source_node_id, target_node_id, **edge_data)
|
self._graph.add_edge(source_node_id, target_node_id, **edge_data)
|
||||||
|
|
||||||
|
async def delete_node(self, node_id: str):
    """
    Remove the node identified by ``node_id`` from ``self._graph``.

    A warning is logged, and nothing is removed, when the graph has no
    such node.

    :param node_id: The node_id to delete
    """
    if not self._graph.has_node(node_id):
        logger.warning(f"Node {node_id} not found in the graph for deletion.")
        return

    self._graph.remove_node(node_id)
    logger.info(f"Node {node_id} deleted from the graph.")
|
||||||
|
|
||||||
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
async def embed_nodes(self, algorithm: str) -> tuple[np.ndarray, list[str]]:
|
||||||
if algorithm not in self._node_embed_algorithms:
|
if algorithm not in self._node_embed_algorithms:
|
||||||
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
|
raise ValueError(f"Node embedding algorithm {algorithm} not supported")
|
||||||
|
Reference in New Issue
Block a user