From f11900451a976b6822d7d31acf507b6218d35037 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 09:10:50 +0100 Subject: [PATCH 01/14] Addded drop to postgresql --- lightrag/kg/postgres_impl.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 16aee8b8..c4f13c62 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -301,7 +301,11 @@ class PGKVStorage(BaseKVStorage): # PG handles persistence automatically pass - + async def drop(self) -> None: + """Drop the storage""" + drop_sql = SQL_TEMPLATES["DROP_ALL"] + await self.db.execute(drop_sql) + @final @dataclass class PGVectorStorage(BaseVectorStorage): @@ -1194,4 +1198,12 @@ SQL_TEMPLATES = { FROM LIGHTRAG_DOC_CHUNKS where workspace=$1) WHERE distance>$2 ORDER BY distance DESC LIMIT $3 """, + #DROP everything + "drop": """ + DROP TABLE IF EXISTS LIGHTRAG_DOC_FULL CASCADE; + DROP TABLE IF EXISTS LIGHTRAG_DOC_CHUNKS CASCADE; + DROP TABLE IF EXISTS LIGHTRAG_LLM_CACHE CASCADE; + DROP TABLE IF EXISTS LIGHTRAG_VDB_ENTITY CASCADE; + DROP TABLE IF EXISTS LIGHTRAG_VDB_RELATION CASCADE; + """, } From 6a0366cb24e7e4f60b0abfa8760d43c4842578a4 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 09:39:10 +0100 Subject: [PATCH 02/14] Fixed indentation bug --- lightrag/kg/postgres_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index c4f13c62..77bb170a 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -303,7 +303,7 @@ class PGKVStorage(BaseKVStorage): async def drop(self) -> None: """Drop the storage""" - drop_sql = SQL_TEMPLATES["DROP_ALL"] + drop_sql = SQL_TEMPLATES["DROP_ALL"] await self.db.execute(drop_sql) @final From f02c881fd3f0bed68ba5b0c8e72832cad8463d6c Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 09:57:10 +0100 Subject: [PATCH 03/14] Added drop for Doc status --- lightrag/kg/postgres_impl.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 77bb170a..5e99ea4b 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -303,7 +303,7 @@ class PGKVStorage(BaseKVStorage): async def drop(self) -> None: """Drop the storage""" - drop_sql = SQL_TEMPLATES["DROP_ALL"] + drop_sql = SQL_TEMPLATES["drop_all"] await self.db.execute(drop_sql) @final @@ -534,7 +534,10 @@ class PGDocStatusStorage(DocStatusStorage): }, ) return data - + async def drop(self) -> None: + """Drop the storage""" + drop_sql = SQL_TEMPLATES["drop_doc_full"] + await self.db.execute(drop_sql) class PGGraphQueryException(Exception): """Exception for the AGE queries.""" @@ -1198,12 +1201,28 @@ SQL_TEMPLATES = { FROM LIGHTRAG_DOC_CHUNKS where workspace=$1) WHERE distance>$2 ORDER BY distance DESC LIMIT $3 """, - #DROP everything - "drop": """ + # DROP tables + "drop_all": """ DROP TABLE IF EXISTS LIGHTRAG_DOC_FULL CASCADE; DROP TABLE IF EXISTS LIGHTRAG_DOC_CHUNKS CASCADE; DROP TABLE IF EXISTS LIGHTRAG_LLM_CACHE CASCADE; DROP TABLE IF EXISTS LIGHTRAG_VDB_ENTITY CASCADE; DROP TABLE IF EXISTS LIGHTRAG_VDB_RELATION CASCADE; """, + "drop_doc_full": """ + DROP TABLE IF EXISTS LIGHTRAG_DOC_FULL CASCADE; + """, + "drop_doc_chunks": """ + DROP TABLE IF EXISTS LIGHTRAG_DOC_CHUNKS CASCADE; + """, + "drop_llm_cache": """ + DROP TABLE IF EXISTS LIGHTRAG_LLM_CACHE CASCADE; + """, + "drop_vdb_entity": """ + DROP TABLE IF EXISTS LIGHTRAG_VDB_ENTITY CASCADE; + """, + "drop_vdb_relation": """ + DROP TABLE IF EXISTS LIGHTRAG_VDB_RELATION CASCADE; + """, + } From 8f6f4c249a86891d94dcff4fa756b14c1f391d08 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:01:21 +0100 Subject: [PATCH 04/14] Added more drops --- lightrag/kg/postgres_impl.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 5e99ea4b..b2ee6013 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -1018,7 +1018,13 @@ class PGGraphStorage(BaseGraphStorage): self, node_label: str, max_depth: int = 5 ) -> KnowledgeGraph: raise NotImplementedError - + + async def drop(self) -> None: + """Drop the storage""" + drop_sql = SQL_TEMPLATES["drop_vdb_entity"] + await self.db.execute(drop_sql) + drop_sql = SQL_TEMPLATES["drop_vdb_relation"] + await self.db.execute(drop_sql) NAMESPACE_TABLE_MAP = { NameSpace.KV_STORE_FULL_DOCS: "LIGHTRAG_DOC_FULL", From becf76a528b72c6540e82d858df2960bfb9123bb Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:07:57 +0100 Subject: [PATCH 05/14] Debuggging --- lightrag/kg/postgres_impl.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index b2ee6013..3816b360 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -263,8 +263,10 @@ class PGKVStorage(BaseKVStorage): exist_keys = [key["id"] for key in res] else: exist_keys = [] - data = set([s for s in keys if s not in exist_keys]) - return data + new_keys = set([s for s in keys if s not in exist_keys]) + print(f"keys: {keys}") + print(f"new_keys: {new_keys}") + return new_keys except Exception as e: logger.error(f"PostgreSQL database error: {e}") print(sql) From 941c89521c6e0b426a3b9d71b46d73e4eb81ffe0 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:12:08 +0100 Subject: [PATCH 06/14] Debug --- lightrag/kg/postgres_impl.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 3816b360..13f7d5d1 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -438,16 +438,26 @@ class PGVectorStorage(BaseVectorStorage): @dataclass class PGDocStatusStorage(DocStatusStorage): async def filter_keys(self, keys: set[str]) -> set[str]: - """Return keys that don't exist in storage""" - keys = ",".join([f"'{_id}'" for _id in keys]) - sql = f"SELECT id FROM LIGHTRAG_DOC_STATUS WHERE workspace='{self.db.workspace}' AND id IN ({keys})" - result = await self.db.query(sql, multirows=True) - # The result is like [{'id': 'id1'}, {'id': 'id2'}, ...]. - if result is None: - return set(keys) - else: - existed = set([element["id"] for element in result]) - return set(keys) - existed + """Filter out duplicated content""" + sql = SQL_TEMPLATES["filter_keys"].format( + table_name=namespace_to_table_name(self.namespace), + ids=",".join([f"'{id}'" for id in keys]), + ) + params = {"workspace": self.db.workspace} + try: + res = await self.db.query(sql, params, multirows=True) + if res: + exist_keys = [key["id"] for key in res] + else: + exist_keys = [] + new_keys = set([s for s in keys if s not in exist_keys]) + print(f"keys: {keys}") + print(f"new_keys: {new_keys}") + return new_keys + except Exception as e: + logger.error(f"PostgreSQL database error: {e}") + print(sql) + print(params) async def get_by_id(self, id: str) -> Union[dict[str, Any], None]: sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2" From a15d164d87fa2e201900f81cbcf69a549641cb73 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:16:00 +0100 Subject: [PATCH 07/14] Update postgres_impl.py --- lightrag/kg/postgres_impl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 13f7d5d1..199a8440 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -499,7 +499,8 @@ class PGDocStatusStorage(DocStatusStorage): sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2" params = {"workspace": self.db.workspace, "status": status.value} result = await self.db.query(sql, params, True) - return { + print("") + docs_by_status = { element["id"]: DocProcessingStatus( content=result[0]["content"], content_summary=element["content_summary"], @@ -511,6 +512,8 @@ class PGDocStatusStorage(DocStatusStorage): ) for element in result } + print(f"Docs by status: {docs_by_status}") + return docs_by_status async def index_done_callback(self) -> None: # PG handles persistence automatically From 20909e495b02e892ab7bc32ab576ed322904e547 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:22:16 +0100 Subject: [PATCH 08/14] Added drop --- lightrag/kg/json_doc_status_impl.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py index e69352f3..871e7121 100644 --- a/lightrag/kg/json_doc_status_impl.py +++ b/lightrag/kg/json_doc_status_impl.py @@ -68,3 +68,8 @@ class JsonDocStatusStorage(DocStatusStorage): for doc_id in doc_ids: self._data.pop(doc_id, None) await self.index_done_callback() + + async def drop(self) -> None: + """Drop the storage""" + self._data.clear() + From 9b9f98916089e6e6585551c34d0d923e02c400e8 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:24:19 +0100 Subject: [PATCH 09/14] Update postgres_impl.py --- lightrag/kg/postgres_impl.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 199a8440..eb7fd2a0 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -307,7 +307,8 @@ class PGKVStorage(BaseKVStorage): """Drop the storage""" drop_sql = SQL_TEMPLATES["drop_all"] await self.db.execute(drop_sql) - + + @final @dataclass class PGVectorStorage(BaseVectorStorage): @@ -549,11 +550,13 @@ class PGDocStatusStorage(DocStatusStorage): }, ) return data + async def drop(self) -> None: """Drop the storage""" drop_sql = SQL_TEMPLATES["drop_doc_full"] await self.db.execute(drop_sql) + class PGGraphQueryException(Exception): """Exception for the AGE queries.""" @@ -1033,7 +1036,7 @@ class PGGraphStorage(BaseGraphStorage): self, node_label: str, max_depth: int = 5 ) -> KnowledgeGraph: raise NotImplementedError - + async def drop(self) -> None: """Drop the storage""" drop_sql = SQL_TEMPLATES["drop_vdb_entity"] @@ -1041,6 +1044,7 @@ class PGGraphStorage(BaseGraphStorage): drop_sql = SQL_TEMPLATES["drop_vdb_relation"] await self.db.execute(drop_sql) + NAMESPACE_TABLE_MAP = { NameSpace.KV_STORE_FULL_DOCS: "LIGHTRAG_DOC_FULL", NameSpace.KV_STORE_TEXT_CHUNKS: "LIGHTRAG_DOC_CHUNKS", @@ -1245,5 +1249,4 @@ SQL_TEMPLATES = { "drop_vdb_relation": """ DROP TABLE IF EXISTS LIGHTRAG_VDB_RELATION CASCADE; """, - } From 1fe47e5ef14184b456e4bf117871b3857be111da Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:24:54 +0100 Subject: [PATCH 10/14] Update json_doc_status_impl.py --- lightrag/kg/json_doc_status_impl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py index 871e7121..1a05abc2 100644 --- a/lightrag/kg/json_doc_status_impl.py +++ b/lightrag/kg/json_doc_status_impl.py @@ -68,8 +68,7 @@ class JsonDocStatusStorage(DocStatusStorage): for doc_id in doc_ids: self._data.pop(doc_id, None) await self.index_done_callback() - + async def drop(self) -> None: """Drop the storage""" self._data.clear() - From fec78894583d0d5cbcddd1c4646789893a728109 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:27:38 +0100 Subject: [PATCH 11/14] Update postgres_impl.py --- lightrag/kg/postgres_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index eb7fd2a0..35f0ed2e 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -507,8 +507,8 @@ class PGDocStatusStorage(DocStatusStorage): content_summary=element["content_summary"], content_length=element["content_length"], status=element["status"], - created_at=element["created_at"], - updated_at=element["updated_at"], + created_at=str(element["created_at"]), + updated_at=str(element["updated_at"]), chunks_count=element["chunks_count"], ) for element in result From b4e0c476074d55056b1872e65b301725847658bf Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 10:29:07 +0100 Subject: [PATCH 12/14] Update postgres_impl.py --- lightrag/kg/postgres_impl.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index 35f0ed2e..cdb4748e 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -264,8 +264,6 @@ class PGKVStorage(BaseKVStorage): else: exist_keys = [] new_keys = set([s for s in keys if s not in exist_keys]) - print(f"keys: {keys}") - print(f"new_keys: {new_keys}") return new_keys except Exception as e: logger.error(f"PostgreSQL database error: {e}") @@ -500,7 +498,6 @@ class PGDocStatusStorage(DocStatusStorage): sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2" params = {"workspace": self.db.workspace, "status": status.value} result = await self.db.query(sql, params, True) - print("") docs_by_status = { element["id"]: DocProcessingStatus( content=result[0]["content"], @@ -513,7 +510,6 @@ class PGDocStatusStorage(DocStatusStorage): ) for element in result } - print(f"Docs by status: {docs_by_status}") return docs_by_status async def index_done_callback(self) -> None: From 8d043c599d5acf69ba1f7be6e39430a458dde6f6 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 15:50:00 +0100 Subject: [PATCH 13/14] Update LightRagWithPostGRESQL.md --- lightrag/api/docs/LightRagWithPostGRESQL.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightrag/api/docs/LightRagWithPostGRESQL.md b/lightrag/api/docs/LightRagWithPostGRESQL.md index 30dce42f..cd00c252 100644 --- a/lightrag/api/docs/LightRagWithPostGRESQL.md +++ b/lightrag/api/docs/LightRagWithPostGRESQL.md @@ -130,7 +130,7 @@ Replace placeholders like `your_role_name`, `your_password`, and `your_database` Start the LightRAG server using specified options: ```bash -lightrag-server --port 9626 --key sk-SL1 --kv-storage PGKVStorage --graph-storage PGGraphStorage --vector-storage PGVectorStorage --doc-status-storage PGDocStatusStorage +lightrag-server --port 9621 --key sk-somepassword --kv-storage PGKVStorage --graph-storage PGGraphStorage --vector-storage PGVectorStorage --doc-status-storage PGDocStatusStorage ``` Replace `the-port-number` with your desired port number (default is 9621) and `your-secret-key` with a secure key. From f7ef4c7ee89a25af2dde21182c1babc1293f6ebe Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Tue, 18 Feb 2025 16:10:26 +0100 Subject: [PATCH 14/14] Update postgres_impl.py --- lightrag/kg/postgres_impl.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lightrag/kg/postgres_impl.py b/lightrag/kg/postgres_impl.py index cdb4748e..b5e3e1e3 100644 --- a/lightrag/kg/postgres_impl.py +++ b/lightrag/kg/postgres_impl.py @@ -504,8 +504,8 @@ class PGDocStatusStorage(DocStatusStorage): content_summary=element["content_summary"], content_length=element["content_length"], status=element["status"], - created_at=str(element["created_at"]), - updated_at=str(element["updated_at"]), + created_at=element["created_at"], + updated_at=element["updated_at"], chunks_count=element["chunks_count"], ) for element in result