Merge pull request #695 from ShanGor/main
Fix the bug from upstream main caused by reading the document status as doc['status'] instead of doc.status, and improve Apache AGE query performance
This commit is contained in:
@@ -30,6 +30,7 @@ from ..base import (
|
||||
DocStatus,
|
||||
DocProcessingStatus,
|
||||
BaseGraphStorage,
|
||||
T,
|
||||
)
|
||||
|
||||
if sys.platform.startswith("win"):
|
||||
@@ -442,6 +443,22 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||
existed = set([element["id"] for element in result])
|
||||
return set(data) - existed
|
||||
|
||||
async def get_by_id(self, id: str) -> Union[T, None]:
    """Fetch the processing status of a single document.

    Queries LIGHTRAG_DOC_STATUS for the row matching this storage's
    workspace and the given document id.

    Args:
        id: Identifier of the document to look up.

    Returns:
        A DocProcessingStatus built from the first matching row, or None
        when the query produced no rows.
    """
    sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and id=$2"
    params = {"workspace": self.db.workspace, "id": id}
    # The third positional argument is passed as True, as before; presumably
    # it asks the DB helper for a list of rows -- confirm against db.query.
    result = await self.db.query(sql, params, True)

    # "No match" may arrive as None or as an empty sequence. Checking only
    # for None let an empty sequence reach result[0] and raise IndexError.
    if not result:
        return None

    row = result[0]
    return DocProcessingStatus(
        content_length=row["content_length"],
        content_summary=row["content_summary"],
        status=row["status"],
        chunks_count=row["chunks_count"],
        created_at=row["created_at"],
        updated_at=row["updated_at"],
    )
|
||||
|
||||
async def get_status_counts(self) -> Dict[str, int]:
|
||||
"""Get counts of documents in each status"""
|
||||
sql = """SELECT status as "status", COUNT(1) as "count"
|
||||
@@ -884,9 +901,9 @@ class PGGraphStorage(BaseGraphStorage):
|
||||
|
||||
query = """SELECT * FROM cypher('%s', $$
|
||||
MATCH (n:Entity {node_id: "%s"})
|
||||
OPTIONAL MATCH (n)-[r]-(connected)
|
||||
RETURN n, r, connected
|
||||
$$) AS (n agtype, r agtype, connected agtype)""" % (
|
||||
OPTIONAL MATCH (n)-[]-(connected)
|
||||
RETURN n, connected
|
||||
$$) AS (n agtype, connected agtype)""" % (
|
||||
self.graph_name,
|
||||
label,
|
||||
)
|
||||
|
@@ -373,7 +373,7 @@ class LightRAG:
|
||||
doc_id
|
||||
for doc_id in new_docs.keys()
|
||||
if (current_doc := await self.doc_status.get_by_id(doc_id)) is None
|
||||
or current_doc["status"] == DocStatus.FAILED
|
||||
or current_doc.status == DocStatus.FAILED
|
||||
}
|
||||
new_docs = {k: v for k, v in new_docs.items() if k in _add_doc_keys}
|
||||
|
||||
|
Reference in New Issue
Block a user