unify doc status retrieval with get_docs_by_status
This commit is contained in:
@@ -93,36 +93,14 @@ class JsonDocStatusStorage(DocStatusStorage):
|
||||
counts[doc["status"]] += 1
|
||||
return counts
|
||||
|
||||
async def get_failed_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all failed documents"""
|
||||
async def get_docs_by_status(
|
||||
self, status: DocStatus
|
||||
) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all documents with a specific status"""
|
||||
return {
|
||||
k: DocProcessingStatus(**v)
|
||||
for k, v in self._data.items()
|
||||
if v["status"] == DocStatus.FAILED
|
||||
}
|
||||
|
||||
async def get_pending_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all pending documents"""
|
||||
return {
|
||||
k: DocProcessingStatus(**v)
|
||||
for k, v in self._data.items()
|
||||
if v["status"] == DocStatus.PENDING
|
||||
}
|
||||
|
||||
async def get_processed_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all processed documents"""
|
||||
return {
|
||||
k: DocProcessingStatus(**v)
|
||||
for k, v in self._data.items()
|
||||
if v["status"] == DocStatus.PROCESSED
|
||||
}
|
||||
|
||||
async def get_processing_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all processing documents"""
|
||||
return {
|
||||
k: DocProcessingStatus(**v)
|
||||
for k, v in self._data.items()
|
||||
if v["status"] == DocStatus.PROCESSING
|
||||
if v["status"] == status
|
||||
}
|
||||
|
||||
async def index_done_callback(self):
|
||||
|
@@ -175,7 +175,7 @@ class MongoDocStatusStorage(DocStatusStorage):
|
||||
async def get_docs_by_status(
|
||||
self, status: DocStatus
|
||||
) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all documents by status"""
|
||||
"""Get all documents with a specific status"""
|
||||
cursor = self._data.find({"status": status.value})
|
||||
result = await cursor.to_list()
|
||||
return {
|
||||
@@ -191,22 +191,6 @@ class MongoDocStatusStorage(DocStatusStorage):
|
||||
for doc in result
|
||||
}
|
||||
|
||||
async def get_failed_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all failed documents"""
|
||||
return await self.get_docs_by_status(DocStatus.FAILED)
|
||||
|
||||
async def get_pending_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all pending documents"""
|
||||
return await self.get_docs_by_status(DocStatus.PENDING)
|
||||
|
||||
async def get_processing_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all processing documents"""
|
||||
return await self.get_docs_by_status(DocStatus.PROCESSING)
|
||||
|
||||
async def get_processed_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all processed documents"""
|
||||
return await self.get_docs_by_status(DocStatus.PROCESSED)
|
||||
|
||||
|
||||
@dataclass
|
||||
class MongoGraphStorage(BaseGraphStorage):
|
||||
|
@@ -468,7 +468,7 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||
async def get_docs_by_status(
|
||||
self, status: DocStatus
|
||||
) -> Dict[str, DocProcessingStatus]:
|
||||
"""Get all documents by status"""
|
||||
"""Get all documents with a specific status"""
|
||||
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2"
|
||||
params = {"workspace": self.db.workspace, "status": status}
|
||||
result = await self.db.query(sql, params, True)
|
||||
@@ -485,22 +485,6 @@ class PGDocStatusStorage(DocStatusStorage):
|
||||
for element in result
|
||||
}
|
||||
|
||||
async def get_failed_docs(self) -> Dict[str, DocProcessingStatus]:
|
||||
"""Get all failed documents"""
|
||||
return await self.get_docs_by_status(DocStatus.FAILED)
|
||||
|
||||
async def get_pending_docs(self) -> Dict[str, DocProcessingStatus]:
|
||||
"""Get all pending documents"""
|
||||
return await self.get_docs_by_status(DocStatus.PENDING)
|
||||
|
||||
async def get_processing_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all processing documents"""
|
||||
return await self.get_docs_by_status(DocStatus.PROCESSING)
|
||||
|
||||
async def get_processed_docs(self) -> dict[str, DocProcessingStatus]:
|
||||
"""Get all processed documents"""
|
||||
return await self.get_docs_by_status(DocStatus.PROCESSED)
|
||||
|
||||
async def index_done_callback(self):
|
||||
"""Save data after indexing, but for PostgreSQL, we already saved them during the upsert stage, so no action to take here"""
|
||||
logger.info("Doc status had been saved into postgresql db!")
|
||||
|
Reference in New Issue
Block a user