修复断点续传bug,过滤已经存在的文档时只过滤状态是已经处理完成的
This commit is contained in:
@@ -341,8 +341,14 @@ class JsonDocStatusStorage(DocStatusStorage):
|
|||||||
logger.info(f"Loaded document status storage with {len(self._data)} records")
|
logger.info(f"Loaded document status storage with {len(self._data)} records")
|
||||||
|
|
||||||
async def filter_keys(self, data: list[str]) -> set[str]:
|
async def filter_keys(self, data: list[str]) -> set[str]:
|
||||||
"""Return keys that don't exist in storage"""
|
"""Return keys that should be processed (not in storage or not successfully processed)"""
|
||||||
return set([k for k in data if k not in self._data])
|
return set(
|
||||||
|
[
|
||||||
|
k
|
||||||
|
for k in data
|
||||||
|
if k not in self._data or self._data[k]["status"] != DocStatus.PROCESSED
|
||||||
|
]
|
||||||
|
)
|
||||||
|
|
||||||
async def get_status_counts(self) -> Dict[str, int]:
|
async def get_status_counts(self) -> Dict[str, int]:
|
||||||
"""Get counts of documents in each status"""
|
"""Get counts of documents in each status"""
|
||||||
|
Reference in New Issue
Block a user