From f776db07799abc4897e2c7122dcc67c6aaf67e0f Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 20 Feb 2025 04:09:02 +0800
Subject: [PATCH] Improved document status retrieval with content fallback.

- Added content fallback to content_summary
- Handled missing fields gracefully
- Made data copy to avoid modification
- Added error logging for missing fields
- Improved code readability and robustness
---
 lightrag/kg/json_doc_status_impl.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/lightrag/kg/json_doc_status_impl.py b/lightrag/kg/json_doc_status_impl.py
index 1a05abc2..76b7158b 100644
--- a/lightrag/kg/json_doc_status_impl.py
+++ b/lightrag/kg/json_doc_status_impl.py
@@ -48,11 +48,20 @@ class JsonDocStatusStorage(DocStatusStorage):
         self, status: DocStatus
     ) -> dict[str, DocProcessingStatus]:
         """Get all documents with a specific status"""
-        return {
-            k: DocProcessingStatus(**v)
-            for k, v in self._data.items()
-            if v["status"] == status.value
-        }
+        result = {}
+        for k, v in self._data.items():
+            if v["status"] == status.value:
+                try:
+                    # Make a copy of the data to avoid modifying the original
+                    data = v.copy()
+                    # If content is missing, use content_summary as content
+                    if "content" not in data and "content_summary" in data:
+                        data["content"] = data["content_summary"]
+                    result[k] = DocProcessingStatus(**data)
+                except KeyError as e:
+                    logger.error(f"Missing required field for document {k}: {e}")
+                    continue
+        return result
 
     async def index_done_callback(self) -> None:
         write_json(self._data, self._file_name)