Improved document status retrieval with content fallback.

- Added a fallback that uses content_summary as content when content is missing
- Handled missing fields gracefully
- Copied each record before modifying it so the stored data is left untouched
- Added error logging for missing fields
- Improved code readability and robustness
2025-02-20 04:09:02 +08:00
parent 3c080a9ebf
commit f776db0779
1 changed file with 14 additions and 5 deletions
--- a/lightrag/kg/json_doc_status_impl.py
+++ b/lightrag/kg/json_doc_status_impl.py
@@ -48,11 +48,20 @@ class JsonDocStatusStorage(DocStatusStorage):
        self, status: DocStatus
    ) -> dict[str, DocProcessingStatus]:
        """Get all documents with a specific status"""
-        return {
-            k: DocProcessingStatus(**v)
-            for k, v in self._data.items()
-            if v["status"] == status.value
-        }
+        result = {}
+        for k, v in self._data.items():
+            if v["status"] == status.value:
+                try:
+                    # Make a copy of the data to avoid modifying the original
+                    data = v.copy()
+                    # If content is missing, use content_summary as content
+                    if "content" not in data and "content_summary" in data:
+                        data["content"] = data["content_summary"]
+                    result[k] = DocProcessingStatus(**data)
+                except KeyError as e:
+                    logger.error(f"Missing required field for document {k}: {e}")
+                    continue
+        return result

    async def index_done_callback(self) -> None:
        write_json(self._data, self._file_name)