fixed bugs

This commit is contained in:
Yannick Stephan
2025-02-09 19:21:49 +01:00
parent 832a9bb6fb
commit 7d63898015
7 changed files with 67 additions and 61 deletions

View File

@@ -38,7 +38,7 @@ class JsonKVStorage(BaseKVStorage):
for id in ids
]
async def filter_keys(self, data: set[str]) -> set[str]:
    """Return the keys from ``data`` that have no entry in ``self._data``.

    Args:
        data: Candidate keys to look up in the in-memory store.

    Returns:
        The subset of ``data`` that is absent from ``self._data``.
    """
    # Set comprehension: avoids building a throwaway list just to wrap it in set().
    return {key for key in data if key not in self._data}
async def upsert(self, data: dict[str, dict[str, Any]]) -> None:

View File

@@ -52,17 +52,16 @@ import os
from dataclasses import dataclass
from typing import Any, Union
from lightrag.utils import (
logger,
load_json,
write_json,
)
from lightrag.base import (
DocStatus,
DocProcessingStatus,
DocStatus,
DocStatusStorage,
)
from lightrag.utils import (
load_json,
logger,
write_json,
)
@dataclass
@@ -75,15 +74,17 @@ class JsonDocStatusStorage(DocStatusStorage):
self._data: dict[str, Any] = load_json(self._file_name) or {}
logger.info(f"Loaded document status storage with {len(self._data)} records")
async def filter_keys(self, data: set[str]) -> set[str]:
    """Return the keys from ``data`` that have no record in storage.

    Args:
        data: Candidate document ids.

    Returns:
        The subset of ``data`` that is absent from ``self._data``.

    Note:
        Iterating ``self._data`` and keeping keys found in ``data`` returns
        the already-stored ids, i.e. the inverse of a "filter" for keys that
        still need work. Only membership is checked here; the record's
        ``"status"`` field is not consulted.
    """
    return {key for key in data if key not in self._data}
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
    """Look up each id in ``self._data`` and return the records found.

    Ids with no entry, or whose stored value is falsy (for example an empty
    dict), are left out, so the result may be shorter than ``ids``. The order
    of ``ids`` is preserved.
    """
    looked_up = (self._data.get(doc_id) for doc_id in ids)
    return [record for record in looked_up if record]
async def get_status_counts(self) -> dict[str, int]:
"""Get counts of documents in each status"""
@@ -94,11 +95,19 @@ class JsonDocStatusStorage(DocStatusStorage):
async def get_failed_docs(self) -> dict[str, DocProcessingStatus]:
    """Get all failed documents.

    Returns:
        A mapping of document id to a ``DocProcessingStatus`` built from the
        stored record, for every record whose ``"status"`` equals
        ``DocStatus.FAILED``.
    """
    failed: dict[str, DocProcessingStatus] = {}
    for doc_id, record in self._data.items():
        if record["status"] == DocStatus.FAILED:
            # Expand the stored record into the declared return type rather
            # than handing back the raw mapping.
            failed[doc_id] = DocProcessingStatus(**record)
    return failed
async def get_pending_docs(self) -> dict[str, DocProcessingStatus]:
    """Get all pending documents.

    Returns:
        A mapping of document id to a ``DocProcessingStatus`` built from the
        stored record, for every record whose ``"status"`` equals
        ``DocStatus.PENDING``.
    """
    pending: dict[str, DocProcessingStatus] = {}
    for doc_id, record in self._data.items():
        if record["status"] == DocStatus.PENDING:
            # Expand the stored record into the declared return type rather
            # than handing back the raw mapping.
            pending[doc_id] = DocProcessingStatus(**record)
    return pending
async def index_done_callback(self):
"""Save data to file after indexing"""
@@ -118,7 +127,11 @@ class JsonDocStatusStorage(DocStatusStorage):
async def get(self, doc_id: str) -> Union[DocProcessingStatus, None]:
    """Get document status by ID.

    Args:
        doc_id: Key to look up in ``self._data``.

    Returns:
        A ``DocProcessingStatus`` built from the stored record, or ``None``
        when there is no entry for ``doc_id`` or the stored value is falsy.
    """
    record = self._data.get(doc_id)
    # Expand the stored record into the declared return type; a missing or
    # empty record maps to None.
    return DocProcessingStatus(**record) if record else None
async def delete(self, doc_ids: list[str]):
"""Delete document status by IDs"""

View File

@@ -35,7 +35,7 @@ class MongoKVStorage(BaseKVStorage):
async def get_by_ids(self, ids: list[str]) -> list[dict[str, Any]]:
    """Return every document whose ``_id`` is one of ``ids``.

    Runs a single ``$in`` query through ``self._data.find`` and materializes
    the result into a list.
    """
    # NOTE(review): self._data is presumably a MongoDB collection - confirm.
    id_filter = {"_id": {"$in": ids}}
    matches = self._data.find(id_filter)
    return list(matches)
async def filter_keys(self, data: list[str]) -> set[str]:
async def filter_keys(self, data: set[str]) -> set[str]:
existing_ids = [
str(x["_id"]) for x in self._data.find({"_id": {"$in": data}}, {"_id": 1})
]

View File

@@ -421,7 +421,7 @@ class PGDocStatusStorage(DocStatusStorage):
def __post_init__(self):
    # Intentionally a no-op: no post-construction setup is performed here.
    pass
async def filter_keys(self, data: list[str]) -> set[str]:
async def filter_keys(self, data: set[str]) -> set[str]:
"""Return keys that don't exist in storage"""
keys = ",".join([f"'{_id}'" for _id in data])
sql = (

View File

@@ -32,7 +32,7 @@ class RedisKVStorage(BaseKVStorage):
results = await pipe.execute()
return [json.loads(result) if result else None for result in results]
async def filter_keys(self, data: list[str]) -> set[str]:
async def filter_keys(self, data: set[str]) -> set[str]:
pipe = self._redis.pipeline()
for key in data:
pipe.exists(f"{self.namespace}:{key}")