Merge remote-tracking branch 'origin/main' into make-clear-what-implemented-or-not

# Conflicts:
#	lightrag/base.py
#	lightrag/kg/json_doc_status_impl.py
#	lightrag/kg/mongo_impl.py
#	lightrag/kg/postgres_impl.py
This commit is contained in:
Yannick Stephan
2025-02-16 15:29:16 +01:00
9 changed files with 536 additions and 447 deletions

View File

@@ -39,8 +39,8 @@ class FaissVectorDBStorage(BaseVectorStorage):
def __post_init__(self):
# Grab config values if available
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
cosine_threshold = config.get("cosine_better_than_threshold")
kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
cosine_threshold = kwargs.get("cosine_better_than_threshold")
if cosine_threshold is None:
raise ValueError(
"cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"

View File

@@ -185,7 +185,7 @@ class MongoDocStatusStorage(DocStatusStorage):
async def get_docs_by_status(
self, status: DocStatus
) -> dict[str, DocProcessingStatus]:
"""Get all documents by status"""
"""Get all documents with a specific status"""
cursor = self._data.find({"status": status.value})
result = await cursor.to_list()
return {

View File

@@ -34,8 +34,8 @@ class NanoVectorDBStorage(BaseVectorStorage):
# Initialize lock only for file operations
self._save_lock = asyncio.Lock()
# Use global config value if specified, otherwise use default
config = self.global_config.get("vector_db_storage_cls_kwargs", {})
cosine_threshold = config.get("cosine_better_than_threshold")
kwargs = self.global_config.get("vector_db_storage_cls_kwargs", {})
cosine_threshold = kwargs.get("cosine_better_than_threshold")
if cosine_threshold is None:
raise ValueError(
"cosine_better_than_threshold must be specified in vector_db_storage_cls_kwargs"

View File

@@ -482,7 +482,7 @@ class PGDocStatusStorage(DocStatusStorage):
async def get_docs_by_status(
self, status: DocStatus
) -> Dict[str, DocProcessingStatus]:
"""Get all documents by status"""
"""all documents with a specific status"""
sql = "select * from LIGHTRAG_DOC_STATUS where workspace=$1 and status=$2"
params = {"workspace": self.db.workspace, "status": status}
result = await self.db.query(sql, params, True)