Fix linting

This commit is contained in:
yangdx
2025-03-31 23:22:27 +08:00
parent 3d4f8f67c9
commit 95a8ee27ed
18 changed files with 296 additions and 222 deletions

View File

@@ -60,7 +60,9 @@ class InsertResponse(BaseModel):
class ClearDocumentsResponse(BaseModel):
status: str = Field(description="Status of the clear operation: success/partial_success/busy/fail")
status: str = Field(
description="Status of the clear operation: success/partial_success/busy/fail"
)
message: str = Field(description="Message describing the operation result")
@@ -783,7 +785,10 @@ def create_document_routes(
HTTPException: Raised when a serious error occurs during the clearing process,
with status code 500 and error details in the detail field.
"""
from lightrag.kg.shared_storage import get_namespace_data, get_pipeline_status_lock
from lightrag.kg.shared_storage import (
get_namespace_data,
get_pipeline_status_lock,
)
# Get pipeline status and lock
pipeline_status = await get_namespace_data("pipeline_status")
@@ -794,14 +799,16 @@ def create_document_routes(
if pipeline_status.get("busy", False):
return ClearDocumentsResponse(
status="busy",
message="Cannot clear documents while pipeline is busy"
message="Cannot clear documents while pipeline is busy",
)
# Set busy to true
pipeline_status["busy"] = True
pipeline_status["job_name"] = "Clearing Documents"
pipeline_status["latest_message"] = "Starting document clearing process"
if "history_messages" in pipeline_status:
pipeline_status["history_messages"].append("Starting document clearing process")
pipeline_status["history_messages"].append(
"Starting document clearing process"
)
try:
# Use drop method to clear all data
@@ -813,12 +820,14 @@ def create_document_routes(
rag.relationships_vdb,
rag.chunks_vdb,
rag.chunk_entity_relation_graph,
rag.doc_status
rag.doc_status,
]
# Log storage drop start
if "history_messages" in pipeline_status:
pipeline_status["history_messages"].append("Starting to drop storage components")
pipeline_status["history_messages"].append(
"Starting to drop storage components"
)
for storage in storages:
if storage is not None:
@@ -860,14 +869,13 @@ def create_document_routes(
logger.error(error_message)
if "history_messages" in pipeline_status:
pipeline_status["history_messages"].append(error_message)
return ClearDocumentsResponse(
status="fail",
message=error_message
)
return ClearDocumentsResponse(status="fail", message=error_message)
# Log file deletion start
if "history_messages" in pipeline_status:
pipeline_status["history_messages"].append("Starting to delete files in input directory")
pipeline_status["history_messages"].append(
"Starting to delete files in input directory"
)
# Delete all files in input_dir
deleted_files_count = 0
@@ -903,16 +911,12 @@ def create_document_routes(
final_message = f"All documents cleared successfully. Deleted {deleted_files_count} files."
status = "success"
# Log final result
if "history_messages" in pipeline_status:
pipeline_status["history_messages"].append(final_message)
# Return response based on results
return ClearDocumentsResponse(
status=status,
message=final_message
)
return ClearDocumentsResponse(status=status, message=final_message)
except Exception as e:
error_msg = f"Error clearing documents: {str(e)}"
logger.error(error_msg)

View File

@@ -259,7 +259,7 @@ class BaseKVStorage(StorageNameSpace, ABC):
None
"""
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
"""Delete specific records from storage by cache mode
Important notes for in-memory storage:
@@ -274,6 +274,7 @@ class BaseKVStorage(StorageNameSpace, ABC):
False: if the cache drop failed, or the cache mode is not supported
"""
@dataclass
class BaseGraphStorage(StorageNameSpace, ABC):
embedding_func: EmbeddingFunc
@@ -394,7 +395,7 @@ class DocStatusStorage(BaseKVStorage, ABC):
) -> dict[str, DocProcessingStatus]:
"""Get all documents with a specific status"""
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
"""Drop cache is not supported for Doc Status storage"""
return False

View File

@@ -34,9 +34,9 @@ if not pm.is_installed("psycopg-pool"):
if not pm.is_installed("asyncpg"):
pm.install("asyncpg")
import psycopg # type: ignore
from psycopg.rows import namedtuple_row # type: ignore
from psycopg_pool import AsyncConnectionPool, PoolTimeout # type: ignore
import psycopg # type: ignore
from psycopg.rows import namedtuple_row # type: ignore
from psycopg_pool import AsyncConnectionPool, PoolTimeout # type: ignore
class AGEQueryException(Exception):

View File

@@ -11,8 +11,8 @@ import pipmaster as pm
if not pm.is_installed("chromadb"):
pm.install("chromadb")
from chromadb import HttpClient, PersistentClient # type: ignore
from chromadb.config import Settings # type: ignore
from chromadb import HttpClient, PersistentClient # type: ignore
from chromadb.config import Settings # type: ignore
@final
@@ -354,7 +354,9 @@ class ChromaVectorDBStorage(BaseVectorStorage):
# Delete all documents
self._collection.delete(ids=result["ids"])
logger.info(f"Process {os.getpid()} drop ChromaDB collection {self.namespace}")
logger.info(
f"Process {os.getpid()} drop ChromaDB collection {self.namespace}"
)
return {"status": "success", "message": "data dropped"}
except Exception as e:
logger.error(f"Error dropping ChromaDB collection {self.namespace}: {e}")

View File

@@ -24,9 +24,9 @@ from ..base import BaseGraphStorage
if not pm.is_installed("gremlinpython"):
pm.install("gremlinpython")
from gremlin_python.driver import client, serializer # type: ignore
from gremlin_python.driver.aiohttp.transport import AiohttpTransport # type: ignore
from gremlin_python.driver.protocol import GremlinServerError # type: ignore
from gremlin_python.driver import client, serializer # type: ignore
from gremlin_python.driver.aiohttp.transport import AiohttpTransport # type: ignore
from gremlin_python.driver.protocol import GremlinServerError # type: ignore
@final

View File

@@ -144,7 +144,7 @@ class JsonKVStorage(BaseKVStorage):
self._data.pop(doc_id, None)
await set_all_update_flags(self.namespace)
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
"""Delete specific records from storage by cache mode
Important notes for in-memory storage:
@@ -167,7 +167,6 @@ class JsonKVStorage(BaseKVStorage):
except Exception:
return False
async def drop(self) -> dict[str, str]:
"""Drop all data from storage and clean up resources
This action will persist the data to disk immediately.

View File

@@ -15,7 +15,7 @@ if not pm.is_installed("pymilvus"):
pm.install("pymilvus")
import configparser
from pymilvus import MilvusClient # type: ignore
from pymilvus import MilvusClient # type: ignore
config = configparser.ConfigParser()
config.read("config.ini", "utf-8")
@@ -310,7 +310,9 @@ class MilvusVectorDBStorage(BaseVectorStorage):
dimension=self.embedding_func.embedding_dim,
)
logger.info(f"Process {os.getpid()} drop Milvus collection {self.namespace}")
logger.info(
f"Process {os.getpid()} drop Milvus collection {self.namespace}"
)
return {"status": "success", "message": "data dropped"}
except Exception as e:
logger.error(f"Error dropping Milvus collection {self.namespace}: {e}")

View File

@@ -25,13 +25,13 @@ if not pm.is_installed("pymongo"):
if not pm.is_installed("motor"):
pm.install("motor")
from motor.motor_asyncio import ( # type: ignore
from motor.motor_asyncio import ( # type: ignore
AsyncIOMotorClient,
AsyncIOMotorDatabase,
AsyncIOMotorCollection,
)
from pymongo.operations import SearchIndexModel # type: ignore
from pymongo.errors import PyMongoError # type: ignore
from pymongo.operations import SearchIndexModel # type: ignore
from pymongo.errors import PyMongoError # type: ignore
config = configparser.ConfigParser()
config.read("config.ini", "utf-8")
@@ -161,7 +161,9 @@ class MongoKVStorage(BaseKVStorage):
try:
result = await self._data.delete_many({"_id": {"$in": ids}})
logger.info(f"Deleted {result.deleted_count} documents from {self.namespace}")
logger.info(
f"Deleted {result.deleted_count} documents from {self.namespace}"
)
except PyMongoError as e:
logger.error(f"Error deleting documents from {self.namespace}: {e}")
@@ -197,8 +199,13 @@ class MongoKVStorage(BaseKVStorage):
result = await self._data.delete_many({})
deleted_count = result.deleted_count
logger.info(f"Dropped {deleted_count} documents from doc status {self._collection_name}")
return {"status": "success", "message": f"{deleted_count} documents dropped"}
logger.info(
f"Dropped {deleted_count} documents from doc status {self._collection_name}"
)
return {
"status": "success",
"message": f"{deleted_count} documents dropped",
}
except PyMongoError as e:
logger.error(f"Error dropping doc status {self._collection_name}: {e}")
return {"status": "error", "message": str(e)}
@@ -293,8 +300,13 @@ class MongoDocStatusStorage(DocStatusStorage):
result = await self._data.delete_many({})
deleted_count = result.deleted_count
logger.info(f"Dropped {deleted_count} documents from doc status {self._collection_name}")
return {"status": "success", "message": f"{deleted_count} documents dropped"}
logger.info(
f"Dropped {deleted_count} documents from doc status {self._collection_name}"
)
return {
"status": "success",
"message": f"{deleted_count} documents dropped",
}
except PyMongoError as e:
logger.error(f"Error dropping doc status {self._collection_name}: {e}")
return {"status": "error", "message": str(e)}
@@ -919,8 +931,13 @@ class MongoGraphStorage(BaseGraphStorage):
result = await self.collection.delete_many({})
deleted_count = result.deleted_count
logger.info(f"Dropped {deleted_count} documents from graph {self._collection_name}")
return {"status": "success", "message": f"{deleted_count} documents dropped"}
logger.info(
f"Dropped {deleted_count} documents from graph {self._collection_name}"
)
return {
"status": "success",
"message": f"{deleted_count} documents dropped",
}
except PyMongoError as e:
logger.error(f"Error dropping graph {self._collection_name}: {e}")
return {"status": "error", "message": str(e)}
@@ -1226,8 +1243,13 @@ class MongoVectorDBStorage(BaseVectorStorage):
# Recreate vector index
await self.create_vector_index_if_not_exists()
logger.info(f"Dropped {deleted_count} documents from vector storage {self._collection_name} and recreated vector index")
return {"status": "success", "message": f"{deleted_count} documents dropped and vector index recreated"}
logger.info(
f"Dropped {deleted_count} documents from vector storage {self._collection_name} and recreated vector index"
)
return {
"status": "success",
"message": f"{deleted_count} documents dropped and vector index recreated",
}
except PyMongoError as e:
logger.error(f"Error dropping vector storage {self._collection_name}: {e}")
return {"status": "error", "message": str(e)}

View File

@@ -339,7 +339,9 @@ class NanoVectorDBStorage(BaseVectorStorage):
# Reset own update flag to avoid self-reloading
self.storage_updated.value = False
logger.info(f"Process {os.getpid()} drop {self.namespace}(file:{self._client_file_name})")
logger.info(
f"Process {os.getpid()} drop {self.namespace}(file:{self._client_file_name})"
)
return {"status": "success", "message": "data dropped"}
except Exception as e:
logger.error(f"Error dropping {self.namespace}: {e}")

View File

@@ -1046,7 +1046,9 @@ class Neo4JStorage(BaseGraphStorage):
result = await session.run(query)
await result.consume() # Ensure result is fully consumed
logger.info(f"Process {os.getpid()} drop Neo4j database {self._DATABASE}")
logger.info(
f"Process {os.getpid()} drop Neo4j database {self._DATABASE}"
)
return {"status": "success", "message": "data dropped"}
except Exception as e:
logger.error(f"Error dropping Neo4j database {self._DATABASE}: {e}")

View File

@@ -479,7 +479,9 @@ class NetworkXStorage(BaseGraphStorage):
await set_all_update_flags(self.namespace)
# Reset own update flag to avoid self-reloading
self.storage_updated.value = False
logger.info(f"Process {os.getpid()} drop graph {self.namespace} (file:{self._graphml_xml_file})")
logger.info(
f"Process {os.getpid()} drop graph {self.namespace} (file:{self._graphml_xml_file})"
)
return {"status": "success", "message": "data dropped"}
except Exception as e:
logger.error(f"Error dropping graph {self.namespace}: {e}")

View File

@@ -27,7 +27,7 @@ if not pm.is_installed("oracledb"):
pm.install("oracledb")
from graspologic import embed
import oracledb # type: ignore
import oracledb # type: ignore
class OracleDB:
@@ -411,7 +411,9 @@ class OracleKVStorage(BaseKVStorage):
delete_sql = f"DELETE FROM {table_name} WHERE workspace=:workspace AND id IN ({ids_list})"
await self.db.execute(delete_sql, {"workspace": self.db.workspace})
logger.info(f"Successfully deleted {len(ids)} records from {self.namespace}")
logger.info(
f"Successfully deleted {len(ids)} records from {self.namespace}"
)
except Exception as e:
logger.error(f"Error deleting records from {self.namespace}: {e}")
@@ -455,7 +457,10 @@ class OracleKVStorage(BaseKVStorage):
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
return {
"status": "error",
"message": f"Unknown namespace: {self.namespace}",
}
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
table_name=table_name
@@ -683,7 +688,10 @@ class OracleVectorDBStorage(BaseVectorStorage):
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
return {
"status": "error",
"message": f"Unknown namespace: {self.namespace}",
}
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
table_name=table_name
@@ -1025,10 +1033,14 @@ class OracleGraphStorage(BaseGraphStorage):
"""Drop the storage"""
try:
# 使用图形查询删除所有节点和关系
delete_edges_sql = """DELETE FROM LIGHTRAG_GRAPH_EDGES WHERE workspace=:workspace"""
delete_edges_sql = (
"""DELETE FROM LIGHTRAG_GRAPH_EDGES WHERE workspace=:workspace"""
)
await self.db.execute(delete_edges_sql, {"workspace": self.db.workspace})
delete_nodes_sql = """DELETE FROM LIGHTRAG_GRAPH_NODES WHERE workspace=:workspace"""
delete_nodes_sql = (
"""DELETE FROM LIGHTRAG_GRAPH_NODES WHERE workspace=:workspace"""
)
await self.db.execute(delete_nodes_sql, {"workspace": self.db.workspace})
return {"status": "success", "message": "graph data dropped"}

View File

@@ -398,8 +398,12 @@ class PGKVStorage(BaseKVStorage):
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
try:
await self.db.execute(delete_sql, {"workspace": self.db.workspace, "ids": ids})
logger.debug(f"Successfully deleted {len(ids)} records from {self.namespace}")
await self.db.execute(
delete_sql, {"workspace": self.db.workspace, "ids": ids}
)
logger.debug(
f"Successfully deleted {len(ids)} records from {self.namespace}"
)
except Exception as e:
logger.error(f"Error while deleting records from {self.namespace}: {e}")
@@ -427,10 +431,7 @@ class PGKVStorage(BaseKVStorage):
DELETE FROM {table_name}
WHERE workspace = $1 AND mode = ANY($2)
"""
params = {
"workspace": self.db.workspace,
"modes": modes
}
params = {"workspace": self.db.workspace, "modes": modes}
logger.info(f"Deleting cache by modes: {modes}")
await self.db.execute(sql, params)
@@ -444,7 +445,10 @@ class PGKVStorage(BaseKVStorage):
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
return {
"status": "error",
"message": f"Unknown namespace: {self.namespace}",
}
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
table_name=table_name
@@ -622,7 +626,9 @@ class PGVectorStorage(BaseVectorStorage):
delete_sql = f"DELETE FROM {table_name} WHERE workspace=$1 AND id = ANY($2)"
try:
await self.db.execute(delete_sql, {"workspace": self.db.workspace, "ids": ids})
await self.db.execute(
delete_sql, {"workspace": self.db.workspace, "ids": ids}
)
logger.debug(
f"Successfully deleted {len(ids)} vectors from {self.namespace}"
)
@@ -759,7 +765,10 @@ class PGVectorStorage(BaseVectorStorage):
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
return {
"status": "error",
"message": f"Unknown namespace: {self.namespace}",
}
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
table_name=table_name
@@ -930,7 +939,10 @@ class PGDocStatusStorage(DocStatusStorage):
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
return {
"status": "error",
"message": f"Unknown namespace: {self.namespace}",
}
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
table_name=table_name

View File

@@ -13,11 +13,12 @@ import pipmaster as pm
if not pm.is_installed("qdrant-client"):
pm.install("qdrant-client")
from qdrant_client import QdrantClient, models # type: ignore
from qdrant_client import QdrantClient, models # type: ignore
config = configparser.ConfigParser()
config.read("config.ini", "utf-8")
def compute_mdhash_id_for_qdrant(
content: str, prefix: str = "", style: str = "simple"
) -> str:
@@ -349,11 +350,14 @@ class QdrantVectorDBStorage(BaseVectorStorage):
self._client,
self.namespace,
vectors_config=models.VectorParams(
size=self.embedding_func.embedding_dim, distance=models.Distance.COSINE
size=self.embedding_func.embedding_dim,
distance=models.Distance.COSINE,
),
)
logger.info(f"Process {os.getpid()} drop Qdrant collection {self.namespace}")
logger.info(
f"Process {os.getpid()} drop Qdrant collection {self.namespace}"
)
return {"status": "success", "message": "data dropped"}
except Exception as e:
logger.error(f"Error dropping Qdrant collection {self.namespace}: {e}")

View File

@@ -8,7 +8,7 @@ if not pm.is_installed("redis"):
pm.install("redis")
# aioredis is a deprecated library, replaced with redis
from redis.asyncio import Redis # type: ignore
from redis.asyncio import Redis # type: ignore
from lightrag.utils import logger
from lightrag.base import BaseKVStorage
import json
@@ -84,7 +84,7 @@ class RedisKVStorage(BaseKVStorage):
f"Deleted {deleted_count} of {len(ids)} entries from {self.namespace}"
)
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
async def drop_cache_by_modes(self, modes: list[str] | None = None) -> bool:
"""Delete specific records from storage by cache mode
Important notes for Redis storage:

View File

@@ -20,7 +20,7 @@ if not pm.is_installed("pymysql"):
if not pm.is_installed("sqlalchemy"):
pm.install("sqlalchemy")
from sqlalchemy import create_engine, text # type: ignore
from sqlalchemy import create_engine, text # type: ignore
class TiDB:
@@ -299,7 +299,9 @@ class TiDBKVStorage(BaseKVStorage):
delete_sql = f"DELETE FROM {table_name} WHERE workspace = :workspace AND {id_field} IN ({ids_list})"
await self.db.execute(delete_sql, {"workspace": self.db.workspace})
logger.info(f"Successfully deleted {len(ids)} records from {self.namespace}")
logger.info(
f"Successfully deleted {len(ids)} records from {self.namespace}"
)
except Exception as e:
logger.error(f"Error deleting records from {self.namespace}: {e}")
@@ -343,7 +345,10 @@ class TiDBKVStorage(BaseKVStorage):
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
return {
"status": "error",
"message": f"Unknown namespace: {self.namespace}",
}
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
table_name=table_name
@@ -502,7 +507,9 @@ class TiDBVectorDBStorage(BaseVectorStorage):
try:
await self.db.execute(delete_sql, {"workspace": self.db.workspace})
logger.debug(f"Successfully deleted {len(ids)} vectors from {self.namespace}")
logger.debug(
f"Successfully deleted {len(ids)} vectors from {self.namespace}"
)
except Exception as e:
logger.error(f"Error while deleting vectors from {self.namespace}: {e}")
@@ -551,7 +558,10 @@ class TiDBVectorDBStorage(BaseVectorStorage):
try:
table_name = namespace_to_table_name(self.namespace)
if not table_name:
return {"status": "error", "message": f"Unknown namespace: {self.namespace}"}
return {
"status": "error",
"message": f"Unknown namespace: {self.namespace}",
}
drop_sql = SQL_TEMPLATES["drop_specifiy_table_workspace"].format(
table_name=table_name