Fix linting

This commit is contained in:
yangdx
2025-02-28 21:35:04 +08:00
parent c37b1e8aa7
commit c973498c34
8 changed files with 136 additions and 130 deletions

View File

@@ -28,58 +28,55 @@ keepalive = 5
errorlog = os.getenv("ERROR_LOG", log_file_path)  # defaults to lightrag.log
accesslog = os.getenv("ACCESS_LOG", log_file_path)  # defaults to lightrag.log
# Configure the logging system
logconfig_dict = {
- 'version': 1,
- 'disable_existing_loggers': False,
- 'formatters': {
- 'standard': {
- 'format': '%(asctime)s [%(levelname)s] %(name)s: %(message)s'
+ "version": 1,
+ "disable_existing_loggers": False,
+ "formatters": {
+ "standard": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"},
},
+ "handlers": {
+ "console": {
+ "class": "logging.StreamHandler",
+ "formatter": "standard",
+ "stream": "ext://sys.stdout",
+ },
+ "file": {
+ "class": "logging.handlers.RotatingFileHandler",
+ "formatter": "standard",
+ "filename": log_file_path,
+ "maxBytes": 10485760, # 10MB
+ "backupCount": 5,
+ "encoding": "utf8",
+ },
+ },
- 'handlers': {
- 'console': {
- 'class': 'logging.StreamHandler',
- 'formatter': 'standard',
- 'stream': 'ext://sys.stdout'
- },
- 'file': {
- 'class': 'logging.handlers.RotatingFileHandler',
- 'formatter': 'standard',
- 'filename': log_file_path,
- 'maxBytes': 10485760, # 10MB
- 'backupCount': 5,
- 'encoding': 'utf8'
- }
- },
- 'filters': {
- 'path_filter': {
- '()': 'lightrag.api.lightrag_server.LightragPathFilter',
+ "filters": {
+ "path_filter": {
+ "()": "lightrag.api.lightrag_server.LightragPathFilter",
},
},
- 'loggers': {
- 'lightrag': {
- 'handlers': ['console', 'file'],
- 'level': loglevel.upper() if loglevel else 'INFO',
- 'propagate': False
+ "loggers": {
+ "lightrag": {
+ "handlers": ["console", "file"],
+ "level": loglevel.upper() if loglevel else "INFO",
+ "propagate": False,
},
- 'gunicorn': {
- 'handlers': ['console', 'file'],
- 'level': loglevel.upper() if loglevel else 'INFO',
- 'propagate': False
+ "gunicorn": {
+ "handlers": ["console", "file"],
+ "level": loglevel.upper() if loglevel else "INFO",
+ "propagate": False,
},
- 'gunicorn.error': {
- 'handlers': ['console', 'file'],
- 'level': loglevel.upper() if loglevel else 'INFO',
- 'propagate': False
+ "gunicorn.error": {
+ "handlers": ["console", "file"],
+ "level": loglevel.upper() if loglevel else "INFO",
+ "propagate": False,
},
- 'gunicorn.access': {
- 'handlers': ['console', 'file'],
- 'level': loglevel.upper() if loglevel else 'INFO',
- 'propagate': False,
- 'filters': ['path_filter']
- }
- }
+ "gunicorn.access": {
+ "handlers": ["console", "file"],
+ "level": loglevel.upper() if loglevel else "INFO",
+ "propagate": False,
+ "filters": ["path_filter"],
+ },
+ },
}
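
For reference, logconfig_dict above follows the standard logging.config.dictConfig schema, which gunicorn applies when its logconfig_dict setting is present. A minimal, self-contained sketch of the same idea; the demo_config name and the trimmed handler/logger set are illustrative, not taken from the repository:

# Illustrative sketch, not part of the commit: applying a dictConfig-style
# configuration like the one above directly with the standard library.
import logging
import logging.config

demo_config = {
    "version": 1,
    "disable_existing_loggers": False,
    "formatters": {
        "standard": {"format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s"},
    },
    "handlers": {
        "console": {
            "class": "logging.StreamHandler",
            "formatter": "standard",
            "stream": "ext://sys.stdout",
        },
    },
    "loggers": {
        "lightrag": {"handlers": ["console"], "level": "INFO", "propagate": False},
    },
}

logging.config.dictConfig(demo_config)
logging.getLogger("lightrag").info("dictConfig applied")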
@@ -134,6 +131,7 @@ def post_fork(server, worker):
"""
# Set lightrag logger level in worker processes using gunicorn's loglevel
from lightrag.utils import logger
logger.setLevel(loglevel.upper())
# Disable uvicorn.error logger in worker processes

View File

@@ -9,7 +9,6 @@ from fastapi import (
from fastapi.responses import FileResponse
import asyncio
import os
import json
import logging
import logging.config
import uvicorn
@@ -139,7 +138,10 @@ def create_app(args):
# Auto scan documents if enabled
if args.auto_scan_at_startup:
# Import necessary functions from shared_storage
- from lightrag.kg.shared_storage import get_namespace_data, get_storage_lock
+ from lightrag.kg.shared_storage import (
+ get_namespace_data,
+ get_storage_lock,
+ )
# Get pipeline status and lock
pipeline_status = get_namespace_data("pipeline_status")
@@ -453,7 +455,7 @@ def configure_logging():
"formatter": "detailed",
"class": "logging.handlers.RotatingFileHandler",
"filename": log_file_path,
"maxBytes": 10*1024*1024, # 10MB
"maxBytes": 10 * 1024 * 1024, # 10MB
"backupCount": 5,
"encoding": "utf-8",
},

View File

@@ -406,7 +406,6 @@ def create_document_routes(
background_tasks.add_task(run_scanning_process, rag, doc_manager)
return {"status": "scanning_started"}
@router.post("/upload", dependencies=[Depends(optional_api_key)])
async def upload_to_input_dir(
background_tasks: BackgroundTasks, file: UploadFile = File(...)
@@ -667,6 +666,7 @@ def create_document_routes(
"""
try:
from lightrag.kg.shared_storage import get_namespace_data
pipeline_status = get_namespace_data("pipeline_status")
# Convert to regular dict if it's a Manager.dict

View File

@@ -295,7 +295,9 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
original_workers = args.workers
args.workers = 1
# Log warning directly here
logging.warning(f"In uvicorn mode, workers parameter was set to {original_workers}. Forcing workers=1")
logging.warning(
f"In uvicorn mode, workers parameter was set to {original_workers}. Forcing workers=1"
)
# convert relative path to absolute path
args.working_dir = os.path.abspath(args.working_dir)

View File

@@ -274,6 +274,7 @@ class LightRAG:
from lightrag.kg.shared_storage import (
initialize_share_data,
)
initialize_share_data()
if not os.path.exists(self.working_dir):
@@ -679,32 +680,33 @@ class LightRAG:
# Check if another process is already processing the queue
process_documents = False
with storage_lock:
# Ensure only one worker is processing documents
if not pipeline_status.get("busy", False):
# No other process is busy, we can process documents
# Get the current history_messages list
# Cleaning history_messages without breaking it as a shared list object
current_history = pipeline_status.get("history_messages", [])
# Clear the current list contents but keep the same list object
if hasattr(current_history, "clear"):
current_history.clear()
- pipeline_status.update({
- "busy": True,
- "job_name": "indexing files",
- "job_start": datetime.now().isoformat(),
- "docs": 0,
- "batchs": 0,
- "cur_batch": 0,
- "request_pending": False, # Clear any previous request
- "latest_message": "",
- # Keep using the same list object
- "history_messages": current_history,
- })
+ pipeline_status.update(
+ {
+ "busy": True,
+ "job_name": "indexing files",
+ "job_start": datetime.now().isoformat(),
+ "docs": 0,
+ "batchs": 0,
+ "cur_batch": 0,
+ "request_pending": False, # Clear any previous request
+ "latest_message": "",
+ "history_messages": current_history, # keep it as a shared list object
+ }
+ )
process_documents = True
else:
# Another process is busy, just set request flag and return
pipeline_status["request_pending"] = True
logger.info("Another process is already processing the document queue. Request queued.")
logger.info(
"Another process is already processing the document queue. Request queued."
)
if not process_documents:
return
@@ -742,10 +744,7 @@ class LightRAG:
]
# Update pipeline status with batch information (directly, as it's atomic)
- pipeline_status.update({
- "batchs": len(docs_batches),
- "cur_batch": 0
- })
+ pipeline_status.update({"batchs": len(docs_batches), "cur_batch": 0})
log_message = f"Number of batches to process: {len(docs_batches)}."
logger.info(log_message)
@@ -763,7 +762,9 @@ class LightRAG:
docs_batch: list[tuple[str, DocProcessingStatus]],
size_batch: int,
) -> None:
log_message = f"Start processing batch {batch_idx + 1} of {size_batch}."
log_message = (
f"Start processing batch {batch_idx + 1} of {size_batch}."
)
logger.info(log_message)
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)
@@ -822,7 +823,9 @@ class LightRAG:
}
)
except Exception as e:
logger.error(f"Failed to process document {doc_id}: {str(e)}")
logger.error(
f"Failed to process document {doc_id}: {str(e)}"
)
await self.doc_status.upsert(
{
doc_id: {
@@ -837,7 +840,9 @@ class LightRAG:
}
)
continue
log_message = f"Completed batch {batch_idx + 1} of {len(docs_batches)}."
log_message = (
f"Completed batch {batch_idx + 1} of {len(docs_batches)}."
)
logger.info(log_message)
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)
@@ -907,6 +912,7 @@ class LightRAG:
# Get pipeline_status and update latest_message and history_messages
from lightrag.kg.shared_storage import get_namespace_data
pipeline_status = get_namespace_data("pipeline_status")
pipeline_status["latest_message"] = log_message
pipeline_status["history_messages"].append(log_message)

View File

@@ -336,8 +336,9 @@ async def extract_entities(
global_config: dict[str, str],
llm_response_cache: BaseKVStorage | None = None,
) -> None:
# Fetch pipeline_status at the start of the function
from lightrag.kg.shared_storage import get_namespace_data
pipeline_status = get_namespace_data("pipeline_status")
use_llm_func: callable = global_config["llm_model_func"]
entity_extract_max_gleaning = global_config["entity_extract_max_gleaning"]

View File

@@ -75,50 +75,42 @@ def set_logger(log_file: str, level: int = logging.DEBUG):
log_file: Path to the log file
level: Logging level (e.g. logging.DEBUG, logging.INFO)
"""
# Set the log level
logger.setLevel(level)
# Make sure the log file path is absolute
log_file = os.path.abspath(log_file)
# Create the formatter
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Check whether file/console handlers are already registered
has_file_handler = False
has_console_handler = False
# Inspect the existing handlers
for handler in logger.handlers:
if isinstance(handler, logging.FileHandler):
has_file_handler = True
- elif isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler):
+ elif isinstance(handler, logging.StreamHandler) and not isinstance(
+ handler, logging.FileHandler
+ ):
has_console_handler = True
# If there is no file handler, add one
if not has_file_handler:
# Use RotatingFileHandler instead of FileHandler
from logging.handlers import RotatingFileHandler
file_handler = RotatingFileHandler(
log_file,
- maxBytes=10*1024*1024, # 10MB
+ maxBytes=10 * 1024 * 1024, # 10MB
backupCount=5,
- encoding="utf-8"
+ encoding="utf-8",
)
file_handler.setLevel(level)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
# If there is no console handler, add one
if not has_console_handler:
console_handler = logging.StreamHandler()
console_handler.setLevel(level)
console_handler.setFormatter(formatter)
logger.addHandler(console_handler)
# Set propagate to False to avoid duplicate output
logger.propagate = False
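
The set_logger changes above keep the existing handler-deduplication behaviour: a handler is only added when one of that type is not already registered, which prevents duplicate log lines when the function is called more than once. A minimal sketch of that pattern on its own; the helper name and file path are illustrative, not from the repository:

# Illustrative sketch: add a rotating file handler only if the logger has no file handler yet.
import logging
from logging.handlers import RotatingFileHandler

def add_rotating_file_handler(logger: logging.Logger, path: str) -> None:
    # RotatingFileHandler subclasses FileHandler, so this also catches existing rotating handlers.
    if any(isinstance(h, logging.FileHandler) for h in logger.handlers):
        return
    handler = RotatingFileHandler(
        path, maxBytes=10 * 1024 * 1024, backupCount=5, encoding="utf-8"
    )
    handler.setFormatter(
        logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    )
    logger.addHandler(handler)

demo_logger = logging.getLogger("demo")
add_rotating_file_handler(demo_logger, "demo.log")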

View File

@@ -5,9 +5,7 @@ Start LightRAG server with Gunicorn
import os
import sys
import json
import signal
import argparse
from lightrag.api.utils_api import parse_args, display_splash_screen
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
@@ -34,7 +32,6 @@ def main():
# Parse all arguments using parse_args
args = parse_args(is_uvicorn_mode=False)
# Display startup information
display_splash_screen(args)
@@ -101,8 +98,14 @@ def main():
# Set configuration variables in gunicorn_config
gunicorn_config.workers = int(os.getenv("WORKERS", args.workers))
gunicorn_config.bind = f"{os.getenv('HOST', args.host)}:{os.getenv('PORT', args.port)}"
gunicorn_config.loglevel = args.log_level.lower() if args.log_level else os.getenv("LOG_LEVEL", "info")
gunicorn_config.bind = (
f"{os.getenv('HOST', args.host)}:{os.getenv('PORT', args.port)}"
)
gunicorn_config.loglevel = (
args.log_level.lower()
if args.log_level
else os.getenv("LOG_LEVEL", "info")
)
# Set SSL configuration if enabled
if args.ssl:
@@ -122,9 +125,11 @@ def main():
if callable(value):
self.cfg.set(key, value)
# Make sure logconfig_dict is loaded correctly
- if hasattr(gunicorn_config, 'logconfig_dict'):
- self.cfg.set('logconfig_dict', getattr(gunicorn_config, 'logconfig_dict'))
+ if hasattr(gunicorn_config, "logconfig_dict"):
+ self.cfg.set(
+ "logconfig_dict", getattr(gunicorn_config, "logconfig_dict")
+ )
def load(self):
# Import the application
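
The class being configured here follows gunicorn's documented custom-application pattern: subclass gunicorn.app.base.BaseApplication, push settings into self.cfg in load_config, and return the application callable from load. A minimal sketch under those assumptions; the class name and options dict are illustrative, not repository code:

# Illustrative sketch of gunicorn's custom-application embedding pattern.
from gunicorn.app.base import BaseApplication

class DemoApplication(BaseApplication):
    def __init__(self, app, options=None):
        self.application = app
        self.options = options or {}
        super().__init__()

    def load_config(self):
        # Copy recognised settings (e.g. "bind", "workers", "logconfig_dict") into gunicorn's config.
        for key, value in self.options.items():
            if key in self.cfg.settings and value is not None:
                self.cfg.set(key, value)

    def load(self):
        # Return the WSGI/ASGI callable to serve.
        return self.application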