From b4bcd765991cc29cab35ef13900aabdbff194fb8 Mon Sep 17 00:00:00 2001
From: yangdx <yangdx@znipower.com>
Date: Fri, 28 Feb 2025 10:53:36 +0800
Subject: [PATCH] Remove useless scan progress tracking functionality and
 related code

---
 lightrag/api/routers/document_routes.py | 75 +------------------------
 lightrag/lightrag.py                    | 13 -----
 2 files changed, 1 insertion(+), 87 deletions(-)

diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py
index 2a6459fb..3bb36830 100644
--- a/lightrag/api/routers/document_routes.py
+++ b/lightrag/api/routers/document_routes.py
@@ -4,7 +4,6 @@ This module contains all document-related routes for the LightRAG API.
 
 import asyncio
 import logging
-import os
 import aiofiles
 import shutil
 import traceback
@@ -12,17 +11,12 @@ import pipmaster as pm
 from datetime import datetime
 from pathlib import Path
 from typing import Dict, List, Optional, Any
-from ascii_colors import ASCIIColors
 from fastapi import APIRouter, BackgroundTasks, Depends, File, HTTPException, UploadFile
 from pydantic import BaseModel, Field, field_validator
 
 from lightrag import LightRAG
 from lightrag.base import DocProcessingStatus, DocStatus
 from ..utils_api import get_api_key_dependency
-from lightrag.kg.shared_storage import (
-    get_namespace_data,
-    get_storage_lock,
-)
 
 
 router = APIRouter(prefix="/documents", tags=["documents"])
@@ -376,72 +370,19 @@ async def save_temp_file(input_dir: Path, file: UploadFile = File(...)) -> Path:
 
 async def run_scanning_process(rag: LightRAG, doc_manager: DocumentManager):
     """Background task to scan and index documents"""
-    scan_progress = get_namespace_data("scan_progress")
-    scan_lock = get_storage_lock()
-    with scan_lock:
-        if scan_progress.get("is_scanning", False):
-            ASCIIColors.info("Skip document scanning(another scanning is active)")
-            return
-        scan_progress.update(
-            {
-                "is_scanning": True,
-                "current_file": "",
-                "indexed_count": 0,
-                "total_files": 0,
-                "progress": 0,
-            }
-        )
-
     try:
         new_files = doc_manager.scan_directory_for_new_files()
         total_files = len(new_files)
-        scan_progress.update(
-            {
-                "current_file": "",
-                "total_files": total_files,
-                "indexed_count": 0,
-                "progress": 0,
-            }
-        )
-
         logging.info(f"Found {total_files} new files to index.")
+
         for idx, file_path in enumerate(new_files):
             try:
-                progress = (idx / total_files * 100) if total_files > 0 else 0
-                scan_progress.update(
-                    {
-                        "current_file": os.path.basename(file_path),
-                        "indexed_count": idx,
-                        "progress": progress,
-                    }
-                )
-
                 await pipeline_index_file(rag, file_path)
-
-                progress = ((idx + 1) / total_files * 100) if total_files > 0 else 0
-                scan_progress.update(
-                    {
-                        "current_file": os.path.basename(file_path),
-                        "indexed_count": idx + 1,
-                        "progress": progress,
-                    }
-                )
-
             except Exception as e:
                 logging.error(f"Error indexing file {file_path}: {str(e)}")
 
     except Exception as e:
         logging.error(f"Error during scanning process: {str(e)}")
-    finally:
-        scan_progress.update(
-            {
-                "is_scanning": False,
-                "current_file": "",
-                "indexed_count": 0,
-                "total_files": 0,
-                "progress": 0,
-            }
-        )
 
 
 def create_document_routes(
@@ -465,20 +406,6 @@ def create_document_routes(
         background_tasks.add_task(run_scanning_process, rag, doc_manager)
         return {"status": "scanning_started"}
 
-    @router.get("/scan-progress")
-    async def get_scanning_progress():
-        """
-        Get the current progress of the document scanning process.
-
-        Returns:
-            dict: A dictionary containing the current scanning progress information including:
-                - is_scanning: Whether a scan is currently in progress
-                - current_file: The file currently being processed
-                - indexed_count: Number of files indexed so far
-                - total_files: Total number of files to process
-                - progress: Percentage of completion
-        """
-        return dict(get_namespace_data("scan_progress"))
 
     @router.post("/upload", dependencies=[Depends(optional_api_key)])
     async def upload_to_input_dir(
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 0011fb6f..72f31315 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -276,20 +276,7 @@ class LightRAG:
             try_initialize_namespace,
             get_namespace_data,
         )
-
         initialize_share_data()
-        need_init = try_initialize_namespace("scan_progress")
-        scan_progress = get_namespace_data("scan_progress")
-        if need_init:
-            scan_progress.update(
-                {
-                    "is_scanning": False,
-                    "current_file": "",
-                    "indexed_count": 0,
-                    "total_files": 0,
-                    "progress": 0,
-                }
-            )
 
         if not os.path.exists(self.working_dir):
             logger.info(f"Creating working directory {self.working_dir}")