Merge pull request #682 from ParisNeo/main

Few upgrades in the webui
This commit is contained in:
zrguo
2025-01-31 23:33:28 +08:00
committed by GitHub
3 changed files with 486 additions and 410 deletions

View File

@@ -1,9 +1,28 @@
from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request, BackgroundTasks
# Shared scan-progress state: updated by the background scanning task and
# read by the /documents/scan-progress endpoint.
from typing import Dict
import threading
# Lock used when reading or updating the shared scan state below.
progress_lock = threading.Lock()

# Module-wide record of the document scan currently in flight.
scan_progress: Dict = {
    "is_scanning": False,  # True while a background scan is running
    "current_file": "",  # base name of the file being indexed
    "indexed_count": 0,  # files indexed so far in this scan
    "total_files": 0,  # files discovered for this scan
    "progress": 0,  # completion percentage (0-100)
}
import json
import os
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel from pydantic import BaseModel
import logging import logging
import argparse import argparse
import json
import time import time
import re import re
from typing import List, Dict, Any, Optional, Union from typing import List, Dict, Any, Optional, Union
@@ -16,7 +35,6 @@ from pathlib import Path
import shutil import shutil
import aiofiles import aiofiles
from ascii_colors import trace_exception, ASCIIColors from ascii_colors import trace_exception, ASCIIColors
import os
import sys import sys
import configparser import configparser
@@ -538,7 +556,7 @@ class DocumentManager:
# Create input directory if it doesn't exist # Create input directory if it doesn't exist
self.input_dir.mkdir(parents=True, exist_ok=True) self.input_dir.mkdir(parents=True, exist_ok=True)
def scan_directory(self) -> List[Path]: def scan_directory_for_new_files(self) -> List[Path]:
"""Scan input directory for new files""" """Scan input directory for new files"""
new_files = [] new_files = []
for ext in self.supported_extensions: for ext in self.supported_extensions:
@@ -547,6 +565,14 @@ class DocumentManager:
new_files.append(file_path) new_files.append(file_path)
return new_files return new_files
def scan_directory(self) -> List[Path]:
    """Return every file under the input directory with a supported extension.

    The search is recursive and does not consult the set of already-indexed
    files, so previously indexed files are included in the result.

    Returns:
        List[Path]: Matching paths, grouped in ``supported_extensions`` order.
    """
    return [
        file_path
        for ext in self.supported_extensions
        for file_path in self.input_dir.rglob(f"*{ext}")
    ]
def mark_as_indexed(self, file_path: Path): def mark_as_indexed(self, file_path: Path):
"""Mark a file as indexed""" """Mark a file as indexed"""
self.indexed_files.add(file_path) self.indexed_files.add(file_path)
@@ -730,7 +756,7 @@ def create_app(args):
# Startup logic # Startup logic
if args.auto_scan_at_startup: if args.auto_scan_at_startup:
try: try:
new_files = doc_manager.scan_directory() new_files = doc_manager.scan_directory_for_new_files()
for file_path in new_files: for file_path in new_files:
try: try:
await index_file(file_path) await index_file(file_path)
@@ -983,42 +1009,59 @@ def create_app(args):
logging.warning(f"No content extracted from file: {file_path}") logging.warning(f"No content extracted from file: {file_path}")
@app.post("/documents/scan", dependencies=[Depends(optional_api_key)]) @app.post("/documents/scan", dependencies=[Depends(optional_api_key)])
async def scan_for_new_documents(): async def scan_for_new_documents(background_tasks: BackgroundTasks):
""" """Trigger the scanning process"""
Manually trigger scanning for new documents in the directory managed by `doc_manager`. global scan_progress
This endpoint facilitates manual initiation of a document scan to identify and index new files. with progress_lock:
It processes all newly detected files, attempts indexing each file, logs any errors that occur, if scan_progress["is_scanning"]:
and returns a summary of the operation. return {"status": "already_scanning"}
Returns: scan_progress["is_scanning"] = True
dict: A dictionary containing: scan_progress["indexed_count"] = 0
- "status" (str): Indicates success or failure of the scanning process. scan_progress["progress"] = 0
- "indexed_count" (int): The number of successfully indexed documents.
- "total_documents" (int): Total number of documents that have been indexed so far. # Start the scanning process in the background
background_tasks.add_task(run_scanning_process)
return {"status": "scanning_started"}
async def run_scanning_process():
"""Background task to scan and index documents"""
global scan_progress
Raises:
HTTPException: If an error occurs during the document scanning process, a 500 status
code is returned with details about the exception.
"""
try: try:
new_files = doc_manager.scan_directory() new_files = doc_manager.scan_directory_for_new_files()
indexed_count = 0 scan_progress["total_files"] = len(new_files)
for file_path in new_files: for file_path in new_files:
try: try:
with progress_lock:
scan_progress["current_file"] = os.path.basename(file_path)
await index_file(file_path) await index_file(file_path)
indexed_count += 1
with progress_lock:
scan_progress["indexed_count"] += 1
scan_progress["progress"] = (
scan_progress["indexed_count"]
/ scan_progress["total_files"]
) * 100
except Exception as e: except Exception as e:
logging.error(f"Error indexing file {file_path}: {str(e)}") logging.error(f"Error indexing file {file_path}: {str(e)}")
return {
"status": "success",
"indexed_count": indexed_count,
"total_documents": len(doc_manager.indexed_files),
}
except Exception as e: except Exception as e:
raise HTTPException(status_code=500, detail=str(e)) logging.error(f"Error during scanning process: {str(e)}")
finally:
with progress_lock:
scan_progress["is_scanning"] = False
@app.get("/documents/scan-progress")
async def get_scan_progress():
    """Return a snapshot of the current scanning progress.

    Returns:
        dict: A shallow copy of ``scan_progress`` taken while holding
        ``progress_lock``.
    """
    with progress_lock:
        # Copy under the lock: returning the shared dict itself would let the
        # response be serialized after the lock has been released.
        return dict(scan_progress)
@app.post("/documents/upload", dependencies=[Depends(optional_api_key)]) @app.post("/documents/upload", dependencies=[Depends(optional_api_key)])
async def upload_to_input_dir(file: UploadFile = File(...)): async def upload_to_input_dir(file: UploadFile = File(...)):
@@ -1849,7 +1892,7 @@ def create_app(args):
"status": "healthy", "status": "healthy",
"working_directory": str(args.working_dir), "working_directory": str(args.working_dir),
"input_directory": str(args.input_dir), "input_directory": str(args.input_dir),
"indexed_files": files, "indexed_files": [str(f) for f in files],
"indexed_files_count": len(files), "indexed_files_count": len(files),
"configuration": { "configuration": {
# LLM configuration binding/host address (if applicable)/model (if applicable) # LLM configuration binding/host address (if applicable)/model (if applicable)

View File

@@ -98,7 +98,7 @@
</div> </div>
</div> </div>
<script src="/js/lightrag_api.js"></script> <script src="/js/api.js"></script>
</body> </body>
</html> </html>

View File

@@ -17,11 +17,12 @@ const showToast = (message, duration = 3000) => {
const fetchWithAuth = async (url, options = {}) => { const fetchWithAuth = async (url, options = {}) => {
const headers = { const headers = {
...(options.headers || {}), ...(options.headers || {}),
...(state.apiKey ? { 'Authorization': `Bearer ${state.apiKey}` } : {}) ...(state.apiKey ? { 'X-API-Key': state.apiKey } : {}) // Use X-API-Key instead of Bearer
}; };
return fetch(url, { ...options, headers }); return fetch(url, { ...options, headers });
}; };
// Page renderers // Page renderers
const pages = { const pages = {
'file-manager': () => ` 'file-manager': () => `
@@ -49,21 +50,23 @@ const pages = {
</div> </div>
<p class="text-sm text-gray-600 mt-2"><span id="uploadStatus">0</span> files processed</p> <p class="text-sm text-gray-600 mt-2"><span id="uploadStatus">0</span> files processed</p>
</div> </div>
<div class="flex items-center space-x-4 bg-gray-100 p-4 rounded-lg shadow-md">
<button id="rescanBtn" class="flex items-center bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20" fill="currentColor" class="mr-2">
<path d="M12 4a8 8 0 1 1-8 8H2.5a9.5 9.5 0 1 0 2.8-6.7L2 3v6h6L5.7 6.7A7.96 7.96 0 0 1 12 4z"/>
</svg>
Rescan Files
</button>
<button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors"> <button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
Upload & Index Files Upload & Index Files
</button> </button>
</div>
<div id="indexedFiles" class="space-y-2"> <div id="indexedFiles" class="space-y-2">
<h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3> <h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3>
<div class="space-y-2"></div> <div class="space-y-2"></div>
</div> </div>
<button id="rescanBtn" class="flex items-center bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20" fill="currentColor" class="mr-2">
<path d="M12 4a8 8 0 1 1-8 8H2.5a9.5 9.5 0 1 0 2.8-6.7L2 3v6h6L5.7 6.7A7.96 7.96 0 0 1 12 4z"/>
</svg>
Rescan Files
</button>
</div> </div>
@@ -232,22 +235,52 @@ const handlers = {
} }
progress.classList.add('hidden'); progress.classList.add('hidden');
}); });
rescanBtn.addEventListener('click', async () => { rescanBtn.addEventListener('click', async () => {
let apiKey = localStorage.getItem('apiKey') || '';
const progress = document.getElementById('uploadProgress'); const progress = document.getElementById('uploadProgress');
const progressBar = progress.querySelector('div'); const progressBar = progress.querySelector('div');
const statusText = document.getElementById('uploadStatus'); const statusText = document.getElementById('uploadStatus');
progress.classList.remove('hidden'); progress.classList.remove('hidden');
try { try {
const scan_output = await fetch('/documents/scan', { // Start the scanning process
method: 'GET', const scanResponse = await fetch('/documents/scan', {
method: 'POST',
}); });
statusText.textContent = scan_output.data;
if (!scanResponse.ok) {
throw new Error('Scan failed to start');
}
// Start polling for progress
const pollInterval = setInterval(async () => {
const progressResponse = await fetch('/documents/scan-progress');
const progressData = await progressResponse.json();
// Update progress bar
progressBar.style.width = `${progressData.progress}%`;
// Update status text
if (progressData.total_files > 0) {
statusText.textContent = `Processing ${progressData.current_file} (${progressData.indexed_count}/${progressData.total_files})`;
}
// Check if scanning is complete
if (!progressData.is_scanning) {
clearInterval(pollInterval);
progress.classList.add('hidden');
statusText.textContent = 'Scan complete!';
}
}, 1000); // Poll every second
} catch (error) { } catch (error) {
console.error('Upload error:', error); console.error('Upload error:', error);
progress.classList.add('hidden');
statusText.textContent = 'Error during scanning process';
} }
progress.classList.add('hidden');
}); });
updateIndexedFiles(); updateIndexedFiles();
}, },