@@ -1,9 +1,28 @@
-from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request
+from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request, BackgroundTasks
+# Backend (Python)
+# Add this to store progress globally
+from typing import Dict
+import threading
+
+# Global progress tracker
+scan_progress: Dict = {
+    "is_scanning": False,
+    "current_file": "",
+    "indexed_count": 0,
+    "total_files": 0,
+    "progress": 0,
+}
+
+# Lock for thread-safe operations
+progress_lock = threading.Lock()
+
+import json
+import os
+
 from fastapi.staticfiles import StaticFiles
 from pydantic import BaseModel
 import logging
 import argparse
-import json
 import time
 import re
 from typing import List, Dict, Any, Optional, Union
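
A minimal sketch, not part of the diff, of the access pattern the comments above describe: every read or write of `scan_progress` goes through `progress_lock`, so an API handler never observes a half-updated snapshot. The helper names below are hypothetical.

import threading
from typing import Dict

scan_progress: Dict = {
    "is_scanning": False,
    "current_file": "",
    "indexed_count": 0,
    "total_files": 0,
    "progress": 0,
}
progress_lock = threading.Lock()

def snapshot_progress() -> Dict:
    # Hypothetical helper: copy the tracker under the lock so a caller
    # never sees a partially updated state.
    with progress_lock:
        return dict(scan_progress)

def record_indexed(current_file: str) -> None:
    # Hypothetical helper: update all related fields in one critical section.
    with progress_lock:
        scan_progress["current_file"] = current_file
        scan_progress["indexed_count"] += 1
        if scan_progress["total_files"]:
            scan_progress["progress"] = (
                scan_progress["indexed_count"] / scan_progress["total_files"]
            ) * 100
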
@@ -16,7 +35,6 @@ from pathlib import Path
 import shutil
 import aiofiles
 from ascii_colors import trace_exception, ASCIIColors
-import os
 import sys
 import configparser
 
@@ -538,7 +556,7 @@ class DocumentManager:
         # Create input directory if it doesn't exist
         self.input_dir.mkdir(parents=True, exist_ok=True)
 
-    def scan_directory(self) -> List[Path]:
+    def scan_directory_for_new_files(self) -> List[Path]:
         """Scan input directory for new files"""
         new_files = []
         for ext in self.supported_extensions:
@@ -547,6 +565,14 @@ class DocumentManager:
                 new_files.append(file_path)
         return new_files
 
+    def scan_directory(self) -> List[Path]:
+        """Scan input directory for new files"""
+        new_files = []
+        for ext in self.supported_extensions:
+            for file_path in self.input_dir.rglob(f"*{ext}"):
+                new_files.append(file_path)
+        return new_files
+
     def mark_as_indexed(self, file_path: Path):
         """Mark a file as indexed"""
         self.indexed_files.add(file_path)
@@ -730,7 +756,7 @@ def create_app(args):
         # Startup logic
         if args.auto_scan_at_startup:
             try:
-                new_files = doc_manager.scan_directory()
+                new_files = doc_manager.scan_directory_for_new_files()
                 for file_path in new_files:
                     try:
                         await index_file(file_path)
@@ -983,42 +1009,59 @@ def create_app(args):
             logging.warning(f"No content extracted from file: {file_path}")
 
     @app.post("/documents/scan", dependencies=[Depends(optional_api_key)])
-    async def scan_for_new_documents():
-        """
-        Manually trigger scanning for new documents in the directory managed by `doc_manager`.
-
-        This endpoint facilitates manual initiation of a document scan to identify and index new files.
-        It processes all newly detected files, attempts indexing each file, logs any errors that occur,
-        and returns a summary of the operation.
-
-        Returns:
-            dict: A dictionary containing:
-                - "status" (str): Indicates success or failure of the scanning process.
-                - "indexed_count" (int): The number of successfully indexed documents.
-                - "total_documents" (int): Total number of documents that have been indexed so far.
-
-        Raises:
-            HTTPException: If an error occurs during the document scanning process, a 500 status
-            code is returned with details about the exception.
-        """
-        try:
-            new_files = doc_manager.scan_directory()
-            indexed_count = 0
-
-            for file_path in new_files:
-                try:
-                    await index_file(file_path)
-                    indexed_count += 1
-                except Exception as e:
-                    logging.error(f"Error indexing file {file_path}: {str(e)}")
-
-            return {
-                "status": "success",
-                "indexed_count": indexed_count,
-                "total_documents": len(doc_manager.indexed_files),
-            }
-        except Exception as e:
-            raise HTTPException(status_code=500, detail=str(e))
+    async def scan_for_new_documents(background_tasks: BackgroundTasks):
+        """Trigger the scanning process"""
+        global scan_progress
+
+        with progress_lock:
+            if scan_progress["is_scanning"]:
+                return {"status": "already_scanning"}
+
+            scan_progress["is_scanning"] = True
+            scan_progress["indexed_count"] = 0
+            scan_progress["progress"] = 0
+
+        # Start the scanning process in the background
+        background_tasks.add_task(run_scanning_process)
+
+        return {"status": "scanning_started"}
+
+    async def run_scanning_process():
+        """Background task to scan and index documents"""
+        global scan_progress
+
+        try:
+            new_files = doc_manager.scan_directory_for_new_files()
+            scan_progress["total_files"] = len(new_files)
+
+            for file_path in new_files:
+                try:
+                    with progress_lock:
+                        scan_progress["current_file"] = os.path.basename(file_path)
+
+                    await index_file(file_path)
+
+                    with progress_lock:
+                        scan_progress["indexed_count"] += 1
+                        scan_progress["progress"] = (
+                            scan_progress["indexed_count"]
+                            / scan_progress["total_files"]
+                        ) * 100
+
+                except Exception as e:
+                    logging.error(f"Error indexing file {file_path}: {str(e)}")
+
+        except Exception as e:
+            logging.error(f"Error during scanning process: {str(e)}")
+        finally:
+            with progress_lock:
+                scan_progress["is_scanning"] = False
+
+    @app.get("/documents/scan-progress")
+    async def get_scan_progress():
+        """Get the current scanning progress"""
+        with progress_lock:
+            return scan_progress
 
     @app.post("/documents/upload", dependencies=[Depends(optional_api_key)])
     async def upload_to_input_dir(file: UploadFile = File(...)):
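
A small client sketch, not part of the change, showing how the two routes added above are meant to be used together: POST `/documents/scan` to start a scan, then poll `/documents/scan-progress` until `is_scanning` drops back to false. The base URL, the one-second interval, and the use of `requests` are assumptions; add an `X-API-Key` header if the server is protected by an API key.

import time
import requests  # assumed HTTP client; any other works the same way

BASE_URL = "http://localhost:9621"  # assumed host/port

resp = requests.post(f"{BASE_URL}/documents/scan")
print(resp.json())  # {"status": "scanning_started"} or {"status": "already_scanning"}

while True:
    progress = requests.get(f"{BASE_URL}/documents/scan-progress").json()
    print(
        f'{progress["indexed_count"]}/{progress["total_files"]} '
        f'({progress["progress"]:.0f}%) {progress["current_file"]}'
    )
    if not progress["is_scanning"]:
        break
    time.sleep(1)  # mirrors the 1 s polling used by the web UI below
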
@@ -1849,7 +1892,7 @@ def create_app(args):
             "status": "healthy",
             "working_directory": str(args.working_dir),
             "input_directory": str(args.input_dir),
-            "indexed_files": files,
+            "indexed_files": [str(f) for f in files],
             "indexed_files_count": len(files),
             "configuration": {
                 # LLM configuration binding/host address (if applicable)/model (if applicable)
@@ -98,7 +98,7 @@
         </div>
     </div>
 
-    <script src="/js/lightrag_api.js"></script>
+    <script src="/js/api.js"></script>
 
 </body>
 </html>
@@ -17,11 +17,12 @@ const showToast = (message, duration = 3000) => {
 const fetchWithAuth = async (url, options = {}) => {
     const headers = {
         ...(options.headers || {}),
-        ...(state.apiKey ? { 'Authorization': `Bearer ${state.apiKey}` } : {})
+        ...(state.apiKey ? { 'X-API-Key': state.apiKey } : {}) // Use X-API-Key instead of Bearer
     };
     return fetch(url, { ...options, headers });
 };
 
+
 // Page renderers
 const pages = {
     'file-manager': () => `
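
The same header change applies to any non-browser client. A hedged sketch of the equivalent call from Python, with the header name taken from the comment above and the URL and key as placeholders:

import requests  # assumed HTTP client

API_KEY = "your-secret-key"          # placeholder
BASE_URL = "http://localhost:9621"   # assumed host/port

# Send the key as an X-API-Key header rather than an Authorization: Bearer token.
resp = requests.post(f"{BASE_URL}/documents/scan", headers={"X-API-Key": API_KEY})
resp.raise_for_status()
print(resp.json())
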
@@ -49,15 +50,7 @@ const pages = {
                 </div>
                 <p class="text-sm text-gray-600 mt-2"><span id="uploadStatus">0</span> files processed</p>
             </div>
-            <button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
-                Upload & Index Files
-            </button>
-
-            <div id="indexedFiles" class="space-y-2">
-                <h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3>
-                <div class="space-y-2"></div>
-            </div>
+            <div class="flex items-center space-x-4 bg-gray-100 p-4 rounded-lg shadow-md">
             <button id="rescanBtn" class="flex items-center bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
                 <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20" fill="currentColor" class="mr-2">
                     <path d="M12 4a8 8 0 1 1-8 8H2.5a9.5 9.5 0 1 0 2.8-6.7L2 3v6h6L5.7 6.7A7.96 7.96 0 0 1 12 4z"/>
@@ -65,6 +58,16 @@ const pages = {
                 Rescan Files
             </button>
 
+            <button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
+                Upload & Index Files
+            </button>
+            </div>
+
+            <div id="indexedFiles" class="space-y-2">
+                <h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3>
+                <div class="space-y-2"></div>
+            </div>
+
         </div>
     `,
@@ -232,22 +235,52 @@ const handlers = {
             }
             progress.classList.add('hidden');
         });
 
         rescanBtn.addEventListener('click', async () => {
-            let apiKey = localStorage.getItem('apiKey') || '';
             const progress = document.getElementById('uploadProgress');
             const progressBar = progress.querySelector('div');
             const statusText = document.getElementById('uploadStatus');
             progress.classList.remove('hidden');
 
             try {
-                const scan_output = await fetch('/documents/scan', {
-                    method: 'GET',
+                // Start the scanning process
+                const scanResponse = await fetch('/documents/scan', {
+                    method: 'POST',
                 });
-                statusText.textContent = scan_output.data;
+
+                if (!scanResponse.ok) {
+                    throw new Error('Scan failed to start');
+                }
+
+                // Start polling for progress
+                const pollInterval = setInterval(async () => {
+                    const progressResponse = await fetch('/documents/scan-progress');
+                    const progressData = await progressResponse.json();
+
+                    // Update progress bar
+                    progressBar.style.width = `${progressData.progress}%`;
+
+                    // Update status text
+                    if (progressData.total_files > 0) {
+                        statusText.textContent = `Processing ${progressData.current_file} (${progressData.indexed_count}/${progressData.total_files})`;
+                    }
+
+                    // Check if scanning is complete
+                    if (!progressData.is_scanning) {
+                        clearInterval(pollInterval);
+                        progress.classList.add('hidden');
+                        statusText.textContent = 'Scan complete!';
+                    }
+                }, 1000); // Poll every second
+
             } catch (error) {
                 console.error('Upload error:', error);
-            }
                 progress.classList.add('hidden');
+                statusText.textContent = 'Error during scanning process';
+            }
         });
 
 
         updateIndexedFiles();
     },