Merge pull request #766 from danielaskdd/background-scan-at-startup
Move document scanning trigger by command line to background task
@@ -6,7 +6,7 @@ from fastapi import (
     Form,
     BackgroundTasks,
 )
-
+import asyncio
 import threading
 import os
 import json
@@ -730,6 +730,8 @@ def create_app(args):
         postgres_db = None
         oracle_db = None
         tidb_db = None
+        # Store background tasks
+        app.state.background_tasks = set()
 
         try:
             # Check if PostgreSQL is needed
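Storing every task in app.state.background_tasks is the usual asyncio precaution for fire-and-forget work: the event loop holds only weak references to tasks, so a task with no other reference can be garbage-collected before it finishes. A minimal, self-contained sketch of the idiom (the coroutine and names below are illustrative, not taken from the repository):

import asyncio

background_tasks: set[asyncio.Task] = set()

async def scan_documents() -> None:
    # Stand-in for the real scanning coroutine.
    await asyncio.sleep(0.5)

async def main() -> None:
    task = asyncio.create_task(scan_documents())
    # Hold a strong reference while the task runs, drop it once it is done.
    background_tasks.add(task)
    task.add_done_callback(background_tasks.discard)
    # Keep the loop alive long enough for the background task to finish.
    await asyncio.sleep(1)

asyncio.run(main())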
@@ -794,20 +796,23 @@ def create_app(args):
 
             # Auto scan documents if enabled
             if args.auto_scan_at_startup:
-                try:
-                    new_files = doc_manager.scan_directory_for_new_files()
-                    for file_path in new_files:
-                        try:
-                            await index_file(file_path)
-                        except Exception as e:
-                            trace_exception(e)
-                            logging.error(f"Error indexing file {file_path}: {str(e)}")
-
-                    ASCIIColors.info(
-                        f"Indexed {len(new_files)} documents from {args.input_dir}"
-                    )
-                except Exception as e:
-                    logging.error(f"Error during startup indexing: {str(e)}")
+                # Start scanning in background
+                with progress_lock:
+                    if not scan_progress["is_scanning"]:
+                        scan_progress["is_scanning"] = True
+                        scan_progress["indexed_count"] = 0
+                        scan_progress["progress"] = 0
+                        # Create background task
+                        task = asyncio.create_task(run_scanning_process())
+                        app.state.background_tasks.add(task)
+                        task.add_done_callback(app.state.background_tasks.discard)
+                        ASCIIColors.info(
+                            f"Started background scanning of documents from {args.input_dir}"
+                        )
+                    else:
+                        ASCIIColors.info(
+                            "Skip document scanning(anohter scanning is active)"
+                        )
 
         yield
 
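The hunk above no longer awaits the indexing loop inside the lifespan; it hands the work to run_scanning_process() and returns immediately, using scan_progress["is_scanning"] as a guard so a scan that is already running is not duplicated. A rough sketch of that guard, assuming a threading.Lock named progress_lock and a scan_progress dict shaped as in the diff; the body of run_scanning_process is assumed, not copied from the repository:

import asyncio
import threading

progress_lock = threading.Lock()
scan_progress = {"is_scanning": False, "indexed_count": 0, "progress": 0}
background_tasks: set = set()

async def run_scanning_process() -> None:
    # Assumed body: index new files one by one, then clear the guard flag.
    try:
        await asyncio.sleep(0.1)  # stand-in for the actual indexing work
    finally:
        with progress_lock:
            scan_progress["is_scanning"] = False

def start_background_scan() -> None:
    with progress_lock:
        if not scan_progress["is_scanning"]:
            scan_progress["is_scanning"] = True
            scan_progress["indexed_count"] = 0
            scan_progress["progress"] = 0
            task = asyncio.create_task(run_scanning_process())
            background_tasks.add(task)
            task.add_done_callback(background_tasks.discard)
        else:
            print("Skip document scanning (another scan is active)")

async def main() -> None:
    start_background_scan()
    start_background_scan()  # second call is skipped while the first scan runs
    await asyncio.sleep(0.5)

asyncio.run(main())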
@@ -1145,9 +1150,15 @@ def create_app(args):
                     pm.install("docling")
                 from docling.document_converter import DocumentConverter
 
-                converter = DocumentConverter()
-                result = converter.convert(file_path)
-                content = result.document.export_to_markdown()
+                async def convert_doc():
+                    def sync_convert():
+                        converter = DocumentConverter()
+                        result = converter.convert(file_path)
+                        return result.document.export_to_markdown()
+
+                    return await asyncio.to_thread(sync_convert)
+
+                content = await convert_doc()
 
             case _:
                 raise ValueError(f"Unsupported file format: {ext}")
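DocumentConverter.convert() is a blocking call, so running it directly inside a coroutine would stall the event loop for the length of the conversion; the hunk above wraps it in asyncio.to_thread(), which executes the synchronous function in a worker thread and awaits its result. A minimal sketch of the same offloading pattern, reusing the docling calls shown in the diff (the file path is a placeholder):

import asyncio
from docling.document_converter import DocumentConverter

async def convert_to_markdown(file_path: str) -> str:
    def sync_convert() -> str:
        # Blocking conversion runs inside a worker thread.
        converter = DocumentConverter()
        result = converter.convert(file_path)
        return result.document.export_to_markdown()

    # asyncio.to_thread keeps the event loop responsive while converting.
    return await asyncio.to_thread(sync_convert)

# Hypothetical usage:
# content = asyncio.run(convert_to_markdown("./inputs/report.pdf"))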
@@ -1439,9 +1450,16 @@ def create_app(args):
             f.write(await file.read())
 
         try:
-            converter = DocumentConverter()
-            result = converter.convert(str(temp_path))
-            content = result.document.export_to_markdown()
+
+            async def convert_doc():
+                def sync_convert():
+                    converter = DocumentConverter()
+                    result = converter.convert(str(temp_path))
+                    return result.document.export_to_markdown()
+
+                return await asyncio.to_thread(sync_convert)
+
+            content = await convert_doc()
         finally:
             # Clean up the temporary file
             temp_path.unlink()