Simplified file loading
This commit is contained in:
@@ -973,33 +973,7 @@ def create_app(args):
|
|||||||
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
|
async with aiofiles.open(file_path, "r", encoding="utf-8") as f:
|
||||||
content = await f.read()
|
content = await f.read()
|
||||||
|
|
||||||
case ".pdf":
|
case ".pdf" | ".docx" | ".pptx" | ".xlsx":
|
||||||
if not pm.is_installed("docling"):
|
|
||||||
pm.install("docling")
|
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
|
|
||||||
converter = DocumentConverter()
|
|
||||||
result = converter.convert(file_path)
|
|
||||||
content = result.document.export_to_markdown()
|
|
||||||
|
|
||||||
case ".docx":
|
|
||||||
if not pm.is_installed("docling"):
|
|
||||||
pm.install("docling")
|
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
|
|
||||||
converter = DocumentConverter()
|
|
||||||
result = converter.convert(file_path)
|
|
||||||
content = result.document.export_to_markdown()
|
|
||||||
|
|
||||||
case ".pptx":
|
|
||||||
if not pm.is_installed("docling"):
|
|
||||||
pm.install("docling")
|
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
|
|
||||||
converter = DocumentConverter()
|
|
||||||
result = converter.convert(file_path)
|
|
||||||
content = result.document.export_to_markdown()
|
|
||||||
case ".xlsx":
|
|
||||||
if not pm.is_installed("docling"):
|
if not pm.is_installed("docling"):
|
||||||
pm.install("docling")
|
pm.install("docling")
|
||||||
from docling.document_converter import DocumentConverter
|
from docling.document_converter import DocumentConverter
|
||||||
@@ -1284,45 +1258,26 @@ def create_app(args):
|
|||||||
text_content = await file.read()
|
text_content = await file.read()
|
||||||
content = text_content.decode("utf-8")
|
content = text_content.decode("utf-8")
|
||||||
|
|
||||||
case ".pdf":
|
case ".pdf" | ".docx" | ".pptx" | ".xlsx":
|
||||||
if not pm.is_installed("docling"):
|
if not pm.is_installed("docling"):
|
||||||
pm.install("docling")
|
pm.install("docling")
|
||||||
from docling.document_converter import DocumentConverter
|
from docling.document_converter import DocumentConverter
|
||||||
|
|
||||||
converter = DocumentConverter()
|
# Create a temporary file to save the uploaded content
|
||||||
result = converter.convert(file.filename)
|
temp_path = Path("temp") / file.filename
|
||||||
content = result.document.export_to_markdown()
|
temp_path.parent.mkdir(exist_ok=True)
|
||||||
|
|
||||||
case ".docx":
|
# Save the uploaded file
|
||||||
if not pm.is_installed("docling"):
|
with temp_path.open("wb") as f:
|
||||||
pm.install("docling")
|
f.write(await file.read())
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
|
|
||||||
converter = DocumentConverter()
|
try:
|
||||||
result = converter.convert(file.filename)
|
converter = DocumentConverter()
|
||||||
content = result.document.export_to_markdown()
|
result = converter.convert(str(temp_path))
|
||||||
|
content = result.document.export_to_markdown()
|
||||||
case ".pptx":
|
finally:
|
||||||
if not pm.is_installed("docling"):
|
# Clean up the temporary file
|
||||||
pm.install("docling")
|
temp_path.unlink()
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
|
|
||||||
converter = DocumentConverter()
|
|
||||||
result = converter.convert(file.filename)
|
|
||||||
content = result.document.export_to_markdown()
|
|
||||||
case ".xlsx":
|
|
||||||
if not pm.is_installed("docling"):
|
|
||||||
pm.install("docling")
|
|
||||||
from docling.document_converter import DocumentConverter
|
|
||||||
|
|
||||||
converter = DocumentConverter()
|
|
||||||
result = converter.convert(file.filename)
|
|
||||||
content = result.document.export_to_markdown()
|
|
||||||
case _:
|
|
||||||
raise HTTPException(
|
|
||||||
status_code=400,
|
|
||||||
detail=f"Unsupported file type. Supported types: {doc_manager.supported_extensions}",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Insert content into RAG system
|
# Insert content into RAG system
|
||||||
if content:
|
if content:
|
||||||
|
Reference in New Issue
Block a user