Update document_routes.py
This commit is contained in:
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field, field_validator
|
|||||||
|
|
||||||
from lightrag import LightRAG
|
from lightrag import LightRAG
|
||||||
from lightrag.base import DocProcessingStatus, DocStatus
|
from lightrag.base import DocProcessingStatus, DocStatus
|
||||||
from ..utils_api import get_api_key_dependency
|
from lightrag.api.utils_api import get_api_key_dependency, global_args
|
||||||
|
|
||||||
|
|
||||||
router = APIRouter(prefix="/documents", tags=["documents"])
|
router = APIRouter(prefix="/documents", tags=["documents"])
|
||||||
@@ -237,6 +237,14 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|||||||
)
|
)
|
||||||
return False
|
return False
|
||||||
case ".pdf":
|
case ".pdf":
|
||||||
|
if global_args["main_args"].document_loading_tool=="DOCLING":
|
||||||
|
if not pm.is_installed("docling"): # type: ignore
|
||||||
|
pm.install("docling")
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
converter = DocumentConverter()
|
||||||
|
result = converter.convert(file_path)
|
||||||
|
content = result.document.export_to_markdown()
|
||||||
|
else:
|
||||||
if not pm.is_installed("pypdf2"): # type: ignore
|
if not pm.is_installed("pypdf2"): # type: ignore
|
||||||
pm.install("pypdf2")
|
pm.install("pypdf2")
|
||||||
from PyPDF2 import PdfReader # type: ignore
|
from PyPDF2 import PdfReader # type: ignore
|
||||||
@@ -247,6 +255,14 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|||||||
for page in reader.pages:
|
for page in reader.pages:
|
||||||
content += page.extract_text() + "\n"
|
content += page.extract_text() + "\n"
|
||||||
case ".docx":
|
case ".docx":
|
||||||
|
if global_args["main_args"].document_loading_tool=="DOCLING":
|
||||||
|
if not pm.is_installed("docling"): # type: ignore
|
||||||
|
pm.install("docling")
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
converter = DocumentConverter()
|
||||||
|
result = converter.convert(file_path)
|
||||||
|
content = result.document.export_to_markdown()
|
||||||
|
else:
|
||||||
if not pm.is_installed("python-docx"): # type: ignore
|
if not pm.is_installed("python-docx"): # type: ignore
|
||||||
pm.install("docx")
|
pm.install("docx")
|
||||||
from docx import Document # type: ignore
|
from docx import Document # type: ignore
|
||||||
@@ -256,6 +272,14 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|||||||
doc = Document(docx_file)
|
doc = Document(docx_file)
|
||||||
content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
|
content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
|
||||||
case ".pptx":
|
case ".pptx":
|
||||||
|
if global_args["main_args"].document_loading_tool=="DOCLING":
|
||||||
|
if not pm.is_installed("docling"): # type: ignore
|
||||||
|
pm.install("docling")
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
converter = DocumentConverter()
|
||||||
|
result = converter.convert(file_path)
|
||||||
|
content = result.document.export_to_markdown()
|
||||||
|
else:
|
||||||
if not pm.is_installed("python-pptx"): # type: ignore
|
if not pm.is_installed("python-pptx"): # type: ignore
|
||||||
pm.install("pptx")
|
pm.install("pptx")
|
||||||
from pptx import Presentation # type: ignore
|
from pptx import Presentation # type: ignore
|
||||||
@@ -268,6 +292,14 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|||||||
if hasattr(shape, "text"):
|
if hasattr(shape, "text"):
|
||||||
content += shape.text + "\n"
|
content += shape.text + "\n"
|
||||||
case ".xlsx":
|
case ".xlsx":
|
||||||
|
if global_args["main_args"].document_loading_tool=="DOCLING":
|
||||||
|
if not pm.is_installed("docling"): # type: ignore
|
||||||
|
pm.install("docling")
|
||||||
|
from docling.document_converter import DocumentConverter
|
||||||
|
converter = DocumentConverter()
|
||||||
|
result = converter.convert(file_path)
|
||||||
|
content = result.document.export_to_markdown()
|
||||||
|
else:
|
||||||
if not pm.is_installed("openpyxl"): # type: ignore
|
if not pm.is_installed("openpyxl"): # type: ignore
|
||||||
pm.install("openpyxl")
|
pm.install("openpyxl")
|
||||||
from openpyxl import load_workbook # type: ignore
|
from openpyxl import load_workbook # type: ignore
|
||||||
|
Reference in New Issue
Block a user