Rename the document loading tool setting to document loading engine (DOCUMENT_LOADING_TOOL -> DOCUMENT_LOADING_ENGINE)

This commit is contained in:
Saifeddine ALOUI
2025-03-06 01:11:48 +01:00
parent bf1557fc2c
commit 00f3c6c6dd
2 changed files with 5 additions and 5 deletions

View File

@@ -237,7 +237,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
)
return False
case ".pdf":
if global_args["main_args"].document_loading_tool == "DOCLING":
if global_args["main_args"].document_loading_engine == "DOCLING":
if not pm.is_installed("docling"): # type: ignore
pm.install("docling")
from docling.document_converter import DocumentConverter
@@ -256,7 +256,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
for page in reader.pages:
content += page.extract_text() + "\n"
case ".docx":
if global_args["main_args"].document_loading_tool == "DOCLING":
if global_args["main_args"].document_loading_engine == "DOCLING":
if not pm.is_installed("docling"): # type: ignore
pm.install("docling")
from docling.document_converter import DocumentConverter
@@ -276,7 +276,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
[paragraph.text for paragraph in doc.paragraphs]
)
case ".pptx":
if global_args["main_args"].document_loading_tool == "DOCLING":
if global_args["main_args"].document_loading_engine == "DOCLING":
if not pm.is_installed("docling"): # type: ignore
pm.install("docling")
from docling.document_converter import DocumentConverter
@@ -297,7 +297,7 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
if hasattr(shape, "text"):
content += shape.text + "\n"
case ".xlsx":
if global_args["main_args"].document_loading_tool == "DOCLING":
if global_args["main_args"].document_loading_engine == "DOCLING":
if not pm.is_installed("docling"): # type: ignore
pm.install("docling")
from docling.document_converter import DocumentConverter

View File

@@ -344,7 +344,7 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
# Select document loading engine
args.document_loading_tool = get_env_value("DOCUMENT_LOADING_TOOL", "DOCLING")
args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DOCLING")
ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name