From 6e4daea056940b17f6773c59e492bd8a5eb5d308 Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Wed, 5 Mar 2025 15:36:47 +0100 Subject: [PATCH] Linting --- lightrag/api/routers/document_routes.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/lightrag/api/routers/document_routes.py b/lightrag/api/routers/document_routes.py index 9d161f6c..a6830389 100644 --- a/lightrag/api/routers/document_routes.py +++ b/lightrag/api/routers/document_routes.py @@ -237,10 +237,11 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: ) return False case ".pdf": - if global_args["main_args"].document_loading_tool=="DOCLING": + if global_args["main_args"].document_loading_tool == "DOCLING": if not pm.is_installed("docling"): # type: ignore pm.install("docling") from docling.document_converter import DocumentConverter + converter = DocumentConverter() result = converter.convert(file_path) content = result.document.export_to_markdown() @@ -255,10 +256,11 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: for page in reader.pages: content += page.extract_text() + "\n" case ".docx": - if global_args["main_args"].document_loading_tool=="DOCLING": + if global_args["main_args"].document_loading_tool == "DOCLING": if not pm.is_installed("docling"): # type: ignore pm.install("docling") from docling.document_converter import DocumentConverter + converter = DocumentConverter() result = converter.convert(file_path) content = result.document.export_to_markdown() @@ -270,12 +272,15 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: docx_file = BytesIO(file) doc = Document(docx_file) - content = "\n".join([paragraph.text for paragraph in doc.paragraphs]) + content = "\n".join( + [paragraph.text for paragraph in doc.paragraphs] + ) case ".pptx": - if global_args["main_args"].document_loading_tool=="DOCLING": + if global_args["main_args"].document_loading_tool == "DOCLING": if not pm.is_installed("docling"): # type: ignore pm.install("docling") from docling.document_converter import DocumentConverter + converter = DocumentConverter() result = converter.convert(file_path) content = result.document.export_to_markdown() @@ -292,10 +297,11 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: if hasattr(shape, "text"): content += shape.text + "\n" case ".xlsx": - if global_args["main_args"].document_loading_tool=="DOCLING": + if global_args["main_args"].document_loading_tool == "DOCLING": if not pm.is_installed("docling"): # type: ignore pm.install("docling") from docling.document_converter import DocumentConverter + converter = DocumentConverter() result = converter.convert(file_path) content = result.document.export_to_markdown() @@ -312,7 +318,8 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool: for row in sheet.iter_rows(values_only=True): content += ( "\t".join( - str(cell) if cell is not None else "" for cell in row + str(cell) if cell is not None else "" + for cell in row ) + "\n" )