From cac93424d90fd38019c9dfb9957fa4b170ff8355 Mon Sep 17 00:00:00 2001 From: yangdx Date: Tue, 18 Feb 2025 20:17:42 +0800 Subject: [PATCH] Added support for reading .xlsx files in LightRAG. - Install openpyxl if not present - Load .xlsx file using openpyxl - Extract sheet titles and content - Format rows with tab-separated values - Append sheet content to overall text --- lightrag/api/lightrag_server.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index f9420153..36403241 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -1360,6 +1360,19 @@ def create_app(args): for shape in slide.shapes: if hasattr(shape, "text"): content += shape.text + "\n" + case ".xlsx": + if not pm.is_installed("openpyxl"): + pm.install("openpyxl") + from openpyxl import load_workbook + from io import BytesIO + + xlsx_file = BytesIO(file) + wb = load_workbook(xlsx_file) + for sheet in wb: + content += f"Sheet: {sheet.title}\n" + for row in sheet.iter_rows(values_only=True): + content += "\t".join(str(cell) if cell is not None else "" for cell in row) + "\n" + content += "\n" case _: logging.error( f"Unsupported file type: {file_path.name} (extension {ext})"