From 9c45824e78754e06f392053aac2460b1988787a1 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Tue, 18 Feb 2025 20:13:09 +0800
Subject: [PATCH] Optimize file handling for DOCX and PPTX processing.

- Removed redundant file content reading.
- Directly passed file to BytesIO.
- Simplified DOCX content extraction.
- Streamlined PPTX slide processing.
- Reduced memory usage in file handling.
---
 lightrag/api/lightrag_server.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index fba81086..f9420153 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -1343,8 +1343,7 @@ def create_app(args):
                     from docx import Document
                     from io import BytesIO

-                    docx_content = await file.read()
-                    docx_file = BytesIO(docx_content)
+                    docx_file = BytesIO(file)
                     doc = Document(docx_file)
                     content = "\n".join(
                         [paragraph.text for paragraph in doc.paragraphs]
@@ -1355,8 +1354,7 @@ def create_app(args):
                     from pptx import Presentation  # type: ignore
                     from io import BytesIO

-                    pptx_content = await file.read()
-                    pptx_file = BytesIO(pptx_content)
+                    pptx_file = BytesIO(file)
                     prs = Presentation(pptx_file)
                     for slide in prs.slides:
                         for shape in slide.shapes: