Optimize file handling for DOCX and PPTX processing.

- Removed redundant file content reading.
- Directly passed file to BytesIO.
- Simplified DOCX content extraction.
- Streamlined PPTX slide processing.
- Reduced memory usage in file handling.
2025-02-18 20:13:09 +08:00
parent 780d0b45f7
commit 9c45824e78
1 changed file with 2 additions and 4 deletions
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -1343,8 +1343,7 @@ def create_app(args):
                    from docx import Document
                    from io import BytesIO
-                    docx_content = await file.read()
+                    docx_file = BytesIO(file)
-                    docx_file = BytesIO(docx_content)
                    doc = Document(docx_file)
                    content = "\n".join(
                        [paragraph.text for paragraph in doc.paragraphs]
@@ -1355,8 +1354,7 @@ def create_app(args):
                    from pptx import Presentation  # type: ignore
                    from io import BytesIO
-                    pptx_content = await file.read()
+                    pptx_file = BytesIO(file)
-                    pptx_file = BytesIO(pptx_content)
                    prs = Presentation(pptx_file)
                    for slide in prs.slides:
                        for shape in slide.shapes: