Optimize file handling for DOCX and PPTX processing.

- Remove the redundant intermediate read of the file content (`await file.read()`).
- Pass `file` directly to `BytesIO`.
- Simplify DOCX content extraction.
- Streamline PPTX slide processing.
- Reduce memory usage in file handling.
This commit is contained in:
yangdx
2025-02-18 20:13:09 +08:00
parent 780d0b45f7
commit 9c45824e78

View File

@@ -1343,8 +1343,7 @@ def create_app(args):
 from docx import Document
 from io import BytesIO
-docx_content = await file.read()
-docx_file = BytesIO(docx_content)
+docx_file = BytesIO(file)
 doc = Document(docx_file)
 content = "\n".join(
 [paragraph.text for paragraph in doc.paragraphs]
@@ -1355,8 +1354,7 @@ def create_app(args):
 from pptx import Presentation # type: ignore
 from io import BytesIO
-pptx_content = await file.read()
-pptx_file = BytesIO(pptx_content)
+pptx_file = BytesIO(file)
 prs = Presentation(pptx_file)
 for slide in prs.slides:
 for shape in slide.shapes: