Optimize: Install python-docx (falling back to docx if that fails) for better .docx parsing.

This commit is contained in:
cuikunyu
2025-04-11 03:10:20 +00:00
parent 9487eca772
commit 135a40d696

View File

@@ -499,6 +499,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
content = result.document.export_to_markdown() content = result.document.export_to_markdown()
else: else:
if not pm.is_installed("python-docx"): # type: ignore if not pm.is_installed("python-docx"): # type: ignore
try:
pm.install("python-docx")
except Exception:
pm.install("docx") pm.install("docx")
from docx import Document # type: ignore from docx import Document # type: ignore
from io import BytesIO from io import BytesIO