fixed linting

This commit is contained in:
Saifeddine ALOUI
2025-01-14 23:11:23 +01:00
parent c3aba5423f
commit 29661c92da
2 changed files with 50 additions and 46 deletions

View File

@@ -175,7 +175,11 @@ def parse_args():
class DocumentManager:
"""Handles document operations and tracking"""
def __init__(self, input_dir: str, supported_extensions: tuple = (".txt", ".md", ".pdf", ".docx", ".pptx")):
def __init__(
self,
input_dir: str,
supported_extensions: tuple = (".txt", ".md", ".pdf", ".docx", ".pptx"),
):
self.input_dir = Path(input_dir)
self.supported_extensions = supported_extensions
self.indexed_files = set()
@@ -357,8 +361,6 @@ def create_app(args):
),
)
async def index_file(file_path: Union[str, Path]) -> None:
"""Index all files inside the folder with support for multiple file formats
@@ -371,8 +373,6 @@ def create_app(args):
"""
if not pm.is_installed("aiofiles"):
pm.install("aiofiles")
import aiofiles
# Convert to Path object if string
file_path = Path(file_path)
@@ -395,6 +395,7 @@ def create_app(args):
if not pm.is_installed("pypdf2"):
pm.install("pypdf2")
from pypdf2 import PdfReader
# PDF handling
reader = PdfReader(str(file_path))
content = ""
@@ -414,6 +415,7 @@ def create_app(args):
if not pm.is_installed("pptx"):
pm.install("pptx")
from pptx import Presentation
# PowerPoint handling
prs = Presentation(file_path)
content = ""
@@ -433,9 +435,6 @@ def create_app(args):
else:
logging.warning(f"No content extracted from file: {file_path}")
@app.on_event("startup")
async def startup_event():
"""Index all files in input directory during startup"""
@@ -559,6 +558,7 @@ def create_app(args):
)
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.post(
"/documents/file",
response_model=InsertResponse,
@@ -612,7 +612,9 @@ def create_app(args):
docx_content = await file.read()
docx_file = BytesIO(docx_content)
doc = Document(docx_file)
content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
content = "\n".join(
[paragraph.text for paragraph in doc.paragraphs]
)
case ".pptx":
if not pm.is_installed("pptx"):
@@ -661,6 +663,7 @@ def create_app(args):
except Exception as e:
logging.error(f"Error processing file {file.filename}: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.post(
"/documents/batch",
response_model=InsertResponse,
@@ -713,7 +716,9 @@ def create_app(args):
docx_content = await file.read()
docx_file = BytesIO(docx_content)
doc = Document(docx_file)
content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
content = "\n".join(
[paragraph.text for paragraph in doc.paragraphs]
)
case ".pptx":
if not pm.is_installed("pptx"):
@@ -771,7 +776,6 @@ def create_app(args):
logging.error(f"Batch processing error: {str(e)}")
raise HTTPException(status_code=500, detail=str(e))
@app.delete(
"/documents",
response_model=InsertResponse,

View File

@@ -7,6 +7,7 @@ nest_asyncio
numpy
ollama
openai
pipmaster
python-dotenv
python-multipart
tenacity
@@ -15,4 +16,3 @@ torch
tqdm
transformers
uvicorn
pipmaster