From 38b62d58ad3e37bb2725f7dcd73d2ed5da5f25ae Mon Sep 17 00:00:00 2001 From: Saifeddine ALOUI Date: Thu, 20 Feb 2025 10:59:07 +0100 Subject: [PATCH] Added extra extensions --- lightrag/api/lightrag_server.py | 40 +++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 58931eec..306c9816 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -579,7 +579,39 @@ class DocumentManager: ".docx", ".pptx", ".xlsx", - ), + ".rtf", # Rich Text Format + ".odt", # OpenDocument Text + ".tex", # LaTeX + ".epub", # Electronic Publication + ".html", # HyperText Markup Language + ".htm", # HyperText Markup Language + ".csv", # Comma-Separated Values + ".json", # JavaScript Object Notation + ".xml", # eXtensible Markup Language + ".yaml", # YAML Ain't Markup Language + ".yml", # YAML + ".log", # Log files + ".conf", # Configuration files + ".ini", # Initialization files + ".properties",# Java properties files + ".sql", # SQL scripts + ".bat", # Batch files + ".sh", # Shell scripts + ".c", # C source code + ".cpp", # C++ source code + ".py", # Python source code + ".java", # Java source code + ".js", # JavaScript source code + ".ts", # TypeScript source code + ".swift", # Swift source code + ".go", # Go source code + ".rb", # Ruby source code + ".php", # PHP source code + ".css", # Cascading Style Sheets + ".scss", # Sassy CSS + ".less", # LESS CSS + ) + ): self.input_dir = Path(input_dir) self.supported_extensions = supported_extensions @@ -1148,8 +1180,12 @@ def create_app(args): # Process based on file type match ext: - case ".txt" | ".md": + case ".txt" | ".md" | ".html" | ".htm" | ".tex" | ".json" | ".xml" | ".yaml" | ".yml" | \ + ".rtf" | ".odt" | ".epub" | ".csv" | ".log" | ".conf" | ".ini" | ".properties" | \ + ".sql" | ".bat" | ".sh" | ".c" | ".cpp" | ".py" | ".java" | ".js" | ".ts" | \ + ".swift" | ".go" | ".rb" | ".php" | ".css" | 
".scss" | ".less": content = file.decode("utf-8") + case ".pdf": if not pm.is_installed("pypdf2"): pm.install("pypdf2")