Sync modifications from main branch
This commit is contained in:
@@ -117,6 +117,37 @@ class DocumentManager:
|
||||
".docx",
|
||||
".pptx",
|
||||
".xlsx",
|
||||
".rtf", # Rich Text Format
|
||||
".odt", # OpenDocument Text
|
||||
".tex", # LaTeX
|
||||
".epub", # Electronic Publication
|
||||
".html", # HyperText Markup Language
|
||||
".htm", # HyperText Markup Language
|
||||
".csv", # Comma-Separated Values
|
||||
".json", # JavaScript Object Notation
|
||||
".xml", # eXtensible Markup Language
|
||||
".yaml", # YAML Ain't Markup Language
|
||||
".yml", # YAML
|
||||
".log", # Log files
|
||||
".conf", # Configuration files
|
||||
".ini", # Initialization files
|
||||
".properties", # Java properties files
|
||||
".sql", # SQL scripts
|
||||
".bat", # Batch files
|
||||
".sh", # Shell scripts
|
||||
".c", # C source code
|
||||
".cpp", # C++ source code
|
||||
".py", # Python source code
|
||||
".java", # Java source code
|
||||
".js", # JavaScript source code
|
||||
".ts", # TypeScript source code
|
||||
".swift", # Swift source code
|
||||
".go", # Go source code
|
||||
".rb", # Ruby source code
|
||||
".php", # PHP source code
|
||||
".css", # Cascading Style Sheets
|
||||
".scss", # Sassy CSS
|
||||
".less", # LESS CSS
|
||||
),
|
||||
):
|
||||
self.input_dir = Path(input_dir)
|
||||
@@ -170,7 +201,41 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
||||
|
||||
# Process based on file type
|
||||
match ext:
|
||||
case ".txt" | ".md":
|
||||
case (
|
||||
".txt"
|
||||
| ".md"
|
||||
| ".html"
|
||||
| ".htm"
|
||||
| ".tex"
|
||||
| ".json"
|
||||
| ".xml"
|
||||
| ".yaml"
|
||||
| ".yml"
|
||||
| ".rtf"
|
||||
| ".odt"
|
||||
| ".epub"
|
||||
| ".csv"
|
||||
| ".log"
|
||||
| ".conf"
|
||||
| ".ini"
|
||||
| ".properties"
|
||||
| ".sql"
|
||||
| ".bat"
|
||||
| ".sh"
|
||||
| ".c"
|
||||
| ".cpp"
|
||||
| ".py"
|
||||
| ".java"
|
||||
| ".js"
|
||||
| ".ts"
|
||||
| ".swift"
|
||||
| ".go"
|
||||
| ".rb"
|
||||
| ".php"
|
||||
| ".css"
|
||||
| ".scss"
|
||||
| ".less"
|
||||
):
|
||||
content = file.decode("utf-8")
|
||||
case ".pdf":
|
||||
if not pm.is_installed("pypdf2"):
|
||||
|
Reference in New Issue
Block a user