Sync modifications from main branch
This commit is contained in:
@@ -117,6 +117,37 @@ class DocumentManager:
|
|||||||
".docx",
|
".docx",
|
||||||
".pptx",
|
".pptx",
|
||||||
".xlsx",
|
".xlsx",
|
||||||
|
".rtf", # Rich Text Format
|
||||||
|
".odt", # OpenDocument Text
|
||||||
|
".tex", # LaTeX
|
||||||
|
".epub", # Electronic Publication
|
||||||
|
".html", # HyperText Markup Language
|
||||||
|
".htm", # HyperText Markup Language
|
||||||
|
".csv", # Comma-Separated Values
|
||||||
|
".json", # JavaScript Object Notation
|
||||||
|
".xml", # eXtensible Markup Language
|
||||||
|
".yaml", # YAML Ain't Markup Language
|
||||||
|
".yml", # YAML
|
||||||
|
".log", # Log files
|
||||||
|
".conf", # Configuration files
|
||||||
|
".ini", # Initialization files
|
||||||
|
".properties", # Java properties files
|
||||||
|
".sql", # SQL scripts
|
||||||
|
".bat", # Batch files
|
||||||
|
".sh", # Shell scripts
|
||||||
|
".c", # C source code
|
||||||
|
".cpp", # C++ source code
|
||||||
|
".py", # Python source code
|
||||||
|
".java", # Java source code
|
||||||
|
".js", # JavaScript source code
|
||||||
|
".ts", # TypeScript source code
|
||||||
|
".swift", # Swift source code
|
||||||
|
".go", # Go source code
|
||||||
|
".rb", # Ruby source code
|
||||||
|
".php", # PHP source code
|
||||||
|
".css", # Cascading Style Sheets
|
||||||
|
".scss", # Sassy CSS
|
||||||
|
".less", # LESS CSS
|
||||||
),
|
),
|
||||||
):
|
):
|
||||||
self.input_dir = Path(input_dir)
|
self.input_dir = Path(input_dir)
|
||||||
@@ -170,7 +201,41 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
|||||||
|
|
||||||
# Process based on file type
|
# Process based on file type
|
||||||
match ext:
|
match ext:
|
||||||
case ".txt" | ".md":
|
case (
|
||||||
|
".txt"
|
||||||
|
| ".md"
|
||||||
|
| ".html"
|
||||||
|
| ".htm"
|
||||||
|
| ".tex"
|
||||||
|
| ".json"
|
||||||
|
| ".xml"
|
||||||
|
| ".yaml"
|
||||||
|
| ".yml"
|
||||||
|
| ".rtf"
|
||||||
|
| ".odt"
|
||||||
|
| ".epub"
|
||||||
|
| ".csv"
|
||||||
|
| ".log"
|
||||||
|
| ".conf"
|
||||||
|
| ".ini"
|
||||||
|
| ".properties"
|
||||||
|
| ".sql"
|
||||||
|
| ".bat"
|
||||||
|
| ".sh"
|
||||||
|
| ".c"
|
||||||
|
| ".cpp"
|
||||||
|
| ".py"
|
||||||
|
| ".java"
|
||||||
|
| ".js"
|
||||||
|
| ".ts"
|
||||||
|
| ".swift"
|
||||||
|
| ".go"
|
||||||
|
| ".rb"
|
||||||
|
| ".php"
|
||||||
|
| ".css"
|
||||||
|
| ".scss"
|
||||||
|
| ".less"
|
||||||
|
):
|
||||||
content = file.decode("utf-8")
|
content = file.decode("utf-8")
|
||||||
case ".pdf":
|
case ".pdf":
|
||||||
if not pm.is_installed("pypdf2"):
|
if not pm.is_installed("pypdf2"):
|
||||||
|
Reference in New Issue
Block a user