Merge branch 'main' into fix-extract-entity-concurrent-problem
This commit is contained in:
29
README.md
29
README.md
@@ -465,7 +465,36 @@ For production level scenarios you will most likely want to leverage an enterpri
|
|||||||
>
|
>
|
||||||
> You can Compile the AGE from source code and fix it.
|
> You can Compile the AGE from source code and fix it.
|
||||||
|
|
||||||
|
### Using Faiss for Storage
|
||||||
|
- Install the required dependencies:
|
||||||
|
```
|
||||||
|
pip install faiss-cpu
|
||||||
|
```
|
||||||
|
You can also install `faiss-gpu` if you have GPU support.
|
||||||
|
|
||||||
|
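- This example uses `sentence-transformers` for embeddings, but you can also use an `OpenAIEmbedding` model with `3072` dimensions; set `embedding_dim` to match the model you choose (`384` for `all-MiniLM-L6-v2` below).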
|
||||||
|
|
||||||
|
```
|
||||||
|
async def embedding_func(texts: list[str]) -> np.ndarray:
|
||||||
|
model = SentenceTransformer('all-MiniLM-L6-v2')
|
||||||
|
embeddings = model.encode(texts, convert_to_numpy=True)
|
||||||
|
return embeddings
|
||||||
|
|
||||||
|
# Initialize LightRAG with the LLM model function and embedding function
|
||||||
|
rag = LightRAG(
|
||||||
|
working_dir=WORKING_DIR,
|
||||||
|
llm_model_func=llm_model_func,
|
||||||
|
embedding_func=EmbeddingFunc(
|
||||||
|
embedding_dim=384,
|
||||||
|
max_token_size=8192,
|
||||||
|
func=embedding_func,
|
||||||
|
),
|
||||||
|
vector_storage="FaissVectorDBStorage",
|
||||||
|
vector_db_storage_cls_kwargs={
|
||||||
|
"cosine_better_than_threshold": 0.3 # Your desired threshold
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
### Insert Custom KG
|
### Insert Custom KG
|
||||||
|
|
||||||
|
99
examples/test_faiss.py
Normal file
99
examples/test_faiss.py
Normal file
@@ -0,0 +1,99 @@
|
|||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from sentence_transformers import SentenceTransformer
|
||||||
|
|
||||||
|
from openai import AzureOpenAI
|
||||||
|
from lightrag import LightRAG, QueryParam
|
||||||
|
from lightrag.utils import EmbeddingFunc
|
||||||
|
|
||||||
|
# Configure Logging
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
|
load_dotenv()
|
||||||
|
AZURE_OPENAI_API_VERSION = os.getenv("AZURE_OPENAI_API_VERSION")
|
||||||
|
AZURE_OPENAI_DEPLOYMENT = os.getenv("AZURE_OPENAI_DEPLOYMENT")
|
||||||
|
AZURE_OPENAI_API_KEY = os.getenv("AZURE_OPENAI_API_KEY")
|
||||||
|
AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
|
||||||
|
|
||||||
|
|
||||||
|
async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, keyword_extraction=False, **kwargs
) -> str:
    """Send one chat-completion request to Azure OpenAI and return the reply text.

    Args:
        prompt: Text of the user message for this turn.
        system_prompt: Optional system message; when truthy it is placed first
            in the conversation.
        history_messages: Optional list of earlier messages, passed through
            unchanged between the system message and the user message.
            Defaults to ``None`` (no history) instead of a shared mutable list.
        keyword_extraction: Accepted for interface compatibility; this
            function does not read it.
        **kwargs: Optional sampling overrides. Only ``temperature``
            (default 0), ``top_p`` (default 1) and ``n`` (default 1) are read.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    # Create a client for AzureOpenAI
    client = AzureOpenAI(
        api_key=AZURE_OPENAI_API_KEY,
        api_version=AZURE_OPENAI_API_VERSION,
        azure_endpoint=AZURE_OPENAI_ENDPOINT,
    )

    # Build the messages list for the conversation
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    if history_messages:
        messages.extend(history_messages)
    messages.append({"role": "user", "content": prompt})

    # Call the LLM.
    # NOTE(review): this call is not awaited, so it runs synchronously inside
    # the coroutine; consider an async client if concurrent requests matter.
    chat_completion = client.chat.completions.create(
        model=AZURE_OPENAI_DEPLOYMENT,
        messages=messages,
        temperature=kwargs.get("temperature", 0),
        top_p=kwargs.get("top_p", 1),
        n=kwargs.get("n", 1),
    )

    return chat_completion.choices[0].message.content
|
||||||
|
|
||||||
|
|
||||||
|
# Loaded SentenceTransformer instances, keyed by model name, so the model is
# constructed once per process instead of once per embedding call.
_EMBEDDING_MODELS: dict = {}


def _get_embedding_model(model_name: str = "all-MiniLM-L6-v2"):
    """Return the cached SentenceTransformer for *model_name*, loading it on first use."""
    model = _EMBEDDING_MODELS.get(model_name)
    if model is None:
        model = SentenceTransformer(model_name)
        _EMBEDDING_MODELS[model_name] = model
    return model


async def embedding_func(texts: list[str]) -> np.ndarray:
    """Embed *texts* with the ``all-MiniLM-L6-v2`` sentence-transformers model.

    Args:
        texts: Strings to encode.

    Returns:
        The result of ``model.encode(texts, convert_to_numpy=True)``.
    """
    model = _get_embedding_model()
    embeddings = model.encode(texts, convert_to_numpy=True)
    return embeddings
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Index two local text files with a Faiss-backed LightRAG and run sample queries.

    Reads ``./book_1.txt`` and ``./book_2.txt``, inserts their contents, then
    prints the answer to one question in the naive, local, global and hybrid
    query modes.
    """
    WORKING_DIR = "./dickens"

    # Initialize LightRAG with the LLM model function and embedding function
    rag = LightRAG(
        working_dir=WORKING_DIR,
        llm_model_func=llm_model_func,
        embedding_func=EmbeddingFunc(
            embedding_dim=384,
            max_token_size=8192,
            func=embedding_func,
        ),
        vector_storage="FaissVectorDBStorage",
        vector_db_storage_cls_kwargs={
            "cosine_better_than_threshold": 0.3  # Your desired threshold
        },
    )

    # Read each book under a context manager so the file handles are closed
    # as soon as their contents are in memory.
    documents = []
    for book_path in ("./book_1.txt", "./book_2.txt"):
        with open(book_path, encoding="utf-8") as book_file:
            documents.append(book_file.read())

    # Insert the book texts into LightRAG
    rag.insert(documents)

    query_text = "What are the main themes?"

    # Same output as four hand-written print pairs: every heading after the
    # first is preceded by a blank line.
    for position, mode in enumerate(("naive", "local", "global", "hybrid")):
        leading = "" if position == 0 else "\n"
        print(f"{leading}Result ({mode.capitalize()}):")
        print(rag.query(query_text, param=QueryParam(mode=mode)))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
@@ -1,9 +1,37 @@
|
|||||||
from fastapi import FastAPI, HTTPException, File, UploadFile, Form, Request
|
from fastapi import (
|
||||||
|
FastAPI,
|
||||||
|
HTTPException,
|
||||||
|
File,
|
||||||
|
UploadFile,
|
||||||
|
Form,
|
||||||
|
Request,
|
||||||
|
BackgroundTasks,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Backend (Python)
|
||||||
|
# Add this to store progress globally
|
||||||
|
from typing import Dict
|
||||||
|
import threading
|
||||||
|
|
||||||
|
# Global progress tracker
|
||||||
|
scan_progress: Dict = {
|
||||||
|
"is_scanning": False,
|
||||||
|
"current_file": "",
|
||||||
|
"indexed_count": 0,
|
||||||
|
"total_files": 0,
|
||||||
|
"progress": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Lock for thread-safe operations
|
||||||
|
progress_lock = threading.Lock()
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
import logging
|
import logging
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
from typing import List, Dict, Any, Optional, Union
|
from typing import List, Dict, Any, Optional, Union
|
||||||
@@ -16,7 +44,6 @@ from pathlib import Path
|
|||||||
import shutil
|
import shutil
|
||||||
import aiofiles
|
import aiofiles
|
||||||
from ascii_colors import trace_exception, ASCIIColors
|
from ascii_colors import trace_exception, ASCIIColors
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import configparser
|
import configparser
|
||||||
|
|
||||||
@@ -538,7 +565,7 @@ class DocumentManager:
|
|||||||
# Create input directory if it doesn't exist
|
# Create input directory if it doesn't exist
|
||||||
self.input_dir.mkdir(parents=True, exist_ok=True)
|
self.input_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
def scan_directory(self) -> List[Path]:
|
def scan_directory_for_new_files(self) -> List[Path]:
|
||||||
"""Scan input directory for new files"""
|
"""Scan input directory for new files"""
|
||||||
new_files = []
|
new_files = []
|
||||||
for ext in self.supported_extensions:
|
for ext in self.supported_extensions:
|
||||||
@@ -547,6 +574,14 @@ class DocumentManager:
|
|||||||
new_files.append(file_path)
|
new_files.append(file_path)
|
||||||
return new_files
|
return new_files
|
||||||
|
|
||||||
|
def scan_directory(self) -> List[Path]:
    """Collect every file under the input directory that has a supported extension.

    For each entry in ``self.supported_extensions`` the input directory is
    searched recursively with the pattern ``*<extension>``. Matches are
    returned in one list, grouped by extension in the order the extensions
    are listed. No filtering against already-indexed files happens here.

    Returns:
        List[Path]: Paths of all matching files.
    """
    return [
        matched_path
        for suffix in self.supported_extensions
        for matched_path in self.input_dir.rglob(f"*{suffix}")
    ]
|
||||||
|
|
||||||
def mark_as_indexed(self, file_path: Path):
|
def mark_as_indexed(self, file_path: Path):
|
||||||
"""Mark a file as indexed"""
|
"""Mark a file as indexed"""
|
||||||
self.indexed_files.add(file_path)
|
self.indexed_files.add(file_path)
|
||||||
@@ -730,7 +765,7 @@ def create_app(args):
|
|||||||
# Startup logic
|
# Startup logic
|
||||||
if args.auto_scan_at_startup:
|
if args.auto_scan_at_startup:
|
||||||
try:
|
try:
|
||||||
new_files = doc_manager.scan_directory()
|
new_files = doc_manager.scan_directory_for_new_files()
|
||||||
for file_path in new_files:
|
for file_path in new_files:
|
||||||
try:
|
try:
|
||||||
await index_file(file_path)
|
await index_file(file_path)
|
||||||
@@ -983,42 +1018,59 @@ def create_app(args):
|
|||||||
logging.warning(f"No content extracted from file: {file_path}")
|
logging.warning(f"No content extracted from file: {file_path}")
|
||||||
|
|
||||||
@app.post("/documents/scan", dependencies=[Depends(optional_api_key)])
|
@app.post("/documents/scan", dependencies=[Depends(optional_api_key)])
|
||||||
async def scan_for_new_documents():
|
async def scan_for_new_documents(background_tasks: BackgroundTasks):
|
||||||
"""
|
"""Trigger the scanning process"""
|
||||||
Manually trigger scanning for new documents in the directory managed by `doc_manager`.
|
global scan_progress
|
||||||
|
|
||||||
This endpoint facilitates manual initiation of a document scan to identify and index new files.
|
with progress_lock:
|
||||||
It processes all newly detected files, attempts indexing each file, logs any errors that occur,
|
if scan_progress["is_scanning"]:
|
||||||
and returns a summary of the operation.
|
return {"status": "already_scanning"}
|
||||||
|
|
||||||
Returns:
|
scan_progress["is_scanning"] = True
|
||||||
dict: A dictionary containing:
|
scan_progress["indexed_count"] = 0
|
||||||
- "status" (str): Indicates success or failure of the scanning process.
|
scan_progress["progress"] = 0
|
||||||
- "indexed_count" (int): The number of successfully indexed documents.
|
|
||||||
- "total_documents" (int): Total number of documents that have been indexed so far.
|
# Start the scanning process in the background
|
||||||
|
background_tasks.add_task(run_scanning_process)
|
||||||
|
|
||||||
|
return {"status": "scanning_started"}
|
||||||
|
|
||||||
|
async def run_scanning_process():
|
||||||
|
"""Background task to scan and index documents"""
|
||||||
|
global scan_progress
|
||||||
|
|
||||||
Raises:
|
|
||||||
HTTPException: If an error occurs during the document scanning process, a 500 status
|
|
||||||
code is returned with details about the exception.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
new_files = doc_manager.scan_directory()
|
new_files = doc_manager.scan_directory_for_new_files()
|
||||||
indexed_count = 0
|
scan_progress["total_files"] = len(new_files)
|
||||||
|
|
||||||
for file_path in new_files:
|
for file_path in new_files:
|
||||||
try:
|
try:
|
||||||
|
with progress_lock:
|
||||||
|
scan_progress["current_file"] = os.path.basename(file_path)
|
||||||
|
|
||||||
await index_file(file_path)
|
await index_file(file_path)
|
||||||
indexed_count += 1
|
|
||||||
|
with progress_lock:
|
||||||
|
scan_progress["indexed_count"] += 1
|
||||||
|
scan_progress["progress"] = (
|
||||||
|
scan_progress["indexed_count"]
|
||||||
|
/ scan_progress["total_files"]
|
||||||
|
) * 100
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error indexing file {file_path}: {str(e)}")
|
logging.error(f"Error indexing file {file_path}: {str(e)}")
|
||||||
|
|
||||||
return {
|
|
||||||
"status": "success",
|
|
||||||
"indexed_count": indexed_count,
|
|
||||||
"total_documents": len(doc_manager.indexed_files),
|
|
||||||
}
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=500, detail=str(e))
|
logging.error(f"Error during scanning process: {str(e)}")
|
||||||
|
finally:
|
||||||
|
with progress_lock:
|
||||||
|
scan_progress["is_scanning"] = False
|
||||||
|
|
||||||
|
@app.get("/documents/scan-progress")
async def get_scan_progress():
    """Get the current scanning progress.

    Returns:
        dict: A snapshot copy of ``scan_progress`` taken while holding
        ``progress_lock``.
    """
    with progress_lock:
        # Hand back a copy rather than the shared dict: the caller uses the
        # returned object after the lock has been released, when the scanning
        # task may already be updating the original.
        return dict(scan_progress)
|
||||||
|
|
||||||
@app.post("/documents/upload", dependencies=[Depends(optional_api_key)])
|
@app.post("/documents/upload", dependencies=[Depends(optional_api_key)])
|
||||||
async def upload_to_input_dir(file: UploadFile = File(...)):
|
async def upload_to_input_dir(file: UploadFile = File(...)):
|
||||||
@@ -1849,7 +1901,7 @@ def create_app(args):
|
|||||||
"status": "healthy",
|
"status": "healthy",
|
||||||
"working_directory": str(args.working_dir),
|
"working_directory": str(args.working_dir),
|
||||||
"input_directory": str(args.input_dir),
|
"input_directory": str(args.input_dir),
|
||||||
"indexed_files": files,
|
"indexed_files": [str(f) for f in files],
|
||||||
"indexed_files_count": len(files),
|
"indexed_files_count": len(files),
|
||||||
"configuration": {
|
"configuration": {
|
||||||
# LLM configuration binding/host address (if applicable)/model (if applicable)
|
# LLM configuration binding/host address (if applicable)/model (if applicable)
|
||||||
|
@@ -98,7 +98,7 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script src="/js/lightrag_api.js"></script>
|
<script src="/js/api.js"></script>
|
||||||
|
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
@@ -1,375 +1,408 @@
|
|||||||
// State management
|
// State management
|
||||||
const state = {
|
const state = {
|
||||||
apiKey: localStorage.getItem('apiKey') || '',
|
apiKey: localStorage.getItem('apiKey') || '',
|
||||||
files: [],
|
files: [],
|
||||||
indexedFiles: [],
|
indexedFiles: [],
|
||||||
currentPage: 'file-manager'
|
currentPage: 'file-manager'
|
||||||
};
|
};
|
||||||
|
|
||||||
// Utility functions
|
// Utility functions
|
||||||
const showToast = (message, duration = 3000) => {
|
const showToast = (message, duration = 3000) => {
|
||||||
const toast = document.getElementById('toast');
|
const toast = document.getElementById('toast');
|
||||||
toast.querySelector('div').textContent = message;
|
toast.querySelector('div').textContent = message;
|
||||||
toast.classList.remove('hidden');
|
toast.classList.remove('hidden');
|
||||||
setTimeout(() => toast.classList.add('hidden'), duration);
|
setTimeout(() => toast.classList.add('hidden'), duration);
|
||||||
};
|
};
|
||||||
|
|
||||||
const fetchWithAuth = async (url, options = {}) => {
|
const fetchWithAuth = async (url, options = {}) => {
|
||||||
const headers = {
|
const headers = {
|
||||||
...(options.headers || {}),
|
...(options.headers || {}),
|
||||||
...(state.apiKey ? { 'Authorization': `Bearer ${state.apiKey}` } : {})
|
...(state.apiKey ? { 'X-API-Key': state.apiKey } : {}) // Use X-API-Key instead of Bearer
|
||||||
};
|
};
|
||||||
return fetch(url, { ...options, headers });
|
return fetch(url, { ...options, headers });
|
||||||
};
|
};
|
||||||
|
|
||||||
// Page renderers
|
|
||||||
const pages = {
|
// Page renderers
|
||||||
'file-manager': () => `
|
const pages = {
|
||||||
<div class="space-y-6">
|
'file-manager': () => `
|
||||||
<h2 class="text-2xl font-bold text-gray-800">File Manager</h2>
|
<div class="space-y-6">
|
||||||
|
<h2 class="text-2xl font-bold text-gray-800">File Manager</h2>
|
||||||
<div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center hover:border-gray-400 transition-colors">
|
|
||||||
<input type="file" id="fileInput" multiple accept=".txt,.md,.doc,.docx,.pdf,.pptx" class="hidden">
|
<div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center hover:border-gray-400 transition-colors">
|
||||||
<label for="fileInput" class="cursor-pointer">
|
<input type="file" id="fileInput" multiple accept=".txt,.md,.doc,.docx,.pdf,.pptx" class="hidden">
|
||||||
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<label for="fileInput" class="cursor-pointer">
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M15 13l-3-3m0 0l-3 3m3-3v12"/>
|
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
</svg>
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M15 13l-3-3m0 0l-3 3m3-3v12"/>
|
||||||
<p class="mt-2 text-gray-600">Drag files here or click to select</p>
|
</svg>
|
||||||
<p class="text-sm text-gray-500">Supported formats: TXT, MD, DOC, PDF, PPTX</p>
|
<p class="mt-2 text-gray-600">Drag files here or click to select</p>
|
||||||
</label>
|
<p class="text-sm text-gray-500">Supported formats: TXT, MD, DOC, PDF, PPTX</p>
|
||||||
</div>
|
</label>
|
||||||
|
</div>
|
||||||
<div id="fileList" class="space-y-2">
|
|
||||||
<h3 class="text-lg font-semibold text-gray-700">Selected Files</h3>
|
<div id="fileList" class="space-y-2">
|
||||||
<div class="space-y-2"></div>
|
<h3 class="text-lg font-semibold text-gray-700">Selected Files</h3>
|
||||||
</div>
|
<div class="space-y-2"></div>
|
||||||
<div id="uploadProgress" class="hidden mt-4">
|
</div>
|
||||||
<div class="w-full bg-gray-200 rounded-full h-2.5">
|
<div id="uploadProgress" class="hidden mt-4">
|
||||||
<div class="bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
|
<div class="w-full bg-gray-200 rounded-full h-2.5">
|
||||||
</div>
|
<div class="bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
|
||||||
<p class="text-sm text-gray-600 mt-2"><span id="uploadStatus">0</span> files processed</p>
|
</div>
|
||||||
</div>
|
<p class="text-sm text-gray-600 mt-2"><span id="uploadStatus">0</span> files processed</p>
|
||||||
|
</div>
|
||||||
<button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
<div class="flex items-center space-x-4 bg-gray-100 p-4 rounded-lg shadow-md">
|
||||||
Upload & Index Files
|
<button id="rescanBtn" class="flex items-center bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
||||||
</button>
|
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20" fill="currentColor" class="mr-2">
|
||||||
|
<path d="M12 4a8 8 0 1 1-8 8H2.5a9.5 9.5 0 1 0 2.8-6.7L2 3v6h6L5.7 6.7A7.96 7.96 0 0 1 12 4z"/>
|
||||||
<div id="indexedFiles" class="space-y-2">
|
</svg>
|
||||||
<h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3>
|
Rescan Files
|
||||||
<div class="space-y-2"></div>
|
</button>
|
||||||
</div>
|
|
||||||
<button id="rescanBtn" class="flex items-center bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
<button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
||||||
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20" fill="currentColor" class="mr-2">
|
Upload & Index Files
|
||||||
<path d="M12 4a8 8 0 1 1-8 8H2.5a9.5 9.5 0 1 0 2.8-6.7L2 3v6h6L5.7 6.7A7.96 7.96 0 0 1 12 4z"/>
|
</button>
|
||||||
</svg>
|
</div>
|
||||||
Rescan Files
|
|
||||||
</button>
|
<div id="indexedFiles" class="space-y-2">
|
||||||
|
<h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3>
|
||||||
|
<div class="space-y-2"></div>
|
||||||
</div>
|
</div>
|
||||||
`,
|
|
||||||
|
|
||||||
'query': () => `
|
</div>
|
||||||
<div class="space-y-6">
|
`,
|
||||||
<h2 class="text-2xl font-bold text-gray-800">Query Database</h2>
|
|
||||||
|
'query': () => `
|
||||||
<div class="space-y-4">
|
<div class="space-y-6">
|
||||||
<div>
|
<h2 class="text-2xl font-bold text-gray-800">Query Database</h2>
|
||||||
<label class="block text-sm font-medium text-gray-700">Query Mode</label>
|
|
||||||
<select id="queryMode" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
|
<div class="space-y-4">
|
||||||
<option value="hybrid">Hybrid</option>
|
<div>
|
||||||
<option value="local">Local</option>
|
<label class="block text-sm font-medium text-gray-700">Query Mode</label>
|
||||||
<option value="global">Global</option>
|
<select id="queryMode" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
|
||||||
<option value="naive">Naive</option>
|
<option value="hybrid">Hybrid</option>
|
||||||
</select>
|
<option value="local">Local</option>
|
||||||
</div>
|
<option value="global">Global</option>
|
||||||
|
<option value="naive">Naive</option>
|
||||||
<div>
|
</select>
|
||||||
<label class="block text-sm font-medium text-gray-700">Query</label>
|
</div>
|
||||||
<textarea id="queryInput" rows="4" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"></textarea>
|
|
||||||
</div>
|
<div>
|
||||||
|
<label class="block text-sm font-medium text-gray-700">Query</label>
|
||||||
<button id="queryBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
<textarea id="queryInput" rows="4" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"></textarea>
|
||||||
Send Query
|
</div>
|
||||||
</button>
|
|
||||||
|
<button id="queryBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
||||||
<div id="queryResult" class="mt-4 p-4 bg-white rounded-lg shadow"></div>
|
Send Query
|
||||||
</div>
|
</button>
|
||||||
</div>
|
|
||||||
`,
|
<div id="queryResult" class="mt-4 p-4 bg-white rounded-lg shadow"></div>
|
||||||
|
</div>
|
||||||
'knowledge-graph': () => `
|
</div>
|
||||||
<div class="flex items-center justify-center h-full">
|
`,
|
||||||
<div class="text-center">
|
|
||||||
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
'knowledge-graph': () => `
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11H5m14 0a2 2 0 012 2v6a2 2 0 01-2 2H5a2 2 0 01-2-2v-6a2 2 0 012-2m14 0V9a2 2 0 00-2-2M5 11V9a2 2 0 012-2m0 0V5a2 2 0 012-2h6a2 2 0 012 2v2M7 7h10"/>
|
<div class="flex items-center justify-center h-full">
|
||||||
</svg>
|
<div class="text-center">
|
||||||
<h3 class="mt-2 text-sm font-medium text-gray-900">Under Construction</h3>
|
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
<p class="mt-1 text-sm text-gray-500">Knowledge graph visualization will be available in a future update.</p>
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11H5m14 0a2 2 0 012 2v6a2 2 0 01-2 2H5a2 2 0 01-2-2v-6a2 2 0 012-2m14 0V9a2 2 0 00-2-2M5 11V9a2 2 0 012-2m0 0V5a2 2 0 012-2h6a2 2 0 012 2v2M7 7h10"/>
|
||||||
</div>
|
</svg>
|
||||||
</div>
|
<h3 class="mt-2 text-sm font-medium text-gray-900">Under Construction</h3>
|
||||||
`,
|
<p class="mt-1 text-sm text-gray-500">Knowledge graph visualization will be available in a future update.</p>
|
||||||
|
</div>
|
||||||
'status': () => `
|
</div>
|
||||||
<div class="space-y-6">
|
`,
|
||||||
<h2 class="text-2xl font-bold text-gray-800">System Status</h2>
|
|
||||||
<div id="statusContent" class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
'status': () => `
|
||||||
<div class="p-6 bg-white rounded-lg shadow-sm">
|
<div class="space-y-6">
|
||||||
<h3 class="text-lg font-semibold mb-4">System Health</h3>
|
<h2 class="text-2xl font-bold text-gray-800">System Status</h2>
|
||||||
<div id="healthStatus"></div>
|
<div id="statusContent" class="grid grid-cols-1 md:grid-cols-2 gap-6">
|
||||||
</div>
|
<div class="p-6 bg-white rounded-lg shadow-sm">
|
||||||
<div class="p-6 bg-white rounded-lg shadow-sm">
|
<h3 class="text-lg font-semibold mb-4">System Health</h3>
|
||||||
<h3 class="text-lg font-semibold mb-4">Configuration</h3>
|
<div id="healthStatus"></div>
|
||||||
<div id="configStatus"></div>
|
</div>
|
||||||
</div>
|
<div class="p-6 bg-white rounded-lg shadow-sm">
|
||||||
</div>
|
<h3 class="text-lg font-semibold mb-4">Configuration</h3>
|
||||||
</div>
|
<div id="configStatus"></div>
|
||||||
`,
|
</div>
|
||||||
|
</div>
|
||||||
'settings': () => `
|
</div>
|
||||||
<div class="space-y-6">
|
`,
|
||||||
<h2 class="text-2xl font-bold text-gray-800">Settings</h2>
|
|
||||||
|
'settings': () => `
|
||||||
<div class="max-w-xl">
|
<div class="space-y-6">
|
||||||
<div class="space-y-4">
|
<h2 class="text-2xl font-bold text-gray-800">Settings</h2>
|
||||||
<div>
|
|
||||||
<label class="block text-sm font-medium text-gray-700">API Key</label>
|
<div class="max-w-xl">
|
||||||
<input type="password" id="apiKeyInput" value="${state.apiKey}"
|
<div class="space-y-4">
|
||||||
class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
|
<div>
|
||||||
</div>
|
<label class="block text-sm font-medium text-gray-700">API Key</label>
|
||||||
|
<input type="password" id="apiKeyInput" value="${state.apiKey}"
|
||||||
<button id="saveSettings" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
|
||||||
Save Settings
|
</div>
|
||||||
</button>
|
|
||||||
</div>
|
<button id="saveSettings" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
|
||||||
</div>
|
Save Settings
|
||||||
</div>
|
</button>
|
||||||
`
|
</div>
|
||||||
};
|
</div>
|
||||||
|
</div>
|
||||||
// Page handlers
|
`
|
||||||
const handlers = {
|
};
|
||||||
'file-manager': () => {
|
|
||||||
const fileInput = document.getElementById('fileInput');
|
// Page handlers
|
||||||
const dropZone = fileInput.parentElement.parentElement;
|
const handlers = {
|
||||||
const fileList = document.querySelector('#fileList div');
|
'file-manager': () => {
|
||||||
const indexedFiles = document.querySelector('#indexedFiles div');
|
const fileInput = document.getElementById('fileInput');
|
||||||
const uploadBtn = document.getElementById('uploadBtn');
|
const dropZone = fileInput.parentElement.parentElement;
|
||||||
|
const fileList = document.querySelector('#fileList div');
|
||||||
const updateFileList = () => {
|
const indexedFiles = document.querySelector('#indexedFiles div');
|
||||||
fileList.innerHTML = state.files.map(file => `
|
const uploadBtn = document.getElementById('uploadBtn');
|
||||||
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
|
|
||||||
<span>${file.name}</span>
|
const updateFileList = () => {
|
||||||
<button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
|
fileList.innerHTML = state.files.map(file => `
|
||||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
|
<span>${file.name}</span>
|
||||||
</svg>
|
<button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
|
||||||
</button>
|
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
</div>
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
|
||||||
`).join('');
|
</svg>
|
||||||
};
|
</button>
|
||||||
|
</div>
|
||||||
const updateIndexedFiles = async () => {
|
`).join('');
|
||||||
const response = await fetchWithAuth('/health');
|
};
|
||||||
const data = await response.json();
|
|
||||||
indexedFiles.innerHTML = data.indexed_files.map(file => `
|
const updateIndexedFiles = async () => {
|
||||||
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
|
const response = await fetchWithAuth('/health');
|
||||||
<span>${file}</span>
|
const data = await response.json();
|
||||||
</div>
|
indexedFiles.innerHTML = data.indexed_files.map(file => `
|
||||||
`).join('');
|
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
|
||||||
};
|
<span>${file}</span>
|
||||||
|
</div>
|
||||||
dropZone.addEventListener('dragover', (e) => {
|
`).join('');
|
||||||
e.preventDefault();
|
};
|
||||||
dropZone.classList.add('border-blue-500');
|
|
||||||
});
|
dropZone.addEventListener('dragover', (e) => {
|
||||||
|
e.preventDefault();
|
||||||
dropZone.addEventListener('dragleave', () => {
|
dropZone.classList.add('border-blue-500');
|
||||||
dropZone.classList.remove('border-blue-500');
|
});
|
||||||
});
|
|
||||||
|
dropZone.addEventListener('dragleave', () => {
|
||||||
dropZone.addEventListener('drop', (e) => {
|
dropZone.classList.remove('border-blue-500');
|
||||||
e.preventDefault();
|
});
|
||||||
dropZone.classList.remove('border-blue-500');
|
|
||||||
const files = Array.from(e.dataTransfer.files);
|
dropZone.addEventListener('drop', (e) => {
|
||||||
state.files.push(...files);
|
e.preventDefault();
|
||||||
updateFileList();
|
dropZone.classList.remove('border-blue-500');
|
||||||
});
|
const files = Array.from(e.dataTransfer.files);
|
||||||
|
state.files.push(...files);
|
||||||
fileInput.addEventListener('change', () => {
|
updateFileList();
|
||||||
state.files.push(...Array.from(fileInput.files));
|
});
|
||||||
updateFileList();
|
|
||||||
});
|
fileInput.addEventListener('change', () => {
|
||||||
|
state.files.push(...Array.from(fileInput.files));
|
||||||
uploadBtn.addEventListener('click', async () => {
|
updateFileList();
|
||||||
if (state.files.length === 0) {
|
});
|
||||||
showToast('Please select files to upload');
|
|
||||||
return;
|
uploadBtn.addEventListener('click', async () => {
|
||||||
}
|
if (state.files.length === 0) {
|
||||||
let apiKey = localStorage.getItem('apiKey') || '';
|
showToast('Please select files to upload');
|
||||||
const progress = document.getElementById('uploadProgress');
|
return;
|
||||||
const progressBar = progress.querySelector('div');
|
}
|
||||||
const statusText = document.getElementById('uploadStatus');
|
let apiKey = localStorage.getItem('apiKey') || '';
|
||||||
progress.classList.remove('hidden');
|
const progress = document.getElementById('uploadProgress');
|
||||||
|
const progressBar = progress.querySelector('div');
|
||||||
for (let i = 0; i < state.files.length; i++) {
|
const statusText = document.getElementById('uploadStatus');
|
||||||
const formData = new FormData();
|
progress.classList.remove('hidden');
|
||||||
formData.append('file', state.files[i]);
|
|
||||||
|
for (let i = 0; i < state.files.length; i++) {
|
||||||
try {
|
const formData = new FormData();
|
||||||
await fetch('/documents/upload', {
|
formData.append('file', state.files[i]);
|
||||||
method: 'POST',
|
|
||||||
headers: apiKey ? { 'Authorization': `Bearer ${apiKey}` } : {},
|
try {
|
||||||
body: formData
|
await fetch('/documents/upload', {
|
||||||
});
|
method: 'POST',
|
||||||
|
headers: apiKey ? { 'Authorization': `Bearer ${apiKey}` } : {},
|
||||||
const percentage = ((i + 1) / state.files.length) * 100;
|
body: formData
|
||||||
progressBar.style.width = `${percentage}%`;
|
});
|
||||||
statusText.textContent = `${i + 1}/${state.files.length}`;
|
|
||||||
} catch (error) {
|
const percentage = ((i + 1) / state.files.length) * 100;
|
||||||
console.error('Upload error:', error);
|
progressBar.style.width = `${percentage}%`;
|
||||||
}
|
statusText.textContent = `${i + 1}/${state.files.length}`;
|
||||||
}
|
} catch (error) {
|
||||||
progress.classList.add('hidden');
|
console.error('Upload error:', error);
|
||||||
});
|
}
|
||||||
rescanBtn.addEventListener('click', async () => {
|
}
|
||||||
let apiKey = localStorage.getItem('apiKey') || '';
|
progress.classList.add('hidden');
|
||||||
const progress = document.getElementById('uploadProgress');
|
});
|
||||||
const progressBar = progress.querySelector('div');
|
|
||||||
const statusText = document.getElementById('uploadStatus');
|
rescanBtn.addEventListener('click', async () => {
|
||||||
progress.classList.remove('hidden');
|
const progress = document.getElementById('uploadProgress');
|
||||||
try {
|
const progressBar = progress.querySelector('div');
|
||||||
const scan_output = await fetch('/documents/scan', {
|
const statusText = document.getElementById('uploadStatus');
|
||||||
method: 'GET',
|
progress.classList.remove('hidden');
|
||||||
});
|
|
||||||
statusText.textContent = scan_output.data;
|
try {
|
||||||
} catch (error) {
|
// Start the scanning process
|
||||||
console.error('Upload error:', error);
|
const scanResponse = await fetch('/documents/scan', {
|
||||||
}
|
method: 'POST',
|
||||||
progress.classList.add('hidden');
|
});
|
||||||
});
|
|
||||||
updateIndexedFiles();
|
if (!scanResponse.ok) {
|
||||||
},
|
throw new Error('Scan failed to start');
|
||||||
|
}
|
||||||
'query': () => {
|
|
||||||
const queryBtn = document.getElementById('queryBtn');
|
// Start polling for progress
|
||||||
const queryInput = document.getElementById('queryInput');
|
const pollInterval = setInterval(async () => {
|
||||||
const queryMode = document.getElementById('queryMode');
|
const progressResponse = await fetch('/documents/scan-progress');
|
||||||
const queryResult = document.getElementById('queryResult');
|
const progressData = await progressResponse.json();
|
||||||
|
|
||||||
let apiKey = localStorage.getItem('apiKey') || '';
|
// Update progress bar
|
||||||
|
progressBar.style.width = `${progressData.progress}%`;
|
||||||
queryBtn.addEventListener('click', async () => {
|
|
||||||
const query = queryInput.value.trim();
|
// Update status text
|
||||||
if (!query) {
|
if (progressData.total_files > 0) {
|
||||||
showToast('Please enter a query');
|
statusText.textContent = `Processing ${progressData.current_file} (${progressData.indexed_count}/${progressData.total_files})`;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
|
// Check if scanning is complete
|
||||||
queryBtn.disabled = true;
|
if (!progressData.is_scanning) {
|
||||||
queryBtn.innerHTML = `
|
clearInterval(pollInterval);
|
||||||
<svg class="animate-spin h-5 w-5 mr-3" viewBox="0 0 24 24">
|
progress.classList.add('hidden');
|
||||||
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" fill="none"/>
|
statusText.textContent = 'Scan complete!';
|
||||||
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"/>
|
}
|
||||||
</svg>
|
}, 1000); // Poll every second
|
||||||
Processing...
|
|
||||||
`;
|
} catch (error) {
|
||||||
|
console.error('Upload error:', error);
|
||||||
try {
|
progress.classList.add('hidden');
|
||||||
const response = await fetchWithAuth('/query', {
|
statusText.textContent = 'Error during scanning process';
|
||||||
method: 'POST',
|
}
|
||||||
headers: { 'Content-Type': 'application/json' },
|
});
|
||||||
body: JSON.stringify({
|
|
||||||
query,
|
|
||||||
mode: queryMode.value,
|
updateIndexedFiles();
|
||||||
stream: false,
|
},
|
||||||
only_need_context: false
|
|
||||||
})
|
'query': () => {
|
||||||
});
|
const queryBtn = document.getElementById('queryBtn');
|
||||||
|
const queryInput = document.getElementById('queryInput');
|
||||||
const data = await response.json();
|
const queryMode = document.getElementById('queryMode');
|
||||||
queryResult.innerHTML = marked.parse(data.response);
|
const queryResult = document.getElementById('queryResult');
|
||||||
} catch (error) {
|
|
||||||
showToast('Error processing query');
|
let apiKey = localStorage.getItem('apiKey') || '';
|
||||||
} finally {
|
|
||||||
queryBtn.disabled = false;
|
queryBtn.addEventListener('click', async () => {
|
||||||
queryBtn.textContent = 'Send Query';
|
const query = queryInput.value.trim();
|
||||||
}
|
if (!query) {
|
||||||
});
|
showToast('Please enter a query');
|
||||||
},
|
return;
|
||||||
|
}
|
||||||
'status': async () => {
|
|
||||||
const healthStatus = document.getElementById('healthStatus');
|
queryBtn.disabled = true;
|
||||||
const configStatus = document.getElementById('configStatus');
|
queryBtn.innerHTML = `
|
||||||
|
<svg class="animate-spin h-5 w-5 mr-3" viewBox="0 0 24 24">
|
||||||
try {
|
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" fill="none"/>
|
||||||
const response = await fetchWithAuth('/health');
|
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"/>
|
||||||
const data = await response.json();
|
</svg>
|
||||||
|
Processing...
|
||||||
healthStatus.innerHTML = `
|
`;
|
||||||
<div class="space-y-2">
|
|
||||||
<div class="flex items-center">
|
try {
|
||||||
<div class="w-3 h-3 rounded-full ${data.status === 'healthy' ? 'bg-green-500' : 'bg-red-500'} mr-2"></div>
|
const response = await fetchWithAuth('/query', {
|
||||||
<span class="font-medium">${data.status}</span>
|
method: 'POST',
|
||||||
</div>
|
headers: { 'Content-Type': 'application/json' },
|
||||||
<div>
|
body: JSON.stringify({
|
||||||
<p class="text-sm text-gray-600">Working Directory: ${data.working_directory}</p>
|
query,
|
||||||
<p class="text-sm text-gray-600">Input Directory: ${data.input_directory}</p>
|
mode: queryMode.value,
|
||||||
<p class="text-sm text-gray-600">Indexed Files: ${data.indexed_files_count}</p>
|
stream: false,
|
||||||
</div>
|
only_need_context: false
|
||||||
</div>
|
})
|
||||||
`;
|
});
|
||||||
|
|
||||||
configStatus.innerHTML = Object.entries(data.configuration)
|
const data = await response.json();
|
||||||
.map(([key, value]) => `
|
queryResult.innerHTML = marked.parse(data.response);
|
||||||
<div class="mb-2">
|
} catch (error) {
|
||||||
<span class="text-sm font-medium text-gray-700">${key}:</span>
|
showToast('Error processing query');
|
||||||
<span class="text-sm text-gray-600 ml-2">${value}</span>
|
} finally {
|
||||||
</div>
|
queryBtn.disabled = false;
|
||||||
`).join('');
|
queryBtn.textContent = 'Send Query';
|
||||||
} catch (error) {
|
}
|
||||||
showToast('Error fetching status');
|
});
|
||||||
}
|
},
|
||||||
},
|
|
||||||
|
'status': async () => {
|
||||||
'settings': () => {
|
const healthStatus = document.getElementById('healthStatus');
|
||||||
const saveBtn = document.getElementById('saveSettings');
|
const configStatus = document.getElementById('configStatus');
|
||||||
const apiKeyInput = document.getElementById('apiKeyInput');
|
|
||||||
|
try {
|
||||||
saveBtn.addEventListener('click', () => {
|
const response = await fetchWithAuth('/health');
|
||||||
state.apiKey = apiKeyInput.value;
|
const data = await response.json();
|
||||||
localStorage.setItem('apiKey', state.apiKey);
|
|
||||||
showToast('Settings saved successfully');
|
healthStatus.innerHTML = `
|
||||||
});
|
<div class="space-y-2">
|
||||||
}
|
<div class="flex items-center">
|
||||||
};
|
<div class="w-3 h-3 rounded-full ${data.status === 'healthy' ? 'bg-green-500' : 'bg-red-500'} mr-2"></div>
|
||||||
|
<span class="font-medium">${data.status}</span>
|
||||||
// Navigation handling
|
</div>
|
||||||
document.querySelectorAll('.nav-item').forEach(item => {
|
<div>
|
||||||
item.addEventListener('click', (e) => {
|
<p class="text-sm text-gray-600">Working Directory: ${data.working_directory}</p>
|
||||||
e.preventDefault();
|
<p class="text-sm text-gray-600">Input Directory: ${data.input_directory}</p>
|
||||||
const page = item.dataset.page;
|
<p class="text-sm text-gray-600">Indexed Files: ${data.indexed_files_count}</p>
|
||||||
document.getElementById('content').innerHTML = pages[page]();
|
</div>
|
||||||
if (handlers[page]) handlers[page]();
|
</div>
|
||||||
state.currentPage = page;
|
`;
|
||||||
});
|
|
||||||
});
|
configStatus.innerHTML = Object.entries(data.configuration)
|
||||||
|
.map(([key, value]) => `
|
||||||
// Initialize with file manager
|
<div class="mb-2">
|
||||||
document.getElementById('content').innerHTML = pages['file-manager']();
|
<span class="text-sm font-medium text-gray-700">${key}:</span>
|
||||||
handlers['file-manager']();
|
<span class="text-sm text-gray-600 ml-2">${value}</span>
|
||||||
|
</div>
|
||||||
// Global functions
|
`).join('');
|
||||||
window.removeFile = (fileName) => {
|
} catch (error) {
|
||||||
state.files = state.files.filter(file => file.name !== fileName);
|
showToast('Error fetching status');
|
||||||
document.querySelector('#fileList div').innerHTML = state.files.map(file => `
|
}
|
||||||
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
|
},
|
||||||
<span>${file.name}</span>
|
|
||||||
<button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
|
'settings': () => {
|
||||||
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
const saveBtn = document.getElementById('saveSettings');
|
||||||
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
|
const apiKeyInput = document.getElementById('apiKeyInput');
|
||||||
</svg>
|
|
||||||
</button>
|
saveBtn.addEventListener('click', () => {
|
||||||
</div>
|
state.apiKey = apiKeyInput.value;
|
||||||
`).join('');
|
localStorage.setItem('apiKey', state.apiKey);
|
||||||
|
showToast('Settings saved successfully');
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Navigation handling
|
||||||
|
document.querySelectorAll('.nav-item').forEach(item => {
|
||||||
|
item.addEventListener('click', (e) => {
|
||||||
|
e.preventDefault();
|
||||||
|
const page = item.dataset.page;
|
||||||
|
document.getElementById('content').innerHTML = pages[page]();
|
||||||
|
if (handlers[page]) handlers[page]();
|
||||||
|
state.currentPage = page;
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Initialize with file manager
|
||||||
|
document.getElementById('content').innerHTML = pages['file-manager']();
|
||||||
|
handlers['file-manager']();
|
||||||
|
|
||||||
|
// Global functions
|
||||||
|
window.removeFile = (fileName) => {
|
||||||
|
state.files = state.files.filter(file => file.name !== fileName);
|
||||||
|
document.querySelector('#fileList div').innerHTML = state.files.map(file => `
|
||||||
|
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
|
||||||
|
<span>${file.name}</span>
|
||||||
|
<button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
|
||||||
|
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
|
||||||
|
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
|
||||||
|
</svg>
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
`).join('');
|
||||||
};
|
};
|
323
lightrag/kg/faiss_impl.py
Normal file
323
lightrag/kg/faiss_impl.py
Normal file
@@ -0,0 +1,323 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
import asyncio
|
||||||
|
import faiss
|
||||||
|
import json
|
||||||
|
import numpy as np
|
||||||
|
from tqdm.asyncio import tqdm as tqdm_async
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
from lightrag.utils import (
|
||||||
|
logger,
|
||||||
|
compute_mdhash_id,
|
||||||
|
)
|
||||||
|
from lightrag.base import (
|
||||||
|
BaseVectorStorage,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class FaissVectorDBStorage(BaseVectorStorage):
    """
    A Faiss-based Vector DB Storage for LightRAG.

    Vectors are L2-normalized before they are added to a ``faiss.IndexFlatIP``
    index, so the inner-product score returned by a search is the cosine
    similarity between the query and the stored vector.

    Metadata is kept in ``self._id_to_meta``, a dict that maps the positional
    Faiss ID (row number in the index) to the record's metadata. Each metadata
    dict carries ``__id__`` (the caller's custom ID), ``__created_at__`` and
    ``__vector__`` (the normalized vector as a list, used to rebuild the index
    after removals).
    """

    # Minimum cosine similarity a hit must reach to be returned by ``query``.
    # Can be overridden through global_config["vector_db_storage_cls_kwargs"].
    cosine_better_than_threshold: float = float(os.getenv("COSINE_THRESHOLD", "0.2"))

    def __post_init__(self):
        """Read configuration, create an empty index and try to load a saved one."""
        # Grab config values if available
        config = self.global_config.get("vector_db_storage_cls_kwargs", {})
        self.cosine_better_than_threshold = config.get(
            "cosine_better_than_threshold", self.cosine_better_than_threshold
        )

        # Where to save index file if you want persistent storage
        self._faiss_index_file = os.path.join(
            self.global_config["working_dir"], f"faiss_index_{self.namespace}.index"
        )
        self._meta_file = self._faiss_index_file + ".meta.json"

        self._max_batch_size = self.global_config["embedding_batch_num"]
        # Embedding dimension (e.g. 768) must match your embedding function
        self._dim = self.embedding_func.embedding_dim

        # Create an empty Faiss index for inner product (useful for normalized vectors = cosine similarity).
        # If you have a large number of vectors, you might want IVF or other indexes.
        # For demonstration, we use a simple IndexFlatIP.
        self._index = faiss.IndexFlatIP(self._dim)

        # Keep a local store for metadata, IDs, etc.
        # Maps <int faiss_id> -> metadata (including your original ID).
        self._id_to_meta = {}

        # Attempt to load an existing index + metadata from disk
        self._load_faiss_index()

    async def upsert(self, data: dict[str, dict]):
        """
        Insert or update vectors in the Faiss index.

        data: {
            "custom_id_1": {
                "content": <text>,
                ...metadata...
            },
            "custom_id_2": {
                "content": <text>,
                ...metadata...
            },
            ...
        }

        Returns the list of custom IDs that were upserted, or an empty list
        when ``data`` is empty or the embedding function returned a different
        number of vectors than there are records.
        """
        logger.info(f"Inserting {len(data)} vectors to {self.namespace}")
        if not data:
            logger.warning("You are inserting empty data to the vector DB")
            return []

        current_time = time.time()

        # Prepare data for embedding
        list_data = []
        contents = []
        for k, v in data.items():
            # Store only known meta fields if needed
            meta = {mf: v[mf] for mf in self.meta_fields if mf in v}
            meta["__id__"] = k
            meta["__created_at__"] = current_time
            list_data.append(meta)
            contents.append(v["content"])

        # Split into batches for embedding if needed
        batches = [
            contents[i : i + self._max_batch_size]
            for i in range(0, len(contents), self._max_batch_size)
        ]

        pbar = tqdm_async(
            total=len(batches), desc="Generating embeddings", unit="batch"
        )

        async def wrapped_task(batch):
            result = await self.embedding_func(batch)
            pbar.update(1)
            return result

        try:
            embeddings_list = await asyncio.gather(
                *(wrapped_task(batch) for batch in batches)
            )
        finally:
            # Always release the progress bar, even if an embedding call fails.
            pbar.close()

        # Flatten the list of arrays
        embeddings = np.concatenate(embeddings_list, axis=0)
        if len(embeddings) != len(list_data):
            logger.error(
                f"Embedding size mismatch. Embeddings: {len(embeddings)}, Data: {len(list_data)}"
            )
            return []

        # Faiss only accepts C-contiguous float32 matrices; embedding functions
        # may return float64, so convert before normalizing/adding (query()
        # applies the same float32 cast).
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

        # Normalize embeddings for cosine similarity (in-place)
        faiss.normalize_L2(embeddings)

        # Upsert logic:
        # 1. Identify which vectors to remove if they exist
        # 2. Remove them
        # 3. Add the new vectors
        # A single pass over the metadata replaces one linear scan per incoming ID.
        incoming_ids = {meta["__id__"] for meta in list_data}
        existing_ids_to_remove = [
            fid
            for fid, meta in self._id_to_meta.items()
            if meta.get("__id__") in incoming_ids
        ]

        if existing_ids_to_remove:
            self._remove_faiss_ids(existing_ids_to_remove)

        # Step 2: Add new vectors
        start_idx = self._index.ntotal
        self._index.add(embeddings)

        # Step 3: Store metadata + vector for each new ID
        for i, meta in enumerate(list_data):
            fid = start_idx + i
            # Store the raw vector so we can rebuild if something is removed
            meta["__vector__"] = embeddings[i].tolist()
            self._id_to_meta[fid] = meta

        logger.info(f"Upserted {len(list_data)} vectors into Faiss index.")
        return [m["__id__"] for m in list_data]

    async def query(self, query: str, top_k=5):
        """
        Search by a textual query; returns top_k results with their metadata + similarity distance.

        Hits whose score is below ``self.cosine_better_than_threshold`` are
        dropped. Each result is the stored metadata plus the keys ``id``,
        ``distance`` (the inner-product score) and ``created_at``.
        """
        embedding = await self.embedding_func([query])
        # embedding is shape (1, dim)
        embedding = np.array(embedding, dtype=np.float32)
        faiss.normalize_L2(embedding)  # we do in-place normalization

        logger.info(
            f"Query: {query}, top_k: {top_k}, threshold: {self.cosine_better_than_threshold}"
        )

        # Perform the similarity search
        distances, indices = self._index.search(embedding, top_k)

        distances = distances[0]
        indices = indices[0]

        results = []
        for dist, idx in zip(distances, indices):
            if idx == -1:
                # Faiss returns -1 if no neighbor
                continue

            # Cosine similarity threshold
            if dist < self.cosine_better_than_threshold:
                continue

            # idx is a numpy integer; use a plain int for the metadata lookup.
            meta = self._id_to_meta.get(int(idx), {})
            results.append(
                {
                    **meta,
                    "id": meta.get("__id__"),
                    "distance": float(dist),
                    "created_at": meta.get("__created_at__"),
                }
            )

        return results

    @property
    def client_storage(self):
        """Return all stored metadata dicts under the key ``"data"``."""
        # Return whatever structure LightRAG might need for debugging
        return {"data": list(self._id_to_meta.values())}

    async def delete(self, ids: list[str]):
        """
        Delete vectors for the provided custom IDs.

        IDs that are not present in the store are ignored.
        """
        logger.info(f"Deleting {len(ids)} vectors from {self.namespace}")
        # One pass over the metadata instead of one linear scan per ID.
        wanted = set(ids)
        to_remove = [
            fid
            for fid, meta in self._id_to_meta.items()
            if meta.get("__id__") in wanted
        ]

        if to_remove:
            self._remove_faiss_ids(to_remove)
            logger.info(
                f"Successfully deleted {len(to_remove)} vectors from {self.namespace}"
            )

    async def delete_entity(self, entity_name: str):
        """
        Delete a single entity by computing its hashed ID
        the same way your code does it with `compute_mdhash_id`.
        """
        entity_id = compute_mdhash_id(entity_name, prefix="ent-")
        logger.debug(f"Attempting to delete entity {entity_name} with ID {entity_id}")
        await self.delete([entity_id])

    async def delete_entity_relation(self, entity_name: str):
        """
        Delete relations for a given entity by scanning metadata.

        A record is treated as a relation of ``entity_name`` when its
        ``src_id`` or ``tgt_id`` metadata field equals ``entity_name``.
        """
        logger.debug(f"Searching relations for entity {entity_name}")
        relations = [
            fid
            for fid, meta in self._id_to_meta.items()
            if meta.get("src_id") == entity_name or meta.get("tgt_id") == entity_name
        ]

        logger.debug(f"Found {len(relations)} relations for {entity_name}")
        if relations:
            self._remove_faiss_ids(relations)
            logger.debug(f"Deleted {len(relations)} relations for {entity_name}")

    async def index_done_callback(self):
        """
        Called after indexing is done (save Faiss index + metadata).
        """
        self._save_faiss_index()
        logger.info("Faiss index saved successfully.")

    # --------------------------------------------------------------------------------
    # Internal helper methods
    # --------------------------------------------------------------------------------

    def _find_faiss_id_by_custom_id(self, custom_id: str):
        """
        Return the Faiss internal ID for a given custom ID, or None if not found.

        This is a linear scan over the metadata; the first match wins.
        """
        for fid, meta in self._id_to_meta.items():
            if meta.get("__id__") == custom_id:
                return fid
        return None

    def _remove_faiss_ids(self, fid_list):
        """
        Remove a list of internal Faiss IDs from the index.
        Because IndexFlatIP doesn't support 'removals',
        we rebuild the index excluding those vectors.

        Surviving records are renumbered 0..n-1 in their current dict order,
        so any Faiss ID obtained before this call is invalid afterwards.
        """
        # Set membership keeps the rebuild linear in the number of stored vectors.
        drop = set(fid_list)

        # Rebuild the index
        vectors_to_keep = []
        new_id_to_meta = {}
        for old_fid, vec_meta in self._id_to_meta.items():
            if old_fid in drop:
                continue
            new_id_to_meta[len(vectors_to_keep)] = vec_meta
            vectors_to_keep.append(vec_meta["__vector__"])  # stored as list

        # Re-init index
        self._index = faiss.IndexFlatIP(self._dim)
        if vectors_to_keep:
            arr = np.array(vectors_to_keep, dtype=np.float32)
            self._index.add(arr)

        self._id_to_meta = new_id_to_meta

    def _save_faiss_index(self):
        """
        Save the current Faiss index + metadata to disk so it can persist across runs.
        """
        faiss.write_index(self._index, self._faiss_index_file)

        # Save metadata dict to JSON. Convert all keys to strings for JSON storage.
        # _id_to_meta is { int: { '__id__': doc_id, '__vector__': [float,...], ... } }
        # We'll keep the int -> dict, but JSON requires string keys.
        serializable_dict = {str(fid): meta for fid, meta in self._id_to_meta.items()}

        with open(self._meta_file, "w", encoding="utf-8") as f:
            json.dump(serializable_dict, f)

    def _load_faiss_index(self):
        """
        Load the Faiss index + metadata from disk if it exists,
        and rebuild in-memory structures so we can query.

        The loaded index is only adopted when its dimension matches the
        embedding function and its vector count matches the metadata file.
        On any failure the storage starts with an empty index.
        """
        if not os.path.exists(self._faiss_index_file):
            logger.warning("No existing Faiss index file found. Starting fresh.")
            return

        try:
            # Load the Faiss index
            index = faiss.read_index(self._faiss_index_file)
            # Load metadata
            with open(self._meta_file, "r", encoding="utf-8") as f:
                stored_dict = json.load(f)

            # Convert string keys back to int
            id_to_meta = {int(fid_str): meta for fid_str, meta in stored_dict.items()}

            # Reject files that cannot work with the current configuration:
            # a wrong dimension breaks add/search, and a count mismatch would
            # pair search hits with the wrong (or missing) metadata.
            if index.d != self._dim:
                raise ValueError(
                    f"index dimension {index.d} does not match embedding dimension {self._dim}"
                )
            if index.ntotal != len(id_to_meta):
                raise ValueError(
                    f"index holds {index.ntotal} vectors but metadata has {len(id_to_meta)} entries"
                )

            self._index = index
            self._id_to_meta = id_to_meta

            logger.info(
                f"Faiss index loaded with {self._index.ntotal} vectors from {self._faiss_index_file}"
            )
        except Exception as e:
            # Best-effort load: log the cause and fall back to an empty store.
            logger.error(f"Failed to load Faiss index or metadata: {e}")
            logger.warning("Starting with an empty Faiss index.")
            self._index = faiss.IndexFlatIP(self._dim)
            self._id_to_meta = {}
|
@@ -60,6 +60,7 @@ STORAGES = {
|
|||||||
"PGGraphStorage": ".kg.postgres_impl",
|
"PGGraphStorage": ".kg.postgres_impl",
|
||||||
"GremlinStorage": ".kg.gremlin_impl",
|
"GremlinStorage": ".kg.gremlin_impl",
|
||||||
"PGDocStatusStorage": ".kg.postgres_impl",
|
"PGDocStatusStorage": ".kg.postgres_impl",
|
||||||
|
"FaissVectorDBStorage": ".kg.faiss_impl",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user