Merge remote-tracking branch 'origin/main' into api_improvment

# Conflicts:
#	lightrag/api/lightrag_server.py
Yannick Stephan
2025-02-17 08:48:15 +01:00
70 changed files with 3845 additions and 1995 deletions

View File

@@ -222,6 +222,7 @@ You can select storage implementation by environment variables or command line a
| --max-embed-tokens | 8192 | Maximum embedding token size |
| --timeout | None | Timeout in seconds (useful when using slow AI). Use None for infinite timeout |
| --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
| --verbose | False | Verbose debug output (True, False) |
| --key | None | API key for authentication. Protects lightrag server against unauthorized access |
| --ssl | False | Enable HTTPS |
| --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |

View File

@@ -61,7 +61,10 @@ from ..kg.tidb_impl import (
)
# Load environment variables
load_dotenv(override=True)
try:
load_dotenv(override=True)
except Exception as e:
logger.warning(f"Failed to load .env file: {e}")
# Initialize config parser
config = configparser.ConfigParser()
@@ -131,8 +134,8 @@ def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
if value is None:
return default
if isinstance(value_type, bool):
return value.lower() in ("true", "1", "yes")
if value_type is bool:
return value.lower() in ("true", "1", "yes", "t", "on")
try:
return value_type(value)
except ValueError:
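For reference: the old check isinstance(value_type, bool) could never succeed, because value_type is a type object rather than a bool instance, so boolean env values fell through to value_type(value), and bool("false") came out truthy. A minimal standalone sketch of the corrected parser (the trailing return-default fallback is an assumption, since the hunk ends before it):

import os
from typing import Any

def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
    """Read an environment variable and coerce it to value_type."""
    value = os.environ.get(env_key)
    if value is None:
        return default
    if value_type is bool:
        # bool("false") would be True, so parse accepted spellings explicitly
        return value.lower() in ("true", "1", "yes", "t", "on")
    try:
        return value_type(value)
    except ValueError:
        return default  # assumed fallback

os.environ["VERBOSE"] = "false"
assert get_env_value("VERBOSE", False, bool) is False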
@@ -234,6 +237,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
ASCIIColors.white(" ├─ Log Level: ", end="")
ASCIIColors.yellow(f"{args.log_level}")
ASCIIColors.white(" ├─ Verbose Debug: ", end="")
ASCIIColors.yellow(f"{args.verbose}")
ASCIIColors.white(" └─ Timeout: ", end="")
ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
@@ -252,10 +257,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/docs")
ASCIIColors.white(" ├─ Alternative Documentation (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/redoc")
ASCIIColors.white(" ├─ WebUI (local): ", end="")
ASCIIColors.white(" └─ WebUI (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/webui")
ASCIIColors.white(" └─ Graph Viewer (local): ", end="")
ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/graph-viewer")
ASCIIColors.yellow("\n📝 Note:")
ASCIIColors.white(""" Since the server is running on 0.0.0.0:
@@ -565,6 +568,13 @@ def parse_args() -> argparse.Namespace:
help="Prefix of the namespace",
)
parser.add_argument(
"--verbose",
type=bool,
default=get_env_value("VERBOSE", False, bool),
help="Verbose debug output(default: from env or false)",
)
args = parser.parse_args()
# convert relative paths to absolute paths
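Note that argparse's type=bool does not parse booleans: argparse calls bool() on the raw string, so --verbose False on the command line still yields True (any non-empty string is truthy); only the env-derived default goes through get_env_value's parsing. A sketch of the usual workaround, shown for illustration only (not what this commit does):

import argparse

parser = argparse.ArgumentParser()
# store_true sidesteps the bool("False") pitfall: the flag's presence sets it
parser.add_argument("--verbose", action="store_true", default=False)
assert parser.parse_args(["--verbose"]).verbose is True
assert parser.parse_args([]).verbose is False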
@@ -776,6 +786,23 @@ class InsertResponse(BaseModel):
message: str = Field(description="Message describing the operation result")
class DocStatusResponse(BaseModel):
id: str
content_summary: str
content_length: int
status: DocStatus
created_at: str
updated_at: str
chunks_count: Optional[int] = None
error: Optional[str] = None
metadata: Optional[dict[str, Any]] = None
class DocsStatusesResponse(BaseModel):
statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
def get_api_key_dependency(api_key: Optional[str]):
if not api_key:
# If no API key is configured, return a dummy dependency that always succeeds
@@ -809,6 +836,11 @@ temp_prefix = "__tmp_" # prefix for temporary files
def create_app(args):
# Initialize verbose debug setting
from lightrag.utils import set_verbose_debug
set_verbose_debug(args.verbose)
global global_top_k
global_top_k = args.top_k # save top_k from args
@@ -1806,20 +1838,57 @@ def create_app(args):
app.include_router(ollama_api.router, prefix="/api")
@app.get("/documents", dependencies=[Depends(optional_api_key)])
async def documents():
"""Get current system status"""
return doc_manager.indexed_files
async def documents() -> DocsStatusesResponse:
"""
Get the statuses of all documents
Returns:
DocsStatusesResponse: A response object containing a dictionary where keys are DocStatus
and values are lists of DocStatusResponse objects representing documents in each status category.
"""
try:
statuses = (
DocStatus.PENDING,
DocStatus.PROCESSING,
DocStatus.PROCESSED,
DocStatus.FAILED,
)
tasks = [rag.get_docs_by_status(status) for status in statuses]
results: List[Dict[str, DocProcessingStatus]] = await asyncio.gather(*tasks)
response = DocsStatusesResponse()
for idx, result in enumerate(results):
status = statuses[idx]
for doc_id, doc_status in result.items():
if status not in response.statuses:
response.statuses[status] = []
response.statuses[status].append(
DocStatusResponse(
id=doc_id,
content_summary=doc_status.content_summary,
content_length=doc_status.content_length,
status=doc_status.status,
created_at=doc_status.created_at,
updated_at=doc_status.updated_at,
chunks_count=doc_status.chunks_count,
error=doc_status.error,
metadata=doc_status.metadata,
)
)
return response
except Exception as e:
logging.error(f"Error GET /documents: {str(e)}")
logging.error(traceback.format_exc())
raise HTTPException(status_code=500, detail=str(e))
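For illustration, a client call against the reworked endpoint might look as follows; the port assumes the server default, the X-API-Key header matches what the old webui sent, and the response values in the comment are hypothetical:

import requests

# The X-API-Key header is only required when the server was started with --key
resp = requests.get(
    "http://localhost:9621/documents",
    headers={"X-API-Key": "my-secret-key"},
)
resp.raise_for_status()
statuses = resp.json()["statuses"]
# e.g. {"processed": [{"id": "doc-123", "content_summary": "...",
#        "content_length": 1234, "status": "processed",
#        "created_at": "...", "updated_at": "...", "chunks_count": 5,
#        "error": null, "metadata": {}}], ...}
for status, docs in statuses.items():
    print(status, len(docs))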
@app.get("/health", dependencies=[Depends(optional_api_key)])
async def get_status():
"""Get current system status"""
files = doc_manager.scan_directory()
return {
"status": "healthy",
"working_directory": str(args.working_dir),
"input_directory": str(args.input_dir),
"indexed_files": [str(f) for f in files],
"indexed_files_count": len(files),
"configuration": {
# LLM configuration binding/host address (if applicable)/model (if applicable)
"llm_binding": args.llm_binding,
@@ -1838,17 +1907,9 @@ def create_app(args):
}
# Webui mount webui/index.html
webui_dir = Path(__file__).parent / "webui"
app.mount(
"/graph-viewer",
StaticFiles(directory=webui_dir, html=True),
name="webui",
)
# Serve the static files
static_dir = Path(__file__).parent / "static"
static_dir = Path(__file__).parent / "webui"
static_dir.mkdir(exist_ok=True)
app.mount("/webui", StaticFiles(directory=static_dir, html=True), name="static")
app.mount("/webui", StaticFiles(directory=static_dir, html=True), name="webui")
return app

View File

@@ -11,6 +11,7 @@ from fastapi.responses import StreamingResponse
import asyncio
from ascii_colors import trace_exception
from lightrag import LightRAG, QueryParam
from lightrag.utils import encode_string_by_tiktoken
from dotenv import load_dotenv
@@ -111,18 +112,9 @@ class OllamaTagResponse(BaseModel):
def estimate_tokens(text: str) -> int:
"""Estimate the number of tokens in text
Chinese characters: approximately 1.5 tokens per character
English characters: approximately 0.25 tokens per character
"""
# Use regex to match Chinese and non-Chinese characters separately
chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
non_chinese_chars = len(re.findall(r"[^\u4e00-\u9fff]", text))
# Calculate estimated token count
tokens = chinese_chars * 1.5 + non_chinese_chars * 0.25
return int(tokens)
"""Estimate the number of tokens in text using tiktoken"""
tokens = encode_string_by_tiktoken(text)
return len(tokens)
def parse_query_mode(query: str) -> tuple[str, SearchMode]:
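The per-script character heuristic is replaced by an exact tiktoken encoding. A minimal sketch of the equivalent computation, assuming encode_string_by_tiktoken wraps a tiktoken encoder as its name suggests (model name borrowed from the tiktoken_model_name default elsewhere in this commit):

import tiktoken

def estimate_tokens(text: str, model_name: str = "gpt-4o-mini") -> int:
    """Count tokens exactly by encoding with the model's tokenizer."""
    encoding = tiktoken.encoding_for_model(model_name)
    return len(encoding.encode(text))

# Mixed CJK/ASCII input no longer needs the per-script heuristic
print(estimate_tokens("你好, LightRAG"))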

View File

@@ -1,2 +0,0 @@
# LightRag Webui
A simple webui to interact with the lightrag datalake

Binary file not shown.


View File

@@ -1,104 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>LightRAG Interface</title>
<script src="https://cdn.tailwindcss.com"></script>
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
<style>
.fade-in {
animation: fadeIn 0.3s ease-in;
}
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
.spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from { transform: rotate(0deg); }
to { transform: rotate(360deg); }
}
.slide-in {
animation: slideIn 0.3s ease-out;
}
@keyframes slideIn {
from { transform: translateX(-100%); }
to { transform: translateX(0); }
}
</style>
</head>
<body class="bg-gray-50">
<div class="flex h-screen">
<!-- Sidebar -->
<div class="w-64 bg-white shadow-lg">
<div class="p-4">
<h1 class="text-xl font-bold text-gray-800 mb-6">LightRAG</h1>
<nav class="space-y-2">
<a href="#" class="nav-item" data-page="file-manager">
<div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
<svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M3 7v10a2 2 0 002 2h14a2 2 0 002-2V9a2 2 0 00-2-2h-6l-2-2H5a2 2 0 00-2 2z"/>
</svg>
File Manager
</div>
</a>
<a href="#" class="nav-item" data-page="query">
<div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
<svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z"/>
</svg>
Query Database
</div>
</a>
<a href="#" class="nav-item" data-page="knowledge-graph">
<div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
<svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 10V3L4 14h7v7l9-11h-7z"/>
</svg>
Knowledge Graph
</div>
</a>
<a href="#" class="nav-item" data-page="status">
<div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
<svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/>
</svg>
Status
</div>
</a>
<a href="#" class="nav-item" data-page="settings">
<div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
<svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10.325 4.317c.426-1.756 2.924-1.756 3.35 0a1.724 1.724 0 002.573 1.066c1.543-.94 3.31.826 2.37 2.37a1.724 1.724 0 001.065 2.572c1.756.426 1.756 2.924 0 3.35a1.724 1.724 0 00-1.066 2.573c.94 1.543-.826 3.31-2.37 2.37a1.724 1.724 0 00-2.572 1.065c-.426 1.756-2.924 1.756-3.35 0a1.724 1.724 0 00-2.573-1.066c-1.543.94-3.31-.826-2.37-2.37a1.724 1.724 0 00-1.065-2.572c-1.756-.426-1.756-2.924 0-3.35a1.724 1.724 0 001.066-2.573c-.94-1.543.826-3.31 2.37-2.37.996.608 2.296.07 2.572-1.065z"/>
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"/>
</svg>
Settings
</div>
</a>
</nav>
</div>
</div>
<!-- Main Content -->
<div class="flex-1 overflow-auto p-6">
<div id="content" class="fade-in"></div>
</div>
<!-- Toast Notification -->
<div id="toast" class="fixed bottom-4 right-4 hidden">
<div class="bg-gray-800 text-white px-6 py-3 rounded-lg shadow-lg"></div>
</div>
</div>
<script src="./js/api.js"></script>
</body>
</html>

View File

@@ -1,408 +0,0 @@
// State management
const state = {
apiKey: localStorage.getItem('apiKey') || '',
files: [],
indexedFiles: [],
currentPage: 'file-manager'
};
// Utility functions
const showToast = (message, duration = 3000) => {
const toast = document.getElementById('toast');
toast.querySelector('div').textContent = message;
toast.classList.remove('hidden');
setTimeout(() => toast.classList.add('hidden'), duration);
};
const fetchWithAuth = async (url, options = {}) => {
const headers = {
...(options.headers || {}),
...(state.apiKey ? { 'X-API-Key': state.apiKey } : {}) // Use X-API-Key instead of Bearer
};
return fetch(url, { ...options, headers });
};
// Page renderers
const pages = {
'file-manager': () => `
<div class="space-y-6">
<h2 class="text-2xl font-bold text-gray-800">File Manager</h2>
<div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center hover:border-gray-400 transition-colors">
<input type="file" id="fileInput" multiple accept=".txt,.md,.doc,.docx,.pdf,.pptx" class="hidden">
<label for="fileInput" class="cursor-pointer">
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M15 13l-3-3m0 0l-3 3m3-3v12"/>
</svg>
<p class="mt-2 text-gray-600">Drag files here or click to select</p>
<p class="text-sm text-gray-500">Supported formats: TXT, MD, DOC, PDF, PPTX</p>
</label>
</div>
<div id="fileList" class="space-y-2">
<h3 class="text-lg font-semibold text-gray-700">Selected Files</h3>
<div class="space-y-2"></div>
</div>
<div id="uploadProgress" class="hidden mt-4">
<div class="w-full bg-gray-200 rounded-full h-2.5">
<div class="bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
</div>
<p class="text-sm text-gray-600 mt-2"><span id="uploadStatus">0</span> files processed</p>
</div>
<div class="flex items-center space-x-4 bg-gray-100 p-4 rounded-lg shadow-md">
<button id="rescanBtn" class="flex items-center bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20" fill="currentColor" class="mr-2">
<path d="M12 4a8 8 0 1 1-8 8H2.5a9.5 9.5 0 1 0 2.8-6.7L2 3v6h6L5.7 6.7A7.96 7.96 0 0 1 12 4z"/>
</svg>
Rescan Files
</button>
<button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
Upload & Index Files
</button>
</div>
<div id="indexedFiles" class="space-y-2">
<h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3>
<div class="space-y-2"></div>
</div>
</div>
`,
'query': () => `
<div class="space-y-6">
<h2 class="text-2xl font-bold text-gray-800">Query Database</h2>
<div class="space-y-4">
<div>
<label class="block text-sm font-medium text-gray-700">Query Mode</label>
<select id="queryMode" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
<option value="hybrid">Hybrid</option>
<option value="local">Local</option>
<option value="global">Global</option>
<option value="naive">Naive</option>
</select>
</div>
<div>
<label class="block text-sm font-medium text-gray-700">Query</label>
<textarea id="queryInput" rows="4" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"></textarea>
</div>
<button id="queryBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
Send Query
</button>
<div id="queryResult" class="mt-4 p-4 bg-white rounded-lg shadow"></div>
</div>
</div>
`,
'knowledge-graph': () => `
<div class="flex items-center justify-center h-full">
<div class="text-center">
<svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11H5m14 0a2 2 0 012 2v6a2 2 0 01-2 2H5a2 2 0 01-2-2v-6a2 2 0 012-2m14 0V9a2 2 0 00-2-2M5 11V9a2 2 0 012-2m0 0V5a2 2 0 012-2h6a2 2 0 012 2v2M7 7h10"/>
</svg>
<h3 class="mt-2 text-sm font-medium text-gray-900">Under Construction</h3>
<p class="mt-1 text-sm text-gray-500">Knowledge graph visualization will be available in a future update.</p>
</div>
</div>
`,
'status': () => `
<div class="space-y-6">
<h2 class="text-2xl font-bold text-gray-800">System Status</h2>
<div id="statusContent" class="grid grid-cols-1 md:grid-cols-2 gap-6">
<div class="p-6 bg-white rounded-lg shadow-sm">
<h3 class="text-lg font-semibold mb-4">System Health</h3>
<div id="healthStatus"></div>
</div>
<div class="p-6 bg-white rounded-lg shadow-sm">
<h3 class="text-lg font-semibold mb-4">Configuration</h3>
<div id="configStatus"></div>
</div>
</div>
</div>
`,
'settings': () => `
<div class="space-y-6">
<h2 class="text-2xl font-bold text-gray-800">Settings</h2>
<div class="max-w-xl">
<div class="space-y-4">
<div>
<label class="block text-sm font-medium text-gray-700">API Key</label>
<input type="password" id="apiKeyInput" value="${state.apiKey}"
class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
</div>
<button id="saveSettings" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
Save Settings
</button>
</div>
</div>
</div>
`
};
// Page handlers
const handlers = {
'file-manager': () => {
const fileInput = document.getElementById('fileInput');
const dropZone = fileInput.parentElement.parentElement;
const fileList = document.querySelector('#fileList div');
const indexedFiles = document.querySelector('#indexedFiles div');
const uploadBtn = document.getElementById('uploadBtn');
const updateFileList = () => {
fileList.innerHTML = state.files.map(file => `
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
<span>${file.name}</span>
<button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
</svg>
</button>
</div>
`).join('');
};
const updateIndexedFiles = async () => {
const response = await fetchWithAuth('/health');
const data = await response.json();
indexedFiles.innerHTML = data.indexed_files.map(file => `
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
<span>${file}</span>
</div>
`).join('');
};
dropZone.addEventListener('dragover', (e) => {
e.preventDefault();
dropZone.classList.add('border-blue-500');
});
dropZone.addEventListener('dragleave', () => {
dropZone.classList.remove('border-blue-500');
});
dropZone.addEventListener('drop', (e) => {
e.preventDefault();
dropZone.classList.remove('border-blue-500');
const files = Array.from(e.dataTransfer.files);
state.files.push(...files);
updateFileList();
});
fileInput.addEventListener('change', () => {
state.files.push(...Array.from(fileInput.files));
updateFileList();
});
uploadBtn.addEventListener('click', async () => {
if (state.files.length === 0) {
showToast('Please select files to upload');
return;
}
let apiKey = localStorage.getItem('apiKey') || '';
const progress = document.getElementById('uploadProgress');
const progressBar = progress.querySelector('div');
const statusText = document.getElementById('uploadStatus');
progress.classList.remove('hidden');
for (let i = 0; i < state.files.length; i++) {
const formData = new FormData();
formData.append('file', state.files[i]);
try {
await fetch('/documents/upload', {
method: 'POST',
headers: apiKey ? { 'Authorization': `Bearer ${apiKey}` } : {},
body: formData
});
const percentage = ((i + 1) / state.files.length) * 100;
progressBar.style.width = `${percentage}%`;
statusText.textContent = `${i + 1}/${state.files.length}`;
} catch (error) {
console.error('Upload error:', error);
}
}
progress.classList.add('hidden');
});
rescanBtn.addEventListener('click', async () => {
const progress = document.getElementById('uploadProgress');
const progressBar = progress.querySelector('div');
const statusText = document.getElementById('uploadStatus');
progress.classList.remove('hidden');
try {
// Start the scanning process
const scanResponse = await fetch('/documents/scan', {
method: 'POST',
});
if (!scanResponse.ok) {
throw new Error('Scan failed to start');
}
// Start polling for progress
const pollInterval = setInterval(async () => {
const progressResponse = await fetch('/documents/scan-progress');
const progressData = await progressResponse.json();
// Update progress bar
progressBar.style.width = `${progressData.progress}%`;
// Update status text
if (progressData.total_files > 0) {
statusText.textContent = `Processing ${progressData.current_file} (${progressData.indexed_count}/${progressData.total_files})`;
}
// Check if scanning is complete
if (!progressData.is_scanning) {
clearInterval(pollInterval);
progress.classList.add('hidden');
statusText.textContent = 'Scan complete!';
}
}, 1000); // Poll every second
} catch (error) {
console.error('Upload error:', error);
progress.classList.add('hidden');
statusText.textContent = 'Error during scanning process';
}
});
updateIndexedFiles();
},
'query': () => {
const queryBtn = document.getElementById('queryBtn');
const queryInput = document.getElementById('queryInput');
const queryMode = document.getElementById('queryMode');
const queryResult = document.getElementById('queryResult');
let apiKey = localStorage.getItem('apiKey') || '';
queryBtn.addEventListener('click', async () => {
const query = queryInput.value.trim();
if (!query) {
showToast('Please enter a query');
return;
}
queryBtn.disabled = true;
queryBtn.innerHTML = `
<svg class="animate-spin h-5 w-5 mr-3" viewBox="0 0 24 24">
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" fill="none"/>
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"/>
</svg>
Processing...
`;
try {
const response = await fetchWithAuth('/query', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
query,
mode: queryMode.value,
stream: false,
only_need_context: false
})
});
const data = await response.json();
queryResult.innerHTML = marked.parse(data.response);
} catch (error) {
showToast('Error processing query');
} finally {
queryBtn.disabled = false;
queryBtn.textContent = 'Send Query';
}
});
},
'status': async () => {
const healthStatus = document.getElementById('healthStatus');
const configStatus = document.getElementById('configStatus');
try {
const response = await fetchWithAuth('/health');
const data = await response.json();
healthStatus.innerHTML = `
<div class="space-y-2">
<div class="flex items-center">
<div class="w-3 h-3 rounded-full ${data.status === 'healthy' ? 'bg-green-500' : 'bg-red-500'} mr-2"></div>
<span class="font-medium">${data.status}</span>
</div>
<div>
<p class="text-sm text-gray-600">Working Directory: ${data.working_directory}</p>
<p class="text-sm text-gray-600">Input Directory: ${data.input_directory}</p>
<p class="text-sm text-gray-600">Indexed Files: ${data.indexed_files_count}</p>
</div>
</div>
`;
configStatus.innerHTML = Object.entries(data.configuration)
.map(([key, value]) => `
<div class="mb-2">
<span class="text-sm font-medium text-gray-700">${key}:</span>
<span class="text-sm text-gray-600 ml-2">${value}</span>
</div>
`).join('');
} catch (error) {
showToast('Error fetching status');
}
},
'settings': () => {
const saveBtn = document.getElementById('saveSettings');
const apiKeyInput = document.getElementById('apiKeyInput');
saveBtn.addEventListener('click', () => {
state.apiKey = apiKeyInput.value;
localStorage.setItem('apiKey', state.apiKey);
showToast('Settings saved successfully');
});
}
};
// Navigation handling
document.querySelectorAll('.nav-item').forEach(item => {
item.addEventListener('click', (e) => {
e.preventDefault();
const page = item.dataset.page;
document.getElementById('content').innerHTML = pages[page]();
if (handlers[page]) handlers[page]();
state.currentPage = page;
});
});
// Initialize with file manager
document.getElementById('content').innerHTML = pages['file-manager']();
handlers['file-manager']();
// Global functions
window.removeFile = (fileName) => {
state.files = state.files.filter(file => file.name !== fileName);
document.querySelector('#fileList div').innerHTML = state.files.map(file => `
<div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
<span>${file.name}</span>
<button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
<svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
</svg>
</button>
</div>
`).join('');
};

View File

@@ -1,211 +0,0 @@
// js/graph.js
function openGraphModal(label) {
const modal = document.getElementById("graph-modal");
const graphTitle = document.getElementById("graph-title");
if (!modal || !graphTitle) {
console.error("Key element not found");
return;
}
graphTitle.textContent = `Knowledge Graph - ${label}`;
modal.style.display = "flex";
renderGraph(label);
}
function closeGraphModal() {
const modal = document.getElementById("graph-modal");
modal.style.display = "none";
clearGraph();
}
function clearGraph() {
const svg = document.getElementById("graph-svg");
svg.innerHTML = "";
}
async function getGraph(label) {
try {
const response = await fetch(`/graphs?label=${label}`);
const rawData = await response.json();
console.log({data: JSON.parse(JSON.stringify(rawData))});
const nodes = rawData.nodes
nodes.forEach(node => {
node.id = Date.now().toString(36) + Math.random().toString(36).substring(2); // generate a unique UUID (crypto.randomUUID() could be used instead)
});
// Strictly verify edge data
const edges = (rawData.edges || []).map(edge => {
const sourceNode = nodes.find(n => n.labels.includes(edge.source));
const targetNode = nodes.find(n => n.labels.includes(edge.target));
if (!sourceNode || !targetNode) {
console.warn("NOT VALID EDGE:", edge);
return null;
}
return {
source: sourceNode,
target: targetNode,
type: edge.type || ""
};
}).filter(edge => edge !== null);
return {nodes, edges};
} catch (error) {
console.error("Loading graph failed:", error);
return {nodes: [], edges: []};
}
}
async function renderGraph(label) {
const data = await getGraph(label);
if (!data.nodes || data.nodes.length === 0) {
d3.select("#graph-svg")
.html(`<text x="50%" y="50%" text-anchor="middle">No valid nodes</text>`);
return;
}
const svg = d3.select("#graph-svg");
const width = svg.node().clientWidth;
const height = svg.node().clientHeight;
svg.selectAll("*").remove();
// Create a force oriented diagram layout
const simulation = d3.forceSimulation(data.nodes)
.force("charge", d3.forceManyBody().strength(-300))
.force("center", d3.forceCenter(width / 2, height / 2));
// Add a connection (if there are valid edges)
if (data.edges.length > 0) {
simulation.force("link",
d3.forceLink(data.edges)
.id(d => d.id)
.distance(100)
);
}
// Draw nodes
const nodes = svg.selectAll(".node")
.data(data.nodes)
.enter()
.append("circle")
.attr("class", "node")
.attr("r", 10)
.call(d3.drag()
.on("start", dragStarted)
.on("drag", dragged)
.on("end", dragEnded)
);
svg.append("defs")
.append("marker")
.attr("id", "arrow-out")
.attr("viewBox", "0 0 10 10")
.attr("refX", 8)
.attr("refY", 5)
.attr("markerWidth", 6)
.attr("markerHeight", 6)
.attr("orient", "auto")
.append("path")
.attr("d", "M0,0 L10,5 L0,10 Z")
.attr("fill", "#999");
// Draw edges (with arrows)
const links = svg.selectAll(".link")
.data(data.edges)
.enter()
.append("line")
.attr("class", "link")
.attr("marker-end", "url(#arrow-out)"); // Always draw arrows on the target side
// Edge style configuration
links
.attr("stroke", "#999")
.attr("stroke-width", 2)
.attr("stroke-opacity", 0.8);
// Draw label (with background box)
const labels = svg.selectAll(".label")
.data(data.nodes)
.enter()
.append("text")
.attr("class", "label")
.text(d => d.labels[0] || "")
.attr("text-anchor", "start")
.attr("dy", "0.3em")
.attr("fill", "#333");
// Update Location
simulation.on("tick", () => {
links
.attr("x1", d => {
// Calculate the direction vector from the source node to the target node
const dx = d.target.x - d.source.x;
const dy = d.target.y - d.source.y;
const distance = Math.sqrt(dx * dx + dy * dy);
if (distance === 0) return d.source.x; // Avoid dividing by zero
// Adjust the starting point coordinates (source node edge) based on radius 10
return d.source.x + (dx / distance) * 10;
})
.attr("y1", d => {
const dx = d.target.x - d.source.x;
const dy = d.target.y - d.source.y;
const distance = Math.sqrt(dx * dx + dy * dy);
if (distance === 0) return d.source.y;
return d.source.y + (dy / distance) * 10;
})
.attr("x2", d => {
// Adjust the endpoint coordinates (target node edge) based on a radius of 10
const dx = d.target.x - d.source.x;
const dy = d.target.y - d.source.y;
const distance = Math.sqrt(dx * dx + dy * dy);
if (distance === 0) return d.target.x;
return d.target.x - (dx / distance) * 10;
})
.attr("y2", d => {
const dx = d.target.x - d.source.x;
const dy = d.target.y - d.source.y;
const distance = Math.sqrt(dx * dx + dy * dy);
if (distance === 0) return d.target.y;
return d.target.y - (dy / distance) * 10;
});
// Update the position of nodes and labels (keep unchanged)
nodes
.attr("cx", d => d.x)
.attr("cy", d => d.y);
labels
.attr("x", d => d.x + 12)
.attr("y", d => d.y + 4);
});
// Drag and drop logic
function dragStarted(event, d) {
if (!event.active) simulation.alphaTarget(0.3).restart();
d.fx = d.x;
d.fy = d.y;
}
function dragged(event, d) {
d.fx = event.x;
d.fy = event.y;
simulation.alpha(0.3).restart();
}
function dragEnded(event, d) {
if (!event.active) simulation.alphaTarget(0);
d.fx = null;
d.fy = null;
}
}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -4,9 +4,9 @@
<meta charset="UTF-8" />
<link rel="icon" type="image/svg+xml" href="./vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Lightrag Graph Viewer</title>
<script type="module" crossorigin src="./assets/index-CF-pcoIl.js"></script>
<link rel="stylesheet" crossorigin href="./assets/index-BAeLPZpd.css">
<title>Lightrag</title>
<script type="module" crossorigin src="./assets/index-BMB0OroL.js"></script>
<link rel="stylesheet" crossorigin href="./assets/index-CLgSwrjG.css">
</head>
<body>
<div id="root"></div>

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import os
from dotenv import load_dotenv
from dataclasses import dataclass, field
from enum import Enum
from typing import (
@@ -9,12 +10,12 @@ from typing import (
TypedDict,
TypeVar,
)
import numpy as np
from .utils import EmbeddingFunc
from .types import KnowledgeGraph
load_dotenv()
class TextChunkSchema(TypedDict):
tokens: int
@@ -54,13 +55,15 @@ class QueryParam:
top_k: int = int(os.getenv("TOP_K", "60"))
"""Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
max_token_for_text_unit: int = 4000
max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
"""Maximum number of tokens allowed for each retrieved text chunk."""
max_token_for_global_context: int = 4000
max_token_for_global_context: int = int(
os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
)
"""Maximum number of tokens allocated for relationship descriptions in global retrieval."""
max_token_for_local_context: int = 4000
max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
"""Maximum number of tokens allocated for entity descriptions in local retrieval."""
hl_keywords: list[str] = field(default_factory=list)
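Because these defaults call os.getenv in the dataclass body, they are fixed at import time; overrides must be present in the environment (or in a .env file, which load_dotenv() now picks up) before lightrag is imported. A sketch:

import os

# Must run before importing lightrag: QueryParam evaluates
# os.getenv(...) when the dataclass body is executed on import
os.environ["TOP_K"] = "40"
os.environ["MAX_TOKEN_TEXT_CHUNK"] = "6000"

from lightrag import QueryParam

param = QueryParam()
assert param.top_k == 40
assert param.max_token_for_text_unit == 6000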

View File

@@ -268,10 +268,10 @@ class LightRAG:
"""Directory where logs are stored. Defaults to the current working directory."""
# Text chunking
chunk_token_size: int = 1200
chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
"""Maximum number of tokens per text chunk when splitting documents."""
chunk_overlap_token_size: int = 100
chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))
"""Number of overlapping tokens between consecutive text chunks to preserve context."""
tiktoken_model_name: str = "gpt-4o-mini"
@@ -281,7 +281,7 @@ class LightRAG:
entity_extract_max_gleaning: int = 1
"""Maximum number of entity extraction attempts for ambiguous content."""
entity_summary_to_max_tokens: int = 500
entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500"))
"""Maximum number of tokens used for summarizing extracted entities."""
# Node embedding
@@ -1254,6 +1254,16 @@ class LightRAG:
"""
return await self.doc_status.get_status_counts()
async def get_docs_by_status(
self, status: DocStatus
) -> dict[str, DocProcessingStatus]:
"""Get documents by status
Returns:
Dict with document ids as keys and document statuses as values
"""
return await self.doc_status.get_docs_by_status(status)
async def adelete_by_doc_id(self, doc_id: str) -> None:
"""Delete a document and all its related data

View File

@@ -40,9 +40,10 @@ __version__ = "1.0.0"
__author__ = "lightrag Team"
__status__ = "Production"
from ..utils import verbose_debug, VERBOSE_DEBUG
import sys
import os
import logging
if sys.version_info < (3, 9):
from typing import AsyncIterator
@@ -110,6 +111,11 @@ async def openai_complete_if_cache(
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
"Content-Type": "application/json",
}
# Set openai logger level to INFO when VERBOSE_DEBUG is off
if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
logging.getLogger("openai").setLevel(logging.INFO)
openai_async_client = (
AsyncOpenAI(default_headers=default_headers, api_key=api_key)
if base_url is None
@@ -125,13 +131,11 @@ async def openai_complete_if_cache(
messages.extend(history_messages)
messages.append({"role": "user", "content": prompt})
# Add debug logging
logger.debug("===== Query Input to LLM =====")
logger.debug("===== Sending Query to LLM =====")
logger.debug(f"Model: {model} Base URL: {base_url}")
logger.debug(f"Additional kwargs: {kwargs}")
logger.debug(f"Query: {prompt}")
logger.debug(f"System prompt: {system_prompt}")
# logger.debug(f"Messages: {messages}")
verbose_debug(f"Query: {prompt}")
verbose_debug(f"System prompt: {system_prompt}")
try:
if "response_format" in kwargs:

View File

@@ -43,6 +43,7 @@ __status__ = "Production"
import sys
import re
import json
from ..utils import verbose_debug
if sys.version_info < (3, 9):
pass
@@ -119,7 +120,7 @@ async def zhipu_complete_if_cache(
# Add debug logging
logger.debug("===== Query Input to LLM =====")
logger.debug(f"Query: {prompt}")
logger.debug(f"System prompt: {system_prompt}")
verbose_debug(f"System prompt: {system_prompt}")
# Remove unsupported kwargs
kwargs = {

View File

@@ -687,6 +687,9 @@ async def kg_query(
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
response = await use_model_func(
query,
system_prompt=sys_prompt,
@@ -772,6 +775,9 @@ async def extract_keywords_only(
query=text, examples=examples, language=language, history=history_context
)
len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
# 5. Call the LLM for keyword extraction
use_model_func = global_config["llm_model_func"]
result = await use_model_func(kw_prompt, keyword_extraction=True)
@@ -935,7 +941,9 @@ async def mix_kg_vector_query(
chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
formatted_chunks.append(chunk_text)
logger.info(f"Truncate {len(chunks)} to {len(formatted_chunks)} chunks")
logger.debug(
f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
)
return "\n--New Chunk--\n".join(formatted_chunks)
except Exception as e:
logger.error(f"Error in get_vector_context: {e}")
@@ -968,6 +976,9 @@ async def mix_kg_vector_query(
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
# 6. Generate response
response = await use_model_func(
query,
@@ -1073,7 +1084,7 @@ async def _build_query_context(
if not entities_context.strip() and not relations_context.strip():
return None
return f"""
result = f"""
-----Entities-----
```csv
{entities_context}
@@ -1087,6 +1098,15 @@ async def _build_query_context(
{text_units_context}
```
"""
context_tokens = len(encode_string_by_tiktoken(result))
entities_tokens = len(encode_string_by_tiktoken(entities_context))
relations_tokens = len(encode_string_by_tiktoken(relations_context))
text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
logger.debug(
f"Context Tokens - Total: {context_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
)
return result
async def _get_node_data(
@@ -1130,8 +1150,19 @@ async def _get_node_data(
node_datas, query_param, knowledge_graph_inst
),
)
len_node_datas = len(node_datas)
node_datas = truncate_list_by_token_size(
node_datas,
key=lambda x: x["description"],
max_token_size=query_param.max_token_for_local_context,
)
logger.debug(
f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
)
logger.info(
f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
)
# build prompt
@@ -1264,6 +1295,10 @@ async def _find_most_related_text_unit_from_entities(
max_token_size=query_param.max_token_for_text_unit,
)
logger.debug(
f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
)
all_text_units = [t["data"] for t in all_text_units]
return all_text_units
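These hunks all follow the same pattern: truncate_list_by_token_size keeps a prefix of the list whose concatenated key-text fits the token budget, and the new debug lines record how many items were dropped. A simplified sketch of the presumed semantics (not the library function itself; token counting reduced to a pluggable callable):

def truncate_list_by_token_size(items, key, max_token_size, count_tokens=len):
    """Keep the longest prefix whose summed token counts fit the budget."""
    total, kept = 0, []
    for item in items:
        total += count_tokens(key(item))  # the real helper presumably counts tiktoken tokens
        if total > max_token_size:
            break
        kept.append(item)
    return kept

chunks = [{"description": "a" * 40}, {"description": "b" * 40}]
print(len(truncate_list_by_token_size(
    chunks, key=lambda x: x["description"], max_token_size=50)))  # -> 1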
@@ -1305,6 +1340,11 @@ async def _find_most_related_edges_from_entities(
key=lambda x: x["description"],
max_token_size=query_param.max_token_for_global_context,
)
logger.debug(
f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
)
return all_edges_data
@@ -1352,11 +1392,15 @@ async def _get_edge_data(
edge_datas = sorted(
edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
)
len_edge_datas = len(edge_datas)
edge_datas = truncate_list_by_token_size(
edge_datas,
key=lambda x: x["description"],
max_token_size=query_param.max_token_for_global_context,
)
logger.debug(
f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})"
)
use_entities, use_text_units = await asyncio.gather(
_find_most_related_entities_from_relationships(
@@ -1367,7 +1411,7 @@ async def _get_edge_data(
),
)
logger.info(
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
)
relations_section_list = [
@@ -1456,11 +1500,15 @@ async def _find_most_related_entities_from_relationships(
for k, n, d in zip(entity_names, node_datas, node_degrees)
]
len_node_datas = len(node_datas)
node_datas = truncate_list_by_token_size(
node_datas,
key=lambda x: x["description"],
max_token_size=query_param.max_token_for_local_context,
)
logger.debug(
f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
)
return node_datas
@@ -1516,6 +1564,10 @@ async def _find_related_text_unit_from_relationships(
max_token_size=query_param.max_token_for_text_unit,
)
logger.debug(
f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
)
all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
return all_text_units
@@ -1583,7 +1635,10 @@ async def naive_query(
logger.warning("No chunks left after truncation")
return PROMPTS["fail_response"]
logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
logger.debug(
f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
)
section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
if query_param.only_need_context:
@@ -1606,6 +1661,9 @@ async def naive_query(
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}")
response = await use_model_func(
query,
system_prompt=sys_prompt,
@@ -1748,6 +1806,9 @@ async def kg_query_with_keywords(
if query_param.only_need_prompt:
return sys_prompt
len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
response = await use_model_func(
query,
system_prompt=sys_prompt,

View File

@@ -20,6 +20,23 @@ import tiktoken
from lightrag.prompt import PROMPTS
VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
def verbose_debug(msg: str, *args, **kwargs):
"""Function for outputting detailed debug information.
When VERBOSE_DEBUG=True, outputs the complete message.
When VERBOSE_DEBUG=False, the message is suppressed.
"""
if VERBOSE_DEBUG:
logger.debug(msg, *args, **kwargs)
def set_verbose_debug(enabled: bool):
"""Enable or disable verbose debug output"""
global VERBOSE_DEBUG
VERBOSE_DEBUG = enabled
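A usage sketch of the new toggle; the logging setup here is assumed:

import logging
from lightrag.utils import set_verbose_debug, verbose_debug

logging.basicConfig(level=logging.DEBUG)

set_verbose_debug(True)   # e.g. driven by --verbose or VERBOSE=true
verbose_debug("full prompt text ...")   # emitted at DEBUG level
set_verbose_debug(False)
verbose_debug("suppressed while verbose debug is off")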
class UnlimitedSemaphore:
"""A context manager that allows unlimited access."""
@@ -657,6 +674,10 @@ def get_conversation_turns(
Returns:
Formatted string of the conversation history
"""
# Check if num_turns is valid
if num_turns <= 0:
return ""
# Group messages into turns
turns: list[list[dict[str, Any]]] = []
messages: list[dict[str, Any]] = []