Merge remote-tracking branch 'origin/main' into api_improvment

# Conflicts:
#	lightrag/api/lightrag_server.py
@@ -222,6 +222,7 @@ You can select storage implementation by environment variables or command line arguments
 | --max-embed-tokens | 8192 | Maximum embedding token size |
 | --timeout | None | Timeout in seconds (useful when using slow AI). Use None for infinite timeout |
 | --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
+| --verbose | False | Verbose debug output (True, False) |
 | --key | None | API key for authentication. Protects lightrag server against unauthorized access |
 | --ssl | False | Enable HTTPS |
 | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
@@ -61,7 +61,10 @@ from ..kg.tidb_impl import (
 )

 # Load environment variables
-load_dotenv(override=True)
+try:
+    load_dotenv(override=True)
+except Exception as e:
+    logger.warning(f"Failed to load .env file: {e}")

 # Initialize config parser
 config = configparser.ConfigParser()
@@ -131,8 +134,8 @@ def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
     if value is None:
         return default

-    if isinstance(value_type, bool):
-        return value.lower() in ("true", "1", "yes")
+    if value_type is bool:
+        return value.lower() in ("true", "1", "yes", "t", "on")
     try:
         return value_type(value)
     except ValueError:
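Note: the `value_type is bool` fix matters because `isinstance(value_type, bool)` asks whether the type object itself is a bool instance, which is never true, so the boolean branch was dead code and booleans fell through to `bool(value)` (truthy for any non-empty string). A minimal sketch of the corrected behavior, with a hypothetical env variable:

```python
import os
from typing import Any


def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
    """Read an env var and coerce it; booleans accept several truthy spellings."""
    value = os.environ.get(env_key)
    if value is None:
        return default
    if value_type is bool:  # compare the type object, not isinstance()
        return value.lower() in ("true", "1", "yes", "t", "on")
    try:
        return value_type(value)
    except ValueError:
        return default


os.environ["VERBOSE"] = "on"  # hypothetical setting
assert get_env_value("VERBOSE", False, bool) is True
assert get_env_value("MISSING_KEY", 42, int) == 42  # falls back to the default
```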
@@ -234,6 +237,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
     ASCIIColors.white(" ├─ Log Level: ", end="")
     ASCIIColors.yellow(f"{args.log_level}")
+    ASCIIColors.white(" ├─ Verbose Debug: ", end="")
+    ASCIIColors.yellow(f"{args.verbose}")
     ASCIIColors.white(" └─ Timeout: ", end="")
     ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
@@ -252,10 +257,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/docs")
     ASCIIColors.white(" ├─ Alternative Documentation (local): ", end="")
     ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/redoc")
-    ASCIIColors.white(" ├─ WebUI (local): ", end="")
+    ASCIIColors.white(" └─ WebUI (local): ", end="")
     ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/webui")
-    ASCIIColors.white(" └─ Graph Viewer (local): ", end="")
-    ASCIIColors.yellow(f"{protocol}://localhost:{args.port}/graph-viewer")

     ASCIIColors.yellow("\n📝 Note:")
     ASCIIColors.white(""" Since the server is running on 0.0.0.0:
@@ -565,6 +568,13 @@ def parse_args() -> argparse.Namespace:
         help="Prefix of the namespace",
     )

+    parser.add_argument(
+        "--verbose",
+        type=bool,
+        default=get_env_value("VERBOSE", False, bool),
+        help="Verbose debug output (default: from env or false)",
+    )
+
     args = parser.parse_args()

     # convert relative path to absolute path
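A caveat worth noting here (a side observation, not part of the diff): `argparse` with `type=bool` applies `bool()` to the raw string, so any non-empty value, including `--verbose false`, parses as `True`. The env-driven default still behaves correctly because `get_env_value(..., bool)` does the string comparison itself, but an explicit command-line value would not. A hedged sketch of a converter that handles both spellings:

```python
import argparse


def str2bool(value: str) -> bool:
    """Parse 'true/false'-style strings instead of relying on bool()."""
    if value.lower() in ("true", "1", "yes", "t", "on"):
        return True
    if value.lower() in ("false", "0", "no", "f", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean, got {value!r}")


parser = argparse.ArgumentParser()
parser.add_argument("--verbose", type=str2bool, default=False)
print(parser.parse_args(["--verbose", "false"]).verbose)  # False, as intended
```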
@@ -776,6 +786,23 @@ class InsertResponse(BaseModel):
     message: str = Field(description="Message describing the operation result")


+class DocStatusResponse(BaseModel):
+    id: str
+    content_summary: str
+    content_length: int
+    status: DocStatus
+    created_at: str
+    updated_at: str
+    chunks_count: Optional[int] = None
+    error: Optional[str] = None
+    metadata: Optional[dict[str, Any]] = None
+
+
+class DocsStatusesResponse(BaseModel):
+    statuses: Dict[DocStatus, List[DocStatusResponse]] = {}
+
+
 def get_api_key_dependency(api_key: Optional[str]):
     if not api_key:
         # If no API key is configured, return a dummy dependency that always succeeds
@@ -809,6 +836,11 @@ temp_prefix = "__tmp_"  # prefix for temporary files


 def create_app(args):
+    # Initialize verbose debug setting
+    from lightrag.utils import set_verbose_debug
+
+    set_verbose_debug(args.verbose)
+
     global global_top_k
     global_top_k = args.top_k  # save top_k from args
@@ -1806,20 +1838,57 @@ def create_app(args):
     app.include_router(ollama_api.router, prefix="/api")

     @app.get("/documents", dependencies=[Depends(optional_api_key)])
-    async def documents():
-        """Get current system status"""
-        return doc_manager.indexed_files
+    async def documents() -> DocsStatusesResponse:
+        """
+        Get documents statuses
+
+        Returns:
+            DocsStatusesResponse: A response object containing a dictionary where keys are DocStatus
+            and values are lists of DocStatusResponse objects representing documents in each status category.
+        """
+        try:
+            statuses = (
+                DocStatus.PENDING,
+                DocStatus.PROCESSING,
+                DocStatus.PROCESSED,
+                DocStatus.FAILED,
+            )
+
+            tasks = [rag.get_docs_by_status(status) for status in statuses]
+            results: List[Dict[str, DocProcessingStatus]] = await asyncio.gather(*tasks)
+
+            response = DocsStatusesResponse()
+
+            for idx, result in enumerate(results):
+                status = statuses[idx]
+                for doc_id, doc_status in result.items():
+                    if status not in response.statuses:
+                        response.statuses[status] = []
+                    response.statuses[status].append(
+                        DocStatusResponse(
+                            id=doc_id,
+                            content_summary=doc_status.content_summary,
+                            content_length=doc_status.content_length,
+                            status=doc_status.status,
+                            created_at=doc_status.created_at,
+                            updated_at=doc_status.updated_at,
+                            chunks_count=doc_status.chunks_count,
+                            error=doc_status.error,
+                            metadata=doc_status.metadata,
+                        )
+                    )
+            return response
+        except Exception as e:
+            logging.error(f"Error GET /documents: {str(e)}")
+            logging.error(traceback.format_exc())
+            raise HTTPException(status_code=500, detail=str(e))

     @app.get("/health", dependencies=[Depends(optional_api_key)])
     async def get_status():
         """Get current system status"""
         files = doc_manager.scan_directory()
         return {
             "status": "healthy",
             "working_directory": str(args.working_dir),
             "input_directory": str(args.input_dir),
             "indexed_files": [str(f) for f in files],
             "indexed_files_count": len(files),
             "configuration": {
                 # LLM configuration binding/host address (if applicable)/model (if applicable)
                 "llm_binding": args.llm_binding,
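For reference, the reworked endpoint groups documents by pipeline stage, so a client gets one list per DocStatus rather than a flat file list. A minimal sketch of calling it; the port matches the server default seen elsewhere in the codebase, and the `X-API-Key` header is the scheme the old webui used, so treat both as assumptions to adjust:

```python
import requests  # assumes the server from this diff is running locally

resp = requests.get(
    "http://localhost:9621/documents",         # port is an assumption; match your --port
    headers={"X-API-Key": "your-secret-key"},  # only needed when --key is configured
    timeout=30,
)
resp.raise_for_status()
for status, docs in resp.json()["statuses"].items():
    print(f"{status}: {len(docs)} document(s)")
```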
@@ -1838,17 +1907,9 @@ def create_app(args):
     }

-    # Webui mount webui/index.html
-    webui_dir = Path(__file__).parent / "webui"
-    app.mount(
-        "/graph-viewer",
-        StaticFiles(directory=webui_dir, html=True),
-        name="webui",
-    )
-
     # Serve the static files
-    static_dir = Path(__file__).parent / "static"
+    static_dir = Path(__file__).parent / "webui"
     static_dir.mkdir(exist_ok=True)
-    app.mount("/webui", StaticFiles(directory=static_dir, html=True), name="static")
+    app.mount("/webui", StaticFiles(directory=static_dir, html=True), name="webui")

     return app
@@ -11,6 +11,7 @@ from fastapi.responses import StreamingResponse
 import asyncio
 from ascii_colors import trace_exception
 from lightrag import LightRAG, QueryParam
+from lightrag.utils import encode_string_by_tiktoken
 from dotenv import load_dotenv
@@ -111,18 +112,9 @@ class OllamaTagResponse(BaseModel):


 def estimate_tokens(text: str) -> int:
-    """Estimate the number of tokens in text
-    Chinese characters: approximately 1.5 tokens per character
-    English characters: approximately 0.25 tokens per character
-    """
-    # Use regex to match Chinese and non-Chinese characters separately
-    chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
-    non_chinese_chars = len(re.findall(r"[^\u4e00-\u9fff]", text))
-
-    # Calculate estimated token count
-    tokens = chinese_chars * 1.5 + non_chinese_chars * 0.25
-
-    return int(tokens)
+    """Estimate the number of tokens in text using tiktoken"""
+    tokens = encode_string_by_tiktoken(text)
+    return len(tokens)


 def parse_query_mode(query: str) -> tuple[str, SearchMode]:
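The replacement drops the per-script heuristic (roughly 1.5 tokens per Chinese character, 0.25 per other character) in favor of an exact count from the model tokenizer. A sketch of the same idea using tiktoken directly, in case the `encode_string_by_tiktoken` helper is not available; the model name mirrors the `tiktoken_model_name` default further down in this diff:

```python
import tiktoken


def estimate_tokens(text: str, model_name: str = "gpt-4o-mini") -> int:
    """Exact token count from the model's tokenizer, replacing the regex heuristic."""
    encoding = tiktoken.encoding_for_model(model_name)
    return len(encoding.encode(text))


# Counts mixed-script text correctly instead of approximating per character.
print(estimate_tokens("LightRAG 支持中英文混合文本"))
```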
@@ -1,2 +0,0 @@
# LightRag Webui
A simple webui to interact with the lightrag datalake

Binary file not shown. (Before: 734 KiB image)
@@ -1,104 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>LightRAG Interface</title>
    <script src="https://cdn.tailwindcss.com"></script>
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
    <style>
        .fade-in {
            animation: fadeIn 0.3s ease-in;
        }

        @keyframes fadeIn {
            from { opacity: 0; }
            to { opacity: 1; }
        }

        .spin {
            animation: spin 1s linear infinite;
        }

        @keyframes spin {
            from { transform: rotate(0deg); }
            to { transform: rotate(360deg); }
        }

        .slide-in {
            animation: slideIn 0.3s ease-out;
        }

        @keyframes slideIn {
            from { transform: translateX(-100%); }
            to { transform: translateX(0); }
        }
    </style>
</head>
<body class="bg-gray-50">
    <div class="flex h-screen">
        <!-- Sidebar -->
        <div class="w-64 bg-white shadow-lg">
            <div class="p-4">
                <h1 class="text-xl font-bold text-gray-800 mb-6">LightRAG</h1>
                <nav class="space-y-2">
                    <a href="#" class="nav-item" data-page="file-manager">
                        <div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
                            <svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M3 7v10a2 2 0 002 2h14a2 2 0 002-2V9a2 2 0 00-2-2h-6l-2-2H5a2 2 0 00-2 2z"/>
                            </svg>
                            File Manager
                        </div>
                    </a>
                    <a href="#" class="nav-item" data-page="query">
                        <div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
                            <svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 21l-6-6m2-5a7 7 0 11-14 0 7 7 0 0114 0z"/>
                            </svg>
                            Query Database
                        </div>
                    </a>
                    <a href="#" class="nav-item" data-page="knowledge-graph">
                        <div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
                            <svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 10V3L4 14h7v7l9-11h-7z"/>
                            </svg>
                            Knowledge Graph
                        </div>
                    </a>
                    <a href="#" class="nav-item" data-page="status">
                        <div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
                            <svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M9 19v-6a2 2 0 00-2-2H5a2 2 0 00-2 2v6a2 2 0 002 2h2a2 2 0 002-2zm0 0V9a2 2 0 012-2h2a2 2 0 012 2v10m-6 0a2 2 0 002 2h2a2 2 0 002-2m0 0V5a2 2 0 012-2h2a2 2 0 012 2v14a2 2 0 01-2 2h-2a2 2 0 01-2-2z"/>
                            </svg>
                            Status
                        </div>
                    </a>
                    <a href="#" class="nav-item" data-page="settings">
                        <div class="flex items-center p-2 rounded-lg hover:bg-gray-100 transition-colors">
                            <svg class="w-5 h-5 mr-3" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M10.325 4.317c.426-1.756 2.924-1.756 3.35 0a1.724 1.724 0 002.573 1.066c1.543-.94 3.31.826 2.37 2.37a1.724 1.724 0 001.065 2.572c1.756.426 1.756 2.924 0 3.35a1.724 1.724 0 00-1.066 2.573c.94 1.543-.826 3.31-2.37 2.37a1.724 1.724 0 00-2.572 1.065c-.426 1.756-2.924 1.756-3.35 0a1.724 1.724 0 00-2.573-1.066c-1.543.94-3.31-.826-2.37-2.37a1.724 1.724 0 00-1.065-2.572c-1.756-.426-1.756-2.924 0-3.35a1.724 1.724 0 001.066-2.573c-.94-1.543.826-3.31 2.37-2.37.996.608 2.296.07 2.572-1.065z"/>
                                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M15 12a3 3 0 11-6 0 3 3 0 016 0z"/>
                            </svg>
                            Settings
                        </div>
                    </a>
                </nav>
            </div>
        </div>

        <!-- Main Content -->
        <div class="flex-1 overflow-auto p-6">
            <div id="content" class="fade-in"></div>
        </div>

        <!-- Toast Notification -->
        <div id="toast" class="fixed bottom-4 right-4 hidden">
            <div class="bg-gray-800 text-white px-6 py-3 rounded-lg shadow-lg"></div>
        </div>
    </div>

    <script src="./js/api.js"></script>

</body>
</html>
@@ -1,408 +0,0 @@
// State management
const state = {
    apiKey: localStorage.getItem('apiKey') || '',
    files: [],
    indexedFiles: [],
    currentPage: 'file-manager'
};

// Utility functions
const showToast = (message, duration = 3000) => {
    const toast = document.getElementById('toast');
    toast.querySelector('div').textContent = message;
    toast.classList.remove('hidden');
    setTimeout(() => toast.classList.add('hidden'), duration);
};

const fetchWithAuth = async (url, options = {}) => {
    const headers = {
        ...(options.headers || {}),
        ...(state.apiKey ? { 'X-API-Key': state.apiKey } : {}) // Use X-API-Key instead of Bearer
    };
    return fetch(url, { ...options, headers });
};


// Page renderers
const pages = {
    'file-manager': () => `
        <div class="space-y-6">
            <h2 class="text-2xl font-bold text-gray-800">File Manager</h2>

            <div class="border-2 border-dashed border-gray-300 rounded-lg p-8 text-center hover:border-gray-400 transition-colors">
                <input type="file" id="fileInput" multiple accept=".txt,.md,.doc,.docx,.pdf,.pptx" class="hidden">
                <label for="fileInput" class="cursor-pointer">
                    <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                        <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 16a4 4 0 01-.88-7.903A5 5 0 1115.9 6L16 6a5 5 0 011 9.9M15 13l-3-3m0 0l-3 3m3-3v12"/>
                    </svg>
                    <p class="mt-2 text-gray-600">Drag files here or click to select</p>
                    <p class="text-sm text-gray-500">Supported formats: TXT, MD, DOC, PDF, PPTX</p>
                </label>
            </div>

            <div id="fileList" class="space-y-2">
                <h3 class="text-lg font-semibold text-gray-700">Selected Files</h3>
                <div class="space-y-2"></div>
            </div>
            <div id="uploadProgress" class="hidden mt-4">
                <div class="w-full bg-gray-200 rounded-full h-2.5">
                    <div class="bg-blue-600 h-2.5 rounded-full" style="width: 0%"></div>
                </div>
                <p class="text-sm text-gray-600 mt-2"><span id="uploadStatus">0</span> files processed</p>
            </div>
            <div class="flex items-center space-x-4 bg-gray-100 p-4 rounded-lg shadow-md">
                <button id="rescanBtn" class="flex items-center bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
                    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" width="20" height="20" fill="currentColor" class="mr-2">
                        <path d="M12 4a8 8 0 1 1-8 8H2.5a9.5 9.5 0 1 0 2.8-6.7L2 3v6h6L5.7 6.7A7.96 7.96 0 0 1 12 4z"/>
                    </svg>
                    Rescan Files
                </button>

                <button id="uploadBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
                    Upload & Index Files
                </button>
            </div>

            <div id="indexedFiles" class="space-y-2">
                <h3 class="text-lg font-semibold text-gray-700">Indexed Files</h3>
                <div class="space-y-2"></div>
            </div>

        </div>
    `,

    'query': () => `
        <div class="space-y-6">
            <h2 class="text-2xl font-bold text-gray-800">Query Database</h2>

            <div class="space-y-4">
                <div>
                    <label class="block text-sm font-medium text-gray-700">Query Mode</label>
                    <select id="queryMode" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
                        <option value="hybrid">Hybrid</option>
                        <option value="local">Local</option>
                        <option value="global">Global</option>
                        <option value="naive">Naive</option>
                    </select>
                </div>

                <div>
                    <label class="block text-sm font-medium text-gray-700">Query</label>
                    <textarea id="queryInput" rows="4" class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500"></textarea>
                </div>

                <button id="queryBtn" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
                    Send Query
                </button>

                <div id="queryResult" class="mt-4 p-4 bg-white rounded-lg shadow"></div>
            </div>
        </div>
    `,

    'knowledge-graph': () => `
        <div class="flex items-center justify-center h-full">
            <div class="text-center">
                <svg class="mx-auto h-12 w-12 text-gray-400" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 11H5m14 0a2 2 0 012 2v6a2 2 0 01-2 2H5a2 2 0 01-2-2v-6a2 2 0 012-2m14 0V9a2 2 0 00-2-2M5 11V9a2 2 0 012-2m0 0V5a2 2 0 012-2h6a2 2 0 012 2v2M7 7h10"/>
                </svg>
                <h3 class="mt-2 text-sm font-medium text-gray-900">Under Construction</h3>
                <p class="mt-1 text-sm text-gray-500">Knowledge graph visualization will be available in a future update.</p>
            </div>
        </div>
    `,

    'status': () => `
        <div class="space-y-6">
            <h2 class="text-2xl font-bold text-gray-800">System Status</h2>
            <div id="statusContent" class="grid grid-cols-1 md:grid-cols-2 gap-6">
                <div class="p-6 bg-white rounded-lg shadow-sm">
                    <h3 class="text-lg font-semibold mb-4">System Health</h3>
                    <div id="healthStatus"></div>
                </div>
                <div class="p-6 bg-white rounded-lg shadow-sm">
                    <h3 class="text-lg font-semibold mb-4">Configuration</h3>
                    <div id="configStatus"></div>
                </div>
            </div>
        </div>
    `,

    'settings': () => `
        <div class="space-y-6">
            <h2 class="text-2xl font-bold text-gray-800">Settings</h2>

            <div class="max-w-xl">
                <div class="space-y-4">
                    <div>
                        <label class="block text-sm font-medium text-gray-700">API Key</label>
                        <input type="password" id="apiKeyInput" value="${state.apiKey}"
                            class="mt-1 block w-full rounded-md border-gray-300 shadow-sm focus:border-blue-500 focus:ring-blue-500">
                    </div>

                    <button id="saveSettings" class="bg-blue-600 text-white px-4 py-2 rounded-lg hover:bg-blue-700 transition-colors">
                        Save Settings
                    </button>
                </div>
            </div>
        </div>
    `
};

// Page handlers
const handlers = {
    'file-manager': () => {
        const fileInput = document.getElementById('fileInput');
        const dropZone = fileInput.parentElement.parentElement;
        const fileList = document.querySelector('#fileList div');
        const indexedFiles = document.querySelector('#indexedFiles div');
        const uploadBtn = document.getElementById('uploadBtn');

        const updateFileList = () => {
            fileList.innerHTML = state.files.map(file => `
                <div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
                    <span>${file.name}</span>
                    <button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
                        <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                            <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
                        </svg>
                    </button>
                </div>
            `).join('');
        };

        const updateIndexedFiles = async () => {
            const response = await fetchWithAuth('/health');
            const data = await response.json();
            indexedFiles.innerHTML = data.indexed_files.map(file => `
                <div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
                    <span>${file}</span>
                </div>
            `).join('');
        };

        dropZone.addEventListener('dragover', (e) => {
            e.preventDefault();
            dropZone.classList.add('border-blue-500');
        });

        dropZone.addEventListener('dragleave', () => {
            dropZone.classList.remove('border-blue-500');
        });

        dropZone.addEventListener('drop', (e) => {
            e.preventDefault();
            dropZone.classList.remove('border-blue-500');
            const files = Array.from(e.dataTransfer.files);
            state.files.push(...files);
            updateFileList();
        });

        fileInput.addEventListener('change', () => {
            state.files.push(...Array.from(fileInput.files));
            updateFileList();
        });

        uploadBtn.addEventListener('click', async () => {
            if (state.files.length === 0) {
                showToast('Please select files to upload');
                return;
            }
            let apiKey = localStorage.getItem('apiKey') || '';
            const progress = document.getElementById('uploadProgress');
            const progressBar = progress.querySelector('div');
            const statusText = document.getElementById('uploadStatus');
            progress.classList.remove('hidden');

            for (let i = 0; i < state.files.length; i++) {
                const formData = new FormData();
                formData.append('file', state.files[i]);

                try {
                    await fetch('/documents/upload', {
                        method: 'POST',
                        headers: apiKey ? { 'Authorization': `Bearer ${apiKey}` } : {},
                        body: formData
                    });

                    const percentage = ((i + 1) / state.files.length) * 100;
                    progressBar.style.width = `${percentage}%`;
                    statusText.textContent = `${i + 1}/${state.files.length}`;
                } catch (error) {
                    console.error('Upload error:', error);
                }
            }
            progress.classList.add('hidden');
        });

        rescanBtn.addEventListener('click', async () => {
            const progress = document.getElementById('uploadProgress');
            const progressBar = progress.querySelector('div');
            const statusText = document.getElementById('uploadStatus');
            progress.classList.remove('hidden');

            try {
                // Start the scanning process
                const scanResponse = await fetch('/documents/scan', {
                    method: 'POST',
                });

                if (!scanResponse.ok) {
                    throw new Error('Scan failed to start');
                }

                // Start polling for progress
                const pollInterval = setInterval(async () => {
                    const progressResponse = await fetch('/documents/scan-progress');
                    const progressData = await progressResponse.json();

                    // Update progress bar
                    progressBar.style.width = `${progressData.progress}%`;

                    // Update status text
                    if (progressData.total_files > 0) {
                        statusText.textContent = `Processing ${progressData.current_file} (${progressData.indexed_count}/${progressData.total_files})`;
                    }

                    // Check if scanning is complete
                    if (!progressData.is_scanning) {
                        clearInterval(pollInterval);
                        progress.classList.add('hidden');
                        statusText.textContent = 'Scan complete!';
                    }
                }, 1000); // Poll every second

            } catch (error) {
                console.error('Upload error:', error);
                progress.classList.add('hidden');
                statusText.textContent = 'Error during scanning process';
            }
        });


        updateIndexedFiles();
    },

    'query': () => {
        const queryBtn = document.getElementById('queryBtn');
        const queryInput = document.getElementById('queryInput');
        const queryMode = document.getElementById('queryMode');
        const queryResult = document.getElementById('queryResult');

        let apiKey = localStorage.getItem('apiKey') || '';

        queryBtn.addEventListener('click', async () => {
            const query = queryInput.value.trim();
            if (!query) {
                showToast('Please enter a query');
                return;
            }

            queryBtn.disabled = true;
            queryBtn.innerHTML = `
                <svg class="animate-spin h-5 w-5 mr-3" viewBox="0 0 24 24">
                    <circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4" fill="none"/>
                    <path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"/>
                </svg>
                Processing...
            `;

            try {
                const response = await fetchWithAuth('/query', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        query,
                        mode: queryMode.value,
                        stream: false,
                        only_need_context: false
                    })
                });

                const data = await response.json();
                queryResult.innerHTML = marked.parse(data.response);
            } catch (error) {
                showToast('Error processing query');
            } finally {
                queryBtn.disabled = false;
                queryBtn.textContent = 'Send Query';
            }
        });
    },

    'status': async () => {
        const healthStatus = document.getElementById('healthStatus');
        const configStatus = document.getElementById('configStatus');

        try {
            const response = await fetchWithAuth('/health');
            const data = await response.json();

            healthStatus.innerHTML = `
                <div class="space-y-2">
                    <div class="flex items-center">
                        <div class="w-3 h-3 rounded-full ${data.status === 'healthy' ? 'bg-green-500' : 'bg-red-500'} mr-2"></div>
                        <span class="font-medium">${data.status}</span>
                    </div>
                    <div>
                        <p class="text-sm text-gray-600">Working Directory: ${data.working_directory}</p>
                        <p class="text-sm text-gray-600">Input Directory: ${data.input_directory}</p>
                        <p class="text-sm text-gray-600">Indexed Files: ${data.indexed_files_count}</p>
                    </div>
                </div>
            `;

            configStatus.innerHTML = Object.entries(data.configuration)
                .map(([key, value]) => `
                    <div class="mb-2">
                        <span class="text-sm font-medium text-gray-700">${key}:</span>
                        <span class="text-sm text-gray-600 ml-2">${value}</span>
                    </div>
                `).join('');
        } catch (error) {
            showToast('Error fetching status');
        }
    },

    'settings': () => {
        const saveBtn = document.getElementById('saveSettings');
        const apiKeyInput = document.getElementById('apiKeyInput');

        saveBtn.addEventListener('click', () => {
            state.apiKey = apiKeyInput.value;
            localStorage.setItem('apiKey', state.apiKey);
            showToast('Settings saved successfully');
        });
    }
};

// Navigation handling
document.querySelectorAll('.nav-item').forEach(item => {
    item.addEventListener('click', (e) => {
        e.preventDefault();
        const page = item.dataset.page;
        document.getElementById('content').innerHTML = pages[page]();
        if (handlers[page]) handlers[page]();
        state.currentPage = page;
    });
});

// Initialize with file manager
document.getElementById('content').innerHTML = pages['file-manager']();
handlers['file-manager']();

// Global functions
window.removeFile = (fileName) => {
    state.files = state.files.filter(file => file.name !== fileName);
    document.querySelector('#fileList div').innerHTML = state.files.map(file => `
        <div class="flex items-center justify-between bg-white p-3 rounded-lg shadow-sm">
            <span>${file.name}</span>
            <button class="text-red-600 hover:text-red-700" onclick="removeFile('${file.name}')">
                <svg class="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
                    <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19 7l-.867 12.142A2 2 0 0116.138 21H7.862a2 2 0 01-1.995-1.858L5 7m5 4v6m4-6v6m1-10V4a1 1 0 00-1-1h-4a1 1 0 00-1 1v3M4 7h16"/>
                </svg>
            </button>
        </div>
    `).join('');
};
@@ -1,211 +0,0 @@
// js/graph.js
function openGraphModal(label) {
    const modal = document.getElementById("graph-modal");
    const graphTitle = document.getElementById("graph-title");

    if (!modal || !graphTitle) {
        console.error("Key element not found");
        return;
    }

    graphTitle.textContent = `Knowledge Graph - ${label}`;
    modal.style.display = "flex";

    renderGraph(label);
}

function closeGraphModal() {
    const modal = document.getElementById("graph-modal");
    modal.style.display = "none";
    clearGraph();
}

function clearGraph() {
    const svg = document.getElementById("graph-svg");
    svg.innerHTML = "";
}


async function getGraph(label) {
    try {
        const response = await fetch(`/graphs?label=${label}`);
        const rawData = await response.json();
        console.log({data: JSON.parse(JSON.stringify(rawData))});

        const nodes = rawData.nodes;

        nodes.forEach(node => {
            node.id = Date.now().toString(36) + Math.random().toString(36).substring(2); // generate a unique ID (crypto.randomUUID() was suggested)
        });

        // Strictly verify edge data
        const edges = (rawData.edges || []).map(edge => {
            const sourceNode = nodes.find(n => n.labels.includes(edge.source));
            const targetNode = nodes.find(n => n.labels.includes(edge.target));
            if (!sourceNode || !targetNode) {
                console.warn("NOT VALID EDGE:", edge);
                return null;
            }
            return {
                source: sourceNode,
                target: targetNode,
                type: edge.type || ""
            };
        }).filter(edge => edge !== null);

        return {nodes, edges};
    } catch (error) {
        console.error("Loading graph failed:", error);
        return {nodes: [], edges: []};
    }
}

async function renderGraph(label) {
    const data = await getGraph(label);

    if (!data.nodes || data.nodes.length === 0) {
        d3.select("#graph-svg")
            .html(`<text x="50%" y="50%" text-anchor="middle">No valid nodes</text>`);
        return;
    }

    const svg = d3.select("#graph-svg");
    const width = svg.node().clientWidth;
    const height = svg.node().clientHeight;

    svg.selectAll("*").remove();

    // Create a force-directed graph layout
    const simulation = d3.forceSimulation(data.nodes)
        .force("charge", d3.forceManyBody().strength(-300))
        .force("center", d3.forceCenter(width / 2, height / 2));

    // Add a link force (if there are valid edges)
    if (data.edges.length > 0) {
        simulation.force("link",
            d3.forceLink(data.edges)
                .id(d => d.id)
                .distance(100)
        );
    }

    // Draw nodes
    const nodes = svg.selectAll(".node")
        .data(data.nodes)
        .enter()
        .append("circle")
        .attr("class", "node")
        .attr("r", 10)
        .call(d3.drag()
            .on("start", dragStarted)
            .on("drag", dragged)
            .on("end", dragEnded)
        );

    svg.append("defs")
        .append("marker")
        .attr("id", "arrow-out")
        .attr("viewBox", "0 0 10 10")
        .attr("refX", 8)
        .attr("refY", 5)
        .attr("markerWidth", 6)
        .attr("markerHeight", 6)
        .attr("orient", "auto")
        .append("path")
        .attr("d", "M0,0 L10,5 L0,10 Z")
        .attr("fill", "#999");

    // Draw edges (with arrows)
    const links = svg.selectAll(".link")
        .data(data.edges)
        .enter()
        .append("line")
        .attr("class", "link")
        .attr("marker-end", "url(#arrow-out)"); // Always draw arrows on the target side

    // Edge style configuration
    links
        .attr("stroke", "#999")
        .attr("stroke-width", 2)
        .attr("stroke-opacity", 0.8);

    // Draw labels (with background box)
    const labels = svg.selectAll(".label")
        .data(data.nodes)
        .enter()
        .append("text")
        .attr("class", "label")
        .text(d => d.labels[0] || "")
        .attr("text-anchor", "start")
        .attr("dy", "0.3em")
        .attr("fill", "#333");

    // Update positions on every tick
    simulation.on("tick", () => {
        links
            .attr("x1", d => {
                // Calculate the direction vector from the source node to the target node
                const dx = d.target.x - d.source.x;
                const dy = d.target.y - d.source.y;
                const distance = Math.sqrt(dx * dx + dy * dy);
                if (distance === 0) return d.source.x; // Avoid dividing by zero
                // Adjust the starting point coordinates (source node edge) based on radius 10
                return d.source.x + (dx / distance) * 10;
            })
            .attr("y1", d => {
                const dx = d.target.x - d.source.x;
                const dy = d.target.y - d.source.y;
                const distance = Math.sqrt(dx * dx + dy * dy);
                if (distance === 0) return d.source.y;
                return d.source.y + (dy / distance) * 10;
            })
            .attr("x2", d => {
                // Adjust the endpoint coordinates (target node edge) based on a radius of 10
                const dx = d.target.x - d.source.x;
                const dy = d.target.y - d.source.y;
                const distance = Math.sqrt(dx * dx + dy * dy);
                if (distance === 0) return d.target.x;
                return d.target.x - (dx / distance) * 10;
            })
            .attr("y2", d => {
                const dx = d.target.x - d.source.x;
                const dy = d.target.y - d.source.y;
                const distance = Math.sqrt(dx * dx + dy * dy);
                if (distance === 0) return d.target.y;
                return d.target.y - (dy / distance) * 10;
            });

        // Update the position of nodes and labels (keep unchanged)
        nodes
            .attr("cx", d => d.x)
            .attr("cy", d => d.y);

        labels
            .attr("x", d => d.x + 12)
            .attr("y", d => d.y + 4);
    });

    // Drag and drop logic
    function dragStarted(event, d) {
        if (!event.active) simulation.alphaTarget(0.3).restart();
        d.fx = d.x;
        d.fy = d.y;
    }

    function dragged(event, d) {
        d.fx = event.x;
        d.fy = event.y;
        simulation.alpha(0.3).restart();
    }

    function dragEnded(event, d) {
        if (!event.active) simulation.alphaTarget(0);
        d.fx = null;
        d.fy = null;
    }
}
File diff suppressed because one or more lines are too long:
lightrag/api/webui/assets/index-BMB0OroL.js (new file, 1065 lines)
lightrag/api/webui/assets/index-CLgSwrjG.css (new file, 1 line)
@@ -4,9 +4,9 @@
     <meta charset="UTF-8" />
     <link rel="icon" type="image/svg+xml" href="./vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Lightrag Graph Viewer</title>
-    <script type="module" crossorigin src="./assets/index-CF-pcoIl.js"></script>
-    <link rel="stylesheet" crossorigin href="./assets/index-BAeLPZpd.css">
+    <title>Lightrag</title>
+    <script type="module" crossorigin src="./assets/index-BMB0OroL.js"></script>
+    <link rel="stylesheet" crossorigin href="./assets/index-CLgSwrjG.css">
   </head>
   <body>
     <div id="root"></div>
@@ -1,6 +1,7 @@
 from __future__ import annotations

 import os
+from dotenv import load_dotenv
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import (
@@ -9,12 +10,12 @@ from typing import (
     TypedDict,
     TypeVar,
 )

 import numpy as np

 from .utils import EmbeddingFunc
 from .types import KnowledgeGraph

+load_dotenv()
+

 class TextChunkSchema(TypedDict):
     tokens: int
@@ -54,13 +55,15 @@ class QueryParam:
     top_k: int = int(os.getenv("TOP_K", "60"))
     """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""

-    max_token_for_text_unit: int = 4000
+    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
     """Maximum number of tokens allowed for each retrieved text chunk."""

-    max_token_for_global_context: int = 4000
+    max_token_for_global_context: int = int(
+        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
+    )
     """Maximum number of tokens allocated for relationship descriptions in global retrieval."""

-    max_token_for_local_context: int = 4000
+    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
     """Maximum number of tokens allocated for entity descriptions in local retrieval."""

     hl_keywords: list[str] = field(default_factory=list)
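Because these defaults are evaluated with `os.getenv` at class-definition time, the variables must be set before `lightrag` is imported, either in the shell or in the `.env` file that the new `load_dotenv()` call picks up. A sketch of the override, with illustrative values:

```python
import os

# Must happen before importing lightrag, since the dataclass defaults
# are read once at import time.
os.environ["MAX_TOKEN_TEXT_CHUNK"] = "6000"      # illustrative values
os.environ["MAX_TOKEN_RELATION_DESC"] = "3000"
os.environ["MAX_TOKEN_ENTITY_DESC"] = "3000"

from lightrag import QueryParam

param = QueryParam()
print(param.max_token_for_text_unit)  # 6000
```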
@@ -268,10 +268,10 @@ class LightRAG:
     """Directory where logs are stored. Defaults to the current working directory."""

     # Text chunking
-    chunk_token_size: int = 1200
+    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
     """Maximum number of tokens per text chunk when splitting documents."""

-    chunk_overlap_token_size: int = 100
+    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))
     """Number of overlapping tokens between consecutive text chunks to preserve context."""

     tiktoken_model_name: str = "gpt-4o-mini"
@@ -281,7 +281,7 @@ class LightRAG:
     entity_extract_max_gleaning: int = 1
     """Maximum number of entity extraction attempts for ambiguous content."""

-    entity_summary_to_max_tokens: int = 500
+    entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500"))
     """Maximum number of tokens used for summarizing extracted entities."""

     # Node embedding
@@ -1254,6 +1254,16 @@ class LightRAG:
         """
         return await self.doc_status.get_status_counts()

+    async def get_docs_by_status(
+        self, status: DocStatus
+    ) -> dict[str, DocProcessingStatus]:
+        """Get documents by status
+
+        Returns:
+            Dict with document IDs as keys and document statuses as values
+        """
+        return await self.doc_status.get_docs_by_status(status)
+
     async def adelete_by_doc_id(self, doc_id: str) -> None:
         """Delete a document and all its related data
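A sketch of how the new accessor composes with asyncio, mirroring what the /documents endpoint above does; the import paths follow the module layout implied by this diff, and the initialized `rag` instance is assumed:

```python
import asyncio

from lightrag import LightRAG
from lightrag.base import DocStatus


async def count_by_status(rag: LightRAG) -> dict[DocStatus, int]:
    """Fetch all four status buckets concurrently, as the endpoint does."""
    statuses = (
        DocStatus.PENDING,
        DocStatus.PROCESSING,
        DocStatus.PROCESSED,
        DocStatus.FAILED,
    )
    results = await asyncio.gather(*(rag.get_docs_by_status(s) for s in statuses))
    return {s: len(docs) for s, docs in zip(statuses, results)}
```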
@@ -40,9 +40,10 @@ __version__ = "1.0.0"
 __author__ = "lightrag Team"
 __status__ = "Production"


+from ..utils import verbose_debug, VERBOSE_DEBUG
 import sys
 import os
+import logging

 if sys.version_info < (3, 9):
     from typing import AsyncIterator
@@ -110,6 +111,11 @@ async def openai_complete_if_cache(
         "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_8) LightRAG/{__api_version__}",
         "Content-Type": "application/json",
     }

+    # Set openai logger level to INFO when VERBOSE_DEBUG is off
+    if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
+        logging.getLogger("openai").setLevel(logging.INFO)
+
     openai_async_client = (
         AsyncOpenAI(default_headers=default_headers, api_key=api_key)
         if base_url is None
@@ -125,13 +131,11 @@ async def openai_complete_if_cache(
     messages.extend(history_messages)
     messages.append({"role": "user", "content": prompt})

-    # Add log output
-    logger.debug("===== Query Input to LLM =====")
+    logger.debug("===== Sending Query to LLM =====")
     logger.debug(f"Model: {model} Base URL: {base_url}")
     logger.debug(f"Additional kwargs: {kwargs}")
-    logger.debug(f"Query: {prompt}")
-    logger.debug(f"System prompt: {system_prompt}")
-    # logger.debug(f"Messages: {messages}")
+    verbose_debug(f"Query: {prompt}")
+    verbose_debug(f"System prompt: {system_prompt}")

     try:
         if "response_format" in kwargs:
@@ -43,6 +43,7 @@ __status__ = "Production"
 import sys
 import re
 import json
+from ..utils import verbose_debug

 if sys.version_info < (3, 9):
     pass
@@ -119,7 +120,7 @@ async def zhipu_complete_if_cache(
     # Add debug logging
     logger.debug("===== Query Input to LLM =====")
     logger.debug(f"Query: {prompt}")
-    logger.debug(f"System prompt: {system_prompt}")
+    verbose_debug(f"System prompt: {system_prompt}")

     # Remove unsupported kwargs
     kwargs = {
@@ -687,6 +687,9 @@ async def kg_query(
     if query_param.only_need_prompt:
         return sys_prompt

+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -772,6 +775,9 @@ async def extract_keywords_only(
         query=text, examples=examples, language=language, history=history_context
     )

+    len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     # 5. Call the LLM for keyword extraction
     use_model_func = global_config["llm_model_func"]
     result = await use_model_func(kw_prompt, keyword_extraction=True)
@@ -935,7 +941,9 @@ async def mix_kg_vector_query(
                 chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
             formatted_chunks.append(chunk_text)

-        logger.info(f"Truncate {len(chunks)} to {len(formatted_chunks)} chunks")
+        logger.debug(
+            f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+        )
         return "\n--New Chunk--\n".join(formatted_chunks)
     except Exception as e:
         logger.error(f"Error in get_vector_context: {e}")
@@ -968,6 +976,9 @@ async def mix_kg_vector_query(
     if query_param.only_need_prompt:
         return sys_prompt

+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
+
     # 6. Generate response
     response = await use_model_func(
         query,
@@ -1073,7 +1084,7 @@ async def _build_query_context(
     if not entities_context.strip() and not relations_context.strip():
         return None

-    return f"""
+    result = f"""
 -----Entities-----
 ```csv
 {entities_context}
@@ -1087,6 +1098,15 @@ async def _build_query_context(
 {text_units_context}
 ```
 """
+    contex_tokens = len(encode_string_by_tiktoken(result))
+    entities_tokens = len(encode_string_by_tiktoken(entities_context))
+    relations_tokens = len(encode_string_by_tiktoken(relations_context))
+    text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
+    logger.debug(
+        f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
+    )
+
+    return result


 async def _get_node_data(
@@ -1130,8 +1150,19 @@ async def _get_node_data(
             node_datas, query_param, knowledge_graph_inst
         ),
     )

+    len_node_datas = len(node_datas)
+    node_datas = truncate_list_by_token_size(
+        node_datas,
+        key=lambda x: x["description"],
+        max_token_size=query_param.max_token_for_local_context,
+    )
+    logger.debug(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )
+
     logger.info(
-        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
+        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )

     # build prompt
@@ -1264,6 +1295,10 @@ async def _find_most_related_text_unit_from_entities(
         max_token_size=query_param.max_token_for_text_unit,
     )

+    logger.debug(
+        f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
     all_text_units = [t["data"] for t in all_text_units]
     return all_text_units

@@ -1305,6 +1340,11 @@ async def _find_most_related_edges_from_entities(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )

+    logger.debug(
+        f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
+    )
+
     return all_edges_data


@@ -1352,11 +1392,15 @@ async def _get_edge_data(
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
+    len_edge_datas = len(edge_datas)
     edge_datas = truncate_list_by_token_size(
         edge_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
+    logger.debug(
+        f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})"
+    )

     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
@@ -1367,7 +1411,7 @@ async def _get_edge_data(
         ),
     )
     logger.info(
-        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
+        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )

     relations_section_list = [
@@ -1456,11 +1500,15 @@ async def _find_most_related_entities_from_relationships(
         for k, n, d in zip(entity_names, node_datas, node_degrees)
     ]

+    len_node_datas = len(node_datas)
     node_datas = truncate_list_by_token_size(
         node_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_local_context,
     )
+    logger.debug(
+        f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
+    )

     return node_datas

@@ -1516,6 +1564,10 @@ async def _find_related_text_unit_from_relationships(
         max_token_size=query_param.max_token_for_text_unit,
     )

+    logger.debug(
+        f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
+    )
+
     all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]

     return all_text_units
@@ -1583,7 +1635,10 @@ async def naive_query(
         logger.warning("No chunks left after truncation")
         return PROMPTS["fail_response"]

-    logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
+    logger.debug(
+        f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
+    )

     section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])

     if query_param.only_need_context:
@@ -1606,6 +1661,9 @@ async def naive_query(
     if query_param.only_need_prompt:
         return sys_prompt

+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[naive_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -1748,6 +1806,9 @@ async def kg_query_with_keywords(
     if query_param.only_need_prompt:
         return sys_prompt

+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -20,6 +20,23 @@ import tiktoken

 from lightrag.prompt import PROMPTS

+VERBOSE_DEBUG = os.getenv("VERBOSE", "false").lower() == "true"
+
+
+def verbose_debug(msg: str, *args, **kwargs):
+    """Function for outputting detailed debug information.
+    When VERBOSE_DEBUG=True, outputs the complete message.
+    When VERBOSE_DEBUG=False, outputs only the first 30 characters.
+    """
+    if VERBOSE_DEBUG:
+        logger.debug(msg, *args, **kwargs)
+
+
+def set_verbose_debug(enabled: bool):
+    """Enable or disable verbose debug output"""
+    global VERBOSE_DEBUG
+    VERBOSE_DEBUG = enabled
+
+
 class UnlimitedSemaphore:
     """A context manager that allows unlimited access."""
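The module-level flag plus setter lets verbosity be flipped at runtime without touching logging handlers; `verbose_debug()` simply becomes a no-op while the flag is off. A small usage sketch, assuming the names export from `lightrag.utils` as in the hunk above:

```python
import logging

from lightrag.utils import set_verbose_debug, verbose_debug

logging.basicConfig(level=logging.DEBUG)

verbose_debug("full prompt dump ...")  # suppressed: VERBOSE defaults to "false"
set_verbose_debug(True)                # e.g. driven by the new --verbose flag
verbose_debug("full prompt dump ...")  # now emitted at DEBUG level
```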
@@ -657,6 +674,10 @@ def get_conversation_turns(
     Returns:
         Formatted string of the conversation history
     """
+    # Check if num_turns is valid
+    if num_turns <= 0:
+        return ""
+
     # Group messages into turns
     turns: list[list[dict[str, Any]]] = []
     messages: list[dict[str, Any]] = []