Merge branch 'HKUDS:main' into main

Authored by Saifeddine ALOUI on 2025-02-03 11:24:08 +01:00, committed by GitHub.
8 changed files with 193 additions and 132 deletions

@@ -13,18 +13,6 @@ from fastapi import (
 from typing import Dict
 import threading
-
-# Global progress tracker
-scan_progress: Dict = {
-    "is_scanning": False,
-    "current_file": "",
-    "indexed_count": 0,
-    "total_files": 0,
-    "progress": 0,
-}
-
-# Lock for thread-safe operations
-progress_lock = threading.Lock()
 import json
 import os
@@ -34,7 +22,7 @@ import logging
 import argparse
 import time
 import re
-from typing import List, Dict, Any, Optional, Union
+from typing import List, Any, Optional, Union
 from lightrag import LightRAG, QueryParam
 from lightrag.api import __api_version__
@@ -57,8 +45,21 @@ import pipmaster as pm
 from dotenv import load_dotenv
 
 # Load environment variables
 load_dotenv()
+
+# Global progress tracker
+scan_progress: Dict = {
+    "is_scanning": False,
+    "current_file": "",
+    "indexed_count": 0,
+    "total_files": 0,
+    "progress": 0,
+}
+
+# Lock for thread-safe operations
+progress_lock = threading.Lock()
 
 def estimate_tokens(text: str) -> int:
     """Estimate the number of tokens in text
@@ -918,6 +919,12 @@ def create_app(args):
             vector_db_storage_cls_kwargs={
                 "cosine_better_than_threshold": args.cosine_threshold
             },
+            enable_llm_cache_for_entity_extract=False,  # set to True for debugging to reduce LLM cost
+            embedding_cache_config={
+                "enabled": True,
+                "similarity_threshold": 0.95,
+                "use_llm_check": False,
+            },
         )
     else:
         rag = LightRAG(
@@ -941,6 +948,12 @@ def create_app(args):
             vector_db_storage_cls_kwargs={
                 "cosine_better_than_threshold": args.cosine_threshold
             },
+            enable_llm_cache_for_entity_extract=False,  # set to True for debugging to reduce LLM cost
+            embedding_cache_config={
+                "enabled": True,
+                "similarity_threshold": 0.95,
+                "use_llm_check": False,
+            },
         )
 
     async def index_file(file_path: Union[str, Path]) -> None:
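
Note: both branches of create_app() now pass the same two extra keyword arguments to LightRAG. A rough, self-contained sketch of an equivalent call, assuming the constructor accepts these arguments as the hunks above indicate; the working directory and cosine threshold values are placeholders rather than values taken from this diff:

from lightrag import LightRAG

rag = LightRAG(
    working_dir="./rag_storage",  # placeholder path
    vector_db_storage_cls_kwargs={
        "cosine_better_than_threshold": 0.2,  # placeholder threshold
    },
    # Keep the LLM cache for entity extraction off; the inline comment in the
    # diff suggests enabling it while debugging to cut LLM cost.
    enable_llm_cache_for_entity_extract=False,
    # Embedding-cache options added by this diff: caching enabled, a 0.95
    # similarity threshold for treating a lookup as a cache hit, and no extra
    # LLM verification of hits.
    embedding_cache_config={
        "enabled": True,
        "similarity_threshold": 0.95,
        "use_llm_check": False,
    },
)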