Merge pull request #610 from danielaskdd/indexing-during-startup

Add document indexing during FastAPI startup, Fix docx package name i…
This commit is contained in:
zrguo
2025-01-21 10:27:31 +08:00
committed by GitHub
3 changed files with 37 additions and 10 deletions

View File

@@ -64,3 +64,7 @@ LOG_LEVEL=INFO
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large # AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
# AZURE_EMBEDDING_API_VERSION=2023-05-15 # AZURE_EMBEDDING_API_VERSION=2023-05-15
# Ollama Emulating Model Tag
# OLLAMA_EMULATING_MODEL_TAG=latest

View File

@@ -9,7 +9,7 @@ User=netman
MemoryHigh=8G MemoryHigh=8G
MemoryMax=12G MemoryMax=12G
WorkingDirectory=/home/netman/lightrag-xyj WorkingDirectory=/home/netman/lightrag-xyj
ExecStart=/home/netman/lightrag-xyj/start_lightrag_server.sh ExecStart=/home/netman/lightrag-xyj/start_lightrag.sh
Restart=always Restart=always
RestartSec=10 RestartSec=10

View File

@@ -51,8 +51,8 @@ def estimate_tokens(text: str) -> int:
# Constants for emulated Ollama model information # Constants for emulated Ollama model information
LIGHTRAG_NAME = "lightrag" LIGHTRAG_NAME = "lightrag"
LIGHTRAG_TAG = "latest" LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
LIGHTRAG_MODEL = "lightrag:latest" LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"
LIGHTRAG_SIZE = 7365960935 # it's a dummy value LIGHTRAG_SIZE = 7365960935 # it's a dummy value
LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z" LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z"
LIGHTRAG_DIGEST = "sha256:lightrag" LIGHTRAG_DIGEST = "sha256:lightrag"
@@ -161,6 +161,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
# System Configuration # System Configuration
ASCIIColors.magenta("\n🛠️ System Configuration:") ASCIIColors.magenta("\n🛠️ System Configuration:")
ASCIIColors.white(" ├─ Ollama Emulating Model: ", end="")
ASCIIColors.yellow(f"{LIGHTRAG_MODEL}")
ASCIIColors.white(" ├─ Log Level: ", end="") ASCIIColors.white(" ├─ Log Level: ", end="")
ASCIIColors.yellow(f"{args.log_level}") ASCIIColors.yellow(f"{args.log_level}")
ASCIIColors.white(" ├─ Timeout: ", end="") ASCIIColors.white(" ├─ Timeout: ", end="")
@@ -574,6 +576,29 @@ def create_app(args):
# Check if API key is provided either through env var or args # Check if API key is provided either through env var or args
api_key = os.getenv("LIGHTRAG_API_KEY") or args.key api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
# Initialize document manager
doc_manager = DocumentManager(args.input_dir)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Lifespan context manager for startup and shutdown events.

    On startup, asks the document manager to scan its directory and indexes
    each returned file via ``index_file``. A failure on one file is logged
    and does not stop the remaining files from being processed; a failure of
    the scan itself is logged and the application still starts.

    Args:
        app: The FastAPI application this lifespan is attached to (not used
            by the startup logic itself).

    Yields:
        None. Control returns to FastAPI once startup indexing has finished.
    """
    # Startup logic
    indexed_count = 0  # only files whose indexing completed without error
    try:
        new_files = doc_manager.scan_directory()
        for file_path in new_files:
            try:
                await index_file(file_path)
            except Exception as e:
                # Best-effort: record the failure and continue with the next file.
                trace_exception(e)
                logging.error(f"Error indexing file {file_path}: {str(e)}")
            else:
                indexed_count += 1
        # Report successes only, so the count is not inflated by failed files.
        logging.info(f"Indexed {indexed_count} documents from {args.input_dir}")
    except Exception as e:
        # Startup indexing must never prevent the server from coming up.
        logging.error(f"Error during startup indexing: {str(e)}")

    yield
    # Cleanup logic (if needed): nothing to release at shutdown currently.
# Initialize FastAPI # Initialize FastAPI
app = FastAPI( app = FastAPI(
title="LightRAG API", title="LightRAG API",
@@ -583,6 +608,7 @@ def create_app(args):
else "", else "",
version=__api_version__, version=__api_version__,
openapi_tags=[{"name": "api"}], openapi_tags=[{"name": "api"}],
lifespan=lifespan,
) )
# Add CORS middleware # Add CORS middleware
@@ -600,9 +626,6 @@ def create_app(args):
# Create working directory if it doesn't exist # Create working directory if it doesn't exist
Path(args.working_dir).mkdir(parents=True, exist_ok=True) Path(args.working_dir).mkdir(parents=True, exist_ok=True)
# Initialize document manager
doc_manager = DocumentManager(args.input_dir)
async def openai_alike_model_complete( async def openai_alike_model_complete(
prompt, prompt,
system_prompt=None, system_prompt=None,
@@ -737,8 +760,8 @@ def create_app(args):
content += page.extract_text() + "\n" content += page.extract_text() + "\n"
case ".docx": case ".docx":
if not pm.is_installed("docx"): if not pm.is_installed("python-docx"):
pm.install("docx") pm.install("python-docx")
from docx import Document from docx import Document
# Word document handling # Word document handling
@@ -971,8 +994,8 @@ def create_app(args):
content += page.extract_text() + "\n" content += page.extract_text() + "\n"
case ".docx": case ".docx":
if not pm.is_installed("docx"): if not pm.is_installed("python-docx"):
pm.install("docx") pm.install("python-docx")
from docx import Document from docx import Document
from io import BytesIO from io import BytesIO