Merge pull request #610 from danielaskdd/indexing-during-startup
Add document indexing during FastAPI startup; fix docx package name (python-docx) in the install check
This commit is contained in:
@@ -64,3 +64,7 @@ LOG_LEVEL=INFO
|
|||||||
|
|
||||||
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
|
# AZURE_EMBEDDING_DEPLOYMENT=text-embedding-3-large
|
||||||
# AZURE_EMBEDDING_API_VERSION=2023-05-15
|
# AZURE_EMBEDDING_API_VERSION=2023-05-15
|
||||||
|
|
||||||
|
|
||||||
|
# Ollama Emulating Model Tag
|
||||||
|
# OLLAMA_EMULATING_MODEL_TAG=latest
|
||||||
|
@@ -9,7 +9,7 @@ User=netman
|
|||||||
MemoryHigh=8G
|
MemoryHigh=8G
|
||||||
MemoryMax=12G
|
MemoryMax=12G
|
||||||
WorkingDirectory=/home/netman/lightrag-xyj
|
WorkingDirectory=/home/netman/lightrag-xyj
|
||||||
ExecStart=/home/netman/lightrag-xyj/start_lightrag_server.sh
|
ExecStart=/home/netman/lightrag-xyj/start_lightrag.sh
|
||||||
Restart=always
|
Restart=always
|
||||||
RestartSec=10
|
RestartSec=10
|
||||||
|
|
||||||
|
@@ -51,8 +51,8 @@ def estimate_tokens(text: str) -> int:
|
|||||||
|
|
||||||
# Constants for emulated Ollama model information
|
# Constants for emulated Ollama model information
|
||||||
LIGHTRAG_NAME = "lightrag"
|
LIGHTRAG_NAME = "lightrag"
|
||||||
LIGHTRAG_TAG = "latest"
|
LIGHTRAG_TAG = os.getenv("OLLAMA_EMULATING_MODEL_TAG", "latest")
|
||||||
LIGHTRAG_MODEL = "lightrag:latest"
|
LIGHTRAG_MODEL = f"{LIGHTRAG_NAME}:{LIGHTRAG_TAG}"
|
||||||
LIGHTRAG_SIZE = 7365960935 # it's a dummy value
|
LIGHTRAG_SIZE = 7365960935 # it's a dummy value
|
||||||
LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z"
|
LIGHTRAG_CREATED_AT = "2024-01-15T00:00:00Z"
|
||||||
LIGHTRAG_DIGEST = "sha256:lightrag"
|
LIGHTRAG_DIGEST = "sha256:lightrag"
|
||||||
@@ -161,6 +161,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
|
|||||||
|
|
||||||
# System Configuration
|
# System Configuration
|
||||||
ASCIIColors.magenta("\n🛠️ System Configuration:")
|
ASCIIColors.magenta("\n🛠️ System Configuration:")
|
||||||
|
ASCIIColors.white(" ├─ Ollama Emulating Model: ", end="")
|
||||||
|
ASCIIColors.yellow(f"{LIGHTRAG_MODEL}")
|
||||||
ASCIIColors.white(" ├─ Log Level: ", end="")
|
ASCIIColors.white(" ├─ Log Level: ", end="")
|
||||||
ASCIIColors.yellow(f"{args.log_level}")
|
ASCIIColors.yellow(f"{args.log_level}")
|
||||||
ASCIIColors.white(" ├─ Timeout: ", end="")
|
ASCIIColors.white(" ├─ Timeout: ", end="")
|
||||||
@@ -574,6 +576,29 @@ def create_app(args):
|
|||||||
# Check if API key is provided either through env var or args
|
# Check if API key is provided either through env var or args
|
||||||
api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
|
api_key = os.getenv("LIGHTRAG_API_KEY") or args.key
|
||||||
|
|
||||||
|
# Initialize document manager
|
||||||
|
doc_manager = DocumentManager(args.input_dir)
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Index documents found in the input directory, then run the app.

    Everything before ``yield`` is the startup phase; everything after it
    is the shutdown phase. Startup indexing is best-effort: a failure on
    one file, or of the directory scan itself, is logged and never
    prevents the application from starting.

    Args:
        app: The FastAPI application this lifespan is attached to
            (not used by the body).
    """
    # Startup logic
    try:
        new_files = doc_manager.scan_directory()
        indexed_count = 0
        for file_path in new_files:
            try:
                await index_file(file_path)
            except Exception as e:
                # Keep going: one bad document must not block the rest.
                trace_exception(e)
                logging.error(f"Error indexing file {file_path}: {str(e)}")
            else:
                indexed_count += 1

        # Report only the files that were actually indexed; failed files
        # were already logged individually above.
        logging.info(
            f"Indexed {indexed_count} of {len(new_files)} documents "
            f"from {args.input_dir}"
        )
    except Exception as e:
        # Scan-level failure: record the traceback the same way the
        # per-file handler does, then let the server start anyway.
        trace_exception(e)
        logging.error(f"Error during startup indexing: {str(e)}")

    yield
    # Cleanup logic (if needed): nothing to release at shutdown currently.
|
||||||
|
|
||||||
# Initialize FastAPI
|
# Initialize FastAPI
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
title="LightRAG API",
|
title="LightRAG API",
|
||||||
@@ -583,6 +608,7 @@ def create_app(args):
|
|||||||
else "",
|
else "",
|
||||||
version=__api_version__,
|
version=__api_version__,
|
||||||
openapi_tags=[{"name": "api"}],
|
openapi_tags=[{"name": "api"}],
|
||||||
|
lifespan=lifespan,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Add CORS middleware
|
# Add CORS middleware
|
||||||
@@ -600,9 +626,6 @@ def create_app(args):
|
|||||||
# Create working directory if it doesn't exist
|
# Create working directory if it doesn't exist
|
||||||
Path(args.working_dir).mkdir(parents=True, exist_ok=True)
|
Path(args.working_dir).mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Initialize document manager
|
|
||||||
doc_manager = DocumentManager(args.input_dir)
|
|
||||||
|
|
||||||
async def openai_alike_model_complete(
|
async def openai_alike_model_complete(
|
||||||
prompt,
|
prompt,
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
@@ -737,8 +760,8 @@ def create_app(args):
|
|||||||
content += page.extract_text() + "\n"
|
content += page.extract_text() + "\n"
|
||||||
|
|
||||||
case ".docx":
|
case ".docx":
|
||||||
if not pm.is_installed("docx"):
|
if not pm.is_installed("python-docx"):
|
||||||
pm.install("docx")
|
pm.install("python-docx")
|
||||||
from docx import Document
|
from docx import Document
|
||||||
|
|
||||||
# Word document handling
|
# Word document handling
|
||||||
@@ -971,8 +994,8 @@ def create_app(args):
|
|||||||
content += page.extract_text() + "\n"
|
content += page.extract_text() + "\n"
|
||||||
|
|
||||||
case ".docx":
|
case ".docx":
|
||||||
if not pm.is_installed("docx"):
|
if not pm.is_installed("python-docx"):
|
||||||
pm.install("docx")
|
pm.install("python-docx")
|
||||||
from docx import Document
|
from docx import Document
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user