Merge branch 'main' into feat_login-jwt
This commit is contained in:
@@ -2,12 +2,15 @@
|
||||
import os
|
||||
import logging
|
||||
from lightrag.kg.shared_storage import finalize_share_data
|
||||
from lightrag.api.lightrag_server import LightragPathFilter
|
||||
from lightrag.utils import setup_logger
|
||||
|
||||
# Get log directory path from environment variable
|
||||
log_dir = os.getenv("LOG_DIR", os.getcwd())
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
|
||||
|
||||
# Ensure log directory exists
|
||||
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
|
||||
|
||||
# Get log file max size and backup count from environment variables
|
||||
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
|
||||
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
|
||||
@@ -108,6 +111,9 @@ def on_starting(server):
|
||||
except ImportError:
|
||||
print("psutil not installed, skipping memory usage reporting")
|
||||
|
||||
# Log the location of the LightRAG log file
|
||||
print(f"LightRAG log file: {log_file_path}\n")
|
||||
|
||||
print("Gunicorn initialization complete, forking workers...\n")
|
||||
|
||||
|
||||
@@ -134,51 +140,18 @@ def post_fork(server, worker):
|
||||
Executed after a worker has been forked.
|
||||
This is a good place to set up worker-specific configurations.
|
||||
"""
|
||||
# Configure formatters
|
||||
detailed_formatter = logging.Formatter(
|
||||
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
simple_formatter = logging.Formatter("%(levelname)s: %(message)s")
|
||||
|
||||
def setup_logger(logger_name: str, level: str = "INFO", add_filter: bool = False):
|
||||
"""Set up a logger with console and file handlers"""
|
||||
logger_instance = logging.getLogger(logger_name)
|
||||
logger_instance.setLevel(level)
|
||||
logger_instance.handlers = [] # Clear existing handlers
|
||||
logger_instance.propagate = False
|
||||
|
||||
# Add console handler
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setFormatter(simple_formatter)
|
||||
console_handler.setLevel(level)
|
||||
logger_instance.addHandler(console_handler)
|
||||
|
||||
# Add file handler
|
||||
file_handler = logging.handlers.RotatingFileHandler(
|
||||
filename=log_file_path,
|
||||
maxBytes=log_max_bytes,
|
||||
backupCount=log_backup_count,
|
||||
encoding="utf-8",
|
||||
)
|
||||
file_handler.setFormatter(detailed_formatter)
|
||||
file_handler.setLevel(level)
|
||||
logger_instance.addHandler(file_handler)
|
||||
|
||||
# Add path filter if requested
|
||||
if add_filter:
|
||||
path_filter = LightragPathFilter()
|
||||
logger_instance.addFilter(path_filter)
|
||||
|
||||
# Set up main loggers
|
||||
log_level = loglevel.upper() if loglevel else "INFO"
|
||||
setup_logger("uvicorn", log_level)
|
||||
setup_logger("uvicorn.access", log_level, add_filter=True)
|
||||
setup_logger("lightrag", log_level, add_filter=True)
|
||||
setup_logger("uvicorn", log_level, add_filter=False, log_file_path=log_file_path)
|
||||
setup_logger(
|
||||
"uvicorn.access", log_level, add_filter=True, log_file_path=log_file_path
|
||||
)
|
||||
setup_logger("lightrag", log_level, add_filter=True, log_file_path=log_file_path)
|
||||
|
||||
# Set up lightrag submodule loggers
|
||||
for name in logging.root.manager.loggerDict:
|
||||
if name.startswith("lightrag."):
|
||||
setup_logger(name, log_level, add_filter=True)
|
||||
setup_logger(name, log_level, add_filter=True, log_file_path=log_file_path)
|
||||
|
||||
# Disable uvicorn.error logger
|
||||
uvicorn_error_logger = logging.getLogger("uvicorn.error")
|
||||
|
@@ -9,7 +9,6 @@ from fastapi import (
|
||||
Request,
|
||||
status
|
||||
)
|
||||
from fastapi.responses import FileResponse
|
||||
import asyncio
|
||||
import os
|
||||
import logging
|
||||
@@ -23,7 +22,7 @@ from ascii_colors import ASCIIColors
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from contextlib import asynccontextmanager
|
||||
from dotenv import load_dotenv
|
||||
from .utils_api import (
|
||||
from lightrag.api.utils_api import (
|
||||
get_api_key_dependency,
|
||||
parse_args,
|
||||
get_default_host,
|
||||
@@ -34,14 +33,14 @@ from lightrag import LightRAG
|
||||
from lightrag.types import GPTKeywordExtractionFormat
|
||||
from lightrag.api import __api_version__
|
||||
from lightrag.utils import EmbeddingFunc
|
||||
from .routers.document_routes import (
|
||||
from lightrag.api.routers.document_routes import (
|
||||
DocumentManager,
|
||||
create_document_routes,
|
||||
run_scanning_process,
|
||||
)
|
||||
from .routers.query_routes import create_query_routes
|
||||
from .routers.graph_routes import create_graph_routes
|
||||
from .routers.ollama_api import OllamaAPI
|
||||
from lightrag.api.routers.query_routes import create_query_routes
|
||||
from lightrag.api.routers.graph_routes import create_graph_routes
|
||||
from lightrag.api.routers.ollama_api import OllamaAPI
|
||||
|
||||
from lightrag.utils import logger, set_verbose_debug
|
||||
from lightrag.kg.shared_storage import (
|
||||
@@ -54,7 +53,9 @@ from fastapi.security import OAuth2PasswordRequestForm
|
||||
from .auth import auth_handler
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv(override=True)
|
||||
# Updated to use the .env that is inside the current folder
|
||||
# This update allows the user to put a different .env file in each lightrag folder
|
||||
load_dotenv(".env", override=True)
|
||||
|
||||
# Initialize config parser
|
||||
config = configparser.ConfigParser()
|
||||
@@ -335,8 +336,10 @@ def create_app(args):
|
||||
"similarity_threshold": 0.95,
|
||||
"use_llm_check": False,
|
||||
},
|
||||
log_level=args.log_level,
|
||||
namespace_prefix=args.namespace_prefix,
|
||||
addon_params={
|
||||
"language": args.language,
|
||||
},
|
||||
auto_manage_storages_states=False,
|
||||
)
|
||||
else: # azure_openai
|
||||
@@ -365,7 +368,6 @@ def create_app(args):
|
||||
"similarity_threshold": 0.95,
|
||||
"use_llm_check": False,
|
||||
},
|
||||
log_level=args.log_level,
|
||||
namespace_prefix=args.namespace_prefix,
|
||||
auto_manage_storages_states=False,
|
||||
)
|
||||
@@ -437,17 +439,7 @@ def create_app(args):
|
||||
StaticFiles(directory=static_dir, html=True, check_dir=True),
|
||||
name="webui",
|
||||
)
|
||||
|
||||
@app.get("/webui/")
|
||||
async def webui_root():
|
||||
return FileResponse(static_dir / "index.html")
|
||||
|
||||
@app.middleware("http")
|
||||
async def debug_middleware(request: Request, call_next):
|
||||
print(f"Request path: {request.url.path}")
|
||||
response = await call_next(request)
|
||||
return response
|
||||
|
||||
|
||||
return app
|
||||
|
||||
|
||||
@@ -471,6 +463,9 @@ def configure_logging():
|
||||
log_dir = os.getenv("LOG_DIR", os.getcwd())
|
||||
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
|
||||
|
||||
print(f"\nLightRAG log file: {log_file_path}\n")
|
||||
os.makedirs(os.path.dirname(log_dir), exist_ok=True)
|
||||
|
||||
# Get log file max size and backup count from environment variables
|
||||
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
|
||||
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
|
||||
|
@@ -214,9 +214,29 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
||||
| ".scss"
|
||||
| ".less"
|
||||
):
|
||||
content = file.decode("utf-8")
|
||||
try:
|
||||
# Try to decode as UTF-8
|
||||
content = file.decode("utf-8")
|
||||
|
||||
# Validate content
|
||||
if not content or len(content.strip()) == 0:
|
||||
logger.error(f"Empty content in file: {file_path.name}")
|
||||
return False
|
||||
|
||||
# Check if content looks like binary data string representation
|
||||
if content.startswith("b'") or content.startswith('b"'):
|
||||
logger.error(
|
||||
f"File {file_path.name} appears to contain binary data representation instead of text"
|
||||
)
|
||||
return False
|
||||
|
||||
except UnicodeDecodeError:
|
||||
logger.error(
|
||||
f"File {file_path.name} is not valid UTF-8 encoded text. Please convert it to UTF-8 before processing."
|
||||
)
|
||||
return False
|
||||
case ".pdf":
|
||||
if not pm.is_installed("pypdf2"):
|
||||
if not pm.is_installed("pypdf2"): # type: ignore
|
||||
pm.install("pypdf2")
|
||||
from PyPDF2 import PdfReader # type: ignore
|
||||
from io import BytesIO
|
||||
@@ -226,18 +246,18 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
||||
for page in reader.pages:
|
||||
content += page.extract_text() + "\n"
|
||||
case ".docx":
|
||||
if not pm.is_installed("docx"):
|
||||
if not pm.is_installed("python-docx"): # type: ignore
|
||||
pm.install("docx")
|
||||
from docx import Document
|
||||
from docx import Document # type: ignore
|
||||
from io import BytesIO
|
||||
|
||||
docx_file = BytesIO(file)
|
||||
doc = Document(docx_file)
|
||||
content = "\n".join([paragraph.text for paragraph in doc.paragraphs])
|
||||
case ".pptx":
|
||||
if not pm.is_installed("pptx"):
|
||||
if not pm.is_installed("python-pptx"): # type: ignore
|
||||
pm.install("pptx")
|
||||
from pptx import Presentation
|
||||
from pptx import Presentation # type: ignore
|
||||
from io import BytesIO
|
||||
|
||||
pptx_file = BytesIO(file)
|
||||
@@ -247,9 +267,9 @@ async def pipeline_enqueue_file(rag: LightRAG, file_path: Path) -> bool:
|
||||
if hasattr(shape, "text"):
|
||||
content += shape.text + "\n"
|
||||
case ".xlsx":
|
||||
if not pm.is_installed("openpyxl"):
|
||||
if not pm.is_installed("openpyxl"): # type: ignore
|
||||
pm.install("openpyxl")
|
||||
from openpyxl import load_workbook
|
||||
from openpyxl import load_workbook # type: ignore
|
||||
from io import BytesIO
|
||||
|
||||
xlsx_file = BytesIO(file)
|
||||
|
@@ -9,6 +9,11 @@ import signal
|
||||
import pipmaster as pm
|
||||
from lightrag.api.utils_api import parse_args, display_splash_screen
|
||||
from lightrag.kg.shared_storage import initialize_share_data, finalize_share_data
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Updated to use the .env that is inside the current folder
|
||||
# This update allows the user to put a different .env file in each lightrag folder
|
||||
load_dotenv(".env")
|
||||
|
||||
|
||||
def check_and_install_dependencies():
|
||||
|
@@ -396,6 +396,7 @@ def parse_args(is_uvicorn_mode: bool = False) -> argparse.Namespace:
|
||||
# Inject chunk configuration
|
||||
args.chunk_size = get_env_value("CHUNK_SIZE", 1200, int)
|
||||
args.chunk_overlap_size = get_env_value("CHUNK_OVERLAP_SIZE", 100, int)
|
||||
args.language = get_env_value("LANGUAGE", "English")
|
||||
|
||||
ollama_server_infos.LIGHTRAG_MODEL = args.simulated_model_name
|
||||
|
||||
|
Reference in New Issue
Block a user