Merge pull request #982 from danielaskdd/standalone-logger-setup

Providing setup_logger for standalone LightRAG logger initialization
This commit is contained in:
zrguo
2025-03-04 12:16:37 +08:00
committed by GitHub
6 changed files with 152 additions and 56 deletions
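
In short: logging is now configured once, up front, through `lightrag.utils.setup_logger`, instead of through `log_level`/`log_file_path` arguments on the `LightRAG` constructor (both are now deprecated). A minimal sketch of the new initialization pattern, assembled from the README changes in this diff; the working directory and model choices are illustrative:

```python
import asyncio

from lightrag import LightRAG
from lightrag.llm.openai import gpt_4o_mini_complete, openai_embed
from lightrag.kg.shared_storage import initialize_pipeline_status
from lightrag.utils import setup_logger

# Configure the "lightrag" logger once, before any LightRAG instance is created
setup_logger("lightrag", level="INFO")


async def main():
    rag = LightRAG(
        working_dir="./rag_storage",  # illustrative path
        embedding_func=openai_embed,
        llm_model_func=gpt_4o_mini_complete,
    )
    # Initialize database connections and the document-processing pipeline status
    await rag.initialize_storages()
    await initialize_pipeline_status()


asyncio.run(main())
```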

View File

@@ -106,6 +106,9 @@ import asyncio
 from lightrag import LightRAG, QueryParam
 from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed
 from lightrag.kg.shared_storage import initialize_pipeline_status
+from lightrag.utils import setup_logger
+
+setup_logger("lightrag", level="INFO")
 
 async def initialize_rag():
     rag = LightRAG(
@@ -344,6 +347,10 @@ from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_i
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.llms.openai import OpenAI
 from lightrag.kg.shared_storage import initialize_pipeline_status
+from lightrag.utils import setup_logger
+
+# Setup log handler for LightRAG
+setup_logger("lightrag", level="INFO")
 
 async def initialize_rag():
     rag = LightRAG(
@@ -640,6 +647,9 @@ export NEO4J_URI="neo4j://localhost:7687"
 export NEO4J_USERNAME="neo4j"
 export NEO4J_PASSWORD="password"
 
+# Setup logger for LightRAG
+setup_logger("lightrag", level="INFO")
+
 # When you launch the project be sure to override the default KG: NetworkX
 # by specifying kg="Neo4JStorage".
@@ -649,8 +659,12 @@ rag = LightRAG(
     working_dir=WORKING_DIR,
     llm_model_func=gpt_4o_mini_complete,  # Use gpt_4o_mini_complete LLM model
     graph_storage="Neo4JStorage",  #<-----------override KG default
-    log_level="DEBUG"  #<-----------override log_level default
 )
+
+# Initialize database connections
+await rag.initialize_storages()
+# Initialize pipeline status for document processing
+await initialize_pipeline_status()
 ```
 
 see test_neo4j.py for a working example.
@@ -859,7 +873,6 @@ Valid modes are:
 | **kv\_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`, `OracleKVStorage` | `JsonKVStorage` |
 | **vector\_storage** | `str` | Storage type for embedding vectors. Supported types: `NanoVectorDBStorage`, `OracleVectorDBStorage` | `NanoVectorDBStorage` |
 | **graph\_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`, `Neo4JStorage`, `OracleGraphStorage` | `NetworkXStorage` |
-| **log\_level** | | Log level for application runtime | `logging.DEBUG` |
 | **chunk\_token\_size** | `int` | Maximum token size per chunk when splitting documents | `1200` |
 | **chunk\_overlap\_token\_size** | `int` | Overlap token size between two chunks when splitting documents | `100` |
 | **tiktoken\_model\_name** | `str` | Model name for the Tiktoken encoder used to calculate token numbers | `gpt-4o-mini` |
@@ -881,7 +894,6 @@ Valid modes are:
 | **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` |
 | **convert\_response\_to\_json\_func** | `callable` | Not used | `convert_response_to_json` |
 | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:<br>- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.<br>- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.<br>- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` |
-| **log\_dir** | `str` | Directory to store logs. | `./` |
 </details>

View File

@@ -2,12 +2,15 @@
 import os
 import logging
 from lightrag.kg.shared_storage import finalize_share_data
-from lightrag.api.lightrag_server import LightragPathFilter
+from lightrag.utils import setup_logger
 
 # Get log directory path from environment variable
 log_dir = os.getenv("LOG_DIR", os.getcwd())
 log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
 
+# Ensure log directory exists
+os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+
 # Get log file max size and backup count from environment variables
 log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
 log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups
@@ -108,6 +111,9 @@ def on_starting(server):
     except ImportError:
         print("psutil not installed, skipping memory usage reporting")
 
+    # Log the location of the LightRAG log file
+    print(f"LightRAG log file: {log_file_path}\n")
+
     print("Gunicorn initialization complete, forking workers...\n")
@@ -134,51 +140,18 @@ def post_fork(server, worker):
     Executed after a worker has been forked.
     This is a good place to set up worker-specific configurations.
     """
-    # Configure formatters
-    detailed_formatter = logging.Formatter(
-        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
-    )
-    simple_formatter = logging.Formatter("%(levelname)s: %(message)s")
-
-    def setup_logger(logger_name: str, level: str = "INFO", add_filter: bool = False):
-        """Set up a logger with console and file handlers"""
-        logger_instance = logging.getLogger(logger_name)
-        logger_instance.setLevel(level)
-        logger_instance.handlers = []  # Clear existing handlers
-        logger_instance.propagate = False
-
-        # Add console handler
-        console_handler = logging.StreamHandler()
-        console_handler.setFormatter(simple_formatter)
-        console_handler.setLevel(level)
-        logger_instance.addHandler(console_handler)
-
-        # Add file handler
-        file_handler = logging.handlers.RotatingFileHandler(
-            filename=log_file_path,
-            maxBytes=log_max_bytes,
-            backupCount=log_backup_count,
-            encoding="utf-8",
-        )
-        file_handler.setFormatter(detailed_formatter)
-        file_handler.setLevel(level)
-        logger_instance.addHandler(file_handler)
-
-        # Add path filter if requested
-        if add_filter:
-            path_filter = LightragPathFilter()
-            logger_instance.addFilter(path_filter)
-
     # Set up main loggers
     log_level = loglevel.upper() if loglevel else "INFO"
-    setup_logger("uvicorn", log_level)
-    setup_logger("uvicorn.access", log_level, add_filter=True)
-    setup_logger("lightrag", log_level, add_filter=True)
+    setup_logger("uvicorn", log_level, add_filter=False, log_file_path=log_file_path)
+    setup_logger(
+        "uvicorn.access", log_level, add_filter=True, log_file_path=log_file_path
+    )
+    setup_logger("lightrag", log_level, add_filter=True, log_file_path=log_file_path)
 
     # Set up lightrag submodule loggers
     for name in logging.root.manager.loggerDict:
         if name.startswith("lightrag."):
-            setup_logger(name, log_level, add_filter=True)
+            setup_logger(name, log_level, add_filter=True, log_file_path=log_file_path)
 
     # Disable uvicorn.error logger
     uvicorn_error_logger = logging.getLogger("uvicorn.error")
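
With the local helper gone, the rotating-file settings that `post_fork` used to hardcode are now read inside the shared `setup_logger`, so they can be tuned per deployment through environment variables. A small sketch, assuming the `LOG_DIR`, `LOG_MAX_BYTES`, and `LOG_BACKUP_COUNT` variables shown in this diff; the concrete values are illustrative:

```python
import os

from lightrag.utils import setup_logger

# These variables are read inside setup_logger when no explicit log_file_path
# is given (see the utils.py diff below); the values here are illustrative.
os.environ["LOG_DIR"] = "/var/log/lightrag"          # directory for lightrag.log
os.environ["LOG_MAX_BYTES"] = str(20 * 1024 * 1024)  # rotate at 20MB (default 10MB)
os.environ["LOG_BACKUP_COUNT"] = "3"                 # keep 3 backups (default 5)

setup_logger("lightrag", level="DEBUG")
```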

View File

@@ -329,7 +329,6 @@ def create_app(args):
"similarity_threshold": 0.95, "similarity_threshold": 0.95,
"use_llm_check": False, "use_llm_check": False,
}, },
log_level=args.log_level,
namespace_prefix=args.namespace_prefix, namespace_prefix=args.namespace_prefix,
auto_manage_storages_states=False, auto_manage_storages_states=False,
) )
@@ -359,7 +358,6 @@ def create_app(args):
"similarity_threshold": 0.95, "similarity_threshold": 0.95,
"use_llm_check": False, "use_llm_check": False,
}, },
log_level=args.log_level,
namespace_prefix=args.namespace_prefix, namespace_prefix=args.namespace_prefix,
auto_manage_storages_states=False, auto_manage_storages_states=False,
) )
@@ -437,6 +435,9 @@ def configure_logging():
     log_dir = os.getenv("LOG_DIR", os.getcwd())
     log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
 
+    print(f"\nLightRAG log file: {log_file_path}\n")
+    os.makedirs(os.path.dirname(log_dir), exist_ok=True)
+
     # Get log file max size and backup count from environment variables
     log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760))  # Default 10MB
     log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5))  # Default 5 backups

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
 import asyncio
 import configparser
 import os
+import warnings
 from dataclasses import asdict, dataclass, field
 from datetime import datetime
 from functools import partial
@@ -85,14 +86,10 @@ class LightRAG:
     doc_status_storage: str = field(default="JsonDocStatusStorage")
     """Storage type for tracking document processing statuses."""
 
-    # Logging
+    # Logging (Deprecated, use setup_logger in utils.py instead)
     # ---
-    log_level: int = field(default=logger.level)
-    """Logging level for the system (e.g., 'DEBUG', 'INFO', 'WARNING')."""
-    log_file_path: str = field(default=os.path.join(os.getcwd(), "lightrag.log"))
-    """Log file path."""
+    log_level: int | None = field(default=None)
+    log_file_path: str | None = field(default=None)
 
     # Entity extraction
     # ---
@@ -266,13 +263,30 @@ class LightRAG:
     _storages_status: StoragesStatus = field(default=StoragesStatus.NOT_CREATED)
 
     def __post_init__(self):
-        os.makedirs(os.path.dirname(self.log_file_path), exist_ok=True)
-        logger.info(f"Logger initialized for working directory: {self.working_dir}")
-
         from lightrag.kg.shared_storage import (
             initialize_share_data,
         )
 
+        # Handle deprecated parameters
+        if self.log_level is not None:
+            warnings.warn(
+                "WARNING: log_level parameter is deprecated, use setup_logger in utils.py instead",
+                UserWarning,
+                stacklevel=2,
+            )
+        if self.log_file_path is not None:
+            warnings.warn(
+                "WARNING: log_file_path parameter is deprecated, use setup_logger in utils.py instead",
+                UserWarning,
+                stacklevel=2,
+            )
+
+        # Remove these attributes to prevent their use
+        if hasattr(self, "log_level"):
+            delattr(self, "log_level")
+        if hasattr(self, "log_file_path"):
+            delattr(self, "log_file_path")
+
         initialize_share_data()
 
         if not os.path.exists(self.working_dir):
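
The deprecation path above is advisory: passing either parameter still constructs the instance, but emits a `UserWarning` and the attribute is deleted in `__post_init__`. A rough sketch of what a caller would observe; the remaining `LightRAG` arguments needed for a real instance (embedding and LLM functions, etc.) are omitted, so treat this as illustrative rather than a complete runnable configuration:

```python
import logging
import warnings

from lightrag import LightRAG

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Deprecated: triggers the UserWarning added in __post_init__ above.
    rag = LightRAG(working_dir="./rag_storage", log_level=logging.DEBUG)

assert any("log_level parameter is deprecated" in str(w.message) for w in caught)
assert not hasattr(rag, "log_level")  # attribute is delattr'd to prevent reliance on it
```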

View File

@@ -6,6 +6,7 @@ import io
 import csv
 import json
 import logging
+import logging.handlers
 import os
 import re
 from dataclasses import dataclass
@@ -68,6 +69,101 @@ logger.setLevel(logging.INFO)
logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING)
class LightragPathFilter(logging.Filter):
"""Filter for lightrag logger to filter out frequent path access logs"""
def __init__(self):
super().__init__()
# Define paths to be filtered
self.filtered_paths = ["/documents", "/health", "/webui/"]
def filter(self, record):
try:
# Check if record has the required attributes for an access log
if not hasattr(record, "args") or not isinstance(record.args, tuple):
return True
if len(record.args) < 5:
return True
# Extract method, path and status from the record args
method = record.args[1]
path = record.args[2]
status = record.args[4]
# Filter out successful GET requests to filtered paths
if (
method == "GET"
and (status == 200 or status == 304)
and path in self.filtered_paths
):
return False
return True
except Exception:
# In case of any error, let the message through
return True
def setup_logger(
logger_name: str,
level: str = "INFO",
add_filter: bool = False,
log_file_path: str = None,
):
"""Set up a logger with console and file handlers
Args:
logger_name: Name of the logger to set up
level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL)
add_filter: Whether to add LightragPathFilter to the logger
log_file_path: Path to the log file. If None, will use current directory/lightrag.log
"""
# Configure formatters
detailed_formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
simple_formatter = logging.Formatter("%(levelname)s: %(message)s")
# Get log file path
if log_file_path is None:
log_dir = os.getenv("LOG_DIR", os.getcwd())
log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log"))
# Ensure log directory exists
os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
# Get log file max size and backup count from environment variables
log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB
log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups
logger_instance = logging.getLogger(logger_name)
logger_instance.setLevel(level)
logger_instance.handlers = [] # Clear existing handlers
logger_instance.propagate = False
# Add console handler
console_handler = logging.StreamHandler()
console_handler.setFormatter(simple_formatter)
console_handler.setLevel(level)
logger_instance.addHandler(console_handler)
# Add file handler
file_handler = logging.handlers.RotatingFileHandler(
filename=log_file_path,
maxBytes=log_max_bytes,
backupCount=log_backup_count,
encoding="utf-8",
)
file_handler.setFormatter(detailed_formatter)
file_handler.setLevel(level)
logger_instance.addHandler(file_handler)
# Add path filter if requested
if add_filter:
path_filter = LightragPathFilter()
logger_instance.addFilter(path_filter)
class UnlimitedSemaphore: class UnlimitedSemaphore:
"""A context manager that allows unlimited access.""" """A context manager that allows unlimited access."""

View File

@@ -3,7 +3,7 @@ configparser
 future
 
 # Basic modules
-numpy
+gensim
 pipmaster
 pydantic
 python-dotenv