diff --git a/README.md b/README.md index abc2f8b3..5e8c5a94 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,9 @@ import asyncio from lightrag import LightRAG, QueryParam from lightrag.llm.openai import gpt_4o_mini_complete, gpt_4o_complete, openai_embed from lightrag.kg.shared_storage import initialize_pipeline_status +from lightrag.utils import setup_logger + +setup_logger("lightrag", level="INFO") async def initialize_rag(): rag = LightRAG( @@ -344,6 +347,10 @@ from lightrag.llm.llama_index_impl import llama_index_complete_if_cache, llama_i from llama_index.embeddings.openai import OpenAIEmbedding from llama_index.llms.openai import OpenAI from lightrag.kg.shared_storage import initialize_pipeline_status +from lightrag.utils import setup_logger + +# Setup log handler for LightRAG +setup_logger("lightrag", level="INFO") async def initialize_rag(): rag = LightRAG( @@ -640,6 +647,9 @@ export NEO4J_URI="neo4j://localhost:7687" export NEO4J_USERNAME="neo4j" export NEO4J_PASSWORD="password" +# Setup logger for LightRAG +setup_logger("lightrag", level="INFO") + # When you launch the project be sure to override the default KG: NetworkX # by specifying kg="Neo4JStorage". @@ -649,8 +659,12 @@ rag = LightRAG( working_dir=WORKING_DIR, llm_model_func=gpt_4o_mini_complete, # Use gpt_4o_mini_complete LLM model graph_storage="Neo4JStorage", #<-----------override KG default - log_level="DEBUG" #<-----------override log_level default ) + +# Initialize database connections +await rag.initialize_storages() +# Initialize pipeline status for document processing +await initialize_pipeline_status() ``` see test_neo4j.py for a working example. @@ -859,7 +873,6 @@ Valid modes are: | **kv\_storage** | `str` | Storage type for documents and text chunks. Supported types: `JsonKVStorage`, `OracleKVStorage` | `JsonKVStorage` | | **vector\_storage** | `str` | Storage type for embedding vectors. 
Supported types: `NanoVectorDBStorage`, `OracleVectorDBStorage` | `NanoVectorDBStorage` | | **graph\_storage** | `str` | Storage type for graph edges and nodes. Supported types: `NetworkXStorage`, `Neo4JStorage`, `OracleGraphStorage` | `NetworkXStorage` | -| **log\_level** | | Log level for application runtime | `logging.DEBUG` | | **chunk\_token\_size** | `int` | Maximum token size per chunk when splitting documents | `1200` | | **chunk\_overlap\_token\_size** | `int` | Overlap token size between two chunks when splitting documents | `100` | | **tiktoken\_model\_name** | `str` | Model name for the Tiktoken encoder used to calculate token numbers | `gpt-4o-mini` | @@ -881,7 +894,6 @@ Valid modes are: | **addon\_params** | `dict` | Additional parameters, e.g., `{"example_number": 1, "language": "Simplified Chinese", "entity_types": ["organization", "person", "geo", "event"], "insert_batch_size": 10}`: sets example limit, output language, and batch size for document processing | `example_number: all examples, language: English, insert_batch_size: 10` | | **convert\_response\_to\_json\_func** | `callable` | Not used | `convert_response_to_json` | | **embedding\_cache\_config** | `dict` | Configuration for question-answer caching. Contains three parameters:
- `enabled`: Boolean value to enable/disable cache lookup functionality. When enabled, the system will check cached responses before generating new answers.
- `similarity_threshold`: Float value (0-1), similarity threshold. When a new question's similarity with a cached question exceeds this threshold, the cached answer will be returned directly without calling the LLM.
- `use_llm_check`: Boolean value to enable/disable LLM similarity verification. When enabled, LLM will be used as a secondary check to verify the similarity between questions before returning cached answers. | Default: `{"enabled": False, "similarity_threshold": 0.95, "use_llm_check": False}` | -|**log\_dir** | `str` | Directory to store logs. | `./` | diff --git a/lightrag/api/gunicorn_config.py b/lightrag/api/gunicorn_config.py index 7f9b4d58..0594ceae 100644 --- a/lightrag/api/gunicorn_config.py +++ b/lightrag/api/gunicorn_config.py @@ -2,12 +2,15 @@ import os import logging from lightrag.kg.shared_storage import finalize_share_data -from lightrag.api.lightrag_server import LightragPathFilter +from lightrag.utils import setup_logger # Get log directory path from environment variable log_dir = os.getenv("LOG_DIR", os.getcwd()) log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log")) +# Ensure log directory exists +os.makedirs(os.path.dirname(log_file_path), exist_ok=True) + # Get log file max size and backup count from environment variables log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups @@ -108,6 +111,9 @@ def on_starting(server): except ImportError: print("psutil not installed, skipping memory usage reporting") + # Log the location of the LightRAG log file + print(f"LightRAG log file: {log_file_path}\n") + print("Gunicorn initialization complete, forking workers...\n") @@ -134,51 +140,18 @@ def post_fork(server, worker): Executed after a worker has been forked. This is a good place to set up worker-specific configurations. 
""" - # Configure formatters - detailed_formatter = logging.Formatter( - "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - ) - simple_formatter = logging.Formatter("%(levelname)s: %(message)s") - - def setup_logger(logger_name: str, level: str = "INFO", add_filter: bool = False): - """Set up a logger with console and file handlers""" - logger_instance = logging.getLogger(logger_name) - logger_instance.setLevel(level) - logger_instance.handlers = [] # Clear existing handlers - logger_instance.propagate = False - - # Add console handler - console_handler = logging.StreamHandler() - console_handler.setFormatter(simple_formatter) - console_handler.setLevel(level) - logger_instance.addHandler(console_handler) - - # Add file handler - file_handler = logging.handlers.RotatingFileHandler( - filename=log_file_path, - maxBytes=log_max_bytes, - backupCount=log_backup_count, - encoding="utf-8", - ) - file_handler.setFormatter(detailed_formatter) - file_handler.setLevel(level) - logger_instance.addHandler(file_handler) - - # Add path filter if requested - if add_filter: - path_filter = LightragPathFilter() - logger_instance.addFilter(path_filter) - # Set up main loggers log_level = loglevel.upper() if loglevel else "INFO" - setup_logger("uvicorn", log_level) - setup_logger("uvicorn.access", log_level, add_filter=True) - setup_logger("lightrag", log_level, add_filter=True) + setup_logger("uvicorn", log_level, add_filter=False, log_file_path=log_file_path) + setup_logger( + "uvicorn.access", log_level, add_filter=True, log_file_path=log_file_path + ) + setup_logger("lightrag", log_level, add_filter=True, log_file_path=log_file_path) # Set up lightrag submodule loggers for name in logging.root.manager.loggerDict: if name.startswith("lightrag."): - setup_logger(name, log_level, add_filter=True) + setup_logger(name, log_level, add_filter=True, log_file_path=log_file_path) # Disable uvicorn.error logger uvicorn_error_logger = logging.getLogger("uvicorn.error") diff --git 
a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py index 5f2c437f..c91f693f 100644 --- a/lightrag/api/lightrag_server.py +++ b/lightrag/api/lightrag_server.py @@ -329,7 +329,6 @@ def create_app(args): "similarity_threshold": 0.95, "use_llm_check": False, }, - log_level=args.log_level, namespace_prefix=args.namespace_prefix, auto_manage_storages_states=False, ) @@ -359,7 +358,6 @@ def create_app(args): "similarity_threshold": 0.95, "use_llm_check": False, }, - log_level=args.log_level, namespace_prefix=args.namespace_prefix, auto_manage_storages_states=False, ) @@ -437,6 +435,9 @@ def configure_logging(): log_dir = os.getenv("LOG_DIR", os.getcwd()) log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log")) + print(f"\nLightRAG log file: {log_file_path}\n") + os.makedirs(os.path.dirname(log_file_path), exist_ok=True)  # create the directory that will hold the log file + # Get log file max size and backup count from environment variables log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py index 13202992..a5d3c94b 100644 --- a/lightrag/lightrag.py +++ b/lightrag/lightrag.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio import configparser import os +import warnings from dataclasses import asdict, dataclass, field from datetime import datetime from functools import partial @@ -85,14 +86,10 @@ class LightRAG: doc_status_storage: str = field(default="JsonDocStatusStorage") """Storage type for tracking document processing statuses.""" - # Logging + # Logging (Deprecated, use setup_logger in utils.py instead) # --- - - log_level: int = field(default=logger.level) - """Logging level for the system (e.g., 'DEBUG', 'INFO', 'WARNING').""" - - log_file_path: str = field(default=os.path.join(os.getcwd(), "lightrag.log")) - """Log file path.""" + log_level: int | None = field(default=None) + log_file_path: str | None = 
field(default=None) # Entity extraction # --- @@ -266,13 +263,30 @@ class LightRAG: _storages_status: StoragesStatus = field(default=StoragesStatus.NOT_CREATED) def __post_init__(self): - os.makedirs(os.path.dirname(self.log_file_path), exist_ok=True) - logger.info(f"Logger initialized for working directory: {self.working_dir}") - from lightrag.kg.shared_storage import ( initialize_share_data, ) + # Handle deprecated parameters + if self.log_level is not None: + warnings.warn( + "WARNING: log_level parameter is deprecated, use setup_logger in utils.py instead", + UserWarning, + stacklevel=2, + ) + if self.log_file_path is not None: + warnings.warn( + "WARNING: log_file_path parameter is deprecated, use setup_logger in utils.py instead", + UserWarning, + stacklevel=2, + ) + + # Remove these attributes to prevent their use + if hasattr(self, "log_level"): + delattr(self, "log_level") + if hasattr(self, "log_file_path"): + delattr(self, "log_file_path") + initialize_share_data() if not os.path.exists(self.working_dir): diff --git a/lightrag/utils.py b/lightrag/utils.py index c86ad8c0..bb1d6fae 100644 --- a/lightrag/utils.py +++ b/lightrag/utils.py @@ -6,6 +6,7 @@ import io import csv import json import logging +import logging.handlers import os import re from dataclasses import dataclass @@ -68,6 +69,101 @@ logger.setLevel(logging.INFO) logging.getLogger("httpx").setLevel(logging.WARNING) +class LightragPathFilter(logging.Filter): + """Filter for lightrag logger to filter out frequent path access logs""" + + def __init__(self): + super().__init__() + # Define paths to be filtered + self.filtered_paths = ["/documents", "/health", "/webui/"] + + def filter(self, record): + try: + # Check if record has the required attributes for an access log + if not hasattr(record, "args") or not isinstance(record.args, tuple): + return True + if len(record.args) < 5: + return True + + # Extract method, path and status from the record args + method = record.args[1] + path = 
record.args[2] + status = record.args[4] + + # Filter out successful GET requests to filtered paths + if ( + method == "GET" + and (status == 200 or status == 304) + and path in self.filtered_paths + ): + return False + + return True + except Exception: + # In case of any error, let the message through + return True + + +def setup_logger( + logger_name: str, + level: str = "INFO", + add_filter: bool = False, + log_file_path: str = None, +): + """Set up a logger with console and file handlers + + Args: + logger_name: Name of the logger to set up + level: Log level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + add_filter: Whether to add LightragPathFilter to the logger + log_file_path: Path to the log file. If None, uses lightrag.log in the LOG_DIR env directory (default: current directory) + """ + # Configure formatters + detailed_formatter = logging.Formatter( + "%(asctime)s - %(name)s - %(levelname)s - %(message)s" + ) + simple_formatter = logging.Formatter("%(levelname)s: %(message)s") + + # Get log file path + if log_file_path is None: + log_dir = os.getenv("LOG_DIR", os.getcwd()) + log_file_path = os.path.abspath(os.path.join(log_dir, "lightrag.log")) + + # Ensure log directory exists (abspath first: a bare filename has an empty dirname) + os.makedirs(os.path.dirname(os.path.abspath(log_file_path)), exist_ok=True) + + # Get log file max size and backup count from environment variables + log_max_bytes = int(os.getenv("LOG_MAX_BYTES", 10485760)) # Default 10MB + log_backup_count = int(os.getenv("LOG_BACKUP_COUNT", 5)) # Default 5 backups + + logger_instance = logging.getLogger(logger_name) + logger_instance.setLevel(level) + logger_instance.handlers = [] # Clear existing handlers + logger_instance.propagate = False + + # Add console handler + console_handler = logging.StreamHandler() + console_handler.setFormatter(simple_formatter) + console_handler.setLevel(level) + logger_instance.addHandler(console_handler) + + # Add file handler + file_handler = logging.handlers.RotatingFileHandler( + filename=log_file_path, + maxBytes=log_max_bytes, + backupCount=log_backup_count, + 
encoding="utf-8", + ) + file_handler.setFormatter(detailed_formatter) + file_handler.setLevel(level) + logger_instance.addHandler(file_handler) + + # Add path filter if requested + if add_filter: + path_filter = LightragPathFilter() + logger_instance.addFilter(path_filter) + + class UnlimitedSemaphore: """A context manager that allows unlimited access.""" diff --git a/requirements.txt b/requirements.txt index a1a1157e..d9a5c68e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ configparser future # Basic modules -numpy +gensim pipmaster pydantic python-dotenv