Merge pull request #797 from danielaskdd/add-env-settings

Add the token size truncation for local query and token size setting by env
2025-02-17 15:00:07 +08:00
parent 0d19ca8945 b7cce9312f
commit fce24f7611
11 changed files with 142 additions and 41 deletions
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -222,6 +222,7 @@ You can select storage  implementation by enviroment variables or command line a
 | --max-embed-tokens | 8192 | Maximum embedding token size |
 | --timeout | None | Timeout in seconds (useful when using slow AI). Use None for infinite timeout |
 | --log-level | INFO | Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) |
+| --verbose | False | Verbose debug output (True, False) |
 | --key | None | API key for authentication. Protects lightrag server against unauthorized access |
 | --ssl | False | Enable HTTPS |
 | --ssl-certfile | None | Path to SSL certificate file (required if --ssl is enabled) |
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -133,8 +133,8 @@ def get_env_value(env_key: str, default: Any, value_type: type = str) -> Any:
    if value is None:
        return default

-    if isinstance(value_type, bool):
-        return value.lower() in ("true", "1", "yes")
+    if value_type is bool:
+        return value.lower() in ("true", "1", "yes", "t", "on")
    try:
        return value_type(value)
    except ValueError:
@@ -236,6 +236,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
    ASCIIColors.yellow(f"{ollama_server_infos.LIGHTRAG_MODEL}")
    ASCIIColors.white("    ├─ Log Level: ", end="")
    ASCIIColors.yellow(f"{args.log_level}")
+    ASCIIColors.white("    ├─ Verbose Debug: ", end="")
+    ASCIIColors.yellow(f"{args.verbose}")
    ASCIIColors.white("    └─ Timeout: ", end="")
    ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")

@@ -565,6 +567,13 @@ def parse_args() -> argparse.Namespace:
        help="Prefix of the namespace",
    )

+    parser.add_argument(
+        "--verbose",
+        type=bool,
+        default=get_env_value("VERBOSE", False, bool),
+        help="Verbose debug output(default: from env or false)",
+    )
+
    args = parser.parse_args()

    # conver relative path to absolute path
@@ -768,6 +777,11 @@ temp_prefix = "__tmp_"  # prefix for temporary files


 def create_app(args):
+    # Initialize verbose debug setting
+    from lightrag.utils import set_verbose_debug
+
+    set_verbose_debug(args.verbose)
+
    global global_top_k
    global_top_k = args.top_k  # save top_k from args

--- a/lightrag/api/ollama_api.py
+++ b/lightrag/api/ollama_api.py
@@ -11,6 +11,7 @@ from fastapi.responses import StreamingResponse
 import asyncio
 from ascii_colors import trace_exception
 from lightrag import LightRAG, QueryParam
+from lightrag.utils import encode_string_by_tiktoken
 from dotenv import load_dotenv


@@ -111,18 +112,9 @@ class OllamaTagResponse(BaseModel):


 def estimate_tokens(text: str) -> int:
-    """Estimate the number of tokens in text
-    Chinese characters: approximately 1.5 tokens per character
-    English characters: approximately 0.25 tokens per character
-    """
-    # Use regex to match Chinese and non-Chinese characters separately
-    chinese_chars = len(re.findall(r"[\u4e00-\u9fff]", text))
-    non_chinese_chars = len(re.findall(r"[^\u4e00-\u9fff]", text))
-
-    # Calculate estimated token count
-    tokens = chinese_chars * 1.5 + non_chinese_chars * 0.25
-
-    return int(tokens)
+    """Estimate the number of tokens in text using tiktoken"""
+    tokens = encode_string_by_tiktoken(text)
+    return len(tokens)


 def parse_query_mode(query: str) -> tuple[str, SearchMode]: