Merge pull request #797 from danielaskdd/add-env-settings

Add token-size truncation for local queries and make the token-size limits configurable via environment variables
Authored by zrguo on 2025-02-17 15:00:07 +08:00; committed by GitHub
11 changed files with 142 additions and 41 deletions


@@ -1,6 +1,7 @@
 from __future__ import annotations
 import os
+from dotenv import load_dotenv
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import (
@@ -9,12 +10,12 @@ from typing import (
     TypedDict,
     TypeVar,
 )
 import numpy as np
 from .utils import EmbeddingFunc
 from .types import KnowledgeGraph
+load_dotenv()
 class TextChunkSchema(TypedDict):
     tokens: int
@@ -54,13 +55,15 @@ class QueryParam:
     top_k: int = int(os.getenv("TOP_K", "60"))
     """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
-    max_token_for_text_unit: int = 4000
+    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
     """Maximum number of tokens allowed for each retrieved text chunk."""
-    max_token_for_global_context: int = 4000
+    max_token_for_global_context: int = int(
+        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
+    )
     """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
-    max_token_for_local_context: int = 4000
+    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
     """Maximum number of tokens allocated for entity descriptions in local retrieval."""
     hl_keywords: list[str] = field(default_factory=list)
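
With this change the QueryParam token budgets can be tuned through environment variables (TOP_K, MAX_TOKEN_TEXT_CHUNK, MAX_TOKEN_ENTITY_DESC, MAX_TOKEN_RELATION_DESC) instead of editing the defaults. Below is a minimal usage sketch; the lightrag.base module path and the concrete values are assumptions for illustration, not part of this diff. Because the defaults are evaluated with os.getenv at class-definition time, the variables must be in the environment (or in a .env file picked up by load_dotenv()) before the module is first imported.

import os

# Hedged sketch, not part of this PR: override the env-configurable limits shown in the diff.
os.environ["TOP_K"] = "100"                     # entities in 'local' mode, relationships in 'global' mode
os.environ["MAX_TOKEN_TEXT_CHUNK"] = "6000"     # per retrieved text chunk
os.environ["MAX_TOKEN_ENTITY_DESC"] = "6000"    # entity descriptions, local retrieval
os.environ["MAX_TOKEN_RELATION_DESC"] = "6000"  # relationship descriptions, global retrieval

# Module path assumed for illustration; import after the variables are set,
# since the dataclass defaults are read with os.getenv when the class is defined.
from lightrag.base import QueryParam

param = QueryParam()
print(param.max_token_for_text_unit)      # 6000
print(param.max_token_for_local_context)  # 6000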