diff --git a/lightrag/base.py b/lightrag/base.py
index 1a7f9c2e..ae5ce92e 100644
--- a/lightrag/base.py
+++ b/lightrag/base.py
@@ -27,31 +27,54 @@ T = TypeVar("T")
 
 @dataclass
 class QueryParam:
-    mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
-    only_need_context: bool = False
-    only_need_prompt: bool = False
-    response_type: str = "Multiple Paragraphs"
-    stream: bool = False
-    # Number of top-k items to retrieve; corresponds to entities in "local" mode and relationships in "global" mode.
-    top_k: int = int(os.getenv("TOP_K", "60"))
-    # Number of document chunks to retrieve.
-    # top_n: int = 10
-    # Number of tokens for the original chunks.
-    max_token_for_text_unit: int = 4000
-    # Number of tokens for the relationship descriptions
-    max_token_for_global_context: int = 4000
-    # Number of tokens for the entity descriptions
-    max_token_for_local_context: int = 4000
-    hl_keywords: list[str] = field(default_factory=list)
-    ll_keywords: list[str] = field(default_factory=list)
-    # Conversation history support
-    conversation_history: list[dict[str, str]] = field(
-        default_factory=list
-    )  # Format: [{"role": "user/assistant", "content": "message"}]
-    history_turns: int = (
-        3  # Number of complete conversation turns (user-assistant pairs) to consider
-    )
+    """Configuration parameters for query execution in LightRAG."""
+    mode: Literal["local", "global", "hybrid", "naive", "mix"] = "global"
+    """Specifies the retrieval mode:
+    - "local": Focuses on context-dependent information.
+    - "global": Utilizes global knowledge.
+    - "hybrid": Combines local and global retrieval methods.
+    - "naive": Performs a basic search without advanced techniques.
+    - "mix": Integrates knowledge graph and vector retrieval.
+    """
+
+    only_need_context: bool = False
+    """If True, only returns the retrieved context without generating a response."""
+
+    only_need_prompt: bool = False
+    """If True, only returns the generated prompt without producing a response."""
+
+    response_type: str = "Multiple Paragraphs"
+    """Defines the response format. Examples: 'Multiple Paragraphs', 'Single Paragraph', 'Bullet Points'."""
+
+    stream: bool = False
+    """If True, enables streaming output for real-time responses."""
+
+    top_k: int = int(os.getenv("TOP_K", "60"))
+    """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
+
+    max_token_for_text_unit: int = 4000
+    """Maximum number of tokens allowed for each retrieved text chunk."""
+
+    max_token_for_global_context: int = 4000
+    """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
+
+    max_token_for_local_context: int = 4000
+    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
+
+    hl_keywords: list[str] = field(default_factory=list)
+    """List of high-level keywords to prioritize in retrieval."""
+
+    ll_keywords: list[str] = field(default_factory=list)
+    """List of low-level keywords to refine retrieval focus."""
+
+    conversation_history: list[dict[str, str]] = field(default_factory=list)
+    """Stores past conversation history to maintain context.
+    Format: [{"role": "user/assistant", "content": "message"}].
+    """
+
+    history_turns: int = 3
+    """Number of complete conversation turns (user-assistant pairs) to consider in the response context."""
 
 
 @dataclass
 class StorageNameSpace:
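
Usage note for reviewers (not part of the patch): a minimal sketch of constructing the revised QueryParam, assuming the existing LightRAG.query(query, param=...) entry point; the field values below are illustrative only.

    from lightrag.base import QueryParam

    # Hybrid retrieval with a smaller candidate set and one turn of history.
    param = QueryParam(
        mode="hybrid",
        top_k=40,  # entities in "local" mode, relationships in "global" mode
        max_token_for_text_unit=3000,
        conversation_history=[
            {"role": "user", "content": "What does QueryParam control?"},
            {"role": "assistant", "content": "Retrieval mode and token budgets."},
        ],
        history_turns=1,  # only the most recent user-assistant pair
    )

    # Typical call site, assuming an initialized LightRAG instance `rag`:
    # response = rag.query("How does hybrid mode differ from mix?", param=param)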