fix linting

drahnreb
2025-04-18 16:14:31 +02:00
parent e71f466910
commit 9c6b5aefcb
5 changed files with 53 additions and 28 deletions


@@ -12,7 +12,7 @@ import re
 from dataclasses import dataclass
 from functools import wraps
 from hashlib import md5
-from typing import Any, Protocol, Callable, TYPE_CHECKING, List, Optional, Union
+from typing import Any, Protocol, Callable, TYPE_CHECKING, List
 import xml.etree.ElementTree as ET
 import numpy as np
 from lightrag.prompt import PROMPTS
@@ -311,6 +311,7 @@ class TokenizerInterface(Protocol):
     """
     Defines the interface for a tokenizer, requiring encode and decode methods.
     """
+
     def encode(self, content: str) -> List[int]:
         """Encodes a string into a list of tokens."""
         ...
@@ -319,10 +320,12 @@ class TokenizerInterface(Protocol):
         """Decodes a list of tokens into a string."""
         ...
 
+
 class Tokenizer:
     """
     A wrapper around a tokenizer to provide a consistent interface for encoding and decoding.
     """
+
     def __init__(self, model_name: str, tokenizer: TokenizerInterface):
         """
         Initializes the Tokenizer with a tokenizer model name and a tokenizer instance.
@@ -363,6 +366,7 @@ class TiktokenTokenizer(Tokenizer):
     """
     A Tokenizer implementation using the tiktoken library.
     """
+
     def __init__(self, model_name: str = "gpt-4o-mini"):
         """
         Initializes the TiktokenTokenizer with a specified model name.
@@ -385,9 +389,7 @@ class TiktokenTokenizer(Tokenizer):
             tokenizer = tiktoken.encoding_for_model(model_name)
             super().__init__(model_name=model_name, tokenizer=tokenizer)
         except KeyError:
-            raise ValueError(
-                f"Invalid model_name: {model_name}."
-            )
+            raise ValueError(f"Invalid model_name: {model_name}.")
 
 
 def pack_user_ass_to_openai_messages(*args: str):
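
For reference, a minimal usage sketch of the tokenizer classes touched above (not part of the commit). It assumes the classes live in lightrag.utils, that the tiktoken package is installed, and that Tokenizer forwards encode/decode to the wrapped tiktoken encoding:

    # Hedged sketch: the import path and the forwarding behaviour are assumptions.
    from lightrag.utils import TiktokenTokenizer

    tokenizer = TiktokenTokenizer(model_name="gpt-4o-mini")  # raises ValueError for an unknown model
    token_ids = tokenizer.encode("hello world")  # list of token ids
    round_trip = tokenizer.decode(token_ids)     # back to "hello world"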
@@ -424,7 +426,10 @@ def is_float_regex(value: str) -> bool:
 
 
 def truncate_list_by_token_size(
-    list_data: list[Any], key: Callable[[Any], str], max_token_size: int, tokenizer: Tokenizer
+    list_data: list[Any],
+    key: Callable[[Any], str],
+    max_token_size: int,
+    tokenizer: Tokenizer,
 ) -> list[int]:
     """Truncate a list of data by token size"""
     if max_token_size <= 0:
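
A similar sketch of calling truncate_list_by_token_size with the reformatted signature (hypothetical data; assumes the function and TiktokenTokenizer are importable from lightrag.utils):

    # Hedged sketch: per the docstring above, the function presumably keeps leading
    # items while the cumulative token count of key(item) stays within max_token_size.
    from lightrag.utils import TiktokenTokenizer, truncate_list_by_token_size

    tokenizer = TiktokenTokenizer(model_name="gpt-4o-mini")
    docs = ["a short note", "a much longer passage " * 200]
    kept = truncate_list_by_token_size(
        docs,
        key=lambda d: d,
        max_token_size=128,
        tokenizer=tokenizer,
    )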