add: option to replace the default tiktoken tokenizer with a custom one

This commit is contained in:
drahnreb
2025-04-17 10:56:23 +02:00
parent 4fd40fd798
commit 20ba1eb9c2
6 changed files with 138 additions and 53 deletions

View File

@@ -10,7 +10,7 @@ from fastapi.responses import StreamingResponse
import asyncio
from ascii_colors import trace_exception
from lightrag import LightRAG, QueryParam
from lightrag.utils import encode_string_by_tiktoken
from lightrag.utils import TiktokenTokenizer
from lightrag.api.utils_api import ollama_server_infos, get_combined_auth_dependency
from fastapi import Depends
@@ -97,7 +97,7 @@ class OllamaTagResponse(BaseModel):
def estimate_tokens(text: str) -> int:
    """Estimate the number of tokens in ``text`` using tiktoken.

    The text is encoded with a default-constructed ``TiktokenTokenizer`` and
    the length of the resulting token sequence is returned.

    Args:
        text: The string to tokenize.

    Returns:
        The number of tokens produced by the tokenizer's ``encode`` call.
    """
    # Encode exactly once through the tokenizer class; a second, earlier
    # encoding whose result was immediately overwritten has been removed.
    return len(TiktokenTokenizer().encode(text))