add: option to replace the default tiktoken tokenizer with a custom one

2025-04-17 10:56:23 +02:00
parent 4fd40fd798
commit 20ba1eb9c2
6 changed files with 138 additions and 53 deletions
--- a/lightrag/api/routers/ollama_api.py
+++ b/lightrag/api/routers/ollama_api.py
@@ -10,7 +10,7 @@ from fastapi.responses import StreamingResponse
 import asyncio
 from ascii_colors import trace_exception
 from lightrag import LightRAG, QueryParam
-from lightrag.utils import encode_string_by_tiktoken
+from lightrag.utils import TiktokenTokenizer 
 from lightrag.api.utils_api import ollama_server_infos, get_combined_auth_dependency
 from fastapi import Depends

@@ -97,7 +97,7 @@ class OllamaTagResponse(BaseModel):

 def estimate_tokens(text: str) -> int:
    """Estimate the number of tokens in text using tiktoken"""
-    tokens = encode_string_by_tiktoken(text)
+    tokens = TiktokenTokenizer().encode(text)
    return len(tokens)