added typing
This commit is contained in:
@@ -2,7 +2,7 @@ import asyncio
|
|||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from tqdm.asyncio import tqdm as tqdm_async
|
from tqdm.asyncio import tqdm as tqdm_async
|
||||||
from typing import Union
|
from typing import Any, Union
|
||||||
from collections import Counter, defaultdict
|
from collections import Counter, defaultdict
|
||||||
from .utils import (
|
from .utils import (
|
||||||
logger,
|
logger,
|
||||||
@@ -42,9 +42,9 @@ def chunking_by_token_size(
|
|||||||
max_token_size=1024,
|
max_token_size=1024,
|
||||||
tiktoken_model="gpt-4o",
|
tiktoken_model="gpt-4o",
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
) -> list[dict[str, Any]]:
|
||||||
tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
|
tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
|
||||||
results = []
|
results: list[dict[str, Any]] = []
|
||||||
if split_by_character:
|
if split_by_character:
|
||||||
raw_chunks = content.split(split_by_character)
|
raw_chunks = content.split(split_by_character)
|
||||||
new_chunks = []
|
new_chunks = []
|
||||||
|
Reference in New Issue
Block a user