cleaned type
This commit is contained in:
@@ -4,7 +4,7 @@ from tqdm.asyncio import tqdm as tqdm_async
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from functools import partial
|
||||
from typing import Any, Type, Union, cast
|
||||
from typing import Any, Callable, Optional, Type, Union, cast
|
||||
import traceback
|
||||
from .operate import (
|
||||
chunking_by_token_size,
|
||||
@@ -177,13 +177,24 @@ class LightRAG:
|
||||
|
||||
# extension
|
||||
addon_params: dict[str, Any] = field(default_factory=dict)
|
||||
convert_response_to_json_func: callable = convert_response_to_json
|
||||
convert_response_to_json_func: Callable[[str], dict[str, Any]] = convert_response_to_json
|
||||
|
||||
# Add new field for document status storage type
|
||||
doc_status_storage: str = field(default="JsonDocStatusStorage")
|
||||
|
||||
# Custom Chunking Function
|
||||
chunking_func: callable = chunking_by_token_size
|
||||
chunking_func: Callable[
|
||||
[
|
||||
str,
|
||||
Optional[str],
|
||||
bool,
|
||||
int,
|
||||
int,
|
||||
str,
|
||||
],
|
||||
list[dict[str, Any]],
|
||||
] = chunking_by_token_size
|
||||
|
||||
chunking_func_kwargs: dict = field(default_factory=dict)
|
||||
|
||||
def __post_init__(self):
|
||||
@@ -538,8 +549,6 @@ class LightRAG:
|
||||
return
|
||||
|
||||
full_docs_ids = await self.full_docs.get_by_ids(to_process_doc_keys)
|
||||
new_docs = {}
|
||||
if full_docs_ids:
|
||||
new_docs = {doc["id"]: doc for doc in full_docs_ids or []}
|
||||
|
||||
if not new_docs:
|
||||
|
@@ -36,12 +36,11 @@ import time
|
||||
|
||||
def chunking_by_token_size(
|
||||
content: str,
|
||||
split_by_character=None,
|
||||
split_by_character_only=False,
|
||||
overlap_token_size=128,
|
||||
max_token_size=1024,
|
||||
tiktoken_model="gpt-4o",
|
||||
**kwargs,
|
||||
split_by_character: Union[str, None]=None,
|
||||
split_by_character_only: bool =False,
|
||||
overlap_token_size: int =128,
|
||||
max_token_size: int =1024,
|
||||
tiktoken_model: str="gpt-4o"
|
||||
) -> list[dict[str, Any]]:
|
||||
tokens = encode_string_by_tiktoken(content, model_name=tiktoken_model)
|
||||
results: list[dict[str, Any]] = []
|
||||
|
@@ -98,7 +98,7 @@ def locate_json_string_body_from_string(content: str) -> Union[str, None]:
|
||||
return None
|
||||
|
||||
|
||||
def convert_response_to_json(response: str) -> dict:
|
||||
def convert_response_to_json(response: str) -> dict[str, Any]:
|
||||
json_str = locate_json_string_body_from_string(response)
|
||||
assert json_str is not None, f"Unable to parse JSON from response: {response}"
|
||||
try:
|
||||
|
Reference in New Issue
Block a user