Merge pull request #435 from davidleon/fix/unicode_escape
Fix Unicode escape decoding for the case of "/utils" in the response.
This commit is contained in:
@@ -29,6 +29,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||
from .utils import (
|
||||
wrap_embedding_func_with_attrs,
|
||||
locate_json_string_body_from_string,
|
||||
safe_unicode_decode,
|
||||
)
|
||||
|
||||
import sys
|
||||
@@ -85,14 +86,14 @@ async def openai_complete_if_cache(
|
||||
if content is None:
|
||||
continue
|
||||
if r"\u" in content:
|
||||
content = content.encode("utf-8").decode("unicode_escape")
|
||||
content = safe_unicode_decode(content.encode("utf-8"))
|
||||
yield content
|
||||
|
||||
return inner()
|
||||
else:
|
||||
content = response.choices[0].message.content
|
||||
if r"\u" in content:
|
||||
content = content.encode("utf-8").decode("unicode_escape")
|
||||
content = safe_unicode_decode(content.encode("utf-8"))
|
||||
return content
|
||||
|
||||
|
||||
|
@@ -507,3 +507,20 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
|
||||
}
|
||||
|
||||
await hashing_kv.upsert({cache_data.mode: mode_cache})
|
||||
|
||||
|
||||
# Matches either a UTF-16 surrogate pair written as two consecutive escapes
# (\uD800-\uDBFF followed by \uDC00-\uDFFF) or a single \uXXXX escape.
# The pair alternative is listed first so it wins over two single matches.
_UNICODE_ESCAPE_PATTERN = re.compile(
    r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
    r"|\\u([0-9a-fA-F]{4})"
)


def safe_unicode_decode(content):
    """Replace literal ``\\uXXXX`` escape sequences with their characters.

    Only a backslash, ``u`` and exactly four hexadecimal digits are treated
    as an escape; every other backslash sequence (for example ``\\utils``)
    is left untouched, so arbitrary text never raises a decoding error the
    way the ``unicode_escape`` codec does.

    Args:
        content: UTF-8 encoded ``bytes``/``bytearray``, or an already
            decoded ``str``.

    Returns:
        The text as ``str`` with every ``\\uXXXX`` escape substituted. A high
        surrogate escape immediately followed by a low surrogate escape is
        combined into the single code point it encodes.

    Raises:
        UnicodeDecodeError: If ``content`` is bytes that are not valid UTF-8.
    """
    if isinstance(content, (bytes, bytearray)):
        text = content.decode("utf-8")
    else:
        text = content

    def replace_unicode_escape(match):
        high, low, single = match.groups()
        if single is not None:
            # An unpaired surrogate escape is still converted as-is, which
            # matches the behaviour of the previous implementation.
            return chr(int(single, 16))
        # Combine the UTF-16 surrogate pair into one supplementary code point.
        high_bits = int(high, 16) - 0xD800
        low_bits = int(low, 16) - 0xDC00
        return chr(0x10000 + (high_bits << 10) + low_bits)

    return _UNICODE_ESCAPE_PATTERN.sub(replace_unicode_escape, text)
|
||||
|
Reference in New Issue
Block a user