Merge pull request #435 from davidleon/fix/unicode_escape
Fix unicode escape handling for the case of "/utils" in the response.
This commit is contained in:
@@ -29,6 +29,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|||||||
from .utils import (
|
from .utils import (
|
||||||
wrap_embedding_func_with_attrs,
|
wrap_embedding_func_with_attrs,
|
||||||
locate_json_string_body_from_string,
|
locate_json_string_body_from_string,
|
||||||
|
safe_unicode_decode,
|
||||||
)
|
)
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
@@ -85,14 +86,14 @@ async def openai_complete_if_cache(
|
|||||||
if content is None:
|
if content is None:
|
||||||
continue
|
continue
|
||||||
if r"\u" in content:
|
if r"\u" in content:
|
||||||
content = content.encode("utf-8").decode("unicode_escape")
|
content = safe_unicode_decode(content.encode("utf-8"))
|
||||||
yield content
|
yield content
|
||||||
|
|
||||||
return inner()
|
return inner()
|
||||||
else:
|
else:
|
||||||
content = response.choices[0].message.content
|
content = response.choices[0].message.content
|
||||||
if r"\u" in content:
|
if r"\u" in content:
|
||||||
content = content.encode("utf-8").decode("unicode_escape")
|
content = safe_unicode_decode(content.encode("utf-8"))
|
||||||
return content
|
return content
|
||||||
|
|
||||||
|
|
||||||
|
@@ -507,3 +507,20 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
|
|||||||
}
|
}
|
||||||
|
|
||||||
await hashing_kv.upsert({cache_data.mode: mode_cache})
|
await hashing_kv.upsert({cache_data.mode: mode_cache})
|
||||||
|
|
||||||
|
|
||||||
|
# Matches either a UTF-16 surrogate pair written as two consecutive escapes
# (\uD83D\uDE00) or a single \uXXXX escape. The pair alternative is listed
# first so that a valid pair is consumed as one unit.
_UNICODE_ESCAPE_RE = re.compile(
    r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
    r"|\\u([0-9a-fA-F]{4})"
)


def safe_unicode_decode(content):
    """Replace literal ``\\uXXXX`` escape sequences with the characters they name.

    Only well-formed escapes (a backslash, ``u`` and exactly four hex digits)
    are substituted. Any other text, including a backslash followed by ``u``
    and non-hex characters (e.g. ``\\utils``), is left untouched, so this does
    not raise on input that merely looks like an escape.

    Args:
        content: UTF-8 encoded ``bytes``/``bytearray``, or an already decoded
            ``str``.

    Returns:
        The decoded ``str`` with escapes substituted. Two consecutive escapes
        that form a UTF-16 surrogate pair are merged into the single code
        point they encode; an unpaired surrogate escape is converted as-is.

    Raises:
        UnicodeDecodeError: if ``content`` is bytes that are not valid UTF-8.
    """
    if isinstance(content, (bytes, bytearray)):
        text = content.decode("utf-8")
    else:
        text = content

    def replace_unicode_escape(match):
        high, low, single = match.groups()
        if single is not None:
            return chr(int(single, 16))
        # Combine the high/low surrogate halves into one astral code point;
        # emitting them separately would yield a string that cannot be
        # encoded as UTF-8.
        code_point = (
            0x10000
            + ((int(high, 16) - 0xD800) << 10)
            + (int(low, 16) - 0xDC00)
        )
        return chr(code_point)

    return _UNICODE_ESCAPE_RE.sub(replace_unicode_escape, text)
|
||||||
|
Reference in New Issue
Block a user