Merge pull request #435 from davidleon/fix/unicode_escape

Fix unicode escape decoding for the case of "/utils" in the response.
This commit is contained in:
zrguo
2024-12-09 20:54:58 +08:00
committed by GitHub
2 changed files with 20 additions and 2 deletions

View File

@@ -29,6 +29,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from .utils import ( from .utils import (
wrap_embedding_func_with_attrs, wrap_embedding_func_with_attrs,
locate_json_string_body_from_string, locate_json_string_body_from_string,
safe_unicode_decode,
) )
import sys import sys
@@ -85,14 +86,14 @@ async def openai_complete_if_cache(
if content is None: if content is None:
continue continue
if r"\u" in content: if r"\u" in content:
content = content.encode("utf-8").decode("unicode_escape") content = safe_unicode_decode(content.encode("utf-8"))
yield content yield content
return inner() return inner()
else: else:
content = response.choices[0].message.content content = response.choices[0].message.content
if r"\u" in content: if r"\u" in content:
content = content.encode("utf-8").decode("unicode_escape") content = safe_unicode_decode(content.encode("utf-8"))
return content return content

View File

@@ -507,3 +507,20 @@ async def save_to_cache(hashing_kv, cache_data: CacheData):
} }
await hashing_kv.upsert({cache_data.mode: mode_cache}) await hashing_kv.upsert({cache_data.mode: mode_cache})
# Matches either an escaped UTF-16 surrogate pair (\uD800-\uDBFF followed by
# \uDC00-\uDFFF) or a single \uXXXX escape. The pair alternative comes first so
# that both halves are consumed together and combined into one character.
_UNICODE_ESCAPE_PATTERN = re.compile(
    r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
    r"|\\u([0-9a-fA-F]{4})"
)


def safe_unicode_decode(content):
    r"""Replace literal ``\uXXXX`` escape sequences with the characters they name.

    Only a backslash-``u`` followed by exactly four hexadecimal digits is
    treated as an escape; any other text (for example ``\utils``) is left
    untouched, so this never raises on a malformed escape.

    Args:
        content: UTF-8 encoded ``bytes``/``bytearray`` (what the existing
            callers pass via ``content.encode("utf-8")``) or an already
            decoded ``str``.

    Returns:
        The decoded ``str`` with every well-formed escape substituted.

    Raises:
        UnicodeDecodeError: If ``content`` is bytes that are not valid UTF-8.
    """
    if isinstance(content, (bytes, bytearray)):
        text = content.decode("utf-8")
    else:
        text = content

    def replace_unicode_escape(match):
        high, low, single = match.groups()
        if single is not None:
            # Plain BMP escape (or an unpaired surrogate, kept as-is).
            return chr(int(single, 16))
        # Escaped surrogate pair: combine both halves into one code point so
        # the result is a real character rather than two lone surrogates,
        # which could not be encoded back to UTF-8.
        return chr(
            0x10000
            + ((int(high, 16) - 0xD800) << 10)
            + (int(low, 16) - 0xDC00)
        )

    return _UNICODE_ESCAPE_PATTERN.sub(replace_unicode_escape, text)