Update token length

tackhwa
2024-10-26 02:42:40 +08:00
committed by GitHub
parent d9054c6e4f
commit 5bfd107f5e


@@ -268,7 +268,7 @@ async def hf_model_if_cache(
     ).to("cuda")
     inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()}
     output = hf_model.generate(
-        **input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True
+        **input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True
     )
     response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
     if hashing_kv is not None:
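
For context, the patched call is a standard Hugging Face `generate` invocation. Below is a minimal, self-contained sketch of the same pattern with the new limit; the model name (`gpt2`) is a placeholder for illustration, not the model used by the repository.

from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model; the commit does not name one.
model_name = "gpt2"
hf_tokenizer = AutoTokenizer.from_pretrained(model_name)
hf_model = AutoModelForCausalLM.from_pretrained(model_name)

inputs = hf_tokenizer("Hello, world", return_tensors="pt")
output = hf_model.generate(
    **inputs,
    max_new_tokens=512,  # raised from 200 in this commit
    num_return_sequences=1,
)
# Decode only the newly generated tokens, as the patched code does.
response_text = hf_tokenizer.decode(
    output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True
)
print(response_text)

The larger cap lets responses run up to 512 new tokens instead of truncating at 200, at the cost of longer generation time for verbose outputs.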