Merge pull request #138 from tackhwa/main

[FIX] Fix HF output bug (the decoded output currently contains the user prompt, which causes logical errors in the entity extraction phase)
2024-10-26 14:10:14 +08:00
parent e07b9f0530 87f8b7dba1
commit a7811ad18c
1 changed files with 3 additions and 2 deletions
--- a/lightrag/llm.py
+++ b/lightrag/llm.py
@@ -282,10 +282,11 @@ async def hf_model_if_cache(
    input_ids = hf_tokenizer(
        input_prompt, return_tensors="pt", padding=True, truncation=True
    ).to("cuda")
+    inputs = {k: v.to(hf_model.device) for k, v in input_ids.items()}
    output = hf_model.generate(
-        **input_ids, max_new_tokens=200, num_return_sequences=1, early_stopping=True
+        **input_ids, max_new_tokens=512, num_return_sequences=1, early_stopping=True
    )
-    response_text = hf_tokenizer.decode(output[0], skip_special_tokens=True)
+    response_text = hf_tokenizer.decode(output[0][len(inputs["input_ids"][0]):], skip_special_tokens=True)
    if hashing_kv is not None:
        await hashing_kv.upsert({args_hash: {"return": response_text, "model": model}})
    return response_text