feat: trimming the model’s reasoning

This commit is contained in:
ultrageopro
2025-02-06 22:56:17 +03:00
parent 9db1db2b38
commit 19ee3d109c
3 changed files with 55 additions and 2 deletions

View File

@@ -66,6 +66,7 @@ from lightrag.exceptions import (
RateLimitError,
APITimeoutError,
)
from lightrag.utils import extract_reasoning
import numpy as np
from typing import Union
@@ -85,6 +86,7 @@ async def ollama_model_if_cache(
**kwargs,
) -> Union[str, AsyncIterator[str]]:
stream = True if kwargs.get("stream") else False
reasoning_tag = kwargs.pop("reasoning_tag", None)
kwargs.pop("max_tokens", None)
# kwargs.pop("response_format", None) # allow json
host = kwargs.pop("host", None)
@@ -105,7 +107,7 @@ async def ollama_model_if_cache(
response = await ollama_client.chat(model=model, messages=messages, **kwargs)
if stream:
"""cannot cache stream response"""
"""cannot cache stream response and process reasoning"""
async def inner():
async for chunk in response:
@@ -113,7 +115,19 @@ async def ollama_model_if_cache(
return inner()
else:
return response["message"]["content"]
model_response = response["message"]["content"]
"""
If the model wraps its reasoning in a dedicated tag,
that reasoning is not part of the final answer,
so it is trimmed from the returned response.
"""
return (
model_response
if reasoning_tag is None
else extract_reasoning(model_response, reasoning_tag).response_content
)
async def ollama_model_complete(