fix(#5878): preserve reasoning_content from DeepSeek thinking mode

Extract reasoning_content from litellm response and store it on the
LLM instance so that executors can propagate it into conversation
history as required by the DeepSeek API.

Changes:
- LLM._handle_non_streaming_response: extract reasoning_content from
  the response message and store it as self.reasoning_content
- LLM.call: reset reasoning_content at the start of each call
- format_message_for_llm: accept optional reasoning_content param;
  include it in assistant messages only
- LLMMessage TypedDict: add reasoning_content field
- CrewAgentExecutor: pass reasoning_content through _append_message
  for both sync and async loops (ReAct + native tools)
- AgentExecutor (experimental): same propagation in
  _append_message_to_state for native tools path

Tests: 13 new tests covering LLM extraction, format_message_for_llm,
and executor integration.

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2026-05-21 05:49:01 +00:00
parent 418afd29e7
commit d93040efa7
6 changed files with 343 additions and 17 deletions

View File

@@ -422,7 +422,10 @@ class CrewAgentExecutor(BaseAgentExecutor):
)
self._invoke_step_callback(formatted_answer)
self._append_message(formatted_answer.text)
self._append_message(
formatted_answer.text,
reasoning_content=self._get_llm_reasoning_content(),
)
except OutputParserError as e:
formatted_answer = handle_output_parser_exception( # type: ignore[assignment]
@@ -525,8 +528,9 @@ class CrewAgentExecutor(BaseAgentExecutor):
output=answer,
text=answer,
)
reasoning = self._get_llm_reasoning_content()
self._invoke_step_callback(formatted_answer)
self._append_message(answer)
self._append_message(answer, reasoning_content=reasoning)
self._show_logs(formatted_answer)
return formatted_answer
@@ -537,8 +541,9 @@ class CrewAgentExecutor(BaseAgentExecutor):
output=answer,
text=output_json,
)
reasoning = self._get_llm_reasoning_content()
self._invoke_step_callback(formatted_answer)
self._append_message(output_json)
self._append_message(output_json, reasoning_content=reasoning)
self._show_logs(formatted_answer)
return formatted_answer
@@ -547,8 +552,9 @@ class CrewAgentExecutor(BaseAgentExecutor):
output=str(answer),
text=str(answer),
)
reasoning = self._get_llm_reasoning_content()
self._invoke_step_callback(formatted_answer)
self._append_message(str(answer))
self._append_message(str(answer), reasoning_content=reasoning)
self._show_logs(formatted_answer)
return formatted_answer
@@ -1234,7 +1240,10 @@ class CrewAgentExecutor(BaseAgentExecutor):
)
await self._ainvoke_step_callback(formatted_answer)
self._append_message(formatted_answer.text)
self._append_message(
formatted_answer.text,
reasoning_content=self._get_llm_reasoning_content(),
)
except OutputParserError as e:
formatted_answer = handle_output_parser_exception( # type: ignore[assignment]
@@ -1336,8 +1345,9 @@ class CrewAgentExecutor(BaseAgentExecutor):
output=answer,
text=answer,
)
reasoning = self._get_llm_reasoning_content()
await self._ainvoke_step_callback(formatted_answer)
self._append_message(answer)
self._append_message(answer, reasoning_content=reasoning)
self._show_logs(formatted_answer)
return formatted_answer
@@ -1348,8 +1358,9 @@ class CrewAgentExecutor(BaseAgentExecutor):
output=answer,
text=output_json,
)
reasoning = self._get_llm_reasoning_content()
await self._ainvoke_step_callback(formatted_answer)
self._append_message(output_json)
self._append_message(output_json, reasoning_content=reasoning)
self._show_logs(formatted_answer)
return formatted_answer
@@ -1358,8 +1369,9 @@ class CrewAgentExecutor(BaseAgentExecutor):
output=str(answer),
text=str(answer),
)
reasoning = self._get_llm_reasoning_content()
await self._ainvoke_step_callback(formatted_answer)
self._append_message(str(answer))
self._append_message(str(answer), reasoning_content=reasoning)
self._show_logs(formatted_answer)
return formatted_answer
@@ -1473,16 +1485,28 @@ class CrewAgentExecutor(BaseAgentExecutor):
if inspect.iscoroutine(cb_result):
await cb_result
def _get_llm_reasoning_content(self) -> str | None:
    """Return reasoning_content from the last LLM response, if any.

    Reads the ``reasoning_content`` attribute of ``self.llm``. The lookup
    uses a ``None`` default because the LLM object may not define the
    attribute at all.

    Returns:
        The reasoning text when the attribute holds a string, otherwise
        ``None`` (attribute missing, ``None``, or not a string).
    """
    reasoning = getattr(self.llm, "reasoning_content", None)
    # Honor the declared return type: mock LLMs and custom LLM classes can
    # expose a non-string attribute, which must not reach message history.
    return reasoning if isinstance(reasoning, str) else None
def _append_message(
self, text: str, role: Literal["user", "assistant", "system"] = "assistant"
self,
text: str,
role: Literal["user", "assistant", "system"] = "assistant",
reasoning_content: str | None = None,
) -> None:
"""Add message to conversation history.
Args:
text: Message content.
role: Message role (default: assistant).
reasoning_content: Optional reasoning content from the LLM response.
"""
self.messages.append(format_message_for_llm(text, role=role))
self.messages.append(
format_message_for_llm(
text, role=role, reasoning_content=reasoning_content
)
)
def _show_start_logs(self) -> None:
"""Emit agent start event."""

View File

@@ -1330,6 +1330,8 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
self.state.pending_tool_calls = list(answer)
return "native_tool_calls"
reasoning = self._get_llm_reasoning_content()
if isinstance(answer, BaseModel):
self.state.current_answer = AgentFinish(
thought="",
@@ -1337,7 +1339,9 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
text=answer.model_dump_json(),
)
self._invoke_step_callback(self.state.current_answer)
self._append_message_to_state(answer.model_dump_json())
self._append_message_to_state(
answer.model_dump_json(), reasoning_content=reasoning
)
return self._route_finish_with_todos("native_finished")
# Text response - this is the final answer
@@ -1348,7 +1352,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
text=answer,
)
self._invoke_step_callback(self.state.current_answer)
self._append_message_to_state(answer)
self._append_message_to_state(answer, reasoning_content=reasoning)
return self._route_finish_with_todos("native_finished")
@@ -1359,7 +1363,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
text=str(answer),
)
self._invoke_step_callback(self.state.current_answer)
self._append_message_to_state(str(answer))
self._append_message_to_state(str(answer), reasoning_content=reasoning)
return self._route_finish_with_todos("native_finished")
@@ -2813,16 +2817,28 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor):
color="red",
)
def _get_llm_reasoning_content(self) -> str | None:
    """Return reasoning_content from the last LLM response, if any.

    Reads the ``reasoning_content`` attribute of ``self.llm``. The lookup
    uses a ``None`` default because the LLM object may not define the
    attribute at all.

    Returns:
        The reasoning text when the attribute holds a string, otherwise
        ``None`` (attribute missing, ``None``, or not a string).
    """
    reasoning = getattr(self.llm, "reasoning_content", None)
    # Honor the declared return type: mock LLMs and custom LLM classes can
    # expose a non-string attribute, which must not reach state messages.
    return reasoning if isinstance(reasoning, str) else None
def _append_message_to_state(
self, text: str, role: Literal["user", "assistant", "system"] = "assistant"
self,
text: str,
role: Literal["user", "assistant", "system"] = "assistant",
reasoning_content: str | None = None,
) -> None:
"""Add message to state conversation history.
Args:
text: Message content.
role: Message role (default: assistant).
reasoning_content: Optional reasoning content from the LLM response.
"""
self.state.messages.append(format_message_for_llm(text, role=role))
self.state.messages.append(
format_message_for_llm(
text, role=role, reasoning_content=reasoning_content
)
)
def _show_start_logs(self) -> None:
"""Emit agent start event."""

View File

@@ -1232,6 +1232,15 @@ class LLM(BaseLLM):
0
].message
text_response = response_message.content or ""
# Store reasoning_content for models that return it (e.g. DeepSeek thinking mode)
self.reasoning_content = getattr(
response_message, "reasoning_content", None
) or (
response_message.get("reasoning_content")
if hasattr(response_message, "get")
else None
)
# --- 3) Handle callbacks with usage info
if callbacks and len(callbacks) > 0:
for callback in callbacks:
@@ -1742,6 +1751,7 @@ class LLM(BaseLLM):
ValueError: If response format is not supported
LLMContextLengthExceededError: If input exceeds model's context limit
"""
self.reasoning_content: str | None = None
with llm_call_context() as call_id:
crewai_event_bus.emit(
self,

View File

@@ -347,20 +347,28 @@ def handle_max_iterations_exceeded(
def format_message_for_llm(
prompt: str, role: Literal["user", "assistant", "system"] = "user"
prompt: str,
role: Literal["user", "assistant", "system"] = "user",
reasoning_content: str | None = None,
) -> LLMMessage:
"""Format a message for the LLM.
Args:
prompt: The message content.
role: The role of the message sender: 'user', 'assistant', or 'system'.
reasoning_content: Optional reasoning content for assistant messages
(e.g. from DeepSeek thinking mode). Only included when role is
'assistant' and the value is non-empty.
Returns:
A dictionary with 'role' and 'content' keys, plus 'reasoning_content' when it is provided for an assistant message.
"""
prompt = prompt.rstrip()
return {"role": role, "content": prompt}
msg: LLMMessage = {"role": role, "content": prompt}
if reasoning_content and role == "assistant":
msg["reasoning_content"] = reasoning_content
return msg
def format_answer(answer: str) -> AgentAction | AgentFinish:

View File

@@ -27,4 +27,5 @@ class LLMMessage(TypedDict):
name: NotRequired[str]
tool_calls: NotRequired[list[dict[str, Any]]]
raw_tool_call_parts: NotRequired[list[Any]]
reasoning_content: NotRequired[str | None]
files: NotRequired[dict[str, FileInput]]

View File

@@ -0,0 +1,267 @@
"""Tests for reasoning_content support (DeepSeek thinking mode).
Verifies that reasoning_content from LLM responses is:
1. Extracted and stored by LLM.call()
2. Propagated into assistant messages by the executor
3. Omitted when the model does not return it
"""
from __future__ import annotations
import warnings
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
from litellm.types.utils import Choices, Message, ModelResponse
from crewai.llm import LLM
from crewai.utilities.agent_utils import format_message_for_llm
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_response(content: str, reasoning_content: str | None = None) -> ModelResponse:
    """Create a single-choice litellm ModelResponse for use as a mock return value.

    Args:
        content: Text placed in the assistant message's ``content``.
        reasoning_content: When not None, forwarded to ``Message`` as the
            ``reasoning_content`` keyword; otherwise the keyword is not
            passed at all.

    Returns:
        A ModelResponse holding one ``Choices`` entry (index 0,
        finish_reason "stop") that wraps the assistant message.
    """
    # Only pass reasoning_content when the caller supplied one, so the
    # "absent" case never sends the keyword to Message.
    optional_fields: dict[str, Any] = (
        {} if reasoning_content is None else {"reasoning_content": reasoning_content}
    )
    assistant_message = Message(content=content, role="assistant", **optional_fields)
    return ModelResponse(
        choices=[Choices(message=assistant_message, index=0, finish_reason="stop")]
    )
# ---------------------------------------------------------------------------
# LLM.call tests
# ---------------------------------------------------------------------------
class TestLLMReasoningContent:
    """Checks that LLM.call leaves reasoning_content on the LLM instance."""

    @patch("crewai.llm.litellm.completion")
    def test_stores_reasoning_content(self, mock_completion: MagicMock) -> None:
        """A response carrying reasoning_content exposes it after the call."""
        expected_reasoning = "The user asked about the capital of France."
        mock_completion.return_value = _make_response(
            content="Paris",
            reasoning_content=expected_reasoning,
        )

        llm = LLM(model="deepseek/deepseek-reasoner", is_litellm=True)
        question = [{"role": "user", "content": "What is the capital of France?"}]
        answer = llm.call(question)

        assert answer == "Paris"
        assert llm.reasoning_content == expected_reasoning

    @patch("crewai.llm.litellm.completion")
    def test_none_when_absent(self, mock_completion: MagicMock) -> None:
        """A response without reasoning_content leaves the attribute as None."""
        mock_completion.return_value = _make_response(content="Hello!")

        llm = LLM(model="gpt-4o", is_litellm=True)
        answer = llm.call([{"role": "user", "content": "Hi"}])

        assert answer == "Hello!"
        assert llm.reasoning_content is None

    @patch("crewai.llm.litellm.completion")
    def test_resets_between_calls(self, mock_completion: MagicMock) -> None:
        """Reasoning from an earlier call does not survive a later call without it."""
        llm_model = "deepseek/deepseek-reasoner"

        # First call: the mocked response includes reasoning_content.
        mock_completion.return_value = _make_response(
            content="first", reasoning_content="thinking1"
        )
        llm = LLM(model=llm_model, is_litellm=True)
        llm.call([{"role": "user", "content": "q1"}])
        assert llm.reasoning_content == "thinking1"

        # Second call on the same instance: the response has no reasoning.
        mock_completion.return_value = _make_response(content="second")
        llm.call([{"role": "user", "content": "q2"}])
        assert llm.reasoning_content is None
# ---------------------------------------------------------------------------
# format_message_for_llm tests
# ---------------------------------------------------------------------------
class TestFormatMessageReasoningContent:
    """Checks how format_message_for_llm treats its reasoning_content argument."""

    def test_includes_reasoning_content_for_assistant(self) -> None:
        """Assistant messages carry a non-empty reasoning_content."""
        expected = {
            "role": "assistant",
            "content": "Hello",
            "reasoning_content": "thinking...",
        }
        formatted = format_message_for_llm(
            "Hello", role="assistant", reasoning_content="thinking..."
        )
        assert formatted == expected

    def test_omits_reasoning_content_for_user(self) -> None:
        """User messages never carry reasoning_content, even when one is passed."""
        formatted = format_message_for_llm(
            "Hello", role="user", reasoning_content="thinking..."
        )
        assert formatted == {"role": "user", "content": "Hello"}

    def test_omits_reasoning_content_when_none(self) -> None:
        """Passing None leaves the key out of an assistant message."""
        formatted = format_message_for_llm(
            "Hello", role="assistant", reasoning_content=None
        )
        assert formatted == {"role": "assistant", "content": "Hello"}
        assert "reasoning_content" not in formatted

    def test_omits_reasoning_content_when_empty_string(self) -> None:
        """Passing an empty string leaves the key out of an assistant message."""
        formatted = format_message_for_llm(
            "Hello", role="assistant", reasoning_content=""
        )
        assert formatted == {"role": "assistant", "content": "Hello"}
        assert "reasoning_content" not in formatted
# ---------------------------------------------------------------------------
# CrewAgentExecutor unit tests
# ---------------------------------------------------------------------------
def _build_crew_executor(llm: Any) -> Any:
    """Assemble a bare-bones CrewAgentExecutor around ``llm`` for unit tests.

    The agent, task and crew collaborators are MagicMock stand-ins with only
    a few attributes pinned. The executor is created through
    ``model_construct`` (which skips validation) and starts with a single
    system message in its history.

    Args:
        llm: The LLM object (real or mocked) the executor should use.

    Returns:
        The constructed CrewAgentExecutor instance.
    """
    from crewai.agents.crew_agent_executor import CrewAgentExecutor
    from crewai.utilities.agent_utils import format_message_for_llm

    agent = MagicMock()
    agent.configure_mock(
        role="test",
        verbose=False,
        id="agent-1",
        key="agent-key",
        security_config=MagicMock(),
    )

    task = MagicMock()
    # configure_mock is used so that "name" becomes a plain attribute.
    task.configure_mock(name="test task", description="test task", id="task-1")

    crew = MagicMock()
    crew.configure_mock(verbose=False, _train=False)

    with warnings.catch_warnings():
        # DeprecationWarnings are silenced while the executor is constructed.
        warnings.simplefilter("ignore", DeprecationWarning)
        executor_fields: dict[str, Any] = {
            "llm": llm,
            "task": task,
            "crew": crew,
            "agent": agent,
            "messages": [
                format_message_for_llm("You are helpful.", role="system"),
            ],
            "iterations": 0,
            "max_iter": 3,
            "tools": [],
            "original_tools": [],
            "tools_names": "",
            "stop": [],
            "tools_description": "",
            "tools_handler": None,
            "step_callback": None,
            "function_calling_llm": None,
            "respect_context_window": False,
            "request_within_rpm_limit": None,
            "callbacks": [],
            "response_model": None,
            "ask_for_human_input": False,
            "log_error_after": 3,
            "before_llm_call_hooks": [],
            "after_llm_call_hooks": [],
        }
        return CrewAgentExecutor.model_construct(**executor_fields)
class TestCrewExecutorReasoningContent:
    """Checks that CrewAgentExecutor carries reasoning_content into its messages."""

    def test_get_llm_reasoning_content(self) -> None:
        """The helper returns the LLM's reasoning_content attribute."""
        fake_llm = MagicMock()
        fake_llm.reasoning_content = "some reasoning"

        executor = _build_crew_executor(fake_llm)

        assert executor._get_llm_reasoning_content() == "some reasoning"

    def test_get_llm_reasoning_content_missing(self) -> None:
        """The helper returns None when the LLM has no reasoning_content attribute."""
        # A spec list limits the mock to these attributes, so
        # reasoning_content is genuinely absent rather than auto-created.
        allowed_attrs = ["call", "supports_stop_words", "supports_function_calling", "stop"]
        fake_llm = MagicMock(spec=allowed_attrs)

        executor = _build_crew_executor(fake_llm)

        assert executor._get_llm_reasoning_content() is None

    def test_append_message_includes_reasoning_content(self) -> None:
        """_append_message stores the supplied reasoning_content on the new message."""
        executor = _build_crew_executor(MagicMock())
        first_new_index = len(executor.messages)

        executor._append_message(
            "hello", role="assistant", reasoning_content="thinking..."
        )

        appended = executor.messages[first_new_index]
        assert appended["role"] == "assistant"
        assert appended["content"] == "hello"
        assert appended["reasoning_content"] == "thinking..."

    def test_append_message_omits_reasoning_content_when_none(self) -> None:
        """_append_message adds no reasoning_content key when given None."""
        executor = _build_crew_executor(MagicMock())
        first_new_index = len(executor.messages)

        executor._append_message("hello", role="assistant", reasoning_content=None)

        appended = executor.messages[first_new_index]
        assert appended["role"] == "assistant"
        assert appended["content"] == "hello"
        assert "reasoning_content" not in appended

    @patch("crewai.llm.litellm.completion")
    def test_invoke_loop_preserves_reasoning_content(
        self, mock_completion: MagicMock
    ) -> None:
        """The ReAct invoke loop should include reasoning_content in assistant messages."""
        llm = LLM(model="deepseek/deepseek-reasoner", is_litellm=True)
        mock_completion.return_value = _make_response(
            content="Thought: I need to think about this.\nFinal Answer: 42",
            reasoning_content="Let me reason step by step...",
        )
        executor = _build_crew_executor(llm)
        invoke_inputs = {"input": "What is the answer?", "tool_names": "", "tools": ""}

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            executor.invoke(invoke_inputs)

        assistant_turns = [
            message for message in executor.messages if message["role"] == "assistant"
        ]
        assert len(assistant_turns) >= 1
        first_reasoning = assistant_turns[0].get("reasoning_content")
        assert first_reasoning == "Let me reason step by step..."

    @patch("crewai.llm.litellm.completion")
    def test_invoke_loop_no_reasoning_content_for_normal_models(
        self, mock_completion: MagicMock
    ) -> None:
        """Assistant messages should NOT have reasoning_content for normal models."""
        llm = LLM(model="gpt-4o", is_litellm=True)
        mock_completion.return_value = _make_response(
            content="Thought: Simple question.\nFinal Answer: Hello!",
        )
        executor = _build_crew_executor(llm)
        invoke_inputs = {"input": "Say hi", "tool_names": "", "tools": ""}

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", DeprecationWarning)
            executor.invoke(invoke_inputs)

        assistant_turns = [
            message for message in executor.messages if message["role"] == "assistant"
        ]
        assert len(assistant_turns) >= 1
        assert "reasoning_content" not in assistant_turns[0]