From d93040efa71b86aeb046236a96401696711d5948 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Thu, 21 May 2026 05:49:01 +0000
Subject: [PATCH] fix(#5878): preserve reasoning_content from DeepSeek thinking mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extract reasoning_content from litellm response and store it on the LLM
instance so that executors can propagate it into conversation history as
required by the DeepSeek API.

Changes:
- LLM._handle_non_streaming_response: extract reasoning_content from the
  response message and store it as self.reasoning_content
- LLM.call: reset reasoning_content at the start of each call
- format_message_for_llm: accept optional reasoning_content param;
  include it in assistant messages only
- LLMMessage TypedDict: add reasoning_content field
- CrewAgentExecutor: pass reasoning_content through _append_message for
  both sync and async loops (ReAct + native tools)
- AgentExecutor (experimental): same propagation in
  _append_message_to_state for native tools path

Tests: 13 new tests covering LLM extraction, format_message_for_llm, and
executor integration.

Co-Authored-By: João --- .../src/crewai/agents/crew_agent_executor.py | 44 ++- .../src/crewai/experimental/agent_executor.py | 26 +- lib/crewai/src/crewai/llm.py | 10 + .../src/crewai/utilities/agent_utils.py | 12 +- lib/crewai/src/crewai/utilities/types.py | 1 + .../llms/litellm/test_reasoning_content.py | 267 ++++++++++++++++++ 6 files changed, 343 insertions(+), 17 deletions(-) create mode 100644 lib/crewai/tests/llms/litellm/test_reasoning_content.py diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index fce80ad7a..0e0348484 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -422,7 +422,10 @@ class CrewAgentExecutor(BaseAgentExecutor): ) self._invoke_step_callback(formatted_answer) - self._append_message(formatted_answer.text) + self._append_message( + formatted_answer.text, + reasoning_content=self._get_llm_reasoning_content(), + ) except OutputParserError as e: formatted_answer = handle_output_parser_exception( # type: ignore[assignment] @@ -525,8 +528,9 @@ class CrewAgentExecutor(BaseAgentExecutor): output=answer, text=answer, ) + reasoning = self._get_llm_reasoning_content() self._invoke_step_callback(formatted_answer) - self._append_message(answer) + self._append_message(answer, reasoning_content=reasoning) self._show_logs(formatted_answer) return formatted_answer @@ -537,8 +541,9 @@ class CrewAgentExecutor(BaseAgentExecutor): output=answer, text=output_json, ) + reasoning = self._get_llm_reasoning_content() self._invoke_step_callback(formatted_answer) - self._append_message(output_json) + self._append_message(output_json, reasoning_content=reasoning) self._show_logs(formatted_answer) return formatted_answer @@ -547,8 +552,9 @@ class CrewAgentExecutor(BaseAgentExecutor): output=str(answer), text=str(answer), ) + reasoning = self._get_llm_reasoning_content() self._invoke_step_callback(formatted_answer) - 
self._append_message(str(answer)) + self._append_message(str(answer), reasoning_content=reasoning) self._show_logs(formatted_answer) return formatted_answer @@ -1234,7 +1240,10 @@ class CrewAgentExecutor(BaseAgentExecutor): ) await self._ainvoke_step_callback(formatted_answer) - self._append_message(formatted_answer.text) + self._append_message( + formatted_answer.text, + reasoning_content=self._get_llm_reasoning_content(), + ) except OutputParserError as e: formatted_answer = handle_output_parser_exception( # type: ignore[assignment] @@ -1336,8 +1345,9 @@ class CrewAgentExecutor(BaseAgentExecutor): output=answer, text=answer, ) + reasoning = self._get_llm_reasoning_content() await self._ainvoke_step_callback(formatted_answer) - self._append_message(answer) + self._append_message(answer, reasoning_content=reasoning) self._show_logs(formatted_answer) return formatted_answer @@ -1348,8 +1358,9 @@ class CrewAgentExecutor(BaseAgentExecutor): output=answer, text=output_json, ) + reasoning = self._get_llm_reasoning_content() await self._ainvoke_step_callback(formatted_answer) - self._append_message(output_json) + self._append_message(output_json, reasoning_content=reasoning) self._show_logs(formatted_answer) return formatted_answer @@ -1358,8 +1369,9 @@ class CrewAgentExecutor(BaseAgentExecutor): output=str(answer), text=str(answer), ) + reasoning = self._get_llm_reasoning_content() await self._ainvoke_step_callback(formatted_answer) - self._append_message(str(answer)) + self._append_message(str(answer), reasoning_content=reasoning) self._show_logs(formatted_answer) return formatted_answer @@ -1473,16 +1485,28 @@ class CrewAgentExecutor(BaseAgentExecutor): if inspect.iscoroutine(cb_result): await cb_result + def _get_llm_reasoning_content(self) -> str | None: + """Return reasoning_content from the last LLM response, if any.""" + return getattr(self.llm, "reasoning_content", None) + def _append_message( - self, text: str, role: Literal["user", "assistant", "system"] = 
"assistant" + self, + text: str, + role: Literal["user", "assistant", "system"] = "assistant", + reasoning_content: str | None = None, ) -> None: """Add message to conversation history. Args: text: Message content. role: Message role (default: assistant). + reasoning_content: Optional reasoning content from the LLM response. """ - self.messages.append(format_message_for_llm(text, role=role)) + self.messages.append( + format_message_for_llm( + text, role=role, reasoning_content=reasoning_content + ) + ) def _show_start_logs(self) -> None: """Emit agent start event.""" diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index 57e853666..b6d35ff5b 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -1330,6 +1330,8 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): self.state.pending_tool_calls = list(answer) return "native_tool_calls" + reasoning = self._get_llm_reasoning_content() + if isinstance(answer, BaseModel): self.state.current_answer = AgentFinish( thought="", @@ -1337,7 +1339,9 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): text=answer.model_dump_json(), ) self._invoke_step_callback(self.state.current_answer) - self._append_message_to_state(answer.model_dump_json()) + self._append_message_to_state( + answer.model_dump_json(), reasoning_content=reasoning + ) return self._route_finish_with_todos("native_finished") # Text response - this is the final answer @@ -1348,7 +1352,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): text=answer, ) self._invoke_step_callback(self.state.current_answer) - self._append_message_to_state(answer) + self._append_message_to_state(answer, reasoning_content=reasoning) return self._route_finish_with_todos("native_finished") @@ -1359,7 +1363,7 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): text=str(answer), ) 
self._invoke_step_callback(self.state.current_answer) - self._append_message_to_state(str(answer)) + self._append_message_to_state(str(answer), reasoning_content=reasoning) return self._route_finish_with_todos("native_finished") @@ -2813,16 +2817,28 @@ class AgentExecutor(Flow[AgentExecutorState], BaseAgentExecutor): color="red", ) + def _get_llm_reasoning_content(self) -> str | None: + """Return reasoning_content from the last LLM response, if any.""" + return getattr(self.llm, "reasoning_content", None) + def _append_message_to_state( - self, text: str, role: Literal["user", "assistant", "system"] = "assistant" + self, + text: str, + role: Literal["user", "assistant", "system"] = "assistant", + reasoning_content: str | None = None, ) -> None: """Add message to state conversation history. Args: text: Message content. role: Message role (default: assistant). + reasoning_content: Optional reasoning content from the LLM response. """ - self.state.messages.append(format_message_for_llm(text, role=role)) + self.state.messages.append( + format_message_for_llm( + text, role=role, reasoning_content=reasoning_content + ) + ) def _show_start_logs(self) -> None: """Emit agent start event.""" diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index e452dc394..d55fbc223 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -1232,6 +1232,15 @@ class LLM(BaseLLM): 0 ].message text_response = response_message.content or "" + + # Store reasoning_content for models that return it (e.g. 
DeepSeek thinking mode) + self.reasoning_content = getattr( + response_message, "reasoning_content", None + ) or ( + response_message.get("reasoning_content") + if hasattr(response_message, "get") + else None + ) # --- 3) Handle callbacks with usage info if callbacks and len(callbacks) > 0: for callback in callbacks: @@ -1742,6 +1751,7 @@ class LLM(BaseLLM): ValueError: If response format is not supported LLMContextLengthExceededError: If input exceeds model's context limit """ + self.reasoning_content: str | None = None with llm_call_context() as call_id: crewai_event_bus.emit( self, diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py index 3cb72331c..c2a8e200a 100644 --- a/lib/crewai/src/crewai/utilities/agent_utils.py +++ b/lib/crewai/src/crewai/utilities/agent_utils.py @@ -347,20 +347,28 @@ def handle_max_iterations_exceeded( def format_message_for_llm( - prompt: str, role: Literal["user", "assistant", "system"] = "user" + prompt: str, + role: Literal["user", "assistant", "system"] = "user", + reasoning_content: str | None = None, ) -> LLMMessage: """Format a message for the LLM. Args: prompt: The message content. role: The role of the message sender, either 'user' or 'assistant'. + reasoning_content: Optional reasoning content for assistant messages + (e.g. from DeepSeek thinking mode). Only included when role is + 'assistant' and the value is non-empty. Returns: A dictionary with 'role' and 'content' keys. 
""" prompt = prompt.rstrip() - return {"role": role, "content": prompt} + msg: LLMMessage = {"role": role, "content": prompt} + if reasoning_content and role == "assistant": + msg["reasoning_content"] = reasoning_content + return msg def format_answer(answer: str) -> AgentAction | AgentFinish: diff --git a/lib/crewai/src/crewai/utilities/types.py b/lib/crewai/src/crewai/utilities/types.py index 340f6f751..5867d30f3 100644 --- a/lib/crewai/src/crewai/utilities/types.py +++ b/lib/crewai/src/crewai/utilities/types.py @@ -27,4 +27,5 @@ class LLMMessage(TypedDict): name: NotRequired[str] tool_calls: NotRequired[list[dict[str, Any]]] raw_tool_call_parts: NotRequired[list[Any]] + reasoning_content: NotRequired[str | None] files: NotRequired[dict[str, FileInput]] diff --git a/lib/crewai/tests/llms/litellm/test_reasoning_content.py b/lib/crewai/tests/llms/litellm/test_reasoning_content.py new file mode 100644 index 000000000..3851879b1 --- /dev/null +++ b/lib/crewai/tests/llms/litellm/test_reasoning_content.py @@ -0,0 +1,267 @@ +"""Tests for reasoning_content support (DeepSeek thinking mode). + +Verifies that reasoning_content from LLM responses is: +1. Extracted and stored by LLM.call() +2. Propagated into assistant messages by the executor +3. 
Omitted when the model does not return it +""" + +from __future__ import annotations + +import warnings +from typing import Any +from unittest.mock import MagicMock, patch + +import pytest +from litellm.types.utils import Choices, Message, ModelResponse + +from crewai.llm import LLM +from crewai.utilities.agent_utils import format_message_for_llm + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_response(content: str, reasoning_content: str | None = None) -> ModelResponse: + """Build a litellm ModelResponse, optionally with reasoning_content.""" + msg_kwargs: dict[str, Any] = {"content": content, "role": "assistant"} + if reasoning_content is not None: + msg_kwargs["reasoning_content"] = reasoning_content + message = Message(**msg_kwargs) + choice = Choices(message=message, index=0, finish_reason="stop") + return ModelResponse(choices=[choice]) + + +# --------------------------------------------------------------------------- +# LLM.call tests +# --------------------------------------------------------------------------- + +class TestLLMReasoningContent: + """LLM.call should extract and store reasoning_content.""" + + @patch("crewai.llm.litellm.completion") + def test_stores_reasoning_content(self, mock_completion: MagicMock) -> None: + mock_completion.return_value = _make_response( + content="Paris", + reasoning_content="The user asked about the capital of France.", + ) + llm = LLM(model="deepseek/deepseek-reasoner", is_litellm=True) + result = llm.call( + [{"role": "user", "content": "What is the capital of France?"}] + ) + + assert result == "Paris" + assert llm.reasoning_content == "The user asked about the capital of France." 
+ + @patch("crewai.llm.litellm.completion") + def test_none_when_absent(self, mock_completion: MagicMock) -> None: + mock_completion.return_value = _make_response(content="Hello!") + llm = LLM(model="gpt-4o", is_litellm=True) + result = llm.call([{"role": "user", "content": "Hi"}]) + + assert result == "Hello!" + assert llm.reasoning_content is None + + @patch("crewai.llm.litellm.completion") + def test_resets_between_calls(self, mock_completion: MagicMock) -> None: + mock_completion.return_value = _make_response( + content="first", reasoning_content="thinking1" + ) + llm = LLM(model="deepseek/deepseek-reasoner", is_litellm=True) + llm.call([{"role": "user", "content": "q1"}]) + assert llm.reasoning_content == "thinking1" + + mock_completion.return_value = _make_response(content="second") + llm.call([{"role": "user", "content": "q2"}]) + assert llm.reasoning_content is None + + +# --------------------------------------------------------------------------- +# format_message_for_llm tests +# --------------------------------------------------------------------------- + +class TestFormatMessageReasoningContent: + """format_message_for_llm should handle reasoning_content correctly.""" + + def test_includes_reasoning_content_for_assistant(self) -> None: + msg = format_message_for_llm( + "Hello", role="assistant", reasoning_content="thinking..." + ) + assert msg == { + "role": "assistant", + "content": "Hello", + "reasoning_content": "thinking...", + } + + def test_omits_reasoning_content_for_user(self) -> None: + msg = format_message_for_llm( + "Hello", role="user", reasoning_content="thinking..." 
+ ) + assert msg == {"role": "user", "content": "Hello"} + + def test_omits_reasoning_content_when_none(self) -> None: + msg = format_message_for_llm( + "Hello", role="assistant", reasoning_content=None + ) + assert msg == {"role": "assistant", "content": "Hello"} + assert "reasoning_content" not in msg + + def test_omits_reasoning_content_when_empty_string(self) -> None: + msg = format_message_for_llm( + "Hello", role="assistant", reasoning_content="" + ) + assert msg == {"role": "assistant", "content": "Hello"} + assert "reasoning_content" not in msg + + +# --------------------------------------------------------------------------- +# CrewAgentExecutor unit tests +# --------------------------------------------------------------------------- + +def _build_crew_executor(llm: Any) -> Any: + """Build a minimal CrewAgentExecutor using model_construct to skip validation.""" + from crewai.agents.crew_agent_executor import CrewAgentExecutor + from crewai.utilities.agent_utils import format_message_for_llm + + agent = MagicMock() + agent.role = "test" + agent.verbose = False + agent.id = "agent-1" + agent.key = "agent-key" + agent.security_config = MagicMock() + + task = MagicMock() + task.name = "test task" + task.description = "test task" + task.id = "task-1" + + crew = MagicMock() + crew.verbose = False + crew._train = False + + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + executor = CrewAgentExecutor.model_construct( + llm=llm, + task=task, + crew=crew, + agent=agent, + messages=[ + format_message_for_llm("You are helpful.", role="system"), + ], + iterations=0, + max_iter=3, + tools=[], + original_tools=[], + tools_names="", + stop=[], + tools_description="", + tools_handler=None, + step_callback=None, + function_calling_llm=None, + respect_context_window=False, + request_within_rpm_limit=None, + callbacks=[], + response_model=None, + ask_for_human_input=False, + log_error_after=3, + before_llm_call_hooks=[], + 
after_llm_call_hooks=[], + ) + return executor + + +class TestCrewExecutorReasoningContent: + """CrewAgentExecutor should propagate reasoning_content to message history.""" + + def test_get_llm_reasoning_content(self) -> None: + llm = MagicMock() + llm.reasoning_content = "some reasoning" + executor = _build_crew_executor(llm) + assert executor._get_llm_reasoning_content() == "some reasoning" + + def test_get_llm_reasoning_content_missing(self) -> None: + llm = MagicMock( + spec=["call", "supports_stop_words", "supports_function_calling", "stop"] + ) + executor = _build_crew_executor(llm) + assert executor._get_llm_reasoning_content() is None + + def test_append_message_includes_reasoning_content(self) -> None: + llm = MagicMock() + executor = _build_crew_executor(llm) + initial_count = len(executor.messages) + + executor._append_message( + "hello", role="assistant", reasoning_content="thinking..." + ) + + new_msg = executor.messages[initial_count] + assert new_msg["role"] == "assistant" + assert new_msg["content"] == "hello" + assert new_msg["reasoning_content"] == "thinking..." 
+ + def test_append_message_omits_reasoning_content_when_none(self) -> None: + llm = MagicMock() + executor = _build_crew_executor(llm) + initial_count = len(executor.messages) + + executor._append_message("hello", role="assistant", reasoning_content=None) + + new_msg = executor.messages[initial_count] + assert new_msg["role"] == "assistant" + assert new_msg["content"] == "hello" + assert "reasoning_content" not in new_msg + + @patch("crewai.llm.litellm.completion") + def test_invoke_loop_preserves_reasoning_content( + self, mock_completion: MagicMock + ) -> None: + """The ReAct invoke loop should include reasoning_content in assistant messages.""" + llm = LLM(model="deepseek/deepseek-reasoner", is_litellm=True) + + mock_completion.return_value = _make_response( + content="Thought: I need to think about this.\nFinal Answer: 42", + reasoning_content="Let me reason step by step...", + ) + + executor = _build_crew_executor(llm) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + executor.invoke( + {"input": "What is the answer?", "tool_names": "", "tools": ""} + ) + + assistant_msgs = [ + m for m in executor.messages if m["role"] == "assistant" + ] + assert len(assistant_msgs) >= 1 + assert ( + assistant_msgs[0].get("reasoning_content") + == "Let me reason step by step..." 
+ ) + + @patch("crewai.llm.litellm.completion") + def test_invoke_loop_no_reasoning_content_for_normal_models( + self, mock_completion: MagicMock + ) -> None: + """Assistant messages should NOT have reasoning_content for normal models.""" + llm = LLM(model="gpt-4o", is_litellm=True) + + mock_completion.return_value = _make_response( + content="Thought: Simple question.\nFinal Answer: Hello!", + ) + + executor = _build_crew_executor(llm) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + executor.invoke( + {"input": "Say hi", "tool_names": "", "tools": ""} + ) + + assistant_msgs = [ + m for m in executor.messages if m["role"] == "assistant" + ] + assert len(assistant_msgs) >= 1 + assert "reasoning_content" not in assistant_msgs[0]