Fix #5878: Preserve reasoning_content from DeepSeek thinking mode in conversation history

DeepSeek V4 models return reasoning_content alongside content when in thinking mode. The API requires this field to be passed back in subsequent requests. Previously, LLM.call() discarded reasoning_content and returned only the content string, causing a 400 error on follow-up calls.

Changes:
- LLM.call(): Extract and store reasoning_content from the response message
- CrewAgentExecutor._format_msg(): Accept an optional reasoning_content parameter
- CrewAgentExecutor._invoke_loop(): Include reasoning_content in assistant messages added to the conversation history

Tests added for:
- LLM storing reasoning_content from responses
- LLM leaving reasoning_content as None when the response does not include it
- LLM resetting reasoning_content between calls
- Executor _format_msg including/excluding reasoning_content appropriately
- End-to-end invoke loop preserving reasoning_content in message history

Co-Authored-By: João <joao@crewai.com>
2026-05-21 17:08:10 +00:00 · 2026-05-21 05:10:38 +00:00
3 changed files with 184 additions and 4 deletions
--- a/src/crewai/agents/crew_agent_executor.py
+++ b/src/crewai/agents/crew_agent_executor.py
@@ -184,7 +184,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
                            )
                            self.have_forced_answer = True
                    self.messages.append(
-                        self._format_msg(formatted_answer.text, role="assistant")
+                        self._format_msg(
+                            formatted_answer.text,
+                            role="assistant",
+                            reasoning_content=getattr(
+                                self.llm, "reasoning_content", None
+                            ),
+                        )
                    )

        except OutputParserException as e:
@@ -406,9 +412,17 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
    def _format_answer(self, answer: str) -> Union[AgentAction, AgentFinish]:
        return CrewAgentParser(agent=self.agent).parse(answer)

-    def _format_msg(self, prompt: str, role: str = "user") -> Dict[str, str]:
+    def _format_msg(
+        self,
+        prompt: str,
+        role: str = "user",
+        reasoning_content: str = None,
+    ) -> Dict[str, str]:
        prompt = prompt.rstrip()
-        return {"role": role, "content": prompt}
+        msg: Dict[str, str] = {"role": role, "content": prompt}
+        if reasoning_content and role == "assistant":
+            msg["reasoning_content"] = reasoning_content
+        return msg

    def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish:
        """
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -143,6 +143,7 @@ class LLM:
        self.set_env_callbacks()

    def call(self, messages: List[Dict[str, str]], callbacks: List[Any] = []) -> str:
+        self.reasoning_content: Optional[str] = None
        with suppress_warnings():
            if callbacks and len(callbacks) > 0:
                self.set_callbacks(callbacks)
@@ -175,7 +176,13 @@ class LLM:
                params = {k: v for k, v in params.items() if v is not None}

                response = litellm.completion(**params)
-                return response["choices"][0]["message"]["content"]
+
+                message = response["choices"][0]["message"]
+                self.reasoning_content = getattr(
+                    message, "reasoning_content", None
+                ) or message.get("reasoning_content")
+
+                return message["content"]
            except Exception as e:
                if not LLMContextLengthExceededException(
                    str(e)
--- a/tests/llm_test.py
+++ b/tests/llm_test.py
@@ -1,4 +1,7 @@
+from unittest.mock import MagicMock, patch
+
 import pytest
+from litellm.types.utils import Choices, Message, ModelResponse

 from crewai.agents.agent_builder.utilities.base_token_process import TokenProcess
 from crewai.llm import LLM
@@ -28,3 +31,159 @@ def test_llm_callback_replacement():
    assert usage_metrics_1.successful_requests == 1
    assert usage_metrics_2.successful_requests == 1
    assert usage_metrics_1 == calc_handler_1.token_cost_process.get_summary()
+
+
+def _make_response(content, reasoning_content=None):
+    """Build a litellm ModelResponse, optionally with reasoning_content."""
+    msg_kwargs = {"content": content, "role": "assistant"}
+    if reasoning_content is not None:
+        msg_kwargs["reasoning_content"] = reasoning_content
+    message = Message(**msg_kwargs)
+    choice = Choices(message=message, index=0, finish_reason="stop")
+    return ModelResponse(choices=[choice])
+
+
+@patch("crewai.llm.litellm.completion")
+def test_llm_call_stores_reasoning_content(mock_completion):
+    """LLM.call should store reasoning_content from the response."""
+    mock_completion.return_value = _make_response(
+        content="Paris",
+        reasoning_content="The user asked about the capital of France.",
+    )
+    llm = LLM(model="deepseek/deepseek-reasoner")
+    result = llm.call([{"role": "user", "content": "What is the capital of France?"}])
+
+    assert result == "Paris"
+    assert llm.reasoning_content == "The user asked about the capital of France."
+
+
+@patch("crewai.llm.litellm.completion")
+def test_llm_call_no_reasoning_content(mock_completion):
+    """LLM.call should set reasoning_content to None when absent."""
+    mock_completion.return_value = _make_response(content="Hello!")
+    llm = LLM(model="gpt-4o")
+    result = llm.call([{"role": "user", "content": "Hi"}])
+
+    assert result == "Hello!"
+    assert llm.reasoning_content is None
+
+
+@patch("crewai.llm.litellm.completion")
+def test_llm_call_reasoning_content_reset_between_calls(mock_completion):
+    """reasoning_content should be reset on each call."""
+    mock_completion.return_value = _make_response(
+        content="first", reasoning_content="thinking1"
+    )
+    llm = LLM(model="deepseek/deepseek-reasoner")
+    llm.call([{"role": "user", "content": "q1"}])
+    assert llm.reasoning_content == "thinking1"
+
+    # Second call without reasoning_content
+    mock_completion.return_value = _make_response(content="second")
+    llm.call([{"role": "user", "content": "q2"}])
+    assert llm.reasoning_content is None
+
+
+class TestExecutorReasoningContent:
+    """Tests for reasoning_content propagation in CrewAgentExecutor."""
+
+    def _build_executor(self, llm):
+        """Build a minimal CrewAgentExecutor for testing."""
+        from crewai.agents.crew_agent_executor import CrewAgentExecutor
+        from crewai.agents.tools_handler import ToolsHandler
+
+        agent = MagicMock()
+        agent.role = "test"
+        agent.verbose = False
+        agent.id = "agent-1"
+
+        task = MagicMock()
+        task.description = "test task"
+
+        crew = MagicMock()
+        crew.verbose = False
+        crew._train = False
+
+        tools_handler = ToolsHandler()
+
+        executor = CrewAgentExecutor(
+            llm=llm,
+            task=task,
+            crew=crew,
+            agent=agent,
+            prompt={"system": "You are helpful.", "user": "{input}{tool_names}{tools}"},
+            max_iter=3,
+            tools=[],
+            tools_names="",
+            stop_words=["Observation:"],
+            tools_description="",
+            tools_handler=tools_handler,
+        )
+        return executor
+
+    def test_format_msg_includes_reasoning_content(self):
+        """_format_msg should include reasoning_content for assistant messages."""
+        llm = MagicMock()
+        llm.supports_stop_words.return_value = True
+        llm.stop = None
+        executor = self._build_executor(llm)
+
+        msg = executor._format_msg(
+            "Hello", role="assistant", reasoning_content="thinking..."
+        )
+        assert msg == {
+            "role": "assistant",
+            "content": "Hello",
+            "reasoning_content": "thinking...",
+        }
+
+    def test_format_msg_omits_reasoning_content_for_user(self):
+        """_format_msg should not include reasoning_content for user messages."""
+        llm = MagicMock()
+        llm.supports_stop_words.return_value = True
+        llm.stop = None
+        executor = self._build_executor(llm)
+
+        msg = executor._format_msg(
+            "Hello", role="user", reasoning_content="thinking..."
+        )
+        assert msg == {"role": "user", "content": "Hello"}
+
+    def test_format_msg_omits_reasoning_content_when_none(self):
+        """_format_msg should not include reasoning_content key when it is None."""
+        llm = MagicMock()
+        llm.supports_stop_words.return_value = True
+        llm.stop = None
+        executor = self._build_executor(llm)
+
+        msg = executor._format_msg("Hello", role="assistant", reasoning_content=None)
+        assert msg == {"role": "assistant", "content": "Hello"}
+        assert "reasoning_content" not in msg
+
+    @patch("crewai.llm.litellm.completion")
+    def test_invoke_loop_preserves_reasoning_content_in_messages(
+        self, mock_completion
+    ):
+        """The invoke loop should include reasoning_content in assistant messages."""
+        llm = LLM(model="deepseek/deepseek-reasoner")
+
+        # Only one completion is mocked, and it already contains a Final Answer,
+        # so the loop is expected to finish after a single LLM call.
+        mock_completion.side_effect = [
+            _make_response(
+                content="Thought: I need to think about this.\nFinal Answer: 42",
+                reasoning_content="Let me reason step by step...",
+            ),
+        ]
+
+        executor = self._build_executor(llm)
+        executor.invoke(
+            {"input": "What is the answer?", "tool_names": "", "tools": ""}
+        )
+
+        # Find assistant messages in the message history
+        assistant_msgs = [
+            m for m in executor.messages if m["role"] == "assistant"
+        ]
+        assert len(assistant_msgs) >= 1
+        assert assistant_msgs[0].get("reasoning_content") == "Let me reason step by step..."