From d826b0ab529353641ee38188c7a2a42001b4a4ea Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 21 May 2026 05:10:38 +0000 Subject: [PATCH] Fix #5878: Preserve reasoning_content from DeepSeek thinking mode in conversation history MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DeepSeek V4 models return reasoning_content alongside content when in thinking mode. The API requires this field to be passed back in subsequent requests. Previously, LLM.call() discarded reasoning_content and only returned the content string, causing a 400 error on follow-up calls. Changes: - LLM.call(): Extract and store reasoning_content from the response message - CrewAgentExecutor._format_msg(): Accept optional reasoning_content parameter - CrewAgentExecutor._invoke_loop(): Include reasoning_content in assistant messages added to the conversation history Tests added for: - LLM storing reasoning_content from responses - LLM returning None when reasoning_content is absent - LLM resetting reasoning_content between calls - Executor _format_msg including/excluding reasoning_content appropriately - End-to-end invoke loop preserving reasoning_content in message history Co-Authored-By: João --- src/crewai/agents/crew_agent_executor.py | 20 ++- src/crewai/llm.py | 9 +- tests/llm_test.py | 159 +++++++++++++++++++++++ 3 files changed, 184 insertions(+), 4 deletions(-) diff --git a/src/crewai/agents/crew_agent_executor.py b/src/crewai/agents/crew_agent_executor.py index 813ac8a08..630b134d7 100644 --- a/src/crewai/agents/crew_agent_executor.py +++ b/src/crewai/agents/crew_agent_executor.py @@ -184,7 +184,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): ) self.have_forced_answer = True self.messages.append( - self._format_msg(formatted_answer.text, role="assistant") + self._format_msg( + formatted_answer.text, + role="assistant", + reasoning_content=getattr( + self.llm, "reasoning_content", 
None + ), + ) ) except OutputParserException as e: @@ -406,9 +412,17 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): def _format_answer(self, answer: str) -> Union[AgentAction, AgentFinish]: return CrewAgentParser(agent=self.agent).parse(answer) - def _format_msg(self, prompt: str, role: str = "user") -> Dict[str, str]: + def _format_msg( + self, + prompt: str, + role: str = "user", + reasoning_content: str = None, + ) -> Dict[str, str]: prompt = prompt.rstrip() - return {"role": role, "content": prompt} + msg: Dict[str, str] = {"role": role, "content": prompt} + if reasoning_content and role == "assistant": + msg["reasoning_content"] = reasoning_content + return msg def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish: """ diff --git a/src/crewai/llm.py b/src/crewai/llm.py index 5d6a0ccf5..16466ee35 100644 --- a/src/crewai/llm.py +++ b/src/crewai/llm.py @@ -143,6 +143,7 @@ class LLM: self.set_env_callbacks() def call(self, messages: List[Dict[str, str]], callbacks: List[Any] = []) -> str: + self.reasoning_content: Optional[str] = None with suppress_warnings(): if callbacks and len(callbacks) > 0: self.set_callbacks(callbacks) @@ -175,7 +176,13 @@ class LLM: params = {k: v for k, v in params.items() if v is not None} response = litellm.completion(**params) - return response["choices"][0]["message"]["content"] + + message = response["choices"][0]["message"] + self.reasoning_content = getattr( + message, "reasoning_content", None + ) or message.get("reasoning_content") + + return message["content"] except Exception as e: if not LLMContextLengthExceededException( str(e) diff --git a/tests/llm_test.py b/tests/llm_test.py index e824d54c9..27b0c9d8a 100644 --- a/tests/llm_test.py +++ b/tests/llm_test.py @@ -1,4 +1,7 @@ +from unittest.mock import MagicMock, patch + import pytest +from litellm.types.utils import Choices, Message, ModelResponse from crewai.agents.agent_builder.utilities.base_token_process import TokenProcess from crewai.llm 
import LLM @@ -28,3 +31,159 @@ def test_llm_callback_replacement(): assert usage_metrics_1.successful_requests == 1 assert usage_metrics_2.successful_requests == 1 assert usage_metrics_1 == calc_handler_1.token_cost_process.get_summary() + + +def _make_response(content, reasoning_content=None): + """Build a litellm ModelResponse, optionally with reasoning_content.""" + msg_kwargs = {"content": content, "role": "assistant"} + if reasoning_content is not None: + msg_kwargs["reasoning_content"] = reasoning_content + message = Message(**msg_kwargs) + choice = Choices(message=message, index=0, finish_reason="stop") + return ModelResponse(choices=[choice]) + + +@patch("crewai.llm.litellm.completion") +def test_llm_call_stores_reasoning_content(mock_completion): + """LLM.call should store reasoning_content from the response.""" + mock_completion.return_value = _make_response( + content="Paris", + reasoning_content="The user asked about the capital of France.", + ) + llm = LLM(model="deepseek/deepseek-reasoner") + result = llm.call([{"role": "user", "content": "What is the capital of France?"}]) + + assert result == "Paris" + assert llm.reasoning_content == "The user asked about the capital of France." + + +@patch("crewai.llm.litellm.completion") +def test_llm_call_no_reasoning_content(mock_completion): + """LLM.call should set reasoning_content to None when absent.""" + mock_completion.return_value = _make_response(content="Hello!") + llm = LLM(model="gpt-4o") + result = llm.call([{"role": "user", "content": "Hi"}]) + + assert result == "Hello!" 
+ assert llm.reasoning_content is None + + +@patch("crewai.llm.litellm.completion") +def test_llm_call_reasoning_content_reset_between_calls(mock_completion): + """reasoning_content should be reset on each call.""" + mock_completion.return_value = _make_response( + content="first", reasoning_content="thinking1" + ) + llm = LLM(model="deepseek/deepseek-reasoner") + llm.call([{"role": "user", "content": "q1"}]) + assert llm.reasoning_content == "thinking1" + + # Second call without reasoning_content + mock_completion.return_value = _make_response(content="second") + llm.call([{"role": "user", "content": "q2"}]) + assert llm.reasoning_content is None + + +class TestExecutorReasoningContent: + """Tests for reasoning_content propagation in CrewAgentExecutor.""" + + def _build_executor(self, llm): + """Build a minimal CrewAgentExecutor for testing.""" + from crewai.agents.crew_agent_executor import CrewAgentExecutor + from crewai.agents.tools_handler import ToolsHandler + + agent = MagicMock() + agent.role = "test" + agent.verbose = False + agent.id = "agent-1" + + task = MagicMock() + task.description = "test task" + + crew = MagicMock() + crew.verbose = False + crew._train = False + + tools_handler = ToolsHandler() + + executor = CrewAgentExecutor( + llm=llm, + task=task, + crew=crew, + agent=agent, + prompt={"system": "You are helpful.", "user": "{input}{tool_names}{tools}"}, + max_iter=3, + tools=[], + tools_names="", + stop_words=["Observation:"], + tools_description="", + tools_handler=tools_handler, + ) + return executor + + def test_format_msg_includes_reasoning_content(self): + """_format_msg should include reasoning_content for assistant messages.""" + llm = MagicMock() + llm.supports_stop_words.return_value = True + llm.stop = None + executor = self._build_executor(llm) + + msg = executor._format_msg( + "Hello", role="assistant", reasoning_content="thinking..." 
+ ) + assert msg == { + "role": "assistant", + "content": "Hello", + "reasoning_content": "thinking...", + } + + def test_format_msg_omits_reasoning_content_for_user(self): + """_format_msg should not include reasoning_content for user messages.""" + llm = MagicMock() + llm.supports_stop_words.return_value = True + llm.stop = None + executor = self._build_executor(llm) + + msg = executor._format_msg( + "Hello", role="user", reasoning_content="thinking..." + ) + assert msg == {"role": "user", "content": "Hello"} + + def test_format_msg_omits_reasoning_content_when_none(self): + """_format_msg should not include reasoning_content key when it is None.""" + llm = MagicMock() + llm.supports_stop_words.return_value = True + llm.stop = None + executor = self._build_executor(llm) + + msg = executor._format_msg("Hello", role="assistant", reasoning_content=None) + assert msg == {"role": "assistant", "content": "Hello"} + assert "reasoning_content" not in msg + + @patch("crewai.llm.litellm.completion") + def test_invoke_loop_preserves_reasoning_content_in_messages( + self, mock_completion + ): + """The invoke loop should include reasoning_content in assistant messages.""" + llm = LLM(model="deepseek/deepseek-reasoner") + + # Only one response is mocked, and it already contains the final answer, + # so the loop must finish after a single LLM call + mock_completion.side_effect = [ + _make_response( + content="Thought: I need to think about this.\nFinal Answer: 42", + reasoning_content="Let me reason step by step...", + ), + ] + + executor = self._build_executor(llm) + executor.invoke( + {"input": "What is the answer?", "tool_names": "", "tools": ""} + ) + + # Find assistant messages in the message history + assistant_msgs = [ + m for m in executor.messages if m["role"] == "assistant" + ] + assert len(assistant_msgs) >= 1 + assert assistant_msgs[0].get("reasoning_content") == "Let me reason step by step..."