Compare commits

...

1 Commits

Author SHA1 Message Date
Devin AI
d826b0ab52 Fix #5878: Preserve reasoning_content from DeepSeek thinking mode in conversation history
DeepSeek V4 models return reasoning_content alongside content when in thinking
mode. The API requires this field to be passed back in subsequent requests.
Previously, LLM.call() discarded reasoning_content and only returned the content
string, causing a 400 error on follow-up calls.

Changes:
- LLM.call(): Extract and store reasoning_content from the response message
- CrewAgentExecutor._format_msg(): Accept optional reasoning_content parameter
- CrewAgentExecutor._invoke_loop(): Include reasoning_content in assistant
  messages added to the conversation history

Tests added for:
- LLM storing reasoning_content from responses
- LLM returning None when reasoning_content is absent
- LLM resetting reasoning_content between calls
- Executor _format_msg including/excluding reasoning_content appropriately
- End-to-end invoke loop preserving reasoning_content in message history

Co-Authored-By: João <joao@crewai.com>
2026-05-21 05:10:38 +00:00
3 changed files with 184 additions and 4 deletions

View File

@@ -184,7 +184,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
self.have_forced_answer = True
self.messages.append(
self._format_msg(formatted_answer.text, role="assistant")
self._format_msg(
formatted_answer.text,
role="assistant",
reasoning_content=getattr(
self.llm, "reasoning_content", None
),
)
)
except OutputParserException as e:
@@ -406,9 +412,17 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
def _format_answer(self, answer: str) -> Union[AgentAction, AgentFinish]:
    """Parse ``answer`` with a ``CrewAgentParser`` built for this executor's agent."""
    parser = CrewAgentParser(agent=self.agent)
    return parser.parse(answer)
def _format_msg(self, prompt: str, role: str = "user") -> Dict[str, str]:
def _format_msg(
self,
prompt: str,
role: str = "user",
reasoning_content: str = None,
) -> Dict[str, str]:
prompt = prompt.rstrip()
return {"role": role, "content": prompt}
msg: Dict[str, str] = {"role": role, "content": prompt}
if reasoning_content and role == "assistant":
msg["reasoning_content"] = reasoning_content
return msg
def _handle_human_feedback(self, formatted_answer: AgentFinish) -> AgentFinish:
"""

View File

@@ -143,6 +143,7 @@ class LLM:
self.set_env_callbacks()
def call(self, messages: List[Dict[str, str]], callbacks: List[Any] = []) -> str:
self.reasoning_content: Optional[str] = None
with suppress_warnings():
if callbacks and len(callbacks) > 0:
self.set_callbacks(callbacks)
@@ -175,7 +176,13 @@ class LLM:
params = {k: v for k, v in params.items() if v is not None}
response = litellm.completion(**params)
return response["choices"][0]["message"]["content"]
message = response["choices"][0]["message"]
self.reasoning_content = getattr(
message, "reasoning_content", None
) or message.get("reasoning_content")
return message["content"]
except Exception as e:
if not LLMContextLengthExceededException(
str(e)

View File

@@ -1,4 +1,7 @@
from unittest.mock import MagicMock, patch
import pytest
from litellm.types.utils import Choices, Message, ModelResponse
from crewai.agents.agent_builder.utilities.base_token_process import TokenProcess
from crewai.llm import LLM
@@ -28,3 +31,159 @@ def test_llm_callback_replacement():
assert usage_metrics_1.successful_requests == 1
assert usage_metrics_2.successful_requests == 1
assert usage_metrics_1 == calc_handler_1.token_cost_process.get_summary()
def _make_response(content, reasoning_content=None):
    """Return a litellm ``ModelResponse`` wrapping a single assistant choice.

    ``reasoning_content`` is forwarded to ``Message`` only when it is not None.
    """
    extra = {}
    if reasoning_content is not None:
        extra["reasoning_content"] = reasoning_content
    assistant_message = Message(content=content, role="assistant", **extra)
    return ModelResponse(
        choices=[Choices(message=assistant_message, index=0, finish_reason="stop")]
    )
@patch("crewai.llm.litellm.completion")
def test_llm_call_stores_reasoning_content(mock_completion):
    """After a call, ``llm.reasoning_content`` holds the response's reasoning text."""
    expected_reasoning = "The user asked about the capital of France."
    mock_completion.return_value = _make_response(
        content="Paris", reasoning_content=expected_reasoning
    )

    llm = LLM(model="deepseek/deepseek-reasoner")
    question = {"role": "user", "content": "What is the capital of France?"}
    answer = llm.call([question])

    assert answer == "Paris"
    assert llm.reasoning_content == expected_reasoning
@patch("crewai.llm.litellm.completion")
def test_llm_call_no_reasoning_content(mock_completion):
    """A response without reasoning_content leaves ``llm.reasoning_content`` as None."""
    mock_completion.return_value = _make_response(content="Hello!")

    llm = LLM(model="gpt-4o")
    prompt_messages = [{"role": "user", "content": "Hi"}]
    reply = llm.call(prompt_messages)

    assert reply == "Hello!"
    assert llm.reasoning_content is None
@patch("crewai.llm.litellm.completion")
def test_llm_call_reasoning_content_reset_between_calls(mock_completion):
    """Reasoning text from one call must not survive into the next call."""
    with_reasoning = _make_response(content="first", reasoning_content="thinking1")
    mock_completion.return_value = with_reasoning
    llm = LLM(model="deepseek/deepseek-reasoner")

    llm.call([{"role": "user", "content": "q1"}])
    assert llm.reasoning_content == "thinking1"

    # The follow-up response carries no reasoning_content at all.
    without_reasoning = _make_response(content="second")
    mock_completion.return_value = without_reasoning
    llm.call([{"role": "user", "content": "q2"}])
    assert llm.reasoning_content is None
class TestExecutorReasoningContent:
    """Checks how CrewAgentExecutor carries reasoning_content into its messages."""

    @staticmethod
    def _mock_llm():
        """Return a MagicMock LLM with the attributes the _format_msg tests set."""
        fake_llm = MagicMock()
        fake_llm.supports_stop_words.return_value = True
        fake_llm.stop = None
        return fake_llm

    def _build_executor(self, llm):
        """Create a CrewAgentExecutor around ``llm`` with mocked collaborators."""
        from crewai.agents.crew_agent_executor import CrewAgentExecutor
        from crewai.agents.tools_handler import ToolsHandler

        mock_agent = MagicMock()
        mock_agent.role = "test"
        mock_agent.verbose = False
        mock_agent.id = "agent-1"

        mock_task = MagicMock()
        mock_task.description = "test task"

        mock_crew = MagicMock()
        mock_crew.verbose = False
        mock_crew._train = False

        prompt_template = {
            "system": "You are helpful.",
            "user": "{input}{tool_names}{tools}",
        }
        return CrewAgentExecutor(
            llm=llm,
            task=mock_task,
            crew=mock_crew,
            agent=mock_agent,
            prompt=prompt_template,
            max_iter=3,
            tools=[],
            tools_names="",
            stop_words=["Observation:"],
            tools_description="",
            tools_handler=ToolsHandler(),
        )

    def test_format_msg_includes_reasoning_content(self):
        """An assistant message keeps the reasoning_content it was given."""
        executor = self._build_executor(self._mock_llm())

        formatted = executor._format_msg(
            "Hello", role="assistant", reasoning_content="thinking..."
        )

        expected = {
            "role": "assistant",
            "content": "Hello",
            "reasoning_content": "thinking...",
        }
        assert formatted == expected

    def test_format_msg_omits_reasoning_content_for_user(self):
        """A user message never carries reasoning_content, even if one is passed."""
        executor = self._build_executor(self._mock_llm())

        formatted = executor._format_msg(
            "Hello", role="user", reasoning_content="thinking..."
        )

        assert formatted == {"role": "user", "content": "Hello"}

    def test_format_msg_omits_reasoning_content_when_none(self):
        """Passing None must not add a reasoning_content key to the message."""
        executor = self._build_executor(self._mock_llm())

        formatted = executor._format_msg(
            "Hello", role="assistant", reasoning_content=None
        )

        assert formatted == {"role": "assistant", "content": "Hello"}
        assert "reasoning_content" not in formatted

    @patch("crewai.llm.litellm.completion")
    def test_invoke_loop_preserves_reasoning_content_in_messages(
        self, mock_completion
    ):
        """Assistant messages recorded during invoke should keep reasoning_content."""
        llm = LLM(model="deepseek/deepseek-reasoner")
        # Exactly one completion is queued; its content already includes a
        # "Final Answer:" section.
        only_response = _make_response(
            content="Thought: I need to think about this.\nFinal Answer: 42",
            reasoning_content="Let me reason step by step...",
        )
        mock_completion.side_effect = [only_response]

        executor = self._build_executor(llm)
        invoke_inputs = {"input": "What is the answer?", "tool_names": "", "tools": ""}
        executor.invoke(invoke_inputs)

        # Collect every assistant-role entry from the recorded history.
        assistant_msgs = []
        for recorded in executor.messages:
            if recorded["role"] == "assistant":
                assistant_msgs.append(recorded)

        assert len(assistant_msgs) >= 1
        first_assistant = assistant_msgs[0]
        assert (
            first_assistant.get("reasoning_content")
            == "Let me reason step by step..."
        )