From 449bb0d61e7adb1643ee8b137446e6df62e1c26c Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 14 May 2026 12:38:06 +0000 Subject: [PATCH] fix: add Claude 4.7 Opus support (no assistant prefill, drop temperature) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #5808 - Add supports_assistant_prefill() to BaseLLM, LLM (litellm), and AnthropicCompletion - Claude 4.6+ models reject assistant-role messages as the last message in a conversation; split observation into a user-role message instead - Drop temperature parameter for Claude 4.6+ (not supported) - Update handle_max_iterations_exceeded to use user-role message for no-prefill models - Use getattr() for backward compatibility with custom LLM adapters - Add 24 tests covering prefill detection, temperature dropping, message splitting, and backward compatibility Co-Authored-By: João --- .../src/crewai/agents/crew_agent_executor.py | 34 ++ lib/crewai/src/crewai/llm.py | 34 ++ lib/crewai/src/crewai/llms/base_llm.py | 12 + .../llms/providers/anthropic/completion.py | 26 +- .../src/crewai/utilities/agent_utils.py | 9 +- .../agents/test_claude_opus_4_7_support.py | 335 ++++++++++++++++++ 6 files changed, 448 insertions(+), 2 deletions(-) create mode 100644 lib/crewai/tests/agents/test_claude_opus_4_7_support.py diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index fce80ad7a..b910f5b31 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -164,6 +164,16 @@ class CrewAgentExecutor(BaseAgentExecutor): self.llm.supports_stop_words() if isinstance(self.llm, BaseLLM) else False ) + @property + def supports_prefill(self) -> bool: + """Check if the LLM supports assistant message prefill. + + Returns: + bool: True if the LLM supports assistant prefill. 
+ """ + supports_fn = getattr(self.llm, "supports_assistant_prefill", None) + return supports_fn() if callable(supports_fn) else True + def _setup_messages(self, inputs: dict[str, Any]) -> None: """Set up messages for the agent execution. @@ -1478,10 +1488,34 @@ class CrewAgentExecutor(BaseAgentExecutor): ) -> None: """Add message to conversation history. + For models that do not support assistant message prefill (e.g. + Claude 4.6+), the observation portion of the response is moved + into a separate user-role message so the conversation never ends + with an assistant turn. + Args: text: Message content. role: Message role (default: assistant). """ + if role == "assistant" and not self.supports_prefill: + obs_marker = "\nObservation:" + if obs_marker in text: + pre_obs, obs_content = text.split(obs_marker, 1) + self.messages.append(format_message_for_llm(pre_obs, role="assistant")) + self.messages.append( + format_message_for_llm( + f"Observation:{obs_content}", role="user" + ) + ) + else: + self.messages.append(format_message_for_llm(text, role="assistant")) + self.messages.append( + format_message_for_llm( + "Please continue based on the information above.", + role="user", + ) + ) + return self.messages.append(format_message_for_llm(text, role=role)) def _show_start_logs(self) -> None: diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 52e3b0b9f..c18c08025 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -6,6 +6,7 @@ from datetime import datetime import json import logging import os +import re from typing import ( TYPE_CHECKING, Any, @@ -2268,6 +2269,39 @@ class LLM(BaseLLM): logging.error(f"Failed to get supported params: {e!s}") return True # Default to True + def supports_assistant_prefill(self) -> bool: + """Check if the model supports assistant message prefill. + + Some Anthropic models (Claude 4.6+) reject requests where the + last message has the assistant role. 
Returns True for models + that support prefill or where the capability cannot be determined. + + Note: This method is only used by the litellm fallback path. + Native providers override this method with their own implementation. + """ + if LITELLM_AVAILABLE and litellm is not None: + try: + info = litellm.get_model_info(self.model) + provider = info.get("litellm_provider", "") + prefill = info.get("supports_assistant_prefill") + if "anthropic" in provider and prefill is False: + return False + except Exception: + logging.debug( + "Could not determine prefill support from litellm " + f"for model {self.model}; falling back to heuristic." + ) + + # Fallback heuristic for model names not in the litellm registry + model_lower = (self.model or "").lower() + if "claude" in model_lower: + match = re.search(r"claude.*?(\d+)[.-](\d{1,2})(?!\d)", model_lower) + if match: + major, minor = int(match.group(1)), int(match.group(2)) + if (major == 4 and minor >= 6) or major >= 5: + return False + return True + def get_context_window_size(self) -> int: """ Returns the context window size, using 75% of the maximum to avoid diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py index 8c2993d3a..b706ae2ec 100644 --- a/lib/crewai/src/crewai/llms/base_llm.py +++ b/lib/crewai/src/crewai/llms/base_llm.py @@ -430,6 +430,18 @@ class BaseLLM(BaseModel, ABC): # Default implementation - subclasses should override with model-specific values return DEFAULT_CONTEXT_WINDOW_SIZE + def supports_assistant_prefill(self) -> bool: + """Check if the LLM supports assistant message prefill. + + Some models (e.g. Claude 4.6+) reject requests where the last + message has the assistant role. Subclasses should override this + method when prefill is not supported. + + Returns: + True if the LLM supports assistant message prefill. + """ + return True + def supports_multimodal(self) -> bool: """Check if the LLM supports multimodal inputs.
diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 5eeeefb8c..986e90a4c 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -3,6 +3,7 @@ from __future__ import annotations import json import logging import os +import re from typing import Any, Final, Literal, TypeGuard, cast from pydantic import BaseModel, PrivateAttr, model_validator @@ -453,7 +454,8 @@ class AnthropicCompletion(BaseLLM): params["system"] = system_message # Add optional parameters if set - if self.temperature is not None: + # Claude 4.6+ models reject the temperature parameter + if self.temperature is not None and not self._is_no_prefill_model(): params["temperature"] = self.temperature if self.top_p is not None: params["top_p"] = self.top_p @@ -1821,6 +1823,28 @@ class AnthropicCompletion(BaseLLM): """Check if the model supports stop words.""" return True # All Claude models support stop sequences + def supports_assistant_prefill(self) -> bool: + """Check if the model supports assistant message prefill. + + Claude 4.6+ models reject requests where the last message has + the assistant role. + """ + return not self._is_no_prefill_model() + + def _is_no_prefill_model(self) -> bool: + """Return True when the model rejects assistant message prefill. + + Claude 4.6+ models (Opus 4.7, Sonnet 4.6, etc.) do not support + assistant prefill. 
+ """ + model_lower = self.model.lower() + match = re.search(r"claude.*?(\d+)[.-](\d{1,2})(?!\d)", model_lower) + if match: + major, minor = int(match.group(1)), int(match.group(2)) + if (major == 4 and minor >= 6) or major >= 5: + return True + return False + def get_context_window_size(self) -> int: """Get the context window size for the model.""" from crewai.llm import CONTEXT_WINDOW_USAGE_RATIO diff --git a/lib/crewai/src/crewai/utilities/agent_utils.py b/lib/crewai/src/crewai/utilities/agent_utils.py index 3cb72331c..da11cde93 100644 --- a/lib/crewai/src/crewai/utilities/agent_utils.py +++ b/lib/crewai/src/crewai/utilities/agent_utils.py @@ -318,7 +318,14 @@ def handle_max_iterations_exceeded( else: assistant_message = I18N_DEFAULT.errors("force_final_answer") - messages.append(format_message_for_llm(assistant_message, role="assistant")) + # For models that don't support assistant prefill, use a user message + # so the conversation doesn't end with an assistant turn. + supports_prefill_fn = getattr(llm, "supports_assistant_prefill", None) + supports_prefill = supports_prefill_fn() if callable(supports_prefill_fn) else True + if supports_prefill: + messages.append(format_message_for_llm(assistant_message, role="assistant")) + else: + messages.append(format_message_for_llm(assistant_message, role="user")) # Perform one more LLM call to get the final answer answer = llm.call( diff --git a/lib/crewai/tests/agents/test_claude_opus_4_7_support.py b/lib/crewai/tests/agents/test_claude_opus_4_7_support.py new file mode 100644 index 000000000..c2d67f23a --- /dev/null +++ b/lib/crewai/tests/agents/test_claude_opus_4_7_support.py @@ -0,0 +1,335 @@ +"""Tests for Claude 4.7 Opus support (issue #5808).
+ +Covers: +- BaseLLM.supports_assistant_prefill() default behaviour +- AnthropicCompletion.supports_assistant_prefill() model detection +- LLM (litellm) supports_assistant_prefill() detection +- Temperature parameter dropping for models that reject it +- CrewAgentExecutor message splitting for no-prefill models +- handle_max_iterations_exceeded prefill-aware message role +""" + +from __future__ import annotations + +from typing import Literal +from unittest.mock import MagicMock, patch + +from crewai.llms.base_llm import BaseLLM +from crewai.utilities.agent_utils import format_message_for_llm + + +# --------------------------------------------------------------------------- +# BaseLLM.supports_assistant_prefill (default) +# --------------------------------------------------------------------------- + + +class TestBaseLLMPrefillDefault: + """BaseLLM.supports_assistant_prefill() should default to True.""" + + def test_base_llm_defaults_to_true(self): + """The abstract base returns True so existing providers are + unaffected unless they override.""" + llm = MagicMock(spec=BaseLLM) + assert BaseLLM.supports_assistant_prefill(llm) is True + + +# --------------------------------------------------------------------------- +# AnthropicCompletion.supports_assistant_prefill +# --------------------------------------------------------------------------- + + +class TestAnthropicPrefillDetection: + """AnthropicCompletion.supports_assistant_prefill() should return False + for Claude 4.6+ models and True for earlier models.""" + + def _make_anthropic_llm(self, model: str) -> object: + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + + llm = AnthropicCompletion.model_construct(model=model) + return llm + + def test_claude_opus_4_7_no_prefill(self): + llm = self._make_anthropic_llm("claude-opus-4-7") + assert llm.supports_assistant_prefill() is False + + def test_claude_sonnet_4_6_no_prefill(self): + llm = self._make_anthropic_llm("claude-sonnet-4-6") + 
assert llm.supports_assistant_prefill() is False + + def test_claude_opus_4_5_supports_prefill(self): + llm = self._make_anthropic_llm("claude-opus-4-5") + assert llm.supports_assistant_prefill() is True + + def test_claude_3_5_sonnet_supports_prefill(self): + llm = self._make_anthropic_llm("claude-3-5-sonnet-20241022") + assert llm.supports_assistant_prefill() is True + + def test_claude_3_opus_supports_prefill(self): + llm = self._make_anthropic_llm("claude-3-opus-20240229") + assert llm.supports_assistant_prefill() is True + + def test_claude_5_0_no_prefill(self): + """Future major version should also be detected.""" + llm = self._make_anthropic_llm("claude-5-0-opus") + assert llm.supports_assistant_prefill() is False + + +# --------------------------------------------------------------------------- +# AnthropicCompletion temperature dropping +# --------------------------------------------------------------------------- + + +class TestAnthropicTemperatureDropping: + """Claude 4.6+ models reject the temperature parameter.""" + + def test_temperature_dropped_for_no_prefill_model(self): + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + + llm = AnthropicCompletion( + model="claude-opus-4-7", + max_tokens=4096, + stream=False, + temperature=0.7, + ) + params = llm._prepare_completion_params( + messages=[], system_message=None, tools=None + ) + assert "temperature" not in params + + def test_temperature_kept_for_prefill_model(self): + from crewai.llms.providers.anthropic.completion import AnthropicCompletion + + llm = AnthropicCompletion( + model="claude-3-5-sonnet-20241022", + max_tokens=4096, + stream=False, + temperature=0.7, + ) + params = llm._prepare_completion_params( + messages=[], system_message=None, tools=None + ) + assert params.get("temperature") == 0.7 + + +# --------------------------------------------------------------------------- +# LLM (litellm) supports_assistant_prefill +# 
--------------------------------------------------------------------------- + + +class TestLiteLLMPrefillDetection: + """LLM.supports_assistant_prefill() should use litellm.get_model_info() + with a fallback to name-based heuristic. + + Since LLM.__new__ routes models to native provider subclasses, we + test the method by invoking it as an unbound function on a plain + object that has the necessary `.model` attribute. + """ + + def _call_method(self, model: str, **patches) -> bool: + """Call LLM.supports_assistant_prefill on a lightweight stub.""" + from crewai.llm import LLM + + stub = MagicMock() + stub.model = model + return LLM.supports_assistant_prefill(stub) + + def test_litellm_detects_no_prefill_via_model_info(self): + with patch("crewai.llm.litellm") as mock_litellm, \ + patch("crewai.llm.LITELLM_AVAILABLE", True): + mock_litellm.get_model_info.return_value = { + "litellm_provider": "anthropic", + "supports_assistant_prefill": False, + } + assert self._call_method("claude-opus-4-7") is False + + def test_litellm_supports_prefill_for_older_claude(self): + with patch("crewai.llm.litellm") as mock_litellm, \ + patch("crewai.llm.LITELLM_AVAILABLE", True): + mock_litellm.get_model_info.return_value = { + "litellm_provider": "anthropic", + "supports_assistant_prefill": True, + } + assert self._call_method("claude-3-opus-20240229") is True + + def test_litellm_non_anthropic_defaults_to_true(self): + with patch("crewai.llm.litellm") as mock_litellm, \ + patch("crewai.llm.LITELLM_AVAILABLE", True): + mock_litellm.get_model_info.return_value = { + "litellm_provider": "openai", + "supports_assistant_prefill": False, + } + assert self._call_method("gpt-4o") is True + + def test_litellm_fallback_heuristic_claude_4_7(self): + with patch("crewai.llm.litellm") as mock_litellm, \ + patch("crewai.llm.LITELLM_AVAILABLE", True): + mock_litellm.get_model_info.side_effect = Exception("not found") + assert self._call_method("claude-opus-4-7") is False + + def 
test_litellm_fallback_heuristic_non_claude(self): + with patch("crewai.llm.litellm") as mock_litellm, \ + patch("crewai.llm.LITELLM_AVAILABLE", True): + mock_litellm.get_model_info.side_effect = Exception("not found") + assert self._call_method("some-custom-model") is True + + +# --------------------------------------------------------------------------- +# CrewAgentExecutor._append_message +# --------------------------------------------------------------------------- + + +class TestAppendAssistantResponse: + """When the model does not support prefill, the observation part of the + response must be split into a separate user-role message.""" + + def _make_executor(self, supports_prefill: bool): + from crewai.agents.crew_agent_executor import CrewAgentExecutor + + mock_llm = MagicMock() + mock_llm.supports_stop_words.return_value = True + mock_llm.supports_assistant_prefill.return_value = supports_prefill + mock_llm.stop = None + mock_llm.model = ( + "claude-opus-4-7" if not supports_prefill else "gpt-4o" + ) + + executor = CrewAgentExecutor.model_construct( + llm=mock_llm, + messages=[], + ) + return executor + + def test_prefill_supported_single_assistant_message(self): + executor = self._make_executor(supports_prefill=True) + text = ( + "Thought: searching\n" + "Action: search\n" + "Action Input: query\n" + "Observation: result" + ) + executor._append_message(text) + assert len(executor.messages) == 1 + assert executor.messages[0]["role"] == "assistant" + + def test_no_prefill_splits_observation_into_user_message(self): + executor = self._make_executor(supports_prefill=False) + text = ( + "Thought: searching\n" + "Action: search\n" + "Action Input: query\n" + "Observation: result data" + ) + executor._append_message(text) + + assert len(executor.messages) == 2 + assert executor.messages[0]["role"] == "assistant" + assert "Observation" not in executor.messages[0]["content"] + assert executor.messages[1]["role"] == "user" + assert 
executor.messages[1]["content"].startswith("Observation:") + + def test_no_prefill_without_observation_adds_continuation(self): + executor = self._make_executor(supports_prefill=False) + text = "Thought: I must give my final answer\nFinal Answer: 42" + executor._append_message(text) + + assert len(executor.messages) == 2 + assert executor.messages[0]["role"] == "assistant" + assert executor.messages[1]["role"] == "user" + + def test_no_prefill_last_message_is_always_user(self): + executor = self._make_executor(supports_prefill=False) + + # Case 1: with observation + executor.messages = [] + executor._append_message( + "Thought: x\nAction: y\nAction Input: z\nObservation: r" + ) + assert executor.messages[-1]["role"] == "user" + + # Case 2: without observation + executor.messages = [] + executor._append_message("Thought: done\nFinal Answer: 42") + assert executor.messages[-1]["role"] == "user" + + def test_multiple_iterations_message_structure(self): + executor = self._make_executor(supports_prefill=False) + executor._append_message( + "Thought: step 1\nAction: tool1\nAction Input: a\nObservation: res1" + ) + executor._append_message( + "Thought: step 2\nAction: tool2\nAction Input: b\nObservation: res2" + ) + assert len(executor.messages) == 4 + roles = [m["role"] for m in executor.messages] + assert roles == ["assistant", "user", "assistant", "user"] + + def test_user_role_messages_pass_through_unchanged(self): + """Messages with role='user' should not be affected.""" + executor = self._make_executor(supports_prefill=False) + executor._append_message("some user input", role="user") + assert len(executor.messages) == 1 + assert executor.messages[0]["role"] == "user" + + def test_system_role_messages_pass_through_unchanged(self): + executor = self._make_executor(supports_prefill=False) + executor._append_message("system prompt", role="system") + assert len(executor.messages) == 1 + assert executor.messages[0]["role"] == "system" + + def 
test_supports_prefill_property_graceful_fallback(self): + """When the LLM doesn't have supports_assistant_prefill, default True.""" + from crewai.agents.crew_agent_executor import CrewAgentExecutor + + mock_llm = MagicMock(spec=[]) # Empty spec = no attributes + executor = CrewAgentExecutor.model_construct(llm=mock_llm, messages=[]) + assert executor.supports_prefill is True + + +# --------------------------------------------------------------------------- +# handle_max_iterations_exceeded prefill-aware +# --------------------------------------------------------------------------- + + +class TestHandleMaxIterationsExceededPrefill: + """handle_max_iterations_exceeded should use user role for the forced + answer message when the model doesn't support prefill.""" + + def test_no_prefill_uses_user_role(self): + from crewai.utilities.agent_utils import handle_max_iterations_exceeded + + mock_llm = MagicMock() + mock_llm.supports_assistant_prefill.return_value = False + mock_llm.call.return_value = "Final Answer: done" + + messages: list[dict[str, str]] = [] + handle_max_iterations_exceeded( + formatted_answer=None, + printer=MagicMock(), + messages=messages, + llm=mock_llm, + callbacks=[], + verbose=False, + ) + # The forced-answer message should be "user" role, not "assistant" + assert any(m["role"] == "user" for m in messages) + assert not any( + m["role"] == "assistant" for m in messages + ), "Should not have assistant message for no-prefill model" + + def test_prefill_uses_assistant_role(self): + from crewai.utilities.agent_utils import handle_max_iterations_exceeded + + mock_llm = MagicMock() + mock_llm.supports_assistant_prefill.return_value = True + mock_llm.call.return_value = "Final Answer: done" + + messages: list[dict[str, str]] = [] + handle_max_iterations_exceeded( + formatted_answer=None, + printer=MagicMock(), + messages=messages, + llm=mock_llm, + callbacks=[], + verbose=False, + ) + assert any(m["role"] == "assistant" for m in messages)