mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-03 06:08:15 +00:00
Fix #5808: Add support for Claude 4.7 Opus (no assistant prefill, drop temperature)
- Add LLM.supports_assistant_prefill() to detect Anthropic models that reject trailing assistant messages (Claude 4.6+)
- Add CrewAgentExecutor._append_assistant_response() to split the observation into a separate user-role message for no-prefill models, ensuring the conversation never ends with an assistant turn
- Drop the temperature parameter for Claude 4.6+ models that reject it
- Add 17 unit tests covering detection, temperature dropping, and message-splitting behaviour

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -69,6 +69,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
self.original_tools = original_tools
|
||||
self.step_callback = step_callback
|
||||
self.use_stop_words = self.llm.supports_stop_words()
|
||||
self.supports_prefill = self.llm.supports_assistant_prefill()
|
||||
self.tools_description = tools_description
|
||||
self.function_calling_llm = function_calling_llm
|
||||
self.respect_context_window = respect_context_window
|
||||
@@ -183,8 +184,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
f'\n{self._i18n.errors("force_final_answer")}'
|
||||
)
|
||||
self.have_forced_answer = True
|
||||
self.messages.append(
|
||||
self._format_msg(formatted_answer.text, role="assistant")
|
||||
self._append_assistant_response(
|
||||
formatted_answer.text
|
||||
)
|
||||
|
||||
except OutputParserException as e:
|
||||
@@ -406,6 +407,34 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
def _format_answer(self, answer: str) -> Union[AgentAction, AgentFinish]:
    """Parse raw LLM output into an ``AgentAction`` or an ``AgentFinish``.

    The work is delegated to a ``CrewAgentParser`` built for this
    executor's agent.
    """
    parser = CrewAgentParser(agent=self.agent)
    return parser.parse(answer)
|
||||
|
||||
def _append_assistant_response(self, text: str) -> None:
    """Record the agent's latest response in ``self.messages``.

    When the model accepts assistant prefill, ``text`` is stored as a single
    assistant message. Otherwise two messages are stored so the history
    ends on a user turn:

    * if ``text`` contains a line starting with ``Observation:``, everything
      before the first such line becomes the assistant message and the rest
      (from ``Observation:`` onward) becomes a user message;
    * if there is no such line, the whole text becomes the assistant message
      and a generic user continuation prompt follows it.
    """
    if not self.supports_prefill:
        before, marker, after = text.partition("\nObservation:")
        if marker:
            assistant_part = before
            user_part = f"Observation:{after}"
        else:
            assistant_part = text
            user_part = "Please continue based on the information above."

        self.messages.append(self._format_msg(assistant_part, role="assistant"))
        self.messages.append(self._format_msg(user_part, role="user"))
        return

    # Prefill-capable models keep the original single assistant message.
    self.messages.append(self._format_msg(text, role="assistant"))
|
||||
|
||||
def _format_msg(self, prompt: str, role: str = "user") -> Dict[str, str]:
    """Build a chat message dict for ``role``.

    Trailing whitespace is stripped from ``prompt`` before it is stored
    under the ``"content"`` key.
    """
    return {"role": role, "content": prompt.rstrip()}
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import warnings
|
||||
@@ -174,6 +175,10 @@ class LLM:
|
||||
# Remove None values to avoid passing unnecessary parameters
|
||||
params = {k: v for k, v in params.items() if v is not None}
|
||||
|
||||
# Claude 4.6+ models reject the temperature parameter
|
||||
if self._is_anthropic_no_prefill_model():
|
||||
params.pop("temperature", None)
|
||||
|
||||
response = litellm.completion(**params)
|
||||
return response["choices"][0]["message"]["content"]
|
||||
except Exception as e:
|
||||
@@ -184,6 +189,38 @@ class LLM:
|
||||
|
||||
raise # Re-raise the exception after logging
|
||||
|
||||
def supports_assistant_prefill(self) -> bool:
    """Check if the model supports assistant message prefill.

    Some Anthropic models (Claude 4.6+) reject requests where the last
    message has the assistant role.

    Returns:
        False when litellm's model info reports an Anthropic provider with
        ``supports_assistant_prefill`` explicitly set to False, or when the
        model name looks like Claude 4.6 or newer. True otherwise, including
        when the capability cannot be determined.
    """
    try:
        info = litellm.get_model_info(self.model)
        # `or ""` keeps the membership test valid when the key holds None.
        provider = info.get("litellm_provider") or ""
        prefill = info.get("supports_assistant_prefill")
        if "anthropic" in provider and prefill is False:
            return False
    except Exception:
        # Best effort: an unknown model or a registry error simply falls
        # through to the name-based heuristic below.
        pass

    # Name-based heuristic for models the registry did not flag.
    model_lower = self.model.lower()
    if "claude" in model_lower:
        # Both version numbers are limited to one or two digits that are not
        # part of a longer digit run. Without that bound a date suffix is
        # read as the minor version ("claude-opus-4-20250514" -> 4.20250514)
        # and Claude 4.0 is misclassified as a no-prefill model.
        match = re.search(
            r"claude.*?(?<!\d)(\d{1,2})[.-](\d{1,2})(?!\d)", model_lower
        )
        if match:
            major, minor = int(match.group(1)), int(match.group(2))
            if (major == 4 and minor >= 6) or major >= 5:
                return False
    return True
|
||||
|
||||
def _is_anthropic_no_prefill_model(self) -> bool:
    """Report whether ``supports_assistant_prefill()`` is False for this model.

    The same flag is used to drop parameters that such models reject
    (e.g. temperature).
    """
    prefill_ok = self.supports_assistant_prefill()
    return not prefill_ok
|
||||
|
||||
def supports_function_calling(self) -> bool:
|
||||
try:
|
||||
params = get_supported_openai_params(model=self.model)
|
||||
|
||||
277
tests/test_claude_opus_4_7_support.py
Normal file
277
tests/test_claude_opus_4_7_support.py
Normal file
@@ -0,0 +1,277 @@
|
||||
"""Tests for Claude 4.7 Opus support (issue #5808).
|
||||
|
||||
Covers:
|
||||
- LLM.supports_assistant_prefill() detection
|
||||
- Temperature parameter dropping for models that reject it
|
||||
- CrewAgentExecutor message splitting for no-prefill models
|
||||
"""
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.llm import LLM
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# LLM.supports_assistant_prefill
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSupportsAssistantPrefill:
    """Detection tests for ``LLM.supports_assistant_prefill()``.

    The method must return False for Anthropic models that do not support
    prefill and True for everything else.
    """

    @staticmethod
    def _registry_entry(provider, prefill):
        """Return a minimal stand-in for a litellm model-info record."""
        return {
            "litellm_provider": provider,
            "supports_assistant_prefill": prefill,
        }

    @patch("crewai.llm.litellm.get_model_info")
    def test_claude_opus_4_7_no_prefill(self, get_info):
        get_info.return_value = self._registry_entry("anthropic", False)
        result = LLM(model="claude-opus-4-7").supports_assistant_prefill()
        assert result is False

    @patch("crewai.llm.litellm.get_model_info")
    def test_claude_3_opus_supports_prefill(self, get_info):
        get_info.return_value = self._registry_entry("anthropic", True)
        result = LLM(model="claude-3-opus-20240229").supports_assistant_prefill()
        assert result is True

    @patch("crewai.llm.litellm.get_model_info")
    def test_openai_model_supports_prefill(self, get_info):
        """A False flag on a non-Anthropic provider is ignored: the flag
        only matters for Anthropic models."""
        get_info.return_value = self._registry_entry("openai", False)
        result = LLM(model="gpt-4o").supports_assistant_prefill()
        assert result is True

    @patch("crewai.llm.litellm.get_model_info")
    def test_anthropic_provider_prefix(self, get_info):
        get_info.return_value = self._registry_entry("anthropic", False)
        result = LLM(model="anthropic/claude-opus-4-7").supports_assistant_prefill()
        assert result is False

    @patch("crewai.llm.litellm.get_model_info")
    def test_fallback_heuristic_claude_4_7(self, get_info):
        """If litellm cannot resolve the model, the name-based heuristic
        must still recognise Claude 4.6+ patterns."""
        get_info.side_effect = Exception("model not found")
        result = LLM(model="claude-opus-4-7").supports_assistant_prefill()
        assert result is False

    @patch("crewai.llm.litellm.get_model_info")
    def test_fallback_heuristic_claude_4_6(self, get_info):
        get_info.side_effect = Exception("model not found")
        result = LLM(model="claude-sonnet-4-6").supports_assistant_prefill()
        assert result is False

    @patch("crewai.llm.litellm.get_model_info")
    def test_fallback_heuristic_claude_5(self, get_info):
        get_info.side_effect = Exception("model not found")
        result = LLM(model="claude-5-0-opus").supports_assistant_prefill()
        assert result is False

    @patch("crewai.llm.litellm.get_model_info")
    def test_fallback_heuristic_claude_3_5(self, get_info):
        """Claude 3.5 keeps prefill support."""
        get_info.side_effect = Exception("model not found")
        result = LLM(model="claude-3-5-sonnet-20241022").supports_assistant_prefill()
        assert result is True

    @patch("crewai.llm.litellm.get_model_info")
    def test_fallback_non_claude_model(self, get_info):
        get_info.side_effect = Exception("model not found")
        result = LLM(model="some-custom-model").supports_assistant_prefill()
        assert result is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Temperature dropping for no-prefill Anthropic models
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestTemperatureDropping:
    """Claude 4.6+ models reject the temperature parameter.

    ``LLM.call()`` should strip it before forwarding to litellm. Both tests
    patch ``_is_anthropic_no_prefill_model`` directly, so they exercise only
    the parameter handling and not the model detection.
    """

    @patch("crewai.llm.litellm.completion")
    def test_temperature_dropped_for_claude_4_7(self, mock_completion):
        """``temperature`` must not reach litellm for a no-prefill model."""
        mock_completion.return_value = {
            "choices": [{"message": {"content": "response"}}]
        }
        llm = LLM(model="claude-opus-4-7", temperature=0.7)
        with patch.object(
            llm, "_is_anthropic_no_prefill_model", return_value=True
        ):
            llm.call([{"role": "user", "content": "hi"}])

        assert mock_completion.called
        # LLM.call forwards its parameters as keyword arguments
        # (litellm.completion(**params)), so kwargs is the only place a
        # temperature could show up.
        assert "temperature" not in mock_completion.call_args.kwargs

    @patch("crewai.llm.litellm.completion")
    def test_temperature_kept_for_normal_models(self, mock_completion):
        """``temperature`` is forwarded unchanged for other models."""
        mock_completion.return_value = {
            "choices": [{"message": {"content": "response"}}]
        }
        llm = LLM(model="gpt-4o", temperature=0.7)
        with patch.object(
            llm, "_is_anthropic_no_prefill_model", return_value=False
        ):
            llm.call([{"role": "user", "content": "hi"}])

        assert mock_completion.called
        assert mock_completion.call_args.kwargs.get("temperature") == 0.7
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CrewAgentExecutor._append_assistant_response
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAppendAssistantResponse:
    """Tests for ``CrewAgentExecutor._append_assistant_response``.

    For a model without prefill support, the observation part of the
    response must end up in a separate user-role message.
    """

    def _make_executor(self, supports_prefill: bool):
        """Build a minimal CrewAgentExecutor whose mocked LLM reports the
        given prefill capability."""
        from crewai.agents.crew_agent_executor import CrewAgentExecutor

        # Mocked LLM: only the attributes configured here are pinned.
        llm = MagicMock()
        llm.supports_stop_words.return_value = True
        llm.supports_assistant_prefill.return_value = supports_prefill
        llm.stop = None
        llm.model = "gpt-4o" if supports_prefill else "claude-opus-4-7"

        agent = MagicMock()
        agent.id = "test-agent"

        return CrewAgentExecutor(
            llm=llm,
            task=MagicMock(),
            crew=MagicMock(),
            agent=agent,
            prompt={"system": "sys", "user": "usr"},
            max_iter=5,
            tools=[],
            tools_names="",
            stop_words=["\nObservation:"],
            tools_description="",
            tools_handler=MagicMock(),
        )

    def test_prefill_supported_single_assistant_message(self):
        """With prefill support the text is stored as one assistant message
        (the pre-existing behaviour)."""
        executor = self._make_executor(supports_prefill=True)
        text = "Thought: searching\nAction: search\nAction Input: query\nObservation: result"
        executor._append_assistant_response(text)

        assert len(executor.messages) == 1
        only = executor.messages[0]
        assert only["role"] == "assistant"
        assert only["content"] == text.rstrip()

    def test_no_prefill_splits_observation_into_user_message(self):
        """Without prefill support the observation becomes its own user
        message, so the conversation does not end on an assistant turn."""
        executor = self._make_executor(supports_prefill=False)
        text = "Thought: searching\nAction: search\nAction Input: query\nObservation: result data"
        executor._append_assistant_response(text)

        assert len(executor.messages) == 2
        first, second = executor.messages[0], executor.messages[1]
        assert first["role"] == "assistant"
        assert "Observation" not in first["content"]
        assert second["role"] == "user"
        assert second["content"].startswith("Observation:")

    def test_no_prefill_without_observation_adds_continuation(self):
        """With no Observation marker (e.g. a forced answer), a generic user
        continuation message is appended."""
        executor = self._make_executor(supports_prefill=False)
        executor._append_assistant_response(
            "Thought: I must give my final answer\nFinal Answer: 42"
        )

        assert len(executor.messages) == 2
        assert executor.messages[0]["role"] == "assistant"
        assert executor.messages[1]["role"] == "user"

    def test_no_prefill_with_force_answer_and_observation(self):
        """Force-answer text that follows the observation stays with it:
        everything from Observation: onward goes to the user message."""
        executor = self._make_executor(supports_prefill=False)
        text = (
            "Thought: searching\nAction: search\nAction Input: query\n"
            "Observation: tool result\n"
            "Now it's time you MUST give your absolute best final answer."
        )
        executor._append_assistant_response(text)

        assert len(executor.messages) == 2
        assert executor.messages[0]["role"] == "assistant"
        user_msg = executor.messages[1]
        assert user_msg["role"] == "user"
        assert "tool result" in user_msg["content"]
        assert "MUST give" in user_msg["content"]

    def test_no_prefill_last_message_is_always_user(self):
        """Whatever the content, the last message has the user role when
        prefill is not supported."""
        executor = self._make_executor(supports_prefill=False)

        # Case 1: with observation
        executor.messages = []
        executor._append_assistant_response(
            "Thought: x\nAction: y\nAction Input: z\nObservation: r"
        )
        assert executor.messages[-1]["role"] == "user"

        # Case 2: without observation
        executor.messages = []
        executor._append_assistant_response("Thought: done\nFinal Answer: 42")
        assert executor.messages[-1]["role"] == "user"

    def test_multiple_iterations_message_structure(self):
        """Two consecutive tool-use iterations keep the assistant/user
        alternation intact for a no-prefill model."""
        executor = self._make_executor(supports_prefill=False)

        # First iteration
        executor._append_assistant_response(
            "Thought: step 1\nAction: tool1\nAction Input: a\nObservation: res1"
        )
        # Second iteration
        executor._append_assistant_response(
            "Thought: step 2\nAction: tool2\nAction Input: b\nObservation: res2"
        )

        assert len(executor.messages) == 4  # 2 assistant + 2 user
        # Verify alternation: assistant, user, assistant, user
        observed_roles = [msg["role"] for msg in executor.messages]
        assert observed_roles == ["assistant", "user", "assistant", "user"]
|
||||
Reference in New Issue
Block a user