Compare commits

...

1 Commits

Author SHA1 Message Date
Devin AI
c2e6747176 fix: merge consecutive same-role messages for Anthropic Claude 4.6+
Claude 4.6+ rejects requests containing consecutive assistant messages,
treating the trailing one as an unsupported prefill. This occurs when
CrewAgentExecutor appends multiple assistant messages during tool-use
iterations in the ReAct loop.

Added _merge_consecutive_messages() static method to BaseLLM that
merges consecutive same-role messages by joining their text content
with double newlines. Messages with list content (tool-use blocks,
multimodal) are left untouched.

The merge is called in both:
- LLM._format_messages_for_provider() for the LiteLLM path
- AnthropicCompletion._format_messages_for_anthropic() for native path

Added 7 unit tests covering:
- Consecutive assistant message merging (LiteLLM and native paths)
- Alternating messages preserved
- List content not merged
- Original messages not mutated
- Non-Anthropic models unaffected
- Consecutive user messages also merged

Fixes #4798

Co-Authored-By: João <joao@crewai.com>
2026-03-10 13:58:36 +00:00
4 changed files with 193 additions and 0 deletions

View File

@@ -2113,6 +2113,12 @@ class LLM(BaseLLM):
if not self.is_anthropic:
return messages # type: ignore[return-value]
# Merge consecutive same-role messages to avoid Anthropic API errors.
# Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
# This can happen when CrewAgentExecutor appends multiple assistant messages
# during tool-use iterations.
messages = self._merge_consecutive_messages(messages)
# Anthropic requires messages to start with 'user' role
if not messages or messages[0]["role"] == "system":
# If first message is system or empty, add a placeholder user message

View File

@@ -581,6 +581,43 @@ class BaseLLM(ABC):
return None
@staticmethod
def _merge_consecutive_messages(
    messages: list[LLMMessage],
) -> list[LLMMessage]:
    """Collapse runs of same-role messages into single messages.

    Anthropic (Claude 4.6+) rejects requests containing consecutive
    assistant messages, treating the trailing one as an unsupported
    prefill. Runs of messages that share a role are therefore folded
    into one message whose text parts are joined with double newlines.

    Only plain-string content is combined; messages whose content is a
    list (e.g. tool-use blocks or multimodal content) are kept as-is.

    Args:
        messages: The message list to process.

    Returns:
        A new list with consecutive same-role messages merged.
    """
    if not messages:
        return messages
    result: list[LLMMessage] = []
    for msg in messages:
        candidate = msg.copy()  # type: ignore[union-attr]
        if result:
            tail = result[-1]
            same_role = candidate["role"] == tail["role"]
            both_text = isinstance(tail.get("content"), str) and isinstance(
                candidate.get("content"), str
            )
            if same_role and both_text:
                # Fold this message into the previous one instead of
                # appending a second entry with the same role.
                tail["content"] = "\n\n".join(
                    (tail["content"], candidate["content"])
                )
                continue
        result.append(candidate)
    return result
def _format_messages(self, messages: str | list[LLMMessage]) -> list[LLMMessage]:
"""Convert messages to standard format.

View File

@@ -623,6 +623,12 @@ class AnthropicCompletion(BaseLLM):
if pending_tool_results:
formatted_messages.append({"role": "user", "content": pending_tool_results})
# Merge consecutive same-role messages to avoid Anthropic API errors.
# Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
# This can happen when CrewAgentExecutor appends multiple assistant
# messages during tool-use iterations in the ReAct loop.
formatted_messages = self._merge_consecutive_messages(formatted_messages)
# Ensure first message is from user (Anthropic requirement)
if not formatted_messages:
# If no messages, add a default user message

View File

@@ -481,6 +481,150 @@ def test_anthropic_message_formatting(anthropic_llm, system_message, user_messag
anthropic_llm._format_messages_for_anthropic([{"invalid": "message"}])
def test_anthropic_merges_consecutive_assistant_messages_litellm():
    """Test that consecutive assistant messages are merged for Anthropic models via LiteLLM.

    Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
    This test ensures that _format_messages_for_provider merges them.
    Reproduces the scenario described in issue #4798.
    """
    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
    assistant_parts = [
        "I will use a tool",
        "Observation: tool result",
        "Now I have the answer",
    ]
    history = [{"role": "user", "content": "Do something"}]
    history.extend({"role": "assistant", "content": part} for part in assistant_parts)

    result = llm._format_messages_for_provider(history)

    # The run of three assistant messages collapses into a single entry.
    assert len(result) == 2
    assert result[0]["role"] == "user"
    assert result[0]["content"] == "Do something"
    assert result[1]["role"] == "assistant"
    for part in assistant_parts:
        assert part in result[1]["content"]
def test_anthropic_merges_consecutive_assistant_messages_native():
    """Test that consecutive assistant messages are merged for native Anthropic provider.

    Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
    This test ensures that _format_messages_for_anthropic merges them.
    Reproduces the scenario described in issue #4798.
    """
    llm = AnthropicCompletion(model="claude-3-sonnet", is_litellm=False)
    assistant_parts = [
        "I will use a tool",
        "Observation: tool result",
        "Now I have the answer",
    ]
    history = [{"role": "user", "content": "Do something"}]
    history.extend({"role": "assistant", "content": part} for part in assistant_parts)

    result, system_msg = llm._format_messages_for_anthropic(history)

    # No system message was supplied, so none should come back.
    assert system_msg is None
    # The run of three assistant messages collapses into a single entry.
    assert len(result) == 2
    assert result[0]["role"] == "user"
    assert result[0]["content"] == "Do something"
    assert result[1]["role"] == "assistant"
    for part in assistant_parts:
        assert part in result[1]["content"]
def test_merge_consecutive_messages_preserves_alternating():
    """Test that already-alternating messages are not modified."""
    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
    conversation = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Hi there"},
        {"role": "user", "content": "How are you?"},
        {"role": "assistant", "content": "Great!"},
    ]

    result = llm._format_messages_for_provider(conversation)

    # Nothing to merge: the role sequence survives untouched.
    assert len(result) == 4
    assert [m["role"] for m in result] == ["user", "assistant", "user", "assistant"]
def test_merge_consecutive_messages_skips_list_content():
    """Test that messages with list content (e.g. tool-use blocks) are not merged."""
    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
    tool_use_block = [{"type": "tool_use", "id": "1", "name": "test", "input": {}}]
    conversation = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": tool_use_block},
        {"role": "assistant", "content": "text after tool use"},
    ]

    result = llm._format_messages_for_provider(conversation)

    # The list-content assistant message cannot be merged, so we get 3 messages.
    assert len(result) == 3
    assert result[1]["role"] == "assistant"
    assert isinstance(result[1]["content"], list)
    assert result[2]["role"] == "assistant"
    assert result[2]["content"] == "text after tool use"
def test_merge_consecutive_messages_does_not_mutate_original():
    """Test that merging does not mutate the original message list.

    The previous version only checked the list length and the content of
    messages[1]; an in-place change to messages[0], messages[2], or any
    role key would have gone undetected. A deep-copy snapshot compared
    against the full structure after the call catches any mutation.
    """
    import copy

    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
    messages = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Response 1"},
        {"role": "assistant", "content": "Response 2"},
    ]
    snapshot = copy.deepcopy(messages)

    llm._format_messages_for_provider(messages)

    # Original messages should not be modified in any way: neither the
    # list itself nor any of the message dicts it contains.
    assert messages == snapshot
def test_merge_consecutive_messages_non_anthropic_unchanged():
    """Test that non-Anthropic models do not merge consecutive messages."""
    llm = LLM(model="gpt-4", is_litellm=True)
    history = [
        {"role": "user", "content": "Hello"},
        {"role": "assistant", "content": "Response 1"},
        {"role": "assistant", "content": "Response 2"},
    ]

    result = llm._format_messages_for_provider(history)

    # Non-Anthropic models pass the messages through without merging.
    assert len(result) == 3
def test_merge_consecutive_user_messages():
    """Test that consecutive user messages are also merged for Anthropic."""
    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
    conversation = [
        {"role": "user", "content": "Part 1"},
        {"role": "user", "content": "Part 2"},
        {"role": "assistant", "content": "Response"},
    ]

    result = llm._format_messages_for_provider(conversation)

    # Both user messages fold into one; the assistant reply stays separate.
    assert len(result) == 2
    assert result[0]["role"] == "user"
    assert "Part 1" in result[0]["content"]
    assert "Part 2" in result[0]["content"]
    assert result[1]["role"] == "assistant"
def test_deepseek_r1_with_open_router():
if not os.getenv("OPEN_ROUTER_API_KEY"):
pytest.skip("OPEN_ROUTER_API_KEY not set; skipping test.")