diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 8a4ac2edd..6ee33494f 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -2113,6 +2113,12 @@ class LLM(BaseLLM):
         if not self.is_anthropic:
             return messages  # type: ignore[return-value]
 
+        # Merge consecutive same-role messages to avoid Anthropic API errors.
+        # Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
+        # This can happen when CrewAgentExecutor appends multiple assistant messages
+        # during tool-use iterations.
+        messages = self._merge_consecutive_messages(messages)
+
         # Anthropic requires messages to start with 'user' role
         if not messages or messages[0]["role"] == "system":
             # If first message is system or empty, add a placeholder user message
diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py
index 1ab710706..6965ef8aa 100644
--- a/lib/crewai/src/crewai/llms/base_llm.py
+++ b/lib/crewai/src/crewai/llms/base_llm.py
@@ -581,6 +581,43 @@ class BaseLLM(ABC):
 
         return None
 
+    @staticmethod
+    def _merge_consecutive_messages(
+        messages: list[LLMMessage],
+    ) -> list[LLMMessage]:
+        """Merge consecutive messages that share the same role.
+
+        Anthropic (Claude 4.6+) rejects requests containing consecutive
+        assistant messages, treating the trailing one as an unsupported
+        prefill. This helper collapses runs of same-role messages into a
+        single message by joining their text content with double newlines.
+
+        Messages whose content is already a list (e.g. tool-use blocks or
+        multimodal content) are left untouched — only plain-string content
+        is merged.
+
+        Args:
+            messages: The message list to process.
+
+        Returns:
+            A new list with consecutive same-role messages merged.
+        """
+        if not messages:
+            return messages
+
+        merged: list[LLMMessage] = [messages[0].copy()]  # type: ignore[union-attr]
+        for msg in messages[1:]:
+            prev = merged[-1]
+            if (
+                msg["role"] == prev["role"]
+                and isinstance(msg.get("content"), str)
+                and isinstance(prev.get("content"), str)
+            ):
+                prev["content"] = prev["content"] + "\n\n" + msg["content"]
+            else:
+                merged.append(msg.copy())  # type: ignore[union-attr]
+        return merged
+
     def _format_messages(self, messages: str | list[LLMMessage]) -> list[LLMMessage]:
         """Convert messages to standard format.
 
diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
index f7cb76471..22263121e 100644
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -623,6 +623,12 @@ class AnthropicCompletion(BaseLLM):
         if pending_tool_results:
             formatted_messages.append({"role": "user", "content": pending_tool_results})
 
+        # Merge consecutive same-role messages to avoid Anthropic API errors.
+        # Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
+        # This can happen when CrewAgentExecutor appends multiple assistant
+        # messages during tool-use iterations in the ReAct loop.
+        formatted_messages = self._merge_consecutive_messages(formatted_messages)
+
         # Ensure first message is from user (Anthropic requirement)
         if not formatted_messages:
             # If no messages, add a default user message
diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py
index 71cb69790..6f13814b9 100644
--- a/lib/crewai/tests/test_llm.py
+++ b/lib/crewai/tests/test_llm.py
@@ -481,6 +481,150 @@ def test_anthropic_message_formatting(anthropic_llm, system_message, user_messag
         anthropic_llm._format_messages_for_anthropic([{"invalid": "message"}])
 
 
+def test_anthropic_merges_consecutive_assistant_messages_litellm():
+    """Test that consecutive assistant messages are merged for Anthropic models via LiteLLM.
+
+    Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
+    This test ensures that _format_messages_for_provider merges them.
+    Reproduces the scenario described in issue #4798.
+    """
+    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
+    messages = [
+        {"role": "user", "content": "Do something"},
+        {"role": "assistant", "content": "I will use a tool"},
+        {"role": "assistant", "content": "Observation: tool result"},
+        {"role": "assistant", "content": "Now I have the answer"},
+    ]
+
+    formatted = llm._format_messages_for_provider(messages)
+
+    # All three consecutive assistant messages should be merged into one
+    assert len(formatted) == 2
+    assert formatted[0]["role"] == "user"
+    assert formatted[0]["content"] == "Do something"
+    assert formatted[1]["role"] == "assistant"
+    assert "I will use a tool" in formatted[1]["content"]
+    assert "Observation: tool result" in formatted[1]["content"]
+    assert "Now I have the answer" in formatted[1]["content"]
+
+
+def test_anthropic_merges_consecutive_assistant_messages_native():
+    """Test that consecutive assistant messages are merged for native Anthropic provider.
+
+    Claude 4.6+ rejects consecutive assistant messages (treated as prefill).
+    This test ensures that _format_messages_for_anthropic merges them.
+    Reproduces the scenario described in issue #4798.
+    """
+    llm = AnthropicCompletion(model="claude-3-sonnet", is_litellm=False)
+    messages = [
+        {"role": "user", "content": "Do something"},
+        {"role": "assistant", "content": "I will use a tool"},
+        {"role": "assistant", "content": "Observation: tool result"},
+        {"role": "assistant", "content": "Now I have the answer"},
+    ]
+
+    formatted, system_msg = llm._format_messages_for_anthropic(messages)
+
+    # All three consecutive assistant messages should be merged into one
+    assert len(formatted) == 2
+    assert formatted[0]["role"] == "user"
+    assert formatted[0]["content"] == "Do something"
+    assert formatted[1]["role"] == "assistant"
+    assert "I will use a tool" in formatted[1]["content"]
+    assert "Observation: tool result" in formatted[1]["content"]
+    assert "Now I have the answer" in formatted[1]["content"]
+    assert system_msg is None
+
+
+def test_merge_consecutive_messages_preserves_alternating():
+    """Test that already-alternating messages are not modified."""
+    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
+    messages = [
+        {"role": "user", "content": "Hello"},
+        {"role": "assistant", "content": "Hi there"},
+        {"role": "user", "content": "How are you?"},
+        {"role": "assistant", "content": "Great!"},
+    ]
+
+    formatted = llm._format_messages_for_provider(messages)
+
+    assert len(formatted) == 4
+    assert formatted[0]["role"] == "user"
+    assert formatted[1]["role"] == "assistant"
+    assert formatted[2]["role"] == "user"
+    assert formatted[3]["role"] == "assistant"
+
+
+def test_merge_consecutive_messages_skips_list_content():
+    """Test that messages with list content (e.g. tool-use blocks) are not merged."""
+    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
+    messages = [
+        {"role": "user", "content": "Hello"},
+        {"role": "assistant", "content": [{"type": "tool_use", "id": "1", "name": "test", "input": {}}]},
+        {"role": "assistant", "content": "text after tool use"},
+    ]
+
+    formatted = llm._format_messages_for_provider(messages)
+
+    # The list-content assistant message cannot be merged, so we get 3 messages
+    assert len(formatted) == 3
+    assert formatted[1]["role"] == "assistant"
+    assert isinstance(formatted[1]["content"], list)
+    assert formatted[2]["role"] == "assistant"
+    assert formatted[2]["content"] == "text after tool use"
+
+
+def test_merge_consecutive_messages_does_not_mutate_original():
+    """Test that merging does not mutate the original message list."""
+    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
+    messages = [
+        {"role": "user", "content": "Hello"},
+        {"role": "assistant", "content": "Response 1"},
+        {"role": "assistant", "content": "Response 2"},
+    ]
+    original_len = len(messages)
+    original_content = messages[1]["content"]
+
+    llm._format_messages_for_provider(messages)
+
+    # Original messages should not be modified
+    assert len(messages) == original_len
+    assert messages[1]["content"] == original_content
+
+
+def test_merge_consecutive_messages_non_anthropic_unchanged():
+    """Test that non-Anthropic models do not merge consecutive messages."""
+    llm = LLM(model="gpt-4", is_litellm=True)
+    messages = [
+        {"role": "user", "content": "Hello"},
+        {"role": "assistant", "content": "Response 1"},
+        {"role": "assistant", "content": "Response 2"},
+    ]
+
+    formatted = llm._format_messages_for_provider(messages)
+
+    # Non-Anthropic models should not merge
+    assert len(formatted) == 3
+
+
+def test_merge_consecutive_user_messages():
+    """Test that consecutive user messages are also merged for Anthropic."""
+    llm = LLM(model="anthropic/claude-3-sonnet", is_litellm=True)
+    messages = [
+        {"role": "user", "content": "Part 1"},
+        {"role": "user", "content": "Part 2"},
+        {"role": "assistant", "content": "Response"},
+    ]
+
+    formatted = llm._format_messages_for_provider(messages)
+
+    assert len(formatted) == 2
+    assert formatted[0]["role"] == "user"
+    assert "Part 1" in formatted[0]["content"]
+    assert "Part 2" in formatted[0]["content"]
+    assert formatted[1]["role"] == "assistant"
+
+
 def test_deepseek_r1_with_open_router():
     if not os.getenv("OPEN_ROUTER_API_KEY"):
         pytest.skip("OPEN_ROUTER_API_KEY not set; skipping test.")