fix(bedrock): group consecutive tool results into single user message

When a Bedrock model makes multiple parallel tool calls, the Converse API requires all toolResult blocks to be sent in a single user message. Previously each tool result created a separate user message, causing a ValidationException. This fix detects consecutive tool result messages and appends them to the same user message, producing the expected structure: Message 0: user (prompt) Message 1: assistant (toolUse A, toolUse B) Message 2: user (toolResult A, toolResult B) ← grouped Fixes #4749 Co-Authored-By: João <joao@crewai.com>
2026-05-07 18:19:00 +00:00 · 2026-03-06 15:47:36 +00:00
2 changed files with 211 additions and 12 deletions
--- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
@@ -1815,21 +1815,33 @@ class BedrockCompletion(BaseLLM):
            elif role == "tool":
                if not tool_call_id:
                    raise ValueError("Tool message missing required tool_call_id")
-                converse_messages.append(
-                    {
-                        "role": "user",
+                tool_result_block = {
+                    "toolResult": {
+                        "toolUseId": tool_call_id,
                        "content": [
-                            {
-                                "toolResult": {
-                                    "toolUseId": tool_call_id,
-                                    "content": [
-                                        {"text": str(content) if content else ""}
-                                    ],
-                                }
-                            }
+                            {"text": str(content) if content else ""}
                        ],
                    }
-                )
+                }
+                # Group consecutive tool results into a single user message.
+                # Bedrock's Converse API requires all toolResult blocks for a
+                # given assistant response to be in one user message.
+                if (
+                    converse_messages
+                    and converse_messages[-1]["role"] == "user"
+                    and isinstance(converse_messages[-1].get("content"), list)
+                    and converse_messages[-1]["content"]
+                    and isinstance(converse_messages[-1]["content"][0], dict)
+                    and "toolResult" in converse_messages[-1]["content"][0]
+                ):
+                    converse_messages[-1]["content"].append(tool_result_block)
+                else:
+                    converse_messages.append(
+                        {
+                            "role": "user",
+                            "content": [tool_result_block],
+                        }
+                    )
            else:
                # Convert to Converse API format with proper content structure
                if isinstance(content, list):
--- a/lib/crewai/tests/llms/bedrock/test_bedrock.py
+++ b/lib/crewai/tests/llms/bedrock/test_bedrock.py
@@ -777,6 +777,193 @@ def test_bedrock_tool_use_conversation_flow():
        assert mock_converse.call_count == 2


+def test_bedrock_parallel_tool_results_grouped_in_single_message():
+    """
+    Test that multiple consecutive tool result messages are grouped into
+    a single user message for Bedrock's Converse API.
+
+    Bedrock requires all toolResult blocks for a given assistant response
+    to appear in one user message. Without grouping, parallel tool calls
+    cause a ValidationException.
+    """
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    # Simulate: user prompt -> assistant calls two tools -> two tool results
+    test_messages = [
+        {"role": "user", "content": "Calculate 25 + 17 AND 10 * 5"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_add",
+                    "function": {
+                        "name": "add_tool",
+                        "arguments": '{"a": 25, "b": 17}',
+                    },
+                },
+                {
+                    "id": "call_mul",
+                    "function": {
+                        "name": "multiply_tool",
+                        "arguments": '{"a": 10, "b": 5}',
+                    },
+                },
+            ],
+        },
+        {"role": "tool", "content": "42", "tool_call_id": "call_add"},
+        {"role": "tool", "content": "50", "tool_call_id": "call_mul"},
+    ]
+
+    formatted_messages, _ = llm._format_messages_for_converse(test_messages)
+
+    # Expected structure:
+    # Message 0: user (prompt)
+    # Message 1: assistant (toolUse A, toolUse B)
+    # Message 2: user (toolResult A, toolResult B) <- grouped!
+    assert len(formatted_messages) == 3, (
+        f"Expected 3 messages (user, assistant, user with grouped tool results), "
+        f"got {len(formatted_messages)}"
+    )
+
+    # Verify the tool results message
+    tool_results_msg = formatted_messages[2]
+    assert tool_results_msg["role"] == "user"
+    assert len(tool_results_msg["content"]) == 2, (
+        f"Expected 2 toolResult blocks in one message, "
+        f"got {len(tool_results_msg['content'])}"
+    )
+
+    # Verify both tool results are present
+    tool_result_ids = [
+        block["toolResult"]["toolUseId"]
+        for block in tool_results_msg["content"]
+    ]
+    assert "call_add" in tool_result_ids
+    assert "call_mul" in tool_result_ids
+
+
+def test_bedrock_three_parallel_tool_results_grouped():
+    """
+    Test that three or more consecutive tool results are all grouped
+    into a single user message.
+    """
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    test_messages = [
+        {"role": "user", "content": "Do three things"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {"id": "call_1", "function": {"name": "tool_a", "arguments": "{}"}},
+                {"id": "call_2", "function": {"name": "tool_b", "arguments": "{}"}},
+                {"id": "call_3", "function": {"name": "tool_c", "arguments": "{}"}},
+            ],
+        },
+        {"role": "tool", "content": "result_1", "tool_call_id": "call_1"},
+        {"role": "tool", "content": "result_2", "tool_call_id": "call_2"},
+        {"role": "tool", "content": "result_3", "tool_call_id": "call_3"},
+    ]
+
+    formatted_messages, _ = llm._format_messages_for_converse(test_messages)
+
+    assert len(formatted_messages) == 3
+    tool_results_msg = formatted_messages[2]
+    assert tool_results_msg["role"] == "user"
+    assert len(tool_results_msg["content"]) == 3, (
+        f"Expected 3 toolResult blocks grouped, got {len(tool_results_msg['content'])}"
+    )
+
+    for block in tool_results_msg["content"]:
+        assert "toolResult" in block
+
+
+def test_bedrock_single_tool_result_still_works():
+    """
+    Test that a single tool result still works correctly after the
+    grouping change (regression test).
+    """
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    test_messages = [
+        {"role": "user", "content": "What is the weather?"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_weather",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": '{"location": "NYC"}',
+                    },
+                },
+            ],
+        },
+        {"role": "tool", "content": "Sunny, 75F", "tool_call_id": "call_weather"},
+    ]
+
+    formatted_messages, _ = llm._format_messages_for_converse(test_messages)
+
+    assert len(formatted_messages) == 3
+    tool_results_msg = formatted_messages[2]
+    assert tool_results_msg["role"] == "user"
+    assert len(tool_results_msg["content"]) == 1
+    assert tool_results_msg["content"][0]["toolResult"]["toolUseId"] == "call_weather"
+
+
+def test_bedrock_multi_turn_tool_results_not_merged_across_turns():
+    """
+    Test that tool results from different assistant turns are NOT merged.
+    Only consecutive tool results (from a single assistant response) should
+    be grouped together.
+    """
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    test_messages = [
+        {"role": "user", "content": "First question"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {"id": "call_1", "function": {"name": "tool_a", "arguments": "{}"}},
+            ],
+        },
+        {"role": "tool", "content": "result_1", "tool_call_id": "call_1"},
+        # Assistant responds with text, then user asks again
+        {"role": "assistant", "content": "Here is the first result."},
+        {"role": "user", "content": "Now do something else"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {"id": "call_2", "function": {"name": "tool_b", "arguments": "{}"}},
+            ],
+        },
+        {"role": "tool", "content": "result_2", "tool_call_id": "call_2"},
+    ]
+
+    formatted_messages, _ = llm._format_messages_for_converse(test_messages)
+
+    # Tool results from different turns should be in separate messages
+    tool_result_messages = [
+        msg for msg in formatted_messages
+        if msg["role"] == "user"
+        and isinstance(msg.get("content"), list)
+        and msg["content"]
+        and isinstance(msg["content"][0], dict)
+        and "toolResult" in msg["content"][0]
+    ]
+    assert len(tool_result_messages) == 2, (
+        f"Expected 2 separate tool result messages (one per turn), "
+        f"got {len(tool_result_messages)}"
+    )
+    # Each should have exactly 1 tool result
+    for msg in tool_result_messages:
+        assert len(msg["content"]) == 1
+
+
 def test_bedrock_handles_cohere_conversation_requirements():
    """
    Test that Bedrock properly handles Cohere model's requirement for user message at end