fix(bedrock): group parallel tool results in single user message (#4775)

* fix(bedrock): group parallel tool results in single user message When an AWS Bedrock model makes multiple tool calls in a single response, the Converse API requires all corresponding tool results to be sent back in a single user message. Previously, each tool result was emitted as a separate user message, causing: ValidationException: Expected toolResult blocks at messages.2.content Fix: When processing consecutive tool messages, append the toolResult block to the preceding user message (if it already contains toolResult blocks) instead of creating a new message. This groups all parallel tool results together while keeping tool results from different assistant turns separate. Fixes #4749 Signed-off-by: Giulio Leone <6887247+giulio-leone@users.noreply.github.com> * Update lib/crewai/tests/llms/bedrock/test_bedrock.py * fix: group bedrock tool results Co-authored-by: João Moura <joaomdmoura@gmail.com> --------- Signed-off-by: Giulio Leone <6887247+giulio-leone@users.noreply.github.com> Co-authored-by: Giulio Leone <6887247+giulio-leone@users.noreply.github.com> Co-authored-by: João Moura <joaomdmoura@gmail.com> Co-authored-by: Cursor Agent <cursoragent@cursor.com>
2026-04-30 14:52:36 +00:00 · 2026-03-10 21:28:40 +01:00
parent e72a80be6e
commit 0046f9a96f
2 changed files with 250 additions and 38 deletions
--- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
@@ -1781,6 +1781,7 @@ class BedrockCompletion(BaseLLM):

        converse_messages: list[LLMMessage] = []
        system_message: str | None = None
+        pending_tool_results: list[dict[str, Any]] = []

        for message in formatted_messages:
            role = message.get("role")
@@ -1794,53 +1795,56 @@ class BedrockCompletion(BaseLLM):
                    system_message += f"\n\n{content}"
                else:
                    system_message = cast(str, content)
-            elif role == "assistant" and tool_calls:
-                # Convert OpenAI-style tool_calls to Bedrock toolUse format
-                bedrock_content = []
-                for tc in tool_calls:
-                    func = tc.get("function", {})
-                    tool_use_block = {
-                        "toolUse": {
-                            "toolUseId": tc.get("id", f"call_{id(tc)}"),
-                            "name": func.get("name", ""),
-                            "input": func.get("arguments", {})
-                            if isinstance(func.get("arguments"), dict)
-                            else json.loads(func.get("arguments", "{}") or "{}"),
-                        }
-                    }
-                    bedrock_content.append(tool_use_block)
-                converse_messages.append(
-                    {"role": "assistant", "content": bedrock_content}
-                )
            elif role == "tool":
                if not tool_call_id:
                    raise ValueError("Tool message missing required tool_call_id")
-                converse_messages.append(
+                pending_tool_results.append(
                    {
-                        "role": "user",
-                        "content": [
-                            {
-                                "toolResult": {
-                                    "toolUseId": tool_call_id,
-                                    "content": [
-                                        {"text": str(content) if content else ""}
-                                    ],
-                                }
-                            }
-                        ],
+                        "toolResult": {
+                            "toolUseId": tool_call_id,
+                            "content": [{"text": str(content) if content else ""}],
+                        }
                    }
                )
            else:
-                # Convert to Converse API format with proper content structure
-                if isinstance(content, list):
-                    # Already formatted as multimodal content blocks
-                    converse_messages.append({"role": role, "content": content})
-                else:
-                    # String content - wrap in text block
-                    text_content = content if content else ""
+                if pending_tool_results:
                    converse_messages.append(
-                        {"role": role, "content": [{"text": text_content}]}
+                        {"role": "user", "content": pending_tool_results}
                    )
+                    pending_tool_results = []
+
+                if role == "assistant" and tool_calls:
+                    # Convert OpenAI-style tool_calls to Bedrock toolUse format
+                    bedrock_content = []
+                    for tc in tool_calls:
+                        func = tc.get("function", {})
+                        tool_use_block = {
+                            "toolUse": {
+                                "toolUseId": tc.get("id", f"call_{id(tc)}"),
+                                "name": func.get("name", ""),
+                                "input": func.get("arguments", {})
+                                if isinstance(func.get("arguments"), dict)
+                                else json.loads(func.get("arguments", "{}") or "{}"),
+                            }
+                        }
+                        bedrock_content.append(tool_use_block)
+                    converse_messages.append(
+                        {"role": "assistant", "content": bedrock_content}
+                    )
+                else:
+                    # Convert to Converse API format with proper content structure
+                    if isinstance(content, list):
+                        # Already formatted as multimodal content blocks
+                        converse_messages.append({"role": role, "content": content})
+                    else:
+                        # String content - wrap in text block
+                        text_content = content if content else ""
+                        converse_messages.append(
+                            {"role": role, "content": [{"text": text_content}]}
+                        )
+
+        if pending_tool_results:
+            converse_messages.append({"role": "user", "content": pending_tool_results})

        # CRITICAL: Handle model-specific conversation requirements
        # Cohere and some other models require conversation to end with user message
--- a/lib/crewai/tests/llms/bedrock/test_bedrock.py
+++ b/lib/crewai/tests/llms/bedrock/test_bedrock.py
@@ -967,3 +967,211 @@ def test_bedrock_agent_kickoff_structured_output_with_tools():
    assert result.pydantic.result == 42, f"Expected result 42 but got {result.pydantic.result}"
    assert result.pydantic.operation, "Operation should not be empty"
    assert result.pydantic.explanation, "Explanation should not be empty"
+
+
+def test_bedrock_groups_three_tool_results():
+    """Consecutive tool results should be grouped into one Bedrock user message."""
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    messages = [
+        {"role": "user", "content": "Use all three tools, then continue."},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "tool-1",
+                    "type": "function",
+                    "function": {
+                        "name": "lookup_weather",
+                        "arguments": '{"location": "New York"}',
+                    },
+                },
+                {
+                    "id": "tool-2",
+                    "type": "function",
+                    "function": {
+                        "name": "lookup_news",
+                        "arguments": '{"topic": "AI"}',
+                    },
+                },
+                {
+                    "id": "tool-3",
+                    "type": "function",
+                    "function": {
+                        "name": "lookup_stock",
+                        "arguments": '{"ticker": "AMZN"}',
+                    },
+                },
+            ],
+        },
+        {"role": "tool", "tool_call_id": "tool-1", "content": "72F and sunny"},
+        {"role": "tool", "tool_call_id": "tool-2", "content": "AI news summary"},
+        {"role": "tool", "tool_call_id": "tool-3", "content": "AMZN up 1.2%"},
+    ]
+
+    formatted_messages, system_message = llm._format_messages_for_converse(messages)
+
+    assert system_message is None
+    assert [message["role"] for message in formatted_messages] == [
+        "user",
+        "assistant",
+        "user",
+    ]
+    assert len(formatted_messages[1]["content"]) == 3
+
+    tool_results = formatted_messages[2]["content"]
+    assert len(tool_results) == 3
+    assert [block["toolResult"]["toolUseId"] for block in tool_results] == [
+        "tool-1",
+        "tool-2",
+        "tool-3",
+    ]
+    assert [block["toolResult"]["content"][0]["text"] for block in tool_results] == [
+        "72F and sunny",
+        "AI news summary",
+        "AMZN up 1.2%",
+    ]
+
+
+def test_bedrock_parallel_tool_results_grouped():
+    """Regression test for issue #4749.
+
+    When an assistant message contains multiple parallel tool calls,
+    Bedrock requires all corresponding tool results to be grouped
+    in a single user message. Previously each tool result was emitted
+    as a separate user message, causing:
+        ValidationException: Expected toolResult blocks at messages.2.content
+    """
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    messages = [
+        {"role": "user", "content": "Calculate 25 + 17 AND 10 * 5"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_add",
+                    "type": "function",
+                    "function": {"name": "add_tool", "arguments": '{"a": 25, "b": 17}'},
+                },
+                {
+                    "id": "call_mul",
+                    "type": "function",
+                    "function": {"name": "multiply_tool", "arguments": '{"a": 10, "b": 5}'},
+                },
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_add", "content": "42"},
+        {"role": "tool", "tool_call_id": "call_mul", "content": "50"},
+    ]
+
+    converse_msgs, system_msg = llm._format_messages_for_converse(messages)
+
+    # Find the user message that contains toolResult blocks
+    tool_result_messages = [
+        m for m in converse_msgs
+        if m.get("role") == "user"
+        and any("toolResult" in b for b in m.get("content", []))
+    ]
+
+    # There must be exactly ONE user message with tool results (not two)
+    assert len(tool_result_messages) == 1, (
+        f"Expected 1 grouped tool-result message, got {len(tool_result_messages)}. "
+        "Bedrock requires all parallel tool results in a single user message."
+    )
+
+    # That single message must contain both tool results
+    tool_results = tool_result_messages[0]["content"]
+    assert len(tool_results) == 2, (
+        f"Expected 2 toolResult blocks in grouped message, got {len(tool_results)}"
+    )
+
+    # Verify the tool use IDs match
+    tool_use_ids = {
+        block["toolResult"]["toolUseId"] for block in tool_results
+    }
+    assert tool_use_ids == {"call_add", "call_mul"}
+
+
+def test_bedrock_single_tool_result_still_works():
+    """Ensure single tool call still produces a single-block user message."""
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    messages = [
+        {"role": "user", "content": "Add 1 + 2"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_single",
+                    "type": "function",
+                    "function": {"name": "add_tool", "arguments": '{"a": 1, "b": 2}'},
+                },
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_single", "content": "3"},
+    ]
+
+    converse_msgs, _ = llm._format_messages_for_converse(messages)
+
+    tool_result_messages = [
+        m for m in converse_msgs
+        if m.get("role") == "user"
+        and any("toolResult" in b for b in m.get("content", []))
+    ]
+    assert len(tool_result_messages) == 1
+    assert len(tool_result_messages[0]["content"]) == 1
+    assert tool_result_messages[0]["content"][0]["toolResult"]["toolUseId"] == "call_single"
+
+
+def test_bedrock_tool_results_not_merged_across_assistant_messages():
+    """Tool results from different assistant turns must NOT be merged."""
+    llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
+
+    messages = [
+        {"role": "user", "content": "First task"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_a",
+                    "type": "function",
+                    "function": {"name": "tool_a", "arguments": "{}"},
+                },
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_a", "content": "result_a"},
+        {"role": "assistant", "content": "Now doing second task"},
+        {"role": "user", "content": "Second task"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_b",
+                    "type": "function",
+                    "function": {"name": "tool_b", "arguments": "{}"},
+                },
+            ],
+        },
+        {"role": "tool", "tool_call_id": "call_b", "content": "result_b"},
+    ]
+
+    converse_msgs, _ = llm._format_messages_for_converse(messages)
+
+    tool_result_messages = [
+        m for m in converse_msgs
+        if m.get("role") == "user"
+        and any("toolResult" in b for b in m.get("content", []))
+    ]
+
+    # Two separate tool-result messages (one per assistant turn)
+    assert len(tool_result_messages) == 2, (
+        "Tool results from different assistant turns must remain separate"
+    )
+    assert tool_result_messages[0]["content"][0]["toolResult"]["toolUseId"] == "call_a"
+    assert tool_result_messages[1]["content"][0]["toolResult"]["toolUseId"] == "call_b"