Fix GPT-5 tool calling format incompatibility

GPT-5 wraps tool arguments in an array format like: [{"arg": "value"}, []], while GPT-4 uses a flat dict format: {"arg": "value"}.

This commit adds a _coerce_args_dict() helper method that normalizes both formats by unwrapping the array when the first element is a dict and all trailing elements are empty. The fix is applied across all parsing attempts (JSON, Python literal, JSON5, and repaired JSON) to ensure consistent behavior.

Tests added to verify:
- GPT-5 array-wrapped format is correctly normalized
- GPT-4 flat dict format continues to work unchanged
- Invalid list formats are properly rejected
- Complex nested arguments work with both formats

Fixes #3889

Co-Authored-By: João <joao@crewai.com>
2025-12-16 04:18:35 +00:00 · 2025-11-11 12:23:39 +00:00
parent 01f0111d52
commit 1b7f44dc34
3 changed files with 4311 additions and 4029 deletions
--- a/lib/crewai/src/crewai/tools/tool_usage.py
+++ b/lib/crewai/src/crewai/tools/tool_usage.py
@@ -526,6 +526,29 @@ class ToolUsage:
                )
            return self._tool_calling(tool_string)

+    def _coerce_args_dict(self, val: Any) -> dict[str, Any] | None:
+        """Normalize parsed tool arguments to a single dict, or signal failure.
+
+        A dict (GPT-4 style, {"arg": "value"}) is returned as-is, not copied.
+        A list whose first element is a dict and whose other elements, if any,
+        are all falsy (GPT-5 style, [{"arg": "value"}, []]) yields that dict.
+
+        Args:
+            val: The already-parsed value to coerce.
+
+        Returns:
+            The argument dict, or None when val matches neither shape.
+        """
+        if isinstance(val, dict):
+            return val
+
+        if isinstance(val, list) and val and isinstance(val[0], dict):
+            trailing_elements = val[1:]
+            if all(not x for x in trailing_elements):
+                return val[0]
+
+        return None
+
    def _validate_tool_input(self, tool_input: str | None) -> dict[str, Any]:
        if tool_input is None:
            return {}
@@ -538,16 +561,18 @@ class ToolUsage:
        # Attempt 1: Parse as JSON
        try:
            arguments = json.loads(tool_input)
-            if isinstance(arguments, dict):
-                return arguments
+            coerced = self._coerce_args_dict(arguments)
+            if coerced is not None:
+                return coerced
        except (JSONDecodeError, TypeError):
            pass  # Continue to the next parsing attempt

        # Attempt 2: Parse as Python literal
        try:
            arguments = ast.literal_eval(tool_input)
-            if isinstance(arguments, dict):
-                return arguments
+            coerced = self._coerce_args_dict(arguments)
+            if coerced is not None:
+                return coerced
        except (ValueError, SyntaxError):
            repaired_input = repair_json(tool_input)
            # Continue to the next parsing attempt
@@ -555,8 +580,9 @@ class ToolUsage:
        # Attempt 3: Parse as JSON5
        try:
            arguments = json5.loads(tool_input)
-            if isinstance(arguments, dict):
-                return arguments
+            coerced = self._coerce_args_dict(arguments)
+            if coerced is not None:
+                return coerced
        except (JSONDecodeError, ValueError, TypeError):
            pass  # Continue to the next parsing attempt

@@ -567,8 +593,9 @@ class ToolUsage:
                content=f"Repaired JSON: {repaired_input}", color="blue"
            )
            arguments = json.loads(repaired_input)
-            if isinstance(arguments, dict):
-                return arguments
+            coerced = self._coerce_args_dict(arguments)
+            if coerced is not None:
+                return coerced
        except Exception as e:
            error = f"Failed to repair JSON: {e}"
            self._printer.print(content=error, color="red")
--- a/lib/crewai/tests/tools/test_tool_usage_gpt5_args_normalization.py
+++ b/lib/crewai/tests/tools/test_tool_usage_gpt5_args_normalization.py
@@ -0,0 +1,187 @@
+"""Tests for GPT-5 tool calling format normalization.
+
+This module tests the handling of GPT-5's array-wrapped tool arguments format.
+GPT-5 wraps arguments in an array like: [{"arg": "value"}, []]
+while GPT-4 uses a flat dict: {"arg": "value"}
+"""
+
+from unittest.mock import MagicMock
+
+import pytest
+from crewai.tools.tool_usage import ToolUsage
+
+
+def test_validate_tool_input_gpt5_wrapped_format():
+    """Array-wrapped `[{...}, []]` input is returned as the inner dict."""
+    mock_agent = MagicMock()
+    mock_agent.key = "test_agent_key"
+    mock_agent.role = "test_agent_role"
+    mock_agent._original_role = "test_agent_role"
+    mock_agent.i18n = MagicMock()
+    mock_agent.verbose = False
+
+    mock_action = MagicMock()
+    mock_action.tool = "test_tool"
+    mock_action.tool_input = "test_input"  # not the payload under test
+
+    tool_usage = ToolUsage(
+        tools_handler=MagicMock(),
+        tools=[],
+        task=MagicMock(),
+        function_calling_llm=None,
+        agent=mock_agent,
+        action=mock_action,
+    )
+
+    tool_input = '[{"responsible_employee_id": null, "include_inactive": false}, []]'
+    expected_arguments = {"responsible_employee_id": None, "include_inactive": False}
+
+    arguments = tool_usage._validate_tool_input(tool_input)
+    assert arguments == expected_arguments
+
+
+def test_validate_tool_input_gpt5_wrapped_format_single_element():
+    """A one-element list `[{...}]` (no trailing array) yields its dict."""
+    mock_agent = MagicMock()
+    mock_agent.key = "test_agent_key"
+    mock_agent.role = "test_agent_role"
+    mock_agent._original_role = "test_agent_role"
+    mock_agent.i18n = MagicMock()
+    mock_agent.verbose = False
+
+    mock_action = MagicMock()
+    mock_action.tool = "test_tool"
+    mock_action.tool_input = "test_input"  # not the payload under test
+
+    tool_usage = ToolUsage(
+        tools_handler=MagicMock(),
+        tools=[],
+        task=MagicMock(),
+        function_calling_llm=None,
+        agent=mock_agent,
+        action=mock_action,
+    )
+
+    tool_input = '[{"key": "value", "number": 42}]'
+    expected_arguments = {"key": "value", "number": 42}
+
+    arguments = tool_usage._validate_tool_input(tool_input)
+    assert arguments == expected_arguments
+
+
+def test_validate_tool_input_gpt4_dict_format_unchanged():
+    """A flat JSON object (GPT-4 style) is parsed to the equal dict."""
+    tool_usage = ToolUsage(
+        tools_handler=MagicMock(),
+        tools=[],
+        task=MagicMock(),
+        function_calling_llm=None,
+        agent=MagicMock(),
+        action=MagicMock(),
+    )
+
+    tool_input = '{"responsible_employee_id": null, "include_inactive": false}'
+    expected_arguments = {"responsible_employee_id": None, "include_inactive": False}
+
+    arguments = tool_usage._validate_tool_input(tool_input)
+    assert arguments == expected_arguments
+
+
+def test_validate_tool_input_gpt5_wrapped_complex_args():
+    """Nested dict and list values inside the wrapped dict come back intact."""
+    mock_agent = MagicMock()
+    mock_agent.key = "test_agent_key"
+    mock_agent.role = "test_agent_role"
+    mock_agent._original_role = "test_agent_role"
+    mock_agent.i18n = MagicMock()
+    mock_agent.verbose = False
+
+    mock_action = MagicMock()
+    mock_action.tool = "test_tool"
+    mock_action.tool_input = "test_input"  # not the payload under test
+
+    tool_usage = ToolUsage(
+        tools_handler=MagicMock(),
+        tools=[],
+        task=MagicMock(),
+        function_calling_llm=None,
+        agent=mock_agent,
+        action=mock_action,
+    )
+
+    tool_input = '[{"user": {"name": "Alice", "age": 30}, "items": [1, 2, 3]}, []]'
+    expected_arguments = {
+        "user": {"name": "Alice", "age": 30},
+        "items": [1, 2, 3],
+    }
+
+    arguments = tool_usage._validate_tool_input(tool_input)
+    assert arguments == expected_arguments
+
+
+def test_validate_tool_input_invalid_list_format():
+    """Lists whose first element is not a dict raise the 'valid dictionary' error."""
+    # Give the mocked agent concrete str/bool attributes instead of auto-created mocks
+    mock_agent = MagicMock()
+    mock_agent.key = "test_agent_key"
+    mock_agent.role = "test_agent_role"
+    mock_agent._original_role = "test_agent_role"
+    mock_agent.i18n = MagicMock()
+    mock_agent.verbose = False
+
+    # Same for the action; its tool_input is not the payload under test
+    mock_action = MagicMock()
+    mock_action.tool = "test_tool"
+    mock_action.tool_input = "test_input"
+
+    tool_usage = ToolUsage(
+        tools_handler=MagicMock(),
+        tools=[],
+        task=MagicMock(),
+        function_calling_llm=None,
+        agent=mock_agent,
+        action=mock_action,
+    )
+
+    invalid_inputs = [
+        '["string", "values"]',
+        '[1, 2, 3]',
+        '[null, {}]',
+    ]
+
+    for invalid_input in invalid_inputs:
+        with pytest.raises(Exception) as e_info:
+            tool_usage._validate_tool_input(invalid_input)
+        assert (
+            "Tool input must be a valid dictionary in JSON or Python literal format"
+            in str(e_info.value)
+        )
+
+
+def test_validate_tool_input_gpt5_with_multiple_trailing_elements():
+    """A wrapped dict followed by several empty lists is still unwrapped."""
+    mock_agent = MagicMock()
+    mock_agent.key = "test_agent_key"
+    mock_agent.role = "test_agent_role"
+    mock_agent._original_role = "test_agent_role"
+    mock_agent.i18n = MagicMock()
+    mock_agent.verbose = False
+
+    mock_action = MagicMock()
+    mock_action.tool = "test_tool"
+    mock_action.tool_input = "test_input"  # not the payload under test
+
+    tool_usage = ToolUsage(
+        tools_handler=MagicMock(),
+        tools=[],
+        task=MagicMock(),
+        function_calling_llm=None,
+        agent=mock_agent,
+        action=mock_action,
+    )
+
+    tool_input = '[{"key": "value"}, [], []]'
+    expected_arguments = {"key": "value"}
+
+    arguments = tool_usage._validate_tool_input(tool_input)
+    assert arguments == expected_arguments
--- a/uv.lock
+++ b/uv.lock