mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
Fix GPT-5 tool calling format incompatibility
GPT-5 wraps tool arguments in an array format like:
[{"arg": "value"}, []]
while GPT-4 uses a flat dict format:
{"arg": "value"}
This commit adds a _coerce_args_dict() helper method that normalizes
both formats: a dict is returned as-is, and an array is unwrapped when
its first element is a dict and every trailing element is empty
(falsy, e.g. [], {}, "" or null).
The fix is applied across all parsing attempts (JSON, Python literal,
JSON5, and repaired JSON) to ensure consistent behavior.
Tests added to verify:
- GPT-5 array-wrapped format is correctly normalized
- GPT-4 flat dict format continues to work unchanged
- Invalid list formats are properly rejected
- Complex nested arguments work with both formats
Fixes #3889
Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -526,6 +526,29 @@ class ToolUsage:
|
|||||||
)
|
)
|
||||||
return self._tool_calling(tool_string)
|
return self._tool_calling(tool_string)
|
||||||
|
|
||||||
|
def _coerce_args_dict(self, val: Any) -> dict[str, Any] | None:
|
||||||
|
"""Coerce parsed arguments to a dictionary format.
|
||||||
|
|
||||||
|
Handles both GPT-4 format (flat dict) and GPT-5 format (array-wrapped dict).
|
||||||
|
GPT-5 wraps arguments in an array like: [{"arg": "value"}, []]
|
||||||
|
while GPT-4 uses a flat dict: {"arg": "value"}
|
||||||
|
|
||||||
|
Args:
|
||||||
|
val: The parsed value to coerce
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary if coercion is successful, None otherwise
|
||||||
|
"""
|
||||||
|
if isinstance(val, dict):
|
||||||
|
return val
|
||||||
|
|
||||||
|
if isinstance(val, list) and val and isinstance(val[0], dict):
|
||||||
|
trailing_elements = val[1:]
|
||||||
|
if all(not x for x in trailing_elements):
|
||||||
|
return val[0]
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
def _validate_tool_input(self, tool_input: str | None) -> dict[str, Any]:
|
def _validate_tool_input(self, tool_input: str | None) -> dict[str, Any]:
|
||||||
if tool_input is None:
|
if tool_input is None:
|
||||||
return {}
|
return {}
|
||||||
@@ -538,16 +561,18 @@ class ToolUsage:
|
|||||||
# Attempt 1: Parse as JSON
|
# Attempt 1: Parse as JSON
|
||||||
try:
|
try:
|
||||||
arguments = json.loads(tool_input)
|
arguments = json.loads(tool_input)
|
||||||
if isinstance(arguments, dict):
|
coerced = self._coerce_args_dict(arguments)
|
||||||
return arguments
|
if coerced is not None:
|
||||||
|
return coerced
|
||||||
except (JSONDecodeError, TypeError):
|
except (JSONDecodeError, TypeError):
|
||||||
pass # Continue to the next parsing attempt
|
pass # Continue to the next parsing attempt
|
||||||
|
|
||||||
# Attempt 2: Parse as Python literal
|
# Attempt 2: Parse as Python literal
|
||||||
try:
|
try:
|
||||||
arguments = ast.literal_eval(tool_input)
|
arguments = ast.literal_eval(tool_input)
|
||||||
if isinstance(arguments, dict):
|
coerced = self._coerce_args_dict(arguments)
|
||||||
return arguments
|
if coerced is not None:
|
||||||
|
return coerced
|
||||||
except (ValueError, SyntaxError):
|
except (ValueError, SyntaxError):
|
||||||
repaired_input = repair_json(tool_input)
|
repaired_input = repair_json(tool_input)
|
||||||
# Continue to the next parsing attempt
|
# Continue to the next parsing attempt
|
||||||
@@ -555,8 +580,9 @@ class ToolUsage:
|
|||||||
# Attempt 3: Parse as JSON5
|
# Attempt 3: Parse as JSON5
|
||||||
try:
|
try:
|
||||||
arguments = json5.loads(tool_input)
|
arguments = json5.loads(tool_input)
|
||||||
if isinstance(arguments, dict):
|
coerced = self._coerce_args_dict(arguments)
|
||||||
return arguments
|
if coerced is not None:
|
||||||
|
return coerced
|
||||||
except (JSONDecodeError, ValueError, TypeError):
|
except (JSONDecodeError, ValueError, TypeError):
|
||||||
pass # Continue to the next parsing attempt
|
pass # Continue to the next parsing attempt
|
||||||
|
|
||||||
@@ -567,8 +593,9 @@ class ToolUsage:
|
|||||||
content=f"Repaired JSON: {repaired_input}", color="blue"
|
content=f"Repaired JSON: {repaired_input}", color="blue"
|
||||||
)
|
)
|
||||||
arguments = json.loads(repaired_input)
|
arguments = json.loads(repaired_input)
|
||||||
if isinstance(arguments, dict):
|
coerced = self._coerce_args_dict(arguments)
|
||||||
return arguments
|
if coerced is not None:
|
||||||
|
return coerced
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error = f"Failed to repair JSON: {e}"
|
error = f"Failed to repair JSON: {e}"
|
||||||
self._printer.print(content=error, color="red")
|
self._printer.print(content=error, color="red")
|
||||||
|
|||||||
@@ -0,0 +1,187 @@
|
|||||||
|
"""Tests for GPT-5 tool calling format normalization.
|
||||||
|
|
||||||
|
This module tests the handling of GPT-5's array-wrapped tool arguments format.
|
||||||
|
GPT-5 wraps arguments in an array like: [{"arg": "value"}, []]
|
||||||
|
while GPT-4 uses a flat dict: {"arg": "value"}
|
||||||
|
"""
|
||||||
|
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from crewai.tools.tool_usage import ToolUsage
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tool_input_gpt5_wrapped_format():
    """A GPT-5 style ``[{...}, []]`` payload is unwrapped into the inner dict."""
    agent = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        i18n=MagicMock(),
        verbose=False,
    )
    action = MagicMock(tool="test_tool", tool_input="test_input")

    usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[],
        task=MagicMock(),
        function_calling_llm=None,
        agent=agent,
        action=action,
    )

    wrapped = '[{"responsible_employee_id": null, "include_inactive": false}, []]'

    # JSON null/false must come back as Python None/False in the unwrapped dict.
    assert usage._validate_tool_input(wrapped) == {
        "responsible_employee_id": None,
        "include_inactive": False,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tool_input_gpt5_wrapped_format_single_element():
    """An array holding only the argument dict (no trailing ``[]``) is unwrapped."""
    agent = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        i18n=MagicMock(),
        verbose=False,
    )
    action = MagicMock(tool="test_tool", tool_input="test_input")

    usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[],
        task=MagicMock(),
        function_calling_llm=None,
        agent=agent,
        action=action,
    )

    single_element = '[{"key": "value", "number": 42}]'

    assert usage._validate_tool_input(single_element) == {
        "key": "value",
        "number": 42,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tool_input_gpt4_dict_format_unchanged():
    """A GPT-4 style flat dict payload is still parsed to the same dict."""
    usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[],
        task=MagicMock(),
        function_calling_llm=None,
        agent=MagicMock(),
        action=MagicMock(),
    )

    flat = '{"responsible_employee_id": null, "include_inactive": false}'

    assert usage._validate_tool_input(flat) == {
        "responsible_employee_id": None,
        "include_inactive": False,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tool_input_gpt5_wrapped_complex_args():
    """Nested dicts and lists inside a GPT-5 wrapped payload survive unwrapping."""
    agent = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        i18n=MagicMock(),
        verbose=False,
    )
    action = MagicMock(tool="test_tool", tool_input="test_input")

    usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[],
        task=MagicMock(),
        function_calling_llm=None,
        agent=agent,
        action=action,
    )

    nested = '[{"user": {"name": "Alice", "age": 30}, "items": [1, 2, 3]}, []]'

    assert usage._validate_tool_input(nested) == {
        "user": {"name": "Alice", "age": 30},
        "items": [1, 2, 3],
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tool_input_invalid_list_format():
    """Lists whose first element is not a dict are rejected with the standard error."""
    # Agent mock with concrete string attributes rather than auto-generated mocks.
    agent = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        i18n=MagicMock(),
        verbose=False,
    )
    # Action mock with concrete string attributes.
    action = MagicMock(tool="test_tool", tool_input="test_input")

    usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[],
        task=MagicMock(),
        function_calling_llm=None,
        agent=agent,
        action=action,
    )

    expected_message = (
        "Tool input must be a valid dictionary in JSON or Python literal format"
    )
    rejected_payloads = (
        '["string", "values"]',
        '[1, 2, 3]',
        '[null, {}]',
    )

    for payload in rejected_payloads:
        with pytest.raises(Exception) as e_info:
            usage._validate_tool_input(payload)
        # Checked after the context manager exits so the assertion actually runs.
        assert expected_message in str(e_info.value)
|
||||||
|
|
||||||
|
|
||||||
|
def test_validate_tool_input_gpt5_with_multiple_trailing_elements():
    """Several trailing empty arrays after the argument dict are all ignored."""
    agent = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        i18n=MagicMock(),
        verbose=False,
    )
    action = MagicMock(tool="test_tool", tool_input="test_input")

    usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[],
        task=MagicMock(),
        function_calling_llm=None,
        agent=agent,
        action=action,
    )

    padded = '[{"key": "value"}, [], []]'

    assert usage._validate_tool_input(padded) == {"key": "value"}
|
||||||
Reference in New Issue
Block a user