mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 00:28:31 +00:00
Enhance event handling for tool usage and agent execution
- Add new events for tool usage: ToolSelectionErrorEvent, ToolValidateInputErrorEvent - Improve error tracking and event emission in ToolUsage and LLM classes - Update AgentExecutionStartedEvent to use task_prompt instead of inputs - Add comprehensive test coverage for new event types and error scenarios
This commit is contained in:
112
tests/cassettes/test_tool_execution_error_event.yaml
Normal file
112
tests/cassettes/test_tool_execution_error_event.yaml
Normal file
@@ -0,0 +1,112 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"role": "user", "content": "Use the failing tool"}], "model":
|
||||
"gpt-4o-mini", "stop": [], "tools": [{"type": "function", "function": {"name":
|
||||
"failing_tool", "description": "This tool always fails.", "parameters": {"type":
|
||||
"object", "properties": {"param": {"type": "string", "description": "A test
|
||||
parameter"}}, "required": ["param"]}}}]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '353'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.61.0
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.61.0
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.8
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
content: "{\n \"id\": \"chatcmpl-B2P4zoJZuES7Aom8ugEq1modz5Vsl\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1739912761,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
|
||||
\ \"id\": \"call_F6fJxISpMKUBIGV6dd2vjRNG\",\n \"type\":
|
||||
\"function\",\n \"function\": {\n \"name\": \"failing_tool\",\n
|
||||
\ \"arguments\": \"{\\\"param\\\":\\\"test\\\"}\"\n }\n
|
||||
\ }\n ],\n \"refusal\": null\n },\n \"logprobs\":
|
||||
null,\n \"finish_reason\": \"tool_calls\"\n }\n ],\n \"usage\": {\n
|
||||
\ \"prompt_tokens\": 51,\n \"completion_tokens\": 15,\n \"total_tokens\":
|
||||
66,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\":
|
||||
0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\":
|
||||
0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\":
|
||||
0\n }\n },\n \"service_tier\": \"default\",\n \"system_fingerprint\":
|
||||
\"fp_00428b782a\"\n}\n"
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 9140fa827f38eb1e-SJC
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Tue, 18 Feb 2025 21:06:02 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=xbuu3IQpCMh.43ZrqL1TRMECOc6QldgHV0hzOX1GrWI-1739912762-1.0.1.1-t7iyq5xMioPrwfeaHLvPT9rwRPp7Q9A9uIm69icH9dPxRD4xMA3cWqb1aXj1_e2IyAEQQWFe1UWjlmJ22aHh3Q;
|
||||
path=/; expires=Tue, 18-Feb-25 21:36:02 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=x9l.Rhja8_wXDN.j8qcEU1PvvEqAwZp4Fd3s_aj4qwM-1739912762161-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
access-control-expose-headers:
|
||||
- X-Request-ID
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '861'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '30000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '150000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '29999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '149999978'
|
||||
x-ratelimit-reset-requests:
|
||||
- 2ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_8666ec3aa6677cb346ba00993556051d
|
||||
http_version: HTTP/1.1
|
||||
status_code: 200
|
||||
version: 1
|
||||
@@ -7,7 +7,8 @@ from pydantic import BaseModel
|
||||
|
||||
from crewai.agents.agent_builder.utilities.base_token_process import TokenProcess
|
||||
from crewai.llm import LLM
|
||||
from crewai.tools import tool
|
||||
from crewai.utilities.events import crewai_event_bus
|
||||
from crewai.utilities.events.tool_usage_events import ToolExecutionErrorEvent
|
||||
from crewai.utilities.token_counter_callback import TokenCalcHandler
|
||||
|
||||
|
||||
@@ -291,32 +292,36 @@ def anthropic_llm():
|
||||
"""Fixture providing an Anthropic LLM instance."""
|
||||
return LLM(model="anthropic/claude-3-sonnet")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def system_message():
|
||||
"""Fixture providing a system message."""
|
||||
return {"role": "system", "content": "test"}
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def user_message():
|
||||
"""Fixture providing a user message."""
|
||||
return {"role": "user", "content": "test"}
|
||||
|
||||
|
||||
def test_anthropic_message_formatting_edge_cases(anthropic_llm):
|
||||
"""Test edge cases for Anthropic message formatting."""
|
||||
# Test None messages
|
||||
with pytest.raises(TypeError, match="Messages cannot be None"):
|
||||
anthropic_llm._format_messages_for_provider(None)
|
||||
|
||||
|
||||
# Test empty message list
|
||||
formatted = anthropic_llm._format_messages_for_provider([])
|
||||
assert len(formatted) == 1
|
||||
assert formatted[0]["role"] == "user"
|
||||
assert formatted[0]["content"] == "."
|
||||
|
||||
|
||||
# Test invalid message format
|
||||
with pytest.raises(TypeError, match="Invalid message format"):
|
||||
anthropic_llm._format_messages_for_provider([{"invalid": "message"}])
|
||||
|
||||
|
||||
def test_anthropic_model_detection():
|
||||
"""Test Anthropic model detection with various formats."""
|
||||
models = [
|
||||
@@ -327,11 +332,12 @@ def test_anthropic_model_detection():
|
||||
("", False),
|
||||
("anthropomorphic", False), # Should not match partial words
|
||||
]
|
||||
|
||||
|
||||
for model, expected in models:
|
||||
llm = LLM(model=model)
|
||||
assert llm.is_anthropic == expected, f"Failed for model: {model}"
|
||||
|
||||
|
||||
def test_anthropic_message_formatting(anthropic_llm, system_message, user_message):
|
||||
"""Test Anthropic message formatting with fixtures."""
|
||||
# Test when first message is system
|
||||
@@ -371,3 +377,51 @@ def test_deepseek_r1_with_open_router():
|
||||
result = llm.call("What is the capital of France?")
|
||||
assert isinstance(result, str)
|
||||
assert "Paris" in result
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_tool_execution_error_event():
|
||||
llm = LLM(model="gpt-4o-mini")
|
||||
|
||||
def failing_tool(param: str) -> str:
|
||||
"""This tool always fails."""
|
||||
raise Exception("Tool execution failed!")
|
||||
|
||||
tool_schema = {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "failing_tool",
|
||||
"description": "This tool always fails.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"param": {"type": "string", "description": "A test parameter"}
|
||||
},
|
||||
"required": ["param"],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
received_events = []
|
||||
|
||||
@crewai_event_bus.on(ToolExecutionErrorEvent)
|
||||
def event_handler(source, event):
|
||||
received_events.append(event)
|
||||
|
||||
available_functions = {"failing_tool": failing_tool}
|
||||
|
||||
messages = [{"role": "user", "content": "Use the failing tool"}]
|
||||
|
||||
llm.call(
|
||||
messages,
|
||||
tools=[tool_schema],
|
||||
available_functions=available_functions,
|
||||
)
|
||||
|
||||
assert len(received_events) == 1
|
||||
event = received_events[0]
|
||||
assert isinstance(event, ToolExecutionErrorEvent)
|
||||
assert event.tool_name == "failing_tool"
|
||||
assert event.tool_args == {"param": "test"}
|
||||
assert event.tool_class == failing_tool
|
||||
assert "Tool execution failed!" in event.error
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import json
|
||||
import random
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel, Field
|
||||
@@ -8,6 +8,11 @@ from pydantic import BaseModel, Field
|
||||
from crewai import Agent, Task
|
||||
from crewai.tools import BaseTool
|
||||
from crewai.tools.tool_usage import ToolUsage
|
||||
from crewai.utilities.events import crewai_event_bus
|
||||
from crewai.utilities.events.tool_usage_events import (
|
||||
ToolSelectionErrorEvent,
|
||||
ToolValidateInputErrorEvent,
|
||||
)
|
||||
|
||||
|
||||
class RandomNumberToolInput(BaseModel):
|
||||
@@ -226,7 +231,7 @@ def test_validate_tool_input_with_special_characters():
|
||||
)
|
||||
|
||||
# Input with special characters
|
||||
tool_input = '{"message": "Hello, world! \u263A", "valid": True}'
|
||||
tool_input = '{"message": "Hello, world! \u263a", "valid": True}'
|
||||
expected_arguments = {"message": "Hello, world! ☺", "valid": True}
|
||||
|
||||
arguments = tool_usage._validate_tool_input(tool_input)
|
||||
@@ -468,18 +473,141 @@ def test_validate_tool_input_large_json_content():
|
||||
assert arguments == expected_arguments
|
||||
|
||||
|
||||
def test_validate_tool_input_none_input():
|
||||
def test_tool_selection_error_event_direct():
|
||||
"""Test tool selection error event emission directly from ToolUsage class."""
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.key = "test_key"
|
||||
mock_agent.role = "test_role"
|
||||
mock_agent.i18n = MagicMock()
|
||||
mock_agent.verbose = False
|
||||
|
||||
mock_task = MagicMock()
|
||||
mock_tools_handler = MagicMock()
|
||||
|
||||
class TestTool(BaseTool):
|
||||
name: str = "Test Tool"
|
||||
description: str = "A test tool"
|
||||
|
||||
def _run(self, input: dict) -> str:
|
||||
return "test result"
|
||||
|
||||
test_tool = TestTool()
|
||||
|
||||
tool_usage = ToolUsage(
|
||||
tools_handler=MagicMock(),
|
||||
tools=[],
|
||||
original_tools=[],
|
||||
tools_description="",
|
||||
tools_names="",
|
||||
task=MagicMock(),
|
||||
tools_handler=mock_tools_handler,
|
||||
tools=[test_tool],
|
||||
original_tools=[test_tool],
|
||||
tools_description="Test Tool Description",
|
||||
tools_names="Test Tool",
|
||||
task=mock_task,
|
||||
function_calling_llm=None,
|
||||
agent=MagicMock(),
|
||||
agent=mock_agent,
|
||||
action=MagicMock(),
|
||||
)
|
||||
|
||||
arguments = tool_usage._validate_tool_input(None)
|
||||
assert arguments == {} # Expecting an empty dictionary
|
||||
received_events = []
|
||||
|
||||
@crewai_event_bus.on(ToolSelectionErrorEvent)
|
||||
def event_handler(source, event):
|
||||
received_events.append(event)
|
||||
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
tool_usage._select_tool("Non Existent Tool")
|
||||
assert len(received_events) == 1
|
||||
event = received_events[0]
|
||||
assert isinstance(event, ToolSelectionErrorEvent)
|
||||
assert event.agent_key == "test_key"
|
||||
assert event.agent_role == "test_role"
|
||||
assert event.tool_name == "Non Existent Tool"
|
||||
assert event.tool_args == {}
|
||||
assert event.tool_class == "Test Tool Description"
|
||||
assert "don't exist" in event.error
|
||||
|
||||
received_events.clear()
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
tool_usage._select_tool("")
|
||||
|
||||
assert len(received_events) == 1
|
||||
event = received_events[0]
|
||||
assert isinstance(event, ToolSelectionErrorEvent)
|
||||
assert event.agent_key == "test_key"
|
||||
assert event.agent_role == "test_role"
|
||||
assert event.tool_name == ""
|
||||
assert event.tool_args == {}
|
||||
assert event.tool_class == "Test Tool Description"
|
||||
assert "forgot the Action name" in event.error
|
||||
|
||||
|
||||
def test_tool_validate_input_error_event():
|
||||
"""Test tool validation input error event emission from ToolUsage class."""
|
||||
# Mock agent and required components
|
||||
mock_agent = MagicMock()
|
||||
mock_agent.key = "test_key"
|
||||
mock_agent.role = "test_role"
|
||||
mock_agent.verbose = False
|
||||
mock_agent._original_role = "test_role"
|
||||
|
||||
# Mock i18n with error message
|
||||
mock_i18n = MagicMock()
|
||||
mock_i18n.errors.return_value = (
|
||||
"Tool input must be a valid dictionary in JSON or Python literal format"
|
||||
)
|
||||
mock_agent.i18n = mock_i18n
|
||||
|
||||
# Mock task and tools handler
|
||||
mock_task = MagicMock()
|
||||
mock_tools_handler = MagicMock()
|
||||
|
||||
# Mock printer
|
||||
mock_printer = MagicMock()
|
||||
|
||||
# Create test tool
|
||||
class TestTool(BaseTool):
|
||||
name: str = "Test Tool"
|
||||
description: str = "A test tool"
|
||||
|
||||
def _run(self, input: dict) -> str:
|
||||
return "test result"
|
||||
|
||||
test_tool = TestTool()
|
||||
|
||||
# Create ToolUsage instance
|
||||
tool_usage = ToolUsage(
|
||||
tools_handler=mock_tools_handler,
|
||||
tools=[test_tool],
|
||||
original_tools=[test_tool],
|
||||
tools_description="Test Tool Description",
|
||||
tools_names="Test Tool",
|
||||
task=mock_task,
|
||||
function_calling_llm=None,
|
||||
agent=mock_agent,
|
||||
action=MagicMock(tool="test_tool"),
|
||||
)
|
||||
tool_usage._printer = mock_printer
|
||||
|
||||
# Mock all parsing attempts to fail
|
||||
with (
|
||||
patch("json.loads", side_effect=json.JSONDecodeError("Test Error", "", 0)),
|
||||
patch("ast.literal_eval", side_effect=ValueError),
|
||||
patch("json5.loads", side_effect=json.JSONDecodeError("Test Error", "", 0)),
|
||||
patch("json_repair.repair_json", side_effect=Exception("Failed to repair")),
|
||||
):
|
||||
received_events = []
|
||||
|
||||
@crewai_event_bus.on(ToolValidateInputErrorEvent)
|
||||
def event_handler(source, event):
|
||||
received_events.append(event)
|
||||
|
||||
# Test invalid input
|
||||
invalid_input = "invalid json {[}"
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
tool_usage._validate_tool_input(invalid_input)
|
||||
|
||||
# Verify event was emitted
|
||||
assert len(received_events) == 1, "Expected one event to be emitted"
|
||||
event = received_events[0]
|
||||
assert isinstance(event, ToolValidateInputErrorEvent)
|
||||
assert event.agent_key == "test_key"
|
||||
assert event.agent_role == "test_role"
|
||||
assert event.tool_name == "test_tool"
|
||||
assert "must be a valid dictionary" in event.error
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
import json
|
||||
from datetime import datetime
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from pydantic import Field
|
||||
@@ -10,6 +11,7 @@ from crewai.crew import Crew
|
||||
from crewai.flow.flow import Flow, listen, start
|
||||
from crewai.task import Task
|
||||
from crewai.tools.base_tool import BaseTool
|
||||
from crewai.tools.tool_usage import ToolUsage
|
||||
from crewai.utilities.events.agent_events import (
|
||||
AgentExecutionCompletedEvent,
|
||||
AgentExecutionErrorEvent,
|
||||
@@ -34,7 +36,9 @@ from crewai.utilities.events.task_events import (
|
||||
TaskFailedEvent,
|
||||
TaskStartedEvent,
|
||||
)
|
||||
from crewai.utilities.events.tool_usage_events import ToolUsageErrorEvent
|
||||
from crewai.utilities.events.tool_usage_events import (
|
||||
ToolUsageErrorEvent,
|
||||
)
|
||||
|
||||
base_agent = Agent(
|
||||
role="base_agent",
|
||||
|
||||
Reference in New Issue
Block a user