"""Integration tests for native tool calling functionality.
|
|
|
|
These tests verify that agents can use native function calling
|
|
when the LLM supports it, across multiple providers.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
from pydantic import BaseModel, Field
|
|
|
|
from crewai import Agent, Crew, Task
|
|
from crewai.llm import LLM
|
|
from crewai.tools.base_tool import BaseTool
|
|
|
|
|
|
class CalculatorInput(BaseModel):
    """Input schema for calculator tool."""

    expression: str = Field(description="Mathematical expression to evaluate")


class CalculatorTool(BaseTool):
    """A calculator tool that performs mathematical calculations."""

    name: str = "calculator"
    description: str = "Perform mathematical calculations. Use this for any math operations."
    args_schema: type[BaseModel] = CalculatorInput

    def _run(self, expression: str) -> str:
        """Execute the calculation."""
        try:
            # Safe evaluation for basic math
            result = eval(expression)  # noqa: S307
            return f"The result of {expression} is {result}"
        except Exception as e:
            return f"Error calculating {expression}: {e}"


class WeatherInput(BaseModel):
    """Input schema for weather tool."""

    location: str = Field(description="City name to get weather for")


class WeatherTool(BaseTool):
    """A mock weather tool for testing."""

    name: str = "get_weather"
    description: str = "Get the current weather for a location"
    args_schema: type[BaseModel] = WeatherInput

    def _run(self, location: str) -> str:
        """Get weather (mock implementation)."""
        return f"The weather in {location} is sunny with a temperature of 72°F"


class FailingTool(BaseTool):
    """A tool that always fails."""

    name: str = "failing_tool"
    description: str = "This tool always fails"

    def _run(self) -> str:
        raise Exception("This tool always fails")


@pytest.fixture
def calculator_tool() -> CalculatorTool:
    """Create a calculator tool for testing."""
    return CalculatorTool()


@pytest.fixture
def weather_tool() -> WeatherTool:
    """Create a weather tool for testing."""
    return WeatherTool()


@pytest.fixture
def failing_tool() -> BaseTool:
    """Create a failing tool for testing."""
    return FailingTool()


# =============================================================================
# OpenAI Provider Tests
# =============================================================================


class TestOpenAINativeToolCalling:
    """Tests for native tool calling with OpenAI models."""

    @pytest.mark.vcr()
    def test_openai_agent_with_native_tool_calling(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test OpenAI agent can use native tool calling."""
        agent = Agent(
            role="Math Assistant",
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=3,
        )

        task = Task(
            description="Calculate what is 15 * 8",
            expected_output="The result of the calculation",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task])
        result = crew.kickoff()

        assert result is not None
        assert result.raw is not None
        assert "120" in str(result.raw)

    def test_openai_agent_kickoff_with_tools_mocked(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test OpenAI agent kickoff with mocked LLM call."""
        llm = LLM(model="gpt-4o-mini")

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
                role="Math Assistant",
                goal="Calculate math",
                backstory="You calculate.",
                tools=[calculator_tool],
                llm=llm,
                verbose=False,
            )

            task = Task(
                description="Calculate 15 * 8",
                expected_output="Result",
                agent=agent,
            )

            crew = Crew(agents=[agent], tasks=[task])
            result = crew.kickoff()

        assert mock_call.called
        assert result is not None


# =============================================================================
# Anthropic Provider Tests
# =============================================================================


class TestAnthropicNativeToolCalling:
    """Tests for native tool calling with Anthropic models."""

    @pytest.fixture(autouse=True)
    def mock_anthropic_api_key(self):
        """Mock ANTHROPIC_API_KEY for tests."""
        if "ANTHROPIC_API_KEY" not in os.environ:
            with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
                yield
        else:
            yield

    @pytest.mark.vcr()
    def test_anthropic_agent_with_native_tool_calling(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Anthropic agent can use native tool calling."""
        agent = Agent(
            role="Math Assistant",
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
            llm=LLM(model="anthropic/claude-3-5-haiku-20241022"),
            verbose=False,
            max_iter=3,
        )

        task = Task(
            description="Calculate what is 15 * 8",
            expected_output="The result of the calculation",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task])
        result = crew.kickoff()

        assert result is not None
        assert result.raw is not None

    def test_anthropic_agent_kickoff_with_tools_mocked(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Anthropic agent kickoff with mocked LLM call."""
        llm = LLM(model="anthropic/claude-3-5-haiku-20241022")

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
                role="Math Assistant",
                goal="Calculate math",
                backstory="You calculate.",
                tools=[calculator_tool],
                llm=llm,
                verbose=False,
            )

            task = Task(
                description="Calculate 15 * 8",
                expected_output="Result",
                agent=agent,
            )

            crew = Crew(agents=[agent], tasks=[task])
            result = crew.kickoff()

        assert mock_call.called
        assert result is not None


# =============================================================================
# Google/Gemini Provider Tests
# =============================================================================


class TestGeminiNativeToolCalling:
    """Tests for native tool calling with Gemini models."""

    @pytest.fixture(autouse=True)
    def mock_google_api_key(self):
        """Mock GOOGLE_API_KEY for tests."""
        if "GOOGLE_API_KEY" not in os.environ and "GEMINI_API_KEY" not in os.environ:
            with patch.dict(os.environ, {"GOOGLE_API_KEY": "test-key"}):
                yield
        else:
            yield

    @pytest.mark.vcr()
    def test_gemini_agent_with_native_tool_calling(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Gemini agent can use native tool calling."""
        agent = Agent(
            role="Math Assistant",
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
            llm=LLM(model="gemini/gemini-2.0-flash-exp"),
        )

        task = Task(
            description="Calculate what is 15 * 8",
            expected_output="The result of the calculation",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task])
        result = crew.kickoff()

        assert result is not None
        assert result.raw is not None

    def test_gemini_agent_kickoff_with_tools_mocked(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Gemini agent kickoff with mocked LLM call."""
        llm = LLM(model="gemini/gemini-2.0-flash-001")

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
                role="Math Assistant",
                goal="Calculate math",
                backstory="You calculate.",
                tools=[calculator_tool],
                llm=llm,
                verbose=False,
            )

            task = Task(
                description="Calculate 15 * 8",
                expected_output="Result",
                agent=agent,
            )

            crew = Crew(agents=[agent], tasks=[task])
            result = crew.kickoff()

        assert mock_call.called
        assert result is not None


# =============================================================================
# Azure Provider Tests
# =============================================================================


class TestAzureNativeToolCalling:
    """Tests for native tool calling with Azure OpenAI models."""

    @pytest.fixture(autouse=True)
    def mock_azure_env(self):
        """Mock Azure environment variables for tests."""
        env_vars = {
            "AZURE_API_KEY": "test-key",
            "AZURE_API_BASE": "https://test.openai.azure.com",
            "AZURE_API_VERSION": "2024-02-15-preview",
        }
        # Only patch if keys are not already in environment
        if "AZURE_API_KEY" not in os.environ:
            with patch.dict(os.environ, env_vars):
                yield
        else:
            yield

    @pytest.mark.vcr()
    def test_azure_agent_with_native_tool_calling(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Azure agent can use native tool calling."""
        agent = Agent(
            role="Math Assistant",
            goal="Help users with mathematical calculations",
            backstory="You are a helpful math assistant.",
            tools=[calculator_tool],
            llm=LLM(model="azure/gpt-4o-mini"),
            verbose=False,
            max_iter=3,
        )

        task = Task(
            description="Calculate what is 15 * 8",
            expected_output="The result of the calculation",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task])
        result = crew.kickoff()

        assert result is not None
        assert result.raw is not None
        assert "120" in str(result.raw)

    def test_azure_agent_kickoff_with_tools_mocked(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Azure agent kickoff with mocked LLM call."""
        llm = LLM(
            model="azure/gpt-4o-mini",
            api_key="test-key",
            base_url="https://test.openai.azure.com",
        )

        with patch.object(llm, "call", return_value="The answer is 120.") as mock_call:
            agent = Agent(
                role="Math Assistant",
                goal="Calculate math",
                backstory="You calculate.",
                tools=[calculator_tool],
                llm=llm,
                verbose=False,
            )

            task = Task(
                description="Calculate 15 * 8",
                expected_output="Result",
                agent=agent,
            )

            crew = Crew(agents=[agent], tasks=[task])
            result = crew.kickoff()

        assert mock_call.called
        assert result is not None


# =============================================================================
# Bedrock Provider Tests
# =============================================================================


class TestBedrockNativeToolCalling:
    """Tests for native tool calling with AWS Bedrock models."""

    @pytest.fixture(autouse=True)
    def mock_aws_env(self):
        """Mock AWS environment variables for tests."""
        env_vars = {
            "AWS_ACCESS_KEY_ID": "test-key",
            "AWS_SECRET_ACCESS_KEY": "test-secret",
            "AWS_REGION": "us-east-1",
        }
        if "AWS_ACCESS_KEY_ID" not in os.environ:
            with patch.dict(os.environ, env_vars):
                yield
        else:
            yield

    @pytest.mark.vcr()
    def test_bedrock_agent_kickoff_with_tools_mocked(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test Bedrock agent can use native tool calling (VCR-recorded, not mocked)."""
        llm = LLM(model="bedrock/anthropic.claude-3-haiku-20240307-v1:0")

        agent = Agent(
            role="Math Assistant",
            goal="Calculate math",
            backstory="You calculate.",
            tools=[calculator_tool],
            llm=llm,
            verbose=False,
            max_iter=5,
        )

        task = Task(
            description="Calculate 15 * 8",
            expected_output="Result",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task])
        result = crew.kickoff()

        assert result is not None
        assert result.raw is not None
        assert "120" in str(result.raw)


# =============================================================================
# Cross-Provider Native Tool Calling Behavior Tests
# =============================================================================


class TestNativeToolCallingBehavior:
    """Tests for native tool calling behavior across providers."""

    def test_supports_function_calling_check(self) -> None:
        """Test that supports_function_calling() is properly checked."""
        # OpenAI should support function calling
        openai_llm = LLM(model="gpt-4o-mini")
        assert hasattr(openai_llm, "supports_function_calling")
        assert openai_llm.supports_function_calling() is True

    def test_anthropic_supports_function_calling(self) -> None:
        """Test that Anthropic models support function calling."""
        with patch.dict(os.environ, {"ANTHROPIC_API_KEY": "test-key"}):
            llm = LLM(model="anthropic/claude-3-5-haiku-20241022")
            assert hasattr(llm, "supports_function_calling")
            assert llm.supports_function_calling() is True

    def test_gemini_supports_function_calling(self) -> None:
        """Test that Gemini models support function calling."""
        llm = LLM(model="gemini/gemini-2.5-flash")
        assert hasattr(llm, "supports_function_calling")
        assert llm.supports_function_calling() is True


# =============================================================================
# Token Usage Tests
# =============================================================================


class TestNativeToolCallingTokenUsage:
    """Tests for token usage with native tool calling."""

    @pytest.mark.vcr()
    def test_openai_native_tool_calling_token_usage(
        self, calculator_tool: CalculatorTool
    ) -> None:
        """Test token usage tracking with OpenAI native tool calling."""
        agent = Agent(
            role="Calculator",
            goal="Perform calculations efficiently",
            backstory="You calculate things.",
            tools=[calculator_tool],
            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=3,
        )

        task = Task(
            description="What is 100 / 4?",
            expected_output="The result",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task])
        result = crew.kickoff()

        assert result is not None
        assert result.token_usage is not None
        assert result.token_usage.total_tokens > 0
        assert result.token_usage.successful_requests >= 1

        print("\n[OPENAI NATIVE TOOL CALLING TOKEN USAGE]")
        print(f"  Prompt tokens: {result.token_usage.prompt_tokens}")
        print(f"  Completion tokens: {result.token_usage.completion_tokens}")
        print(f"  Total tokens: {result.token_usage.total_tokens}")


@pytest.mark.vcr()
def test_native_tool_calling_error_handling(failing_tool: FailingTool) -> None:
    """Test that native tool calling handles errors properly and emits error events."""
    import threading

    from crewai.events import crewai_event_bus
    from crewai.events.types.tool_usage_events import ToolUsageErrorEvent

    received_events = []
    event_received = threading.Event()

    @crewai_event_bus.on(ToolUsageErrorEvent)
    def handle_tool_error(source, event):
        received_events.append(event)
        event_received.set()

    agent = Agent(
        role="Calculator",
        goal="Perform calculations efficiently",
        backstory="You calculate things.",
        tools=[failing_tool],
        llm=LLM(model="gpt-4o-mini"),
        verbose=False,
        max_iter=3,
    )

    result = agent.kickoff("Use the failing_tool to do something.")
    assert result is not None

    # Verify error event was emitted
    assert event_received.wait(timeout=10), "ToolUsageErrorEvent was not emitted"
    assert len(received_events) >= 1

    # Verify event attributes
    error_event = received_events[0]
    assert error_event.tool_name == "failing_tool"
    assert error_event.agent_role == agent.role
    assert "This tool always fails" in str(error_event.error)


# =============================================================================
# Max Usage Count Tests for Native Tool Calling
# =============================================================================


class CountingInput(BaseModel):
    """Input schema for counting tool."""

    value: str = Field(description="Value to count")


class CountingTool(BaseTool):
    """A tool that counts its usage."""

    name: str = "counting_tool"
    description: str = "A tool that counts how many times it's been called"
    args_schema: type[BaseModel] = CountingInput

    def _run(self, value: str) -> str:
        """Return the value with a count prefix."""
        return f"Counted: {value}"


class TestMaxUsageCountWithNativeToolCalling:
    """Tests for max_usage_count with native tool calling."""

    @pytest.mark.vcr()
    def test_max_usage_count_tracked_in_native_tool_calling(self) -> None:
        """Test that max_usage_count is properly tracked when using native tool calling."""
        tool = CountingTool(max_usage_count=3)

        # Verify initial state
        assert tool.max_usage_count == 3
        assert tool.current_usage_count == 0

        agent = Agent(
            role="Counting Agent",
            goal="Call the counting tool multiple times",
            backstory="You are an agent that counts things.",
            tools=[tool],
            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=5,
        )

        task = Task(
            description="Call the counting_tool 3 times with values 'first', 'second', and 'third'",
            expected_output="The results of the counting operations",
            agent=agent,
        )

        crew = Crew(agents=[agent], tasks=[task])
        crew.kickoff()

        # Verify usage count was tracked
        assert tool.max_usage_count == 3
        assert tool.current_usage_count <= tool.max_usage_count

    def test_max_usage_count_limit_enforced_in_native_tool_calling(self) -> None:
        """Test that when max_usage_count is reached, tool returns error message."""
        tool = CountingTool(max_usage_count=2)

        # Manually simulate tool being at max usage
        tool.current_usage_count = 2

        agent = Agent(
            role="Counting Agent",
            goal="Try to use the counting tool",
            backstory="You are an agent that counts things.",
            tools=[tool],
            llm=LLM(model="gpt-4o-mini"),
            verbose=False,
            max_iter=3,
        )

        # Verify the tool is at max usage
        assert tool.current_usage_count >= tool.max_usage_count

        # The tool should report it has reached its limit when the agent tries
        # to use it; this is handled in _handle_native_tool_calls /
        # execute_native_tool (see the sketch below).

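    # A minimal sketch (assumption) of the executor-side guard that the test
    # above relies on. The real check lives in _handle_native_tool_calls /
    # execute_native_tool; this helper is illustrative only, its name is
    # hypothetical, and it is never called by crewAI itself.
    @staticmethod
    def _usage_limit_message(tool: BaseTool) -> str | None:
        """Return a limit-reached message, or None if the tool may still run."""
        # Assumes max_usage_count is None when the tool has no limit.
        if (
            tool.max_usage_count is not None
            and tool.current_usage_count >= tool.max_usage_count
        ):
            return (
                f"Tool '{tool.name}' has reached its maximum usage count "
                f"({tool.max_usage_count}) and cannot be used again."
            )
        return None
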
    def test_tool_usage_increments_after_successful_execution(self) -> None:
        """Test that usage count increments after each successful native tool call."""
        tool = CountingTool(max_usage_count=10)

        assert tool.current_usage_count == 0

        # Simulate direct tool execution (which happens during native tool calling)
        result = tool.run(value="test")
        assert "Counted: test" in result
        assert tool.current_usage_count == 1

        result = tool.run(value="test2")
        assert "Counted: test2" in result
        assert tool.current_usage_count == 2