Lorenze/fix tool call twice (#3495)

* test: add test to ensure tool is called only once during crew execution

- Introduced a new test case to validate that the counting_tool is executed exactly once during crew execution.
- Created a CountingTool class to track execution counts and log call history.
- Enhanced the test suite with a YAML cassette for consistent tool behavior verification.

* ensure tool function called only once

* refactor: simplify error handling in CrewStructuredTool

- Removed unnecessary try-except block around the tool function call to streamline execution flow.
- Ensured that the tool function is called directly, improving readability and maintainability.

* linted

* need to ignore for now as we cant infer the complex generic type within pydantic create_model_func

* fix tests
This commit is contained in:
Lorenze Jay
2025-09-10 15:20:21 -07:00
committed by GitHub
parent 01be26ce2a
commit 75b916c85a
5 changed files with 583 additions and 233 deletions

View File

@@ -1,5 +1,3 @@
from typing import Optional
import pytest
from pydantic import BaseModel, Field
@@ -39,6 +37,7 @@ def test_initialization(basic_function, schema_class):
assert tool.func == basic_function
assert tool.args_schema == schema_class
def test_from_function(basic_function):
"""Test creating tool from function"""
tool = CrewStructuredTool.from_function(
@@ -50,6 +49,7 @@ def test_from_function(basic_function):
assert tool.func == basic_function
assert isinstance(tool.args_schema, type(BaseModel))
def test_validate_function_signature(basic_function, schema_class):
"""Test function signature validation"""
tool = CrewStructuredTool(
@@ -62,6 +62,7 @@ def test_validate_function_signature(basic_function, schema_class):
# Should not raise any exceptions
tool._validate_function_signature()
@pytest.mark.asyncio
async def test_ainvoke(basic_function):
"""Test asynchronous invocation"""
@@ -70,6 +71,7 @@ async def test_ainvoke(basic_function):
result = await tool.ainvoke(input={"param1": "test"})
assert result == "test 0"
def test_parse_args_dict(basic_function):
"""Test parsing dictionary arguments"""
tool = CrewStructuredTool.from_function(func=basic_function, name="test_tool")
@@ -78,6 +80,7 @@ def test_parse_args_dict(basic_function):
assert parsed["param1"] == "test"
assert parsed["param2"] == 42
def test_parse_args_string(basic_function):
"""Test parsing string arguments"""
tool = CrewStructuredTool.from_function(func=basic_function, name="test_tool")
@@ -86,6 +89,7 @@ def test_parse_args_string(basic_function):
assert parsed["param1"] == "test"
assert parsed["param2"] == 42
def test_complex_types():
"""Test handling of complex parameter types"""
@@ -99,6 +103,7 @@ def test_complex_types():
result = tool.invoke({"nested": {"key": "value"}, "items": [1, 2, 3]})
assert result == "Processed 3 items with 1 nested keys"
def test_schema_inheritance():
"""Test tool creation with inherited schema"""
@@ -119,13 +124,14 @@ def test_schema_inheritance():
result = tool.invoke({"base_param": "test", "extra_param": 42})
assert result == "test 42"
def test_default_values_in_schema():
"""Test handling of default values in schema"""
def default_func(
required_param: str,
optional_param: str = "default",
nullable_param: Optional[int] = None,
nullable_param: int | None = None,
) -> str:
"""Test function with default values."""
return f"{required_param} {optional_param} {nullable_param}"
@@ -144,6 +150,7 @@ def test_default_values_in_schema():
)
assert result == "test custom 42"
@pytest.fixture
def custom_tool_decorator():
from crewai.tools import tool
@@ -155,6 +162,7 @@ def custom_tool_decorator():
return custom_tool
@pytest.fixture
def custom_tool():
from crewai.tools import BaseTool
@@ -169,17 +177,25 @@ def custom_tool():
return CustomTool()
def build_simple_crew(tool):
from crewai import Agent, Task, Crew
from crewai import Agent, Crew, Task
agent1 = Agent(role="Simple role", goal="Simple goal", backstory="Simple backstory", tools=[tool])
say_hi_task = Task(
description="Use the custom tool result as answer.", agent=agent1, expected_output="Use the tool result"
agent1 = Agent(
role="Simple role",
goal="Simple goal",
backstory="Simple backstory",
tools=[tool],
)
crew = Crew(agents=[agent1], tasks=[say_hi_task])
return crew
say_hi_task = Task(
description="Use the custom tool result as answer.",
agent=agent1,
expected_output="Use the tool result",
)
return Crew(agents=[agent1], tasks=[say_hi_task])
@pytest.mark.vcr(filter_headers=["authorization"])
def test_async_tool_using_within_isolated_crew(custom_tool):
@@ -188,6 +204,7 @@ def test_async_tool_using_within_isolated_crew(custom_tool):
assert result.raw == "Hello World from Custom Tool"
@pytest.mark.vcr(filter_headers=["authorization"])
def test_async_tool_using_decorator_within_isolated_crew(custom_tool_decorator):
crew = build_simple_crew(custom_tool_decorator)
@@ -195,6 +212,7 @@ def test_async_tool_using_decorator_within_isolated_crew(custom_tool_decorator):
assert result.raw == "Hello World from Custom Tool"
@pytest.mark.vcr(filter_headers=["authorization"])
def test_async_tool_within_flow(custom_tool):
from crewai.flow.flow import Flow
@@ -205,8 +223,7 @@ def test_async_tool_within_flow(custom_tool):
@start()
async def start(self):
crew = build_simple_crew(custom_tool)
result = await crew.kickoff_async()
return result
return await crew.kickoff_async()
flow = StructuredExampleFlow()
result = flow.kickoff()
@@ -219,12 +236,141 @@ def test_async_tool_using_decorator_within_flow(custom_tool_decorator):
class StructuredExampleFlow(Flow):
from crewai.flow.flow import start
@start()
async def start(self):
crew = build_simple_crew(custom_tool_decorator)
result = await crew.kickoff_async()
return result
return await crew.kickoff_async()
flow = StructuredExampleFlow()
result = flow.kickoff()
assert result.raw == "Hello World from Custom Tool"
assert result.raw == "Hello World from Custom Tool"
def test_structured_tool_invoke_calls_func_only_once():
"""Test that CrewStructuredTool.invoke() calls the underlying function exactly once."""
call_count = 0
call_history = []
def counting_function(param: str) -> str:
"""Function that tracks how many times it's called."""
nonlocal call_count
call_count += 1
call_history.append(f"Call #{call_count} with param: {param}")
return f"Result from call #{call_count}: {param}"
# Create CrewStructuredTool directly
tool = CrewStructuredTool.from_function(
func=counting_function,
name="direct_test_tool",
description="Tool to test direct invoke() method",
)
# Call invoke() directly - this is where the bug was
result = tool.invoke({"param": "test_value"})
# Critical assertions that would catch the duplicate execution bug
assert call_count == 1, (
f"DUPLICATE EXECUTION BUG: Function was called {call_count} times instead of 1. "
f"This means CrewStructuredTool.invoke() has duplicate function calls. "
f"Call history: {call_history}"
)
assert len(call_history) == 1, (
f"Expected 1 call in history, got {len(call_history)}: {call_history}"
)
assert call_history[0] == "Call #1 with param: test_value", (
f"Expected 'Call #1 with param: test_value', got: {call_history[0]}"
)
assert result == "Result from call #1: test_value", (
f"Expected result from first call, got: {result}"
)
def test_structured_tool_invoke_multiple_calls_increment_correctly():
"""Test multiple calls to invoke() to ensure each increments correctly."""
call_count = 0
def incrementing_function(value: int) -> int:
nonlocal call_count
call_count += 1
return value + call_count
tool = CrewStructuredTool.from_function(
func=incrementing_function,
name="incrementing_tool",
description="Tool that increments on each call",
)
result1 = tool.invoke({"value": 10})
assert call_count == 1, (
f"After first invoke, expected call_count=1, got {call_count}"
)
assert result1 == 11, f"Expected 11 (10+1), got {result1}"
result2 = tool.invoke({"value": 20})
assert call_count == 2, (
f"After second invoke, expected call_count=2, got {call_count}"
)
assert result2 == 22, f"Expected 22 (20+2), got {result2}"
result3 = tool.invoke({"value": 30})
assert call_count == 3, (
f"After third invoke, expected call_count=3, got {call_count}"
)
assert result3 == 33, f"Expected 33 (30+3), got {result3}"
def test_structured_tool_invoke_with_side_effects():
"""Test that side effects only happen once per invoke() call."""
side_effects = []
def side_effect_function(action: str) -> str:
side_effects.append(f"SIDE_EFFECT: {action} executed at call")
return f"Action {action} completed"
tool = CrewStructuredTool.from_function(
func=side_effect_function,
name="side_effect_tool",
description="Tool with observable side effects",
)
result = tool.invoke({"action": "write_file"})
assert len(side_effects) == 1, (
f"SIDE EFFECT BUG: Expected 1 side effect, got {len(side_effects)}. "
f"This indicates the function was called multiple times. "
f"Side effects: {side_effects}"
)
assert side_effects[0] == "SIDE_EFFECT: write_file executed at call"
assert result == "Action write_file completed"
def test_structured_tool_invoke_exception_handling():
"""Test that exceptions don't cause duplicate execution."""
call_count = 0
def failing_function(should_fail: bool) -> str:
nonlocal call_count
call_count += 1
if should_fail:
raise ValueError(f"Intentional failure on call #{call_count}")
return f"Success on call #{call_count}"
tool = CrewStructuredTool.from_function(
func=failing_function, name="failing_tool", description="Tool that can fail"
)
result = tool.invoke({"should_fail": False})
assert call_count == 1, f"Expected 1 call for success case, got {call_count}"
assert result == "Success on call #1"
call_count = 0
with pytest.raises(ValueError, match="Intentional failure on call #1"):
tool.invoke({"should_fail": True})
assert call_count == 1