mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-11 13:32:34 +00:00
Compare commits
2 Commits
fix/trace-
...
devin/1774
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f5dc745669 | ||
|
|
99066ca278 |
@@ -1072,6 +1072,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
"result": result,
|
||||
"from_cache": from_cache,
|
||||
"original_tool": original_tool,
|
||||
"error_occurred": error_event_emitted,
|
||||
}
|
||||
|
||||
def _append_tool_result_and_check_finality(
|
||||
@@ -1082,6 +1083,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
result = cast(str, execution_result["result"])
|
||||
from_cache = cast(bool, execution_result["from_cache"])
|
||||
original_tool = execution_result["original_tool"]
|
||||
error_occurred = execution_result.get("error_occurred", False)
|
||||
|
||||
tool_message: LLMMessage = {
|
||||
"role": "tool",
|
||||
@@ -1098,10 +1100,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
|
||||
color="green",
|
||||
)
|
||||
|
||||
# Don't honor result_as_answer when the tool execution errored;
|
||||
# let the agent reflect on the error instead.
|
||||
if (
|
||||
original_tool
|
||||
and hasattr(original_tool, "result_as_answer")
|
||||
and original_tool.result_as_answer
|
||||
and not error_occurred
|
||||
):
|
||||
return AgentFinish(
|
||||
thought="Tool result is the final answer",
|
||||
|
||||
@@ -1635,6 +1635,7 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
|
||||
"result": f"Error executing tool: {e}",
|
||||
"from_cache": False,
|
||||
"original_tool": None,
|
||||
"error_occurred": True,
|
||||
}
|
||||
execution_results = [
|
||||
result for result in ordered_results if result is not None
|
||||
@@ -1666,10 +1667,14 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
|
||||
color="green",
|
||||
)
|
||||
|
||||
# Don't honor result_as_answer when the tool execution errored;
|
||||
# let the agent reflect on the error instead.
|
||||
error_occurred = execution_result.get("error_occurred", False)
|
||||
if (
|
||||
original_tool
|
||||
and hasattr(original_tool, "result_as_answer")
|
||||
and original_tool.result_as_answer
|
||||
and not error_occurred
|
||||
):
|
||||
self.state.current_answer = AgentFinish(
|
||||
thought="Tool result is the final answer",
|
||||
@@ -1704,10 +1709,14 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
|
||||
color="green",
|
||||
)
|
||||
|
||||
# Don't honor result_as_answer when the tool execution errored;
|
||||
# let the agent reflect on the error instead.
|
||||
error_occurred = execution_result.get("error_occurred", False)
|
||||
if (
|
||||
original_tool
|
||||
and hasattr(original_tool, "result_as_answer")
|
||||
and original_tool.result_as_answer
|
||||
and not error_occurred
|
||||
):
|
||||
# Set the result as the final answer
|
||||
self.state.current_answer = AgentFinish(
|
||||
@@ -1964,6 +1973,7 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
|
||||
"result": result,
|
||||
"from_cache": from_cache,
|
||||
"original_tool": original_tool,
|
||||
"error_occurred": error_event_emitted,
|
||||
}
|
||||
|
||||
def _extract_tool_name(self, tool_call: Any) -> str:
|
||||
|
||||
@@ -108,6 +108,7 @@ class ToolUsage:
|
||||
self.action = action
|
||||
self.function_calling_llm = function_calling_llm
|
||||
self.fingerprint_context = fingerprint_context or {}
|
||||
self._last_execution_errored: bool = False
|
||||
|
||||
# Set the maximum parsing attempts for bigger models
|
||||
if (
|
||||
@@ -126,8 +127,10 @@ class ToolUsage:
|
||||
def use(
|
||||
self, calling: ToolCalling | InstructorToolCalling, tool_string: str
|
||||
) -> str:
|
||||
self._last_execution_errored = False
|
||||
if isinstance(calling, ToolUsageError):
|
||||
error = calling.message
|
||||
self._last_execution_errored = True
|
||||
if self.agent and self.agent.verbose:
|
||||
self._printer.print(content=f"\n\n{error}\n", color="red")
|
||||
if self.task:
|
||||
@@ -138,6 +141,7 @@ class ToolUsage:
|
||||
tool = self._select_tool(calling.tool_name)
|
||||
except Exception as e:
|
||||
error = getattr(e, "message", str(e))
|
||||
self._last_execution_errored = True
|
||||
if self.task:
|
||||
self.task.increment_tools_errors()
|
||||
if self.agent and self.agent.verbose:
|
||||
@@ -154,6 +158,7 @@ class ToolUsage:
|
||||
|
||||
except Exception as e:
|
||||
error = getattr(e, "message", str(e))
|
||||
self._last_execution_errored = True
|
||||
if self.task:
|
||||
self.task.increment_tools_errors()
|
||||
if self.agent and self.agent.verbose:
|
||||
@@ -174,8 +179,10 @@ class ToolUsage:
|
||||
Returns:
|
||||
The result of the tool execution as a string.
|
||||
"""
|
||||
self._last_execution_errored = False
|
||||
if isinstance(calling, ToolUsageError):
|
||||
error = calling.message
|
||||
self._last_execution_errored = True
|
||||
if self.agent and self.agent.verbose:
|
||||
self._printer.print(content=f"\n\n{error}\n", color="red")
|
||||
if self.task:
|
||||
@@ -186,6 +193,7 @@ class ToolUsage:
|
||||
tool = self._select_tool(calling.tool_name)
|
||||
except Exception as e:
|
||||
error = getattr(e, "message", str(e))
|
||||
self._last_execution_errored = True
|
||||
if self.task:
|
||||
self.task.increment_tools_errors()
|
||||
if self.agent and self.agent.verbose:
|
||||
@@ -203,6 +211,7 @@ class ToolUsage:
|
||||
)
|
||||
except Exception as e:
|
||||
error = getattr(e, "message", str(e))
|
||||
self._last_execution_errored = True
|
||||
if self.task:
|
||||
self.task.increment_tools_errors()
|
||||
if self.agent and self.agent.verbose:
|
||||
@@ -410,6 +419,7 @@ class ToolUsage:
|
||||
except Exception as e:
|
||||
self.on_tool_error(tool=tool, tool_calling=calling, e=e)
|
||||
error_event_emitted = True
|
||||
self._last_execution_errored = True
|
||||
self._run_attempts += 1
|
||||
if self._run_attempts > self._max_parsing_attempts:
|
||||
self._telemetry.tool_usage_error(llm=self.function_calling_llm)
|
||||
@@ -642,6 +652,7 @@ class ToolUsage:
|
||||
except Exception as e:
|
||||
self.on_tool_error(tool=tool, tool_calling=calling, e=e)
|
||||
error_event_emitted = True
|
||||
self._last_execution_errored = True
|
||||
self._run_attempts += 1
|
||||
if self._run_attempts > self._max_parsing_attempts:
|
||||
self._telemetry.tool_usage_error(llm=self.function_calling_llm)
|
||||
|
||||
@@ -1575,11 +1575,13 @@ def execute_single_native_tool_call(
|
||||
color="green",
|
||||
)
|
||||
|
||||
# Check result_as_answer
|
||||
# Check result_as_answer — but don't honor it when the tool errored;
|
||||
# let the agent reflect on the error instead.
|
||||
is_result_as_answer = bool(
|
||||
original_tool
|
||||
and hasattr(original_tool, "result_as_answer")
|
||||
and original_tool.result_as_answer
|
||||
and not error_event_emitted
|
||||
)
|
||||
|
||||
return NativeToolCallResult(
|
||||
|
||||
@@ -140,7 +140,12 @@ async def aexecute_tool_and_check_finality(
|
||||
except Exception as e:
|
||||
logger.log("error", f"Error in after_tool_call hook: {e}")
|
||||
|
||||
return ToolResult(modified_result, tool.result_as_answer)
|
||||
# Don't honor result_as_answer when the tool execution errored;
|
||||
# let the agent reflect on the error instead.
|
||||
effective_result_as_answer = (
|
||||
tool.result_as_answer and not tool_usage._last_execution_errored
|
||||
)
|
||||
return ToolResult(modified_result, effective_result_as_answer)
|
||||
|
||||
tool_result = i18n.errors("wrong_tool_name").format(
|
||||
tool=sanitized_tool_name,
|
||||
@@ -261,7 +266,12 @@ def execute_tool_and_check_finality(
|
||||
except Exception as e:
|
||||
logger.log("error", f"Error in after_tool_call hook: {e}")
|
||||
|
||||
return ToolResult(modified_result, tool.result_as_answer)
|
||||
# Don't honor result_as_answer when the tool execution errored;
|
||||
# let the agent reflect on the error instead.
|
||||
effective_result_as_answer = (
|
||||
tool.result_as_answer and not tool_usage._last_execution_errored
|
||||
)
|
||||
return ToolResult(modified_result, effective_result_as_answer)
|
||||
|
||||
tool_result = i18n.errors("wrong_tool_name").format(
|
||||
tool=sanitized_tool_name,
|
||||
|
||||
@@ -879,6 +879,80 @@ class TestNativeToolExecution:
|
||||
assert len(tool_messages) == 1
|
||||
assert tool_messages[0]["tool_call_id"] == "call_1"
|
||||
|
||||
def test_execute_native_tool_result_as_answer_not_honored_on_error(
    self, mock_dependencies
):
    """Check that an erroring ``result_as_answer`` tool does not finish the run.

    Regression test for https://github.com/crewAIInc/crewAI/issues/5156

    A tool flagged ``result_as_answer=True`` raises while being executed
    natively; ``execute_native_tool`` is expected to report
    ``"native_tool_completed"`` and leave the executor state unfinished.
    """

    def failing_tool() -> str:
        raise RuntimeError("Tool execution failed")

    # ``name`` is assigned after construction because ``Mock(name=...)``
    # names the mock itself instead of creating a ``name`` attribute.
    broken_tool = Mock(
        result_as_answer=True,
        max_usage_count=None,
        current_usage_count=0,
    )
    broken_tool.name = "failing_tool"

    executor = AgentExecutor(**mock_dependencies)
    executor.original_tools = [broken_tool]
    executor._available_functions = {"failing_tool": failing_tool}
    pending_call = {
        "id": "call_1",
        "function": {"name": "failing_tool", "arguments": "{}"},
    }
    executor.state.pending_tool_calls = [pending_call]

    outcome = executor.execute_native_tool()

    # The tool errored, so its output must not be promoted to the final answer.
    assert outcome == "native_tool_completed", (
        "Expected 'native_tool_completed' (not 'tool_result_is_final') "
        "when a result_as_answer tool errors"
    )
    assert not executor.state.is_finished, (
        "Agent state should NOT be finished when tool errored"
    )
|
||||
|
||||
def test_execute_native_tool_result_as_answer_honored_on_success(
    self, mock_dependencies
):
    """Check that a succeeding ``result_as_answer`` tool finishes the run.

    The tool returns normally, so ``execute_native_tool`` is expected to
    report ``"tool_result_is_final"`` and store the tool output as an
    ``AgentFinish`` on the executor state.
    """

    def success_tool() -> str:
        return "final answer from tool"

    # ``name`` is assigned after construction because ``Mock(name=...)``
    # names the mock itself instead of creating a ``name`` attribute.
    final_tool = Mock(
        result_as_answer=True,
        max_usage_count=None,
        current_usage_count=0,
    )
    final_tool.name = "success_tool"

    executor = AgentExecutor(**mock_dependencies)
    executor.original_tools = [final_tool]
    executor._available_functions = {"success_tool": success_tool}
    pending_call = {
        "id": "call_1",
        "function": {"name": "success_tool", "arguments": "{}"},
    }
    executor.state.pending_tool_calls = [pending_call]

    outcome = executor.execute_native_tool()

    assert outcome == "tool_result_is_final", (
        "Expected 'tool_result_is_final' when result_as_answer tool succeeds"
    )
    state = executor.state
    assert state.is_finished is True
    assert isinstance(state.current_answer, AgentFinish)
    assert state.current_answer.output == "final answer from tool"
|
||||
|
||||
def test_check_native_todo_completion_requires_current_todo(
|
||||
self, mock_dependencies
|
||||
):
|
||||
|
||||
@@ -748,6 +748,246 @@ def test_tool_usage_finished_event_with_cached_result():
|
||||
assert event.type == "tool_usage_finished"
|
||||
|
||||
|
||||
def test_result_as_answer_not_honored_on_tool_error():
    """Check that ``ToolUsage.use`` records a failed tool execution.

    Regression test for https://github.com/crewAIInc/crewAI/issues/5156

    A tool declared with ``result_as_answer=True`` raises from ``_run``;
    after ``use`` returns, ``_last_execution_errored`` must be ``True``.
    """
    from crewai.tools.tool_calling import ToolCalling

    class FailingResultTool(BaseTool):
        name: str = "Failing Result Tool"
        description: str = "A tool that always fails but has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            raise ValueError("Something went wrong")

    failing_tool = FailingResultTool().to_structured_tool()
    assert failing_tool.result_as_answer is True

    # Plain attributes are supplied as constructor keywords; nested
    # ``return_value`` settings are assigned afterwards.
    agent_double = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        verbose=False,
        fingerprint=None,
    )
    agent_double.i18n.tools.return_value = {"name": "Failing Result Tool"}
    agent_double.i18n.errors.return_value = "Error: {error}"
    agent_double.i18n.slice.return_value = "Available tools: {tool_names}"

    # ``name`` cannot be passed to the constructor: it would name the mock.
    task_double = MagicMock(
        delegations=0,
        description="A test task",
        id="test-task-id",
    )
    task_double.name = "Test Task"

    action_double = MagicMock(tool="failing_result_tool", tool_input="{}")

    tool_usage = ToolUsage(
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        tools=[failing_tool],
        task=task_double,
        function_calling_llm=None,
        agent=agent_double,
        action=action_double,
    )

    tool_usage.use(
        calling=ToolCalling(tool_name="failing_result_tool", arguments={}),
        tool_string="Action: failing_result_tool",
    )

    # A failed execution must leave the error flag raised.
    assert tool_usage._last_execution_errored is True, (
        "Expected _last_execution_errored to be True after tool failure"
    )
|
||||
|
||||
|
||||
def test_result_as_answer_honored_on_tool_success():
    """Check that ``ToolUsage.use`` leaves the error flag clear on success.

    A tool declared with ``result_as_answer=True`` returns normally; after
    ``use`` returns, ``_last_execution_errored`` must be ``False``.
    """
    from crewai.tools.tool_calling import ToolCalling

    class SuccessfulResultTool(BaseTool):
        name: str = "Successful Result Tool"
        description: str = "A tool that succeeds and has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            return "This is the final answer"

    successful_tool = SuccessfulResultTool().to_structured_tool()
    assert successful_tool.result_as_answer is True

    # Plain attributes are supplied as constructor keywords; nested
    # ``return_value`` settings are assigned afterwards.
    agent_double = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        verbose=False,
        fingerprint=None,
    )
    agent_double.i18n.tools.return_value = {"name": "Successful Result Tool"}
    agent_double.i18n.errors.return_value = "Error: {error}"
    agent_double.i18n.slice.return_value = "Available tools: {tool_names}"

    # ``name`` cannot be passed to the constructor: it would name the mock.
    task_double = MagicMock(
        delegations=0,
        description="A test task",
        id="test-task-id",
    )
    task_double.name = "Test Task"

    action_double = MagicMock(tool="successful_result_tool", tool_input="{}")

    tool_usage = ToolUsage(
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        tools=[successful_tool],
        task=task_double,
        function_calling_llm=None,
        agent=agent_double,
        action=action_double,
    )

    tool_usage.use(
        calling=ToolCalling(tool_name="successful_result_tool", arguments={}),
        tool_string="Action: successful_result_tool",
    )

    # A successful execution must not raise the error flag.
    assert tool_usage._last_execution_errored is False, (
        "Expected _last_execution_errored to be False after tool success"
    )
|
||||
|
||||
|
||||
def test_execute_tool_and_check_finality_not_final_on_error():
    """Check that a failing ``result_as_answer`` tool is not reported as final.

    Regression test for https://github.com/crewAIInc/crewAI/issues/5156

    ``execute_tool_and_check_finality`` is run against a tool whose ``_run``
    raises; the returned result must carry ``result_as_answer is False``.
    """
    from crewai.agents.parser import AgentAction
    from crewai.utilities.i18n import I18N
    from crewai.utilities.tool_utils import execute_tool_and_check_finality

    class FailingFinalTool(BaseTool):
        name: str = "Failing Final Tool"
        description: str = "A tool that fails but has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            raise RuntimeError("Tool execution failed")

    failing_tool = FailingFinalTool().to_structured_tool()

    # Plain attributes are supplied as constructor keywords; nested
    # ``return_value`` settings are assigned afterwards.
    agent_double = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        verbose=False,
        fingerprint=None,
    )
    agent_double.i18n.tools.return_value = {"name": "Failing Final Tool"}
    agent_double.i18n.errors.return_value = "Error: {error}"
    agent_double.i18n.slice.return_value = "Available tools: {tool_names}"

    # ``name`` cannot be passed to the constructor: it would name the mock.
    task_double = MagicMock(
        delegations=0,
        description="A test task",
        id="test-task-id",
    )
    task_double.name = "Test Task"

    translations = I18N()

    action = AgentAction(
        thought="I need to use this tool",
        tool="failing_final_tool",
        tool_input="{}",
        text="Action: failing_final_tool\nAction Input: {}",
    )

    outcome = execute_tool_and_check_finality(
        agent_action=action,
        tools=[failing_tool],
        i18n=translations,
        agent_key="test_agent_key",
        agent_role="test_agent_role",
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        task=task_double,
        agent=agent_double,
        function_calling_llm=None,
    )

    # An errored tool run must not be promoted to the final answer.
    assert outcome.result_as_answer is False, (
        "result_as_answer should be False when tool execution fails, "
        "even if the tool has result_as_answer=True"
    )
|
||||
|
||||
|
||||
def test_execute_tool_and_check_finality_is_final_on_success():
    """Check that a succeeding ``result_as_answer`` tool is reported as final.

    ``execute_tool_and_check_finality`` is run against a tool whose ``_run``
    returns normally; the returned result must carry
    ``result_as_answer is True``.
    """
    from crewai.agents.parser import AgentAction
    from crewai.utilities.i18n import I18N
    from crewai.utilities.tool_utils import execute_tool_and_check_finality

    class SuccessfulFinalTool(BaseTool):
        name: str = "Successful Final Tool"
        description: str = "A tool that succeeds and has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            return "This is the correct final answer"

    success_tool = SuccessfulFinalTool().to_structured_tool()

    # Plain attributes are supplied as constructor keywords; nested
    # ``return_value`` settings are assigned afterwards.
    agent_double = MagicMock(
        key="test_agent_key",
        role="test_agent_role",
        _original_role="test_agent_role",
        verbose=False,
        fingerprint=None,
    )
    agent_double.i18n.tools.return_value = {"name": "Successful Final Tool"}
    agent_double.i18n.errors.return_value = "Error: {error}"
    agent_double.i18n.slice.return_value = "Available tools: {tool_names}"

    # ``name`` cannot be passed to the constructor: it would name the mock.
    task_double = MagicMock(
        delegations=0,
        description="A test task",
        id="test-task-id",
    )
    task_double.name = "Test Task"

    translations = I18N()

    action = AgentAction(
        thought="I need to use this tool",
        tool="successful_final_tool",
        tool_input="{}",
        text="Action: successful_final_tool\nAction Input: {}",
    )

    outcome = execute_tool_and_check_finality(
        agent_action=action,
        tools=[success_tool],
        i18n=translations,
        agent_key="test_agent_key",
        agent_role="test_agent_role",
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        task=task_double,
        agent=agent_double,
        function_calling_llm=None,
    )

    # A successful tool run keeps its final-answer status.
    assert outcome.result_as_answer is True, (
        "result_as_answer should be True when tool execution succeeds "
        "and the tool has result_as_answer=True"
    )
|
||||
|
||||
|
||||
def test_tool_error_does_not_emit_finished_event():
|
||||
from crewai.tools.tool_calling import ToolCalling
|
||||
|
||||
|
||||
Reference in New Issue
Block a user