Compare commits

...

2 Commits

Author SHA1 Message Date
Devin AI
f5dc745669 fix: set _last_execution_errored in add_image exception handlers
Address Cursor Bugbot review: the add_image exception handlers in
use() and ause() were missing the error flag, allowing result_as_answer
to be incorrectly honored when those paths errored.

Co-Authored-By: João <joao@crewai.com>
2026-03-28 16:55:16 +00:00
Devin AI
99066ca278 fix: don't honor result_as_answer when tool execution errors
When a tool with result_as_answer=True raises an exception, the agent
now continues reasoning about the error instead of treating the error
message as the final answer.

Fixes #5156

Co-Authored-By: João <joao@crewai.com>
2026-03-28 16:47:47 +00:00
7 changed files with 355 additions and 3 deletions

View File

@@ -1072,6 +1072,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
"result": result,
"from_cache": from_cache,
"original_tool": original_tool,
"error_occurred": error_event_emitted,
}
def _append_tool_result_and_check_finality(
@@ -1082,6 +1083,7 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
result = cast(str, execution_result["result"])
from_cache = cast(bool, execution_result["from_cache"])
original_tool = execution_result["original_tool"]
error_occurred = execution_result.get("error_occurred", False)
tool_message: LLMMessage = {
"role": "tool",
@@ -1098,10 +1100,13 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
color="green",
)
# Don't honor result_as_answer when the tool execution errored;
# let the agent reflect on the error instead.
if (
original_tool
and hasattr(original_tool, "result_as_answer")
and original_tool.result_as_answer
and not error_occurred
):
return AgentFinish(
thought="Tool result is the final answer",

View File

@@ -1635,6 +1635,7 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
"result": f"Error executing tool: {e}",
"from_cache": False,
"original_tool": None,
"error_occurred": True,
}
execution_results = [
result for result in ordered_results if result is not None
@@ -1666,10 +1667,14 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
color="green",
)
# Don't honor result_as_answer when the tool execution errored;
# let the agent reflect on the error instead.
error_occurred = execution_result.get("error_occurred", False)
if (
original_tool
and hasattr(original_tool, "result_as_answer")
and original_tool.result_as_answer
and not error_occurred
):
self.state.current_answer = AgentFinish(
thought="Tool result is the final answer",
@@ -1704,10 +1709,14 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
color="green",
)
# Don't honor result_as_answer when the tool execution errored;
# let the agent reflect on the error instead.
error_occurred = execution_result.get("error_occurred", False)
if (
original_tool
and hasattr(original_tool, "result_as_answer")
and original_tool.result_as_answer
and not error_occurred
):
# Set the result as the final answer
self.state.current_answer = AgentFinish(
@@ -1964,6 +1973,7 @@ class AgentExecutor(Flow[AgentExecutorState], CrewAgentExecutorMixin):
"result": result,
"from_cache": from_cache,
"original_tool": original_tool,
"error_occurred": error_event_emitted,
}
def _extract_tool_name(self, tool_call: Any) -> str:

View File

@@ -108,6 +108,7 @@ class ToolUsage:
self.action = action
self.function_calling_llm = function_calling_llm
self.fingerprint_context = fingerprint_context or {}
self._last_execution_errored: bool = False
# Set the maximum parsing attempts for bigger models
if (
@@ -126,8 +127,10 @@ class ToolUsage:
def use(
self, calling: ToolCalling | InstructorToolCalling, tool_string: str
) -> str:
self._last_execution_errored = False
if isinstance(calling, ToolUsageError):
error = calling.message
self._last_execution_errored = True
if self.agent and self.agent.verbose:
self._printer.print(content=f"\n\n{error}\n", color="red")
if self.task:
@@ -138,6 +141,7 @@ class ToolUsage:
tool = self._select_tool(calling.tool_name)
except Exception as e:
error = getattr(e, "message", str(e))
self._last_execution_errored = True
if self.task:
self.task.increment_tools_errors()
if self.agent and self.agent.verbose:
@@ -154,6 +158,7 @@ class ToolUsage:
except Exception as e:
error = getattr(e, "message", str(e))
self._last_execution_errored = True
if self.task:
self.task.increment_tools_errors()
if self.agent and self.agent.verbose:
@@ -174,8 +179,10 @@ class ToolUsage:
Returns:
The result of the tool execution as a string.
"""
self._last_execution_errored = False
if isinstance(calling, ToolUsageError):
error = calling.message
self._last_execution_errored = True
if self.agent and self.agent.verbose:
self._printer.print(content=f"\n\n{error}\n", color="red")
if self.task:
@@ -186,6 +193,7 @@ class ToolUsage:
tool = self._select_tool(calling.tool_name)
except Exception as e:
error = getattr(e, "message", str(e))
self._last_execution_errored = True
if self.task:
self.task.increment_tools_errors()
if self.agent and self.agent.verbose:
@@ -203,6 +211,7 @@ class ToolUsage:
)
except Exception as e:
error = getattr(e, "message", str(e))
self._last_execution_errored = True
if self.task:
self.task.increment_tools_errors()
if self.agent and self.agent.verbose:
@@ -410,6 +419,7 @@ class ToolUsage:
except Exception as e:
self.on_tool_error(tool=tool, tool_calling=calling, e=e)
error_event_emitted = True
self._last_execution_errored = True
self._run_attempts += 1
if self._run_attempts > self._max_parsing_attempts:
self._telemetry.tool_usage_error(llm=self.function_calling_llm)
@@ -642,6 +652,7 @@ class ToolUsage:
except Exception as e:
self.on_tool_error(tool=tool, tool_calling=calling, e=e)
error_event_emitted = True
self._last_execution_errored = True
self._run_attempts += 1
if self._run_attempts > self._max_parsing_attempts:
self._telemetry.tool_usage_error(llm=self.function_calling_llm)

View File

@@ -1575,11 +1575,13 @@ def execute_single_native_tool_call(
color="green",
)
# Check result_as_answer
# Check result_as_answer — but don't honor it when the tool errored;
# let the agent reflect on the error instead.
is_result_as_answer = bool(
original_tool
and hasattr(original_tool, "result_as_answer")
and original_tool.result_as_answer
and not error_event_emitted
)
return NativeToolCallResult(

View File

@@ -140,7 +140,12 @@ async def aexecute_tool_and_check_finality(
except Exception as e:
logger.log("error", f"Error in after_tool_call hook: {e}")
return ToolResult(modified_result, tool.result_as_answer)
# Don't honor result_as_answer when the tool execution errored;
# let the agent reflect on the error instead.
effective_result_as_answer = (
tool.result_as_answer and not tool_usage._last_execution_errored
)
return ToolResult(modified_result, effective_result_as_answer)
tool_result = i18n.errors("wrong_tool_name").format(
tool=sanitized_tool_name,
@@ -261,7 +266,12 @@ def execute_tool_and_check_finality(
except Exception as e:
logger.log("error", f"Error in after_tool_call hook: {e}")
return ToolResult(modified_result, tool.result_as_answer)
# Don't honor result_as_answer when the tool execution errored;
# let the agent reflect on the error instead.
effective_result_as_answer = (
tool.result_as_answer and not tool_usage._last_execution_errored
)
return ToolResult(modified_result, effective_result_as_answer)
tool_result = i18n.errors("wrong_tool_name").format(
tool=sanitized_tool_name,

View File

@@ -879,6 +879,80 @@ class TestNativeToolExecution:
assert len(tool_messages) == 1
assert tool_messages[0]["tool_call_id"] == "call_1"
def test_execute_native_tool_result_as_answer_not_honored_on_error(
    self, mock_dependencies
):
    """A failing ``result_as_answer`` tool must not finish the run.

    Regression test for https://github.com/crewAIInc/crewAI/issues/5156

    The callable registered for the tool raises during native tool
    execution, so the executor is expected to route to
    ``native_tool_completed`` and leave its state unfinished instead of
    promoting the error to the final answer.
    """

    def failing_tool() -> str:
        raise RuntimeError("Tool execution failed")

    # ``name`` is reserved by the Mock constructor, so it is assigned
    # separately from the other attributes.
    tool_stub = Mock(
        result_as_answer=True,
        max_usage_count=None,
        current_usage_count=0,
    )
    tool_stub.name = "failing_tool"

    pending_call = {
        "id": "call_1",
        "function": {"name": "failing_tool", "arguments": "{}"},
    }

    executor = AgentExecutor(**mock_dependencies)
    executor.original_tools = [tool_stub]
    executor._available_functions = {"failing_tool": failing_tool}
    executor.state.pending_tool_calls = [pending_call]

    outcome = executor.execute_native_tool()

    # The tool errored, so its output must not become the final answer.
    assert outcome == "native_tool_completed", (
        "Expected 'native_tool_completed' (not 'tool_result_is_final') "
        "when a result_as_answer tool errors"
    )
    assert not executor.state.is_finished, (
        "Agent state should NOT be finished when tool errored"
    )
def test_execute_native_tool_result_as_answer_honored_on_success(
    self, mock_dependencies
):
    """A successful ``result_as_answer`` tool ends the run with its output."""

    def success_tool() -> str:
        return "final answer from tool"

    # ``name`` is reserved by the Mock constructor, so it is assigned
    # separately from the other attributes.
    tool_stub = Mock(
        result_as_answer=True,
        max_usage_count=None,
        current_usage_count=0,
    )
    tool_stub.name = "success_tool"

    pending_call = {
        "id": "call_1",
        "function": {"name": "success_tool", "arguments": "{}"},
    }

    executor = AgentExecutor(**mock_dependencies)
    executor.original_tools = [tool_stub]
    executor._available_functions = {"success_tool": success_tool}
    executor.state.pending_tool_calls = [pending_call]

    outcome = executor.execute_native_tool()

    assert outcome == "tool_result_is_final", (
        "Expected 'tool_result_is_final' when result_as_answer tool succeeds"
    )
    # The executor state must carry the tool output as an AgentFinish.
    final_state = executor.state
    assert final_state.is_finished is True
    assert isinstance(final_state.current_answer, AgentFinish)
    assert final_state.current_answer.output == "final answer from tool"
def test_check_native_todo_completion_requires_current_todo(
self, mock_dependencies
):

View File

@@ -748,6 +748,246 @@ def test_tool_usage_finished_event_with_cached_result():
assert event.type == "tool_usage_finished"
def test_result_as_answer_not_honored_on_tool_error():
    """``ToolUsage.use`` records an error when a ``result_as_answer`` tool fails.

    Regression test for https://github.com/crewAIInc/crewAI/issues/5156

    The tool raises from ``_run``; afterwards ``_last_execution_errored``
    must be ``True`` so callers can decline to treat the error text as the
    final answer.
    """
    from crewai.tools.tool_calling import ToolCalling

    class FailingResultTool(BaseTool):
        name: str = "Failing Result Tool"
        description: str = "A tool that always fails but has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            raise ValueError("Something went wrong")

    structured = FailingResultTool().to_structured_tool()
    assert structured.result_as_answer is True

    # Stand-in agent: plain attributes plus canned i18n responses.
    agent_stub = MagicMock()
    agent_stub.configure_mock(
        **{
            "key": "test_agent_key",
            "role": "test_agent_role",
            "_original_role": "test_agent_role",
            "verbose": False,
            "fingerprint": None,
            "i18n.tools.return_value": {"name": "Failing Result Tool"},
            "i18n.errors.return_value": "Error: {error}",
            "i18n.slice.return_value": "Available tools: {tool_names}",
        }
    )

    task_stub = MagicMock()
    task_stub.delegations = 0
    task_stub.name = "Test Task"
    task_stub.description = "A test task"
    task_stub.id = "test-task-id"

    action_stub = MagicMock()
    action_stub.tool = "failing_result_tool"
    action_stub.tool_input = "{}"

    usage = ToolUsage(
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        tools=[structured],
        task=task_stub,
        function_calling_llm=None,
        agent=agent_stub,
        action=action_stub,
    )

    call = ToolCalling(tool_name="failing_result_tool", arguments={})
    usage.use(calling=call, tool_string="Action: failing_result_tool")

    # After a failed execution, the error flag should be set.
    assert usage._last_execution_errored is True, (
        "Expected _last_execution_errored to be True after tool failure"
    )
def test_result_as_answer_honored_on_tool_success():
    """``ToolUsage.use`` leaves the error flag clear when the tool succeeds."""
    from crewai.tools.tool_calling import ToolCalling

    class SuccessfulResultTool(BaseTool):
        name: str = "Successful Result Tool"
        description: str = "A tool that succeeds and has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            return "This is the final answer"

    structured = SuccessfulResultTool().to_structured_tool()
    assert structured.result_as_answer is True

    # Stand-in agent: plain attributes plus canned i18n responses.
    agent_stub = MagicMock()
    agent_stub.configure_mock(
        **{
            "key": "test_agent_key",
            "role": "test_agent_role",
            "_original_role": "test_agent_role",
            "verbose": False,
            "fingerprint": None,
            "i18n.tools.return_value": {"name": "Successful Result Tool"},
            "i18n.errors.return_value": "Error: {error}",
            "i18n.slice.return_value": "Available tools: {tool_names}",
        }
    )

    task_stub = MagicMock()
    task_stub.delegations = 0
    task_stub.name = "Test Task"
    task_stub.description = "A test task"
    task_stub.id = "test-task-id"

    action_stub = MagicMock()
    action_stub.tool = "successful_result_tool"
    action_stub.tool_input = "{}"

    usage = ToolUsage(
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        tools=[structured],
        task=task_stub,
        function_calling_llm=None,
        agent=agent_stub,
        action=action_stub,
    )

    call = ToolCalling(tool_name="successful_result_tool", arguments={})
    usage.use(calling=call, tool_string="Action: successful_result_tool")

    # After a successful execution, the error flag should NOT be set.
    assert usage._last_execution_errored is False, (
        "Expected _last_execution_errored to be False after tool success"
    )
def test_execute_tool_and_check_finality_not_final_on_error():
    """``execute_tool_and_check_finality`` must not mark an errored call final.

    Regression test for https://github.com/crewAIInc/crewAI/issues/5156

    The tool declares ``result_as_answer=True`` but raises from ``_run``;
    the returned result is expected to carry ``result_as_answer == False``.
    """
    from crewai.agents.parser import AgentAction
    from crewai.utilities.i18n import I18N
    from crewai.utilities.tool_utils import execute_tool_and_check_finality

    class FailingFinalTool(BaseTool):
        name: str = "Failing Final Tool"
        description: str = "A tool that fails but has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            raise RuntimeError("Tool execution failed")

    structured = FailingFinalTool().to_structured_tool()

    # Stand-in agent: plain attributes plus canned i18n responses.
    agent_stub = MagicMock()
    agent_stub.configure_mock(
        **{
            "key": "test_agent_key",
            "role": "test_agent_role",
            "_original_role": "test_agent_role",
            "verbose": False,
            "fingerprint": None,
            "i18n.tools.return_value": {"name": "Failing Final Tool"},
            "i18n.errors.return_value": "Error: {error}",
            "i18n.slice.return_value": "Available tools: {tool_names}",
        }
    )

    task_stub = MagicMock()
    task_stub.delegations = 0
    task_stub.name = "Test Task"
    task_stub.description = "A test task"
    task_stub.id = "test-task-id"

    translations = I18N()

    parsed_action = AgentAction(
        thought="I need to use this tool",
        tool="failing_final_tool",
        tool_input="{}",
        text="Action: failing_final_tool\nAction Input: {}",
    )

    outcome = execute_tool_and_check_finality(
        agent_action=parsed_action,
        tools=[structured],
        i18n=translations,
        agent_key="test_agent_key",
        agent_role="test_agent_role",
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        task=task_stub,
        agent=agent_stub,
        function_calling_llm=None,
    )

    # The result should NOT be treated as the final answer when the tool errored.
    assert outcome.result_as_answer is False, (
        "result_as_answer should be False when tool execution fails, "
        "even if the tool has result_as_answer=True"
    )
def test_execute_tool_and_check_finality_is_final_on_success():
    """``execute_tool_and_check_finality`` marks a successful call as final.

    The tool declares ``result_as_answer=True`` and returns normally, so
    the returned result is expected to carry ``result_as_answer == True``.
    """
    from crewai.agents.parser import AgentAction
    from crewai.utilities.i18n import I18N
    from crewai.utilities.tool_utils import execute_tool_and_check_finality

    class SuccessfulFinalTool(BaseTool):
        name: str = "Successful Final Tool"
        description: str = "A tool that succeeds and has result_as_answer=True"
        result_as_answer: bool = True

        def _run(self, **kwargs) -> str:
            return "This is the correct final answer"

    structured = SuccessfulFinalTool().to_structured_tool()

    # Stand-in agent: plain attributes plus canned i18n responses.
    agent_stub = MagicMock()
    agent_stub.configure_mock(
        **{
            "key": "test_agent_key",
            "role": "test_agent_role",
            "_original_role": "test_agent_role",
            "verbose": False,
            "fingerprint": None,
            "i18n.tools.return_value": {"name": "Successful Final Tool"},
            "i18n.errors.return_value": "Error: {error}",
            "i18n.slice.return_value": "Available tools: {tool_names}",
        }
    )

    task_stub = MagicMock()
    task_stub.delegations = 0
    task_stub.name = "Test Task"
    task_stub.description = "A test task"
    task_stub.id = "test-task-id"

    translations = I18N()

    parsed_action = AgentAction(
        thought="I need to use this tool",
        tool="successful_final_tool",
        tool_input="{}",
        text="Action: successful_final_tool\nAction Input: {}",
    )

    outcome = execute_tool_and_check_finality(
        agent_action=parsed_action,
        tools=[structured],
        i18n=translations,
        agent_key="test_agent_key",
        agent_role="test_agent_role",
        tools_handler=MagicMock(cache=None, last_used_tool=None),
        task=task_stub,
        agent=agent_stub,
        function_calling_llm=None,
    )

    # The result SHOULD be treated as the final answer when the tool succeeds.
    assert outcome.result_as_answer is True, (
        "result_as_answer should be True when tool execution succeeds "
        "and the tool has result_as_answer=True"
    )
def test_tool_error_does_not_emit_finished_event():
from crewai.tools.tool_calling import ToolCalling