Compare commits

...

2 Commits

Author SHA1 Message Date
Devin AI
1073483854 fix: remove unused imports in test file (review bot feedback)
Co-Authored-By: João <joao@crewai.com>
2026-02-25 19:44:27 +00:00
Devin AI
660a186375 feat: enforce fail-closed defaults for unsafe tool execution
Fixes #4593

- Add  field to BaseTool with documentation
- Propagate  flag through CrewStructuredTool and @tool decorator
- Enforce fail-closed behavior in all tool execution paths:
  - crew_agent_executor.py (native tool calling path)
  - experimental/agent_executor.py (experimental executor path)
  - tool_utils.py (ReAct text-based path, sync and async)
- Unsafe tools require a before_tool_call hook returning True to execute
- Deterministic error message for denied unsafe tool execution
- Add 15 regression tests covering allow/deny paths

Co-Authored-By: João <joao@crewai.com>
2026-02-25 19:40:15 +00:00
6 changed files with 469 additions and 13 deletions

View File

@@ -954,7 +954,15 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
structured_tool = structured
break
# Determine if the tool is marked as unsafe
is_unsafe = False
if structured_tool and getattr(structured_tool, "unsafe", False):
is_unsafe = True
elif original_tool and getattr(original_tool, "unsafe", False):
is_unsafe = True
hook_blocked = False
hook_explicitly_allowed = False
before_hook_context = ToolCallHookContext(
tool_name=func_name,
tool_input=args_dict,
@@ -970,6 +978,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if hook_result is False:
hook_blocked = True
break
if hook_result is True:
hook_explicitly_allowed = True
except Exception as hook_error:
if self.agent.verbose:
self._printer.print(
@@ -977,7 +987,18 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
color="red",
)
if hook_blocked:
# Enforce fail-closed for unsafe tools: require explicit True from a hook
if is_unsafe and not hook_explicitly_allowed:
hook_blocked = True
if hook_blocked and is_unsafe and not hook_explicitly_allowed:
result = (
f"Unsafe tool execution denied (fail-closed): Tool '{func_name}' is marked as "
f"unsafe and requires an explicit safety policy (a before_tool_call hook "
f"returning True) to execute. Register a before_tool_call hook that returns "
f"True to allow this tool."
)
elif hook_blocked:
result = f"Tool execution blocked by hook. Tool: {func_name}"
elif max_usage_reached and original_tool:
result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore."

View File

@@ -909,7 +909,15 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
structured_tool = structured
break
# Determine if the tool is marked as unsafe
is_unsafe = False
if structured_tool and getattr(structured_tool, "unsafe", False):
is_unsafe = True
elif original_tool and getattr(original_tool, "unsafe", False):
is_unsafe = True
hook_blocked = False
hook_explicitly_allowed = False
before_hook_context = ToolCallHookContext(
tool_name=func_name,
tool_input=args_dict,
@@ -925,6 +933,8 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
if hook_result is False:
hook_blocked = True
break
if hook_result is True:
hook_explicitly_allowed = True
except Exception as hook_error:
if self.agent.verbose:
self._printer.print(
@@ -932,7 +942,18 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
color="red",
)
if hook_blocked:
# Enforce fail-closed for unsafe tools: require explicit True from a hook
if is_unsafe and not hook_explicitly_allowed:
hook_blocked = True
if hook_blocked and is_unsafe and not hook_explicitly_allowed:
result = (
f"Unsafe tool execution denied (fail-closed): Tool '{func_name}' is marked as "
f"unsafe and requires an explicit safety policy (a before_tool_call hook "
f"returning True) to execute. Register a before_tool_call hook that returns "
f"True to allow this tool."
)
elif hook_blocked:
result = f"Tool execution blocked by hook. Tool: {func_name}"
elif not from_cache and not max_usage_reached:
result = "Tool not found"

View File

@@ -18,7 +18,6 @@ from pydantic import (
BaseModel as PydanticBaseModel,
ConfigDict,
Field,
ValidationError,
create_model,
field_validator,
)
@@ -87,6 +86,15 @@ class BaseTool(BaseModel, ABC):
default=False,
description="Flag to check if the tool should be the final agent answer.",
)
unsafe: bool = Field(
default=False,
description=(
"Mark this tool as unsafe/high-impact. Unsafe tools are denied by default "
"(fail-closed) unless a before_tool_call hook explicitly returns True to "
"allow execution. This ensures that high-impact tools cannot run without "
"an explicit safety policy."
),
)
max_usage_count: int | None = Field(
default=None,
description="Maximum number of times this tool can be used. None means unlimited usage.",
@@ -250,6 +258,7 @@ class BaseTool(BaseModel, ABC):
args_schema=self.args_schema,
func=self._run,
result_as_answer=self.result_as_answer,
unsafe=self.unsafe,
max_usage_count=self.max_usage_count,
current_usage_count=self.current_usage_count,
)
@@ -489,6 +498,7 @@ def tool(
/,
*,
result_as_answer: bool = ...,
unsafe: bool = ...,
max_usage_count: int | None = ...,
) -> Callable[[Callable[P2, R2]], Tool[P2, R2]]: ...
@@ -497,6 +507,7 @@ def tool(
def tool(
*,
result_as_answer: bool = ...,
unsafe: bool = ...,
max_usage_count: int | None = ...,
) -> Callable[[Callable[P2, R2]], Tool[P2, R2]]: ...
@@ -504,6 +515,7 @@ def tool(
def tool(
*args: Callable[P2, R2] | str,
result_as_answer: bool = False,
unsafe: bool = False,
max_usage_count: int | None = None,
) -> Tool[P2, R2] | Callable[[Callable[P2, R2]], Tool[P2, R2]]:
"""Decorator to create a Tool from a function.
@@ -516,6 +528,8 @@ def tool(
Args:
*args: Either the function to decorate or a custom tool name.
result_as_answer: If True, the tool result becomes the final agent answer.
unsafe: If True, marks this tool as unsafe/high-impact. Unsafe tools are
denied by default unless a before_tool_call hook explicitly returns True.
max_usage_count: Maximum times this tool can be used. None means unlimited.
Returns:
@@ -564,6 +578,7 @@ def tool(
func=f,
args_schema=args_schema,
result_as_answer=result_as_answer,
unsafe=unsafe,
max_usage_count=max_usage_count,
current_usage_count=0,
)

View File

@@ -35,6 +35,7 @@ class CrewStructuredTool:
args_schema: type[BaseModel],
func: Callable[..., Any],
result_as_answer: bool = False,
unsafe: bool = False,
max_usage_count: int | None = None,
current_usage_count: int = 0,
) -> None:
@@ -46,6 +47,8 @@ class CrewStructuredTool:
args_schema: The pydantic model for the tool's arguments
func: The function to run when the tool is called
result_as_answer: Whether to return the output directly
unsafe: Whether this tool is unsafe/high-impact and requires explicit
hook approval to execute (fail-closed behavior).
max_usage_count: Maximum number of times this tool can be used. None means unlimited usage.
current_usage_count: Current number of times this tool has been used.
"""
@@ -55,6 +58,7 @@ class CrewStructuredTool:
self.func = func
self._logger = Logger()
self.result_as_answer = result_as_answer
self.unsafe = unsafe
self.max_usage_count = max_usage_count
self.current_usage_count = current_usage_count
self._original_tool: BaseTool | None = None

View File

@@ -104,19 +104,42 @@ async def aexecute_tool_and_check_finality(
crew=crew,
)
# Determine if the tool is marked as unsafe
is_unsafe = getattr(tool, "unsafe", False)
hook_blocked = False
hook_explicitly_allowed = False
before_hooks = get_before_tool_call_hooks()
try:
for hook in before_hooks:
result = hook(hook_context)
if result is False:
blocked_message = (
f"Tool execution blocked by hook. "
f"Tool: {tool_calling.tool_name}"
)
return ToolResult(blocked_message, False)
hook_blocked = True
break
if result is True:
hook_explicitly_allowed = True
except Exception as e:
logger.log("error", f"Error in before_tool_call hook: {e}")
# Enforce fail-closed for unsafe tools: require explicit True from a hook
if is_unsafe and not hook_explicitly_allowed:
hook_blocked = True
if hook_blocked and is_unsafe and not hook_explicitly_allowed:
blocked_message = (
f"Unsafe tool execution denied (fail-closed): Tool '{tool_calling.tool_name}' is marked as "
f"unsafe and requires an explicit safety policy (a before_tool_call hook "
f"returning True) to execute. Register a before_tool_call hook that returns "
f"True to allow this tool."
)
return ToolResult(blocked_message, False)
if hook_blocked:
blocked_message = (
f"Tool execution blocked by hook. "
f"Tool: {tool_calling.tool_name}"
)
return ToolResult(blocked_message, False)
tool_result = await tool_usage.ause(tool_calling, agent_action.text)
after_hook_context = ToolCallHookContext(
@@ -224,19 +247,42 @@ def execute_tool_and_check_finality(
crew=crew,
)
# Determine if the tool is marked as unsafe
is_unsafe = getattr(tool, "unsafe", False)
hook_blocked = False
hook_explicitly_allowed = False
before_hooks = get_before_tool_call_hooks()
try:
for hook in before_hooks:
result = hook(hook_context)
if result is False:
blocked_message = (
f"Tool execution blocked by hook. "
f"Tool: {tool_calling.tool_name}"
)
return ToolResult(blocked_message, False)
hook_blocked = True
break
if result is True:
hook_explicitly_allowed = True
except Exception as e:
logger.log("error", f"Error in before_tool_call hook: {e}")
# Enforce fail-closed for unsafe tools: require explicit True from a hook
if is_unsafe and not hook_explicitly_allowed:
hook_blocked = True
if hook_blocked and is_unsafe and not hook_explicitly_allowed:
blocked_message = (
f"Unsafe tool execution denied (fail-closed): Tool '{tool_calling.tool_name}' is marked as "
f"unsafe and requires an explicit safety policy (a before_tool_call hook "
f"returning True) to execute. Register a before_tool_call hook that returns "
f"True to allow this tool."
)
return ToolResult(blocked_message, False)
if hook_blocked:
blocked_message = (
f"Tool execution blocked by hook. "
f"Tool: {tool_calling.tool_name}"
)
return ToolResult(blocked_message, False)
tool_result = tool_usage.use(tool_calling, agent_action.text)
after_hook_context = ToolCallHookContext(

View File

@@ -0,0 +1,349 @@
"""Tests for fail-closed behavior of unsafe tools.
Unsafe tools (marked with unsafe=True) must be denied by default unless
a before_tool_call hook explicitly returns True. This enforces a fail-closed
security model where high-impact tools cannot run without an explicit safety
policy.
See: https://github.com/crewAIInc/crewAI/issues/4593
"""
from __future__ import annotations
import pytest
from crewai.hooks import tool_hooks
from crewai.hooks.tool_hooks import (
ToolCallHookContext,
register_before_tool_call_hook,
)
from crewai.tools import BaseTool, tool
from crewai.tools.structured_tool import CrewStructuredTool
from crewai.tools.tool_types import ToolResult
from crewai.utilities.tool_utils import execute_tool_and_check_finality
@pytest.fixture(autouse=True)
def clear_hooks():
"""Clear global hooks before and after each test."""
original_before = tool_hooks._before_tool_call_hooks.copy()
original_after = tool_hooks._after_tool_call_hooks.copy()
tool_hooks._before_tool_call_hooks.clear()
tool_hooks._after_tool_call_hooks.clear()
yield
tool_hooks._before_tool_call_hooks.clear()
tool_hooks._after_tool_call_hooks.clear()
tool_hooks._before_tool_call_hooks.extend(original_before)
tool_hooks._after_tool_call_hooks.extend(original_after)
class TestUnsafeFlagOnBaseTool:
"""Test that the unsafe flag can be set on BaseTool subclasses."""
def test_default_unsafe_is_false(self):
"""Tools are safe by default (unsafe=False)."""
from pydantic import BaseModel
class MyTool(BaseTool):
name: str = "my_tool"
description: str = "A test tool"
def _run(self, **kwargs) -> str:
return "result"
t = MyTool()
assert t.unsafe is False
def test_unsafe_can_be_set_true(self):
"""Tools can be marked as unsafe."""
from pydantic import BaseModel
class UnsafeTool(BaseTool):
name: str = "unsafe_tool"
description: str = "A dangerous tool"
unsafe: bool = True
def _run(self, **kwargs) -> str:
return "dangerous result"
t = UnsafeTool()
assert t.unsafe is True
class TestUnsafeFlagOnToolDecorator:
"""Test that the @tool decorator supports unsafe=True."""
def test_tool_decorator_default_safe(self):
"""@tool decorator creates safe tools by default."""
@tool("safe_tool")
def safe_tool(x: str) -> str:
"""A safe tool."""
return x
assert safe_tool.unsafe is False
def test_tool_decorator_unsafe_true(self):
"""@tool decorator supports unsafe=True."""
@tool("unsafe_tool", unsafe=True)
def unsafe_tool(x: str) -> str:
"""An unsafe tool."""
return x
assert unsafe_tool.unsafe is True
def test_tool_decorator_unsafe_propagates_to_structured_tool(self):
"""unsafe flag propagates when converting to CrewStructuredTool."""
@tool("unsafe_tool", unsafe=True)
def unsafe_tool(x: str) -> str:
"""An unsafe tool."""
return x
structured = unsafe_tool.to_structured_tool()
assert structured.unsafe is True
class TestUnsafeFlagOnCrewStructuredTool:
"""Test that CrewStructuredTool stores the unsafe flag."""
def test_default_unsafe_is_false(self):
"""CrewStructuredTool defaults to unsafe=False."""
from pydantic import BaseModel
class Schema(BaseModel):
x: str
t = CrewStructuredTool(
name="test",
description="test",
args_schema=Schema,
func=lambda x: x,
)
assert t.unsafe is False
def test_unsafe_can_be_set_true(self):
"""CrewStructuredTool accepts unsafe=True."""
from pydantic import BaseModel
class Schema(BaseModel):
x: str
t = CrewStructuredTool(
name="test",
description="test",
args_schema=Schema,
func=lambda x: x,
unsafe=True,
)
assert t.unsafe is True
class TestFailClosedBehavior:
"""Test fail-closed enforcement for unsafe tools.
These tests verify that:
- Unsafe tools are blocked when no hooks are registered
- Unsafe tools are blocked when hooks return None (neutral)
- Unsafe tools execute when a hook explicitly returns True
- Safe tools (default) are unaffected by the fail-closed logic
- Hook returning False still blocks safe tools (existing behavior preserved)
Example usage for users:
from crewai.tools import tool
from crewai.hooks.tool_hooks import register_before_tool_call_hook, ToolCallHookContext
# 1. Mark a tool as unsafe
@tool("delete_database", unsafe=True)
def delete_database(db_name: str) -> str:
\"\"\"Delete an entire database.\"\"\"
return f"Deleted {db_name}"
# 2. Register a safety policy hook that explicitly approves execution
def safety_policy(context: ToolCallHookContext) -> bool | None:
# Your approval logic here (e.g. prompt user, check allow-list)
if context.tool_name == "delete_database":
return True # Explicitly allow
return None # Neutral for other tools
register_before_tool_call_hook(safety_policy)
"""
def _make_mock_tools(self, unsafe: bool = False):
"""Helper to create a mock tool setup for execute_tool_and_check_finality.
Note: AgentAction.tool_input must be a JSON string (not a dict) because
ToolUsage._validate_tool_input parses it as JSON text.
"""
from pydantic import BaseModel
from crewai.agents.parser import AgentAction
from crewai.utilities.i18n import I18N
class Schema(BaseModel):
x: str = "default"
structured = CrewStructuredTool(
name="test_tool",
description="A test tool",
args_schema=Schema,
func=lambda x="default": f"executed with {x}",
unsafe=unsafe,
)
agent_action = AgentAction(
tool="test_tool",
tool_input='{"x": "value"}',
text='Action: test_tool\nAction Input: {"x": "value"}',
thought="Let me use the test tool",
)
i18n = I18N()
return structured, agent_action, i18n
def test_unsafe_tool_blocked_no_hooks(self):
"""Unsafe tool is denied when no before_tool_call hooks are registered."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=True)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
assert isinstance(result, ToolResult)
assert "Unsafe tool execution denied (fail-closed)" in result.result
assert "test_tool" in result.result
assert result.result_as_answer is False
def test_unsafe_tool_blocked_hook_returns_none(self):
"""Unsafe tool is denied when hook returns None (neutral, not explicit approval)."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=True)
def neutral_hook(context: ToolCallHookContext):
return None # Neutral - does NOT count as approval
register_before_tool_call_hook(neutral_hook)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
assert "Unsafe tool execution denied (fail-closed)" in result.result
def test_unsafe_tool_allowed_hook_returns_true(self):
"""Unsafe tool executes when a hook explicitly returns True."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=True)
def allow_hook(context: ToolCallHookContext):
return True # Explicit approval
register_before_tool_call_hook(allow_hook)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
# Tool should have executed successfully
assert "Unsafe tool execution denied" not in result.result
assert "executed with" in result.result
def test_safe_tool_unaffected_no_hooks(self):
"""Safe tools (default unsafe=False) execute normally without hooks."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=False)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
assert "executed with" in result.result
assert "Unsafe tool execution denied" not in result.result
def test_safe_tool_still_blocked_by_hook_returning_false(self):
"""Safe tools are still blocked if a hook explicitly returns False (existing behavior)."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=False)
def block_hook(context: ToolCallHookContext):
return False # Block execution
register_before_tool_call_hook(block_hook)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
assert "Tool execution blocked by hook" in result.result
def test_unsafe_tool_blocked_by_false_even_with_prior_true(self):
"""If a hook returns False, the tool is blocked even if a prior hook returned True."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=True)
def allow_hook(context: ToolCallHookContext):
return True
def block_hook(context: ToolCallHookContext):
return False
register_before_tool_call_hook(allow_hook)
register_before_tool_call_hook(block_hook)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
# False takes priority - but since True was already set, the hook_blocked
# message applies. The key point is the tool does NOT execute.
assert "blocked" in result.result.lower() or "denied" in result.result.lower()
assert "executed with" not in result.result
def test_error_message_is_deterministic(self):
"""Error message for unsafe tool denial is deterministic and contains actionable info."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=True)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
expected_prefix = "Unsafe tool execution denied (fail-closed):"
assert result.result.startswith(expected_prefix)
assert "before_tool_call hook" in result.result
assert "returning True" in result.result
def test_multiple_hooks_first_true_allows_unsafe(self):
"""Multiple hooks: if any returns True before a False, tool is allowed."""
structured, agent_action, i18n = self._make_mock_tools(unsafe=True)
def allow_hook(context: ToolCallHookContext):
return True
def neutral_hook(context: ToolCallHookContext):
return None
register_before_tool_call_hook(allow_hook)
register_before_tool_call_hook(neutral_hook)
result = execute_tool_and_check_finality(
agent_action=agent_action,
tools=[structured],
i18n=i18n,
)
assert "executed with" in result.result