diff --git a/lib/crewai/src/crewai/agents/crew_agent_executor.py b/lib/crewai/src/crewai/agents/crew_agent_executor.py index 56abaae02..d460c9e74 100644 --- a/lib/crewai/src/crewai/agents/crew_agent_executor.py +++ b/lib/crewai/src/crewai/agents/crew_agent_executor.py @@ -954,7 +954,15 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): structured_tool = structured break + # Determine if the tool is marked as unsafe + is_unsafe = False + if structured_tool and getattr(structured_tool, "unsafe", False): + is_unsafe = True + elif original_tool and getattr(original_tool, "unsafe", False): + is_unsafe = True + hook_blocked = False + hook_explicitly_allowed = False before_hook_context = ToolCallHookContext( tool_name=func_name, tool_input=args_dict, @@ -970,6 +978,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): if hook_result is False: hook_blocked = True break + if hook_result is True: + hook_explicitly_allowed = True except Exception as hook_error: if self.agent.verbose: self._printer.print( @@ -977,7 +987,18 @@ class CrewAgentExecutor(CrewAgentExecutorMixin): color="red", ) - if hook_blocked: + # Enforce fail-closed for unsafe tools: require explicit True from a hook + if is_unsafe and not hook_explicitly_allowed: + hook_blocked = True + + if hook_blocked and is_unsafe and not hook_explicitly_allowed: + result = ( + f"Unsafe tool execution denied (fail-closed): Tool '{func_name}' is marked as " + f"unsafe and requires an explicit safety policy (a before_tool_call hook " + f"returning True) to execute. Register a before_tool_call hook that returns " + f"True to allow this tool." + ) + elif hook_blocked: result = f"Tool execution blocked by hook. Tool: {func_name}" elif max_usage_reached and original_tool: result = f"Tool '{func_name}' has reached its usage limit of {original_tool.max_usage_count} times and cannot be used anymore." diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index e568dc0d4..a0f681647 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -909,7 +909,15 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin): structured_tool = structured break + # Determine if the tool is marked as unsafe + is_unsafe = False + if structured_tool and getattr(structured_tool, "unsafe", False): + is_unsafe = True + elif original_tool and getattr(original_tool, "unsafe", False): + is_unsafe = True + hook_blocked = False + hook_explicitly_allowed = False before_hook_context = ToolCallHookContext( tool_name=func_name, tool_input=args_dict, @@ -925,6 +933,8 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin): if hook_result is False: hook_blocked = True break + if hook_result is True: + hook_explicitly_allowed = True except Exception as hook_error: if self.agent.verbose: self._printer.print( @@ -932,7 +942,18 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin): color="red", ) - if hook_blocked: + # Enforce fail-closed for unsafe tools: require explicit True from a hook + if is_unsafe and not hook_explicitly_allowed: + hook_blocked = True + + if hook_blocked and is_unsafe and not hook_explicitly_allowed: + result = ( + f"Unsafe tool execution denied (fail-closed): Tool '{func_name}' is marked as " + f"unsafe and requires an explicit safety policy (a before_tool_call hook " + f"returning True) to execute. Register a before_tool_call hook that returns " + f"True to allow this tool." + ) + elif hook_blocked: result = f"Tool execution blocked by hook. Tool: {func_name}" elif not from_cache and not max_usage_reached: result = "Tool not found" diff --git a/lib/crewai/src/crewai/tools/base_tool.py b/lib/crewai/src/crewai/tools/base_tool.py index 88c0826a9..ff90cef0f 100644 --- a/lib/crewai/src/crewai/tools/base_tool.py +++ b/lib/crewai/src/crewai/tools/base_tool.py @@ -18,7 +18,6 @@ from pydantic import ( BaseModel as PydanticBaseModel, ConfigDict, Field, - ValidationError, create_model, field_validator, ) @@ -87,6 +86,15 @@ class BaseTool(BaseModel, ABC): default=False, description="Flag to check if the tool should be the final agent answer.", ) + unsafe: bool = Field( + default=False, + description=( + "Mark this tool as unsafe/high-impact. Unsafe tools are denied by default " + "(fail-closed) unless a before_tool_call hook explicitly returns True to " + "allow execution. This ensures that high-impact tools cannot run without " + "an explicit safety policy." + ), + ) max_usage_count: int | None = Field( default=None, description="Maximum number of times this tool can be used. None means unlimited usage.", @@ -250,6 +258,7 @@ class BaseTool(BaseModel, ABC): args_schema=self.args_schema, func=self._run, result_as_answer=self.result_as_answer, + unsafe=self.unsafe, max_usage_count=self.max_usage_count, current_usage_count=self.current_usage_count, ) @@ -489,6 +498,7 @@ def tool( /, *, result_as_answer: bool = ..., + unsafe: bool = ..., max_usage_count: int | None = ..., ) -> Callable[[Callable[P2, R2]], Tool[P2, R2]]: ... @@ -497,6 +507,7 @@ def tool( def tool( *, result_as_answer: bool = ..., + unsafe: bool = ..., max_usage_count: int | None = ..., ) -> Callable[[Callable[P2, R2]], Tool[P2, R2]]: ... @@ -504,6 +515,7 @@ def tool( def tool( *args: Callable[P2, R2] | str, result_as_answer: bool = False, + unsafe: bool = False, max_usage_count: int | None = None, ) -> Tool[P2, R2] | Callable[[Callable[P2, R2]], Tool[P2, R2]]: """Decorator to create a Tool from a function. @@ -516,6 +528,8 @@ def tool( Args: *args: Either the function to decorate or a custom tool name. result_as_answer: If True, the tool result becomes the final agent answer. + unsafe: If True, marks this tool as unsafe/high-impact. Unsafe tools are + denied by default unless a before_tool_call hook explicitly returns True. max_usage_count: Maximum times this tool can be used. None means unlimited. Returns: @@ -564,6 +578,7 @@ def tool( func=f, args_schema=args_schema, result_as_answer=result_as_answer, + unsafe=unsafe, max_usage_count=max_usage_count, current_usage_count=0, ) diff --git a/lib/crewai/src/crewai/tools/structured_tool.py b/lib/crewai/src/crewai/tools/structured_tool.py index 44f0af2d9..58dbd4fb5 100644 --- a/lib/crewai/src/crewai/tools/structured_tool.py +++ b/lib/crewai/src/crewai/tools/structured_tool.py @@ -35,6 +35,7 @@ class CrewStructuredTool: args_schema: type[BaseModel], func: Callable[..., Any], result_as_answer: bool = False, + unsafe: bool = False, max_usage_count: int | None = None, current_usage_count: int = 0, ) -> None: @@ -46,6 +47,8 @@ class CrewStructuredTool: args_schema: The pydantic model for the tool's arguments func: The function to run when the tool is called result_as_answer: Whether to return the output directly + unsafe: Whether this tool is unsafe/high-impact and requires explicit + hook approval to execute (fail-closed behavior). max_usage_count: Maximum number of times this tool can be used. None means unlimited usage. current_usage_count: Current number of times this tool has been used. """ @@ -55,6 +58,7 @@ class CrewStructuredTool: self.func = func self._logger = Logger() self.result_as_answer = result_as_answer + self.unsafe = unsafe self.max_usage_count = max_usage_count self.current_usage_count = current_usage_count self._original_tool: BaseTool | None = None diff --git a/lib/crewai/src/crewai/utilities/tool_utils.py b/lib/crewai/src/crewai/utilities/tool_utils.py index 027f136ed..f5ad4a770 100644 --- a/lib/crewai/src/crewai/utilities/tool_utils.py +++ b/lib/crewai/src/crewai/utilities/tool_utils.py @@ -104,19 +104,42 @@ async def aexecute_tool_and_check_finality( crew=crew, ) + # Determine if the tool is marked as unsafe + is_unsafe = getattr(tool, "unsafe", False) + + hook_blocked = False + hook_explicitly_allowed = False before_hooks = get_before_tool_call_hooks() try: for hook in before_hooks: result = hook(hook_context) if result is False: - blocked_message = ( - f"Tool execution blocked by hook. " - f"Tool: {tool_calling.tool_name}" - ) - return ToolResult(blocked_message, False) + hook_blocked = True + break + if result is True: + hook_explicitly_allowed = True except Exception as e: logger.log("error", f"Error in before_tool_call hook: {e}") + # Enforce fail-closed for unsafe tools: require explicit True from a hook + if is_unsafe and not hook_explicitly_allowed: + hook_blocked = True + + if hook_blocked and is_unsafe and not hook_explicitly_allowed: + blocked_message = ( + f"Unsafe tool execution denied (fail-closed): Tool '{tool_calling.tool_name}' is marked as " + f"unsafe and requires an explicit safety policy (a before_tool_call hook " + f"returning True) to execute. Register a before_tool_call hook that returns " + f"True to allow this tool." + ) + return ToolResult(blocked_message, False) + if hook_blocked: + blocked_message = ( + f"Tool execution blocked by hook. " + f"Tool: {tool_calling.tool_name}" + ) + return ToolResult(blocked_message, False) + tool_result = await tool_usage.ause(tool_calling, agent_action.text) after_hook_context = ToolCallHookContext( @@ -224,19 +247,42 @@ def execute_tool_and_check_finality( crew=crew, ) + # Determine if the tool is marked as unsafe + is_unsafe = getattr(tool, "unsafe", False) + + hook_blocked = False + hook_explicitly_allowed = False before_hooks = get_before_tool_call_hooks() try: for hook in before_hooks: result = hook(hook_context) if result is False: - blocked_message = ( - f"Tool execution blocked by hook. " - f"Tool: {tool_calling.tool_name}" - ) - return ToolResult(blocked_message, False) + hook_blocked = True + break + if result is True: + hook_explicitly_allowed = True except Exception as e: logger.log("error", f"Error in before_tool_call hook: {e}") + # Enforce fail-closed for unsafe tools: require explicit True from a hook + if is_unsafe and not hook_explicitly_allowed: + hook_blocked = True + + if hook_blocked and is_unsafe and not hook_explicitly_allowed: + blocked_message = ( + f"Unsafe tool execution denied (fail-closed): Tool '{tool_calling.tool_name}' is marked as " + f"unsafe and requires an explicit safety policy (a before_tool_call hook " + f"returning True) to execute. Register a before_tool_call hook that returns " + f"True to allow this tool." + ) + return ToolResult(blocked_message, False) + if hook_blocked: + blocked_message = ( + f"Tool execution blocked by hook. " + f"Tool: {tool_calling.tool_name}" + ) + return ToolResult(blocked_message, False) + tool_result = tool_usage.use(tool_calling, agent_action.text) after_hook_context = ToolCallHookContext( diff --git a/lib/crewai/tests/hooks/test_unsafe_tool_fail_closed.py b/lib/crewai/tests/hooks/test_unsafe_tool_fail_closed.py new file mode 100644 index 000000000..fbc2209f2 --- /dev/null +++ b/lib/crewai/tests/hooks/test_unsafe_tool_fail_closed.py @@ -0,0 +1,353 @@ +"""Tests for fail-closed behavior of unsafe tools. + +Unsafe tools (marked with unsafe=True) must be denied by default unless +a before_tool_call hook explicitly returns True. This enforces a fail-closed +security model where high-impact tools cannot run without an explicit safety +policy. + +See: https://github.com/crewAIInc/crewAI/issues/4593 +""" + +from __future__ import annotations + +from unittest.mock import Mock + +import pytest + +from crewai.hooks import tool_hooks +from crewai.hooks.tool_hooks import ( + ToolCallHookContext, + get_before_tool_call_hooks, + register_before_tool_call_hook, + unregister_before_tool_call_hook, +) +from crewai.tools import BaseTool, tool +from crewai.tools.structured_tool import CrewStructuredTool +from crewai.tools.tool_types import ToolResult +from crewai.utilities.tool_utils import execute_tool_and_check_finality + + +@pytest.fixture(autouse=True) +def clear_hooks(): + """Clear global hooks before and after each test.""" + original_before = tool_hooks._before_tool_call_hooks.copy() + original_after = tool_hooks._after_tool_call_hooks.copy() + + tool_hooks._before_tool_call_hooks.clear() + tool_hooks._after_tool_call_hooks.clear() + + yield + + tool_hooks._before_tool_call_hooks.clear() + tool_hooks._after_tool_call_hooks.clear() + tool_hooks._before_tool_call_hooks.extend(original_before) + tool_hooks._after_tool_call_hooks.extend(original_after) + + +class TestUnsafeFlagOnBaseTool: + """Test that the unsafe flag can be set on BaseTool subclasses.""" + + def test_default_unsafe_is_false(self): + """Tools are safe by default (unsafe=False).""" + from pydantic import BaseModel + + class MyTool(BaseTool): + name: str = "my_tool" + description: str = "A test tool" + + def _run(self, **kwargs) -> str: + return "result" + + t = MyTool() + assert t.unsafe is False + + def test_unsafe_can_be_set_true(self): + """Tools can be marked as unsafe.""" + from pydantic import BaseModel + + class UnsafeTool(BaseTool): + name: str = "unsafe_tool" + description: str = "A dangerous tool" + unsafe: bool = True + + def _run(self, **kwargs) -> str: + return "dangerous result" + + t = UnsafeTool() + assert t.unsafe is True + + +class TestUnsafeFlagOnToolDecorator: + """Test that the @tool decorator supports unsafe=True.""" + + def test_tool_decorator_default_safe(self): + """@tool decorator creates safe tools by default.""" + + @tool("safe_tool") + def safe_tool(x: str) -> str: + """A safe tool.""" + return x + + assert safe_tool.unsafe is False + + def test_tool_decorator_unsafe_true(self): + """@tool decorator supports unsafe=True.""" + + @tool("unsafe_tool", unsafe=True) + def unsafe_tool(x: str) -> str: + """An unsafe tool.""" + return x + + assert unsafe_tool.unsafe is True + + def test_tool_decorator_unsafe_propagates_to_structured_tool(self): + """unsafe flag propagates when converting to CrewStructuredTool.""" + + @tool("unsafe_tool", unsafe=True) + def unsafe_tool(x: str) -> str: + """An unsafe tool.""" + return x + + structured = unsafe_tool.to_structured_tool() + assert structured.unsafe is True + + +class TestUnsafeFlagOnCrewStructuredTool: + """Test that CrewStructuredTool stores the unsafe flag.""" + + def test_default_unsafe_is_false(self): + """CrewStructuredTool defaults to unsafe=False.""" + from pydantic import BaseModel + + class Schema(BaseModel): + x: str + + t = CrewStructuredTool( + name="test", + description="test", + args_schema=Schema, + func=lambda x: x, + ) + assert t.unsafe is False + + def test_unsafe_can_be_set_true(self): + """CrewStructuredTool accepts unsafe=True.""" + from pydantic import BaseModel + + class Schema(BaseModel): + x: str + + t = CrewStructuredTool( + name="test", + description="test", + args_schema=Schema, + func=lambda x: x, + unsafe=True, + ) + assert t.unsafe is True + + +class TestFailClosedBehavior: + """Test fail-closed enforcement for unsafe tools. + + These tests verify that: + - Unsafe tools are blocked when no hooks are registered + - Unsafe tools are blocked when hooks return None (neutral) + - Unsafe tools execute when a hook explicitly returns True + - Safe tools (default) are unaffected by the fail-closed logic + - Hook returning False still blocks safe tools (existing behavior preserved) + + Example usage for users: + + from crewai.tools import tool + from crewai.hooks.tool_hooks import register_before_tool_call_hook, ToolCallHookContext + + # 1. Mark a tool as unsafe + @tool("delete_database", unsafe=True) + def delete_database(db_name: str) -> str: + \"\"\"Delete an entire database.\"\"\" + return f"Deleted {db_name}" + + # 2. Register a safety policy hook that explicitly approves execution + def safety_policy(context: ToolCallHookContext) -> bool | None: + # Your approval logic here (e.g. prompt user, check allow-list) + if context.tool_name == "delete_database": + return True # Explicitly allow + return None # Neutral for other tools + + register_before_tool_call_hook(safety_policy) + """ + + def _make_mock_tools(self, unsafe: bool = False): + """Helper to create a mock tool setup for execute_tool_and_check_finality. + + Note: AgentAction.tool_input must be a JSON string (not a dict) because + ToolUsage._validate_tool_input parses it as JSON text. + """ + from pydantic import BaseModel + + from crewai.agents.parser import AgentAction + from crewai.utilities.i18n import I18N + + class Schema(BaseModel): + x: str = "default" + + structured = CrewStructuredTool( + name="test_tool", + description="A test tool", + args_schema=Schema, + func=lambda x="default": f"executed with {x}", + unsafe=unsafe, + ) + + agent_action = AgentAction( + tool="test_tool", + tool_input='{"x": "value"}', + text='Action: test_tool\nAction Input: {"x": "value"}', + thought="Let me use the test tool", + ) + + i18n = I18N() + + return structured, agent_action, i18n + + def test_unsafe_tool_blocked_no_hooks(self): + """Unsafe tool is denied when no before_tool_call hooks are registered.""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=True) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + assert isinstance(result, ToolResult) + assert "Unsafe tool execution denied (fail-closed)" in result.result + assert "test_tool" in result.result + assert result.result_as_answer is False + + def test_unsafe_tool_blocked_hook_returns_none(self): + """Unsafe tool is denied when hook returns None (neutral, not explicit approval).""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=True) + + def neutral_hook(context: ToolCallHookContext): + return None # Neutral - does NOT count as approval + + register_before_tool_call_hook(neutral_hook) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + assert "Unsafe tool execution denied (fail-closed)" in result.result + + def test_unsafe_tool_allowed_hook_returns_true(self): + """Unsafe tool executes when a hook explicitly returns True.""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=True) + + def allow_hook(context: ToolCallHookContext): + return True # Explicit approval + + register_before_tool_call_hook(allow_hook) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + # Tool should have executed successfully + assert "Unsafe tool execution denied" not in result.result + assert "executed with" in result.result + + def test_safe_tool_unaffected_no_hooks(self): + """Safe tools (default unsafe=False) execute normally without hooks.""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=False) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + assert "executed with" in result.result + assert "Unsafe tool execution denied" not in result.result + + def test_safe_tool_still_blocked_by_hook_returning_false(self): + """Safe tools are still blocked if a hook explicitly returns False (existing behavior).""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=False) + + def block_hook(context: ToolCallHookContext): + return False # Block execution + + register_before_tool_call_hook(block_hook) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + assert "Tool execution blocked by hook" in result.result + + def test_unsafe_tool_blocked_by_false_even_with_prior_true(self): + """If a hook returns False, the tool is blocked even if a prior hook returned True.""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=True) + + def allow_hook(context: ToolCallHookContext): + return True + + def block_hook(context: ToolCallHookContext): + return False + + register_before_tool_call_hook(allow_hook) + register_before_tool_call_hook(block_hook) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + # False takes priority - but since True was already set, the hook_blocked + # message applies. The key point is the tool does NOT execute. + assert "blocked" in result.result.lower() or "denied" in result.result.lower() + assert "executed with" not in result.result + + def test_error_message_is_deterministic(self): + """Error message for unsafe tool denial is deterministic and contains actionable info.""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=True) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + expected_prefix = "Unsafe tool execution denied (fail-closed):" + assert result.result.startswith(expected_prefix) + assert "before_tool_call hook" in result.result + assert "returning True" in result.result + + def test_multiple_hooks_first_true_allows_unsafe(self): + """Multiple hooks: if any returns True before a False, tool is allowed.""" + structured, agent_action, i18n = self._make_mock_tools(unsafe=True) + + def allow_hook(context: ToolCallHookContext): + return True + + def neutral_hook(context: ToolCallHookContext): + return None + + register_before_tool_call_hook(allow_hook) + register_before_tool_call_hook(neutral_hook) + + result = execute_tool_and_check_finality( + agent_action=agent_action, + tools=[structured], + i18n=i18n, + ) + + assert "executed with" in result.result