feat: handle malformed or invalid response from CodeInterpreterTool

This commit is contained in:
Lucas Gomide
2025-04-22 12:24:40 -03:00
parent 50453c6984
commit 09543cd705
2 changed files with 32 additions and 3 deletions

View File

@@ -61,7 +61,7 @@ class GuardrailTask:
"You are a expert Python developer"
"You **must strictly** follow the task description, use the provided raw output as the input in your code. "
"Your code must:\n"
"- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is beign assined to 'result' variable.\n"
"- Return results with: print((True, data)) on success, or print((False, 'very detailed error message')) on failure. Make sure the final output is being assined to 'result' variable.\n"
"- Use the literal string of the task output (already included in your input) if needed.\n"
"- Generate the code **following strictly** the task description.\n"
"- Be valid Python 3 — executable as-is.\n"
@@ -151,7 +151,10 @@ class GuardrailTask:
return False, result
if isinstance(result, str):
result = ast.literal_eval(result)
try:
result = ast.literal_eval(result)
except Exception as e:
return False, f"Error parsing result: {str(e)}"
return result

View File

@@ -227,6 +227,11 @@ print(result)
"expected_result": False,
"expected_output": "Invalid output format",
},
{
"output": "bla-bla-bla",
"expected_result": False,
"expected_output": "Error parsing result: malformed node or string on line 1",
},
],
)
@patch("crewai_tools.CodeInterpreterTool.run")
@@ -237,7 +242,7 @@ def test_guardrail_task_execute_code(mock_run, mock_llm, tool_run_output, task_o
result = guardrail(task_output)
assert result[0] == tool_run_output["expected_result"]
assert result[1] == tool_run_output["expected_output"]
assert tool_run_output["expected_output"] in result[1]
@patch("crewai_tools.CodeInterpreterTool.run")
@@ -361,3 +366,24 @@ def test_guardrail_task_when_docker_is_available(mock_llm, task_output):
guardrail(task_output)
mock_init.assert_called_once_with(code=ANY, unsafe_mode=False)
def test_guardrail_task_when_tool_output_is_not_valid(mock_llm, task_output):
    """Check the constructor arguments the guardrail passes to CodeInterpreterTool.

    ``CodeInterpreterTool.__init__``, ``CodeInterpreterTool.run`` and
    ``subprocess.run`` are patched for the duration of the guardrail call; the
    only assertion is on the keyword arguments given to the tool's constructor.
    """
    # NOTE(review): the test name says "not valid", yet the patched ``run``
    # returns ``(True, "Valid output")`` -- confirm this is the intended scenario.
    task_guardrail = GuardrailTask(description="Test validation", llm=mock_llm)

    # Build the patchers up front; each one only takes effect inside the
    # ``with`` statement below, entered in the same order as listed here.
    init_patch = patch(
        "crewai_tools.CodeInterpreterTool.__init__", return_value=None
    )
    run_patch = patch(
        "crewai_tools.CodeInterpreterTool.run", return_value=(True, "Valid output")
    )
    subprocess_patch = patch("subprocess.run", return_value=True)

    with init_patch as init_mock, run_patch, subprocess_patch:
        task_guardrail(task_output)

    # The mock keeps its call history after the patch is undone.
    init_mock.assert_called_once_with(code=ANY, unsafe_mode=False)