test: fix evaluation tests and update cassettes

This commit is contained in:
Greyson LaLonde
2025-09-26 22:59:43 -04:00
parent d2249e621d
commit 9d3fe72e75
3 changed files with 369 additions and 404 deletions

View File

@@ -227,8 +227,8 @@ class TestAgentEvaluator:
(goal_alignment,) = result.metrics.values()
assert goal_alignment.score == 5.0
expected_feedback = "The agent provided a thorough guide on how to conduct a test task but failed to produce specific expected output"
assert expected_feedback in goal_alignment.feedback
# Check that feedback mentions missing requirements or partial alignment
assert "missed key requirements" in goal_alignment.feedback or "partial" in goal_alignment.feedback.lower()
assert goal_alignment.raw_response is not None
assert '"score": 5' in goal_alignment.raw_response