mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-08 15:48:29 +00:00
test: add test helper to assert Agent Experiments (#3156)
This commit is contained in:
49
src/crewai/evaluation/experiment/testing.py
Normal file
49
src/crewai/evaluation/experiment/testing.py
Normal file
@@ -0,0 +1,49 @@
|
||||
import warnings
|
||||
from crewai.experimental.evaluation import ExperimentResults
|
||||
|
||||
def assert_experiment_successfully(experiment_results: ExperimentResults) -> None:
|
||||
"""
|
||||
Assert that all experiment results passed successfully.
|
||||
|
||||
Args:
|
||||
experiment_results: The experiment results to check
|
||||
|
||||
Raises:
|
||||
AssertionError: If any test case failed
|
||||
"""
|
||||
failed_tests = [result for result in experiment_results.results if not result.passed]
|
||||
|
||||
if failed_tests:
|
||||
detailed_failures: list[str] = []
|
||||
|
||||
for result in failed_tests:
|
||||
expected = result.expected_score
|
||||
actual = result.score
|
||||
detailed_failures.append(f"- {result.identifier}: expected {expected}, got {actual}")
|
||||
|
||||
failure_details = "\n".join(detailed_failures)
|
||||
raise AssertionError(f"The following test cases failed:\n{failure_details}")
|
||||
|
||||
def assert_experiment_no_regression(comparison_result: dict[str, list[str]]) -> None:
|
||||
"""
|
||||
Assert that there are no regressions in the experiment results compared to baseline.
|
||||
Also warns if there are missing tests.
|
||||
|
||||
Args:
|
||||
comparison_result: The result from compare_with_baseline()
|
||||
|
||||
Raises:
|
||||
AssertionError: If there are regressions
|
||||
"""
|
||||
# Check for regressions
|
||||
regressed = comparison_result.get("regressed", [])
|
||||
if regressed:
|
||||
raise AssertionError(f"Regression detected! The following tests that previously passed now fail: {regressed}")
|
||||
|
||||
# Check for missing tests and warn
|
||||
missing_tests = comparison_result.get("missing_tests", [])
|
||||
if missing_tests:
|
||||
warnings.warn(
|
||||
f"Warning: {len(missing_tests)} tests from the baseline are missing in the current run: {missing_tests}",
|
||||
UserWarning
|
||||
)
|
||||
52
src/crewai/experimental/evaluation/testing.py
Normal file
52
src/crewai/experimental/evaluation/testing.py
Normal file
@@ -0,0 +1,52 @@
|
||||
import inspect
|
||||
|
||||
from typing_extensions import Any
|
||||
import warnings
|
||||
from crewai.experimental.evaluation.experiment import ExperimentResults, ExperimentRunner
|
||||
from crewai import Crew
|
||||
|
||||
def assert_experiment_successfully(experiment_results: ExperimentResults, baseline_filepath: str | None = None) -> None:
|
||||
failed_tests = [result for result in experiment_results.results if not result.passed]
|
||||
|
||||
if failed_tests:
|
||||
detailed_failures: list[str] = []
|
||||
|
||||
for result in failed_tests:
|
||||
expected = result.expected_score
|
||||
actual = result.score
|
||||
detailed_failures.append(f"- {result.identifier}: expected {expected}, got {actual}")
|
||||
|
||||
failure_details = "\n".join(detailed_failures)
|
||||
raise AssertionError(f"The following test cases failed:\n{failure_details}")
|
||||
|
||||
baseline_filepath = baseline_filepath or _get_baseline_filepath_fallback()
|
||||
comparison = experiment_results.compare_with_baseline(baseline_filepath=baseline_filepath)
|
||||
assert_experiment_no_regression(comparison)
|
||||
|
||||
def assert_experiment_no_regression(comparison_result: dict[str, list[str]]) -> None:
|
||||
regressed = comparison_result.get("regressed", [])
|
||||
if regressed:
|
||||
raise AssertionError(f"Regression detected! The following tests that previously passed now fail: {regressed}")
|
||||
|
||||
missing_tests = comparison_result.get("missing_tests", [])
|
||||
if missing_tests:
|
||||
warnings.warn(
|
||||
f"Warning: {len(missing_tests)} tests from the baseline are missing in the current run: {missing_tests}",
|
||||
UserWarning
|
||||
)
|
||||
|
||||
def run_experiment(dataset: list[dict[str, Any]], crew: Crew, verbose: bool = False) -> ExperimentResults:
|
||||
runner = ExperimentRunner(dataset=dataset)
|
||||
|
||||
return runner.run(crew=crew, print_summary=verbose)
|
||||
|
||||
def _get_baseline_filepath_fallback() -> str:
|
||||
test_func_name = "experiment_fallback"
|
||||
|
||||
try:
|
||||
current_frame = inspect.currentframe()
|
||||
if current_frame is not None:
|
||||
test_func_name = current_frame.f_back.f_back.f_code.co_name # type: ignore[union-attr]
|
||||
except Exception:
|
||||
...
|
||||
return f"{test_func_name}_results.json"
|
||||
Reference in New Issue
Block a user