mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-21 13:58:15 +00:00
feat: add experimental folder for beta features
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
from unittest.mock import patch, MagicMock
|
||||
from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
|
||||
|
||||
from crewai.evaluation.base_evaluator import EvaluationScore
|
||||
from crewai.evaluation.metrics.goal_metrics import GoalAlignmentEvaluator
|
||||
from crewai.experimental.evaluation.base_evaluator import EvaluationScore
|
||||
from crewai.experimental.evaluation.metrics.goal_metrics import GoalAlignmentEvaluator
|
||||
from crewai.utilities.llm_utils import LLM
|
||||
|
||||
|
||||
|
||||
@@ -3,12 +3,12 @@ from unittest.mock import patch, MagicMock
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.evaluation.metrics.reasoning_metrics import (
|
||||
from crewai.experimental.evaluation.metrics.reasoning_metrics import (
|
||||
ReasoningEfficiencyEvaluator,
|
||||
)
|
||||
from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
|
||||
from crewai.utilities.llm_utils import LLM
|
||||
from crewai.evaluation.base_evaluator import EvaluationScore
|
||||
from crewai.experimental.evaluation.base_evaluator import EvaluationScore
|
||||
|
||||
class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest):
|
||||
@pytest.fixture
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from crewai.evaluation.base_evaluator import EvaluationScore
|
||||
from crewai.evaluation.metrics.semantic_quality_metrics import SemanticQualityEvaluator
|
||||
from crewai.experimental.evaluation.base_evaluator import EvaluationScore
|
||||
from crewai.experimental.evaluation.metrics.semantic_quality_metrics import SemanticQualityEvaluator
|
||||
from tests.evaluation.metrics.base_evaluation_metrics_test import BaseEvaluationMetricsTest
|
||||
from crewai.utilities.llm_utils import LLM
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from crewai.evaluation.metrics.tools_metrics import (
|
||||
from crewai.experimental.evaluation.metrics.tools_metrics import (
|
||||
ToolSelectionEvaluator,
|
||||
ParameterExtractionEvaluator,
|
||||
ToolInvocationEvaluator
|
||||
|
||||
@@ -3,9 +3,9 @@ import pytest
|
||||
from crewai.agent import Agent
|
||||
from crewai.task import Task
|
||||
from crewai.crew import Crew
|
||||
from crewai.evaluation.agent_evaluator import AgentEvaluator
|
||||
from crewai.evaluation.base_evaluator import AgentEvaluationResult
|
||||
from crewai.evaluation import (
|
||||
from crewai.experimental.evaluation.agent_evaluator import AgentEvaluator
|
||||
from crewai.experimental.evaluation.base_evaluator import AgentEvaluationResult
|
||||
from crewai.experimental.evaluation import (
|
||||
GoalAlignmentEvaluator,
|
||||
SemanticQualityEvaluator,
|
||||
ToolSelectionEvaluator,
|
||||
@@ -14,7 +14,7 @@ from crewai.evaluation import (
|
||||
ReasoningEfficiencyEvaluator
|
||||
)
|
||||
|
||||
from crewai.evaluation import create_default_evaluator
|
||||
from crewai.experimental.evaluation import create_default_evaluator
|
||||
class TestAgentEvaluator:
|
||||
@pytest.fixture
|
||||
def mock_crew(self):
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from crewai.evaluation.experiment.result import ExperimentResult, ExperimentResults
|
||||
from crewai.experimental.evaluation.experiment.result import ExperimentResult, ExperimentResults
|
||||
|
||||
|
||||
class TestExperimentResult:
|
||||
|
||||
@@ -2,10 +2,10 @@ import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from crewai.crew import Crew
|
||||
from crewai.evaluation.experiment.runner import ExperimentRunner
|
||||
from crewai.evaluation.experiment.result import ExperimentResults
|
||||
from crewai.evaluation.evaluation_display import AgentAggregatedEvaluationResult
|
||||
from crewai.evaluation.base_evaluator import MetricCategory, EvaluationScore
|
||||
from crewai.experimental.evaluation.experiment.runner import ExperimentRunner
|
||||
from crewai.experimental.evaluation.experiment.result import ExperimentResults
|
||||
from crewai.experimental.evaluation.evaluation_display import AgentAggregatedEvaluationResult
|
||||
from crewai.experimental.evaluation.base_evaluator import MetricCategory, EvaluationScore
|
||||
|
||||
|
||||
class TestExperimentRunner:
|
||||
@@ -44,7 +44,7 @@ class TestExperimentRunner:
|
||||
|
||||
return {"Test Agent": agent_evaluation}
|
||||
|
||||
@patch('crewai.evaluation.experiment.runner.create_default_evaluator')
|
||||
@patch('crewai.experimental.evaluation.experiment.runner.create_default_evaluator')
|
||||
def test_run_success(self, mock_create_evaluator, mock_crew, mock_evaluator_results):
|
||||
dataset = [
|
||||
{
|
||||
@@ -102,7 +102,7 @@ class TestExperimentRunner:
|
||||
assert mock_evaluator.get_agent_evaluation.call_count == 3
|
||||
|
||||
|
||||
@patch('crewai.evaluation.experiment.runner.create_default_evaluator')
|
||||
@patch('crewai.experimental.evaluation.experiment.runner.create_default_evaluator')
|
||||
def test_run_success_with_unknown_metric(self, mock_create_evaluator, mock_crew, mock_evaluator_results):
|
||||
dataset = [
|
||||
{
|
||||
@@ -130,7 +130,7 @@ class TestExperimentRunner:
|
||||
assert "unknown_metric" in result.expected_score.keys()
|
||||
assert result.passed is True
|
||||
|
||||
@patch('crewai.evaluation.experiment.runner.create_default_evaluator')
|
||||
@patch('crewai.experimental.evaluation.experiment.runner.create_default_evaluator')
|
||||
def test_run_success_with_single_metric_evaluator_and_expected_specific_metric(self, mock_create_evaluator, mock_crew, mock_evaluator_results):
|
||||
dataset = [
|
||||
{
|
||||
@@ -163,7 +163,7 @@ class TestExperimentRunner:
|
||||
assert "goal_alignment" in result.expected_score.keys()
|
||||
assert result.passed is True
|
||||
|
||||
@patch('crewai.evaluation.experiment.runner.create_default_evaluator')
|
||||
@patch('crewai.experimental.evaluation.experiment.runner.create_default_evaluator')
|
||||
def test_run_success_when_expected_metric_is_not_available(self, mock_create_evaluator, mock_crew, mock_evaluator_results):
|
||||
dataset = [
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user