mirror of https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 08:12:39 +00:00
fix: add ConfigDict for Pydantic model_config and ClassVar annotations
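For context, the pattern named in the commit title is Pydantic v2's `ConfigDict`-based `model_config` plus `ClassVar` annotations for class-level constants, which keeps attributes like `metric_category` from being picked up as model fields. A minimal sketch of the pattern, assuming a Pydantic `BaseModel` subclass; the names are borrowed from the test diff below, not copied from the library's actual definitions:

from enum import Enum
from typing import ClassVar

from pydantic import BaseModel, ConfigDict


class MetricCategory(str, Enum):
    GOAL_ALIGNMENT = "goal_alignment"


class Evaluator(BaseModel):
    # Pydantic v2 style: ConfigDict replaces the v1 nested `class Config`.
    # arbitrary_types_allowed is just an example setting.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # ClassVar marks a class-level constant; with a plain annotation,
    # Pydantic v2 would treat the attribute as a model field instead.
    metric_category: ClassVar[MetricCategory] = MetricCategory.GOAL_ALIGNMENT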
@@ -1,5 +1,7 @@
-import pytest
 from unittest.mock import MagicMock
+
+import pytest
+
 from crewai.agent import Agent
 from crewai.task import Task
 
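This hunk, like most of those that follow, is the same mechanical cleanup: imports regrouped and alphabetized in isort order, presumably enforced by a linter such as ruff. A schematic of the target grouping:

# isort-style grouping: standard library first,
from unittest.mock import MagicMock

# then third-party packages,
import pytest

# then first-party/local code, each group alphabetized
# and separated by one blank line.
from crewai.agent import Agent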
@@ -1,11 +1,11 @@
-from unittest.mock import patch, MagicMock
-from tests.experimental.evaluation.metrics.test_base_evaluation_metrics import (
-    BaseEvaluationMetricsTest,
-)
+from unittest.mock import MagicMock, patch
 
 from crewai.experimental.evaluation.base_evaluator import EvaluationScore
 from crewai.experimental.evaluation.metrics.goal_metrics import GoalAlignmentEvaluator
 from crewai.utilities.llm_utils import LLM
+from tests.experimental.evaluation.metrics.test_base_evaluation_metrics import (
+    BaseEvaluationMetricsTest,
+)
 
 
 class TestGoalAlignmentEvaluator(BaseEvaluationMetricsTest):
@@ -1,16 +1,17 @@
-import pytest
-from unittest.mock import patch, MagicMock
-from typing import List, Dict, Any
+from typing import Any
+from unittest.mock import MagicMock, patch
 
-from crewai.tasks.task_output import TaskOutput
+import pytest
+
+from crewai.experimental.evaluation.base_evaluator import EvaluationScore
 from crewai.experimental.evaluation.metrics.reasoning_metrics import (
     ReasoningEfficiencyEvaluator,
 )
+from crewai.tasks.task_output import TaskOutput
+from crewai.utilities.llm_utils import LLM
 from tests.experimental.evaluation.metrics.test_base_evaluation_metrics import (
     BaseEvaluationMetricsTest,
 )
-from crewai.utilities.llm_utils import LLM
-from crewai.experimental.evaluation.base_evaluator import EvaluationScore
 
 
 class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest):

@@ -21,7 +22,7 @@ class TestReasoningEfficiencyEvaluator(BaseEvaluationMetricsTest):
         return output
 
     @pytest.fixture
-    def llm_calls(self) -> List[Dict[str, Any]]:
+    def llm_calls(self) -> list[dict[str, Any]]:
         return [
             {
                 "prompt": "How should I approach this task?",
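The annotation change above follows PEP 585: on Python 3.9+ the builtin `list` and `dict` are subscriptable, so the `typing.List` / `typing.Dict` aliases can be dropped. For instance:

from typing import Any

# PEP 585 (Python 3.9+): builtins accept subscripts directly,
# so typing.List / typing.Dict imports are no longer needed.
def llm_calls() -> list[dict[str, Any]]:
    return [{"prompt": "How should I approach this task?"}]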
@@ -1,13 +1,13 @@
-from unittest.mock import patch, MagicMock
+from unittest.mock import MagicMock, patch
 
 from crewai.experimental.evaluation.base_evaluator import EvaluationScore
 from crewai.experimental.evaluation.metrics.semantic_quality_metrics import (
     SemanticQualityEvaluator,
 )
+from crewai.utilities.llm_utils import LLM
 from tests.experimental.evaluation.metrics.test_base_evaluation_metrics import (
     BaseEvaluationMetricsTest,
 )
-from crewai.utilities.llm_utils import LLM
 
 
 class TestSemanticQualityEvaluator(BaseEvaluationMetricsTest):
@@ -1,9 +1,9 @@
-from unittest.mock import patch, MagicMock
+from unittest.mock import MagicMock, patch
 
 from crewai.experimental.evaluation.metrics.tools_metrics import (
-    ToolSelectionEvaluator,
     ParameterExtractionEvaluator,
     ToolInvocationEvaluator,
+    ToolSelectionEvaluator,
 )
 from crewai.utilities.llm_utils import LLM
 from tests.experimental.evaluation.metrics.test_base_evaluation_metrics import (
@@ -1,28 +1,27 @@
 import pytest
 
 from crewai.agent import Agent
-from crewai.task import Task
 from crewai.crew import Crew
-from crewai.experimental.evaluation.agent_evaluator import AgentEvaluator
-from crewai.experimental.evaluation.base_evaluator import AgentEvaluationResult
-from crewai.experimental.evaluation import (
-    GoalAlignmentEvaluator,
-    SemanticQualityEvaluator,
-    ToolSelectionEvaluator,
-    ParameterExtractionEvaluator,
-    ToolInvocationEvaluator,
-    ReasoningEfficiencyEvaluator,
-    MetricCategory,
-    EvaluationScore,
-)
-
+from crewai.events.event_bus import crewai_event_bus
 from crewai.events.types.agent_events import (
-    AgentEvaluationStartedEvent,
     AgentEvaluationCompletedEvent,
     AgentEvaluationFailedEvent,
+    AgentEvaluationStartedEvent,
 )
-from crewai.events.event_bus import crewai_event_bus
-from crewai.experimental.evaluation import create_default_evaluator
+from crewai.experimental.evaluation import (
+    EvaluationScore,
+    GoalAlignmentEvaluator,
+    MetricCategory,
+    ParameterExtractionEvaluator,
+    ReasoningEfficiencyEvaluator,
+    SemanticQualityEvaluator,
+    ToolInvocationEvaluator,
+    ToolSelectionEvaluator,
+    create_default_evaluator,
+)
+from crewai.experimental.evaluation.agent_evaluator import AgentEvaluator
+from crewai.experimental.evaluation.base_evaluator import AgentEvaluationResult
+from crewai.task import Task
 
 
 class TestAgentEvaluator:
@@ -100,7 +99,7 @@ class TestAgentEvaluator:
         ]
 
         assert len(agent_evaluator.evaluators) == len(expected_types)
-        for evaluator, expected_type in zip(agent_evaluator.evaluators, expected_types):
+        for evaluator, expected_type in zip(agent_evaluator.evaluators, expected_types, strict=False):
             assert isinstance(evaluator, expected_type)
 
     @pytest.mark.vcr(filter_headers=["authorization"])
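The `strict` keyword to `zip()` exists on Python 3.10+; `strict=True` raises `ValueError` on a length mismatch, while `strict=False` keeps the historical behavior of silently truncating to the shorter iterable. Making the flag explicit satisfies linters such as ruff's B905 rule, which is likely the motivation here, since the preceding assert already guarantees equal lengths. A small illustration:

evaluators = ["goal", "semantic"]
expected = ["goal", "semantic", "tools"]

# strict=False keeps the pre-3.10 behavior: extra items are dropped.
assert list(zip(evaluators, expected, strict=False)) == [
    ("goal", "goal"),
    ("semantic", "semantic"),
]

# strict=True raises instead of silently truncating.
try:
    list(zip(evaluators, expected, strict=True))
except ValueError as exc:
    print(exc)  # zip() argument 2 is longer than argument 1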
@@ -254,8 +253,8 @@ class TestAgentEvaluator:
             events["failed"] = event
 
         # Create a mock evaluator that will raise an exception
-        from crewai.experimental.evaluation.base_evaluator import BaseEvaluator
         from crewai.experimental.evaluation import MetricCategory
+        from crewai.experimental.evaluation.base_evaluator import BaseEvaluator
 
         class FailingEvaluator(BaseEvaluator):
             metric_category = MetricCategory.GOAL_ALIGNMENT
@@ -1,7 +1,11 @@
-import pytest
 from unittest.mock import MagicMock, patch
 
-from crewai.experimental.evaluation.experiment.result import ExperimentResult, ExperimentResults
+import pytest
+
+from crewai.experimental.evaluation.experiment.result import (
+    ExperimentResult,
+    ExperimentResults,
+)
 
 
 class TestExperimentResult:

@@ -108,4 +112,4 @@ class TestExperimentResult:
         assert comparison["regressed"] == ["test-3"]
         assert comparison["unchanged"] == ["test-2", "test-4"]
         assert comparison["new_tests"] == ["test-6"]
-        assert comparison["missing_tests"] == ["test-5"]
\ No newline at end of file
+        assert comparison["missing_tests"] == ["test-5"]
@@ -1,11 +1,17 @@
-import pytest
 from unittest.mock import MagicMock, patch
 
+import pytest
+
 from crewai.crew import Crew
-from crewai.experimental.evaluation.experiment.runner import ExperimentRunner
+from crewai.experimental.evaluation.base_evaluator import (
+    EvaluationScore,
+    MetricCategory,
+)
+from crewai.experimental.evaluation.evaluation_display import (
+    AgentAggregatedEvaluationResult,
+)
 from crewai.experimental.evaluation.experiment.result import ExperimentResults
-from crewai.experimental.evaluation.evaluation_display import AgentAggregatedEvaluationResult
-from crewai.experimental.evaluation.base_evaluator import MetricCategory, EvaluationScore
+from crewai.experimental.evaluation.experiment.runner import ExperimentRunner
 
 
 class TestExperimentRunner:

@@ -194,4 +200,4 @@ class TestExperimentRunner:
         assert result.inputs == {"query": "Test query 2"}
         assert isinstance(result.expected_score, dict)
         assert "unknown_metric" in result.expected_score.keys()
-        assert result.passed is False
\ No newline at end of file
+        assert result.passed is False