Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-29 18:18:13 +00:00
Introducing Agent evaluation (#3130)
* feat: add exchanged messages to LLMCallCompletedEvent
* feat: add GoalAlignment metric for Agent evaluation
* feat: add SemanticQuality metric for Agent evaluation
* feat: add Tool metrics for Agent evaluation
* feat: add Reasoning metrics for Agent evaluation (still in progress)
* feat: add AgentEvaluator class, which evaluates an Agent's results and reports them to the user
* fix: do not evaluate Agents by default; this is an experimental feature that still needs further refinement
* test: add Agent evaluation tests
* fix: render all feedback per iteration
* style: resolve linter issues
* style: fix mypy issues
* fix: allow messages to be empty on LLMCallCompletedEvent
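For orientation, the sketch below shows the general shape of the evaluation flow the commit message describes: a set of named metrics is scored against an agent's output and the results are collected as per-iteration feedback. Everything in it (MetricResult, evaluate_output, the toy goal_alignment function) is an illustrative assumption, not crewAI's actual AgentEvaluator API.

# Illustrative sketch only -- not crewAI's AgentEvaluator API.
# It mirrors the pattern described in the commit: run several named
# metrics over an agent's output and collect the feedback.
from dataclasses import dataclass
from typing import Callable


@dataclass
class MetricResult:
    name: str
    score: float  # 0.0 (poor) to 1.0 (ideal)
    feedback: str


def evaluate_output(
    output: str,
    metrics: dict[str, Callable[[str], tuple[float, str]]],
) -> list[MetricResult]:
    """Score the output with every metric and collect the results."""
    results = []
    for name, metric in metrics.items():
        score, feedback = metric(output)
        results.append(MetricResult(name=name, score=score, feedback=feedback))
    return results


def goal_alignment(output: str) -> tuple[float, str]:
    """Toy stand-in for a goal-alignment check (keyword match only)."""
    if "summary" in output.lower():
        return 1.0, "Output addresses the requested summary."
    return 0.0, "Output does not mention the requested summary."


if __name__ == "__main__":
    for result in evaluate_output(
        "Here is the summary you asked for.",
        {"goal_alignment": goal_alignment},
    ):
        print(f"{result.name}: {result.score:.1f} - {result.feedback}")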
tests/evaluation/metrics/base_evaluation_metrics_test.py (new file, 28 lines added)
@@ -0,0 +1,28 @@
import pytest
from unittest.mock import MagicMock
from crewai.agent import Agent
from crewai.task import Task

class BaseEvaluationMetricsTest:
    @pytest.fixture
    def mock_agent(self):
        agent = MagicMock(spec=Agent)
        agent.id = "test_agent_id"
        agent.role = "Test Agent"
        agent.goal = "Test goal"
        agent.tools = []
        return agent

    @pytest.fixture
    def mock_task(self):
        task = MagicMock(spec=Task)
        task.description = "Test task description"
        task.expected_output = "Test expected output"
        return task

    @pytest.fixture
    def execution_trace(self):
        return {
            "thinking": ["I need to analyze this data carefully"],
            "actions": ["Gathered information", "Analyzed data"]
        }
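In use, a concrete metric test would subclass BaseEvaluationMetricsTest so that pytest's fixture inheritance makes mock_agent, mock_task, and execution_trace available without redefining them. The subclass name and assertions below are a hypothetical sketch; the metric classes added in this commit are not exercised here.

# Hypothetical subclass showing how the shared fixtures are consumed.
class TestGoalAlignmentFixtures(BaseEvaluationMetricsTest):
    def test_shared_fixtures_are_available(self, mock_agent, mock_task, execution_trace):
        assert mock_agent.role == "Test Agent"
        assert mock_task.expected_output == "Test expected output"
        assert execution_trace["thinking"] == ["I need to analyze this data carefully"]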