Files
crewAI/src/crewai/experimental/evaluation/__init__.py
Lucas Gomide 1b6b2b36d9 Introduce Evaluator Experiment (#3133)
* feat: add exchanged messages in LLMCallCompletedEvent

* feat: add GoalAlignment metric for Agent evaluation

* feat: add SemanticQuality metric for Agent evaluation

* feat: add Tool Metrics for Agent evaluation

* feat: add Reasoning Metrics for Agent evaluation, still in progress

* feat: add AgentEvaluator class

This class will evaluate Agent' results and report to user

* fix: do not evaluate Agent by default

This is a experimental feature we still need refine it further

* test: add Agent eval tests

* fix: render all feedback per iteration

* style: resolve linter issues

* style: fix mypy issues

* fix: allow messages be empty on LLMCallCompletedEvent

* feat: add Experiment evaluation framework with baseline comparison

* fix: reset evaluator for each experiement iteraction

* fix: fix track of new test cases

* chore: split Experimental evaluation classes

* refactor: remove unused method

* refactor: isolate Console print in a dedicated class

* fix: make crew required to run an experiment

* fix: use time-aware to define experiment result

* test: add tests for Evaluator Experiment

* style: fix linter issues

* fix: encode string before hashing

* style: resolve linter issues

* feat: add experimental folder for beta features (#3141)

* test: move tests to experimental folder
2025-07-14 09:06:45 -04:00

52 lines
1.2 KiB
Python

from crewai.experimental.evaluation.base_evaluator import (
BaseEvaluator,
EvaluationScore,
MetricCategory,
AgentEvaluationResult
)
from crewai.experimental.evaluation.metrics import (
SemanticQualityEvaluator,
GoalAlignmentEvaluator,
ReasoningEfficiencyEvaluator,
ToolSelectionEvaluator,
ParameterExtractionEvaluator,
ToolInvocationEvaluator
)
from crewai.experimental.evaluation.evaluation_listener import (
EvaluationTraceCallback,
create_evaluation_callbacks
)
from crewai.experimental.evaluation.agent_evaluator import (
AgentEvaluator,
create_default_evaluator
)
from crewai.experimental.evaluation.experiment import (
ExperimentRunner,
ExperimentResults,
ExperimentResult
)
__all__ = [
"BaseEvaluator",
"EvaluationScore",
"MetricCategory",
"AgentEvaluationResult",
"SemanticQualityEvaluator",
"GoalAlignmentEvaluator",
"ReasoningEfficiencyEvaluator",
"ToolSelectionEvaluator",
"ParameterExtractionEvaluator",
"ToolInvocationEvaluator",
"EvaluationTraceCallback",
"create_evaluation_callbacks",
"AgentEvaluator",
"create_default_evaluator",
"ExperimentRunner",
"ExperimentResults",
"ExperimentResult"
]