Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-08 15:48:29 +00:00
* feat: add exchanged messages in LLMCallCompletedEvent
* feat: add GoalAlignment metric for Agent evaluation
* feat: add SemanticQuality metric for Agent evaluation
* feat: add Tool Metrics for Agent evaluation
* feat: add Reasoning Metrics for Agent evaluation (still in progress)
* feat: add AgentEvaluator class, which evaluates an Agent's results and reports them to the user
* fix: do not evaluate Agents by default; this is an experimental feature that still needs further refinement
* test: add Agent eval tests
* fix: render all feedback per iteration
* style: resolve linter issues
* style: fix mypy issues
* fix: allow messages to be empty on LLMCallCompletedEvent
* feat: add Experiment evaluation framework with baseline comparison
* fix: reset the evaluator for each experiment iteration
* fix: fix tracking of new test cases
* chore: split Experimental evaluation classes
* refactor: remove unused method
* refactor: isolate Console printing in a dedicated class
* fix: make a crew required to run an experiment
* fix: use a time-aware timestamp to define the experiment result
* test: add tests for Evaluator Experiment
* style: fix linter issues
* fix: encode the string before hashing
* style: resolve linter issues
* feat: add experimental folder for beta features (#3141)
* test: move tests to experimental folder
52 lines
1.2 KiB
Python
from crewai.experimental.evaluation.base_evaluator import (
    BaseEvaluator,
    EvaluationScore,
    MetricCategory,
    AgentEvaluationResult
)

from crewai.experimental.evaluation.metrics import (
    SemanticQualityEvaluator,
    GoalAlignmentEvaluator,
    ReasoningEfficiencyEvaluator,
    ToolSelectionEvaluator,
    ParameterExtractionEvaluator,
    ToolInvocationEvaluator
)

from crewai.experimental.evaluation.evaluation_listener import (
    EvaluationTraceCallback,
    create_evaluation_callbacks
)

from crewai.experimental.evaluation.agent_evaluator import (
    AgentEvaluator,
    create_default_evaluator
)

from crewai.experimental.evaluation.experiment import (
    ExperimentRunner,
    ExperimentResults,
    ExperimentResult
)

__all__ = [
    "BaseEvaluator",
    "EvaluationScore",
    "MetricCategory",
    "AgentEvaluationResult",
    "SemanticQualityEvaluator",
    "GoalAlignmentEvaluator",
    "ReasoningEfficiencyEvaluator",
    "ToolSelectionEvaluator",
    "ParameterExtractionEvaluator",
    "ToolInvocationEvaluator",
    "EvaluationTraceCallback",
    "create_evaluation_callbacks",
    "AgentEvaluator",
    "create_default_evaluator",
    "ExperimentRunner",
    "ExperimentResults",
    "ExperimentResult"
]
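Below is a minimal usage sketch of the public API re-exported by this __init__.py. Only the imported names come from the file above; the argument names and signatures (for example, passing agents to create_default_evaluator) are illustrative assumptions, not the library's confirmed API.

# Hypothetical sketch: the imports are real re-exports from this package,
# but the argument names/signatures below are assumptions for illustration.
from crewai import Agent, Crew, Task
from crewai.experimental.evaluation import create_default_evaluator

researcher = Agent(
    role="Researcher",
    goal="Summarize a topic clearly",
    backstory="An analyst who writes concise summaries.",
)
task = Task(
    description="Summarize recent work on agent evaluation.",
    expected_output="A short summary.",
    agent=researcher,
)
crew = Crew(agents=[researcher], tasks=[task])

# Assumed signature: build an evaluator preloaded with the default metric set
# (semantic quality, goal alignment, tool use, reasoning) for these agents.
evaluator = create_default_evaluator(agents=[researcher])

# Per the changelog, agents are not evaluated by default, so the evaluator is
# created explicitly before running the crew and inspecting its feedback.
result = crew.kickoff()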