mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 08:12:39 +00:00
Introducing Agent evaluation (#3130)
* feat: add exchanged messages in LLMCallCompletedEvent
* feat: add GoalAlignment metric for Agent evaluation
* feat: add SemanticQuality metric for Agent evaluation
* feat: add Tool Metrics for Agent evaluation
* feat: add Reasoning Metrics for Agent evaluation, still in progress
* feat: add AgentEvaluator class
  This class will evaluate the Agent's results and report them to the user
* fix: do not evaluate Agent by default
  This is an experimental feature that we still need to refine further
* test: add Agent eval tests
* fix: render all feedback per iteration
* style: resolve linter issues
* style: fix mypy issues
* fix: allow messages to be empty on LLMCallCompletedEvent
This commit is contained in:
@@ -48,8 +48,8 @@ class LLMCallStartedEvent(LLMEventBase):
|
||||
"""
|
||||
|
||||
type: str = "llm_call_started"
|
||||
messages: Union[str, List[Dict[str, Any]]]
|
||||
tools: Optional[List[dict]] = None
|
||||
messages: Optional[Union[str, List[Dict[str, Any]]]] = None
|
||||
tools: Optional[List[dict[str, Any]]] = None
|
||||
callbacks: Optional[List[Any]] = None
|
||||
available_functions: Optional[Dict[str, Any]] = None
|
||||
|
||||
@@ -58,10 +58,10 @@ class LLMCallCompletedEvent(LLMEventBase):
|
||||
"""Event emitted when a LLM call completes"""
|
||||
|
||||
type: str = "llm_call_completed"
|
||||
messages: str | list[dict[str, Any]] | None = None
|
||||
response: Any
|
||||
call_type: LLMCallType
|
||||
|
||||
|
||||
class LLMCallFailedEvent(LLMEventBase):
|
||||
"""Event emitted when a LLM call fails"""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user