Introducing Agent evaluation (#3130)

* feat: add exchanged messages in LLMCallCompletedEvent * feat: add GoalAlignment metric for Agent evaluation * feat: add SemanticQuality metric for Agent evaluation * feat: add Tool Metrics for Agent evaluation * feat: add Reasoning Metrics for Agent evaluation, still in progress * feat: add AgentEvaluator class. This class will evaluate the Agent's results and report them to the user * fix: do not evaluate Agent by default. This is an experimental feature that we still need to refine further * test: add Agent eval tests * fix: render all feedback per iteration * style: resolve linter issues * style: fix mypy issues * fix: allow messages to be empty on LLMCallCompletedEvent
2026-05-03 08:12:39 +00:00 · 2025-07-11 14:18:03 -03:00
parent bf8fa3232b
commit 08fa3797ca
26 changed files with 2930 additions and 14 deletions
--- a/src/crewai/utilities/events/llm_events.py
+++ b/src/crewai/utilities/events/llm_events.py
@@ -48,8 +48,8 @@ class LLMCallStartedEvent(LLMEventBase):
    """

    type: str = "llm_call_started"
-    messages: Union[str, List[Dict[str, Any]]]
-    tools: Optional[List[dict]] = None
+    messages: Optional[Union[str, List[Dict[str, Any]]]] = None
+    tools: Optional[List[dict[str, Any]]] = None
    callbacks: Optional[List[Any]] = None
    available_functions: Optional[Dict[str, Any]] = None

@@ -58,10 +58,10 @@ class LLMCallCompletedEvent(LLMEventBase):
    """Event emitted when a LLM call completes"""

    type: str = "llm_call_completed"
+    messages: str | list[dict[str, Any]] | None = None
    response: Any
    call_type: LLMCallType

-
 class LLMCallFailedEvent(LLMEventBase):
    """Event emitted when a LLM call fails"""