mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
Introducing Agent evaluation (#3130)
* feat: add exchanged messages in LLMCallCompletedEvent * feat: add GoalAlignment metric for Agent evaluation * feat: add SemanticQuality metric for Agent evaluation * feat: add Tool Metrics for Agent evaluation * feat: add Reasoning Metrics for Agent evaluation, still in progress * feat: add AgentEvaluator class This class will evaluate Agent' results and report to user * fix: do not evaluate Agent by default This is a experimental feature we still need refine it further * test: add Agent eval tests * fix: render all feedback per iteration * style: resolve linter issues * style: fix mypy issues * fix: allow messages be empty on LLMCallCompletedEvent
This commit is contained in:
@@ -601,7 +601,7 @@ def test_handle_streaming_tool_calls(get_weather_tool_schema, mock_emit):
|
||||
def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_emit):
|
||||
def get_weather_error(location):
|
||||
raise Exception("Error")
|
||||
|
||||
|
||||
llm = LLM(model="openai/gpt-4o", stream=True)
|
||||
response = llm.call(
|
||||
messages=[
|
||||
@@ -619,7 +619,7 @@ def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_em
|
||||
expected_stream_chunk=9,
|
||||
expected_completed_llm_call=1,
|
||||
expected_tool_usage_started=1,
|
||||
expected_tool_usage_error=1,
|
||||
expected_tool_usage_error=1,
|
||||
expected_final_chunk_result=expected_final_chunk_result,
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user