diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py index 2ac6e218d..bc618bc8c 100644 --- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py +++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py @@ -184,7 +184,7 @@ class CrewEvaluator: self.crew, evaluation_result.pydantic.quality, current_task._execution_time, - self.openai_model_name, + self.llm.model if isinstance(self.llm, LLM) else self.llm, ) self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality) self.run_execution_times[self.iteration].append( diff --git a/tests/crew_test.py b/tests/crew_test.py index 585f6c829..bd18ddea8 100644 --- a/tests/crew_test.py +++ b/tests/crew_test.py @@ -303,7 +303,7 @@ def test_hierarchical_process(): @mock.patch("crewai.crew.CrewEvaluator") @mock.patch("crewai.crew.Crew.copy") def test_crew_test_backward_compatibility(mock_copy, mock_evaluator): - crew = Crew(agents=[researcher], tasks=[Task(description="test", agent=researcher)]) + crew = Crew(agents=[researcher], tasks=[Task(description="test", expected_output="test output", agent=researcher)]) crew.test(2, openai_model_name="gpt-4") mock_evaluator.assert_called_once() _, kwargs = mock_evaluator.call_args