fix: improve error handling, logging, and test coverage

Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
Devin AI
2025-02-09 21:33:03 +00:00
parent 81f84cab58
commit 2744af4825
3 changed files with 39 additions and 9 deletions

View File

@@ -1081,7 +1081,7 @@ class Crew(BaseModel):
openai_model_name: Optional[Union[str, LLM]] = None,
inputs: Optional[Dict[str, Any]] = None,
) -> None:
"""Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures.
"""Test and evaluate the Crew with the given inputs for n iterations.
Args:
n_iterations: The number of iterations to run the test.
@@ -1089,6 +1089,9 @@ class Crew(BaseModel):
the performance of the agents. If a string is provided, it will be used to create
an LLM instance.
inputs: The inputs to use for the test.
Raises:
ValueError: If openai_model_name is not a string or LLM instance.
"""
test_crew = self.copy()

View File

@@ -4,6 +4,7 @@ from crewai.llm import LLM
from collections import defaultdict
from pydantic import BaseModel, Field
from crewai.utilities.logger import Logger
from rich.box import HEAVY_EDGE
from rich.console import Console
from rich.table import Table
@@ -42,11 +43,22 @@ class CrewEvaluator:
crew (Crew): The crew to evaluate
openai_model_name (Union[str, LLM]): Either a model name string or an LLM instance
to use for evaluation. If a string is provided, it will be used to create an
LLM instance.
LLM instance with default settings. If an LLM instance is provided, its settings
(like temperature) will be preserved.
Raises:
ValueError: If openai_model_name is not a string or LLM instance.
"""
self.crew = crew
self.llm = openai_model_name if isinstance(openai_model_name, LLM) else LLM(model=openai_model_name)
if not isinstance(openai_model_name, (str, LLM)):
raise ValueError(f"Invalid model type '{type(openai_model_name)}'. Expected str or LLM instance.")
self.model_instance = openai_model_name if isinstance(openai_model_name, LLM) else LLM(model=openai_model_name)
self._telemetry = Telemetry()
self._logger = Logger()
self._logger.log(
"info",
f"Initializing CrewEvaluator with model: {openai_model_name if isinstance(openai_model_name, str) else openai_model_name.model}"
)
self._setup_for_evaluating()
def _setup_for_evaluating(self) -> None:
@@ -62,7 +74,7 @@ class CrewEvaluator:
),
backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
verbose=False,
llm=self.llm,
llm=self.model_instance,
)
def _evaluation_task(
@@ -192,7 +204,11 @@ class CrewEvaluator:
self.crew,
evaluation_result.pydantic.quality,
current_task._execution_time,
self.llm.model,
self.model_instance.model,
)
self._logger.log(
"info",
f"Task evaluation completed with quality score: {evaluation_result.pydantic.quality}"
)
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
self.run_execution_times[self.iteration].append(

View File

@@ -136,14 +136,25 @@ class TestCrewEvaluator:
"""Test that CrewEvaluator correctly handles custom LLM instances."""
custom_llm = LLM(model="gpt-4", temperature=0.5)
evaluator = CrewEvaluator(crew_planner.crew, custom_llm)
assert evaluator.llm == custom_llm
assert evaluator.llm.temperature == 0.5
assert evaluator.model_instance == custom_llm
assert evaluator.model_instance.temperature == 0.5
def test_evaluator_with_invalid_model_type(self, crew_planner):
    """Check that a model argument that is neither str nor LLM is rejected.

    Constructing a CrewEvaluator with an integer in place of the model
    must raise ValueError with a message matching "Invalid model type".
    """
    unsupported_model = 123  # deliberately neither a model name nor an LLM
    with pytest.raises(ValueError, match="Invalid model type"):
        CrewEvaluator(crew_planner.crew, unsupported_model)
def test_evaluator_preserves_model_settings(self, crew_planner):
    """Check that settings on a supplied LLM instance survive construction.

    An LLM built with an explicit temperature is handed to CrewEvaluator;
    the evaluator's ``model_instance`` must report that same temperature.
    """
    expected_temperature = 0.7
    configured_llm = LLM(model="gpt-4", temperature=expected_temperature)
    evaluator = CrewEvaluator(crew_planner.crew, configured_llm)
    assert evaluator.model_instance.temperature == expected_temperature
def test_evaluator_with_model_name(self, crew_planner):
"""Test that CrewEvaluator correctly handles string model names.

The evaluator is constructed from the bare model-name string "gpt-4" and
the assertions check that an LLM object carrying that model name is exposed.
"""
# Pass a plain string instead of an LLM instance.
evaluator = CrewEvaluator(crew_planner.crew, "gpt-4")
# NOTE(review): the two `evaluator.llm` assertions below look like the
# pre-rename side of this change (the attribute appears to have been renamed
# to `model_instance`) -- confirm whether they should still be present.
assert isinstance(evaluator.llm, LLM)
assert evaluator.llm.model == "gpt-4"
# The same two checks against the `model_instance` attribute.
assert isinstance(evaluator.model_instance, LLM)
assert evaluator.model_instance.model == "gpt-4"
def test_evaluate(self, crew_planner):
task_output = TaskOutput(