From 5e528416ec2b1cf59be4b6f17299734328d69002 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sun, 9 Feb 2025 21:46:23 +0000
Subject: [PATCH] feat: improve llm handling and error validation

Co-Authored-By: Joe Moura
---
 src/crewai/crew.py                                 | 21 +++++-
 .../evaluators/crew_evaluator_handler.py           | 72 ++++++++++++++---
 2 files changed, 82 insertions(+), 11 deletions(-)

diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index 115d82387..2e88118bf 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -1081,7 +1081,26 @@ class Crew(BaseModel):
         openai_model_name: Optional[str] = None,  # For backward compatibility
         inputs: Optional[Dict[str, Any]] = None,
     ) -> None:
-        """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
+        """Test and evaluate the Crew with the given inputs for n iterations.
+
+        This method evaluates the performance of the crew using the specified
+        language model. It accepts both string model names and LLM instances.
+
+        Args:
+            n_iterations: Number of test iterations to run.
+            llm: Language model configuration (preferred). Can be:
+                - a string model name (e.g., "gpt-4")
+                - an LLM instance
+                - any object with model_name or deployment_name attributes
+            openai_model_name: Legacy parameter kept for backward compatibility.
+                Deprecated: will be removed in a future version. Use `llm` instead.
+            inputs: Optional dictionary of inputs to use during testing.
+
+        Note:
+            The `openai_model_name` parameter is deprecated and will be removed
+            in a future version. Use the more flexible `llm` parameter instead,
+            which supports any LLM implementation.
+        """
         test_crew = self.copy()
 
         # For backward compatibility, convert openai_model_name to llm
diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
index bc618bc8c..3cc6e76d9 100644
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -1,8 +1,13 @@
 from collections import defaultdict
+from typing import Any, DefaultDict, List, Union
 
-from typing import Any, Union
-
-from pydantic import BaseModel, Field, InstanceOf
+from pydantic import (
+    BaseModel,
+    Field,
+    InstanceOf,
+    PrivateAttr,
+    model_validator,
+)
 from rich.box import HEAVY_EDGE
 from rich.console import Console
 from rich.table import Table
@@ -20,7 +25,7 @@ class TaskEvaluationPydanticOutput(BaseModel):
     )
 
 
-class CrewEvaluator:
+class CrewEvaluator(BaseModel):
     """
     A class to evaluate the performance of the agents in the crew
     based on the tasks they have performed.
@@ -31,16 +36,63 @@ class CrewEvaluator(BaseModel):
         iteration (int): The current iteration of the evaluation.
     """
 
-    tasks_scores: defaultdict = defaultdict(list)
-    run_execution_times: defaultdict = defaultdict(list)
-    iteration: int = 0
+    crew: Any = Field(description="The crew of agents to evaluate.")
+    llm: Union[str, InstanceOf[LLM], Any] = Field(
+        description="Language model that will run the evaluation."
+    )
+    tasks_scores: DefaultDict[int, List[float]] = Field(
+        default_factory=lambda: defaultdict(list),
+        description="Dictionary to store the scores of the agents for each task.",
+    )
+    run_execution_times: DefaultDict[int, List[int]] = Field(
+        default_factory=lambda: defaultdict(list),
+        description="Dictionary to store execution times for each run.",
+    )
+    iteration: int = Field(
+        default=0,
+        description="Current iteration of the evaluation.",
+    )
+
+    @model_validator(mode="after")
+    def validate_llm(self):
+        """Validate that the LLM is properly configured."""
+        if not self.llm:
+            raise ValueError("LLM configuration is required")
+        return self
+
+    _telemetry: Telemetry = PrivateAttr(default_factory=Telemetry)
+
     def __init__(self, crew, llm: Union[str, InstanceOf[LLM], Any]):
-        self.crew = crew
-        self.llm = llm if isinstance(llm, LLM) else LLM(model=llm)
-        self._telemetry = Telemetry()
+        # Initialize the Pydantic model with validated fields
+        super().__init__(crew=crew, llm=llm)
         self._setup_for_evaluating()
 
+    @model_validator(mode="before")
+    def init_llm(cls, values):
+        """Coerce the llm value into an LLM instance before validation."""
+        llm = values.get("llm")
+        try:
+            if isinstance(llm, str):
+                values["llm"] = LLM(model=llm)
+            elif isinstance(llm, LLM):
+                values["llm"] = llm
+            else:
+                # For any other type, extract the relevant attributes
+                llm_params = {
+                    "model": getattr(llm, "model_name", None)
+                    or getattr(llm, "deployment_name", None)
+                    or str(llm),
+                    "temperature": getattr(llm, "temperature", None),
+                    "max_tokens": getattr(llm, "max_tokens", None),
+                    "timeout": getattr(llm, "timeout", None),
+                }
+                # Drop unset parameters before constructing the LLM
+                llm_params = {k: v for k, v in llm_params.items() if v is not None}
+                values["llm"] = LLM(**llm_params)
+        except Exception as e:
+            raise ValueError(f"Invalid LLM configuration: {e}") from e
+        return values
+
     def _setup_for_evaluating(self) -> None:
         """Sets up the crew for evaluating."""
         for task in self.crew.tasks:
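A minimal usage sketch of the updated test() signature, based on the docstring in this patch. The agent/task definitions are illustrative placeholders, not part of the change:

    from crewai import Agent, Crew, Task, LLM

    # Hypothetical single-agent crew, just enough to call test().
    agent = Agent(role="Researcher", goal="Summarize a topic", backstory="A diligent analyst.")
    task = Task(description="Summarize recent AI news", expected_output="A short summary", agent=agent)
    crew = Crew(agents=[agent], tasks=[task])

    # Preferred: pass a model name string or an LLM instance via `llm`.
    crew.test(n_iterations=2, llm="gpt-4")
    crew.test(n_iterations=2, llm=LLM(model="gpt-4", temperature=0.2))

    # Deprecated, still accepted for backward compatibility:
    crew.test(n_iterations=2, openai_model_name="gpt-4")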
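The init_llm validator also covers duck-typed model objects. A sketch of that fallback path, reusing the crew from the example above; DuckTypedLLM is a hypothetical stand-in for any object exposing model_name or deployment_name (e.g., a LangChain chat model):

    from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator

    class DuckTypedLLM:
        model_name = "gpt-4o"  # hypothetical attribute values
        temperature = 0.1      # max_tokens/timeout left unset

    evaluator = CrewEvaluator(crew=crew, llm=DuckTypedLLM())
    # init_llm falls through to the getattr branch and builds
    # LLM(model="gpt-4o", temperature=0.1); None-valued params are dropped,
    # and any failure is re-raised as ValueError("Invalid LLM configuration: ...").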