feat: improve llm handling and error validation

Co-Authored-By: Joe Moura <joao@crewai.com>
Author: Devin AI
Date: 2025-02-09 21:46:23 +00:00
parent a097d933f6
commit 5e528416ec
2 changed files with 83 additions and 12 deletions


@@ -1081,7 +1081,26 @@ class Crew(BaseModel):
         openai_model_name: Optional[str] = None,  # For backward compatibility
         inputs: Optional[Dict[str, Any]] = None,
     ) -> None:
-        """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
+        """Test and evaluate the Crew with the given inputs for n iterations.
+
+        This method runs tests to evaluate the performance of the crew using the specified
+        language model. It supports both string model names and LLM instances for flexibility.
+
+        Args:
+            n_iterations: Number of test iterations to run
+            llm: Language model configuration (preferred). Can be:
+                - A string model name (e.g., "gpt-4")
+                - An LLM instance
+                - Any object with model_name or deployment_name attributes
+            openai_model_name: Legacy parameter for backward compatibility.
+                Deprecated: Will be removed in future versions. Use `llm` instead.
+            inputs: Optional dictionary of inputs to be used during testing
+
+        Note:
+            The `openai_model_name` parameter is deprecated and will be removed in
+            future versions. Use the more flexible `llm` parameter instead, which
+            supports any LLM implementation.
+        """
         test_crew = self.copy()
         # For backward compatibility, convert openai_model_name to llm
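As a reference for the new contract, a minimal usage sketch of the three documented `llm` forms (`my_crew` and `azure_client` are illustrative stand-ins, and `LLM` is assumed to be importable from crewai):

    # Sketch only - call forms implied by the docstring above:
    my_crew.test(n_iterations=3, llm="gpt-4", inputs={"topic": "AI"})  # string model name
    my_crew.test(n_iterations=3, llm=LLM(model="gpt-4"))               # LLM instance
    my_crew.test(n_iterations=3, llm=azure_client)                     # object with model_name/deployment_name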


@@ -1,8 +1,13 @@
-from collections import defaultdict
-
-from typing import Any, Union
-
-from pydantic import BaseModel, Field, InstanceOf
+from collections.abc import Callable
+from typing import Any, Dict, List, Union, Annotated, DefaultDict
+
+from pydantic import (
+    BaseModel,
+    Field,
+    InstanceOf,
+    PrivateAttr,
+    model_validator,
+)
 from rich.box import HEAVY_EDGE
 from rich.console import Console
 from rich.table import Table
@@ -20,7 +25,7 @@ class TaskEvaluationPydanticOutput(BaseModel):
     )


-class CrewEvaluator:
+class CrewEvaluator(BaseModel):
     """
     A class to evaluate the performance of the agents in the crew based on the tasks they have performed.
@@ -31,16 +36,63 @@ class CrewEvaluator:
         iteration (int): The current iteration of the evaluation.
     """

-    tasks_scores: defaultdict = defaultdict(list)
-    run_execution_times: defaultdict = defaultdict(list)
-    iteration: int = 0
+    crew: Any = Field(description="The crew of agents to evaluate.")
+    llm: Union[str, InstanceOf[LLM], Any] = Field(
+        description="Language model that will run the evaluation."
+    )
+    tasks_scores: DefaultDict[int, List[float]] = Field(
+        default_factory=lambda: DefaultDict(list),
+        description="Dictionary to store the scores of the agents for each task."
+    )
+    run_execution_times: DefaultDict[int, List[int]] = Field(
+        default_factory=lambda: DefaultDict(list),
+        description="Dictionary to store execution times for each run."
+    )
+    iteration: int = Field(
+        default=0,
+        description="Current iteration of the evaluation."
+    )
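A subtlety in the new field defaults: the factories call typing.DefaultDict(list) rather than the usual collections.defaultdict(list). Calling the typing alias forwards to its origin class, so it still builds a real defaultdict at runtime; a quick standalone check of that assumption:

    from collections import defaultdict
    from typing import DefaultDict

    d = DefaultDict(list)              # forwards to collections.defaultdict
    assert isinstance(d, defaultdict)  # a real defaultdict instance
    d[0].append(9.5)                   # missing keys get a fresh list
    assert d[0] == [9.5]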
+
+    @model_validator(mode="after")
+    def validate_llm(self):
+        """Validates that the LLM is properly configured."""
+        if not self.llm:
+            raise ValueError("LLM configuration is required")
+        return self
+
+    _telemetry: Telemetry = PrivateAttr(default_factory=Telemetry)

     def __init__(self, crew, llm: Union[str, InstanceOf[LLM], Any]):
-        self.crew = crew
-        self.llm = llm if isinstance(llm, LLM) else LLM(model=llm)
-        self._telemetry = Telemetry()
+        # Initialize Pydantic model with validated fields
+        super().__init__(crew=crew, llm=llm)
         self._setup_for_evaluating()
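Because CrewEvaluator now subclasses BaseModel, super().__init__ routes construction through Pydantic, so the before- and after-validators run on every instantiation. A minimal sketch of the intended effect (`my_crew` and the model name are illustrative):

    evaluator = CrewEvaluator(my_crew, "gpt-4o-mini")  # string coerced by the before-validator
    assert isinstance(evaluator.llm, LLM)              # validate_llm then sees a configured LLM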
+    @model_validator(mode="before")
+    def init_llm(cls, values):
+        """Initialize LLM before Pydantic validation."""
+        llm = values.get("llm")
+        try:
+            if isinstance(llm, str):
+                values["llm"] = LLM(model=llm)
+            elif isinstance(llm, LLM):
+                values["llm"] = llm
+            else:
+                # For any other type, attempt to extract relevant attributes
+                llm_params = {
+                    "model": getattr(llm, "model_name", None)
+                    or getattr(llm, "deployment_name", None)
+                    or str(llm),
+                    "temperature": getattr(llm, "temperature", None),
+                    "max_tokens": getattr(llm, "max_tokens", None),
+                    "timeout": getattr(llm, "timeout", None),
+                }
+                # Remove None values
+                llm_params = {k: v for k, v in llm_params.items() if v is not None}
+                values["llm"] = LLM(**llm_params)
+        except Exception as e:
+            raise ValueError(f"Invalid LLM configuration: {str(e)}") from e
+        return values
+
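To illustrate the duck-typed fallback branch, a hedged sketch (FakeAzureLLM is invented for this example; the resulting parameters follow the getattr chain above):

    class FakeAzureLLM:
        deployment_name = "prod-gpt4"  # used because model_name is absent
        temperature = 0.2
        max_tokens = 512               # timeout is left unset, so it is dropped

    evaluator = CrewEvaluator(my_crew, FakeAzureLLM())
    # init_llm builds LLM(model="prod-gpt4", temperature=0.2, max_tokens=512)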
     def _setup_for_evaluating(self) -> None:
         """Sets up the crew for evaluating."""
         for task in self.crew.tasks: