Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-09 16:18:30 +00:00
feat: improve LLM validation and error handling

- Add descriptive error messages with usage context
- Add LLM instance validation
- Add deprecation warning for openai_model_name
- Add string representation to CrewEvaluator
- Add edge case tests

Co-Authored-By: Joe Moura <joao@crewai.com>
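As a usage sketch (not part of the commit), the change lets callers pass the evaluation model either as a model-name string or as an LLM instance via the new llm parameter, while openai_model_name keeps working but now emits a DeprecationWarning. The crew object below is assumed, not taken from this diff; these calls actually run the crew n_iterations times:

    from crewai import LLM  # LLM is assumed to be importable from the top-level package

    # `my_crew` stands in for any already-configured Crew instance.
    my_crew.test(n_iterations=2, llm="gpt-4o-mini", inputs={"topic": "AI"})             # string model name
    my_crew.test(n_iterations=2, llm=LLM(model="gpt-4o-mini"), inputs={"topic": "AI"})  # LLM instance

    # Deprecated path: still accepted, but warns.
    my_crew.test(n_iterations=2, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})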
@@ -1087,13 +1087,31 @@ class Crew(BaseModel):
             llm: LLM instance or model name to use for evaluation
             inputs: Optional dictionary of inputs to pass to the crew
         """
-        if not llm and not openai_model_name:
-            raise ValueError("Either llm or openai_model_name must be provided")
+        if openai_model_name:
+            warnings.warn(
+                "openai_model_name is deprecated and will be removed in future versions. Use llm parameter instead.",
+                DeprecationWarning,
+                stacklevel=2
+            )
+
         test_crew = self.copy()
         model = llm if llm else openai_model_name
-        if isinstance(model, str):
-            model = LLM(model=model)
+        try:
+            if not model:
+                raise ValueError(
+                    "Either llm or openai_model_name must be provided. Please provide either "
+                    "a custom LLM instance or an OpenAI model name."
+                )
+            if isinstance(model, LLM):
+                if not hasattr(model, 'model'):
+                    raise ValueError("Provided LLM instance must have a 'model' attribute")
+            elif isinstance(model, str):
+                model = LLM(model=model)
+            else:
+                raise ValueError("LLM must be either a string model name or an LLM instance")
+        except Exception as e:
+            raise ValueError(f"Failed to initialize LLM: {str(e)}")

         self._test_execution_span = test_crew._telemetry.test_execution_span(
             test_crew,
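A minimal sketch of the validation paths added above, assuming `crew` is an already-configured Crew instance (the valid calls really do kick off the crew). Because every check sits inside the try/except, validation errors are re-raised with a "Failed to initialize LLM:" prefix:

    from crewai import LLM  # import path assumed

    crew.test(n_iterations=1, llm="gpt-4o-mini")             # str -> wrapped as LLM(model="gpt-4o-mini")
    crew.test(n_iterations=1, llm=LLM(model="gpt-4o-mini"))   # LLM instance -> checked for a `model` attribute

    try:
        crew.test(n_iterations=1, llm=42)  # neither str nor LLM
    except ValueError as e:
        print(e)  # Failed to initialize LLM: LLM must be either a string model name or an LLM instance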
@@ -38,10 +38,18 @@ class CrewEvaluator:

     def __init__(self, crew, llm: Union[str, LLM]):
         self.crew = crew
-        self.llm = llm if isinstance(llm, LLM) else LLM(model=llm)
+        try:
+            self.llm = llm if isinstance(llm, LLM) else LLM(model=llm)
+            if not hasattr(self.llm, 'model'):
+                raise ValueError("Provided LLM instance must have a 'model' attribute")
+        except Exception as e:
+            raise ValueError(f"Failed to initialize LLM: {str(e)}")
         self._telemetry = Telemetry()
         self._setup_for_evaluating()

+    def __str__(self) -> str:
+        return f"CrewEvaluator(model={str(self.llm)}, iteration={self.iteration})"
+
     def _setup_for_evaluating(self) -> None:
         """Sets up the crew for evaluating."""
         for task in self.crew.tasks:
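For illustration, constructing the evaluator and using the new __str__ might look roughly like this. The module path and the exact repr of the LLM are assumptions; `crew` is an already-configured Crew instance, and `iteration` is a pre-existing attribute of the class:

    from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator  # path assumed
    from crewai import LLM

    evaluator = CrewEvaluator(crew, llm="gpt-4o-mini")              # string is wrapped in LLM(model=...)
    evaluator = CrewEvaluator(crew, llm=LLM(model="gpt-4o-mini"))   # instance is validated and reused

    print(evaluator)  # e.g. CrewEvaluator(model=<LLM object ...>, iteration=0)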
@@ -2862,7 +2862,8 @@ def test_crew_testing_backward_compatibility(kickoff_mock, copy_mock, crew_evaluator):
     copy_mock.return_value = crew

     n_iterations = 2
-    crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
+    with pytest.warns(DeprecationWarning, match="openai_model_name is deprecated"):
+        crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})

     # Ensure kickoff is called on the copied crew
     kickoff_mock.assert_has_calls([
@@ -2900,6 +2901,34 @@ def test_crew_testing_missing_llm(kickoff_mock, copy_mock, crew_evaluator):
     with pytest.raises(ValueError, match="Either llm or openai_model_name must be provided"):
         crew.test(n_iterations)


+@mock.patch("crewai.crew.CrewEvaluator")
+@mock.patch("crewai.crew.Crew.copy")
+@mock.patch("crewai.crew.Crew.kickoff")
+def test_crew_testing_with_invalid_llm(kickoff_mock, copy_mock, crew_evaluator):
+    task = Task(
+        description="Test task",
+        expected_output="Test output",
+        agent=researcher,
+    )
+
+    crew = Crew(
+        agents=[researcher],
+        tasks=[task],
+    )
+
+    # Create a mock for the copied crew
+    copy_mock.return_value = crew
+
+    # Test invalid LLM type
+    with pytest.raises(ValueError, match="Failed to initialize LLM"):
+        crew.test(n_iterations=2, llm={})
+
+    # Test LLM without model attribute
+    class InvalidLLM:
+        def __init__(self): pass
+
+    with pytest.raises(ValueError, match="LLM must be either a string model name or an LLM instance"):
+        crew.test(n_iterations=2, llm=InvalidLLM())
+
+
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_hierarchical_verbose_manager_agent():