From 90aea23dd65961e8bf08a865121309aee5ac2524 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Sun, 9 Feb 2025 22:48:56 +0000
Subject: [PATCH] feat: improve LLM validation and error handling

- Add descriptive error messages with usage context
- Add LLM instance validation
- Add deprecation warning for openai_model_name
- Add string representation to CrewEvaluator
- Add edge case tests

Co-Authored-By: Joe Moura <joao@crewai.com>
---
 src/crewai/crew.py                            | 26 +++++++++++++---
 .../evaluators/crew_evaluator_handler.py      | 10 +++++-
 tests/crew_test.py                            | 31 ++++++++++++++++++-
 3 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index b39029e56..632ae407c 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -1087,13 +1087,31 @@ class Crew(BaseModel):
             llm: LLM instance or model name to use for evaluation
             inputs: Optional dictionary of inputs to pass to the crew
         """
-        if not llm and not openai_model_name:
-            raise ValueError("Either llm or openai_model_name must be provided")
+        if openai_model_name:
+            warnings.warn(
+                "openai_model_name is deprecated and will be removed in future versions. Use llm parameter instead.",
+                DeprecationWarning,
+                stacklevel=2
+            )
         
         test_crew = self.copy()
         model = llm if llm else openai_model_name
-        if isinstance(model, str):
-            model = LLM(model=model)
+
+        try:
+            if not model:
+                raise ValueError(
+                    "Either llm or openai_model_name must be provided. Please provide either "
+                    "a custom LLM instance or an OpenAI model name."
+                )
+            if isinstance(model, LLM):
+                if not hasattr(model, 'model'):
+                    raise ValueError("Provided LLM instance must have a 'model' attribute")
+            elif isinstance(model, str):
+                model = LLM(model=model)
+            else:
+                raise ValueError("LLM must be either a string model name or an LLM instance")
+        except Exception as e:
+            raise ValueError(f"Failed to initialize LLM: {str(e)}")
 
         self._test_execution_span = test_crew._telemetry.test_execution_span(
             test_crew,
diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
index ef9fe0849..c6f7f9ce2 100644
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -38,10 +38,18 @@ class CrewEvaluator:
 
     def __init__(self, crew, llm: Union[str, LLM]):
         self.crew = crew
-        self.llm = llm if isinstance(llm, LLM) else LLM(model=llm)
+        try:
+            self.llm = llm if isinstance(llm, LLM) else LLM(model=llm)
+            if not hasattr(self.llm, 'model'):
+                raise ValueError("Provided LLM instance must have a 'model' attribute")
+        except Exception as e:
+            raise ValueError(f"Failed to initialize LLM: {str(e)}")
         self._telemetry = Telemetry()
         self._setup_for_evaluating()
 
+    def __str__(self) -> str:
+        return f"CrewEvaluator(model={str(self.llm)}, iteration={self.iteration})"
+
     def _setup_for_evaluating(self) -> None:
         """Sets up the crew for evaluating."""
         for task in self.crew.tasks:
diff --git a/tests/crew_test.py b/tests/crew_test.py
index 24c6ebe27..e9f199b7d 100644
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -2862,7 +2862,8 @@ def test_crew_testing_backward_compatibility(kickoff_mock, copy_mock, crew_evalu
     copy_mock.return_value = crew
 
     n_iterations = 2
-    crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
+    with pytest.warns(DeprecationWarning, match="openai_model_name is deprecated"):
+        crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
 
     # Ensure kickoff is called on the copied crew
     kickoff_mock.assert_has_calls([
@@ -2900,6 +2901,34 @@ def test_crew_testing_missing_llm(kickoff_mock, copy_mock, crew_evaluator):
     with pytest.raises(ValueError, match="Either llm or openai_model_name must be provided"):
         crew.test(n_iterations)
 
+@mock.patch("crewai.crew.CrewEvaluator")
+@mock.patch("crewai.crew.Crew.copy")
+@mock.patch("crewai.crew.Crew.kickoff")
+def test_crew_testing_with_invalid_llm(kickoff_mock, copy_mock, crew_evaluator):
+    task = Task(
+        description="Test task",
+        expected_output="Test output",
+        agent=researcher,
+    )
+
+    crew = Crew(
+        agents=[researcher],
+        tasks=[task],
+    )
+
+    # Create a mock for the copied crew
+    copy_mock.return_value = crew
+
+    # Test invalid LLM type
+    with pytest.raises(ValueError, match="Failed to initialize LLM"):
+        crew.test(n_iterations=2, llm={})
+
+    # Test LLM without model attribute
+    class InvalidLLM:
+        def __init__(self): pass
+    with pytest.raises(ValueError, match="LLM must be either a string model name or an LLM instance"):
+        crew.test(n_iterations=2, llm=InvalidLLM())
+
 
 @pytest.mark.vcr(filter_headers=["authorization"])
 def test_hierarchical_verbose_manager_agent():