diff --git a/src/crewai/crew.py b/src/crewai/crew.py
index cf6c65512..28bc3b3b8 100644
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -1081,7 +1081,7 @@ class Crew(BaseModel):
         openai_model_name: Optional[Union[str, LLM]] = None,
         inputs: Optional[Dict[str, Any]] = None,
     ) -> None:
-        """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures.
+        """Test and evaluate the Crew with the given inputs for n iterations.

         Args:
             n_iterations: The number of iterations to run the test.
@@ -1089,6 +1089,9 @@ class Crew(BaseModel):
                 the performance of the agents. If a string is provided, it will be used to create
                 an LLM instance.
             inputs: The inputs to use for the test.
+
+        Raises:
+            ValueError: If openai_model_name is not a string or LLM instance.
         """
         test_crew = self.copy()
diff --git a/src/crewai/utilities/evaluators/crew_evaluator_handler.py b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
index cac3c7125..4c173632b 100644
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -4,6 +4,7 @@ from crewai.llm import LLM
 from collections import defaultdict

 from pydantic import BaseModel, Field
+from crewai.utilities.logger import Logger
 from rich.box import HEAVY_EDGE
 from rich.console import Console
 from rich.table import Table
@@ -42,11 +43,22 @@ class CrewEvaluator:
             crew (Crew): The crew to evaluate
             openai_model_name (Union[str, LLM]): Either a model name string or an LLM instance
                 to use for evaluation. If a string is provided, it will be used to create an
-                LLM instance.
+                LLM instance with default settings. If an LLM instance is provided, its settings
+                (like temperature) will be preserved.
+
+        Raises:
+            ValueError: If openai_model_name is not a string or LLM instance.
         """
         self.crew = crew
-        self.llm = openai_model_name if isinstance(openai_model_name, LLM) else LLM(model=openai_model_name)
+        if not isinstance(openai_model_name, (str, LLM)):
+            raise ValueError(f"Invalid model type '{type(openai_model_name)}'. Expected str or LLM instance.")
+        self.model_instance = openai_model_name if isinstance(openai_model_name, LLM) else LLM(model=openai_model_name)
         self._telemetry = Telemetry()
+        self._logger = Logger()
+        self._logger.log(
+            "info",
+            f"Initializing CrewEvaluator with model: {openai_model_name if isinstance(openai_model_name, str) else openai_model_name.model}"
+        )
         self._setup_for_evaluating()

     def _setup_for_evaluating(self) -> None:
@@ -62,7 +74,7 @@ class CrewEvaluator:
             ),
             backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
             verbose=False,
-            llm=self.llm,
+            llm=self.model_instance,
         )

     def _evaluation_task(
@@ -192,7 +204,11 @@ class CrewEvaluator:
                     self.crew,
                     evaluation_result.pydantic.quality,
                     current_task._execution_time,
-                    self.llm.model,
+                    self.model_instance.model,
+                )
+                self._logger.log(
+                    "info",
+                    f"Task evaluation completed with quality score: {evaluation_result.pydantic.quality}"
                 )
                 self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
                 self.run_execution_times[self.iteration].append(
diff --git a/tests/utilities/evaluators/test_crew_evaluator_handler.py b/tests/utilities/evaluators/test_crew_evaluator_handler.py
index e68fe7409..d358cbb24 100644
--- a/tests/utilities/evaluators/test_crew_evaluator_handler.py
+++ b/tests/utilities/evaluators/test_crew_evaluator_handler.py
@@ -136,14 +136,25 @@ class TestCrewEvaluator:
         """Test that CrewEvaluator correctly handles custom LLM instances."""
         custom_llm = LLM(model="gpt-4", temperature=0.5)
         evaluator = CrewEvaluator(crew_planner.crew, custom_llm)
-        assert evaluator.llm == custom_llm
-        assert evaluator.llm.temperature == 0.5
+        assert evaluator.model_instance == custom_llm
+        assert evaluator.model_instance.temperature == 0.5
+
+    def test_evaluator_with_invalid_model_type(self, crew_planner):
+        """Test that CrewEvaluator raises error for invalid model type."""
+        with pytest.raises(ValueError, match="Invalid model type"):
+            CrewEvaluator(crew_planner.crew, 123)
+
+    def test_evaluator_preserves_model_settings(self, crew_planner):
+        """Test that CrewEvaluator preserves model settings."""
+        custom_llm = LLM(model="gpt-4", temperature=0.7)
+        evaluator = CrewEvaluator(crew_planner.crew, custom_llm)
+        assert evaluator.model_instance.temperature == 0.7

     def test_evaluator_with_model_name(self, crew_planner):
         """Test that CrewEvaluator correctly handles string model names."""
         evaluator = CrewEvaluator(crew_planner.crew, "gpt-4")
-        assert isinstance(evaluator.llm, LLM)
-        assert evaluator.llm.model == "gpt-4"
+        assert isinstance(evaluator.model_instance, LLM)
+        assert evaluator.model_instance.model == "gpt-4"

     def test_evaluate(self, crew_planner):
         task_output = TaskOutput(
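
Usage sketch of the API changed by this patch. It assumes a `crew` object (a Crew with agents and tasks) has already been constructed elsewhere, e.g. as in the test fixtures above; everything else shown here comes from the patch itself.

```python
from crewai.llm import LLM
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator

# `crew` is an already-built Crew instance (construction omitted here).

# A plain model-name string still works; an LLM with default settings is created from it.
evaluator = CrewEvaluator(crew, "gpt-4")
assert isinstance(evaluator.model_instance, LLM)
assert evaluator.model_instance.model == "gpt-4"

# An LLM instance is stored as-is, so settings such as temperature are preserved.
custom_llm = LLM(model="gpt-4", temperature=0.5)
evaluator = CrewEvaluator(crew, custom_llm)
assert evaluator.model_instance == custom_llm
assert evaluator.model_instance.temperature == 0.5

# Any other type is rejected before evaluation is set up.
try:
    CrewEvaluator(crew, 123)
except ValueError as err:
    print(err)  # Invalid model type '<class 'int'>'. Expected str or LLM instance.
```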