diff --git a/src/crewai/experimental/evaluation/base_evaluator.py b/src/crewai/experimental/evaluation/base_evaluator.py
index 076810a3c..1fe3f58a1 100644
--- a/src/crewai/experimental/evaluation/base_evaluator.py
+++ b/src/crewai/experimental/evaluation/base_evaluator.py
@@ -1,14 +1,15 @@
 import abc
 import enum
 from enum import Enum
-from typing import Any, Dict, List, Optional
+from typing import Any, Optional
 
 from pydantic import BaseModel, Field
 
 from crewai.agent import Agent
-from crewai.task import Task
 from crewai.llm import BaseLLM
-from crewai.utilities.llm_utils import create_llm
+from crewai.task import Task
+from crewai.utilities.llm_utils import create_default_llm, create_llm
+
 
 class MetricCategory(enum.Enum):
     GOAL_ALIGNMENT = "goal_alignment"
@@ -19,7 +20,7 @@ class MetricCategory(enum.Enum):
     TOOL_INVOCATION = "tool_invocation"
 
     def title(self):
-        return self.value.replace('_', ' ').title()
+        return self.value.replace("_", " ").title()
 
 
 class EvaluationScore(BaseModel):
@@ -27,15 +28,13 @@ class EvaluationScore(BaseModel):
         default=5.0,
         description="Numeric score from 0-10 where 0 is worst and 10 is best, None if not applicable",
         ge=0.0,
-        le=10.0
+        le=10.0,
     )
     feedback: str = Field(
-        default="",
-        description="Detailed feedback explaining the evaluation score"
+        default="", description="Detailed feedback explaining the evaluation score"
     )
     raw_response: str | None = Field(
-        default=None,
-        description="Raw response from the evaluator (e.g., LLM)"
+        default=None, description="Raw response from the evaluator (e.g., LLM)"
     )
 
     def __str__(self) -> str:
@@ -46,7 +45,9 @@ class EvaluationScore(BaseModel):
 
 class BaseEvaluator(abc.ABC):
     def __init__(self, llm: BaseLLM | None = None):
-        self.llm: BaseLLM | None = create_llm(llm)
+        self.llm: BaseLLM | None = (
+            create_llm(llm) if llm is not None else create_default_llm()
+        )
 
     @property
     @abc.abstractmethod
@@ -57,7 +58,7 @@ class BaseEvaluator(abc.ABC):
     def evaluate(
         self,
         agent: Agent,
-        execution_trace: Dict[str, Any],
+        execution_trace: dict[str, Any],
         final_output: Any,
         task: Task | None = None,
     ) -> EvaluationScore:
@@ -67,9 +68,8 @@ class AgentEvaluationResult(BaseModel):
 class AgentEvaluationResult(BaseModel):
     agent_id: str = Field(description="ID of the evaluated agent")
     task_id: str = Field(description="ID of the task that was executed")
-    metrics: Dict[MetricCategory, EvaluationScore] = Field(
-        default_factory=dict,
-        description="Evaluation scores for each metric category"
+    metrics: dict[MetricCategory, EvaluationScore] = Field(
+        default_factory=dict, description="Evaluation scores for each metric category"
     )
 
 
@@ -81,33 +81,23 @@ class AggregationStrategy(Enum):
 
 
 class AgentAggregatedEvaluationResult(BaseModel):
-    agent_id: str = Field(
-        default="",
-        description="ID of the agent"
-    )
-    agent_role: str = Field(
-        default="",
-        description="Role of the agent"
-    )
+    agent_id: str = Field(default="", description="ID of the agent")
+    agent_role: str = Field(default="", description="Role of the agent")
     task_count: int = Field(
-        default=0,
-        description="Number of tasks included in this aggregation"
+        default=0, description="Number of tasks included in this aggregation"
     )
     aggregation_strategy: AggregationStrategy = Field(
         default=AggregationStrategy.SIMPLE_AVERAGE,
-        description="Strategy used for aggregation"
+        description="Strategy used for aggregation",
     )
-    metrics: Dict[MetricCategory, EvaluationScore] = Field(
-        default_factory=dict,
-        description="Aggregated metrics across all tasks"
+    metrics: dict[MetricCategory, EvaluationScore] = Field(
+        default_factory=dict, description="Aggregated metrics across all tasks"
     )
-    task_results: List[str] = Field(
-        default_factory=list,
-        description="IDs of tasks included in this aggregation"
+    task_results: list[str] = Field(
+        default_factory=list, description="IDs of tasks included in this aggregation"
    )
     overall_score: Optional[float] = Field(
-        default=None,
-        description="Overall score for this agent"
+        default=None, description="Overall score for this agent"
     )
 
     def __str__(self) -> str:
@@ -119,7 +109,7 @@ class AgentAggregatedEvaluationResult(BaseModel):
             result += f"\n\n- {category.value.upper()}: {score.score}/10\n"
 
             if score.feedback:
-                detailed_feedback = "\n ".join(score.feedback.split('\n'))
+                detailed_feedback = "\n ".join(score.feedback.split("\n"))
                 result += f" {detailed_feedback}\n"
 
-        return result
\ No newline at end of file
+        return result
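
A minimal usage sketch of the models touched by this patch (illustrative only, not part of the diff; the import path follows the file's location under src/, and the field names and enum members are the ones declared above):

    from crewai.experimental.evaluation.base_evaluator import (
        EvaluationScore,
        MetricCategory,
    )

    # Hypothetical values; `score` and `feedback` match the Field(...) declarations above.
    score = EvaluationScore(score=8.0, feedback="Tool calls matched the task intent.")
    metrics = {MetricCategory.TOOL_INVOCATION: score}

    # title() turns the enum value into a display heading, e.g. "Tool Invocation".
    print(MetricCategory.TOOL_INVOCATION.title(), metrics[MetricCategory.TOOL_INVOCATION].score)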