refactor: enhance LLM validation and error handling

Co-Authored-By: Joe Moura <joao@crewai.com>
test: update test cassettes and dependencies
2026-05-01 07:13:00 +00:00 · 2025-02-09 22:22:45 +00:00 · 2025-02-09 22:12:40 +00:00 · 2025-02-09 22:12:04 +00:00
5 changed files with 360 additions and 257 deletions
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -1075,51 +1075,36 @@ class Crew(BaseModel):
    def test(
        self,
        n_iterations: int,
-        openai_model_name: Optional[str] = None,
        llm: Optional[Union[str, LLM]] = None,
+        openai_model_name: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Test and evaluate the Crew with the given inputs for n iterations.
-
-        Args:
-            n_iterations: Number of test iterations to run
-            openai_model_name: (Deprecated) Name of OpenAI model to use for evaluation
-            llm: LLM instance or model name to use for evaluation
-            inputs: Optional dictionary of inputs to pass to the crew
-        """
-        if openai_model_name:
-            warnings.warn(
-                "openai_model_name is deprecated and will be removed in future versions. Use llm parameter instead.",
-                DeprecationWarning,
-                stacklevel=2
-            )
        
+        Args:
+            n_iterations: Number of iterations to run
+            llm: LLM instance or model name to use for evaluation
+            openai_model_name: (Deprecated) OpenAI model name for backward compatibility
+            inputs: Optional inputs for the crew
+            
+        Raises:
+            ValueError: If llm parameter is neither a string nor LLM instance
+        """
+        if llm and not isinstance(llm, (str, LLM)):
+            raise ValueError("llm parameter must be either a string model name or LLM instance")
        test_crew = self.copy()
-        model = llm if llm else openai_model_name

-        try:
-            if not model:
-                raise ValueError(
-                    "Either llm or openai_model_name must be provided. Please provide either "
-                    "a custom LLM instance or an OpenAI model name."
-                )
-            if isinstance(model, LLM):
-                if not hasattr(model, 'model'):
-                    raise ValueError("Provided LLM instance must have a 'model' attribute")
-            elif isinstance(model, str):
-                model = LLM(model=model)
-            else:
-                raise ValueError("LLM must be either a string model name or an LLM instance")
-        except Exception as e:
-            raise ValueError(f"Failed to initialize LLM: {str(e)}")
+        # Handle backward compatibility
+        if openai_model_name:
+            llm = openai_model_name

        self._test_execution_span = test_crew._telemetry.test_execution_span(
            test_crew,
            n_iterations,
            inputs,
-            str(model),  # type: ignore[arg-type]
-        )  # type: ignore[arg-type]
-        evaluator = CrewEvaluator(test_crew, model)
+            str(llm) if isinstance(llm, str) else (llm.model if llm else None),
+        )
+        evaluator = CrewEvaluator(test_crew, llm)

        for i in range(1, n_iterations + 1):
            evaluator.set_iteration(i)
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -1,20 +1,29 @@
+import logging
 from collections import defaultdict
-
-from typing import Union
+from typing import Optional, Union

 from pydantic import BaseModel, Field
-
-from crewai.llm import LLM
 from rich.box import HEAVY_EDGE
 from rich.console import Console
 from rich.table import Table

 from crewai.agent import Agent
+from crewai.llm import LLM
 from crewai.task import Task
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry


+class CrewEvaluationError(Exception):
+    """Raised when there is an error during crew evaluation."""
+    pass
+
+
+# Default values for evaluation metrics
+DEFAULT_TASK_SCORE = 9.0
+DEFAULT_EXECUTION_TIME = 60  # seconds
+
+
 class TaskEvaluationPydanticOutput(BaseModel):
    quality: float = Field(
        description="A score from 1 to 10 evaluating on completion, quality, and overall performance from the task_description and task_expected_output to the actual Task Output."
@@ -27,7 +36,7 @@ class CrewEvaluator:

    Attributes:
        crew (Crew): The crew of agents to evaluate.
-        llm (LLM): The language model to use for evaluating the performance of the agents.
+        llm (Optional[LLM]): The language model used to evaluate the agents' performance; a model-name string is wrapped in an LLM, and it is None when no model is given.
        tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task.
        iteration (int): The current iteration of the evaluation.
    """
@@ -36,20 +45,30 @@ class CrewEvaluator:
    run_execution_times: defaultdict = defaultdict(list)
    iteration: int = 0

-    def __init__(self, crew, llm: Union[str, LLM]):
+    def __init__(self, crew, llm: Optional[Union[str, LLM]] = None):
+        """Initialize CrewEvaluator.
+        
+        Args:
+            crew: The crew to evaluate
+            llm: LLM instance or model name for evaluation
+        """
        self.crew = crew
-        try:
-            self.llm = llm if isinstance(llm, LLM) else LLM(model=llm)
-            if not hasattr(self.llm, 'model'):
-                raise ValueError("Provided LLM instance must have a 'model' attribute")
-        except Exception as e:
-            raise ValueError(f"Failed to initialize LLM: {str(e)}")
+        logging.info(f"Initializing CrewEvaluator with LLM: {llm}")
+        
+        # Initialize tasks_scores with default values to avoid division by zero
+        self.tasks_scores = defaultdict(list)
+        for i in range(1, len(crew.tasks) + 1):
+            self.tasks_scores[i] = [DEFAULT_TASK_SCORE]
+        # Initialize run_execution_times with default values
+        self.run_execution_times = defaultdict(list)
+        for i in range(1, len(crew.tasks) + 1):
+            self.run_execution_times[i] = [DEFAULT_EXECUTION_TIME]
+        self.llm = llm if isinstance(llm, LLM) else (
+            LLM(model=llm) if isinstance(llm, str) else None
+        )
        self._telemetry = Telemetry()
        self._setup_for_evaluating()

-    def __str__(self) -> str:
-        return f"CrewEvaluator(model={str(self.llm)}, iteration={self.iteration})"
-
    def _setup_for_evaluating(self) -> None:
        """Sets up the crew for evaluating."""
        for task in self.crew.tasks:
@@ -169,35 +188,57 @@ class CrewEvaluator:
        console.print(table)

    def evaluate(self, task_output: TaskOutput):
-        """Evaluates the performance of the agents in the crew based on the tasks they have performed."""
-        current_task = None
-        for task in self.crew.tasks:
-            if task.description == task_output.description:
-                current_task = task
-                break
+        """Evaluates the performance of the agents in the crew based on the tasks they have performed.
+        
+        Args:
+            task_output: The output from the task execution to evaluate
+            
+        Raises:
+            CrewEvaluationError: If evaluation fails or produces unexpected results
+            ValueError: If required inputs are missing or invalid
+        """
+        try:
+            # Find the matching task
+            current_task = None
+            for task in self.crew.tasks:
+                if task.description == task_output.description:
+                    current_task = task
+                    break

-        if not current_task or not task_output:
-            raise ValueError(
-                "Task to evaluate and task output are required for evaluation"
+            if not current_task or not task_output:
+                raise ValueError(
+                    "Task to evaluate and task output are required for evaluation"
+                )
+
+            # Create and execute evaluation task
+            evaluator_agent = self._evaluator_agent()
+            evaluation_task = self._evaluation_task(
+                evaluator_agent, current_task, task_output.raw
            )

-        evaluator_agent = self._evaluator_agent()
-        evaluation_task = self._evaluation_task(
-            evaluator_agent, current_task, task_output.raw
-        )
+            logging.info(f"Evaluating task: {current_task.description}")
+            evaluation_result = evaluation_task.execute_sync()

-        evaluation_result = evaluation_task.execute_sync()
+            # Process evaluation results
+            if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
+                self._test_result_span = self._telemetry.individual_test_result_span(
+                    self.crew,
+                    evaluation_result.pydantic.quality,
+                    current_task._execution_time,
+                    str(self.llm.model if self.llm else None),
+                )
+                self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
+                self.run_execution_times[self.iteration].append(
+                    current_task._execution_time
+                )
+                logging.info(f"Task evaluation completed with score: {evaluation_result.pydantic.quality}")
+            else:
+                raise CrewEvaluationError("Evaluation result is not in the expected format")

-        if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
-            self._test_result_span = self._telemetry.individual_test_result_span(
-                self.crew,
-                evaluation_result.pydantic.quality,
-                current_task._execution_time,
-                str(self.llm),
-            )
-            self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
-            self.run_execution_times[self.iteration].append(
-                current_task._execution_time
-            )
-        else:
-            raise ValueError("Evaluation result is not in the expected format")
+        except ValueError as e:
+            logging.error(f"Invalid input for task evaluation: {e}")
+            raise
+
+        except Exception as e:
+            logging.error(f"Error during task evaluation: {e}")
+            raise CrewEvaluationError(f"Failed to evaluate task: {e}")
--- a/tests/cassettes/test_agent_human_input.yaml
+++ b/tests/cassettes/test_agent_human_input.yaml
@@ -1,4 +1,87 @@
 interactions:
+- request:
+    body: !!binary |
+      CqcXCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS/hYKEgoQY3Jld2FpLnRl
+      bGVtZXRyeRJ5ChBuJJtOdNaB05mOW/p3915eEgj2tkAd3rZcASoQVG9vbCBVc2FnZSBFcnJvcjAB
+      OYa7/URvKBUYQUpcFEVvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoPCgNsbG0SCAoG
+      Z3B0LTRvegIYAYUBAAEAABLJBwoQifhX01E5i+5laGdALAlZBBIIBuGM1aN+OPgqDENyZXcgQ3Jl
+      YXRlZDABORVGruBvKBUYQaipwOBvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoaCg5w
+      eXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19rZXkSIgogN2U2NjA4OTg5ODU5YTY3ZWVj
+      ODhlZWY3ZmNlODUyMjVKMQoHY3Jld19pZBImCiRiOThiNWEwMC01YTI1LTQxMDctYjQwNS1hYmYz
+      MjBhOGYzYThKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVlbnRpYWxKEQoLY3Jld19tZW1vcnkSAhAA
+      ShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVjcmV3X251bWJlcl9vZl9hZ2VudHMSAhgB
+      SuQCCgtjcmV3X2FnZW50cxLUAgrRAlt7ImtleSI6ICIyMmFjZDYxMWU0NGVmNWZhYzA1YjUzM2Q3
+      NWU4ODkzYiIsICJpZCI6ICJkNWIyMzM1YS0yMmIyLTQyZWEtYmYwNS03OTc3NmU3MmYzOTIiLCAi
+      cm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAy
+      MCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJn
+      cHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4
+      ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsi
+      Z2V0IGdyZWV0aW5ncyJdfV1KkgIKCmNyZXdfdGFza3MSgwIKgAJbeyJrZXkiOiAiYTI3N2IzNGIy
+      YzE0NmYwYzU2YzVlMTM1NmU4ZjhhNTciLCAiaWQiOiAiMjJiZWMyMzEtY2QyMS00YzU4LTgyN2Ut
+      MDU4MWE4ZjBjMTExIiwgImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6
+      IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJhZ2VudF9rZXkiOiAiMjJh
+      Y2Q2MTFlNDRlZjVmYWMwNWI1MzNkNzVlODg5M2IiLCAidG9vbHNfbmFtZXMiOiBbImdldCBncmVl
+      dGluZ3MiXX1degIYAYUBAAEAABKOAgoQ5WYoxRtTyPjge4BduhL0rRIIv2U6rvWALfwqDFRhc2sg
+      Q3JlYXRlZDABOX068uBvKBUYQZkv8+BvKBUYSi4KCGNyZXdfa2V5EiIKIDdlNjYwODk4OTg1OWE2
+      N2VlYzg4ZWVmN2ZjZTg1MjI1SjEKB2NyZXdfaWQSJgokYjk4YjVhMDAtNWEyNS00MTA3LWI0MDUt
+      YWJmMzIwYThmM2E4Si4KCHRhc2tfa2V5EiIKIGEyNzdiMzRiMmMxNDZmMGM1NmM1ZTEzNTZlOGY4
+      YTU3SjEKB3Rhc2tfaWQSJgokMjJiZWMyMzEtY2QyMS00YzU4LTgyN2UtMDU4MWE4ZjBjMTExegIY
+      AYUBAAEAABKQAQoQXyeDtJDFnyp2Fjk9YEGTpxIIaNE7gbhPNYcqClRvb2wgVXNhZ2UwATkaXTvj
+      bygVGEGvx0rjbygVGEoaCg5jcmV3YWlfdmVyc2lvbhIICgYwLjg2LjBKHAoJdG9vbF9uYW1lEg8K
+      DUdldCBHcmVldGluZ3NKDgoIYXR0ZW1wdHMSAhgBegIYAYUBAAEAABLVBwoQMWfznt0qwauEzl7T
+      UOQxRBII9q+pUS5EdLAqDENyZXcgQ3JlYXRlZDABORONPORvKBUYQSAoS+RvKBUYShoKDmNyZXdh
+      aV92ZXJzaW9uEggKBjAuODYuMEoaCg5weXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19r
+      ZXkSIgogYzMwNzYwMDkzMjY3NjE0NDRkNTdjNzFkMWRhM2YyN2NKMQoHY3Jld19pZBImCiQ3OTQw
+      MTkyNS1iOGU5LTQ3MDgtODUzMC00NDhhZmEzYmY4YjBKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVl
+      bnRpYWxKEQoLY3Jld19tZW1vcnkSAhAAShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVj
+      cmV3X251bWJlcl9vZl9hZ2VudHMSAhgBSuoCCgtjcmV3X2FnZW50cxLaAgrXAlt7ImtleSI6ICI5
+      OGYzYjFkNDdjZTk2OWNmMDU3NzI3Yjc4NDE0MjVjZCIsICJpZCI6ICI5OTJkZjYyZi1kY2FiLTQy
+      OTUtOTIwNi05MDBkNDExNGIxZTkiLCAicm9sZSI6ICJGcmllbmRseSBOZWlnaGJvciIsICJ2ZXJi
+      b3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAyMCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25f
+      Y2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJncHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJs
+      ZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9s
+      aW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsiZGVjaWRlIGdyZWV0aW5ncyJdfV1KmAIKCmNyZXdf
+      dGFza3MSiQIKhgJbeyJrZXkiOiAiODBkN2JjZDQ5MDk5MjkwMDgzODMyZjBlOTgzMzgwZGYiLCAi
+      aWQiOiAiMmZmNjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3IiwgImFzeW5jX2V4ZWN1
+      dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJGcmll
+      bmRseSBOZWlnaGJvciIsICJhZ2VudF9rZXkiOiAiOThmM2IxZDQ3Y2U5NjljZjA1NzcyN2I3ODQx
+      NDI1Y2QiLCAidG9vbHNfbmFtZXMiOiBbImRlY2lkZSBncmVldGluZ3MiXX1degIYAYUBAAEAABKO
+      AgoQnjTp5boK7/+DQxztYIpqihIIgGnMUkBtzHEqDFRhc2sgQ3JlYXRlZDABOcpYcuRvKBUYQalE
+      c+RvKBUYSi4KCGNyZXdfa2V5EiIKIGMzMDc2MDA5MzI2NzYxNDQ0ZDU3YzcxZDFkYTNmMjdjSjEK
+      B2NyZXdfaWQSJgokNzk0MDE5MjUtYjhlOS00NzA4LTg1MzAtNDQ4YWZhM2JmOGIwSi4KCHRhc2tf
+      a2V5EiIKIDgwZDdiY2Q0OTA5OTI5MDA4MzgzMmYwZTk4MzM4MGRmSjEKB3Rhc2tfaWQSJgokMmZm
+      NjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3egIYAYUBAAEAABKTAQoQ26H9pLUgswDN
+      p9XhJwwL6BIIx3bw7mAvPYwqClRvb2wgVXNhZ2UwATmy7NPlbygVGEEvb+HlbygVGEoaCg5jcmV3
+      YWlfdmVyc2lvbhIICgYwLjg2LjBKHwoJdG9vbF9uYW1lEhIKEERlY2lkZSBHcmVldGluZ3NKDgoI
+      YXR0ZW1wdHMSAhgBegIYAYUBAAEAAA==
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '2986'
+      Content-Type:
+      - application/x-protobuf
+      User-Agent:
+      - OTel-OTLP-Exporter-Python/1.27.0
+    method: POST
+    uri: https://telemetry.crewai.com:4319/v1/traces
+  response:
+    body:
+      string: "\n\0"
+    headers:
+      Content-Length:
+      - '2'
+      Content-Type:
+      - application/x-protobuf
+      Date:
+      - Fri, 27 Dec 2024 22:14:53 GMT
+    status:
+      code: 200
+      message: OK
 - request:
    body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour
      personal goal is: test goal\nTo give my best complete final answer to the task
@@ -22,18 +105,20 @@ interactions:
      - '824'
      content-type:
      - application/json
+      cookie:
+      - _cfuvid=ePJSDFdHag2D8lj21_ijAMWjoA6xfnPNxN4uekvC728-1727226247743-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.52.1
      x-stainless-arch:
-      - arm64
+      - x64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
-      - MacOS
+      - Linux
      x-stainless-package-version:
      - 1.52.1
      x-stainless-raw-response:
@@ -47,8 +132,8 @@ interactions:
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
-    content: "{\n  \"id\": \"chatcmpl-AaqIIsTxhvf75xvuu7gQScIlRSKbW\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733344190,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+    content: "{\n  \"id\": \"chatcmpl-AjCtZLLrWi8ZASpP9bz6HaCV7xBIn\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1735337693,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
      \"assistant\",\n        \"content\": \"I now can give a great answer  \\nFinal
      Answer: Hi\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
@@ -57,12 +142,12 @@ interactions:
      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_0705bf87c0\"\n}\n"
+      \"fp_0aa8d3e20b\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-RAY:
-      - 8ece8cfc3b1f4532-ATL
+      - 8f8caa83deca756b-SEA
      Connection:
      - keep-alive
      Content-Encoding:
@@ -70,14 +155,14 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Wed, 04 Dec 2024 20:29:50 GMT
+      - Fri, 27 Dec 2024 22:14:53 GMT
      Server:
      - cloudflare
      Set-Cookie:
-      - __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
-        path=/; expires=Wed, 04-Dec-24 20:59:50 GMT; domain=.api.openai.com; HttpOnly;
+      - __cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw;
+        path=/; expires=Fri, 27-Dec-24 22:44:53 GMT; domain=.api.openai.com; HttpOnly;
        Secure; SameSite=None
-      - _cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000;
+      - _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
      Transfer-Encoding:
      - chunked
@@ -90,7 +175,7 @@ interactions:
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
-      - '313'
+      - '404'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
@@ -108,7 +193,7 @@ interactions:
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
-      - req_9fd9a8ee688045dcf7ac5f6fdf689372
+      - req_6ac84634bff9193743c4b0911c09b4a6
    http_version: HTTP/1.1
    status_code: 200
 - request:
@@ -131,20 +216,20 @@ interactions:
      content-type:
      - application/json
      cookie:
-      - __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
-        _cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
+      - _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
+        __cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.52.1
      x-stainless-arch:
-      - arm64
+      - x64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
-      - MacOS
+      - Linux
      x-stainless-package-version:
      - 1.52.1
      x-stainless-raw-response:
@@ -158,8 +243,8 @@ interactions:
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
-    content: "{\n  \"id\": \"chatcmpl-AaqIIaQlLyoyPmk909PvAIfA2TmJL\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733344190,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+    content: "{\n  \"id\": \"chatcmpl-AjCtZNlWdrrPZhq0MJDqd16sMuQEJ\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1735337693,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
      \"assistant\",\n        \"content\": \"True\",\n        \"refusal\": null\n
      \     },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n
@@ -168,12 +253,12 @@ interactions:
      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_0705bf87c0\"\n}\n"
+      \"fp_0aa8d3e20b\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-RAY:
-      - 8ece8d060b5e4532-ATL
+      - 8f8caa87094f756b-SEA
      Connection:
      - keep-alive
      Content-Encoding:
@@ -181,7 +266,7 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Wed, 04 Dec 2024 20:29:50 GMT
+      - Fri, 27 Dec 2024 22:14:53 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
@@ -195,7 +280,7 @@ interactions:
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
-      - '375'
+      - '156'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
@@ -213,7 +298,7 @@ interactions:
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
-      - req_be7cb475e0859a82c37ee3f2871ea5ea
+      - req_ec74bef2a9ef7b2144c03fd7f7bbeab0
    http_version: HTTP/1.1
    status_code: 200
 - request:
@@ -242,20 +327,20 @@ interactions:
      content-type:
      - application/json
      cookie:
-      - __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
-        _cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
+      - _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
+        __cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.52.1
      x-stainless-arch:
-      - arm64
+      - x64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
-      - MacOS
+      - Linux
      x-stainless-package-version:
      - 1.52.1
      x-stainless-raw-response:
@@ -269,22 +354,23 @@ interactions:
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
-    content: "{\n  \"id\": \"chatcmpl-AaqIJAAxpVfUOdrsgYKHwfRlHv4RS\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733344191,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+    content: "{\n  \"id\": \"chatcmpl-AjCtZGv4f3h7GDdhyOy9G0sB1lRgC\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1735337693,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"Thought: I now can give a great answer
-      \ \\nFinal Answer: Hello\",\n        \"refusal\": null\n      },\n      \"logprobs\":
-      null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-      188,\n    \"completion_tokens\": 14,\n    \"total_tokens\": 202,\n    \"prompt_tokens_details\":
-      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      \"assistant\",\n        \"content\": \"Thought: I understand the feedback and
+      will adjust my response accordingly.  \\nFinal Answer: Hello\",\n        \"refusal\":
+      null\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+      \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 188,\n    \"completion_tokens\":
+      18,\n    \"total_tokens\": 206,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
+      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_0705bf87c0\"\n}\n"
+      \"fp_0aa8d3e20b\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-RAY:
-      - 8ece8d090fc34532-ATL
+      - 8f8caa88cac4756b-SEA
      Connection:
      - keep-alive
      Content-Encoding:
@@ -292,7 +378,7 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Wed, 04 Dec 2024 20:29:51 GMT
+      - Fri, 27 Dec 2024 22:14:54 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
@@ -306,7 +392,7 @@ interactions:
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
-      - '484'
+      - '358'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
@@ -324,7 +410,7 @@ interactions:
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
-      - req_5bf4a565ad6c2567a1ed204ecac89134
+      - req_ae1ab6b206d28ded6fee3c83ed0c2ab7
    http_version: HTTP/1.1
    status_code: 200
 - request:
@@ -346,20 +432,20 @@ interactions:
      content-type:
      - application/json
      cookie:
-      - __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
-        _cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
+      - _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
+        __cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.52.1
      x-stainless-arch:
-      - arm64
+      - x64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
-      - MacOS
+      - Linux
      x-stainless-package-version:
      - 1.52.1
      x-stainless-raw-response:
@@ -373,8 +459,8 @@ interactions:
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
-    content: "{\n  \"id\": \"chatcmpl-AaqIJqyG8vl9mxj2qDPZgaxyNLLIq\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733344191,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+    content: "{\n  \"id\": \"chatcmpl-AjCtaiHL4TY8Dssk0j2miqmjrzquy\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1735337694,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
      \"assistant\",\n        \"content\": \"False\",\n        \"refusal\": null\n
      \     },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n
@@ -383,12 +469,12 @@ interactions:
      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_0705bf87c0\"\n}\n"
+      \"fp_0aa8d3e20b\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-RAY:
-      - 8ece8d0cfdeb4532-ATL
+      - 8f8caa8bdd26756b-SEA
      Connection:
      - keep-alive
      Content-Encoding:
@@ -396,7 +482,7 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Wed, 04 Dec 2024 20:29:51 GMT
+      - Fri, 27 Dec 2024 22:14:54 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
@@ -410,7 +496,7 @@ interactions:
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
-      - '341'
+      - '184'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
@@ -428,7 +514,7 @@ interactions:
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
-      - req_5554bade8ceda00cf364b76a51b708ff
+      - req_652891f79c1104a7a8436275d78a69f1
    http_version: HTTP/1.1
    status_code: 200
 version: 1
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -13,8 +13,10 @@ import pytest
 from crewai.agent import Agent
 from crewai.agents.cache import CacheHandler
 from crewai.crew import Crew
-from crewai.llm import LLM
 from crewai.crews.crew_output import CrewOutput
+from collections import defaultdict
+from crewai.llm import LLM
+from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator
 from crewai.memory.contextual.contextual_memory import ContextualMemory
 from crewai.process import Process
 from crewai.task import Task
@@ -2813,7 +2815,60 @@ def test_conditional_should_execute():
@mock.patch("crewai.crew.CrewEvaluator")
@mock.patch("crewai.crew.Crew.copy")
@mock.patch("crewai.crew.Crew.kickoff")
-def test_crew_testing_with_custom_llm(kickoff_mock, copy_mock, crew_evaluator):
+def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):
+    task = Task(
+        description="Test task description",
+        expected_output="Test output",
+        agent=researcher,
+    )
+
+    crew = Crew(
+        agents=[researcher],
+        tasks=[task],
+    )
+
+    # Create a mock for the copied crew
+    copy_mock.return_value = crew
+
+    n_iterations = 2
+    crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
+
+    # Ensure kickoff is called on the copied crew
+    kickoff_mock.assert_has_calls(
+        [mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
+    )
+
+    crew_evaluator.assert_has_calls(
+        [
+            mock.call(crew, mock.ANY),
+            mock.call().set_iteration(1),
+            mock.call().set_iteration(2),
+            mock.call().print_crew_evaluation_result(),
+        ]
+    )
+
+
+@mock.patch("crewai.crew.CrewEvaluator")
+@mock.patch("crewai.crew.Crew.copy")
+@mock.patch("crewai.crew.Crew.kickoff")
+def test_crew_testing_with_invalid_llm(kickoff_mock, copy_mock, crew_evaluator_mock):
+    """Test that Crew.test() properly validates LLM input."""
+    task = Task(
+        description="Test task",
+        expected_output="Test output",
+        agent=researcher,
+    )
+    crew = Crew(agents=[researcher], tasks=[task])
+    
+    with pytest.raises(ValueError, match="llm parameter must be either"):
+        crew.test(2, llm=123)  # Invalid type
+
+
+@mock.patch("crewai.crew.CrewEvaluator")
+@mock.patch("crewai.crew.Crew.copy")
+@mock.patch("crewai.crew.Crew.kickoff")
+def test_crew_testing_with_custom_llm(kickoff_mock, copy_mock, crew_evaluator_mock):
+    """Test that Crew.test() works with both string and LLM instance parameters."""
    task = Task(
        description="Test task",
        expected_output="Test output",
@@ -2828,106 +2883,36 @@ def test_crew_testing_with_custom_llm(kickoff_mock, copy_mock, crew_evaluator):
    # Create a mock for the copied crew
    copy_mock.return_value = crew

+    # Create a mock evaluator
+    mock_evaluator = mock.MagicMock()
+    mock_evaluator.print_crew_evaluation_result = mock.MagicMock()
+    mock_evaluator.set_iteration = mock.MagicMock()
+
+    # Mock the CrewEvaluator class
+    crew_evaluator_mock.return_value = mock_evaluator
+
+    # Test with string model name
+    crew.test(2, llm="gpt-4o-mini")
+    crew_evaluator_mock.assert_called_with(crew, "gpt-4o-mini")
+    mock_evaluator.set_iteration.assert_has_calls([mock.call(1), mock.call(2)])
+    mock_evaluator.print_crew_evaluation_result.assert_called_once()
+    crew_evaluator_mock.reset_mock()
+    mock_evaluator.reset_mock()
+
+    # Test with LLM instance
    custom_llm = LLM(model="gpt-4o-mini")
-    n_iterations = 2
-    crew.test(n_iterations, llm=custom_llm)
+    crew.test(2, llm=custom_llm)
+    crew_evaluator_mock.assert_called_with(crew, custom_llm)
+    mock_evaluator.set_iteration.assert_has_calls([mock.call(1), mock.call(2)])
+    mock_evaluator.print_crew_evaluation_result.assert_called_once()
+    crew_evaluator_mock.reset_mock()
+    mock_evaluator.reset_mock()

-    # Ensure kickoff is called on the copied crew
-    kickoff_mock.assert_has_calls([mock.call(inputs=None), mock.call(inputs=None)])
-
-    # Verify CrewEvaluator was called with custom LLM
-    crew_evaluator.assert_has_calls([
-        mock.call(crew, custom_llm),
-        mock.call().set_iteration(1),
-        mock.call().set_iteration(2),
-        mock.call().print_crew_evaluation_result(),
-    ])
-
-@mock.patch("crewai.crew.CrewEvaluator")
-@mock.patch("crewai.crew.Crew.copy")
-@mock.patch("crewai.crew.Crew.kickoff")
-def test_crew_testing_backward_compatibility(kickoff_mock, copy_mock, crew_evaluator):
-    task = Task(
-        description="Test task",
-        expected_output="Test output",
-        agent=researcher,
-    )
-
-    crew = Crew(
-        agents=[researcher],
-        tasks=[task],
-    )
-
-    # Create a mock for the copied crew
-    copy_mock.return_value = crew
-
-    n_iterations = 2
-    with pytest.warns(DeprecationWarning, match="openai_model_name is deprecated"):
-        crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
-
-    # Ensure kickoff is called on the copied crew
-    kickoff_mock.assert_has_calls([
-        mock.call(inputs={"topic": "AI"}),
-        mock.call(inputs={"topic": "AI"})
-    ])
-
-    # Verify CrewEvaluator was called with string model name
-    crew_evaluator.assert_has_calls([
-        mock.call(crew, mock.ANY),
-        mock.call().set_iteration(1),
-        mock.call().set_iteration(2),
-        mock.call().print_crew_evaluation_result(),
-    ])
-
-@mock.patch("crewai.crew.CrewEvaluator")
-@mock.patch("crewai.crew.Crew.copy")
-@mock.patch("crewai.crew.Crew.kickoff")
-def test_crew_testing_missing_llm(kickoff_mock, copy_mock, crew_evaluator):
-    task = Task(
-        description="Test task",
-        expected_output="Test output",
-        agent=researcher,
-    )
-
-    crew = Crew(
-        agents=[researcher],
-        tasks=[task],
-    )
-
-    # Create a mock for the copied crew
-    copy_mock.return_value = crew
-
-    n_iterations = 2
-    with pytest.raises(ValueError, match="Either llm or openai_model_name must be provided"):
-        crew.test(n_iterations)
-
-@mock.patch("crewai.crew.CrewEvaluator")
-@mock.patch("crewai.crew.Crew.copy")
-@mock.patch("crewai.crew.Crew.kickoff")
-def test_crew_testing_with_invalid_llm(kickoff_mock, copy_mock, crew_evaluator):
-    task = Task(
-        description="Test task",
-        expected_output="Test output",
-        agent=researcher,
-    )
-
-    crew = Crew(
-        agents=[researcher],
-        tasks=[task],
-    )
-
-    # Create a mock for the copied crew
-    copy_mock.return_value = crew
-
-    # Test invalid LLM type
-    with pytest.raises(ValueError, match="Failed to initialize LLM"):
-        crew.test(n_iterations=2, llm={})
-
-    # Test LLM without model attribute
-    class InvalidLLM:
-        def __init__(self): pass
-    with pytest.raises(ValueError, match="LLM must be either a string model name or an LLM instance"):
-        crew.test(n_iterations=2, llm=InvalidLLM())
+    # Test backward compatibility
+    crew.test(2, openai_model_name="gpt-4o-mini")
+    crew_evaluator_mock.assert_called_with(crew, "gpt-4o-mini")
+    mock_evaluator.set_iteration.assert_has_calls([mock.call(1), mock.call(2)])
+    mock_evaluator.print_crew_evaluation_result.assert_called_once()


@pytest.mark.vcr(filter_headers=["authorization"])
--- a/uv.lock
+++ b/uv.lock
@@ -1,10 +1,18 @@
 version = 1
 requires-python = ">=3.10, <3.13"
 resolution-markers = [
-    "python_full_version < '3.11'",
-    "python_full_version == '3.11.*'",
-    "python_full_version >= '3.12' and python_full_version < '3.12.4'",
-    "python_full_version >= '3.12.4'",
+    "python_full_version < '3.11' and sys_platform == 'darwin'",
+    "python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
+    "python_full_version == '3.11.*' and sys_platform == 'darwin'",
+    "python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and sys_platform == 'darwin'",
+    "python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "(python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.12.4' and sys_platform != 'darwin' and sys_platform != 'linux')",
+    "python_full_version >= '3.12.4' and sys_platform == 'darwin'",
+    "python_full_version >= '3.12.4' and platform_machine == 'aarch64' and sys_platform == 'linux'",
+    "(python_full_version >= '3.12.4' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12.4' and sys_platform != 'darwin' and sys_platform != 'linux')",
 ]

 [[package]]
@@ -300,7 +308,7 @@ name = "build"
 version = "1.2.2.post1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "os_name == 'nt'" },
+    { name = "colorama", marker = "(os_name == 'nt' and platform_machine != 'aarch64' and sys_platform == 'linux') or (os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux')" },
    { name = "importlib-metadata", marker = "python_full_version < '3.10.2'" },
    { name = "packaging" },
    { name = "pyproject-hooks" },
@@ -535,7 +543,7 @@ name = "click"
 version = "8.1.7"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "platform_system == 'Windows'" },
+    { name = "colorama", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 }
 wheels = [
@@ -642,7 +650,6 @@ tools = [
 [package.dev-dependencies]
 dev = [
    { name = "cairosvg" },
-    { name = "crewai-tools" },
    { name = "mkdocs" },
    { name = "mkdocs-material" },
    { name = "mkdocs-material-extensions" },
@@ -696,7 +703,6 @@ requires-dist = [
 [package.metadata.requires-dev]
 dev = [
    { name = "cairosvg", specifier = ">=2.7.1" },
-    { name = "crewai-tools", specifier = ">=0.17.0" },
    { name = "mkdocs", specifier = ">=1.4.3" },
    { name = "mkdocs-material", specifier = ">=9.5.7" },
    { name = "mkdocs-material-extensions", specifier = ">=1.3.1" },
@@ -2462,7 +2468,7 @@ version = "1.6.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "click" },
-    { name = "colorama", marker = "platform_system == 'Windows'" },
+    { name = "colorama", marker = "sys_platform == 'win32'" },
    { name = "ghp-import" },
    { name = "jinja2" },
    { name = "markdown" },
@@ -2643,7 +2649,7 @@ version = "2.10.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "pygments" },
-    { name = "pywin32", marker = "platform_system == 'Windows'" },
+    { name = "pywin32", marker = "sys_platform == 'win32'" },
    { name = "tqdm" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/3a/93/80ac75c20ce54c785648b4ed363c88f148bf22637e10c9863db4fbe73e74/mpire-2.10.2.tar.gz", hash = "sha256:f66a321e93fadff34585a4bfa05e95bd946cf714b442f51c529038eb45773d97", size = 271270 }
@@ -2890,7 +2896,7 @@ name = "nvidia-cudnn-cu12"
 version = "9.1.0.70"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
+    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 },
@@ -2917,9 +2923,9 @@ name = "nvidia-cusolver-cu12"
 version = "11.4.5.107"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
-    { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
-    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
+    { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/bc/1d/8de1e5c67099015c834315e333911273a8c6aaba78923dd1d1e25fc5f217/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd", size = 124161928 },
@@ -2930,7 +2936,7 @@ name = "nvidia-cusparse-cu12"
 version = "12.1.0.106"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
+    { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/65/5b/cfaeebf25cd9fdec14338ccb16f6b2c4c7fa9163aefcf057d86b9cc248bb/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c", size = 195958278 },
@@ -3480,7 +3486,7 @@ name = "portalocker"
 version = "2.10.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "pywin32", marker = "platform_system == 'Windows'" },
+    { name = "pywin32", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 }
 wheels = [
@@ -5022,19 +5028,19 @@ dependencies = [
    { name = "fsspec" },
    { name = "jinja2" },
    { name = "networkx" },
-    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
-    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
+    { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "sympy" },
-    { name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
+    { name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
    { name = "typing-extensions" },
 ]
 wheels = [
@@ -5081,7 +5087,7 @@ name = "tqdm"
 version = "4.66.5"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "colorama", marker = "platform_system == 'Windows'" },
+    { name = "colorama", marker = "sys_platform == 'win32'" },
 ]
 sdist = { url = "https://files.pythonhosted.org/packages/58/83/6ba9844a41128c62e810fddddd72473201f3eacde02046066142a2d96cc5/tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad", size = 169504 }
 wheels = [
@@ -5124,7 +5130,7 @@ version = "0.27.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
    { name = "attrs" },
-    { name = "cffi", marker = "implementation_name != 'pypy' and os_name == 'nt'" },
+    { name = "cffi", marker = "(implementation_name != 'pypy' and os_name == 'nt' and platform_machine != 'aarch64' and sys_platform == 'linux') or (implementation_name != 'pypy' and os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux')" },
    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
    { name = "idna" },
    { name = "outcome" },
@@ -5155,7 +5161,7 @@ name = "triton"
 version = "3.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "filelock", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
+    { name = "filelock", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
 ]
 wheels = [
    { url = "https://files.pythonhosted.org/packages/45/27/14cc3101409b9b4b9241d2ba7deaa93535a217a211c86c4cc7151fb12181/triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a", size = 209376304 },