mirror of https://github.com/crewAIInc/crewAI.git
synced 2026-01-08 15:48:29 +00:00

Compare commits: devin/1739...devin/1747 (2 commits)

Commits: 14035a98c3, 48f7721baf
@@ -16,8 +16,6 @@ from pydantic import (
    field_validator,
    model_validator,
)

from crewai.llm import LLM
from pydantic_core import PydanticCustomError

from crewai.agent import Agent
@@ -1077,41 +1075,19 @@ class Crew(BaseModel):
    def test(
        self,
        n_iterations: int,
        llm: Optional[Union[str, InstanceOf[LLM], Any]] = None,
        openai_model_name: Optional[str] = None,  # For backward compatibility
        openai_model_name: Optional[str] = None,
        inputs: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Test and evaluate the Crew with the given inputs for n iterations.

        This method runs tests to evaluate the performance of the crew using the specified
        language model. It supports both string model names and LLM instances for flexibility.

        Args:
            n_iterations: Number of test iterations to run
            llm: Language model configuration (preferred). Can be:
                - A string model name (e.g., "gpt-4")
                - An LLM instance
                - Any object with model_name or deployment_name attributes
            openai_model_name: Legacy parameter for backward compatibility.
                Deprecated: Will be removed in future versions. Use `llm` instead.
            inputs: Optional dictionary of inputs to be used during testing

        Note:
            The `openai_model_name` parameter is deprecated and will be removed in
            future versions. Use the more flexible `llm` parameter instead, which
            supports any LLM implementation.
        """
        """Test and evaluate the Crew with the given inputs for n iterations concurrently using concurrent.futures."""
        test_crew = self.copy()

        # For backward compatibility, convert openai_model_name to llm
        model_name = llm or openai_model_name or "gpt-4o-mini"
        self._test_execution_span = test_crew._telemetry.test_execution_span(
            test_crew,
            n_iterations,
            inputs,
            model_name,
        )
        evaluator = CrewEvaluator(test_crew, llm=model_name)
            openai_model_name,  # type: ignore[arg-type]
        )  # type: ignore[arg-type]
        evaluator = CrewEvaluator(test_crew, openai_model_name)  # type: ignore[arg-type]

        for i in range(1, n_iterations + 1):
            evaluator.set_iteration(i)
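For orientation, a minimal usage sketch of the two call paths shown in the hunk above; the crew object and model names here are placeholders, not part of the diff:

# Sketch only: `my_crew` stands in for any configured Crew instance.
from crewai.llm import LLM

# Preferred path: pass the evaluation model through `llm`, as a name or an LLM instance.
my_crew.test(n_iterations=2, llm="gpt-4o-mini")
my_crew.test(n_iterations=2, llm=LLM(model="gpt-4o-mini"))

# Deprecated path: `openai_model_name` still works and is folded into the same
# fallback expression, `llm or openai_model_name or "gpt-4o-mini"`.
my_crew.test(n_iterations=2, openai_model_name="gpt-4")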
@@ -135,15 +135,25 @@ class BaseTool(BaseModel, ABC):
        )

    def _generate_description(self):
        args_schema = {
            name: {
                "description": field.description,
                "type": BaseTool._get_arg_annotations(field.annotation),
            }
            for name, field in self.args_schema.model_fields.items()
        }
        import json
        import logging

        self.description = f"Tool Name: {self.name}\nTool Arguments: {args_schema}\nTool Description: {self.description}"
        logger = logging.getLogger(__name__)

        try:
            args_schema = {
                name: {
                    "description": field.description,
                    "type": BaseTool._get_arg_annotations(field.annotation),
                }
                for name, field in self.args_schema.model_fields.items()
            }
            args_json = json.dumps(args_schema)
        except Exception as e:
            logger.warning(f"Failed to serialize args schema: {e}")
            args_json = str(args_schema)

        self.description = f"Tool Name: {self.name}\nTool Arguments: {args_json}\nTool Description: {self.description}"

    @staticmethod
    def _get_arg_annotations(annotation: type[Any] | None) -> str:
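As an illustration of the serialization change above, a hypothetical tool with a single `host: str` argument would now carry a JSON-formatted description; the tool name and field are invented for the example:

# Hypothetical expected description for a made-up "Ping" tool with one `host` field.
expected = (
    "Tool Name: Ping\n"
    'Tool Arguments: {"host": {"description": "Host to ping", "type": "str"}}\n'
    "Tool Description: Sends a ping request"
)
# If json.dumps() ever fails, the except branch falls back to the old str(args_schema) rendering.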
@@ -9,7 +9,7 @@
"task": "\nCurrent Task: {input}\n\nBegin! This is VERY important to you, use the tools available and give your best Final Answer, your job depends on it!\n\nThought:",
"memory": "\n\n# Useful context: \n{memory}",
"role_playing": "You are {role}. {backstory}\nYour personal goal is: {goal}",
"tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nUse the following format:\n\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple python dictionary, enclosed in curly braces, using \" to wrap keys and values.\nObservation: the result of the action\n\nOnce all necessary information is gathered:\n\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n",
"tools": "\nYou ONLY have access to the following tools, and should NEVER make up tools that are not listed here:\n\n{tools}\n\nUse the following format:\n\nThought: you should always think about what to do\nAction: the action to take, only one name of [{tool_names}], just the name, exactly as it's written.\nAction Input: the input to the action, just a simple python dictionary, enclosed in curly braces, using double quotes (\") to wrap keys and values.\nObservation: the result of the action\n\nOnce all necessary information is gathered:\n\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n",
"no_tools": "\nTo give my best complete final answer to the task use the exact following format:\n\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described.\n\nI MUST use these formats, my job depends on it!",
"format": "I MUST either use a tool (use one at time) OR give my best final answer not both at the same time. To Use the following format:\n\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action, dictionary enclosed in curly braces\nObservation: the result of the action\n... (this Thought/Action/Action Input/Result can repeat N times)\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described\n\n",
"final_answer_format": "If you don't need to use any more tools, you must give your best complete final answer, make sure it satisfies the expected criteria, use the EXACT format below:\n\nThought: I now can give a great answer\nFinal Answer: my best complete final answer to the task.\n\n",
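To make the reworded Action Input guidance concrete, a hypothetical tool call emitted by the agent should be valid JSON rather than a Python-style dict:

# Hypothetical agent output illustrating the double-quote rule in the updated prompt.
# Valid (parses with json.loads):
#   Action Input: {"query": "latest AI news", "limit": 5}
# Discouraged (single quotes are Python repr style, not JSON):
#   Action Input: {'query': 'latest AI news', 'limit': 5}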
@@ -1,19 +1,11 @@
from collections import defaultdict
from typing import Any, Dict, List, Union

from pydantic import (
    BaseModel,
    Field,
    InstanceOf,
    PrivateAttr,
    model_validator,
)
from pydantic import BaseModel, Field
from rich.box import HEAVY_EDGE
from rich.console import Console
from rich.table import Table

from crewai.agent import Agent
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.telemetry import Telemetry
@@ -25,74 +17,27 @@ class TaskEvaluationPydanticOutput(BaseModel):
    )


class CrewEvaluator(BaseModel):
class CrewEvaluator:
    """
    A class to evaluate the performance of the agents in the crew based on the tasks they have performed.

    Attributes:
        crew (Crew): The crew of agents to evaluate.
        llm (Union[str, InstanceOf[LLM], Any]): The language model to use for evaluating the performance of the agents.
        openai_model_name (str): The model to use for evaluating the performance of the agents (for now ONLY OpenAI accepted).
        tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task.
        iteration (int): The current iteration of the evaluation.
    """

    crew: Any = Field(description="The crew of agents to evaluate.")
    llm: Union[str, InstanceOf[LLM], Any] = Field(
        description="Language model that will run the evaluation."
    )
    tasks_scores: Dict[int, List[float]] = Field(
        default_factory=lambda: defaultdict(list),
        description="Dictionary to store the scores of the agents for each task."
    )
    run_execution_times: Dict[int, List[int]] = Field(
        default_factory=lambda: defaultdict(list),
        description="Dictionary to store execution times for each run."
    )
    iteration: int = Field(
        default=0,
        description="Current iteration of the evaluation."
    )
    tasks_scores: defaultdict = defaultdict(list)
    run_execution_times: defaultdict = defaultdict(list)
    iteration: int = 0

    @model_validator(mode="after")
    def validate_llm(self):
        """Validates that the LLM is properly configured."""
        if not self.llm:
            raise ValueError("LLM configuration is required")
        return self

    _telemetry: Telemetry = PrivateAttr(default_factory=Telemetry)

    def __init__(self, crew, llm: Union[str, InstanceOf[LLM], Any]):
        # Initialize Pydantic model with validated fields
        super().__init__(crew=crew, llm=llm)
    def __init__(self, crew, openai_model_name: str):
        self.crew = crew
        self.openai_model_name = openai_model_name
        self._telemetry = Telemetry()
        self._setup_for_evaluating()

    @model_validator(mode="before")
    def init_llm(cls, values):
        """Initialize LLM before Pydantic validation."""
        llm = values.get("llm")
        try:
            if isinstance(llm, str):
                values["llm"] = LLM(model=llm)
            elif isinstance(llm, LLM):
                values["llm"] = llm
            else:
                # For any other type, attempt to extract relevant attributes
                llm_params = {
                    "model": getattr(llm, "model_name", None)
                    or getattr(llm, "deployment_name", None)
                    or str(llm),
                    "temperature": getattr(llm, "temperature", None),
                    "max_tokens": getattr(llm, "max_tokens", None),
                    "timeout": getattr(llm, "timeout", None),
                }
                # Remove None values
                llm_params = {k: v for k, v in llm_params.items() if v is not None}
                values["llm"] = LLM(**llm_params)
        except Exception as e:
            raise ValueError(f"Invalid LLM configuration: {str(e)}") from e
        return values

    def _setup_for_evaluating(self) -> None:
        """Sets up the crew for evaluating."""
        for task in self.crew.tasks:
@@ -106,7 +51,7 @@ class CrewEvaluator(BaseModel):
            ),
            backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
            verbose=False,
            llm=self.llm,
            llm=self.openai_model_name,
        )

    def _evaluation_task(
@@ -236,7 +181,7 @@ class CrewEvaluator(BaseModel):
            self.crew,
            evaluation_result.pydantic.quality,
            current_task._execution_time,
            self.llm.model if isinstance(self.llm, LLM) else self.llm,
            self.openai_model_name,
        )
        self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
        self.run_execution_times[self.iteration].append(
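A minimal sketch of how the `init_llm` validator above normalizes the different `llm` inputs; `crew` and `some_client` are placeholders for an existing Crew and an arbitrary client object:

# Sketch only; assumes `crew` is an already-configured Crew instance.
from crewai.llm import LLM

CrewEvaluator(crew, llm="gpt-4o-mini")        # str is wrapped as LLM(model="gpt-4o-mini")
CrewEvaluator(crew, llm=LLM(model="gpt-4o"))  # LLM instances are used as-is
CrewEvaluator(crew, llm=some_client)          # other objects: model_name/deployment_name,
                                              # temperature, max_tokens and timeout are copied
                                              # into a new LLM instance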
@@ -1,87 +1,4 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: !!binary |
|
||||
CqcXCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS/hYKEgoQY3Jld2FpLnRl
|
||||
bGVtZXRyeRJ5ChBuJJtOdNaB05mOW/p3915eEgj2tkAd3rZcASoQVG9vbCBVc2FnZSBFcnJvcjAB
|
||||
OYa7/URvKBUYQUpcFEVvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoPCgNsbG0SCAoG
|
||||
Z3B0LTRvegIYAYUBAAEAABLJBwoQifhX01E5i+5laGdALAlZBBIIBuGM1aN+OPgqDENyZXcgQ3Jl
|
||||
YXRlZDABORVGruBvKBUYQaipwOBvKBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoaCg5w
|
||||
eXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19rZXkSIgogN2U2NjA4OTg5ODU5YTY3ZWVj
|
||||
ODhlZWY3ZmNlODUyMjVKMQoHY3Jld19pZBImCiRiOThiNWEwMC01YTI1LTQxMDctYjQwNS1hYmYz
|
||||
MjBhOGYzYThKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVlbnRpYWxKEQoLY3Jld19tZW1vcnkSAhAA
|
||||
ShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVjcmV3X251bWJlcl9vZl9hZ2VudHMSAhgB
|
||||
SuQCCgtjcmV3X2FnZW50cxLUAgrRAlt7ImtleSI6ICIyMmFjZDYxMWU0NGVmNWZhYzA1YjUzM2Q3
|
||||
NWU4ODkzYiIsICJpZCI6ICJkNWIyMzM1YS0yMmIyLTQyZWEtYmYwNS03OTc3NmU3MmYzOTIiLCAi
|
||||
cm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAy
|
||||
MCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJn
|
||||
cHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4
|
||||
ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsi
|
||||
Z2V0IGdyZWV0aW5ncyJdfV1KkgIKCmNyZXdfdGFza3MSgwIKgAJbeyJrZXkiOiAiYTI3N2IzNGIy
|
||||
YzE0NmYwYzU2YzVlMTM1NmU4ZjhhNTciLCAiaWQiOiAiMjJiZWMyMzEtY2QyMS00YzU4LTgyN2Ut
|
||||
MDU4MWE4ZjBjMTExIiwgImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6
|
||||
IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJEYXRhIFNjaWVudGlzdCIsICJhZ2VudF9rZXkiOiAiMjJh
|
||||
Y2Q2MTFlNDRlZjVmYWMwNWI1MzNkNzVlODg5M2IiLCAidG9vbHNfbmFtZXMiOiBbImdldCBncmVl
|
||||
dGluZ3MiXX1degIYAYUBAAEAABKOAgoQ5WYoxRtTyPjge4BduhL0rRIIv2U6rvWALfwqDFRhc2sg
|
||||
Q3JlYXRlZDABOX068uBvKBUYQZkv8+BvKBUYSi4KCGNyZXdfa2V5EiIKIDdlNjYwODk4OTg1OWE2
|
||||
N2VlYzg4ZWVmN2ZjZTg1MjI1SjEKB2NyZXdfaWQSJgokYjk4YjVhMDAtNWEyNS00MTA3LWI0MDUt
|
||||
YWJmMzIwYThmM2E4Si4KCHRhc2tfa2V5EiIKIGEyNzdiMzRiMmMxNDZmMGM1NmM1ZTEzNTZlOGY4
|
||||
YTU3SjEKB3Rhc2tfaWQSJgokMjJiZWMyMzEtY2QyMS00YzU4LTgyN2UtMDU4MWE4ZjBjMTExegIY
|
||||
AYUBAAEAABKQAQoQXyeDtJDFnyp2Fjk9YEGTpxIIaNE7gbhPNYcqClRvb2wgVXNhZ2UwATkaXTvj
|
||||
bygVGEGvx0rjbygVGEoaCg5jcmV3YWlfdmVyc2lvbhIICgYwLjg2LjBKHAoJdG9vbF9uYW1lEg8K
|
||||
DUdldCBHcmVldGluZ3NKDgoIYXR0ZW1wdHMSAhgBegIYAYUBAAEAABLVBwoQMWfznt0qwauEzl7T
|
||||
UOQxRBII9q+pUS5EdLAqDENyZXcgQ3JlYXRlZDABORONPORvKBUYQSAoS+RvKBUYShoKDmNyZXdh
|
||||
aV92ZXJzaW9uEggKBjAuODYuMEoaCg5weXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19r
|
||||
ZXkSIgogYzMwNzYwMDkzMjY3NjE0NDRkNTdjNzFkMWRhM2YyN2NKMQoHY3Jld19pZBImCiQ3OTQw
|
||||
MTkyNS1iOGU5LTQ3MDgtODUzMC00NDhhZmEzYmY4YjBKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVl
|
||||
bnRpYWxKEQoLY3Jld19tZW1vcnkSAhAAShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVj
|
||||
cmV3X251bWJlcl9vZl9hZ2VudHMSAhgBSuoCCgtjcmV3X2FnZW50cxLaAgrXAlt7ImtleSI6ICI5
|
||||
OGYzYjFkNDdjZTk2OWNmMDU3NzI3Yjc4NDE0MjVjZCIsICJpZCI6ICI5OTJkZjYyZi1kY2FiLTQy
|
||||
OTUtOTIwNi05MDBkNDExNGIxZTkiLCAicm9sZSI6ICJGcmllbmRseSBOZWlnaGJvciIsICJ2ZXJi
|
||||
b3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAyMCwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25f
|
||||
Y2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJncHQtNG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJs
|
||||
ZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9s
|
||||
aW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFsiZGVjaWRlIGdyZWV0aW5ncyJdfV1KmAIKCmNyZXdf
|
||||
dGFza3MSiQIKhgJbeyJrZXkiOiAiODBkN2JjZDQ5MDk5MjkwMDgzODMyZjBlOTgzMzgwZGYiLCAi
|
||||
aWQiOiAiMmZmNjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3IiwgImFzeW5jX2V4ZWN1
|
||||
dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRfcm9sZSI6ICJGcmll
|
||||
bmRseSBOZWlnaGJvciIsICJhZ2VudF9rZXkiOiAiOThmM2IxZDQ3Y2U5NjljZjA1NzcyN2I3ODQx
|
||||
NDI1Y2QiLCAidG9vbHNfbmFtZXMiOiBbImRlY2lkZSBncmVldGluZ3MiXX1degIYAYUBAAEAABKO
|
||||
AgoQnjTp5boK7/+DQxztYIpqihIIgGnMUkBtzHEqDFRhc2sgQ3JlYXRlZDABOcpYcuRvKBUYQalE
|
||||
c+RvKBUYSi4KCGNyZXdfa2V5EiIKIGMzMDc2MDA5MzI2NzYxNDQ0ZDU3YzcxZDFkYTNmMjdjSjEK
|
||||
B2NyZXdfaWQSJgokNzk0MDE5MjUtYjhlOS00NzA4LTg1MzAtNDQ4YWZhM2JmOGIwSi4KCHRhc2tf
|
||||
a2V5EiIKIDgwZDdiY2Q0OTA5OTI5MDA4MzgzMmYwZTk4MzM4MGRmSjEKB3Rhc2tfaWQSJgokMmZm
|
||||
NjE5N2UtYmEyNy00YjczLWI0YTctNGZhMDQ4ZTYyYjQ3egIYAYUBAAEAABKTAQoQ26H9pLUgswDN
|
||||
p9XhJwwL6BIIx3bw7mAvPYwqClRvb2wgVXNhZ2UwATmy7NPlbygVGEEvb+HlbygVGEoaCg5jcmV3
|
||||
YWlfdmVyc2lvbhIICgYwLjg2LjBKHwoJdG9vbF9uYW1lEhIKEERlY2lkZSBHcmVldGluZ3NKDgoI
|
||||
YXR0ZW1wdHMSAhgBegIYAYUBAAEAAA==
|
||||
headers:
|
||||
Accept:
|
||||
- '*/*'
|
||||
Accept-Encoding:
|
||||
- gzip, deflate
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Length:
|
||||
- '2986'
|
||||
Content-Type:
|
||||
- application/x-protobuf
|
||||
User-Agent:
|
||||
- OTel-OTLP-Exporter-Python/1.27.0
|
||||
method: POST
|
||||
uri: https://telemetry.crewai.com:4319/v1/traces
|
||||
response:
|
||||
body:
|
||||
string: "\n\0"
|
||||
headers:
|
||||
Content-Length:
|
||||
- '2'
|
||||
Content-Type:
|
||||
- application/x-protobuf
|
||||
Date:
|
||||
- Fri, 27 Dec 2024 22:14:53 GMT
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour
|
||||
personal goal is: test goal\nTo give my best complete final answer to the task
|
||||
@@ -105,20 +22,18 @@ interactions:
|
||||
- '824'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- _cfuvid=ePJSDFdHag2D8lj21_ijAMWjoA6xfnPNxN4uekvC728-1727226247743-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.52.1
|
||||
x-stainless-arch:
|
||||
- x64
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- Linux
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.52.1
|
||||
x-stainless-raw-response:
|
||||
@@ -132,8 +47,8 @@ interactions:
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
content: "{\n \"id\": \"chatcmpl-AjCtZLLrWi8ZASpP9bz6HaCV7xBIn\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
content: "{\n \"id\": \"chatcmpl-AaqIIsTxhvf75xvuu7gQScIlRSKbW\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1733344190,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"I now can give a great answer \\nFinal
|
||||
Answer: Hi\",\n \"refusal\": null\n },\n \"logprobs\": null,\n
|
||||
@@ -142,12 +57,12 @@ interactions:
|
||||
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
|
||||
\"fp_0aa8d3e20b\"\n}\n"
|
||||
\"fp_0705bf87c0\"\n}\n"
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8f8caa83deca756b-SEA
|
||||
- 8ece8cfc3b1f4532-ATL
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
@@ -155,14 +70,14 @@ interactions:
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Fri, 27 Dec 2024 22:14:53 GMT
|
||||
- Wed, 04 Dec 2024 20:29:50 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw;
|
||||
path=/; expires=Fri, 27-Dec-24 22:44:53 GMT; domain=.api.openai.com; HttpOnly;
|
||||
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
|
||||
path=/; expires=Wed, 04-Dec-24 20:59:50 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
|
||||
- _cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
@@ -175,7 +90,7 @@ interactions:
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '404'
|
||||
- '313'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
@@ -193,7 +108,7 @@ interactions:
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_6ac84634bff9193743c4b0911c09b4a6
|
||||
- req_9fd9a8ee688045dcf7ac5f6fdf689372
|
||||
http_version: HTTP/1.1
|
||||
status_code: 200
|
||||
- request:
|
||||
@@ -216,20 +131,20 @@ interactions:
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
|
||||
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
|
||||
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
|
||||
_cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.52.1
|
||||
x-stainless-arch:
|
||||
- x64
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- Linux
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.52.1
|
||||
x-stainless-raw-response:
|
||||
@@ -243,8 +158,8 @@ interactions:
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
content: "{\n \"id\": \"chatcmpl-AjCtZNlWdrrPZhq0MJDqd16sMuQEJ\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
content: "{\n \"id\": \"chatcmpl-AaqIIaQlLyoyPmk909PvAIfA2TmJL\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1733344190,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"True\",\n \"refusal\": null\n
|
||||
\ },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n
|
||||
@@ -253,12 +168,12 @@ interactions:
|
||||
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
|
||||
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
|
||||
\"fp_0aa8d3e20b\"\n}\n"
|
||||
\"fp_0705bf87c0\"\n}\n"
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8f8caa87094f756b-SEA
|
||||
- 8ece8d060b5e4532-ATL
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
@@ -266,7 +181,7 @@ interactions:
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Fri, 27 Dec 2024 22:14:53 GMT
|
||||
- Wed, 04 Dec 2024 20:29:50 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
@@ -280,7 +195,7 @@ interactions:
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '156'
|
||||
- '375'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
@@ -298,7 +213,7 @@ interactions:
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_ec74bef2a9ef7b2144c03fd7f7bbeab0
|
||||
- req_be7cb475e0859a82c37ee3f2871ea5ea
|
||||
http_version: HTTP/1.1
|
||||
status_code: 200
|
||||
- request:
|
||||
@@ -327,20 +242,20 @@ interactions:
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
|
||||
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
|
||||
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
|
||||
_cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.52.1
|
||||
x-stainless-arch:
|
||||
- x64
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- Linux
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.52.1
|
||||
x-stainless-raw-response:
|
||||
@@ -354,23 +269,22 @@ interactions:
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
content: "{\n \"id\": \"chatcmpl-AjCtZGv4f3h7GDdhyOy9G0sB1lRgC\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1735337693,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
content: "{\n \"id\": \"chatcmpl-AaqIJAAxpVfUOdrsgYKHwfRlHv4RS\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1733344191,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"Thought: I understand the feedback and
|
||||
will adjust my response accordingly. \\nFinal Answer: Hello\",\n \"refusal\":
|
||||
null\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
|
||||
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 188,\n \"completion_tokens\":
|
||||
18,\n \"total_tokens\": 206,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
|
||||
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
|
||||
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
\"assistant\",\n \"content\": \"Thought: I now can give a great answer
|
||||
\ \\nFinal Answer: Hello\",\n \"refusal\": null\n },\n \"logprobs\":
|
||||
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
|
||||
188,\n \"completion_tokens\": 14,\n \"total_tokens\": 202,\n \"prompt_tokens_details\":
|
||||
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
|
||||
\"fp_0aa8d3e20b\"\n}\n"
|
||||
\"fp_0705bf87c0\"\n}\n"
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8f8caa88cac4756b-SEA
|
||||
- 8ece8d090fc34532-ATL
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
@@ -378,7 +292,7 @@ interactions:
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Fri, 27 Dec 2024 22:14:54 GMT
|
||||
- Wed, 04 Dec 2024 20:29:51 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
@@ -392,7 +306,7 @@ interactions:
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '358'
|
||||
- '484'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
@@ -410,7 +324,7 @@ interactions:
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_ae1ab6b206d28ded6fee3c83ed0c2ab7
|
||||
- req_5bf4a565ad6c2567a1ed204ecac89134
|
||||
http_version: HTTP/1.1
|
||||
status_code: 200
|
||||
- request:
|
||||
@@ -432,20 +346,20 @@ interactions:
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- _cfuvid=A_ASCLNAVfQoyucWOAIhecWtEpNotYoZr0bAFihgNxs-1735337693273-0.0.1.1-604800000;
|
||||
__cf_bm=wJkq_yLkzE3OdxE0aMJz.G0kce969.9JxRmZ0ratl4c-1735337693-1.0.1.1-OKpUoRrSPFGvWv5Hp5ET1PNZ7iZNHPKEAuakpcQUxxPSeisUIIR3qIOZ31MGmYugqB5.wkvidgbxOAagqJvmnw
|
||||
- __cf_bm=QJZZjZ6eqnVamqUkw.Bx0mj7oBi3a_vGEH1VODcUxlg-1733344190-1.0.1.1-xyN0ekA9xIrSwEhRBmTiWJ3Pt72UYLU5owKfkz5yihVmMTfsr_Qz.ssGPJ5cuft066v1xVjb4zOSTdFmesMSKg;
|
||||
_cfuvid=eCIkP8GVPvpkg19eOhCquWFHm.RTQBQy4yHLGGEAH5c-1733344190334-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.52.1
|
||||
x-stainless-arch:
|
||||
- x64
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- Linux
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.52.1
|
||||
x-stainless-raw-response:
|
||||
@@ -459,8 +373,8 @@ interactions:
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
content: "{\n \"id\": \"chatcmpl-AjCtaiHL4TY8Dssk0j2miqmjrzquy\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1735337694,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
content: "{\n \"id\": \"chatcmpl-AaqIJqyG8vl9mxj2qDPZgaxyNLLIq\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1733344191,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"False\",\n \"refusal\": null\n
|
||||
\ },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n
|
||||
@@ -469,12 +383,12 @@ interactions:
|
||||
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
|
||||
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\":
|
||||
\"fp_0aa8d3e20b\"\n}\n"
|
||||
\"fp_0705bf87c0\"\n}\n"
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8f8caa8bdd26756b-SEA
|
||||
- 8ece8d0cfdeb4532-ATL
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
@@ -482,7 +396,7 @@ interactions:
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Fri, 27 Dec 2024 22:14:54 GMT
|
||||
- Wed, 04 Dec 2024 20:29:51 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
@@ -496,7 +410,7 @@ interactions:
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '184'
|
||||
- '341'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
@@ -514,7 +428,7 @@ interactions:
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_652891f79c1104a7a8436275d78a69f1
|
||||
- req_5554bade8ceda00cf364b76a51b708ff
|
||||
http_version: HTTP/1.1
|
||||
status_code: 200
|
||||
version: 1
|
||||
|
||||
@@ -300,15 +300,6 @@ def test_hierarchical_process():
    )


@mock.patch("crewai.crew.CrewEvaluator")
@mock.patch("crewai.crew.Crew.copy")
def test_crew_test_backward_compatibility(mock_copy, mock_evaluator):
    crew = Crew(agents=[researcher], tasks=[Task(description="test", expected_output="test output", agent=researcher)])
    crew.test(2, openai_model_name="gpt-4")
    mock_evaluator.assert_called_once()
    _, kwargs = mock_evaluator.call_args
    assert kwargs["llm"] == "gpt-4"


def test_manager_llm_requirement_for_hierarchical_process():
    task = Task(
        description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
@@ -1132,7 +1123,7 @@ def test_kickoff_for_each_empty_input():
    assert results == []


@pytest.mark.vcr(filter_headeruvs=["authorization"])
@pytest.mark.vcr(filter_headers=["authorization"])
def test_kickoff_for_each_invalid_input():
    """Tests if kickoff_for_each raises TypeError for invalid input types."""

@@ -2846,7 +2837,7 @@ def test_crew_testing_function(kickoff_mock, copy_mock, crew_evaluator):

    crew_evaluator.assert_has_calls(
        [
            mock.call(crew, llm="gpt-4o-mini"),
            mock.call(crew, "gpt-4o-mini"),
            mock.call().set_iteration(1),
            mock.call().set_iteration(2),
            mock.call().print_crew_evaluation_result(),
@@ -3134,4 +3125,4 @@ def test_multimodal_agent_live_image_analysis():
    # Verify we got a meaningful response
    assert isinstance(result.raw, str)
    assert len(result.raw) > 100  # Expecting a detailed analysis
    assert "error" not in result.raw.lower()  # No error messages in response
    assert "error" not in result.raw.lower()  # No error messages in response
tests/tools/test_json_edge_cases.py (new file, 85 lines)
@@ -0,0 +1,85 @@
import json
from unittest.mock import MagicMock

import pytest
from pydantic import BaseModel, Field

from crewai.tools import BaseTool
from crewai.tools.tool_usage import ToolUsage


class TestComplexInput(BaseModel):
    special_chars: str = Field(
        ..., description="Parameter with special characters: \"'\\{}[]"
    )
    nested_dict: dict = Field(
        ..., description="A nested dictionary parameter"
    )
    unicode_text: str = Field(
        ..., description="Text with unicode characters: 你好, こんにちは, مرحبا"
    )


class TestComplexTool(BaseTool):
    name: str = "Complex JSON Tool"
    description: str = "A tool for testing complex JSON formatting"
    args_schema: type[BaseModel] = TestComplexInput

    def _run(self, special_chars: str, nested_dict: dict, unicode_text: str) -> str:
        return f"Processed complex input successfully"


def test_complex_json_formatting():
    """Test that complex JSON with special characters and nested structures is formatted correctly."""
    tool = TestComplexTool()

    assert "Tool Arguments:" in tool.description

    description_parts = tool.description.split("Tool Arguments: ")
    json_str = description_parts[1].split("\nTool Description:")[0]

    parsed_json = json.loads(json_str)

    assert "special_chars" in parsed_json
    assert "nested_dict" in parsed_json
    assert "unicode_text" in parsed_json

    assert "\"'\\{}[]" in parsed_json["special_chars"]["description"]

    assert "你好" in parsed_json["unicode_text"]["description"]
    assert "こんにちは" in parsed_json["unicode_text"]["description"]
    assert "مرحبا" in parsed_json["unicode_text"]["description"]


def test_complex_tool_usage_render():
    """Test that complex tool usage renders with proper JSON formatting."""
    tool = TestComplexTool()

    tool_usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[tool],
        original_tools=[tool],
        tools_description="Tool for testing complex JSON formatting",
        tools_names="test_complex_tool",
        task=MagicMock(),
        function_calling_llm=MagicMock(),
        agent=MagicMock(),
        action=MagicMock(),
    )

    rendered = tool_usage._render()

    rendered_parts = rendered.split("Tool Arguments: ")
    if len(rendered_parts) > 1:
        json_str = rendered_parts[1].split("\nTool Description:")[0]

        try:
            parsed_json = json.loads(json_str)
            assert True  # If we get here, JSON parsing succeeded

            assert "special_chars" in parsed_json
            assert "nested_dict" in parsed_json
            assert "unicode_text" in parsed_json

        except json.JSONDecodeError:
            assert False, "The rendered tool arguments are not valid JSON"
tests/tools/test_json_formatting.py (new file, 78 lines)
@@ -0,0 +1,78 @@
import json
from unittest.mock import MagicMock

import pytest
from pydantic import BaseModel, Field

from crewai.tools import BaseTool
from crewai.tools.tool_usage import ToolUsage


class TestJsonInput(BaseModel):
    test_param: str = Field(
        ..., description="A test parameter"
    )
    another_param: int = Field(
        ..., description="Another test parameter"
    )


class TestJsonTool(BaseTool):
    name: str = "Test JSON Tool"
    description: str = "A tool for testing JSON formatting"
    args_schema: type[BaseModel] = TestJsonInput

    def _run(self, test_param: str, another_param: int) -> str:
        return f"Received {test_param} and {another_param}"


def test_tool_description_json_formatting():
    """Test that the tool description uses proper JSON formatting with double quotes."""
    tool = TestJsonTool()

    assert "Tool Arguments:" in tool.description

    description_parts = tool.description.split("Tool Arguments: ")
    json_str = description_parts[1].split("\nTool Description:")[0]

    parsed_json = json.loads(json_str)

    assert "test_param" in parsed_json
    assert "another_param" in parsed_json

    assert '"test_param"' in json_str
    assert '"another_param"' in json_str
    assert "'" not in json_str  # No single quotes should be present


def test_tool_usage_json_formatting():
    """Test that the tool usage renders with proper JSON formatting."""
    tool = TestJsonTool()

    tool_usage = ToolUsage(
        tools_handler=MagicMock(),
        tools=[tool],
        original_tools=[tool],
        tools_description="Tool for testing JSON formatting",
        tools_names="test_json_tool",
        task=MagicMock(),
        function_calling_llm=MagicMock(),
        agent=MagicMock(),
        action=MagicMock(),
    )

    rendered = tool_usage._render()

    rendered_parts = rendered.split("Tool Arguments: ")
    if len(rendered_parts) > 1:
        json_str = rendered_parts[1].split("\nTool Description:")[0]

        try:
            parsed_json = json.loads(json_str)
            assert True  # If we get here, JSON parsing succeeded
        except json.JSONDecodeError:
            assert False, "The rendered tool arguments are not valid JSON"

        assert '"test_param"' in json_str
        assert '"another_param"' in json_str
        assert "'" not in json_str  # No single quotes should be present
@@ -102,15 +102,15 @@ def test_tool_usage_render():

    rendered = tool_usage._render()

    # Updated checks to match the actual output
    # Updated checks to match the actual output with JSON formatting
    assert "Tool Name: Random Number Generator" in rendered
    assert "Tool Arguments:" in rendered
    assert (
        "'min_value': {'description': 'The minimum value of the range (inclusive)', 'type': 'int'}"
        '"min_value": {"description": "The minimum value of the range (inclusive)", "type": "int"}'
        in rendered
    )
    assert (
        "'max_value': {'description': 'The maximum value of the range (inclusive)', 'type': 'int'}"
        '"max_value": {"description": "The maximum value of the range (inclusive)", "type": "int"}'
        in rendered
    )
    assert (
@@ -118,6 +118,6 @@ def test_tool_usage_render():
        in rendered
    )
    assert (
        "Tool Name: Random Number Generator\nTool Arguments: {'min_value': {'description': 'The minimum value of the range (inclusive)', 'type': 'int'}, 'max_value': {'description': 'The maximum value of the range (inclusive)', 'type': 'int'}}\nTool Description: Generates a random number within a specified range"
        'Tool Name: Random Number Generator\nTool Arguments: {"min_value": {"description": "The minimum value of the range (inclusive)", "type": "int"}, "max_value": {"description": "The maximum value of the range (inclusive)", "type": "int"}}\nTool Description: Generates a random number within a specified range'
        in rendered
    )
@@ -4,7 +4,6 @@ import pytest

from crewai.agent import Agent
from crewai.crew import Crew
from crewai.llm import LLM
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.evaluators.crew_evaluator_handler import (
@@ -24,7 +23,7 @@ class TestCrewEvaluator:
        )
        crew = Crew(agents=[agent], tasks=[task])

        return CrewEvaluator(crew, llm="gpt-4o-mini")
        return CrewEvaluator(crew, openai_model_name="gpt-4o-mini")

    def test_setup_for_evaluating(self, crew_planner):
        crew_planner._setup_for_evaluating()
@@ -47,7 +46,6 @@ class TestCrewEvaluator:
        )
        assert agent.verbose is False
        assert agent.llm.model == "gpt-4o-mini"
        assert isinstance(agent.llm, LLM)

    def test_evaluation_task(self, crew_planner):
        evaluator_agent = Agent(
@@ -133,17 +131,6 @@ class TestCrewEvaluator:
        # Ensure the console prints the table
        console.assert_has_calls([mock.call(), mock.call().print(table())])

    def test_custom_llm_support(self):
        agent = Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1")
        task = Task(description="Task 1", expected_output="Output 1", agent=agent)
        crew = Crew(agents=[agent], tasks=[task])

        custom_llm = LLM(model="custom-model")
        evaluator = CrewEvaluator(crew, llm=custom_llm)

        assert evaluator.llm.model == "custom-model"
        assert isinstance(evaluator.llm, LLM)

    def test_evaluate(self, crew_planner):
        task_output = TaskOutput(
            description="Task 1", agent=str(crew_planner.crew.agents[0])
uv.lock (generated, 68 lines changed)
@@ -1,18 +1,10 @@
|
||||
version = 1
|
||||
requires-python = ">=3.10, <3.13"
|
||||
resolution-markers = [
|
||||
"python_full_version < '3.11' and sys_platform == 'darwin'",
|
||||
"python_full_version < '3.11' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"(python_full_version < '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.11' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version == '3.11.*' and sys_platform == 'darwin'",
|
||||
"python_full_version == '3.11.*' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"(python_full_version == '3.11.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.11.*' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version >= '3.12' and python_full_version < '3.12.4' and sys_platform == 'darwin'",
|
||||
"python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"(python_full_version >= '3.12' and python_full_version < '3.12.4' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and python_full_version < '3.12.4' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version >= '3.12.4' and sys_platform == 'darwin'",
|
||||
"python_full_version >= '3.12.4' and platform_machine == 'aarch64' and sys_platform == 'linux'",
|
||||
"(python_full_version >= '3.12.4' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12.4' and sys_platform != 'darwin' and sys_platform != 'linux')",
|
||||
"python_full_version < '3.11'",
|
||||
"python_full_version == '3.11.*'",
|
||||
"python_full_version >= '3.12' and python_full_version < '3.12.4'",
|
||||
"python_full_version >= '3.12.4'",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -308,7 +300,7 @@ name = "build"
|
||||
version = "1.2.2.post1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "(os_name == 'nt' and platform_machine != 'aarch64' and sys_platform == 'linux') or (os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "colorama", marker = "os_name == 'nt'" },
|
||||
{ name = "importlib-metadata", marker = "python_full_version < '3.10.2'" },
|
||||
{ name = "packaging" },
|
||||
{ name = "pyproject-hooks" },
|
||||
@@ -543,7 +535,7 @@ name = "click"
|
||||
version = "8.1.7"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/96/d3/f04c7bfcf5c1862a2a5b845c6b2b360488cf47af55dfa79c98f6a6bf98b5/click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de", size = 336121 }
|
||||
wheels = [
|
||||
@@ -650,6 +642,7 @@ tools = [
|
||||
[package.dev-dependencies]
|
||||
dev = [
|
||||
{ name = "cairosvg" },
|
||||
{ name = "crewai-tools" },
|
||||
{ name = "mkdocs" },
|
||||
{ name = "mkdocs-material" },
|
||||
{ name = "mkdocs-material-extensions" },
|
||||
@@ -703,6 +696,7 @@ requires-dist = [
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
{ name = "cairosvg", specifier = ">=2.7.1" },
|
||||
{ name = "crewai-tools", specifier = ">=0.17.0" },
|
||||
{ name = "mkdocs", specifier = ">=1.4.3" },
|
||||
{ name = "mkdocs-material", specifier = ">=9.5.7" },
|
||||
{ name = "mkdocs-material-extensions", specifier = ">=1.3.1" },
|
||||
@@ -2468,7 +2462,7 @@ version = "1.6.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "click" },
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
||||
{ name = "ghp-import" },
|
||||
{ name = "jinja2" },
|
||||
{ name = "markdown" },
|
||||
@@ -2649,7 +2643,7 @@ version = "2.10.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pygments" },
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "pywin32", marker = "platform_system == 'Windows'" },
|
||||
{ name = "tqdm" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/3a/93/80ac75c20ce54c785648b4ed363c88f148bf22637e10c9863db4fbe73e74/mpire-2.10.2.tar.gz", hash = "sha256:f66a321e93fadff34585a4bfa05e95bd946cf714b442f51c529038eb45773d97", size = 271270 }
|
||||
@@ -2896,7 +2890,7 @@ name = "nvidia-cudnn-cu12"
|
||||
version = "9.1.0.70"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9f/fd/713452cd72343f682b1c7b9321e23829f00b842ceaedcda96e742ea0b0b3/nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f", size = 664752741 },
|
||||
@@ -2923,9 +2917,9 @@ name = "nvidia-cusolver-cu12"
|
||||
version = "11.4.5.107"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/bc/1d/8de1e5c67099015c834315e333911273a8c6aaba78923dd1d1e25fc5f217/nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd", size = 124161928 },
|
||||
@@ -2936,7 +2930,7 @@ name = "nvidia-cusparse-cu12"
|
||||
version = "12.1.0.106"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/65/5b/cfaeebf25cd9fdec14338ccb16f6b2c4c7fa9163aefcf057d86b9cc248bb/nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c", size = 195958278 },
|
||||
@@ -3486,7 +3480,7 @@ name = "portalocker"
|
||||
version = "2.10.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "pywin32", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 }
|
||||
wheels = [
|
||||
@@ -5028,19 +5022,19 @@ dependencies = [
|
||||
{ name = "fsspec" },
|
||||
{ name = "jinja2" },
|
||||
{ name = "networkx" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cuda-runtime-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cudnn-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cufft-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-curand-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cusolver-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-cusparse-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-nccl-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "sympy" },
|
||||
{ name = "triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" },
|
||||
{ name = "triton", marker = "platform_machine == 'x86_64' and platform_system == 'Linux'" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
wheels = [
|
||||
@@ -5087,7 +5081,7 @@ name = "tqdm"
|
||||
version = "4.66.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "colorama", marker = "sys_platform == 'win32'" },
|
||||
{ name = "colorama", marker = "platform_system == 'Windows'" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/58/83/6ba9844a41128c62e810fddddd72473201f3eacde02046066142a2d96cc5/tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad", size = 169504 }
|
||||
wheels = [
|
||||
@@ -5130,7 +5124,7 @@ version = "0.27.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "attrs" },
|
||||
{ name = "cffi", marker = "(implementation_name != 'pypy' and os_name == 'nt' and platform_machine != 'aarch64' and sys_platform == 'linux') or (implementation_name != 'pypy' and os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "cffi", marker = "implementation_name != 'pypy' and os_name == 'nt'" },
|
||||
{ name = "exceptiongroup", marker = "python_full_version < '3.11'" },
|
||||
{ name = "idna" },
|
||||
{ name = "outcome" },
|
||||
@@ -5161,7 +5155,7 @@ name = "triton"
|
||||
version = "3.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "filelock", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" },
|
||||
{ name = "filelock", marker = "(platform_machine != 'aarch64' and platform_system != 'Darwin') or (platform_system != 'Darwin' and platform_system != 'Linux')" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/45/27/14cc3101409b9b4b9241d2ba7deaa93535a217a211c86c4cc7151fb12181/triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a", size = 209376304 },
|
||||
|
||||