Merge branch 'main' of github.com:crewAIInc/crewAI into feat/sliding-context-window

This commit is contained in:
Lorenze Jay
2024-07-26 14:51:30 -07:00
20 changed files with 26671 additions and 176 deletions

View File

@@ -33,6 +33,7 @@ A crew in crewAI represents a collaborative group of agents working together to
| **Manager Callbacks** _(optional)_ | `manager_callbacks` | `manager_callbacks` takes a list of callback handlers to be executed by the manager agent when a hierarchical process is used. |
| **Prompt File** _(optional)_ | `prompt_file` | Path to the prompt JSON file to be used for the crew. |
| **Planning** _(optional)_ | `planning` | Adds planning ability to the Crew. When activated, all Crew data is sent to an AgentPlanner before each Crew iteration; the AgentPlanner plans the tasks, and the resulting plan is added to each task's description. |
| **Planning LLM** _(optional)_ | `planning_llm` | The language model used by the AgentPlanner in a planning process. |
!!! note "Crew Max RPM"
The `max_rpm` attribute sets the maximum number of requests per minute the crew can perform to avoid rate limits. If set, it overrides individual agents' `max_rpm` settings.
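For illustration, a minimal sketch of that override behavior (the agent, task, and numbers below are hypothetical, not part of this commit):

```python
from crewai import Agent, Crew, Task

# The crew-level max_rpm (10) takes precedence over the agent-level setting (50).
researcher = Agent(
    role="Researcher",
    goal="Summarize recent AI developments",
    backstory="An analyst who tracks the AI field",
    max_rpm=50,
)
task = Task(
    description="Write a short summary of recent AI developments.",
    expected_output="A one-paragraph summary.",
    agent=researcher,
)
crew = Crew(agents=[researcher], tasks=[task], max_rpm=10)  # 10 RPM applies crew-wide
```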

View File

@@ -23,6 +23,25 @@ my_crew = Crew(
From this point on, your crew will have planning enabled, and the tasks will be planned before each iteration.
#### Planning LLM
Now you can define the LLM that will be used to plan the tasks. You can use any LLM model available through `ChatOpenAI`.
```python
from crewai import Crew, Agent, Task, Process
from langchain_openai import ChatOpenAI

# Assemble your crew with planning capabilities and a custom LLM
my_crew = Crew(
    agents=self.agents,
    tasks=self.tasks,
    process=Process.sequential,
    planning=True,
    planning_llm=ChatOpenAI(model="gpt-4o"),
)
```
### Example
When running the base case example, you will see something like the following output, which represents the output of the AgentPlanner responsible for creating the step-by-step logic added to the Agents' tasks.

View File

@@ -0,0 +1,41 @@
---
title: crewAI Testing
description: Learn how to test your crewAI Crew and evaluate its performance.
---
## Introduction
Testing is a crucial part of the development process, and it is essential to ensure that your crew is performing as expected. With crewAI, you can easily test your crew and evaluate its performance using the built-in testing capabilities.
### Using the Testing Feature
We added the CLI command `crewai test` to make it easy to test your crew. This command will run your crew for a specified number of iterations and provide detailed performance metrics.
The parameters are `n_iterations` and `model`, both optional, defaulting to 2 and `gpt-4o-mini` respectively. For now, the only supported provider is OpenAI.
```bash
crewai test
```
If you want to run more iterations or use a different model, you can specify the parameters like this:
```bash
crewai test --n_iterations 5 --model gpt-4o
```
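Under the hood, these flags map onto the crew's `test()` method; a minimal programmatic sketch (assuming `my_crew` is an already assembled `Crew` and the topic input below is hypothetical):

```python
# Roughly equivalent to `crewai test --n_iterations 2 --model gpt-4o-mini`
my_crew.test(n_iterations=2, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
```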
When you run the `crewai test` command, the crew is executed for the specified number of iterations, and the performance metrics are displayed at the end of the run.
A table of scores at the end shows the performance of the crew and of each task:
```
Task Scores
(1-10 Higher is better)
┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃
┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
│ Task 1 │ 10.0 │ 9.0 │ 9.5 │
│ Task 2 │ 9.0 │ 9.0 │ 9.0 │
│ Crew │ 9.5 │ 9.0 │ 9.2 │
└────────────┴───────┴───────┴────────────┘
```
The example above shows the test results for two runs of the crew with two tasks, with the average total score for each task and the crew as a whole.
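The averages are simple means over the per-run scores; a short sketch of the arithmetic, mirroring the `CrewEvaluator` introduced in this commit:

```python
# run -> list of per-task scores, as collected during `crewai test`
tasks_scores = {1: [10.0, 9.0], 2: [9.0, 9.0]}

task_averages = [sum(s) / len(s) for s in zip(*tasks_scores.values())]  # [9.5, 9.0]
crew_scores = [sum(s) / len(s) for s in tasks_scores.values()]          # per-run crew row: [9.5, 9.0]
crew_average = sum(task_averages) / len(task_averages)                  # 9.25, displayed as 9.2
```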

View File

@@ -129,6 +129,7 @@ nav:
- Training: 'core-concepts/Training-Crew.md'
- Memory: 'core-concepts/Memory.md'
- Planning: 'core-concepts/Planning.md'
- Testing: 'core-concepts/Testing.md'
- Using LangChain Tools: 'core-concepts/Using-LangChain-Tools.md'
- Using LlamaIndex Tools: 'core-concepts/Using-LlamaIndex-Tools.md'
- How to Guides:

View File

@@ -48,7 +48,7 @@ def test():
"topic": "AI LLMs"
}
try:
{{crew_name}}Crew().crew().test(n_iterations=int(sys.argv[1]), model=sys.argv[2], inputs=inputs)
{{crew_name}}Crew().crew().test(n_iterations=int(sys.argv[1]), openai_model_name=sys.argv[2], inputs=inputs)
except Exception as e:
raise Exception(f"An error occurred while testing the crew: {e}")

View File

@@ -37,6 +37,7 @@ from crewai.utilities.constants import (
TRAINED_AGENTS_DATA_FILE,
TRAINING_DATA_FILE,
)
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities.formatter import (
aggregate_raw_outputs_from_task_outputs,
@@ -154,6 +155,10 @@ class Crew(BaseModel):
default=False,
description="Plan the crew execution and add the plan to the crew.",
)
planning_llm: Optional[Any] = Field(
default=None,
description="Language model that will run the AgentPlanner if planning is True.",
)
task_execution_output_json_files: Optional[List[str]] = Field(
default=None,
description="List of file paths for task execution JSON files.",
@@ -266,20 +271,6 @@ class Crew(BaseModel):
return self
@model_validator(mode="after")
def check_tasks_in_hierarchical_process_not_async(self):
"""Validates that the tasks in hierarchical process are not flagged with async_execution."""
if self.process == Process.hierarchical:
for task in self.tasks:
if task.async_execution:
raise PydanticCustomError(
"async_execution_in_hierarchical_process",
"Hierarchical process error: Tasks cannot be flagged with async_execution.",
{},
)
return self
@model_validator(mode="after")
def validate_end_with_at_most_one_async_task(self):
"""Validates that the crew ends with at most one asynchronous task."""
@@ -559,15 +550,12 @@ class Crew(BaseModel):
def _handle_crew_planning(self):
"""Handles the Crew planning."""
self._logger.log("info", "Planning the crew execution")
result = CrewPlanner(self.tasks)._handle_crew_planning()
result = CrewPlanner(
tasks=self.tasks, planning_agent_llm=self.planning_llm
)._handle_crew_planning()
if result is not None and hasattr(result, "list_of_plans_per_task"):
for task, step_plan in zip(self.tasks, result.list_of_plans_per_task):
task.description += step_plan
else:
self._logger.log(
"info", "Something went wrong with the planning process of the Crew"
)
for task, step_plan in zip(self.tasks, result.list_of_plans_per_task):
task.description += step_plan
def _store_execution_log(
self,
@@ -605,7 +593,7 @@ class Crew(BaseModel):
def _run_hierarchical_process(self) -> CrewOutput:
"""Creates and assigns a manager agent to make sure the crew completes the tasks."""
self._create_manager_agent()
return self._execute_tasks(self.tasks, self.manager_agent)
return self._execute_tasks(self.tasks)
def _create_manager_agent(self):
i18n = I18N(prompt_file=self.prompt_file)
@@ -629,7 +617,6 @@ class Crew(BaseModel):
def _execute_tasks(
self,
tasks: List[Task],
manager: Optional[BaseAgent] = None,
start_index: Optional[int] = 0,
was_replayed: bool = False,
) -> CrewOutput:
@@ -657,13 +644,13 @@ class Crew(BaseModel):
last_sync_output = task.output
continue
agent_to_use = self._get_agent_to_use(task, manager)
agent_to_use = self._get_agent_to_use(task)
if agent_to_use is None:
raise ValueError(
f"No agent available for task: {task.description}. Ensure that either the task has an assigned agent or a manager agent is provided."
)
self._prepare_agent_tools(task, manager)
self._prepare_agent_tools(task)
self._log_task_start(task, agent_to_use.role)
if isinstance(task, ConditionalTask):
@@ -729,20 +716,18 @@ class Crew(BaseModel):
return skipped_task_output
return None
def _prepare_agent_tools(self, task: Task, manager: Optional[BaseAgent]):
def _prepare_agent_tools(self, task: Task):
if self.process == Process.hierarchical:
if manager:
self._update_manager_tools(task, manager)
if self.manager_agent:
self._update_manager_tools(task)
else:
raise ValueError("Manager agent is required for hierarchical process.")
elif task.agent and task.agent.allow_delegation:
self._add_delegation_tools(task)
def _get_agent_to_use(
self, task: Task, manager: Optional[BaseAgent]
) -> Optional[BaseAgent]:
def _get_agent_to_use(self, task: Task) -> Optional[BaseAgent]:
if self.process == Process.hierarchical:
return manager
return self.manager_agent
return task.agent
def _add_delegation_tools(self, task: Task):
@@ -778,11 +763,14 @@ class Crew(BaseModel):
if self.output_log_file:
self._file_handler.log(agent=role, task=task.description, status="started")
def _update_manager_tools(self, task: Task, manager: BaseAgent):
if task.agent:
manager.tools = task.agent.get_delegation_tools([task.agent])
else:
manager.tools = manager.get_delegation_tools(self.agents)
def _update_manager_tools(self, task: Task):
if self.manager_agent:
if task.agent:
self.manager_agent.tools = task.agent.get_delegation_tools([task.agent])
else:
self.manager_agent.tools = self.manager_agent.get_delegation_tools(
self.agents
)
def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
context = (
@@ -881,7 +869,7 @@ class Crew(BaseModel):
self.tasks[i].output = task_output
self._logging_color = "bold_blue"
result = self._execute_tasks(self.tasks, self.manager_agent, start_index, True)
result = self._execute_tasks(self.tasks, start_index, True)
return result
def copy(self):
@@ -967,10 +955,19 @@ class Crew(BaseModel):
return total_usage_metrics
def test(
self, n_iterations: int, model: str, inputs: Optional[Dict[str, Any]] = None
self,
n_iterations: int,
openai_model_name: str,
inputs: Optional[Dict[str, Any]] = None,
) -> None:
"""Test the crew with the given inputs."""
pass
"""Test and evaluate the Crew with the given inputs for n iterations."""
evaluator = CrewEvaluator(self, openai_model_name)
for i in range(1, n_iterations + 1):
evaluator.set_iteration(i)
self.kickoff(inputs=inputs)
evaluator.print_crew_evaluation_result()
def __repr__(self):
return f"Crew(id={self.id}, process={self.process}, number_of_agents={len(self.agents)}, number_of_tasks={len(self.tasks)})"

View File

@@ -1,6 +1,5 @@
import json
import os
import re
import threading
import uuid
from concurrent.futures import Future
@@ -8,7 +7,6 @@ from copy import copy
from hashlib import md5
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from langchain_openai import ChatOpenAI
from opentelemetry.trace import Span
from pydantic import UUID4, BaseModel, Field, field_validator, model_validator
from pydantic_core import PydanticCustomError
@@ -17,10 +15,8 @@ from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.tasks.output_format import OutputFormat
from crewai.tasks.task_output import TaskOutput
from crewai.telemetry.telemetry import Telemetry
from crewai.utilities.converter import Converter, ConverterError
from crewai.utilities.converter import Converter, convert_to_model
from crewai.utilities.i18n import I18N
from crewai.utilities.printer import Printer
from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
class Task(BaseModel):
@@ -254,9 +250,7 @@ class Task(BaseModel):
content = (
json_output
if json_output
else pydantic_output.model_dump_json()
if pydantic_output
else result
else pydantic_output.model_dump_json() if pydantic_output else result
)
self._save_file(content)
@@ -326,18 +320,6 @@ class Task(BaseModel):
return copied_task
def _create_converter(self, *args, **kwargs) -> Converter:
"""Create a converter instance."""
if self.agent and not self.converter_cls:
converter = self.agent.get_output_converter(*args, **kwargs)
elif self.converter_cls:
converter = self.converter_cls(*args, **kwargs)
if not converter:
raise Exception("No output converter found or set.")
return converter
def _export_output(
self, result: str
) -> Tuple[Optional[BaseModel], Optional[Dict[str, Any]]]:
@@ -345,75 +327,26 @@ class Task(BaseModel):
json_output: Optional[Dict[str, Any]] = None
if self.output_pydantic or self.output_json:
model_output = self._convert_to_model(result)
pydantic_output = (
model_output if isinstance(model_output, BaseModel) else None
model_output = convert_to_model(
result,
self.output_pydantic,
self.output_json,
self.agent,
self.converter_cls,
)
if isinstance(model_output, str):
if isinstance(model_output, BaseModel):
pydantic_output = model_output
elif isinstance(model_output, dict):
json_output = model_output
elif isinstance(model_output, str):
try:
json_output = json.loads(model_output)
except json.JSONDecodeError:
json_output = None
else:
json_output = model_output if isinstance(model_output, dict) else None
return pydantic_output, json_output
def _convert_to_model(self, result: str) -> Union[dict, BaseModel, str]:
model = self.output_pydantic or self.output_json
if model is None:
return result
try:
return self._validate_model(result, model)
except Exception:
return self._handle_partial_json(result, model)
def _validate_model(
self, result: str, model: Type[BaseModel]
) -> Union[dict, BaseModel]:
exported_result = model.model_validate_json(result)
if self.output_json:
return exported_result.model_dump()
return exported_result
def _handle_partial_json(
self, result: str, model: Type[BaseModel]
) -> Union[dict, BaseModel, str]:
match = re.search(r"({.*})", result, re.DOTALL)
if match:
try:
exported_result = model.model_validate_json(match.group(0))
if self.output_json:
return exported_result.model_dump()
return exported_result
except Exception:
pass
return self._convert_with_instructions(result, model)
def _convert_with_instructions(
self, result: str, model: Type[BaseModel]
) -> Union[dict, BaseModel, str]:
llm = self.agent.function_calling_llm or self.agent.llm # type: ignore # Item "None" of "BaseAgent | None" has no attribute "function_calling_llm"
instructions = self._get_conversion_instructions(model, llm)
converter = self._create_converter(
llm=llm, text=result, model=model, instructions=instructions
)
exported_result = (
converter.to_pydantic() if self.output_pydantic else converter.to_json()
)
if isinstance(exported_result, ConverterError):
Printer().print(
content=f"{exported_result.message} Using raw output instead.",
color="red",
)
return result
return exported_result
def _get_output_format(self) -> OutputFormat:
if self.output_json:
return OutputFormat.JSON
@@ -421,26 +354,6 @@ class Task(BaseModel):
return OutputFormat.PYDANTIC
return OutputFormat.RAW
def _get_conversion_instructions(self, model: Type[BaseModel], llm: Any) -> str:
instructions = "I'm gonna convert this raw text into valid JSON."
if not self._is_gpt(llm):
model_schema = PydanticSchemaParser(model=model).get_schema()
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
return instructions
def _save_output(self, content: str) -> None:
if not self.output_file:
raise Exception("Output file path is not set.")
directory = os.path.dirname(self.output_file)
if directory and not os.path.exists(directory):
os.makedirs(directory)
with open(self.output_file, "w", encoding="utf-8") as file:
file.write(content)
def _is_gpt(self, llm) -> bool:
return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
def _save_file(self, result: Any) -> None:
directory = os.path.dirname(self.output_file) # type: ignore # Value of type variable "AnyOrLiteralStr" of "dirname" cannot be "str | None"

View File

@@ -1,9 +1,14 @@
import json
import re
from typing import Any, Optional, Type, Union
from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, ValidationError
from crewai.agents.agent_builder.utilities.base_output_converter import OutputConverter
from crewai.utilities.printer import Printer
from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
class ConverterError(Exception):
@@ -72,3 +77,153 @@ class Converter(OutputConverter):
def is_gpt(self) -> bool:
"""Return if llm provided is of gpt from openai."""
return isinstance(self.llm, ChatOpenAI) and self.llm.openai_api_base is None
def convert_to_model(
result: str,
output_pydantic: Optional[Type[BaseModel]],
output_json: Optional[Type[BaseModel]],
agent: Any,
converter_cls: Optional[Type[Converter]] = None,
) -> Union[dict, BaseModel, str]:
model = output_pydantic or output_json
if model is None:
return result
try:
escaped_result = json.dumps(json.loads(result, strict=False))
return validate_model(escaped_result, model, bool(output_json))
except json.JSONDecodeError as e:
Printer().print(
content=f"Error parsing JSON: {e}. Attempting to handle partial JSON.",
color="yellow",
)
return handle_partial_json(
result, model, bool(output_json), agent, converter_cls
)
except ValidationError as e:
Printer().print(
content=f"Pydantic validation error: {e}. Attempting to handle partial JSON.",
color="yellow",
)
return handle_partial_json(
result, model, bool(output_json), agent, converter_cls
)
except Exception as e:
Printer().print(
content=f"Unexpected error during model conversion: {type(e).__name__}: {e}. Returning original result.",
color="red",
)
return result
def validate_model(
result: str, model: Type[BaseModel], is_json_output: bool
) -> Union[dict, BaseModel]:
exported_result = model.model_validate_json(result)
if is_json_output:
return exported_result.model_dump()
return exported_result
def handle_partial_json(
result: str,
model: Type[BaseModel],
is_json_output: bool,
agent: Any,
converter_cls: Optional[Type[Converter]] = None,
) -> Union[dict, BaseModel, str]:
match = re.search(r"({.*})", result, re.DOTALL)
if match:
try:
exported_result = model.model_validate_json(match.group(0))
if is_json_output:
return exported_result.model_dump()
return exported_result
except json.JSONDecodeError as e:
Printer().print(
content=f"Error parsing JSON: {e}. The extracted JSON-like string is not valid JSON. Attempting alternative conversion method.",
color="yellow",
)
except ValidationError as e:
Printer().print(
content=f"Pydantic validation error: {e}. The JSON structure doesn't match the expected model. Attempting alternative conversion method.",
color="yellow",
)
except Exception as e:
Printer().print(
content=f"Unexpected error during partial JSON handling: {type(e).__name__}: {e}. Attempting alternative conversion method.",
color="red",
)
return convert_with_instructions(
result, model, is_json_output, agent, converter_cls
)
def convert_with_instructions(
result: str,
model: Type[BaseModel],
is_json_output: bool,
agent: Any,
converter_cls: Optional[Type[Converter]] = None,
) -> Union[dict, BaseModel, str]:
llm = agent.function_calling_llm or agent.llm
instructions = get_conversion_instructions(model, llm)
converter = create_converter(
agent=agent,
converter_cls=converter_cls,
llm=llm,
text=result,
model=model,
instructions=instructions,
)
exported_result = (
converter.to_pydantic() if not is_json_output else converter.to_json()
)
if isinstance(exported_result, ConverterError):
Printer().print(
content=f"{exported_result.message} Using raw output instead.",
color="red",
)
return result
return exported_result
def get_conversion_instructions(model: Type[BaseModel], llm: Any) -> str:
instructions = "I'm gonna convert this raw text into valid JSON."
if not is_gpt(llm):
model_schema = PydanticSchemaParser(model=model).get_schema()
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
return instructions
def is_gpt(llm: Any) -> bool:
from langchain_openai import ChatOpenAI
return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
def create_converter(
agent: Optional[Any] = None,
converter_cls: Optional[Type[Converter]] = None,
*args,
**kwargs,
) -> Converter:
if agent and not converter_cls:
if hasattr(agent, "get_output_converter"):
converter = agent.get_output_converter(*args, **kwargs)
else:
raise AttributeError("Agent does not have a 'get_output_converter' method")
elif converter_cls:
converter = converter_cls(*args, **kwargs)
else:
raise ValueError("Either agent or converter_cls must be provided")
if not converter:
raise Exception("No output converter found or set.")
return converter
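For orientation, a minimal usage sketch of the new module-level `convert_to_model` helper, mirroring the unit tests added further down (valid-JSON path, so no agent, LLM, or custom converter is needed; the `Summary` model is hypothetical):

```python
from pydantic import BaseModel

from crewai.utilities.converter import convert_to_model

class Summary(BaseModel):
    title: str
    body: str

raw = '{"title": "AI", "body": "A short overview."}'
# Valid JSON matching the model is validated directly; no agent/LLM fallback is triggered.
parsed = convert_to_model(raw, output_pydantic=Summary, output_json=None, agent=None)
assert isinstance(parsed, Summary)
```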

View File

@@ -14,7 +14,7 @@ class CrewPydanticOutputParser(PydanticOutputParser):
pydantic_object: Type[BaseModel]
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
def parse_result(self, result: List[Generation]) -> Any:
result[0].text = self._transform_in_valid_json(result[0].text)
# Treating edge case of function calling llm returning the name instead of tool_name

View File

@@ -0,0 +1,149 @@
from collections import defaultdict
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from rich.console import Console
from rich.table import Table
from crewai.agent import Agent
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
class TaskEvaluationPydanticOutput(BaseModel):
quality: float = Field(
description="A score from 1 to 10 evaluating on completion, quality, and overall performance from the task_description and task_expected_output to the actual Task Output."
)
class CrewEvaluator:
"""
A class to evaluate the performance of the agents in the crew based on the tasks they have performed.
Attributes:
crew (Crew): The crew of agents to evaluate.
openai_model_name (str): The model to use for evaluating the performance of the agents (for now ONLY OpenAI accepted).
tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task.
iteration (int): The current iteration of the evaluation.
"""
tasks_scores: defaultdict = defaultdict(list)
iteration: int = 0
def __init__(self, crew, openai_model_name: str):
self.crew = crew
self.openai_model_name = openai_model_name
self._setup_for_evaluating()
def _setup_for_evaluating(self) -> None:
"""Sets up the crew for evaluating."""
for task in self.crew.tasks:
task.callback = self.evaluate
def set_iteration(self, iteration: int) -> None:
self.iteration = iteration
def _evaluator_agent(self):
return Agent(
role="Task Execution Evaluator",
goal=(
"Your goal is to evaluate the performance of the agents in the crew based on the tasks they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
),
backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
verbose=False,
llm=ChatOpenAI(model=self.openai_model_name),
)
def _evaluation_task(
self, evaluator_agent: Agent, task_to_evaluate: Task, task_output: str
) -> Task:
return Task(
description=(
"Based on the task description and the expected output, compare and evaluate the performance of the agents in the crew based on the Task Output they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
f"task_description: {task_to_evaluate.description} "
f"task_expected_output: {task_to_evaluate.expected_output} "
f"agent: {task_to_evaluate.agent.role if task_to_evaluate.agent else None} "
f"agent_goal: {task_to_evaluate.agent.goal if task_to_evaluate.agent else None} "
f"Task Output: {task_output}"
),
expected_output="Evaluation Score from 1 to 10 based on the performance of the agents on the tasks",
agent=evaluator_agent,
output_pydantic=TaskEvaluationPydanticOutput,
)
def print_crew_evaluation_result(self) -> None:
"""
Prints the evaluation result of the crew in a table.
A Crew with 2 tasks using the command crewai test -n 2
will output the following table:
Task Scores
(1-10 Higher is better)
┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃
┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
│ Task 1 │ 10.0 │ 9.0 │ 9.5 │
│ Task 2 │ 9.0 │ 9.0 │ 9.0 │
│ Crew │ 9.5 │ 9.0 │ 9.2 │
└────────────┴───────┴───────┴────────────┘
"""
task_averages = [
sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values())
]
crew_average = sum(task_averages) / len(task_averages)
# Create a table
table = Table(title="Tasks Scores \n (1-10 Higher is better)")
# Add columns for the table
table.add_column("Tasks/Crew")
for run in range(1, len(self.tasks_scores) + 1):
table.add_column(f"Run {run}")
table.add_column("Avg. Total")
# Add rows for each task
for task_index in range(len(task_averages)):
task_scores = [
self.tasks_scores[run][task_index]
for run in range(1, len(self.tasks_scores) + 1)
]
avg_score = task_averages[task_index]
table.add_row(
f"Task {task_index + 1}", *map(str, task_scores), f"{avg_score:.1f}"
)
# Add a row for the crew average
crew_scores = [
sum(self.tasks_scores[run]) / len(self.tasks_scores[run])
for run in range(1, len(self.tasks_scores) + 1)
]
table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
# Display the table in the terminal
console = Console()
console.print(table)
def evaluate(self, task_output: TaskOutput):
"""Evaluates the performance of the agents in the crew based on the tasks they have performed."""
current_task = None
for task in self.crew.tasks:
if task.description == task_output.description:
current_task = task
break
if not current_task or not task_output:
raise ValueError(
"Task to evaluate and task output are required for evaluation"
)
evaluator_agent = self._evaluator_agent()
evaluation_task = self._evaluation_task(
evaluator_agent, current_task, task_output.raw
)
evaluation_result = evaluation_task.execute_sync()
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
else:
raise ValueError("Evaluation result is not in the expected format")
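A hedged sketch of driving the new `CrewEvaluator` directly, which is essentially what `Crew.test()` now does (`my_crew` is an assumed, already assembled `Crew`):

```python
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator

# The evaluator hooks itself into each task's callback and collects a
# 1-10 quality score per task for every iteration.
evaluator = CrewEvaluator(my_crew, openai_model_name="gpt-4o-mini")
for i in range(1, 3):  # two iterations, like `crewai test -n 2`
    evaluator.set_iteration(i)
    my_crew.kickoff(inputs={"topic": "AI"})
evaluator.print_crew_evaluation_result()
```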

View File

@@ -1,5 +1,6 @@
from typing import List, Optional
from typing import Any, List, Optional
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from crewai.agent import Agent
@@ -11,17 +12,27 @@ class PlannerTaskPydanticOutput(BaseModel):
class CrewPlanner:
def __init__(self, tasks: List[Task]):
def __init__(self, tasks: List[Task], planning_agent_llm: Optional[Any] = None):
self.tasks = tasks
def _handle_crew_planning(self) -> Optional[BaseModel]:
if planning_agent_llm is None:
self.planning_agent_llm = ChatOpenAI(model="gpt-4o-mini")
else:
self.planning_agent_llm = planning_agent_llm
def _handle_crew_planning(self) -> PlannerTaskPydanticOutput:
"""Handles the Crew planning by creating detailed step-by-step plans for each task."""
planning_agent = self._create_planning_agent()
tasks_summary = self._create_tasks_summary()
planner_task = self._create_planner_task(planning_agent, tasks_summary)
return planner_task.execute_sync().pydantic
result = planner_task.execute_sync()
if isinstance(result.pydantic, PlannerTaskPydanticOutput):
return result.pydantic
raise ValueError("Failed to get the Planning output")
def _create_planning_agent(self) -> Agent:
"""Creates the planning agent for the crew planning."""
@@ -32,6 +43,7 @@ class CrewPlanner:
"available to each agent so that they can perform the tasks in an exemplary manner"
),
backstory="Planner agent for crew planning",
llm=self.planning_agent_llm,
)
def _create_planner_task(self, planning_agent: Agent, tasks_summary: str) -> Task:
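A small sketch of the planner's updated constructor, mirroring how `Crew._handle_crew_planning` now calls it (`my_tasks` is an assumed list of `Task` objects):

```python
from langchain_openai import ChatOpenAI

from crewai.utilities.planning_handler import CrewPlanner

# With no LLM passed, the planner falls back to ChatOpenAI(model="gpt-4o-mini").
planner = CrewPlanner(tasks=my_tasks, planning_agent_llm=ChatOpenAI(model="gpt-4o"))
output = planner._handle_crew_planning()  # returns PlannerTaskPydanticOutput
```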

View File

@@ -16,11 +16,13 @@ class PydanticSchemaParser(BaseModel):
return self._get_model_schema(self.model)
def _get_model_schema(self, model, depth=0) -> str:
lines = ["{"]
indent = " " * depth
lines = [f"{indent}{{"]
for field_name, field in model.model_fields.items():
field_type_str = self._get_field_type(field, depth + 1)
lines.append(f"{' ' * 4 * (depth + 1)}{field_name}: {field_type_str}")
lines.append(f"{' ' * 4 * depth}")
lines.append(f"{indent} {field_name}: {field_type_str},")
lines[-1] = lines[-1].rstrip(",") # Remove trailing comma from last item
lines.append(f"{indent}}}")
return "\n".join(lines)
def _get_field_type(self, field, depth) -> str:

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -8,6 +8,7 @@ from unittest.mock import MagicMock, patch
import pydantic_core
import pytest
from crewai.agent import Agent
from crewai.agents.cache import CacheHandler
from crewai.crew import Crew
@@ -1355,28 +1356,66 @@ def test_hierarchical_crew_creation_tasks_with_agents():
@pytest.mark.vcr(filter_headers=["authorization"])
def test_hierarchical_crew_creation_tasks_with_async_execution():
"""
Agents are not required for tasks in a hierarchical process, but sometimes they are still added.
This test makes sure that the manager still delegates the task to the agent even if an agent is assigned to the task.
"""
from langchain_openai import ChatOpenAI
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
expected_output="5 bullet points with a paragraph for each idea.",
async_execution=True, # should throw an error
description="Write one amazing paragraph about AI.",
expected_output="A single paragraph with 4 sentences.",
agent=writer,
async_execution=True,
)
with pytest.raises(pydantic_core._pydantic_core.ValidationError) as exec_info:
Crew(
tasks=[task],
agents=[researcher],
process=Process.hierarchical,
manager_llm=ChatOpenAI(model="gpt-4o"),
)
assert (
exec_info.value.errors()[0]["type"] == "async_execution_in_hierarchical_process"
crew = Crew(
tasks=[task],
agents=[writer, researcher, ceo],
process=Process.hierarchical,
manager_llm=ChatOpenAI(model="gpt-4o"),
)
assert (
"Hierarchical process error: Tasks cannot be flagged with async_execution."
in exec_info.value.errors()[0]["msg"]
crew.kickoff()
assert crew.manager_agent is not None
assert crew.manager_agent.tools is not None
assert crew.manager_agent.tools[0].description.startswith(
"Delegate a specific task to one of the following coworkers: Senior Writer\n"
)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_hierarchical_crew_creation_tasks_with_sync_last():
"""
Agents are not required for tasks in a hierarchical process, but sometimes they are still added.
This test makes sure that the manager still delegates the task to the agent even if an agent is assigned to the task.
"""
from langchain_openai import ChatOpenAI
task = Task(
description="Write one amazing paragraph about AI.",
expected_output="A single paragraph with 4 sentences.",
agent=writer,
async_execution=True,
)
task2 = Task(
description="Write one amazing paragraph about AI.",
expected_output="A single paragraph with 4 sentences.",
async_execution=False,
)
crew = Crew(
tasks=[task, task2],
agents=[writer, researcher, ceo],
process=Process.hierarchical,
manager_llm=ChatOpenAI(model="gpt-4o"),
)
crew.kickoff()
assert crew.manager_agent is not None
assert crew.manager_agent.tools is not None
assert crew.manager_agent.tools[0].description.startswith(
"Delegate a specific task to one of the following coworkers: Senior Writer, Researcher, CEO\n"
)
@@ -2499,3 +2538,34 @@ def test_conditional_should_execute():
assert condition_mock.call_count == 1
assert condition_mock() is True
assert mock_execute_sync.call_count == 2
@mock.patch("crewai.crew.CrewEvaluator")
@mock.patch("crewai.crew.Crew.kickoff")
def test_crew_testing_function(mock_kickoff, crew_evaluator):
task = Task(
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
expected_output="5 bullet points with a paragraph for each idea.",
agent=researcher,
)
crew = Crew(
agents=[researcher],
tasks=[task],
)
n_iterations = 2
crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
assert len(mock_kickoff.mock_calls) == n_iterations
mock_kickoff.assert_has_calls(
[mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
)
crew_evaluator.assert_has_calls(
[
mock.call(crew, "gpt-4o-mini"),
mock.call().set_iteration(1),
mock.call().set_iteration(2),
mock.call().print_crew_evaluation_result(),
]
)

View File

@@ -5,13 +5,12 @@ import json
from unittest.mock import MagicMock, patch
import pytest
from pydantic import BaseModel
from pydantic_core import ValidationError
from crewai import Agent, Crew, Process, Task
from crewai.tasks.conditional_task import ConditionalTask
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.converter import Converter
from pydantic import BaseModel
from pydantic_core import ValidationError
def test_task_tool_reflect_agent_tools():

View File

@@ -0,0 +1,113 @@
from unittest import mock
import pytest
from crewai.agent import Agent
from crewai.crew import Crew
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.evaluators.crew_evaluator_handler import (
CrewEvaluator,
TaskEvaluationPydanticOutput,
)
class TestCrewEvaluator:
@pytest.fixture
def crew_planner(self):
agent = Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1")
task = Task(
description="Task 1",
expected_output="Output 1",
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task])
return CrewEvaluator(crew, openai_model_name="gpt-4o-mini")
def test_setup_for_evaluating(self, crew_planner):
crew_planner._setup_for_evaluating()
assert crew_planner.crew.tasks[0].callback == crew_planner.evaluate
def test_set_iteration(self, crew_planner):
crew_planner.set_iteration(1)
assert crew_planner.iteration == 1
def test_evaluator_agent(self, crew_planner):
agent = crew_planner._evaluator_agent()
assert agent.role == "Task Execution Evaluator"
assert (
agent.goal
== "Your goal is to evaluate the performance of the agents in the crew based on the tasks they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
)
assert (
agent.backstory
== "Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed"
)
assert agent.verbose is False
assert agent.llm.model_name == "gpt-4o-mini"
def test_evaluation_task(self, crew_planner):
evaluator_agent = Agent(
role="Evaluator Agent",
goal="Evaluate the performance of the agents in the crew",
backstory="Master in Evaluation",
)
task_to_evaluate = Task(
description="Task 1",
expected_output="Output 1",
agent=Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1"),
)
task_output = "Task Output 1"
task = crew_planner._evaluation_task(
evaluator_agent, task_to_evaluate, task_output
)
assert task.description.startswith(
"Based on the task description and the expected output, compare and evaluate the performance of the agents in the crew based on the Task Output they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
)
assert task.agent == evaluator_agent
assert (
task.description
== "Based on the task description and the expected output, compare and evaluate "
"the performance of the agents in the crew based on the Task Output they have "
"performed using score from 1 to 10 evaluating on completion, quality, and overall "
"performance.task_description: Task 1 task_expected_output: Output 1 "
"agent: Agent 1 agent_goal: Goal 1 Task Output: Task Output 1"
)
@mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Console")
@mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Table")
def test_print_crew_evaluation_result(self, table, console, crew_planner):
crew_planner.tasks_scores = {
1: [10, 9, 8],
2: [9, 8, 7],
}
crew_planner.print_crew_evaluation_result()
table.assert_has_calls(
[
mock.call(title="Tasks Scores \n (1-10 Higher is better)"),
mock.call().add_column("Tasks/Crew"),
mock.call().add_column("Run 1"),
mock.call().add_column("Run 2"),
mock.call().add_column("Avg. Total"),
mock.call().add_row("Task 1", "10", "9", "9.5"),
mock.call().add_row("Task 2", "9", "8", "8.5"),
mock.call().add_row("Task 3", "8", "7", "7.5"),
mock.call().add_row("Crew", "9.0", "8.0", "8.5"),
]
)
console.assert_has_calls([mock.call(), mock.call().print(table())])
def test_evaluate(self, crew_planner):
task_output = TaskOutput(
description="Task 1", agent=str(crew_planner.crew.agents[0])
)
with mock.patch.object(Task, "execute_sync") as execute:
execute().pydantic = TaskEvaluationPydanticOutput(quality=9.5)
crew_planner.evaluate(task_output)
assert crew_planner.tasks_scores[0] == [9.5]

View File

@@ -56,8 +56,7 @@ def test_evaluate_training_data(converter_mock):
"based on the human feedback\n",
model=TrainingTaskEvaluation,
instructions="I'm gonna convert this raw text into valid JSON.\n\nThe json should have the "
"following structure, with the following keys:\n- suggestions: List[str]\n- "
"quality: float\n- final_summary: str",
"following structure, with the following keys:\n{\n suggestions: List[str],\n quality: float,\n final_summary: str\n}",
),
mock.call().to_pydantic(),
]

View File

@@ -0,0 +1,266 @@
import json
from unittest.mock import MagicMock, Mock, patch
import pytest
from crewai.utilities.converter import (
Converter,
ConverterError,
convert_to_model,
convert_with_instructions,
create_converter,
get_conversion_instructions,
handle_partial_json,
is_gpt,
validate_model,
)
from pydantic import BaseModel
# Sample Pydantic models for testing
class EmailResponse(BaseModel):
previous_message_content: str
class EmailResponses(BaseModel):
responses: list[EmailResponse]
class SimpleModel(BaseModel):
name: str
age: int
class NestedModel(BaseModel):
id: int
data: SimpleModel
# Fixtures
@pytest.fixture
def mock_agent():
agent = Mock()
agent.function_calling_llm = None
agent.llm = Mock()
return agent
# Tests for convert_to_model
def test_convert_to_model_with_valid_json():
result = '{"name": "John", "age": 30}'
output = convert_to_model(result, SimpleModel, None, None)
assert isinstance(output, SimpleModel)
assert output.name == "John"
assert output.age == 30
def test_convert_to_model_with_invalid_json():
result = '{"name": "John", "age": "thirty"}'
with patch("crewai.utilities.converter.handle_partial_json") as mock_handle:
mock_handle.return_value = "Fallback result"
output = convert_to_model(result, SimpleModel, None, None)
assert output == "Fallback result"
def test_convert_to_model_with_no_model():
result = "Plain text"
output = convert_to_model(result, None, None, None)
assert output == "Plain text"
def test_convert_to_model_with_special_characters():
json_string_test = """
{
"responses": [
{
"previous_message_content": "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
}
]
}
"""
output = convert_to_model(json_string_test, EmailResponses, None, None)
assert isinstance(output, EmailResponses)
assert len(output.responses) == 1
assert (
output.responses[0].previous_message_content
== "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
)
def test_convert_to_model_with_escaped_special_characters():
json_string_test = json.dumps(
{
"responses": [
{
"previous_message_content": "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
}
]
}
)
output = convert_to_model(json_string_test, EmailResponses, None, None)
assert isinstance(output, EmailResponses)
assert len(output.responses) == 1
assert (
output.responses[0].previous_message_content
== "Hi Tom,\r\n\r\nNiamh has chosen the Mika phonics on"
)
def test_convert_to_model_with_multiple_special_characters():
json_string_test = """
{
"responses": [
{
"previous_message_content": "Line 1\r\nLine 2\tTabbed\nLine 3\r\n\rEscaped newline"
}
]
}
"""
output = convert_to_model(json_string_test, EmailResponses, None, None)
assert isinstance(output, EmailResponses)
assert len(output.responses) == 1
assert (
output.responses[0].previous_message_content
== "Line 1\r\nLine 2\tTabbed\nLine 3\r\n\rEscaped newline"
)
# Tests for validate_model
def test_validate_model_pydantic_output():
result = '{"name": "Alice", "age": 25}'
output = validate_model(result, SimpleModel, False)
assert isinstance(output, SimpleModel)
assert output.name == "Alice"
assert output.age == 25
def test_validate_model_json_output():
result = '{"name": "Bob", "age": 40}'
output = validate_model(result, SimpleModel, True)
assert isinstance(output, dict)
assert output == {"name": "Bob", "age": 40}
# Tests for handle_partial_json
def test_handle_partial_json_with_valid_partial():
result = 'Some text {"name": "Charlie", "age": 35} more text'
output = handle_partial_json(result, SimpleModel, False, None)
assert isinstance(output, SimpleModel)
assert output.name == "Charlie"
assert output.age == 35
def test_handle_partial_json_with_invalid_partial(mock_agent):
result = "No valid JSON here"
with patch("crewai.utilities.converter.convert_with_instructions") as mock_convert:
mock_convert.return_value = "Converted result"
output = handle_partial_json(result, SimpleModel, False, mock_agent)
assert output == "Converted result"
# Tests for convert_with_instructions
@patch("crewai.utilities.converter.create_converter")
@patch("crewai.utilities.converter.get_conversion_instructions")
def test_convert_with_instructions_success(
mock_get_instructions, mock_create_converter, mock_agent
):
mock_get_instructions.return_value = "Instructions"
mock_converter = Mock()
mock_converter.to_pydantic.return_value = SimpleModel(name="David", age=50)
mock_create_converter.return_value = mock_converter
result = "Some text to convert"
output = convert_with_instructions(result, SimpleModel, False, mock_agent)
assert isinstance(output, SimpleModel)
assert output.name == "David"
assert output.age == 50
@patch("crewai.utilities.converter.create_converter")
@patch("crewai.utilities.converter.get_conversion_instructions")
def test_convert_with_instructions_failure(
mock_get_instructions, mock_create_converter, mock_agent
):
mock_get_instructions.return_value = "Instructions"
mock_converter = Mock()
mock_converter.to_pydantic.return_value = ConverterError("Conversion failed")
mock_create_converter.return_value = mock_converter
result = "Some text to convert"
with patch("crewai.utilities.converter.Printer") as mock_printer:
output = convert_with_instructions(result, SimpleModel, False, mock_agent)
assert output == result
mock_printer.return_value.print.assert_called_once()
# Tests for get_conversion_instructions
def test_get_conversion_instructions_gpt():
mock_llm = Mock()
mock_llm.openai_api_base = None
with patch("crewai.utilities.converter.is_gpt", return_value=True):
instructions = get_conversion_instructions(SimpleModel, mock_llm)
assert instructions == "I'm gonna convert this raw text into valid JSON."
def test_get_conversion_instructions_non_gpt():
mock_llm = Mock()
with patch("crewai.utilities.converter.is_gpt", return_value=False):
with patch("crewai.utilities.converter.PydanticSchemaParser") as mock_parser:
mock_parser.return_value.get_schema.return_value = "Sample schema"
instructions = get_conversion_instructions(SimpleModel, mock_llm)
assert "Sample schema" in instructions
# Tests for is_gpt
def test_is_gpt_true():
from langchain_openai import ChatOpenAI
mock_llm = Mock(spec=ChatOpenAI)
mock_llm.openai_api_base = None
assert is_gpt(mock_llm) is True
def test_is_gpt_false():
mock_llm = Mock()
assert is_gpt(mock_llm) is False
class CustomConverter(Converter):
pass
def test_create_converter_with_mock_agent():
mock_agent = MagicMock()
mock_agent.get_output_converter.return_value = MagicMock(spec=Converter)
converter = create_converter(
agent=mock_agent,
llm=Mock(),
text="Sample",
model=SimpleModel,
instructions="Convert",
)
assert isinstance(converter, Converter)
mock_agent.get_output_converter.assert_called_once()
def test_create_converter_with_custom_converter():
converter = create_converter(
converter_cls=CustomConverter,
llm=Mock(),
text="Sample",
model=SimpleModel,
instructions="Convert",
)
assert isinstance(converter, CustomConverter)
def test_create_converter_fails_without_agent_or_converter_cls():
with pytest.raises(
ValueError, match="Either agent or converter_cls must be provided"
):
create_converter(
llm=Mock(), text="Sample", model=SimpleModel, instructions="Convert"
)

View File

@@ -1,10 +1,11 @@
from unittest.mock import patch
from crewai.tasks.task_output import TaskOutput
import pytest
from langchain_openai import ChatOpenAI
from crewai.agent import Agent
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
from crewai.utilities.planning_handler import CrewPlanner, PlannerTaskPydanticOutput
@@ -28,7 +29,19 @@ class TestCrewPlanner:
agent=Agent(role="Agent 3", goal="Goal 3", backstory="Backstory 3"),
),
]
return CrewPlanner(tasks)
return CrewPlanner(tasks, None)
@pytest.fixture
def crew_planner_different_llm(self):
tasks = [
Task(
description="Task 1",
expected_output="Output 1",
agent=Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1"),
)
]
planning_agent_llm = ChatOpenAI(model="gpt-3.5-turbo")
return CrewPlanner(tasks, planning_agent_llm)
def test_handle_crew_planning(self, crew_planner):
with patch.object(Task, "execute_sync") as execute:
@@ -40,7 +53,7 @@ class TestCrewPlanner:
),
)
result = crew_planner._handle_crew_planning()
assert crew_planner.planning_agent_llm.model_name == "gpt-4o-mini"
assert isinstance(result, PlannerTaskPydanticOutput)
assert len(result.list_of_plans_per_task) == len(crew_planner.tasks)
execute.assert_called_once()
@@ -72,3 +85,22 @@ class TestCrewPlanner:
assert isinstance(tasks_summary, str)
assert tasks_summary.startswith("\n Task Number 1 - Task 1")
assert tasks_summary.endswith('"agent_tools": []\n ')
def test_handle_crew_planning_different_llm(self, crew_planner_different_llm):
with patch.object(Task, "execute_sync") as execute:
execute.return_value = TaskOutput(
description="Description",
agent="agent",
pydantic=PlannerTaskPydanticOutput(list_of_plans_per_task=["Plan 1"]),
)
result = crew_planner_different_llm._handle_crew_planning()
assert (
crew_planner_different_llm.planning_agent_llm.model_name
== "gpt-3.5-turbo"
)
assert isinstance(result, PlannerTaskPydanticOutput)
assert len(result.list_of_plans_per_task) == len(
crew_planner_different_llm.tasks
)
execute.assert_called_once()