Feature/kickoff consistent output (#847)

* Cleaned up task execution to now have separate paths for async and sync execution. Updating all kickoff functions to return CrewOutput. WIP. Waiting for Joao feedback on async task execution with task_output * Consistently storing async and sync output for context * outline tests I need to create going forward * Major rehaul of TaskOutput and CrewOutput. Updated all tests to work with new change. Need to add in a few final tricky async tests and add a few more to verify output types on TaskOutput and CrewOutput. * Encountering issues with callback. Need to test on main. WIP * working on tests. WIP * WIP. Figuring out disconnect issue. * Cleaned up logs now that I've isolated the issue to the LLM * more wip. * WIP. It looks like usage metrics has always been broken for async * Update parent crew who is managing for_each loop * Merge in main to bugfix/kickoff-for-each-usage-metrics * Clean up code for review * Add new tests * Final cleanup. Ready for review. * Moving copy functionality from Agent to BaseAgent * Fix renaming issue * Fix linting errors * use BaseAgent instead of Agent where applicable * Fixing missing function. Working on tests. * WIP. Needing team to review change * Fixing issues brought about by merge * WIP * Implement major fixes from yesterdays group conversation. Now working on tests. * The majority of tasks are working now. Need to fix converter class * Fix final failing test * Fix linting and type-checker issues * Add more tests to fully test CrewOutput and TaskOutput changes * Add in validation for async cannot depend on other async tasks. * Update validators and tests
2026-01-10 00:28:31 +00:00 · 2024-07-10 23:35:02 -04:00
parent ca22a91b8a
commit 2fcaf68312
30 changed files with 260509 additions and 697 deletions
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -1,6 +1,7 @@
 import asyncio
 import json
 import uuid
+from concurrent.futures import Future
 from typing import Any, Dict, List, Optional, Tuple, Union

 from langchain_core.callbacks import BaseCallbackHandler
@@ -20,16 +21,22 @@ from pydantic_core import PydanticCustomError
 from crewai.agent import Agent
 from crewai.agents.agent_builder.base_agent import BaseAgent
 from crewai.agents.cache import CacheHandler
+from crewai.crews.crew_output import CrewOutput
 from crewai.memory.entity.entity_memory import EntityMemory
 from crewai.memory.long_term.long_term_memory import LongTermMemory
 from crewai.memory.short_term.short_term_memory import ShortTermMemory
 from crewai.process import Process
 from crewai.task import Task
+from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
 from crewai.tools.agent_tools import AgentTools
 from crewai.utilities import I18N, FileHandler, Logger, RPMController
 from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
 from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
+from crewai.utilities.formatter import (
+    aggregate_raw_outputs_from_task_outputs,
+    aggregate_raw_outputs_from_tasks,
+)
 from crewai.utilities.training_handler import CrewTrainingHandler

 try:
@@ -57,7 +64,6 @@ class Crew(BaseModel):
        max_rpm: Maximum number of requests per minute for the crew execution to be respected.
        prompt_file: Path to the prompt json file to be used for the crew.
        id: A unique identifier for the crew instance.
-        full_output: Whether the crew should return the full output with all tasks outputs and token usage metrics or just the final output.
        task_callback: Callback to be executed after each task for every agents execution.
        step_callback: Callback to be executed after each step for every agents execution.
        share_crew: Whether you want to share the complete crew information and execution with crewAI to make the library better, and allow us to train models.
@@ -93,10 +99,6 @@ class Crew(BaseModel):
        default=None,
        description="Metrics for the LLM usage during all tasks execution.",
    )
-    full_output: Optional[bool] = Field(
-        default=False,
-        description="Whether the crew should return the full output with all tasks outputs and token usage metrics or just the final output.",
-    )
    manager_llm: Optional[Any] = Field(
        description="Language model that will run the agent.", default=None
    )
@@ -252,6 +254,63 @@ class Crew(BaseModel):

        return self

+    @model_validator(mode="after")
+    def validate_end_with_at_most_one_async_task(self):
+        """Validates that the crew ends with at most one asynchronous task."""
+        final_async_task_count = 0
+
+        # Traverse tasks backward
+        for task in reversed(self.tasks):
+            if task.async_execution:
+                final_async_task_count += 1
+            else:
+                break  # Stop traversing as soon as a non-async task is encountered
+
+        if final_async_task_count > 1:
+            raise PydanticCustomError(
+                "async_task_count",
+                "The crew must end with at most one asynchronous task.",
+                {},
+            )
+
+        return self
+
+    @model_validator(mode="after")
+    def validate_async_task_cannot_include_sequential_async_tasks_in_context(self):
+        """
+        Validates that if a task is set to be executed asynchronously,
+        it cannot include other asynchronous tasks in its context unless
+        separated by a synchronous task.
+        """
+        for i, task in enumerate(self.tasks):
+            if task.async_execution and task.context:
+                for context_task in task.context:
+                    if context_task.async_execution:
+                        for j in range(i - 1, -1, -1):
+                            if self.tasks[j] == context_task:
+                                raise ValueError(
+                                    f"Task '{task.description}' is asynchronous and cannot include other sequential asynchronous tasks in its context."
+                                )
+                            if not self.tasks[j].async_execution:
+                                break
+        return self
+
+    @model_validator(mode="after")
+    def validate_context_no_future_tasks(self):
+        """Validates that a task's context does not include future tasks."""
+        task_indices = {id(task): i for i, task in enumerate(self.tasks)}
+
+        for task in self.tasks:
+            if task.context:
+                for context_task in task.context:
+                    if id(context_task) not in task_indices:
+                        continue  # Skip context tasks not in the main tasks list
+                    if task_indices[id(context_task)] > task_indices[id(task)]:
+                        raise ValueError(
+                            f"Task '{task.description}' has a context dependency on a future task '{context_task.description}', which is not allowed."
+                        )
+        return self
+
    def _setup_from_config(self):
        assert self.config is not None, "Config should not be None."

@@ -314,12 +373,12 @@ class Crew(BaseModel):

    def kickoff(
        self,
-        inputs: Optional[Dict[str, Any]] = {},
-    ) -> Union[str, Dict[str, Any]]:
+        inputs: Optional[Dict[str, Any]] = None,
+    ) -> CrewOutput:
        """Starts the crew to work on its assigned tasks."""
        self._execution_span = self._telemetry.crew_execution_span(self, inputs)
-
-        self._interpolate_inputs(inputs)  # type: ignore # Argument 1 to "_interpolate_inputs" of "Crew" has incompatible type "dict[str, Any] | None"; expected "dict[str, Any]"
+        if inputs is not None:
+            self._interpolate_inputs(inputs)
        self._set_tasks_callbacks()

        i18n = I18N(prompt_file=self.prompt_file)
@@ -345,8 +404,7 @@ class Crew(BaseModel):
        if self.process == Process.sequential:
            result = self._run_sequential_process()
        elif self.process == Process.hierarchical:
-            result, manager_metrics = self._run_hierarchical_process()  # type: ignore # Incompatible types in assignment (expression has type "str | dict[str, Any]", variable has type "str")
-            metrics.append(manager_metrics)
+            result = self._run_hierarchical_process()  # type: ignore # Incompatible types in assignment (expression has type "str | dict[str, Any]", variable has type "str")
        else:
            raise NotImplementedError(
                f"The process '{self.process}' is not implemented yet."
@@ -359,11 +417,9 @@ class Crew(BaseModel):

        return result

-    def kickoff_for_each(
-        self, inputs: List[Dict[str, Any]]
-    ) -> List[Union[str, Dict[str, Any]]]:
+    def kickoff_for_each(self, inputs: List[Dict[str, Any]]) -> List[CrewOutput]:
        """Executes the Crew's workflow for each input in the list and aggregates results."""
-        results = []
+        results: List[CrewOutput] = []

        # Initialize the parent crew's usage metrics
        total_usage_metrics = {
@@ -387,13 +443,11 @@ class Crew(BaseModel):
        self.usage_metrics = total_usage_metrics
        return results

-    async def kickoff_async(
-        self, inputs: Optional[Dict[str, Any]] = {}
-    ) -> Union[str, Dict]:
+    async def kickoff_async(self, inputs: Optional[Dict[str, Any]] = {}) -> CrewOutput:
        """Asynchronous kickoff method to start the crew execution."""
        return await asyncio.to_thread(self.kickoff, inputs)

-    async def kickoff_for_each_async(self, inputs: List[Dict]) -> List[Any]:
+    async def kickoff_for_each_async(self, inputs: List[Dict]) -> List[CrewOutput]:
        crew_copies = [self.copy() for _ in inputs]

        async def run_crew(crew, input_data):
@@ -403,6 +457,10 @@ class Crew(BaseModel):
            asyncio.create_task(run_crew(crew_copies[i], inputs[i]))
            for i in range(len(inputs))
        ]
+        tasks = [
+            asyncio.create_task(run_crew(crew_copies[i], inputs[i]))
+            for i in range(len(inputs))
+        ]

        results = await asyncio.gather(*tasks)

@@ -419,11 +477,25 @@ class Crew(BaseModel):

        self.usage_metrics = total_usage_metrics

+        total_usage_metrics = {
+            "total_tokens": 0,
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "successful_requests": 0,
+        }
+        for crew in crew_copies:
+            if crew.usage_metrics:
+                for key in total_usage_metrics:
+                    total_usage_metrics[key] += crew.usage_metrics.get(key, 0)
+
+        self.usage_metrics = total_usage_metrics
+
        return results

-    def _run_sequential_process(self) -> Union[str, Dict[str, Any]]:
+    def _run_sequential_process(self) -> CrewOutput:
        """Executes tasks sequentially and returns the final output."""
-        task_output = None
+        task_outputs: List[TaskOutput] = []
+        futures: List[Tuple[Task, Future[TaskOutput]]] = []

        for task in self.tasks:
            if task.agent and task.agent.allow_delegation:
@@ -443,28 +515,80 @@ class Crew(BaseModel):
                self._file_handler.log(
                    agent=role, task=task.description, status="started"
                )
-            output = task.execute(context=task_output)

-            if not task.async_execution:
-                task_output = output
+            if task.async_execution:
+                context = (
+                    aggregate_raw_outputs_from_tasks(task.context)
+                    if task.context
+                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
+                )
+                future = task.execute_async(
+                    agent=task.agent, context=context, tools=task.tools
+                )
+                futures.append((task, future))
+            else:
+                # Before executing a synchronous task, wait for all async tasks to complete
+                if futures:
+                    # Clear task_outputs before processing async tasks
+                    task_outputs = []
+                    for future_task, future in futures:
+                        task_output = future.result()
+                        task_outputs.append(task_output)
+                        self._process_task_result(future_task, task_output)

-            role = task.agent.role if task.agent is not None else "None"
-            self._logger.log("debug", f"== [{role}] Task output: {task_output}\n\n")
+                    # Clear the futures list after processing all async results
+                    futures.clear()

-            if self.output_log_file:
-                self._file_handler.log(agent=role, task=task_output, status="completed")
+                context = (
+                    aggregate_raw_outputs_from_tasks(task.context)
+                    if task.context
+                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
+                )
+                task_output = task.execute_sync(
+                    agent=task.agent, context=context, tools=task.tools
+                )
+                task_outputs = [task_output]
+                self._process_task_result(task, task_output)

-        self._finish_execution(task_output)
+        if futures:
+            # Clear task_outputs before processing async tasks
+            task_outputs = []
+            for future_task, future in futures:
+                task_output = future.result()
+                task_outputs.append(task_output)
+                self._process_task_result(future_task, task_output)
+
+        # Important: There should only be one task output in the list
+        #            If there are more or 0, something went wrong.
+        if len(task_outputs) != 1:
+            raise ValueError(
+                "Something went wrong. Kickoff should return only one task output."
+            )
+
+        final_task_output = task_outputs[0]
+
+        final_string_output = final_task_output.raw
+        self._finish_execution(final_string_output)

        token_usage = self.calculate_usage_metrics()

-        return self._format_output(task_output if task_output else "", token_usage)
+        return CrewOutput(
+            raw=final_task_output.raw,
+            pydantic=final_task_output.pydantic,
+            json_dict=final_task_output.json_dict,
+            tasks_output=[task.output for task in self.tasks if task.output],
+            token_usage=token_usage,
+        )

-    def _run_hierarchical_process(
-        self,
-    ) -> Tuple[Union[str, Dict[str, Any]], Dict[str, Any]]:
+    def _process_task_result(self, task: Task, output: TaskOutput) -> None:
+        role = task.agent.role if task.agent is not None else "None"
+        self._logger.log("debug", f"== [{role}] Task output: {output}\n\n")
+        if self.output_log_file:
+            self._file_handler.log(agent=role, task=output, status="completed")
+
+    # TODO: @joao, Breaking change. Changed return type. Usage metrics is included in crewoutput
+    def _run_hierarchical_process(self) -> CrewOutput:
        """Creates and assigns a manager agent to make sure the crew completes the tasks."""
-
        i18n = I18N(prompt_file=self.prompt_file)
        if self.manager_agent is not None:
            self.manager_agent.allow_delegation = True
@@ -483,8 +607,10 @@ class Crew(BaseModel):
            )
            self.manager_agent = manager

-        task_output = None
+        task_outputs: List[TaskOutput] = []
+        futures: List[Tuple[Task, Future[TaskOutput]]] = []

+        # TODO: IF USER OVERRIDE THE CONTEXT, PASS THAT
        for task in self.tasks:
            self._logger.log("debug", f"Working Agent: {manager.role}")
            self._logger.log("info", f"Starting Task: {task.description}")
@@ -494,27 +620,70 @@ class Crew(BaseModel):
                    agent=manager.role, task=task.description, status="started"
                )

-            if task.agent:
-                manager.tools = task.agent.get_delegation_tools([task.agent])
+            if task.async_execution:
+                context = (
+                    aggregate_raw_outputs_from_tasks(task.context)
+                    if task.context
+                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
+                )
+                future = task.execute_async(
+                    agent=manager, context=context, tools=manager.tools
+                )
+                futures.append((task, future))
            else:
-                manager.tools = manager.get_delegation_tools(self.agents)
-            task_output = task.execute(
-                agent=manager, context=task_output, tools=manager.tools
+                # Before executing a synchronous task, wait for all async tasks to complete
+                if futures:
+                    # Clear task_outputs before processing async tasks
+                    task_outputs = []
+                    for future_task, future in futures:
+                        task_output = future.result()
+                        task_outputs.append(task_output)
+                        self._process_task_result(future_task, task_output)
+
+                    # Clear the futures list after processing all async results
+                    futures.clear()
+
+                context = (
+                    aggregate_raw_outputs_from_tasks(task.context)
+                    if task.context
+                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
+                )
+                task_output = task.execute_sync(
+                    agent=manager, context=context, tools=manager.tools
+                )
+                task_outputs = [task_output]
+                self._process_task_result(task, task_output)
+
+        # Process any remaining async results
+        if futures:
+            # Clear task_outputs before processing async tasks
+            task_outputs = []
+            for future_task, future in futures:
+                task_output = future.result()
+                task_outputs.append(task_output)
+                self._process_task_result(future_task, task_output)
+
+        # Important: There should only be one task output in the list
+        #            If there are more or 0, something went wrong.
+        if len(task_outputs) != 1:
+            raise ValueError(
+                "Something went wrong. Kickoff should return only one task output."
            )

-            self._logger.log("debug", f"[{manager.role}] Task output: {task_output}")
-            if self.output_log_file:
-                self._file_handler.log(
-                    agent=manager.role, task=task_output, status="completed"
-                )
+        final_task_output = task_outputs[0]

-        self._finish_execution(task_output)
+        final_string_output = final_task_output.raw
+        self._finish_execution(final_string_output)

        token_usage = self.calculate_usage_metrics()

-        return self._format_output(
-            task_output if task_output else "", token_usage
-        ), token_usage
+        return CrewOutput(
+            raw=final_task_output.raw,
+            pydantic=final_task_output.pydantic,
+            json_dict=final_task_output.json_dict,
+            tasks_output=[task.output for task in self.tasks if task.output],
+            token_usage=token_usage,
+        )

    def copy(self):
        """Create a deep copy of the Crew."""
@@ -566,31 +735,15 @@ class Crew(BaseModel):
        for agent in self.agents:
            agent.interpolate_inputs(inputs)

-    def _format_output(
-        self, output: str, token_usage: Optional[Dict[str, Any]] = None
-    ) -> Union[str, Dict[str, Any]]:
-        """
-        Formats the output of the crew execution.
-        If full_output is True, then returned data type will be a dictionary else returned outputs are string
-        """
-
-        if self.full_output:
-            return {
-                "final_output": output,
-                "tasks_outputs": [task.output for task in self.tasks if task],
-                "usage_metrics": token_usage,
-            }
-        else:
-            return output
-
-    def _finish_execution(self, output) -> None:
+    def _finish_execution(self, final_string_output: str) -> None:
        if self.max_rpm:
            self._rpm_controller.stop_rpm_counter()
        if agentops:
            agentops.end_session(
-                end_state="Success", end_state_reason="Finished Execution"
+                end_state="Success",
+                end_state_reason="Finished Execution",
            )
-        self._telemetry.end_crew(self, output)
+        self._telemetry.end_crew(self, final_string_output)

    def calculate_usage_metrics(self) -> Dict[str, int]:
        """Calculates and returns the usage metrics."""
--- a/src/crewai/crews/init.py
+++ b/src/crewai/crews/init.py
@@ -0,0 +1 @@
+from .crew_output import CrewOutput
--- a/src/crewai/crews/crew_output.py
+++ b/src/crewai/crews/crew_output.py
@@ -0,0 +1,60 @@
+import json
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from crewai.tasks.output_format import OutputFormat
+from crewai.tasks.task_output import TaskOutput
+
+
+class CrewOutput(BaseModel):
+    """Class that represents the result of a crew."""
+
+    raw: str = Field(description="Raw output of crew", default="")
+    pydantic: Optional[BaseModel] = Field(
+        description="Pydantic output of Crew", default=None
+    )
+    json_dict: Optional[Dict[str, Any]] = Field(
+        description="JSON dict output of Crew", default=None
+    )
+    tasks_output: list[TaskOutput] = Field(
+        description="Output of each task", default=[]
+    )
+    token_usage: Dict[str, Any] = Field(
+        description="Processed token summary", default={}
+    )
+
+    # TODO: Joao - Adding this safety check breakes when people want to see
+    #                   The full output of a CrewOutput.
+    # @property
+    # def pydantic(self) -> Optional[BaseModel]:
+    #     # Check if the final task output included a pydantic model
+    #     if self.tasks_output[-1].output_format != OutputFormat.PYDANTIC:
+    #         raise ValueError(
+    #             "No pydantic model found in the final task. Please make sure to set the output_pydantic property in the final task in your crew."
+    #         )
+
+    #     return self._pydantic
+
+    @property
+    def json(self) -> Optional[str]:
+        if self.tasks_output[-1].output_format != OutputFormat.JSON:
+            raise ValueError(
+                "No JSON output found in the final task. Please make sure to set the output_json property in the final task in your crew."
+            )
+
+        return json.dumps(self.json_dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        if self.json_dict:
+            return self.json_dict
+        if self.pydantic:
+            return self.pydantic.model_dump()
+        raise ValueError("No output to convert to dictionary")
+
+    def __str__(self):
+        if self.pydantic:
+            return str(self.pydantic)
+        if self.json_dict:
+            return str(self.json_dict)
+        return self.raw
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -1,9 +1,11 @@
+import json
 import os
 import re
+import threading
 import uuid
-from concurrent.futures import Future, ThreadPoolExecutor
+from concurrent.futures import Future
 from copy import copy
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Tuple, Type, Union

 from langchain_openai import ChatOpenAI
 from opentelemetry.trace import Span
@@ -11,6 +13,7 @@ from pydantic import UUID4, BaseModel, Field, field_validator, model_validator
 from pydantic_core import PydanticCustomError

 from crewai.agents.agent_builder.base_agent import BaseAgent
+from crewai.tasks.output_format import OutputFormat
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry.telemetry import Telemetry
 from crewai.utilities.converter import Converter, ConverterError
@@ -106,7 +109,7 @@ class Task(BaseModel):
    _execution_span: Span | None = None
    _original_description: str | None = None
    _original_expected_output: str | None = None
-    _future: Future | None = None
+    _thread: threading.Thread | None = None

    def __init__(__pydantic_self__, **data):
        config = data.pop("config", {})
@@ -161,80 +164,74 @@ class Task(BaseModel):
            )
        return self

-    def wait_for_completion(self) -> str | BaseModel:
-        """Wait for asynchronous task completion and return the output."""
-        assert self.async_execution, "Task is not set to be executed asynchronously."
+    def execute_sync(
+        self,
+        agent: Optional[BaseAgent] = None,
+        context: Optional[str] = None,
+        tools: Optional[List[Any]] = None,
+    ) -> TaskOutput:
+        """Execute the task synchronously."""
+        return self._execute_core(agent, context, tools)

-        if self._future:
-            self._future.result()  # Wait for the future to complete
-            self._future = None
-
-        assert self.output, "Task output is not set."
-
-        return self.output.exported_output
-
-    def execute(
+    def execute_async(
        self,
        agent: BaseAgent | None = None,
        context: Optional[str] = None,
        tools: Optional[List[Any]] = None,
-    ) -> str | None:
-        """Execute the task.
+    ) -> Future[TaskOutput]:
+        """Execute the task asynchronously."""
+        future: Future[TaskOutput] = Future()
+        threading.Thread(
+            target=self._execute_task_async, args=(agent, context, tools, future)
+        ).start()
+        return future

-        Returns:
-            Output of the task.
-        """
+    def _execute_task_async(
+        self,
+        agent: Optional[BaseAgent],
+        context: Optional[str],
+        tools: Optional[List[Any]],
+        future: Future[TaskOutput],
+    ) -> None:
+        """Execute the task asynchronously with context handling."""
+        result = self._execute_core(agent, context, tools)
+        future.set_result(result)

+    def _execute_core(
+        self,
+        agent: Optional[BaseAgent],
+        context: Optional[str],
+        tools: Optional[List[Any]],
+    ) -> TaskOutput:
+        """Run the core execution logic of the task."""
        agent = agent or self.agent
        if not agent:
            raise Exception(
-                f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly "
-                "and should be executed in a Crew using a specific process that support that, like hierarchical."
+                f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
            )

        self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)

-        if self.context:
-            internal_context = []
-            for task in self.context:
-                if task.async_execution:
-                    task.wait_for_completion()
-                if task.output:
-                    internal_context.append(task.output.raw_output)
-            context = "\n".join(internal_context)
-
        self.prompt_context = context
-        tools = tools or self.tools
+        tools = tools or self.tools or []

-        if self.async_execution:
-            with ThreadPoolExecutor() as executor:
-                self._future = executor.submit(
-                    self._execute, agent, self, context, tools
-                )
-            return None
-        else:
-            result = self._execute(
-                task=self,
-                agent=agent,
-                context=context,
-                tools=tools,
-            )
-            return result
-
-    def _execute(self, agent: "BaseAgent", task, context, tools) -> str | None:
        result = agent.execute_task(
-            task=task,
+            task=self,
            context=context,
            tools=tools,
        )
-        exported_output = self._export_output(result)

-        self.output = TaskOutput(
+        pydantic_output, json_output = self._export_output(result)
+
+        task_output = TaskOutput(
            description=self.description,
-            exported_output=exported_output,
-            raw_output=result,
+            raw=result,
+            pydantic=pydantic_output,
+            json_dict=json_output,
            agent=agent.role,
+            output_format=self._get_output_format(),
        )
+        self.output = task_output

        if self.callback:
            self.callback(self.output)
@@ -243,7 +240,15 @@ class Task(BaseModel):
            self._telemetry.task_ended(self._execution_span, self)
            self._execution_span = None

-        return exported_output
+        if self.output_file:
+            content = (
+                json_output
+                if json_output
+                else pydantic_output.model_dump_json() if pydantic_output else result
+            )
+            self._save_file(content)
+
+        return task_output

    def prompt(self) -> str:
        """Prompt the task.
@@ -278,7 +283,7 @@ class Task(BaseModel):
        """Increment the delegations counter."""
        self.delegations += 1

-    def copy(self, agents: Optional[List["BaseAgent"]] = None) -> "Task":  # type: ignore # Signature of "copy" incompatible with supertype "BaseModel"
+    def copy(self, agents: List["BaseAgent"]) -> "Task":
        """Create a deep copy of the Task."""
        exclude = {
            "id",
@@ -295,7 +300,7 @@ class Task(BaseModel):
        )

        def get_agent_by_role(role: str) -> Union["BaseAgent", None]:
-            return next((agent for agent in agents if agent.role == role), None)  # type: ignore # Item "None" of "list[BaseAgent] | None" has no attribute "__iter__" (not iterable)
+            return next((agent for agent in agents if agent.role == role), None)

        cloned_agent = get_agent_by_role(self.agent.role) if self.agent else None
        cloned_tools = copy(self.tools) if self.tools else []
@@ -309,72 +314,113 @@ class Task(BaseModel):

        return copied_task

-    def _create_converter(self, *args, **kwargs) -> Converter:  # type: ignore
-        converter = self.agent.get_output_converter(  # type: ignore # Item "None" of "BaseAgent | None" has no attribute "get_output_converter"
-            *args, **kwargs
-        )
+    def _create_converter(self, *args, **kwargs) -> Converter:
+        """Create a converter instance."""
+        converter = self.agent.get_output_converter(*args, **kwargs)
        if self.converter_cls:
-            converter = self.converter_cls(  # type: ignore # Item "None" of "BaseAgent | None" has no attribute "get_output_converter"
-                *args, **kwargs
-            )
+            converter = self.converter_cls(*args, **kwargs)
        return converter

-    def _export_output(self, result: str) -> Any:
-        exported_result = result
-        instructions = "I'm gonna convert this raw text into valid JSON."
+    def _export_output(
+        self, result: str
+    ) -> Tuple[Optional[BaseModel], Optional[Dict[str, Any]]]:
+        pydantic_output: Optional[BaseModel] = None
+        json_output: Optional[Dict[str, Any]] = None

        if self.output_pydantic or self.output_json:
-            model = self.output_pydantic or self.output_json
+            model_output = self._convert_to_model(result)
+            pydantic_output = (
+                model_output if isinstance(model_output, BaseModel) else None
+            )
+            if isinstance(model_output, str):
+                try:
+                    json_output = json.loads(model_output)
+                except json.JSONDecodeError:
+                    json_output = None
+            else:
+                json_output = model_output if isinstance(model_output, dict) else None

-            # try to convert task_output directly to pydantic/json
+        return pydantic_output, json_output
+
+    def _convert_to_model(self, result: str) -> Union[dict, BaseModel, str]:
+        model = self.output_pydantic or self.output_json
+        if model is None:
+            return result
+
+        try:
+            return self._validate_model(result, model)
+        except Exception:
+            return self._handle_partial_json(result, model)
+
+    def _validate_model(
+        self, result: str, model: Type[BaseModel]
+    ) -> Union[dict, BaseModel]:
+        exported_result = model.model_validate_json(result)
+        if self.output_json:
+            return exported_result.model_dump()
+        return exported_result
+
+    def _handle_partial_json(
+        self, result: str, model: Type[BaseModel]
+    ) -> Union[dict, BaseModel, str]:
+        match = re.search(r"({.*})", result, re.DOTALL)
+        if match:
            try:
-                exported_result = model.model_validate_json(result)  # type: ignore # Item "None" of "type[BaseModel] | None" has no attribute "model_validate_json"
+                exported_result = model.model_validate_json(match.group(0))
                if self.output_json:
-                    return exported_result.model_dump()  # type: ignore # "str" has no attribute "model_dump"
+                    return exported_result.model_dump()
                return exported_result
            except Exception:
-                # sometimes the response contains valid JSON in the middle of text
-                match = re.search(r"({.*})", result, re.DOTALL)
-                if match:
-                    try:
-                        exported_result = model.model_validate_json(match.group(0))  # type: ignore # Item "None" of "type[BaseModel] | None" has no attribute "model_validate_json"
-                        if self.output_json:
-                            return exported_result.model_dump()  # type: ignore # "str" has no attribute "model_dump"
-                        return exported_result
-                    except Exception:
-                        pass
+                pass

-            llm = getattr(self.agent, "function_calling_llm", None) or self.agent.llm  # type: ignore # Item "None" of "BaseAgent | None" has no attribute "function_calling_llm"
-            if not self._is_gpt(llm):
-                model_schema = PydanticSchemaParser(model=model).get_schema()  # type: ignore # Argument "model" to "PydanticSchemaParser" has incompatible type "type[BaseModel] | None"; expected "type[BaseModel]"
-                instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
+        return self._convert_with_instructions(result, model)

-            converter = self._create_converter(  # type: ignore # Item "None" of "BaseAgent | None" has no attribute "get_output_converter"
-                llm=llm, text=result, model=model, instructions=instructions
+    def _convert_with_instructions(
+        self, result: str, model: Type[BaseModel]
+    ) -> Union[dict, BaseModel, str]:
+        llm = self.agent.function_calling_llm or self.agent.llm
+        instructions = self._get_conversion_instructions(model, llm)
+
+        converter = self._create_converter(
+            llm=llm, text=result, model=model, instructions=instructions
+        )
+        exported_result = (
+            converter.to_pydantic() if self.output_pydantic else converter.to_json()
+        )
+
+        if isinstance(exported_result, ConverterError):
+            Printer().print(
+                content=f"{exported_result.message} Using raw output instead.",
+                color="red",
            )
-
-            if self.output_pydantic:
-                exported_result = converter.to_pydantic()
-            elif self.output_json:
-                exported_result = converter.to_json()
-
-            if isinstance(exported_result, ConverterError):
-                Printer().print(
-                    content=f"{exported_result.message} Using raw output instead.",
-                    color="red",
-                )
-                exported_result = result
-
-        if self.output_file:
-            content = (
-                exported_result
-                if not self.output_pydantic
-                else exported_result.model_dump_json()  # type: ignore # "str" has no attribute "json"
-            )
-            self._save_file(content)
+            return result

        return exported_result

+    def _get_output_format(self) -> OutputFormat:
+        if self.output_json:
+            return OutputFormat.JSON
+        if self.output_pydantic:
+            return OutputFormat.PYDANTIC
+        return OutputFormat.RAW
+
+    def _get_conversion_instructions(self, model: Type[BaseModel], llm: Any) -> str:
+        instructions = "I'm gonna convert this raw text into valid JSON."
+        if not self._is_gpt(llm):
+            model_schema = PydanticSchemaParser(model=model).get_schema()
+            instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
+        return instructions
+
+    def _save_output(self, content: str) -> None:
+        if not self.output_file:
+            raise Exception("Output file path is not set.")
+
+        directory = os.path.dirname(self.output_file)
+        if directory and not os.path.exists(directory):
+            os.makedirs(directory)
+        with open(self.output_file, "w", encoding="utf-8") as file:
+            file.write(content)
+
    def _is_gpt(self, llm) -> bool:
        return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None

--- a/src/crewai/tasks/init.py
+++ b/src/crewai/tasks/init.py
@@ -0,0 +1,4 @@
+from crewai.tasks.output_format import OutputFormat
+from crewai.tasks.task_output import TaskOutput
+
+__all__ = ["OutputFormat", "TaskOutput"]
--- a/src/crewai/tasks/output_format.py
+++ b/src/crewai/tasks/output_format.py
@@ -0,0 +1,9 @@
+from enum import Enum
+
+
+class OutputFormat(str, Enum):
+    """Enum that represents the output format of a task."""
+
+    JSON = "json"
+    PYDANTIC = "pydantic"
+    RAW = "raw"
--- a/src/crewai/tasks/task_output.py
+++ b/src/crewai/tasks/task_output.py
@@ -1,24 +1,78 @@
-from typing import Optional, Union
+import json
+from typing import Any, Dict, Optional

 from pydantic import BaseModel, Field, model_validator

+from crewai.tasks.output_format import OutputFormat
+

 class TaskOutput(BaseModel):
    """Class that represents the result of a task."""

    description: str = Field(description="Description of the task")
    summary: Optional[str] = Field(description="Summary of the task", default=None)
-    exported_output: Union[str, BaseModel] = Field(
-        description="Output of the task", default=None
+    raw: str = Field(
+        description="Raw output of the task", default=""
+    )  # TODO: @joao: breaking change, by renaming raw_output to raw, but now consistent with CrewOutput
+    pydantic: Optional[BaseModel] = Field(
+        description="Pydantic output of task", default=None
+    )
+    json_dict: Optional[Dict[str, Any]] = Field(
+        description="JSON dictionary of task", default=None
    )
    agent: str = Field(description="Agent that executed the task")
-    raw_output: str = Field(description="Result of the task")
+    output_format: OutputFormat = Field(
+        description="Output format of the task", default=OutputFormat.RAW
+    )

    @model_validator(mode="after")
    def set_summary(self):
+        """Set the summary field based on the description."""
        excerpt = " ".join(self.description.split(" ")[:10])
        self.summary = f"{excerpt}..."
        return self

-    def result(self):
-        return self.exported_output
+    # TODO: Joao - Adding this safety check breakes when people want to see
+    #                   The full output of a TaskOutput or CrewOutput.
+    # @property
+    # def pydantic(self) -> Optional[BaseModel]:
+    #     # Check if the final task output included a pydantic model
+    #     if self.output_format != OutputFormat.PYDANTIC:
+    #         raise ValueError(
+    #             """
+    #             Invalid output format requested.
+    #             If you would like to access the pydantic model,
+    #             please make sure to set the output_pydantic property for the task.
+    #             """
+    #         )
+
+    #     return self._pydantic
+
+    @property
+    def json(self) -> Optional[str]:
+        if self.output_format != OutputFormat.JSON:
+            raise ValueError(
+                """
+                Invalid output format requested.
+                If you would like to access the JSON output,
+                please make sure to set the output_json property for the task
+                """
+            )
+
+        return json.dumps(self.json_dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert json_output and pydantic_output to a dictionary."""
+        output_dict = {}
+        if self.json_dict:
+            output_dict.update(self.json_dict)
+        if self.pydantic:
+            output_dict.update(self.pydantic.model_dump())
+        return output_dict
+
+    def __str__(self) -> str:
+        if self.pydantic:
+            return str(self.pydantic)
+        if self.json_dict:
+            return str(self.json_dict)
+        return self.raw
--- a/src/crewai/telemetry/telemetry.py
+++ b/src/crewai/telemetry/telemetry.py
@@ -337,7 +337,7 @@ class Telemetry:
            except Exception:
                pass

-    def end_crew(self, crew, output):
+    def end_crew(self, crew, final_string_output):
        if (self.ready) and (crew.share_crew):
            try:
                self._add_attribute(
@@ -345,7 +345,9 @@ class Telemetry:
                    "crewai_version",
                    pkg_resources.get_distribution("crewai").version,
                )
-                self._add_attribute(crew._execution_span, "crew_output", output)
+                self._add_attribute(
+                    crew._execution_span, "crew_output", final_string_output
+                )
                self._add_attribute(
                    crew._execution_span,
                    "crew_tasks_output",
--- a/src/crewai/utilities/formatter.py
+++ b/src/crewai/utilities/formatter.py
@@ -0,0 +1,20 @@
+from typing import List
+
+from crewai.task import Task
+from crewai.tasks.task_output import TaskOutput
+
+
+def aggregate_raw_outputs_from_task_outputs(task_outputs: List[TaskOutput]) -> str:
+    """Generate string context from the task outputs."""
+    dividers = "\n\n----------\n\n"
+
+    # Join task outputs with dividers
+    context = dividers.join(output.raw for output in task_outputs)
+    return context
+
+
+def aggregate_raw_outputs_from_tasks(tasks: List[Task]) -> str:
+    """Generate string context from the tasks."""
+    task_outputs = [task.output for task in tasks if task.output is not None]
+
+    return aggregate_raw_outputs_from_task_outputs(task_outputs)