Replay feat using db (#930)

* Cleaned up task execution to now have separate paths for async and sync execution. Updating all kickoff functions to return CrewOutput. WIP. Waiting for Joao feedback on async task execution with task_output * Consistently storing async and sync output for context * outline tests I need to create going forward * Major rehaul of TaskOutput and CrewOutput. Updated all tests to work with new change. Need to add in a few final tricky async tests and add a few more to verify output types on TaskOutput and CrewOutput. * Encountering issues with callback. Need to test on main. WIP * working on tests. WIP * WIP. Figuring out disconnect issue. * Cleaned up logs now that I've isolated the issue to the LLM * more wip. * WIP. It looks like usage metrics has always been broken for async * Update parent crew who is managing for_each loop * Merge in main to bugfix/kickoff-for-each-usage-metrics * Clean up code for review * Add new tests * Final cleanup. Ready for review. * Moving copy functionality from Agent to BaseAgent * Fix renaming issue * Fix linting errors * use BaseAgent instead of Agent where applicable * Fixing missing function. Working on tests. * WIP. Needing team to review change * Fixing issues brought about by merge * WIP: need to fix json encoder * WIP need to fix encoder * WIP * WIP: replay working with async. need to add tests * Implement major fixes from yesterdays group conversation. Now working on tests. * The majority of tasks are working now. Need to fix converter class * Fix final failing test * Fix linting and type-checker issues * Add more tests to fully test CrewOutput and TaskOutput changes * Add in validation for async cannot depend on other async tasks. * WIP: working replay feat fixing inputs, need tests * WIP: core logic of seq and heir for executing tasks added into one * Update validators and tests * better logic for seq and hier * replay working for both seq and hier just need tests * fixed context * added cli command + code cleanup TODO: need better refactoring * refactoring for cleaner code * added better tests * removed todo comments and fixed some tests * fix logging now all tests should pass * cleaner code * ensure replay is delcared when replaying specific tasks * ensure hierarchical works * better typing for stored_outputs and separated task_output_handler * added better tests * added replay feature to crew docs * easier cli command name * fixing changes * using sqllite instead of .json file for logging previous task_outputs * tools fix * added to docs and fixed tests * fixed .db * fixed docs and removed unneeded comments * separating ltm and replay db * fixed printing colors * added how to doc --------- Co-authored-by: Brandon Hancock <brandon@brandonhancock.io>
2026-01-10 08:38:30 +00:00 · 2024-07-15 13:14:10 -07:00
parent 7696b45fc3
commit 67b04b30bf
25 changed files with 6378 additions and 195 deletions
--- a/src/crewai/agents/agent_builder/base_agent.py
+++ b/src/crewai/agents/agent_builder/base_agent.py
@@ -180,7 +180,7 @@ class BaseAgent(ABC, BaseModel):
        pass

    @abstractmethod
-    def get_delegation_tools(self, agents: List["BaseAgent"]):
+    def get_delegation_tools(self, agents: List["BaseAgent"]) -> List[Any]:
        """Set the task tools that init BaseAgenTools class."""
        pass

--- a/src/crewai/cli/cli.py
+++ b/src/crewai/cli/cli.py
@@ -1,8 +1,14 @@
 import click
 import pkg_resources

+from crewai.memory.storage.kickoff_task_outputs_storage import (
+    KickoffTaskOutputsSQLiteStorage,
+)
+
+
 from .create_crew import create_crew
 from .train_crew import train_crew
+from .replay_from_task import replay_task_command


@click.group()
@@ -48,5 +54,50 @@ def train(n_iterations: int):
    train_crew(n_iterations)


+@crewai.command()
+@click.option(
+    "-t",
+    "--task_id",
+    type=str,
+    help="Replay the crew from this task ID, including all subsequent tasks.",
+)
+def replay(task_id: str) -> None:
+    """
+    Replay the crew execution from a specific task.
+
+    Args:
+        task_id (str): The ID of the task to replay from.
+    """
+    try:
+        click.echo(f"Replaying the crew from task {task_id}")
+        replay_task_command(task_id)
+    except Exception as e:
+        click.echo(f"An error occurred while replaying: {e}", err=True)
+
+
+@crewai.command()
+def log_tasks_outputs() -> None:
+    """
+    Retrieve your latest crew.kickoff() task outputs.
+    """
+    try:
+        storage = KickoffTaskOutputsSQLiteStorage()
+        tasks = storage.load()
+
+        if not tasks:
+            click.echo(
+                "No task outputs found. Only crew kickoff task outputs are logged."
+            )
+            return
+
+        for index, task in enumerate(tasks, 1):
+            click.echo(f"Task {index}: {task['task_id']}")
+            click.echo(f"Description: {task['expected_output']}")
+            click.echo("------")
+
+    except Exception as e:
+        click.echo(f"An error occurred while logging task outputs: {e}", err=True)
+
+
 if __name__ == "__main__":
    crewai()
--- a/src/crewai/cli/replay_from_task.py
+++ b/src/crewai/cli/replay_from_task.py
@@ -0,0 +1,24 @@
+import subprocess
+import click
+
+
+def replay_task_command(task_id: str) -> None:
+    """
+    Replay the crew execution from a specific task.
+
+    Args:
+      task_id (str): The ID of the task to replay from.
+    """
+    command = ["poetry", "run", "replay", task_id]
+
+    try:
+        result = subprocess.run(command, capture_output=False, text=True, check=True)
+        if result.stderr:
+            click.echo(result.stderr, err=True)
+
+    except subprocess.CalledProcessError as e:
+        click.echo(f"An error occurred while replaying the task: {e}", err=True)
+        click.echo(e.output, err=True)
+
+    except Exception as e:
+        click.echo(f"An unexpected error occurred: {e}", err=True)
--- a/src/crewai/cli/templates/main.py
+++ b/src/crewai/cli/templates/main.py
@@ -21,3 +21,13 @@ def train():

    except Exception as e:
        raise Exception(f"An error occurred while training the crew: {e}")
+
+def replay_from_task():
+    """
+    Replay the crew execution from a specific task.
+    """
+    try:
+        {{crew_name}}Crew().crew().replay_from_task(task_id=sys.argv[1])
+
+    except Exception as e:
+        raise Exception(f"An error occurred while replaying the crew: {e}")
--- a/src/crewai/cli/templates/pyproject.toml
+++ b/src/crewai/cli/templates/pyproject.toml
@@ -11,6 +11,7 @@ crewai = { extras = ["tools"], version = "^0.35.8" }
 [tool.poetry.scripts]
 {{folder_name}} = "{{folder_name}}.main:run"
 train = "{{folder_name}}.main:train"
+replay = "{{folder_name}}.main:replay_from_task"

 [build-system]
 requires = ["poetry-core"]
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -6,15 +6,15 @@ from typing import Any, Dict, List, Optional, Tuple, Union

 from langchain_core.callbacks import BaseCallbackHandler
 from pydantic import (
-  UUID4,
-  BaseModel,
-  ConfigDict,
-  Field,
-  InstanceOf,
-  Json,
-  PrivateAttr,
-  field_validator,
-  model_validator,
+    UUID4,
+    BaseModel,
+    ConfigDict,
+    Field,
+    InstanceOf,
+    Json,
+    PrivateAttr,
+    field_validator,
+    model_validator,
 )
 from pydantic_core import PydanticCustomError

@@ -31,8 +31,13 @@ from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
 from crewai.tools.agent_tools import AgentTools
 from crewai.utilities import I18N, FileHandler, Logger, RPMController
-from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
+from crewai.utilities.constants import (
+    TRAINED_AGENTS_DATA_FILE,
+    TRAINING_DATA_FILE,
+)
 from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
+from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandler
+
 from crewai.utilities.formatter import (
    aggregate_raw_outputs_from_task_outputs,
    aggregate_raw_outputs_from_tasks,
@@ -80,6 +85,13 @@ class Crew(BaseModel):
    _entity_memory: Optional[InstanceOf[EntityMemory]] = PrivateAttr()
    _train: Optional[bool] = PrivateAttr(default=False)
    _train_iteration: Optional[int] = PrivateAttr()
+    _inputs: Optional[Dict[str, Any]] = PrivateAttr(default=None)
+    _logging_color: str = PrivateAttr(
+        default="bold_purple",
+    )
+    _task_output_handler: TaskOutputStorageHandler = PrivateAttr(
+        default_factory=TaskOutputStorageHandler
+    )

    cache: bool = Field(default=True)
    model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -135,6 +147,14 @@ class Crew(BaseModel):
        default=False,
        description="output_log_file",
    )
+    task_execution_output_json_files: Optional[List[str]] = Field(
+        default=None,
+        description="List of file paths for task execution JSON files.",
+    )
+    execution_logs: List[Dict[str, Any]] = Field(
+        default=[],
+        description="List of execution logs for tasks",
+    )

    @field_validator("id", mode="before")
    @classmethod
@@ -376,7 +396,11 @@ class Crew(BaseModel):
    ) -> CrewOutput:
        """Starts the crew to work on its assigned tasks."""
        self._execution_span = self._telemetry.crew_execution_span(self, inputs)
+        self._task_output_handler.reset()
+        self._logging_color = "bold_purple"
+
        if inputs is not None:
+            self._inputs = inputs
            self._interpolate_inputs(inputs)
        self._set_tasks_callbacks()

@@ -403,7 +427,7 @@ class Crew(BaseModel):
        if self.process == Process.sequential:
            result = self._run_sequential_process()
        elif self.process == Process.hierarchical:
-            result = self._run_hierarchical_process()  # type: ignore # Incompatible types in assignment (expression has type "str | dict[str, Any]", variable has type "str")
+            result = self._run_hierarchical_process()
        else:
            raise NotImplementedError(
                f"The process '{self.process}' is not implemented yet."
@@ -440,6 +464,7 @@ class Crew(BaseModel):
            results.append(output)

        self.usage_metrics = total_usage_metrics
+        self._task_output_handler.reset()
        return results

    async def kickoff_async(self, inputs: Optional[Dict[str, Any]] = {}) -> CrewOutput:
@@ -488,129 +513,48 @@ class Crew(BaseModel):
                    total_usage_metrics[key] += crew.usage_metrics.get(key, 0)

        self.usage_metrics = total_usage_metrics
-
+        self._task_output_handler.reset()
        return results

+    def _store_execution_log(
+        self,
+        task: Task,
+        output: TaskOutput,
+        task_index: int,
+        was_replayed: bool = False,
+    ):
+        if self._inputs:
+            inputs = self._inputs
+        else:
+            inputs = {}
+
+        log = {
+            "task": task,
+            "output": {
+                "description": output.description,
+                "summary": output.summary,
+                "raw": output.raw,
+                "pydantic": output.pydantic,
+                "json_dict": output.json_dict,
+                "output_format": output.output_format,
+                "agent": output.agent,
+            },
+            "task_index": task_index,
+            "inputs": inputs,
+            "was_replayed": was_replayed,
+        }
+        self._task_output_handler.update(task_index, log)
+
    def _run_sequential_process(self) -> CrewOutput:
        """Executes tasks sequentially and returns the final output."""
-        task_outputs: List[TaskOutput] = []
-        futures: List[Tuple[Task, Future[TaskOutput]]] = []
+        return self._execute_tasks(self.tasks)

-        for task in self.tasks:
-            if task.agent and task.agent.allow_delegation:
-                agents_for_delegation = [
-                    agent for agent in self.agents if agent != task.agent
-                ]
-                if len(self.agents) > 1 and len(agents_for_delegation) > 0:
-                    delegation_tools = task.agent.get_delegation_tools(
-                        agents_for_delegation
-                    )
-
-                    # Add tools if they are not already in task.tools
-                    for new_tool in delegation_tools:
-                        # Find the index of the tool with the same name
-                        existing_tool_index = next(
-                            (
-                                index
-                                for index, tool in enumerate(task.tools or [])
-                                if tool.name == new_tool.name
-                            ),
-                            None,
-                        )
-                        if not task.tools:
-                            task.tools = []
-
-                        if existing_tool_index is not None:
-                            # Replace the existing tool
-                            task.tools[existing_tool_index] = new_tool 
-                        else:
-                            # Add the new tool
-                            task.tools.append(new_tool)
-
-            role = task.agent.role if task.agent is not None else "None"
-            self._logger.log("debug", f"== Working Agent: {role}", color="bold_purple")
-            self._logger.log(
-                "info", f"== Starting Task: {task.description}", color="bold_purple"
-            )
-
-            if self.output_log_file:
-                self._file_handler.log(
-                    agent=role, task=task.description, status="started"
-                )
-
-            if task.async_execution:
-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
-                )
-                future = task.execute_async(
-                    agent=task.agent, context=context, tools=task.tools
-                )
-                futures.append((task, future))
-            else:
-                # Before executing a synchronous task, wait for all async tasks to complete
-                if futures:
-                    # Clear task_outputs before processing async tasks
-                    task_outputs = []
-                    for future_task, future in futures:
-                        task_output = future.result()
-                        task_outputs.append(task_output)
-                        self._process_task_result(future_task, task_output)
-
-                    # Clear the futures list after processing all async results
-                    futures.clear()
-
-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
-                )
-                task_output = task.execute_sync(
-                    agent=task.agent, context=context, tools=task.tools
-                )
-                task_outputs = [task_output]
-                self._process_task_result(task, task_output)
-
-        if futures:
-            # Clear task_outputs before processing async tasks
-            task_outputs = []
-            for future_task, future in futures:
-                task_output = future.result()
-                task_outputs.append(task_output)
-                self._process_task_result(future_task, task_output)
-
-        # Important: There should only be one task output in the list
-        #            If there are more or 0, something went wrong.
-        if len(task_outputs) != 1:
-            raise ValueError(
-                "Something went wrong. Kickoff should return only one task output."
-            )
-
-        final_task_output = task_outputs[0]
-
-        final_string_output = final_task_output.raw
-        self._finish_execution(final_string_output)
-
-        token_usage = self.calculate_usage_metrics()
-
-        return CrewOutput(
-            raw=final_task_output.raw,
-            pydantic=final_task_output.pydantic,
-            json_dict=final_task_output.json_dict,
-            tasks_output=[task.output for task in self.tasks if task.output],
-            token_usage=token_usage,
-        )
-
-    def _process_task_result(self, task: Task, output: TaskOutput) -> None:
-        role = task.agent.role if task.agent is not None else "None"
-        self._logger.log("debug", f"== [{role}] Task output: {output}\n\n")
-        if self.output_log_file:
-            self._file_handler.log(agent=role, task=output, status="completed")
-
-    # TODO: @joao, Breaking change. Changed return type. Usage metrics is included in crewoutput
    def _run_hierarchical_process(self) -> CrewOutput:
        """Creates and assigns a manager agent to make sure the crew completes the tasks."""
+        self._create_manager_agent()
+        return self._execute_tasks(self.tasks, self.manager_agent)
+
+    def _create_manager_agent(self):
        i18n = I18N(prompt_file=self.prompt_file)
        if self.manager_agent is not None:
            self.manager_agent.allow_delegation = True
@@ -629,74 +573,148 @@ class Crew(BaseModel):
            )
            self.manager_agent = manager

+    def _execute_tasks(
+        self,
+        tasks: List[Task],
+        manager: Optional[BaseAgent] = None,
+        start_index: Optional[int] = 0,
+        was_replayed: bool = False,
+    ) -> CrewOutput:
+        """Executes tasks sequentially and returns the final output.
+
+        Args:
+            tasks (List[Task]): List of tasks to execute
+            manager (Optional[BaseAgent], optional): Manager agent to use for delegation. Defaults to None.
+
+        Returns:
+            CrewOutput: Final output of the crew
+        """
+
        task_outputs: List[TaskOutput] = []
-        futures: List[Tuple[Task, Future[TaskOutput]]] = []
+        futures: List[Tuple[Task, Future[TaskOutput], int]] = []
+        last_sync_output: Optional[TaskOutput] = None

-        # TODO: IF USER OVERRIDE THE CONTEXT, PASS THAT
-        for task in self.tasks:
-            self._logger.log("debug", f"Working Agent: {manager.role}")
-            self._logger.log("info", f"Starting Task: {task.description}")
+        for task_index, task in enumerate(tasks):
+            if start_index is not None and task_index < start_index:
+                if task.output:
+                    if task.async_execution:
+                        task_outputs.append(task.output)
+                    else:
+                        task_outputs = [task.output]
+                        last_sync_output = task.output
+                continue

-            if self.output_log_file:
-                self._file_handler.log(
-                    agent=manager.role, task=task.description, status="started"
+            self._prepare_task(task, manager)
+            if self.process == Process.hierarchical:
+                agent_to_use = manager
+            else:
+                agent_to_use = task.agent
+            if agent_to_use is None:
+                raise ValueError(
+                    f"No agent available for task: {task.description}. Ensure that either the task has an assigned agent or a manager agent is provided."
                )
+            self._log_task_start(task, agent_to_use)

            if task.async_execution:
-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
+                context = self._get_context(
+                    task, [last_sync_output] if last_sync_output else []
                )
                future = task.execute_async(
-                    agent=manager, context=context, tools=manager.tools
+                    agent=agent_to_use,
+                    context=context,
+                    tools=agent_to_use.tools,
                )
-                futures.append((task, future))
+                futures.append((task, future, task_index))
            else:
-                # Before executing a synchronous task, wait for all async tasks to complete
                if futures:
-                    # Clear task_outputs before processing async tasks
-                    task_outputs = []
-                    for future_task, future in futures:
-                        task_output = future.result()
-                        task_outputs.append(task_output)
-                        self._process_task_result(future_task, task_output)
-
-                    # Clear the futures list after processing all async results
+                    task_outputs.extend(
+                        self._process_async_tasks(futures, was_replayed)
+                    )
                    futures.clear()

-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
-                )
+                context = self._get_context(task, task_outputs)
                task_output = task.execute_sync(
-                    agent=manager, context=context, tools=manager.tools
+                    agent=agent_to_use,
+                    context=context,
+                    tools=agent_to_use.tools,
                )
                task_outputs = [task_output]
                self._process_task_result(task, task_output)
+                self._store_execution_log(task, task_output, task_index, was_replayed)

-        # Process any remaining async results
        if futures:
-            # Clear task_outputs before processing async tasks
-            task_outputs = []
-            for future_task, future in futures:
-                task_output = future.result()
-                task_outputs.append(task_output)
-                self._process_task_result(future_task, task_output)
+            task_outputs = self._process_async_tasks(futures, was_replayed)

-        # Important: There should only be one task output in the list
-        #            If there are more or 0, something went wrong.
+        return self._create_crew_output(task_outputs)
+
+    def _prepare_task(self, task: Task, manager: Optional[BaseAgent]):
+        if self.process == Process.hierarchical:
+            self._update_manager_tools(task, manager)
+        elif task.agent and task.agent.allow_delegation:
+            self._add_delegation_tools(task)
+
+    def _add_delegation_tools(self, task: Task):
+        agents_for_delegation = [agent for agent in self.agents if agent != task.agent]
+        if len(self.agents) > 1 and len(agents_for_delegation) > 0 and task.agent:
+            delegation_tools = task.agent.get_delegation_tools(agents_for_delegation)
+
+            # Add tools if they are not already in task.tools
+            for new_tool in delegation_tools:
+                # Find the index of the tool with the same name
+                existing_tool_index = next(
+                    (
+                        index
+                        for index, tool in enumerate(task.tools or [])
+                        if tool.name == new_tool.name
+                    ),
+                    None,
+                )
+                if not task.tools:
+                    task.tools = []
+
+                if existing_tool_index is not None:
+                    # Replace the existing tool
+                    task.tools[existing_tool_index] = new_tool
+                else:
+                    # Add the new tool
+                    task.tools.append(new_tool)
+
+    def _log_task_start(self, task: Task, agent: Optional[BaseAgent]):
+        color = self._logging_color
+        role = agent.role if agent else "None"
+        self._logger.log("debug", f"== Working Agent: {role}", color=color)
+        self._logger.log("info", f"== Starting Task: {task.description}", color=color)
+        if self.output_log_file:
+            self._file_handler.log(agent=role, task=task.description, status="started")
+
+    def _update_manager_tools(self, task: Task, manager: Optional[BaseAgent]):
+        if task.agent and manager:
+            manager.tools = task.agent.get_delegation_tools([task.agent])
+        if manager:
+            manager.tools = manager.get_delegation_tools(self.agents)
+
+    def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
+        context = (
+            aggregate_raw_outputs_from_tasks(task.context)
+            if task.context
+            else aggregate_raw_outputs_from_task_outputs(task_outputs)
+        )
+        return context
+
+    def _process_task_result(self, task: Task, output: TaskOutput) -> None:
+        role = task.agent.role if task.agent is not None else "None"
+        self._logger.log("debug", f"== [{role}] Task output: {output}\n\n")
+        if self.output_log_file:
+            self._file_handler.log(agent=role, task=output, status="completed")
+
+    def _create_crew_output(self, task_outputs: List[TaskOutput]) -> CrewOutput:
        if len(task_outputs) != 1:
            raise ValueError(
                "Something went wrong. Kickoff should return only one task output."
            )
-
        final_task_output = task_outputs[0]
-
        final_string_output = final_task_output.raw
        self._finish_execution(final_string_output)
-
        token_usage = self.calculate_usage_metrics()

        return CrewOutput(
@@ -707,6 +725,74 @@ class Crew(BaseModel):
            token_usage=token_usage,
        )

+    def _process_async_tasks(
+        self,
+        futures: List[Tuple[Task, Future[TaskOutput], int]],
+        was_replayed: bool = False,
+    ) -> List[TaskOutput]:
+        task_outputs = []
+        for future_task, future, task_index in futures:
+            task_output = future.result()
+            task_outputs.append(task_output)
+            self._process_task_result(future_task, task_output)
+            self._store_execution_log(
+                future_task, task_output, task_index, was_replayed
+            )
+        return task_outputs
+
+    def _find_task_index(
+        self, task_id: str, stored_outputs: List[Any]
+    ) -> Optional[int]:
+        return next(
+            (
+                index
+                for (index, d) in enumerate(stored_outputs)
+                if d["task_id"] == str(task_id)
+            ),
+            None,
+        )
+
+    def replay_from_task(
+        self, task_id: str, inputs: Optional[Dict[str, Any]] = None
+    ) -> CrewOutput:
+        stored_outputs = self._task_output_handler.load()
+        if not stored_outputs:
+            raise ValueError(f"Task with id {task_id} not found in the crew's tasks.")
+
+        start_index = self._find_task_index(task_id, stored_outputs)
+
+        if start_index is None:
+            raise ValueError(f"Task with id {task_id} not found in the crew's tasks.")
+
+        replay_inputs = (
+            inputs if inputs is not None else stored_outputs[start_index]["inputs"]
+        )
+        self._inputs = replay_inputs
+
+        if replay_inputs:
+            self._interpolate_inputs(replay_inputs)
+
+        if self.process == Process.hierarchical:
+            self._create_manager_agent()
+
+        for i in range(start_index):
+            stored_output = stored_outputs[i][
+                "output"
+            ]  # for adding context to the task
+            task_output = TaskOutput(
+                description=stored_output["description"],
+                agent=stored_output["agent"],
+                raw=stored_output["raw"],
+                pydantic=stored_output["pydantic"],
+                json_dict=stored_output["json_dict"],
+                output_format=stored_output["output_format"],
+            )
+            self.tasks[i].output = task_output
+
+        self._logging_color = "bold_blue"
+        result = self._execute_tasks(self.tasks, self.manager_agent, start_index, True)
+        return result
+
    def copy(self):
        """Create a deep copy of the Crew."""

--- a/src/crewai/crews/crew_output.py
+++ b/src/crewai/crews/crew_output.py
@@ -24,8 +24,6 @@ class CrewOutput(BaseModel):
        description="Processed token summary", default={}
    )

-    # TODO: Joao - Adding this safety check breakes when people want to see
-    #                   The full output of a CrewOutput.
    # @property
    # def pydantic(self) -> Optional[BaseModel]:
    #     # Check if the final task output included a pydantic model
--- a/src/crewai/memory/storage/kickoff_task_outputs_storage.py
+++ b/src/crewai/memory/storage/kickoff_task_outputs_storage.py
@@ -0,0 +1,166 @@
+import json
+import sqlite3
+from typing import Any, Dict, List, Optional
+
+from crewai.task import Task
+from crewai.utilities import Printer
+from crewai.utilities.crew_json_encoder import CrewJSONEncoder
+from crewai.utilities.paths import db_storage_path
+
+
+class KickoffTaskOutputsSQLiteStorage:
+    """
+    An updated SQLite storage class for kickoff task outputs storage.
+    """
+
+    def __init__(
+        self, db_path: str = f"{db_storage_path()}/latest_kickoff_task_outputs.db"
+    ) -> None:
+        self.db_path = db_path
+        self._printer: Printer = Printer()
+        self._initialize_db()
+
+    def _initialize_db(self):
+        """
+        Initializes the SQLite database and creates LTM table
+        """
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    """
+                    CREATE TABLE IF NOT EXISTS latest_kickoff_task_outputs (
+                        task_id TEXT PRIMARY KEY,
+                        expected_output TEXT,
+                        output JSON,
+                        task_index INTEGER,
+                        inputs JSON,
+                        was_replayed BOOLEAN,
+                        timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
+                    )
+                """
+                )
+
+                conn.commit()
+        except sqlite3.Error as e:
+            self._printer.print(
+                content=f"SAVING KICKOFF TASK OUTPUTS ERROR: An error occurred during database initialization: {e}",
+                color="red",
+            )
+
+    def add(
+        self,
+        task: Task,
+        output: Dict[str, Any],
+        task_index: int,
+        was_replayed: bool = False,
+        inputs: Dict[str, Any] = {},
+    ):
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute(
+                    """
+                INSERT OR REPLACE INTO latest_kickoff_task_outputs
+                (task_id, expected_output, output, task_index, inputs, was_replayed)
+                VALUES (?, ?, ?, ?, ?, ?)
+            """,
+                    (
+                        str(task.id),
+                        task.expected_output,
+                        json.dumps(output, cls=CrewJSONEncoder),
+                        task_index,
+                        json.dumps(inputs),
+                        was_replayed,
+                    ),
+                )
+                conn.commit()
+        except sqlite3.Error as e:
+            self._printer.print(
+                content=f"SAVING KICKOFF TASK OUTPUTS ERROR: An error occurred during database initialization: {e}",
+                color="red",
+            )
+
+    def update(
+        self,
+        task_index: int,
+        **kwargs,
+    ):
+        """
+        Updates an existing row in the latest_kickoff_task_outputs table based on task_index.
+        """
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+
+                fields = []
+                values = []
+                for key, value in kwargs.items():
+                    fields.append(f"{key} = ?")
+                    values.append(
+                        json.dumps(value, cls=CrewJSONEncoder)
+                        if isinstance(value, dict)
+                        else value
+                    )
+
+                query = f"UPDATE latest_kickoff_task_outputs SET {', '.join(fields)} WHERE task_index = ?"
+                values.append(task_index)
+
+                cursor.execute(query, tuple(values))
+                conn.commit()
+
+                if cursor.rowcount == 0:
+                    self._printer.print(
+                        f"No row found with task_index {task_index}. No update performed.",
+                        color="red",
+                    )
+        except sqlite3.Error as e:
+            self._printer.print(f"UPDATE KICKOFF TASK OUTPUTS ERROR: {e}", color="red")
+
+    def load(self) -> Optional[List[Dict[str, Any]]]:
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute("""
+                SELECT *
+                FROM latest_kickoff_task_outputs
+                ORDER BY task_index
+                """)
+
+                rows = cursor.fetchall()
+                results = []
+                for row in rows:
+                    result = {
+                        "task_id": row[0],
+                        "expected_output": row[1],
+                        "output": json.loads(row[2]),
+                        "task_index": row[3],
+                        "inputs": json.loads(row[4]),
+                        "was_replayed": row[5],
+                        "timestamp": row[6],
+                    }
+                    results.append(result)
+
+                return results
+
+        except sqlite3.Error as e:
+            self._printer.print(
+                content=f"LOADING KICKOFF TASK OUTPUTS ERROR: An error occurred while querying kickoff task outputs: {e}",
+                color="red",
+            )
+            return None
+
+    def delete_all(self):
+        """
+        Deletes all rows from the latest_kickoff_task_outputs table.
+        """
+        try:
+            with sqlite3.connect(self.db_path) as conn:
+                cursor = conn.cursor()
+                cursor.execute("DELETE FROM latest_kickoff_task_outputs")
+                conn.commit()
+        except sqlite3.Error as e:
+            self._printer.print(
+                content=f"ERROR: Failed to delete all kickoff task outputs: {e}",
+                color="red",
+            )
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -204,6 +204,7 @@ class Task(BaseModel):
        tools: Optional[List[Any]],
    ) -> TaskOutput:
        """Run the core execution logic of the task."""
+        self.agent = agent
        agent = agent or self.agent
        if not agent:
            raise Exception(
@@ -244,7 +245,9 @@ class Task(BaseModel):
            content = (
                json_output
                if json_output
-                else pydantic_output.model_dump_json() if pydantic_output else result
+                else pydantic_output.model_dump_json()
+                if pydantic_output
+                else result
            )
            self._save_file(content)

@@ -378,7 +381,7 @@ class Task(BaseModel):
    def _convert_with_instructions(
        self, result: str, model: Type[BaseModel]
    ) -> Union[dict, BaseModel, str]:
-        llm = self.agent.function_calling_llm or self.agent.llm
+        llm = self.agent.function_calling_llm or self.agent.llm  # type: ignore # Item "None" of "BaseAgent | None" has no attribute "function_calling_llm"
        instructions = self._get_conversion_instructions(model, llm)

        converter = self._create_converter(
--- a/src/crewai/tasks/task_output.py
+++ b/src/crewai/tasks/task_output.py
@@ -11,9 +11,7 @@ class TaskOutput(BaseModel):

    description: str = Field(description="Description of the task")
    summary: Optional[str] = Field(description="Summary of the task", default=None)
-    raw: str = Field(
-        description="Raw output of the task", default=""
-    )  # TODO: @joao: breaking change, by renaming raw_output to raw, but now consistent with CrewOutput
+    raw: str = Field(description="Raw output of the task", default="")
    pydantic: Optional[BaseModel] = Field(
        description="Pydantic output of task", default=None
    )
@@ -32,8 +30,6 @@ class TaskOutput(BaseModel):
        self.summary = f"{excerpt}..."
        return self

-    # TODO: Joao - Adding this safety check breakes when people want to see
-    #                   The full output of a TaskOutput or CrewOutput.
    # @property
    # def pydantic(self) -> Optional[BaseModel]:
    #     # Check if the final task output included a pydantic model
--- a/src/crewai/utilities/crew_json_encoder.py
+++ b/src/crewai/utilities/crew_json_encoder.py
@@ -0,0 +1,31 @@
+from datetime import datetime
+import json
+from uuid import UUID
+from pydantic import BaseModel
+
+
+class CrewJSONEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, BaseModel):
+            return self._handle_pydantic_model(obj)
+        elif isinstance(obj, UUID):
+            return str(obj)
+
+        elif isinstance(obj, datetime):
+            return obj.isoformat()
+        return super().default(obj)
+
+    def _handle_pydantic_model(self, obj):
+        try:
+            data = obj.model_dump()
+            # Remove circular references
+            for key, value in data.items():
+                if isinstance(value, BaseModel):
+                    data[key] = str(
+                        value
+                    )  # Convert nested models to string representation
+            return data
+        except RecursionError:
+            return str(
+                obj
+            )  # Fall back to string representation if circular reference is detected
--- a/src/crewai/utilities/file_handler.py
+++ b/src/crewai/utilities/file_handler.py
@@ -1,5 +1,7 @@
 import os
 import pickle
+
+
 from datetime import datetime


--- a/src/crewai/utilities/printer.py
+++ b/src/crewai/utilities/printer.py
@@ -8,6 +8,8 @@ class Printer:
            self._print_bold_green(content)
        elif color == "bold_purple":
            self._print_bold_purple(content)
+        elif color == "bold_blue":
+            self._print_bold_blue(content)
        else:
            print(content)

@@ -22,3 +24,6 @@ class Printer:

    def _print_red(self, content):
        print("\033[91m {}\033[00m".format(content))
+
+    def _print_bold_blue(self, content):
+        print("\033[1m\033[94m {}\033[00m".format(content))
--- a/src/crewai/utilities/task_output_storage_handler.py
+++ b/src/crewai/utilities/task_output_storage_handler.py
@@ -0,0 +1,61 @@
+from pydantic import BaseModel, Field
+from datetime import datetime
+from typing import Dict, Any, Optional, List
+from crewai.memory.storage.kickoff_task_outputs_storage import (
+    KickoffTaskOutputsSQLiteStorage,
+)
+from crewai.task import Task
+
+
+class ExecutionLog(BaseModel):
+    task_id: str
+    expected_output: Optional[str] = None
+    output: Dict[str, Any]
+    timestamp: datetime = Field(default_factory=datetime.now)
+    task_index: int
+    inputs: Dict[str, Any] = Field(default_factory=dict)
+    was_replayed: bool = False
+
+    def __getitem__(self, key: str) -> Any:
+        return getattr(self, key)
+
+
+class TaskOutputStorageHandler:
+    def __init__(self) -> None:
+        self.storage = KickoffTaskOutputsSQLiteStorage()
+
+    def update(self, task_index: int, log: Dict[str, Any]):
+        saved_outputs = self.load()
+        if saved_outputs is None:
+            raise ValueError("Logs cannot be None")
+
+        if log.get("was_replayed", False):
+            replayed = {
+                "task_id": str(log["task"].id),
+                "expected_output": log["task"].expected_output,
+                "output": log["output"],
+                "was_replayed": log["was_replayed"],
+                "inputs": log["inputs"],
+            }
+            self.storage.update(
+                task_index,
+                **replayed,
+            )
+        else:
+            self.storage.add(**log)
+
+    def add(
+        self,
+        task: Task,
+        output: Dict[str, Any],
+        task_index: int,
+        inputs: Dict[str, Any] = {},
+        was_replayed: bool = False,
+    ):
+        self.storage.add(task, output, task_index, was_replayed, inputs)
+
+    def reset(self):
+        self.storage.delete_all()
+
+    def load(self) -> Optional[List[Dict[str, Any]]]:
+        return self.storage.load()