Replay feat using db (#930)

* Cleaned up task execution to now have separate paths for async and sync execution. Updating all kickoff functions to return CrewOutput. WIP. Waiting for Joao feedback on async task execution with task_output * Consistently storing async and sync output for context * outline tests I need to create going forward * Major rehaul of TaskOutput and CrewOutput. Updated all tests to work with new change. Need to add in a few final tricky async tests and add a few more to verify output types on TaskOutput and CrewOutput. * Encountering issues with callback. Need to test on main. WIP * working on tests. WIP * WIP. Figuring out disconnect issue. * Cleaned up logs now that I've isolated the issue to the LLM * more wip. * WIP. It looks like usage metrics has always been broken for async * Update parent crew who is managing for_each loop * Merge in main to bugfix/kickoff-for-each-usage-metrics * Clean up code for review * Add new tests * Final cleanup. Ready for review. * Moving copy functionality from Agent to BaseAgent * Fix renaming issue * Fix linting errors * use BaseAgent instead of Agent where applicable * Fixing missing function. Working on tests. * WIP. Needing team to review change * Fixing issues brought about by merge * WIP: need to fix json encoder * WIP need to fix encoder * WIP * WIP: replay working with async. need to add tests * Implement major fixes from yesterdays group conversation. Now working on tests. * The majority of tasks are working now. Need to fix converter class * Fix final failing test * Fix linting and type-checker issues * Add more tests to fully test CrewOutput and TaskOutput changes * Add in validation for async cannot depend on other async tasks. * WIP: working replay feat fixing inputs, need tests * WIP: core logic of seq and heir for executing tasks added into one * Update validators and tests * better logic for seq and hier * replay working for both seq and hier just need tests * fixed context * added cli command + code cleanup TODO: need better refactoring * refactoring for cleaner code * added better tests * removed todo comments and fixed some tests * fix logging now all tests should pass * cleaner code * ensure replay is delcared when replaying specific tasks * ensure hierarchical works * better typing for stored_outputs and separated task_output_handler * added better tests * added replay feature to crew docs * easier cli command name * fixing changes * using sqllite instead of .json file for logging previous task_outputs * tools fix * added to docs and fixed tests * fixed .db * fixed docs and removed unneeded comments * separating ltm and replay db * fixed printing colors * added how to doc --------- Co-authored-by: Brandon Hancock <brandon@brandonhancock.io>
2026-01-10 08:38:30 +00:00 · 2024-07-15 13:14:10 -07:00
parent 7696b45fc3
commit 67b04b30bf
25 changed files with 6378 additions and 195 deletions
--- a/src/crewai/crew.py
+++ b/src/crewai/crew.py
@@ -6,15 +6,15 @@ from typing import Any, Dict, List, Optional, Tuple, Union

 from langchain_core.callbacks import BaseCallbackHandler
 from pydantic import (
-  UUID4,
-  BaseModel,
-  ConfigDict,
-  Field,
-  InstanceOf,
-  Json,
-  PrivateAttr,
-  field_validator,
-  model_validator,
+    UUID4,
+    BaseModel,
+    ConfigDict,
+    Field,
+    InstanceOf,
+    Json,
+    PrivateAttr,
+    field_validator,
+    model_validator,
 )
 from pydantic_core import PydanticCustomError

@@ -31,8 +31,13 @@ from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry import Telemetry
 from crewai.tools.agent_tools import AgentTools
 from crewai.utilities import I18N, FileHandler, Logger, RPMController
-from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
+from crewai.utilities.constants import (
+    TRAINED_AGENTS_DATA_FILE,
+    TRAINING_DATA_FILE,
+)
 from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
+from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandler
+
 from crewai.utilities.formatter import (
    aggregate_raw_outputs_from_task_outputs,
    aggregate_raw_outputs_from_tasks,
@@ -80,6 +85,13 @@ class Crew(BaseModel):
    _entity_memory: Optional[InstanceOf[EntityMemory]] = PrivateAttr()
    _train: Optional[bool] = PrivateAttr(default=False)
    _train_iteration: Optional[int] = PrivateAttr()
+    _inputs: Optional[Dict[str, Any]] = PrivateAttr(default=None)
+    _logging_color: str = PrivateAttr(
+        default="bold_purple",
+    )
+    _task_output_handler: TaskOutputStorageHandler = PrivateAttr(
+        default_factory=TaskOutputStorageHandler
+    )

    cache: bool = Field(default=True)
    model_config = ConfigDict(arbitrary_types_allowed=True)
@@ -135,6 +147,14 @@ class Crew(BaseModel):
        default=False,
        description="output_log_file",
    )
+    task_execution_output_json_files: Optional[List[str]] = Field(
+        default=None,
+        description="List of file paths for task execution JSON files.",
+    )
+    execution_logs: List[Dict[str, Any]] = Field(
+        default=[],
+        description="List of execution logs for tasks",
+    )

    @field_validator("id", mode="before")
    @classmethod
@@ -376,7 +396,11 @@ class Crew(BaseModel):
    ) -> CrewOutput:
        """Starts the crew to work on its assigned tasks."""
        self._execution_span = self._telemetry.crew_execution_span(self, inputs)
+        self._task_output_handler.reset()
+        self._logging_color = "bold_purple"
+
        if inputs is not None:
+            self._inputs = inputs
            self._interpolate_inputs(inputs)
        self._set_tasks_callbacks()

@@ -403,7 +427,7 @@ class Crew(BaseModel):
        if self.process == Process.sequential:
            result = self._run_sequential_process()
        elif self.process == Process.hierarchical:
-            result = self._run_hierarchical_process()  # type: ignore # Incompatible types in assignment (expression has type "str | dict[str, Any]", variable has type "str")
+            result = self._run_hierarchical_process()
        else:
            raise NotImplementedError(
                f"The process '{self.process}' is not implemented yet."
@@ -440,6 +464,7 @@ class Crew(BaseModel):
            results.append(output)

        self.usage_metrics = total_usage_metrics
+        self._task_output_handler.reset()
        return results

    async def kickoff_async(self, inputs: Optional[Dict[str, Any]] = {}) -> CrewOutput:
@@ -488,129 +513,48 @@ class Crew(BaseModel):
                    total_usage_metrics[key] += crew.usage_metrics.get(key, 0)

        self.usage_metrics = total_usage_metrics
-
+        self._task_output_handler.reset()
        return results

+    def _store_execution_log(
+        self,
+        task: Task,
+        output: TaskOutput,
+        task_index: int,
+        was_replayed: bool = False,
+    ):
+        if self._inputs:
+            inputs = self._inputs
+        else:
+            inputs = {}
+
+        log = {
+            "task": task,
+            "output": {
+                "description": output.description,
+                "summary": output.summary,
+                "raw": output.raw,
+                "pydantic": output.pydantic,
+                "json_dict": output.json_dict,
+                "output_format": output.output_format,
+                "agent": output.agent,
+            },
+            "task_index": task_index,
+            "inputs": inputs,
+            "was_replayed": was_replayed,
+        }
+        self._task_output_handler.update(task_index, log)
+
    def _run_sequential_process(self) -> CrewOutput:
        """Executes tasks sequentially and returns the final output."""
-        task_outputs: List[TaskOutput] = []
-        futures: List[Tuple[Task, Future[TaskOutput]]] = []
+        return self._execute_tasks(self.tasks)

-        for task in self.tasks:
-            if task.agent and task.agent.allow_delegation:
-                agents_for_delegation = [
-                    agent for agent in self.agents if agent != task.agent
-                ]
-                if len(self.agents) > 1 and len(agents_for_delegation) > 0:
-                    delegation_tools = task.agent.get_delegation_tools(
-                        agents_for_delegation
-                    )
-
-                    # Add tools if they are not already in task.tools
-                    for new_tool in delegation_tools:
-                        # Find the index of the tool with the same name
-                        existing_tool_index = next(
-                            (
-                                index
-                                for index, tool in enumerate(task.tools or [])
-                                if tool.name == new_tool.name
-                            ),
-                            None,
-                        )
-                        if not task.tools:
-                            task.tools = []
-
-                        if existing_tool_index is not None:
-                            # Replace the existing tool
-                            task.tools[existing_tool_index] = new_tool 
-                        else:
-                            # Add the new tool
-                            task.tools.append(new_tool)
-
-            role = task.agent.role if task.agent is not None else "None"
-            self._logger.log("debug", f"== Working Agent: {role}", color="bold_purple")
-            self._logger.log(
-                "info", f"== Starting Task: {task.description}", color="bold_purple"
-            )
-
-            if self.output_log_file:
-                self._file_handler.log(
-                    agent=role, task=task.description, status="started"
-                )
-
-            if task.async_execution:
-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
-                )
-                future = task.execute_async(
-                    agent=task.agent, context=context, tools=task.tools
-                )
-                futures.append((task, future))
-            else:
-                # Before executing a synchronous task, wait for all async tasks to complete
-                if futures:
-                    # Clear task_outputs before processing async tasks
-                    task_outputs = []
-                    for future_task, future in futures:
-                        task_output = future.result()
-                        task_outputs.append(task_output)
-                        self._process_task_result(future_task, task_output)
-
-                    # Clear the futures list after processing all async results
-                    futures.clear()
-
-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
-                )
-                task_output = task.execute_sync(
-                    agent=task.agent, context=context, tools=task.tools
-                )
-                task_outputs = [task_output]
-                self._process_task_result(task, task_output)
-
-        if futures:
-            # Clear task_outputs before processing async tasks
-            task_outputs = []
-            for future_task, future in futures:
-                task_output = future.result()
-                task_outputs.append(task_output)
-                self._process_task_result(future_task, task_output)
-
-        # Important: There should only be one task output in the list
-        #            If there are more or 0, something went wrong.
-        if len(task_outputs) != 1:
-            raise ValueError(
-                "Something went wrong. Kickoff should return only one task output."
-            )
-
-        final_task_output = task_outputs[0]
-
-        final_string_output = final_task_output.raw
-        self._finish_execution(final_string_output)
-
-        token_usage = self.calculate_usage_metrics()
-
-        return CrewOutput(
-            raw=final_task_output.raw,
-            pydantic=final_task_output.pydantic,
-            json_dict=final_task_output.json_dict,
-            tasks_output=[task.output for task in self.tasks if task.output],
-            token_usage=token_usage,
-        )
-
-    def _process_task_result(self, task: Task, output: TaskOutput) -> None:
-        role = task.agent.role if task.agent is not None else "None"
-        self._logger.log("debug", f"== [{role}] Task output: {output}\n\n")
-        if self.output_log_file:
-            self._file_handler.log(agent=role, task=output, status="completed")
-
-    # TODO: @joao, Breaking change. Changed return type. Usage metrics is included in crewoutput
    def _run_hierarchical_process(self) -> CrewOutput:
        """Creates and assigns a manager agent to make sure the crew completes the tasks."""
+        self._create_manager_agent()
+        return self._execute_tasks(self.tasks, self.manager_agent)
+
+    def _create_manager_agent(self):
        i18n = I18N(prompt_file=self.prompt_file)
        if self.manager_agent is not None:
            self.manager_agent.allow_delegation = True
@@ -629,74 +573,148 @@ class Crew(BaseModel):
            )
            self.manager_agent = manager

+    def _execute_tasks(
+        self,
+        tasks: List[Task],
+        manager: Optional[BaseAgent] = None,
+        start_index: Optional[int] = 0,
+        was_replayed: bool = False,
+    ) -> CrewOutput:
+        """Executes tasks sequentially and returns the final output.
+
+        Args:
+            tasks (List[Task]): List of tasks to execute
+            manager (Optional[BaseAgent], optional): Manager agent to use for delegation. Defaults to None.
+
+        Returns:
+            CrewOutput: Final output of the crew
+        """
+
        task_outputs: List[TaskOutput] = []
-        futures: List[Tuple[Task, Future[TaskOutput]]] = []
+        futures: List[Tuple[Task, Future[TaskOutput], int]] = []
+        last_sync_output: Optional[TaskOutput] = None

-        # TODO: IF USER OVERRIDE THE CONTEXT, PASS THAT
-        for task in self.tasks:
-            self._logger.log("debug", f"Working Agent: {manager.role}")
-            self._logger.log("info", f"Starting Task: {task.description}")
+        for task_index, task in enumerate(tasks):
+            if start_index is not None and task_index < start_index:
+                if task.output:
+                    if task.async_execution:
+                        task_outputs.append(task.output)
+                    else:
+                        task_outputs = [task.output]
+                        last_sync_output = task.output
+                continue

-            if self.output_log_file:
-                self._file_handler.log(
-                    agent=manager.role, task=task.description, status="started"
+            self._prepare_task(task, manager)
+            if self.process == Process.hierarchical:
+                agent_to_use = manager
+            else:
+                agent_to_use = task.agent
+            if agent_to_use is None:
+                raise ValueError(
+                    f"No agent available for task: {task.description}. Ensure that either the task has an assigned agent or a manager agent is provided."
                )
+            self._log_task_start(task, agent_to_use)

            if task.async_execution:
-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
+                context = self._get_context(
+                    task, [last_sync_output] if last_sync_output else []
                )
                future = task.execute_async(
-                    agent=manager, context=context, tools=manager.tools
+                    agent=agent_to_use,
+                    context=context,
+                    tools=agent_to_use.tools,
                )
-                futures.append((task, future))
+                futures.append((task, future, task_index))
            else:
-                # Before executing a synchronous task, wait for all async tasks to complete
                if futures:
-                    # Clear task_outputs before processing async tasks
-                    task_outputs = []
-                    for future_task, future in futures:
-                        task_output = future.result()
-                        task_outputs.append(task_output)
-                        self._process_task_result(future_task, task_output)
-
-                    # Clear the futures list after processing all async results
+                    task_outputs.extend(
+                        self._process_async_tasks(futures, was_replayed)
+                    )
                    futures.clear()

-                context = (
-                    aggregate_raw_outputs_from_tasks(task.context)
-                    if task.context
-                    else aggregate_raw_outputs_from_task_outputs(task_outputs)
-                )
+                context = self._get_context(task, task_outputs)
                task_output = task.execute_sync(
-                    agent=manager, context=context, tools=manager.tools
+                    agent=agent_to_use,
+                    context=context,
+                    tools=agent_to_use.tools,
                )
                task_outputs = [task_output]
                self._process_task_result(task, task_output)
+                self._store_execution_log(task, task_output, task_index, was_replayed)

-        # Process any remaining async results
        if futures:
-            # Clear task_outputs before processing async tasks
-            task_outputs = []
-            for future_task, future in futures:
-                task_output = future.result()
-                task_outputs.append(task_output)
-                self._process_task_result(future_task, task_output)
+            task_outputs = self._process_async_tasks(futures, was_replayed)

-        # Important: There should only be one task output in the list
-        #            If there are more or 0, something went wrong.
+        return self._create_crew_output(task_outputs)
+
+    def _prepare_task(self, task: Task, manager: Optional[BaseAgent]):
+        if self.process == Process.hierarchical:
+            self._update_manager_tools(task, manager)
+        elif task.agent and task.agent.allow_delegation:
+            self._add_delegation_tools(task)
+
+    def _add_delegation_tools(self, task: Task):
+        agents_for_delegation = [agent for agent in self.agents if agent != task.agent]
+        if len(self.agents) > 1 and len(agents_for_delegation) > 0 and task.agent:
+            delegation_tools = task.agent.get_delegation_tools(agents_for_delegation)
+
+            # Add tools if they are not already in task.tools
+            for new_tool in delegation_tools:
+                # Find the index of the tool with the same name
+                existing_tool_index = next(
+                    (
+                        index
+                        for index, tool in enumerate(task.tools or [])
+                        if tool.name == new_tool.name
+                    ),
+                    None,
+                )
+                if not task.tools:
+                    task.tools = []
+
+                if existing_tool_index is not None:
+                    # Replace the existing tool
+                    task.tools[existing_tool_index] = new_tool
+                else:
+                    # Add the new tool
+                    task.tools.append(new_tool)
+
+    def _log_task_start(self, task: Task, agent: Optional[BaseAgent]):
+        color = self._logging_color
+        role = agent.role if agent else "None"
+        self._logger.log("debug", f"== Working Agent: {role}", color=color)
+        self._logger.log("info", f"== Starting Task: {task.description}", color=color)
+        if self.output_log_file:
+            self._file_handler.log(agent=role, task=task.description, status="started")
+
+    def _update_manager_tools(self, task: Task, manager: Optional[BaseAgent]):
+        if task.agent and manager:
+            manager.tools = task.agent.get_delegation_tools([task.agent])
+        if manager:
+            manager.tools = manager.get_delegation_tools(self.agents)
+
+    def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
+        context = (
+            aggregate_raw_outputs_from_tasks(task.context)
+            if task.context
+            else aggregate_raw_outputs_from_task_outputs(task_outputs)
+        )
+        return context
+
+    def _process_task_result(self, task: Task, output: TaskOutput) -> None:
+        role = task.agent.role if task.agent is not None else "None"
+        self._logger.log("debug", f"== [{role}] Task output: {output}\n\n")
+        if self.output_log_file:
+            self._file_handler.log(agent=role, task=output, status="completed")
+
+    def _create_crew_output(self, task_outputs: List[TaskOutput]) -> CrewOutput:
        if len(task_outputs) != 1:
            raise ValueError(
                "Something went wrong. Kickoff should return only one task output."
            )
-
        final_task_output = task_outputs[0]
-
        final_string_output = final_task_output.raw
        self._finish_execution(final_string_output)
-
        token_usage = self.calculate_usage_metrics()

        return CrewOutput(
@@ -707,6 +725,74 @@ class Crew(BaseModel):
            token_usage=token_usage,
        )

+    def _process_async_tasks(
+        self,
+        futures: List[Tuple[Task, Future[TaskOutput], int]],
+        was_replayed: bool = False,
+    ) -> List[TaskOutput]:
+        task_outputs = []
+        for future_task, future, task_index in futures:
+            task_output = future.result()
+            task_outputs.append(task_output)
+            self._process_task_result(future_task, task_output)
+            self._store_execution_log(
+                future_task, task_output, task_index, was_replayed
+            )
+        return task_outputs
+
+    def _find_task_index(
+        self, task_id: str, stored_outputs: List[Any]
+    ) -> Optional[int]:
+        return next(
+            (
+                index
+                for (index, d) in enumerate(stored_outputs)
+                if d["task_id"] == str(task_id)
+            ),
+            None,
+        )
+
+    def replay_from_task(
+        self, task_id: str, inputs: Optional[Dict[str, Any]] = None
+    ) -> CrewOutput:
+        stored_outputs = self._task_output_handler.load()
+        if not stored_outputs:
+            raise ValueError(f"Task with id {task_id} not found in the crew's tasks.")
+
+        start_index = self._find_task_index(task_id, stored_outputs)
+
+        if start_index is None:
+            raise ValueError(f"Task with id {task_id} not found in the crew's tasks.")
+
+        replay_inputs = (
+            inputs if inputs is not None else stored_outputs[start_index]["inputs"]
+        )
+        self._inputs = replay_inputs
+
+        if replay_inputs:
+            self._interpolate_inputs(replay_inputs)
+
+        if self.process == Process.hierarchical:
+            self._create_manager_agent()
+
+        for i in range(start_index):
+            stored_output = stored_outputs[i][
+                "output"
+            ]  # for adding context to the task
+            task_output = TaskOutput(
+                description=stored_output["description"],
+                agent=stored_output["agent"],
+                raw=stored_output["raw"],
+                pydantic=stored_output["pydantic"],
+                json_dict=stored_output["json_dict"],
+                output_format=stored_output["output_format"],
+            )
+            self.tasks[i].output = task_output
+
+        self._logging_color = "bold_blue"
+        result = self._execute_tasks(self.tasks, self.manager_agent, start_index, True)
+        return result
+
    def copy(self):
        """Create a deep copy of the Crew."""