Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-10 00:28:31 +00:00)
WIP. Procedure appears to be working well. Working on mocking properly for tests
poetry.lock (generated) — 30 changed lines

@@ -2395,8 +2395,8 @@ langchain-core = ">=0.2.10,<0.3.0"
 langchain-text-splitters = ">=0.2.0,<0.3.0"
 langsmith = ">=0.1.17,<0.2.0"
 numpy = [
-    {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
     {version = ">=1,<2", markers = "python_version < \"3.12\""},
+    {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
 ]
 pydantic = ">=1,<3"
 PyYAML = ">=5.3"
@@ -2437,8 +2437,8 @@ langchain = ">=0.2.6,<0.3.0"
 langchain-core = ">=0.2.10,<0.3.0"
 langsmith = ">=0.1.0,<0.2.0"
 numpy = [
-    {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
     {version = ">=1,<2", markers = "python_version < \"3.12\""},
+    {version = ">=1.26.0,<2.0.0", markers = "python_version >= \"3.12\""},
 ]
 PyYAML = ">=5.3"
 requests = ">=2,<3"
@@ -2461,8 +2461,8 @@ jsonpatch = ">=1.33,<2.0"
 langsmith = ">=0.1.75,<0.2.0"
 packaging = ">=23.2,<25"
 pydantic = [
-    {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
     {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
+    {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
 ]
 PyYAML = ">=5.3"
 tenacity = ">=8.1.0,<8.4.0 || >8.4.0,<9.0.0"
@@ -2511,8 +2511,8 @@ files = [
 [package.dependencies]
 orjson = ">=3.9.14,<4.0.0"
 pydantic = [
-    {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
     {version = ">=1,<3", markers = "python_full_version < \"3.12.4\""},
+    {version = ">=2.7.4,<3.0.0", markers = "python_full_version >= \"3.12.4\""},
 ]
 requests = ">=2,<3"

@@ -3989,8 +3989,8 @@ files = [
 annotated-types = ">=0.4.0"
 pydantic-core = "2.20.1"
 typing-extensions = [
-    {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
     {version = ">=4.6.1", markers = "python_version < \"3.13\""},
+    {version = ">=4.12.2", markers = "python_version >= \"3.13\""},
 ]

 [package.extras]
@@ -4279,6 +4279,24 @@ tomli = {version = ">=1", markers = "python_version < \"3.11\""}
 [package.extras]
 dev = ["argcomplete", "attrs (>=19.2)", "hypothesis (>=3.56)", "mock", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]

+[[package]]
+name = "pytest-asyncio"
+version = "0.23.7"
+description = "Pytest support for asyncio"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "pytest_asyncio-0.23.7-py3-none-any.whl", hash = "sha256:009b48127fbe44518a547bddd25611551b0e43ccdbf1e67d12479f569832c20b"},
+    {file = "pytest_asyncio-0.23.7.tar.gz", hash = "sha256:5f5c72948f4c49e7db4f29f2521d4031f1c27f86e57b046126654083d4770268"},
+]
+
+[package.dependencies]
+pytest = ">=7.0.0,<9"
+
+[package.extras]
+docs = ["sphinx (>=5.3)", "sphinx-rtd-theme (>=1.0)"]
+testing = ["coverage (>=6.2)", "hypothesis (>=5.7.1)"]
+
 [[package]]
 name = "pytest-vcr"
 version = "1.0.2"
@@ -6090,4 +6108,4 @@ tools = ["crewai-tools"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.10,<=3.13"
-content-hash = "0dbf6f6e2e841fb3eec4ff87ea5d6b430f29702118fee91307983c6b2581e59e"
+content-hash = "91b755743f562d0c830916fc49df0f4fb5798cbd61d313b90165f050dfc3e34f"
pyproject.toml

@@ -51,6 +51,7 @@ crewai-tools = "^0.4.8"
 pytest = "^8.0.0"
 pytest-vcr = "^1.0.2"
 python-dotenv = "1.0.0"
+pytest-asyncio = "^0.23.7"

 [tool.poetry.scripts]
 crewai = "crewai.cli.cli:crewai"
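pytest-asyncio is added as a dev dependency to drive the coroutine-based tests introduced below. In its strict mode (the default in recent releases) an async test function is only awaited when it carries the explicit marker, which is why every async test in the new file is decorated. A minimal standalone sketch, with the test name and assertion as illustrative placeholders rather than anything from this commit:

import pytest


@pytest.mark.asyncio
async def test_marker_is_required():
    # Hypothetical test: without the marker above, strict mode would leave the
    # coroutine unawaited and pytest would skip it with a warning.
    assert 1 + 1 == 2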
src/crewai/crew.py

@@ -6,15 +6,15 @@ from typing import Any, Dict, List, Optional, Tuple, Union

 from langchain_core.callbacks import BaseCallbackHandler
 from pydantic import (
     UUID4,
     BaseModel,
     ConfigDict,
     Field,
     InstanceOf,
     Json,
     PrivateAttr,
     field_validator,
     model_validator,
 )
 from pydantic_core import PydanticCustomError

@@ -34,8 +34,8 @@ from crewai.utilities import I18N, FileHandler, Logger, RPMController
 from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
 from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
 from crewai.utilities.formatter import (
     aggregate_raw_outputs_from_task_outputs,
     aggregate_raw_outputs_from_tasks,
 )
 from crewai.utilities.training_handler import CrewTrainingHandler

@@ -86,7 +86,7 @@ class Crew(BaseModel):
     tasks: List[Task] = Field(default_factory=list)
     agents: List[BaseAgent] = Field(default_factory=list)
     process: Process = Field(default=Process.sequential)
-    verbose: Union[int, bool] = Field(default=0)
+    verbose: int = Field(default=0)
     memory: bool = Field(
         default=False,
         description="Whether the crew should use memory to store memories of it's execution",
@@ -131,8 +131,8 @@ class Crew(BaseModel):
         default=None,
         description="Path to the prompt json file to be used for the crew.",
     )
-    output_log_file: Optional[Union[bool, str]] = Field(
-        default=False,
+    output_log_file: Optional[str] = Field(
+        default="",
         description="output_log_file",
     )

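The two field changes above narrow what callers can pass: verbose is now a plain int, and output_log_file is an optional string that defaults to "" instead of False. A minimal sketch of constructing a Crew against the narrowed types (values are illustrative; a crew that actually runs would also need real agents, tasks, and an LLM backend):

from crewai import Crew

# Illustrative only: verbose now takes an int (no longer Union[int, bool]) and
# output_log_file a log-file path string (no longer a bool flag).
crew = Crew(
    agents=[],
    tasks=[],
    verbose=1,
    output_log_file="crew_run.log",
)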
src/crewai/procedure/__init__.py (new file, 3 lines)

@@ -0,0 +1,3 @@
+from crewai.procedure.procedure import Procedure
+
+__all__ = ["Procedure"]
src/crewai/procedure/procedure.py (new file, 38 lines)

@@ -0,0 +1,38 @@
+import asyncio
+from typing import Any, Dict, List
+
+from pydantic import BaseModel, Field
+
+from crewai.crew import Crew
+from crewai.crews.crew_output import CrewOutput
+
+
+class Procedure(BaseModel):
+    crews: List[Crew] = Field(
+        ..., description="List of crews to be executed in sequence"
+    )
+
+    async def kickoff(self, inputs: List[Dict[str, Any]]) -> List[CrewOutput]:
+        current_inputs = inputs
+
+        for crew in self.crews:
+            # Process all inputs for the current crew
+            crew_outputs = await self._process_crew(crew, current_inputs)
+            print("Crew Outputs", crew_outputs)
+
+            # Prepare inputs for the next crew
+            current_inputs = [output.to_dict() for output in crew_outputs]
+
+        # Return the final outputs
+        return crew_outputs
+
+    async def _process_crew(
+        self, crew: Crew, inputs: List[Dict[str, Any]]
+    ) -> List[CrewOutput]:
+        # Kickoff crew asynchronously for each input
+        crew_kickoffs = [crew.kickoff_async(inputs=input_data) for input_data in inputs]
+
+        # Wait for all kickoffs to complete
+        outputs = await asyncio.gather(*crew_kickoffs)
+
+        return outputs
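For orientation, a minimal sketch of how the new Procedure class might be driven end to end. The helper, the agent/task definitions, and the input dict are placeholders rather than part of this commit, and actually executing the crews would require a configured LLM; only the Procedure/kickoff wiring comes from the new module.

import asyncio

from crewai import Agent, Crew, Task
from crewai.procedure import Procedure


def make_crew(goal: str) -> Crew:
    # Hypothetical single-agent crew used only to illustrate the chaining.
    agent = Agent(role="Analyst", goal=goal, backstory="Placeholder backstory")
    task = Task(description=goal, expected_output="A short summary", agent=agent)
    return Crew(agents=[agent], tasks=[task])


procedure = Procedure(
    crews=[make_crew("Research the topic"), make_crew("Summarize the findings")]
)

# kickoff() is a coroutine: each crew is kicked off once per input dict via
# kickoff_async, and each CrewOutput.to_dict() becomes an input for the next crew.
outputs = asyncio.run(procedure.kickoff(inputs=[{"topic": "agent frameworks"}]))
print(outputs[0].raw)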
tests/procedure/test_procedure.py (new file, 201 lines)

@@ -0,0 +1,201 @@
+from unittest.mock import Mock, PropertyMock
+
+import pytest
+
+from crewai.crew import Crew
+from crewai.crews.crew_output import CrewOutput
+from crewai.procedure.procedure import Procedure
+from crewai.tasks.task_output import TaskOutput
+
+
+@pytest.fixture
+def mock_crew():
+    crew = Mock(spec=Crew)
+    task_output = TaskOutput(
+        description="Test task", raw="Task output", agent="Test Agent"
+    )
+    crew_output = CrewOutput(
+        raw="Test output",
+        tasks_output=[task_output],
+        token_usage={"total_tokens": 100, "prompt_tokens": 50, "completion_tokens": 50},
+        json_dict={"key": "value"},  # Add this line
+    )
+
+    async def async_kickoff(inputs=None):
+        return crew_output
+
+    crew.kickoff.return_value = crew_output
+    crew.kickoff_async.side_effect = async_kickoff
+    return crew
+
+
+def test_procedure_initialization():
+    """
+    Test that a Procedure is correctly initialized with the given crews.
+    """
+    crew1 = Mock(spec=Crew)
+    crew2 = Mock(spec=Crew)
+
+    # Add properties required by validators
+    type(crew1).verbose = PropertyMock(return_value=True)
+    type(crew2).verbose = PropertyMock(return_value=True)
+    type(crew1).output_log_file = PropertyMock(return_value=False)
+    type(crew2).output_log_file = PropertyMock(return_value=False)
+
+    procedure = Procedure(crews=[crew1, crew2])
+    assert len(procedure.crews) == 2
+    assert procedure.crews[0] == crew1
+    assert procedure.crews[1] == crew2
+
+
+@pytest.mark.asyncio
+async def test_procedure_kickoff_single_input(mock_crew):
+    """
+    Test that Procedure.kickoff() correctly processes a single input
+    and returns the expected CrewOutput.
+    """
+    procedure = Procedure(crews=[mock_crew])
+    input_data = {"key": "value"}
+    result = await procedure.kickoff([input_data])
+
+    mock_crew.kickoff_async.assert_called_once_with(inputs=input_data)
+    assert len(result) == 1
+    assert isinstance(result[0], CrewOutput)
+    assert result[0].raw == "Test output"
+    assert len(result[0].tasks_output) == 1
+    assert result[0].tasks_output[0].raw == "Task output"
+    assert result[0].token_usage == {
+        "total_tokens": 100,
+        "prompt_tokens": 50,
+        "completion_tokens": 50,
+    }
+
+
+@pytest.mark.asyncio
+async def test_procedure_kickoff_multiple_inputs(mock_crew):
+    """
+    Test that Procedure.kickoff() correctly processes multiple inputs
+    and returns the expected CrewOutputs.
+    """
+    procedure = Procedure(crews=[mock_crew, mock_crew])
+    input_data = [{"key1": "value1"}, {"key2": "value2"}]
+    result = await procedure.kickoff(input_data)
+
+    expected_call_count = 4  # 2 crews x 2 inputs = 4
+    assert mock_crew.kickoff_async.call_count == expected_call_count
+    assert len(result) == 2
+    assert all(isinstance(r, CrewOutput) for r in result)
+    assert all(len(r.tasks_output) == 1 for r in result)
+    assert all(
+        r.token_usage
+        == {"total_tokens": 100, "prompt_tokens": 50, "completion_tokens": 50}
+        for r in result
+    )
+
+
+@pytest.mark.asyncio
+async def test_procedure_chaining():
+    """
+    Test that Procedure correctly chains multiple crews, passing the output
+    of one crew as input to the next crew in the sequence.
+
+    This test verifies:
+    1. The first crew receives the initial input.
+    2. The second crew receives the output from the first crew as its input.
+    3. The final output contains the result from the last crew in the chain.
+    4. Task outputs and token usage are correctly propagated through the chain.
+    """
+    crew1, crew2 = Mock(spec=Crew), Mock(spec=Crew)
+    task_output1 = TaskOutput(description="Task 1", raw="Output 1", agent="Agent 1")
+    task_output2 = TaskOutput(description="Task 2", raw="Final output", agent="Agent 2")
+
+    crew_output1 = CrewOutput(
+        raw="Output 1",
+        tasks_output=[task_output1],
+        token_usage={"total_tokens": 100, "prompt_tokens": 50, "completion_tokens": 50},
+        json_dict={"key1": "value1"},
+    )
+    crew_output2 = CrewOutput(
+        raw="Final output",
+        tasks_output=[task_output2],
+        token_usage={"total_tokens": 150, "prompt_tokens": 75, "completion_tokens": 75},
+        json_dict={"key2": "value2"},
+    )
+
+    async def async_kickoff1(inputs=None):
+        return crew_output1
+
+    async def async_kickoff2(inputs=None):
+        return crew_output2
+
+    crew1.kickoff_async.side_effect = async_kickoff1
+    crew2.kickoff_async.side_effect = async_kickoff2
+
+    procedure = Procedure(crews=[crew1, crew2])
+    input_data = [{"initial": "data"}]
+    result = await procedure.kickoff(input_data)
+
+    # Check that the first crew received the initial input
+    crew1.kickoff_async.assert_called_once_with(inputs={"initial": "data"})
+
+    # Check that the second crew received the output from the first crew as its input
+    crew2.kickoff_async.assert_called_once_with(inputs=crew_output1.to_dict())
+
+    # Check the final output
+    assert len(result) == 1
+    assert isinstance(result[0], CrewOutput)
+    assert result[0].raw == "Final output"
+    assert len(result[0].tasks_output) == 1
+    assert result[0].tasks_output[0].raw == "Final output"
+    assert result[0].token_usage == {
+        "total_tokens": 150,
+        "prompt_tokens": 75,
+        "completion_tokens": 75,
+    }
+    assert result[0].json_dict == {"key2": "value2"}
+
+
+@pytest.mark.asyncio
+async def test_procedure_invalid_input_type():
+    """
+    Test that Procedure.kickoff() raises a TypeError when given an invalid input type.
+    """
+    procedure = Procedure(crews=[Mock(spec=Crew)])
+    with pytest.raises(TypeError):
+        await procedure.kickoff("invalid input")
+
+
+@pytest.mark.asyncio
+async def test_procedure_token_usage_aggregation():
+    """
+    Test that Procedure correctly aggregates token usage across multiple crews.
+    """
+    crew1, crew2 = Mock(spec=Crew), Mock(spec=Crew)
+    crew1.kickoff.return_value = CrewOutput(
+        raw="Output 1",
+        tasks_output=[
+            TaskOutput(description="Task 1", raw="Output 1", agent="Agent 1")
+        ],
+        token_usage={"total_tokens": 100, "prompt_tokens": 50, "completion_tokens": 50},
+    )
+    crew2.kickoff.return_value = CrewOutput(
+        raw="Output 2",
+        tasks_output=[
+            TaskOutput(description="Task 2", raw="Output 2", agent="Agent 2")
+        ],
+        token_usage={"total_tokens": 150, "prompt_tokens": 75, "completion_tokens": 75},
+    )
+
+    procedure = Procedure([crew1, crew2])
+    result = await procedure.kickoff([{"initial": "data"}])
+
+    assert result[0].token_usage == {
+        "total_tokens": 250,
+        "prompt_tokens": 125,
+        "completion_tokens": 125,
+    }
+    assert result[0].token_usage == {
+        "total_tokens": 250,
+        "prompt_tokens": 125,
+        "completion_tokens": 125,
+    }