Start refactoring for pipeline_output

This commit is contained in:
Brandon Hancock
2024-07-18 11:20:26 -04:00
parent c0c329b6e0
commit 834c62feca
8 changed files with 322 additions and 299 deletions

View File

@@ -48,7 +48,7 @@ except ImportError:
agentops = None
if TYPE_CHECKING:
from crewai.procedure.procedure import Procedure
from crewai.pipeline.pipeline import Pipeline
class Crew(BaseModel):
@@ -946,17 +946,17 @@ class Crew(BaseModel):
return total_usage_metrics
def __rshift__(self, other: "Crew") -> "Procedure":
def __rshift__(self, other: "Crew") -> "Pipeline":
"""
Implements the >> operator to add another Crew to an existing Procedure.
Implements the >> operator to add another Crew to an existing Pipeline.
"""
from crewai.procedure.procedure import Procedure
from crewai.pipeline.pipeline import Pipeline
if not isinstance(other, Crew):
raise TypeError(
f"Unsupported operand type for >>: '{type(self).__name__}' and '{type(other).__name__}'"
)
return Procedure(crews=[self, other])
return Pipeline(stages=[self, other])
def __repr__(self):
return f"Crew(id={self.id}, process={self.process}, number_of_agents={len(self.agents)}, number_of_tasks={len(self.tasks)})"

View File

@@ -0,0 +1,3 @@
from crewai.pipeline.pipeline import Pipeline
__all__ = ["Pipeline"]

View File

@@ -0,0 +1,92 @@
import asyncio
from typing import Any, Dict, List, Union
from pydantic import BaseModel, Field
from crewai.crew import Crew
from crewai.crews.crew_output import CrewOutput
"""
Pipeline Terminology:
Pipeline: The overall structure that defines a sequence of operations.
Stage: A distinct part of the pipeline, which can be either sequential or parallel.
Branch: Parallel executions within a stage (e.g., concurrent crew operations).
Stream: The journey of an individual input through the entire pipeline.
Example pipeline structure:
crew1 >> [crew2, crew3] >> crew4
This represents a pipeline with three stages:
1. A sequential stage (crew1)
2. A parallel stage with two branches (crew2 and crew3 executing concurrently)
3. Another sequential stage (crew4)
Each input creates its own stream, flowing through all stages of the pipeline.
Multiple streams can be processed concurrently, each following the defined pipeline structure.
"""
class Pipeline(BaseModel):
    """A sequence of stages that independent input streams flow through.

    Each stage is either a single Crew (sequential) or a list of Crews
    (parallel branches). See the module docstring for terminology.
    """

    stages: List[Union[Crew, List[Crew]]] = Field(
        ..., description="List of crews representing stages to be executed in sequence"
    )

    async def process_streams(
        self, stream_inputs: List[Dict[str, Any]]
    ) -> List[List[CrewOutput]]:
        """
        Process multiple streams in parallel, with each stream going through all stages.

        Args:
            stream_inputs: One input dict per stream. The dicts are NOT
                mutated; each stream works on its own copy.

        Returns:
            For each stream, the list of CrewOutputs produced by its
            final stage (one element for a sequential stage, one per
            branch for a parallel stage).
        """

        async def process_single_stream(
            stream_input: Dict[str, Any]
        ) -> List[CrewOutput]:
            # Work on a copy so the caller's input dict is left untouched.
            current_input = dict(stream_input)
            stage_outputs: List[CrewOutput] = []
            for stage in self.stages:
                if isinstance(stage, Crew):
                    # Sequential stage: one crew.
                    stage_output = await stage.kickoff_async(inputs=current_input)
                    stage_outputs = [stage_output]
                else:
                    # Parallel stage: run every branch crew concurrently.
                    stage_outputs = list(
                        await asyncio.gather(
                            *[crew.kickoff_async(inputs=current_input) for crew in stage]
                        )
                    )
                # Merge each stage's outputs into the accumulated input so
                # later stages see all values produced so far.
                for output in stage_outputs:
                    current_input.update(output.to_dict())
            # Only the final stage's outputs are returned for this stream.
            return stage_outputs

        # Process all streams in parallel.
        return await asyncio.gather(
            *(process_single_stream(input_data) for input_data in stream_inputs)
        )

    def __rshift__(self, other: Any) -> "Pipeline":
        """
        Implements the >> operator to add another Stage (Crew or List[Crew]) to an existing Pipeline.
        """
        if isinstance(other, Crew):
            return type(self)(stages=self.stages + [other])
        elif isinstance(other, list) and all(isinstance(crew, Crew) for crew in other):
            return type(self)(stages=self.stages + [other])
        else:
            raise TypeError(
                f"Unsupported operand type for >>: '{type(self).__name__}' and '{type(other).__name__}'"
            )
# Helper function to run the pipeline
async def run_pipeline(
    pipeline: Pipeline, inputs: List[Dict[str, Any]]
) -> List[List[CrewOutput]]:
    """Kick off *pipeline* for every input stream and await the results."""
    results = await pipeline.process_streams(inputs)
    return results

View File

@@ -0,0 +1,21 @@
from typing import Any, Dict, List
from pydantic import BaseModel, Field
from crewai.crews.crew_output import CrewOutput
class PipelineOutput(BaseModel):
    """Aggregated results of a pipeline run.

    Collects the final CrewOutput of each stream plus the per-crew token
    usage recorded for each stream.
    """

    # Final outputs, one per stream, from the last crew of the pipeline.
    final_outputs: List[CrewOutput] = Field(
        default_factory=list,  # factory avoids a shared mutable default literal
        description="List of final outputs from the last crew in the pipeline",
    )
    # token_usage[stream][crew] -> usage-metrics dict for that crew.
    token_usage: List[List[Dict[str, Any]]] = Field(
        default_factory=list,
        description="Token usage for each crew in each stream",
    )

    def add_final_output(self, output: CrewOutput) -> None:
        """Append one stream's final CrewOutput."""
        self.final_outputs.append(output)

    def add_token_usage(self, usage: List[Dict[str, Any]]) -> None:
        """Append one stream's per-crew token-usage list."""
        self.token_usage.append(usage)

View File

@@ -1,3 +0,0 @@
from crewai.procedure.procedure import Procedure
__all__ = ["Procedure"]

View File

@@ -1,76 +0,0 @@
import asyncio
from typing import Any, Dict, List
from pydantic import BaseModel, Field
from crewai.crew import Crew
from crewai.crews.crew_output import CrewOutput
class Procedure(BaseModel):
    """A linear chain of crews where each crew's outputs (as dicts) become
    the next crew's inputs.

    NOTE(review): this commit replaces Procedure with crewai.pipeline.Pipeline.
    """

    crews: List[Crew] = Field(
        ..., description="List of crews to be executed in sequence"
    )

    def kickoff(self, inputs: List[Dict[str, Any]]) -> List[CrewOutput]:
        """Run every crew in order, synchronously.

        Args:
            inputs: One input dict per item to feed the first crew.

        Returns:
            The CrewOutputs produced by the last crew (empty list when
            there are no crews).
        """
        current_inputs = inputs
        crew_outputs: List[CrewOutput] = []
        for index, crew in enumerate(self.crews):
            # Process all inputs for the current crew.
            crew_outputs = self._process_crew(crew, current_inputs)
            if index < len(self.crews) - 1:
                # Not the last crew: its outputs become the next crew's inputs.
                current_inputs = [output.to_dict() for output in crew_outputs]
            else:
                # Last crew: return its outputs directly.
                return crew_outputs
        return crew_outputs

    async def kickoff_async(self, inputs: List[Dict[str, Any]]) -> List[CrewOutput]:
        """Async variant of kickoff; each crew's inputs run concurrently."""
        current_inputs = inputs
        crew_outputs: List[CrewOutput] = []
        for index, crew in enumerate(self.crews):
            # BUG FIX: previously `await self._process_crew(...)`, which awaits
            # the plain list returned by the sync helper and raises TypeError.
            # The async helper is the intended call here.
            crew_outputs = await self._process_crew_async(crew, current_inputs)
            if index < len(self.crews) - 1:
                current_inputs = [output.to_dict() for output in crew_outputs]
            else:
                return crew_outputs
        return crew_outputs

    def _process_crew(
        self, crew: Crew, inputs: List[Dict[str, Any]]
    ) -> List[CrewOutput]:
        # Kick off the crew once per input, sequentially.
        return [crew.kickoff(inputs=input_data) for input_data in inputs]

    async def _process_crew_async(
        self, crew: Crew, inputs: List[Dict[str, Any]]
    ) -> List[CrewOutput]:
        # Kick off the crew once per input, concurrently.
        crew_kickoffs = [crew.kickoff_async(inputs=input_data) for input_data in inputs]
        return await asyncio.gather(*crew_kickoffs)

    def __rshift__(self, other: Crew) -> "Procedure":
        """
        Implements the >> operator to add another Crew to an existing Procedure.
        """
        if not isinstance(other, Crew):
            raise TypeError(
                f"Unsupported operand type for >>: '{type(self).__name__}' and '{type(other).__name__}'"
            )
        return type(self)(crews=self.crews + [other])