Merge branch 'feature/procedure_v2' into brandon/cre-107-pipeline-conditional-routing

This commit is contained in:
Brandon Hancock
2024-07-29 16:11:55 -04:00
55 changed files with 437468 additions and 7681 deletions

View File

@@ -55,8 +55,6 @@ class Agent(BaseAgent):
tools: Tools at the agent's disposal
step_callback: Callback to be executed after each step of the agent execution.
callbacks: A list of callback functions from the langchain library that are triggered during the agent's execution process
allow_code_execution: Enable code execution for the agent.
max_retry_limit: Maximum number of retries for an agent to execute a task when an error occurs.
"""
_times_executed: int = PrivateAttr(default=0)
@@ -199,9 +197,7 @@ class Agent(BaseAgent):
"tools": self.agent_executor.tools_description,
}
)["output"]
print("Result when things went well:", result)
except Exception as e:
print("FAILED TO EXECUTE TASK", e)
self._times_executed += 1
if self._times_executed > self.max_retry_limit:
raise e
@@ -217,7 +213,6 @@ class Agent(BaseAgent):
if tool_result.get("result_as_answer", False):
result = tool_result["result"]
print("RESULT TO RETURN", result)
return result
def format_log_to_str(
@@ -265,6 +260,7 @@ class Agent(BaseAgent):
"tools_handler": self.tools_handler,
"function_calling_llm": self.function_calling_llm,
"callbacks": self.callbacks,
"max_tokens": self.max_tokens,
}
if self._rpm_controller:

View File

@@ -45,6 +45,7 @@ class BaseAgent(ABC, BaseModel):
i18n (I18N): Internationalization settings.
cache_handler (InstanceOf[CacheHandler]): An instance of the CacheHandler class.
tools_handler (InstanceOf[ToolsHandler]): An instance of the ToolsHandler class.
max_tokens: Maximum number of tokens for the agent to generate in a response.
Methods:
@@ -118,6 +119,9 @@ class BaseAgent(ABC, BaseModel):
tools_handler: InstanceOf[ToolsHandler] = Field(
default=None, description="An instance of the ToolsHandler class."
)
max_tokens: Optional[int] = Field(
default=None, description="Maximum number of tokens for the agent's execution."
)
_original_role: str | None = None
_original_goal: str | None = None

View File

@@ -1,4 +1,4 @@
from typing import Any, Dict
from crewai.types.usage_metrics import UsageMetrics
class TokenProcess:
@@ -18,10 +18,10 @@ class TokenProcess:
def sum_successful_requests(self, requests: int):
self.successful_requests = self.successful_requests + requests
def get_summary(self) -> Dict[str, Any]:
return {
"total_tokens": self.total_tokens,
"prompt_tokens": self.prompt_tokens,
"completion_tokens": self.completion_tokens,
"successful_requests": self.successful_requests,
}
def get_summary(self) -> UsageMetrics:
return UsageMetrics(
total_tokens=self.total_tokens,
prompt_tokens=self.prompt_tokens,
completion_tokens=self.completion_tokens,
successful_requests=self.successful_requests,
)
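
With this change, `get_summary` returns the typed `UsageMetrics` model instead of a plain dict. A minimal standalone sketch of the new behavior, mirroring the class shape shown in this hunk (the zero defaults are assumptions):

```python
from crewai.types.usage_metrics import UsageMetrics

class TokenProcess:
    # Mirrors the fields implied by get_summary above; defaults are assumptions.
    total_tokens: int = 0
    prompt_tokens: int = 0
    completion_tokens: int = 0
    successful_requests: int = 0

    def sum_successful_requests(self, requests: int):
        self.successful_requests = self.successful_requests + requests

    def get_summary(self) -> UsageMetrics:
        return UsageMetrics(
            total_tokens=self.total_tokens,
            prompt_tokens=self.prompt_tokens,
            completion_tokens=self.completion_tokens,
            successful_requests=self.successful_requests,
        )

process = TokenProcess()
process.sum_successful_requests(2)
summary = process.get_summary()
print(summary.successful_requests)  # 2; typed attribute access replaces dict lookups
```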

View File

@@ -5,11 +5,11 @@ from crewai.memory.storage.kickoff_task_outputs_storage import (
KickoffTaskOutputsSQLiteStorage,
)
from .create_crew import create_crew
from .train_crew import train_crew
from .replay_from_task import replay_task_command
from .reset_memories_command import reset_memories_command
from .test_crew import test_crew
from .train_crew import train_crew
@click.group()
@@ -126,5 +126,26 @@ def reset_memories(long, short, entities, kickoff_outputs, all):
click.echo(f"An error occurred while resetting memories: {e}", err=True)
@crewai.command()
@click.option(
"-n",
"--n_iterations",
type=int,
default=3,
help="Number of iterations to Test the crew",
)
@click.option(
"-m",
"--model",
type=str,
default="gpt-4o-mini",
help="LLM Model to run the tests on the Crew. For now only accepting only OpenAI models.",
)
def test(n_iterations: int, model: str):
"""Test the crew and evaluate the results."""
click.echo(f"Testing the crew for {n_iterations} iterations with model {model}")
test_crew(n_iterations, model)
if __name__ == "__main__":
crewai()
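
A hedged sketch of exercising the new command with click's test runner; the `crewai.cli.cli` import path for the click group is an assumption:

```python
from click.testing import CliRunner

from crewai.cli.cli import crewai  # assumed module path for the click group above

runner = CliRunner()
# Equivalent to running: crewai test -n 3 -m gpt-4o-mini
result = runner.invoke(crewai, ["test", "-n", "3", "-m", "gpt-4o-mini"])
print(result.output)
```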

View File

@@ -9,10 +9,14 @@ from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandle
def reset_memories_command(long, short, entity, kickoff_outputs, all) -> None:
"""
Replay the crew execution from a specific task.
Reset the crew memories.
Args:
task_id (str): The ID of the task to replay from.
long (bool): Whether to reset the long-term memory.
short (bool): Whether to reset the short-term memory.
entity (bool): Whether to reset the entity memory.
kickoff_outputs (bool): Whether to reset the latest kickoff task outputs.
all (bool): Whether to reset all memories.
"""
try:

View File

@@ -5,6 +5,7 @@ research_task:
the current year is 2024.
expected_output: >
A list with 10 bullet points of the most relevant information about {topic}
agent: researcher
reporting_task:
description: >
@@ -13,3 +14,4 @@ reporting_task:
expected_output: >
A fully fledged report with the main topics, each with a full section of information.
Formatted as markdown without '```'
agent: reporting_analyst

View File

@@ -32,14 +32,12 @@ class {{crew_name}}Crew():
def research_task(self) -> Task:
return Task(
config=self.tasks_config['research_task'],
agent=self.researcher()
)
@task
def reporting_task(self) -> Task:
return Task(
config=self.tasks_config['reporting_task'],
agent=self.reporting_analyst(),
output_file='report.md'
)

View File

@@ -39,3 +39,16 @@ def replay():
except Exception as e:
raise Exception(f"An error occurred while replaying the crew: {e}")
def test():
"""
Test the crew execution and return the results.
"""
inputs = {
"topic": "AI LLMs"
}
try:
{{crew_name}}Crew().crew().test(n_iterations=int(sys.argv[1]), openai_model_name=sys.argv[2], inputs=inputs)
except Exception as e:
raise Exception(f"An error occurred while replaying the crew: {e}")

View File

@@ -12,6 +12,7 @@ crewai = { extras = ["tools"], version = "^0.41.1" }
{{folder_name}} = "{{folder_name}}.main:run"
train = "{{folder_name}}.main:train"
replay = "{{folder_name}}.main:replay"
test = "{{folder_name}}.main:test"
[build-system]
requires = ["poetry-core"]

View File

@@ -0,0 +1,32 @@
import subprocess
import click
import pytest
pytest.skip(allow_module_level=True)
def test_crew(n_iterations: int, model: str) -> None:
"""
Test the crew by running a command in the Poetry environment.
Args:
n_iterations (int): The number of iterations to test the crew.
model (str): The model to test the crew with.
"""
command = ["poetry", "run", "test", str(n_iterations), model]
try:
if n_iterations <= 0:
raise ValueError("The number of iterations must be a positive integer.")
result = subprocess.run(command, capture_output=False, text=True, check=True)
if result.stderr:
click.echo(result.stderr, err=True)
except subprocess.CalledProcessError as e:
click.echo(f"An error occurred while testing the crew: {e}", err=True)
click.echo(e.output, err=True)
except Exception as e:
click.echo(f"An unexpected error occurred: {e}", err=True)

View File

@@ -32,8 +32,10 @@ from crewai.tasks.conditional_task import ConditionalTask
from crewai.tasks.task_output import TaskOutput
from crewai.telemetry import Telemetry
from crewai.tools.agent_tools import AgentTools
from crewai.types.usage_metrics import UsageMetrics
from crewai.utilities import I18N, FileHandler, Logger, RPMController
from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
from crewai.utilities.formatter import (
aggregate_raw_outputs_from_task_outputs,
@@ -96,7 +98,7 @@ class Crew(BaseModel):
default_factory=TaskOutputStorageHandler
)
name: Optional[str] = Field(default="")
name: Optional[str] = Field(default=None)
cache: bool = Field(default=True)
model_config = ConfigDict(arbitrary_types_allowed=True)
tasks: List[Task] = Field(default_factory=list)
@@ -111,7 +113,7 @@ class Crew(BaseModel):
default={"provider": "openai"},
description="Configuration for the embedder to be used for the crew.",
)
usage_metrics: Optional[dict] = Field(
usage_metrics: Optional[UsageMetrics] = Field(
default=None,
description="Metrics for the LLM usage during all tasks execution.",
)
@@ -148,13 +150,17 @@ class Crew(BaseModel):
description="Path to the prompt json file to be used for the crew.",
)
output_log_file: Optional[str] = Field(
default="",
default=None,
description="output_log_file",
)
planning: Optional[bool] = Field(
default=False,
description="Plan the crew execution and add the plan to the crew.",
)
planning_llm: Optional[Any] = Field(
default=None,
description="Language model that will run the AgentPlanner if planning is True.",
)
task_execution_output_json_files: Optional[List[str]] = Field(
default=None,
description="List of file paths for task execution JSON files.",
@@ -267,20 +273,6 @@ class Crew(BaseModel):
return self
@model_validator(mode="after")
def check_tasks_in_hierarchical_process_not_async(self):
"""Validates that the tasks in hierarchical process are not flagged with async_execution."""
if self.process == Process.hierarchical:
for task in self.tasks:
if task.async_execution:
raise PydanticCustomError(
"async_execution_in_hierarchical_process",
"Hierarchical process error: Tasks cannot be flagged with async_execution.",
{},
)
return self
@model_validator(mode="after")
def validate_end_with_at_most_one_async_task(self):
"""Validates that the crew ends with at most one asynchronous task."""
@@ -463,7 +455,7 @@ class Crew(BaseModel):
if self.planning:
self._handle_crew_planning()
metrics = []
metrics: List[UsageMetrics] = []
if self.process == Process.sequential:
result = self._run_sequential_process()
@@ -473,11 +465,12 @@ class Crew(BaseModel):
raise NotImplementedError(
f"The process '{self.process}' is not implemented yet."
)
metrics += [agent._token_process.get_summary() for agent in self.agents]
self.usage_metrics = {
key: sum([m[key] for m in metrics if m is not None]) for key in metrics[0]
}
self.usage_metrics = UsageMetrics()
for metric in metrics:
self.usage_metrics.add_usage_metrics(metric)
return result
@@ -486,12 +479,7 @@ class Crew(BaseModel):
results: List[CrewOutput] = []
# Initialize the parent crew's usage metrics
total_usage_metrics = {
"total_tokens": 0,
"prompt_tokens": 0,
"completion_tokens": 0,
"successful_requests": 0,
}
total_usage_metrics = UsageMetrics()
for input_data in inputs:
crew = self.copy()
@@ -499,8 +487,7 @@ class Crew(BaseModel):
output = crew.kickoff(inputs=input_data)
if crew.usage_metrics:
for key in total_usage_metrics:
total_usage_metrics[key] += crew.usage_metrics.get(key, 0)
total_usage_metrics.add_usage_metrics(crew.usage_metrics)
results.append(output)
@@ -529,29 +516,10 @@ class Crew(BaseModel):
results = await asyncio.gather(*tasks)
total_usage_metrics = {
"total_tokens": 0,
"prompt_tokens": 0,
"completion_tokens": 0,
"successful_requests": 0,
}
total_usage_metrics = UsageMetrics()
for crew in crew_copies:
if crew.usage_metrics:
for key in total_usage_metrics:
total_usage_metrics[key] += crew.usage_metrics.get(key, 0)
self.usage_metrics = total_usage_metrics
total_usage_metrics = {
"total_tokens": 0,
"prompt_tokens": 0,
"completion_tokens": 0,
"successful_requests": 0,
}
for crew in crew_copies:
if crew.usage_metrics:
for key in total_usage_metrics:
total_usage_metrics[key] += crew.usage_metrics.get(key, 0)
total_usage_metrics.add_usage_metrics(crew.usage_metrics)
self.usage_metrics = total_usage_metrics
self._task_output_handler.reset()
@@ -560,15 +528,12 @@ class Crew(BaseModel):
def _handle_crew_planning(self):
"""Handles the Crew planning."""
self._logger.log("info", "Planning the crew execution")
result = CrewPlanner(self.tasks)._handle_crew_planning()
result = CrewPlanner(
tasks=self.tasks, planning_agent_llm=self.planning_llm
)._handle_crew_planning()
if result is not None and hasattr(result, "list_of_plans_per_task"):
for task, step_plan in zip(self.tasks, result.list_of_plans_per_task):
task.description += step_plan
else:
self._logger.log(
"info", "Something went wrong with the planning process of the Crew"
)
for task, step_plan in zip(self.tasks, result.list_of_plans_per_task):
task.description += step_plan
def _store_execution_log(
self,
@@ -606,7 +571,7 @@ class Crew(BaseModel):
def _run_hierarchical_process(self) -> CrewOutput:
"""Creates and assigns a manager agent to make sure the crew completes the tasks."""
self._create_manager_agent()
return self._execute_tasks(self.tasks, self.manager_agent)
return self._execute_tasks(self.tasks)
def _create_manager_agent(self):
i18n = I18N(prompt_file=self.prompt_file)
@@ -630,7 +595,6 @@ class Crew(BaseModel):
def _execute_tasks(
self,
tasks: List[Task],
manager: Optional[BaseAgent] = None,
start_index: Optional[int] = 0,
was_replayed: bool = False,
) -> CrewOutput:
@@ -658,13 +622,13 @@ class Crew(BaseModel):
last_sync_output = task.output
continue
agent_to_use = self._get_agent_to_use(task, manager)
agent_to_use = self._get_agent_to_use(task)
if agent_to_use is None:
raise ValueError(
f"No agent available for task: {task.description}. Ensure that either the task has an assigned agent or a manager agent is provided."
)
self._prepare_agent_tools(task, manager)
self._prepare_agent_tools(task)
self._log_task_start(task, agent_to_use.role)
if isinstance(task, ConditionalTask):
@@ -730,20 +694,18 @@ class Crew(BaseModel):
return skipped_task_output
return None
def _prepare_agent_tools(self, task: Task, manager: Optional[BaseAgent]):
def _prepare_agent_tools(self, task: Task):
if self.process == Process.hierarchical:
if manager:
self._update_manager_tools(task, manager)
if self.manager_agent:
self._update_manager_tools(task)
else:
raise ValueError("Manager agent is required for hierarchical process.")
elif task.agent and task.agent.allow_delegation:
self._add_delegation_tools(task)
def _get_agent_to_use(
self, task: Task, manager: Optional[BaseAgent]
) -> Optional[BaseAgent]:
def _get_agent_to_use(self, task: Task) -> Optional[BaseAgent]:
if self.process == Process.hierarchical:
return manager
return self.manager_agent
return task.agent
def _add_delegation_tools(self, task: Task):
@@ -779,11 +741,14 @@ class Crew(BaseModel):
if self.output_log_file:
self._file_handler.log(agent=role, task=task.description, status="started")
def _update_manager_tools(self, task: Task, manager: BaseAgent):
if task.agent:
manager.tools = task.agent.get_delegation_tools([task.agent])
else:
manager.tools = manager.get_delegation_tools(self.agents)
def _update_manager_tools(self, task: Task):
if self.manager_agent:
if task.agent:
self.manager_agent.tools = task.agent.get_delegation_tools([task.agent])
else:
self.manager_agent.tools = self.manager_agent.get_delegation_tools(
self.agents
)
def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
context = (
@@ -882,7 +847,7 @@ class Crew(BaseModel):
self.tasks[i].output = task_output
self._logging_color = "bold_blue"
result = self._execute_tasks(self.tasks, self.manager_agent, start_index, True)
result = self._execute_tasks(self.tasks, start_index, True)
return result
def copy(self):
@@ -945,28 +910,36 @@ class Crew(BaseModel):
)
self._telemetry.end_crew(self, final_string_output)
def calculate_usage_metrics(self) -> Dict[str, int]:
def calculate_usage_metrics(self) -> UsageMetrics:
"""Calculates and returns the usage metrics."""
total_usage_metrics = {
"total_tokens": 0,
"prompt_tokens": 0,
"completion_tokens": 0,
"successful_requests": 0,
}
total_usage_metrics = UsageMetrics()
for agent in self.agents:
if hasattr(agent, "_token_process"):
token_sum = agent._token_process.get_summary()
for key in total_usage_metrics:
total_usage_metrics[key] += token_sum.get(key, 0)
total_usage_metrics.add_usage_metrics(token_sum)
if self.manager_agent and hasattr(self.manager_agent, "_token_process"):
token_sum = self.manager_agent._token_process.get_summary()
for key in total_usage_metrics:
total_usage_metrics[key] += token_sum.get(key, 0)
total_usage_metrics.add_usage_metrics(token_sum)
return total_usage_metrics
def test(
self,
n_iterations: int,
openai_model_name: str,
inputs: Optional[Dict[str, Any]] = None,
) -> None:
"""Test and evaluate the Crew with the given inputs for n iterations."""
evaluator = CrewEvaluator(self, openai_model_name)
for i in range(1, n_iterations + 1):
evaluator.set_iteration(i)
self.kickoff(inputs=inputs)
evaluator.print_crew_evaluation_result()
def __rshift__(self, other: "Crew") -> "Pipeline":
"""
Implements the >> operator to add another Crew to an existing Pipeline.

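The headline addition here is `Crew.test`, which runs the crew n times and hands each run to the new `CrewEvaluator`. A usage sketch (the agent and task are illustrative, and an actual run needs a valid OpenAI key):

```python
from crewai import Agent, Crew, Task

researcher = Agent(role="Researcher", goal="Research {topic}", backstory="Curious analyst")
research = Task(
    description="Research {topic} and list key findings",
    expected_output="A short bullet list",
    agent=researcher,
)

crew = Crew(agents=[researcher], tasks=[research])
# Kicks off the crew twice, scoring each run; prints the evaluation table at the end.
crew.test(n_iterations=2, openai_model_name="gpt-4o-mini", inputs={"topic": "AI LLMs"})
print(crew.usage_metrics)  # aggregated UsageMetrics instead of the old dict
```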
View File

@@ -5,6 +5,7 @@ from pydantic import BaseModel, Field
from crewai.tasks.output_format import OutputFormat
from crewai.tasks.task_output import TaskOutput
from crewai.types.usage_metrics import UsageMetrics
class CrewOutput(BaseModel):
@@ -20,9 +21,7 @@ class CrewOutput(BaseModel):
tasks_output: list[TaskOutput] = Field(
description="Output of each task", default=[]
)
token_usage: Dict[str, Any] = Field(
description="Processed token summary", default={}
)
token_usage: UsageMetrics = Field(description="Processed token summary", default={})
@property
def json(self) -> Optional[str]:

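`token_usage` on `CrewOutput` is now the typed model as well. A small sketch, constructing the output directly just for illustration (field defaults assumed):

```python
from crewai.crews.crew_output import CrewOutput
from crewai.types.usage_metrics import UsageMetrics

out = CrewOutput(
    raw="done",
    token_usage=UsageMetrics(total_tokens=42, successful_requests=1),
)
# Attribute access replaces dict lookups like token_usage["total_tokens"]:
print(out.token_usage.total_tokens, out.token_usage.successful_requests)  # 42 1
```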
View File

@@ -19,7 +19,7 @@ class ShortTermMemory(Memory):
super().__init__(storage)
def save(self, item: ShortTermMemoryItem) -> None:
super().save(item.data, item.metadata, item.agent)
super().save(value=item.data, metadata=item.metadata, agent=item.agent)
def search(self, query: str, score_threshold: float = 0.35):
return self.storage.search(query=query, score_threshold=score_threshold)  # type: ignore # BUG? The reference is to the parent class, but the parent class does not have these parameters

View File

@@ -10,19 +10,53 @@ from crewai.crew import Crew
from crewai.crews.crew_output import CrewOutput
from crewai.pipeline.pipeline_run_result import PipelineRunResult
from crewai.types.pipeline_stage import PipelineStage
from crewai.types.usage_metrics import UsageMetrics
if TYPE_CHECKING:
from crewai.routers.pipeline_router import PipelineRouter
Trace = Union[Union[str, Dict[str, Any]], List[Union[str, Dict[str, Any]]]]
"""
Developer Notes:
This module defines a Pipeline class that represents a sequence of operations (stages)
to process inputs. Each stage can be either sequential or parallel, and the pipeline
can process multiple runs concurrently.
Core Loop Explanation:
1. The `process_runs` method processes multiple runs in parallel, each going through
all pipeline stages.
2. The `process_single_run` method handles the processing of a single run through
all stages, updating metrics and input data along the way.
3. The `_process_stage` method determines whether a stage is sequential or parallel
and processes it accordingly.
4. The `_process_single_crew` and `_process_parallel_crews` methods handle the
execution of single and parallel crew stages.
5. The `_update_metrics_and_input` method updates usage metrics and the current input
with the outputs from a stage.
6. The `_build_pipeline_run_results` method constructs the final results of the
pipeline run, including traces and outputs.
Handling Traces and Crew Outputs:
- During the processing of stages, we handle the results (traces and crew outputs)
for all stages except the last one differently from the final stage.
- For intermediate stages, the primary focus is on passing the input data between stages.
This involves merging the output dictionaries from all crews in a stage into a single
dictionary and passing it to the next stage. This merged dictionary allows for smooth
data flow between stages.
- For the final stage, in addition to passing the input data, we also need to prepare
the final outputs and traces to be returned as the overall result of the pipeline run.
In this case, we do not merge the results, as each result needs to be included
separately in its own pipeline run result.
Pipeline Terminology:
Pipeline: The overall structure that defines a sequence of operations.
Stage: A distinct part of the pipeline, which can be either sequential or parallel.
Run: A specific execution of the pipeline for a given set of inputs, representing a single instance of processing through the pipeline.
Branch: Parallel executions within a stage (e.g., concurrent crew operations).
Trace: The journey of an individual input through the entire pipeline.
- Pipeline: The overall structure that defines a sequence of operations.
- Stage: A distinct part of the pipeline, which can be either sequential or parallel.
- Run: A specific execution of the pipeline for a given set of inputs, representing a single instance of processing through the pipeline.
- Branch: Parallel executions within a stage (e.g., concurrent crew operations).
- Trace: The journey of an individual input through the entire pipeline.
Example pipeline structure:
crew1 >> crew2 >> crew3
@@ -56,6 +90,15 @@ class Pipeline(BaseModel):
@model_validator(mode="before")
@classmethod
def validate_stages(cls, values):
"""
Validates the stages to ensure correct nesting and types.
Args:
values (dict): Dictionary containing the pipeline stages.
Returns:
dict: Validated stages.
"""
stages = values.get("stages", [])
def check_nesting_and_type(item, depth=0):
@@ -77,9 +120,15 @@ class Pipeline(BaseModel):
self, run_inputs: List[Dict[str, Any]]
) -> List[PipelineRunResult]:
"""
Process multiple runs in parallel, with each run going through all stages.
Processes multiple runs in parallel, each going through all pipeline stages.
Args:
run_inputs (List[Dict[str, Any]]): List of inputs for each run.
Returns:
List[PipelineRunResult]: List of results from each run.
"""
pipeline_results = []
pipeline_results: List[PipelineRunResult] = []
# Process all runs in parallel
all_run_results = await asyncio.gather(
@@ -96,9 +145,18 @@ class Pipeline(BaseModel):
async def process_single_run(
self, run_input: Dict[str, Any]
) -> List[PipelineRunResult]:
"""
Processes a single run through all pipeline stages.
Args:
run_input (Dict[str, Any]): The input for the run.
Returns:
List[PipelineRunResult]: The results of processing the run.
"""
initial_input = copy.deepcopy(run_input)
current_input = copy.deepcopy(run_input)
usage_metrics = {}
pipeline_usage_metrics: Dict[str, UsageMetrics] = {}
all_stage_outputs: List[List[CrewOutput]] = []
traces: List[List[Union[str, Dict[str, Any]]]] = [[initial_input]]
@@ -121,19 +179,29 @@ class Pipeline(BaseModel):
stage_outputs, stage_trace = await self._process_stage(stage, stage_input)
self._update_metrics_and_input(
usage_metrics, current_input, stage, stage_outputs
pipeline_usage_metrics, current_input, stage, stage_outputs
)
traces.append(stage_trace)
all_stage_outputs.append(stage_outputs)
stage_index += 1
return self._build_pipeline_run_results(
all_stage_outputs, traces, usage_metrics
all_stage_outputs, traces, pipeline_usage_metrics
)
async def _process_stage(
self, stage: PipelineStage, current_input: Dict[str, Any]
) -> Tuple[List[CrewOutput], List[Union[str, Dict[str, Any]]]]:
"""
Processes a single stage of the pipeline, which can be either sequential or parallel.
Args:
stage (Union[Crew, List[Crew]]): The stage to process.
current_input (Dict[str, Any]): The input for the stage.
Returns:
Tuple[List[CrewOutput], List[Union[str, Dict[str, Any]]]]: The outputs and trace of the stage.
"""
if isinstance(stage, Crew):
return await self._process_single_crew(stage, current_input)
elif isinstance(stage, list) and all(isinstance(crew, Crew) for crew in stage):
@@ -154,12 +222,32 @@ class Pipeline(BaseModel):
async def _process_single_crew(
self, crew: Crew, current_input: Dict[str, Any]
) -> Tuple[List[CrewOutput], List[Union[str, Dict[str, Any]]]]:
"""
Processes a single crew.
Args:
crew (Crew): The crew to process.
current_input (Dict[str, Any]): The input for the crew.
Returns:
Tuple[List[CrewOutput], List[Union[str, Dict[str, Any]]]]: The output and trace of the crew.
"""
output = await crew.kickoff_async(inputs=current_input)
return [output], [crew.name or str(crew.id)]
async def _process_parallel_crews(
self, crews: List[Crew], current_input: Dict[str, Any]
) -> Tuple[List[CrewOutput], List[Union[str, Dict[str, Any]]]]:
"""
Processes multiple crews in parallel.
Args:
crews (List[Crew]): The list of crews to process in parallel.
current_input (Dict[str, Any]): The input for the crews.
Returns:
Tuple[List[CrewOutput], List[Union[str, Dict[str, Any]]]]: The outputs and traces of the crews.
"""
parallel_outputs = await asyncio.gather(
*[crew.kickoff_async(inputs=current_input) for crew in crews]
)
@@ -167,11 +255,20 @@ class Pipeline(BaseModel):
def _update_metrics_and_input(
self,
usage_metrics: Dict[str, Any],
usage_metrics: Dict[str, UsageMetrics],
current_input: Dict[str, Any],
stage: PipelineStage,
outputs: List[CrewOutput],
) -> None:
"""
Updates metrics and current input with the outputs of a stage.
Args:
usage_metrics (Dict[str, Any]): The usage metrics to update.
current_input (Dict[str, Any]): The current input to update.
stage (Union[Crew, List[Crew]]): The stage that was processed.
outputs (List[CrewOutput]): The outputs of the stage.
"""
if isinstance(stage, Crew):
usage_metrics[stage.name or str(stage.id)] = outputs[0].token_usage
current_input.update(outputs[0].to_dict())
@@ -186,8 +283,19 @@ class Pipeline(BaseModel):
self,
all_stage_outputs: List[List[CrewOutput]],
traces: List[List[Union[str, Dict[str, Any]]]],
token_usage: Dict[str, Any],
token_usage: Dict[str, UsageMetrics],
) -> List[PipelineRunResult]:
"""
Builds the results of a pipeline run.
Args:
all_stage_outputs (List[List[CrewOutput]]): All stage outputs.
traces (List[List[Union[str, Dict[str, Any]]]]): All traces.
token_usage (Dict[str, Any]): Token usage metrics.
Returns:
List[PipelineRunResult]: The results of the pipeline run.
"""
formatted_traces = self._format_traces(traces)
formatted_crew_outputs = self._format_crew_outputs(all_stage_outputs)
@@ -208,12 +316,51 @@ class Pipeline(BaseModel):
def _format_traces(
self, traces: List[List[Union[str, Dict[str, Any]]]]
) -> List[List[Trace]]:
formatted_traces: List[Trace] = []
for trace in traces[:-1]:
formatted_traces.append(trace[0] if len(trace) == 1 else trace)
"""
Formats the traces of a pipeline run.
Args:
traces (List[List[Union[str, Dict[str, Any]]]]): The traces to format.
Returns:
List[List[Trace]]: The formatted traces.
"""
formatted_traces: List[Trace] = self._format_single_trace(traces[:-1])
return self._format_multiple_traces(formatted_traces, traces[-1])
def _format_single_trace(
self, traces: List[List[Union[str, Dict[str, Any]]]]
) -> List[Trace]:
"""
Formats single traces.
Args:
traces (List[List[Union[str, Dict[str, Any]]]]): The traces to format.
Returns:
List[Trace]: The formatted single traces.
"""
formatted_traces: List[Trace] = []
for trace in traces:
formatted_traces.append(trace[0] if len(trace) == 1 else trace)
return formatted_traces
def _format_multiple_traces(
self,
formatted_traces: List[Trace],
final_trace: List[Union[str, Dict[str, Any]]],
) -> List[List[Trace]]:
"""
Formats multiple traces.
Args:
formatted_traces (List[Trace]): The formatted single traces.
final_trace (List[Union[str, Dict[str, Any]]]): The final trace to format.
Returns:
List[List[Trace]]: The formatted multiple traces.
"""
traces_to_return: List[List[Trace]] = []
final_trace = traces[-1]
if len(final_trace) == 1:
formatted_traces.append(final_trace[0])
traces_to_return.append(formatted_traces)
@@ -222,12 +369,20 @@ class Pipeline(BaseModel):
copied_traces = formatted_traces.copy()
copied_traces.append(trace)
traces_to_return.append(copied_traces)
return traces_to_return
def _format_crew_outputs(
self, all_stage_outputs: List[List[CrewOutput]]
) -> List[List[CrewOutput]]:
"""
Formats the outputs of all stages into a list of crew outputs.
Args:
all_stage_outputs (List[List[CrewOutput]]): All stage outputs.
Returns:
List[List[CrewOutput]]: Formatted crew outputs.
"""
crew_outputs: List[CrewOutput] = [
output
for stage_outputs in all_stage_outputs[:-1]

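Putting the terminology above together, a hedged end-to-end sketch; `research_crew`, `analysis_crew_a`, `analysis_crew_b`, and `report_crew` stand for pre-built `Crew` instances:

```python
import asyncio

from crewai.pipeline.pipeline import Pipeline

# Stage 2 is a parallel branch, matching the docstring's stage/branch terminology.
pipeline = Pipeline(stages=[research_crew, [analysis_crew_a, analysis_crew_b], report_crew])

async def main():
    # One run per input dict; every run flows through all three stages.
    results = await pipeline.process_runs([{"topic": "AI LLMs"}, {"topic": "Vector DBs"}])
    for run_result in results:
        print(run_result.token_usage)  # Dict[str, UsageMetrics], keyed by crew name or id

asyncio.run(main())
```

The `__rshift__` hook added to `Crew` in the crew.py hunk above means the same composition can also start from a crew via the `>>` operator.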
View File

@@ -5,6 +5,7 @@ from typing import Any, Dict, List, Optional, Union
from pydantic import UUID4, BaseModel, Field
from crewai.crews.crew_output import CrewOutput
from crewai.types.usage_metrics import UsageMetrics
class PipelineRunResult(BaseModel):
@@ -23,7 +24,7 @@ class PipelineRunResult(BaseModel):
description="JSON dict output of the pipeline run", default={}
)
token_usage: Dict[str, Any] = Field(
token_usage: Dict[str, UsageMetrics] = Field(
description="Token usage for each crew in the run"
)
trace: List[Any] = Field(

View File

@@ -1,2 +1,25 @@
from .annotations import agent, crew, task
from .annotations import (
agent,
crew,
task,
output_json,
output_pydantic,
tool,
callback,
llm,
cache_handler,
)
from .crew_base import CrewBase
__all__ = [
"agent",
"crew",
"task",
"output_json",
"output_pydantic",
"tool",
"callback",
"CrewBase",
"llm",
"cache_handler",
]

View File

@@ -30,6 +30,37 @@ def agent(func):
return func
def llm(func):
func.is_llm = True
func = memoize(func)
return func
def output_json(cls):
cls.is_output_json = True
return cls
def output_pydantic(cls):
cls.is_output_pydantic = True
return cls
def tool(func):
func.is_tool = True
return memoize(func)
def callback(func):
func.is_callback = True
return memoize(func)
def cache_handler(func):
func.is_cache_handler = True
return memoize(func)
def crew(func):
def wrapper(self, *args, **kwargs):
instantiated_tasks = []

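These decorators only tag the callable with a marker attribute and memoize it; discovery happens later in `CrewBase`. A standalone sketch (it assumes, as the decorators above rely on, that `memoize` carries the marker attributes over to its wrapper):

```python
from crewai.project.annotations import llm, tool

@llm
def fast_llm():
    return object()  # stands in for a configured LLM instance

@tool
def search_tool():
    return object()  # stands in for a tool instance

print(fast_llm.is_llm, search_tool.is_tool)  # True True
print(fast_llm() is fast_llm())  # True; memoized, so repeated calls return the same object
```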
View File

@@ -1,6 +1,7 @@
import inspect
import os
from pathlib import Path
from typing import Any, Callable, Dict
import yaml
from dotenv import load_dotenv
@@ -20,11 +21,6 @@ def CrewBase(cls):
base_directory = Path(frame_info.filename).parent.resolve()
break
if base_directory is None:
raise Exception(
"Unable to dynamically determine the project's base directory, you must run it from the project's root directory."
)
original_agents_config_path = getattr(
cls, "agents_config", "config/agents.yaml"
)
@@ -32,12 +28,20 @@ def CrewBase(cls):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
if self.base_directory is None:
raise Exception(
"Unable to dynamically determine the project's base directory, you must run it from the project's root directory."
)
self.agents_config = self.load_yaml(
os.path.join(self.base_directory, self.original_agents_config_path)
)
self.tasks_config = self.load_yaml(
os.path.join(self.base_directory, self.original_tasks_config_path)
)
self.map_all_agent_variables()
self.map_all_task_variables()
@staticmethod
def load_yaml(config_path: str):
@@ -45,4 +49,138 @@ def CrewBase(cls):
# parsedContent = YamlParser.parse(file) # type: ignore # Argument 1 to "parse" has incompatible type "TextIOWrapper"; expected "YamlParser"
return yaml.safe_load(file)
def _get_all_functions(self):
return {
name: getattr(self, name)
for name in dir(self)
if callable(getattr(self, name))
}
def _filter_functions(
self, functions: Dict[str, Callable], attribute: str
) -> Dict[str, Callable]:
return {
name: func
for name, func in functions.items()
if hasattr(func, attribute)
}
def map_all_agent_variables(self) -> None:
all_functions = self._get_all_functions()
llms = self._filter_functions(all_functions, "is_llm")
tool_functions = self._filter_functions(all_functions, "is_tool")
cache_handler_functions = self._filter_functions(
all_functions, "is_cache_handler"
)
callbacks = self._filter_functions(all_functions, "is_callback")
agents = self._filter_functions(all_functions, "is_agent")
for agent_name, agent_info in self.agents_config.items():
self._map_agent_variables(
agent_name,
agent_info,
agents,
llms,
tool_functions,
cache_handler_functions,
callbacks,
)
def _map_agent_variables(
self,
agent_name: str,
agent_info: Dict[str, Any],
agents: Dict[str, Callable],
llms: Dict[str, Callable],
tool_functions: Dict[str, Callable],
cache_handler_functions: Dict[str, Callable],
callbacks: Dict[str, Callable],
) -> None:
if llm := agent_info.get("llm"):
self.agents_config[agent_name]["llm"] = llms[llm]()
if tools := agent_info.get("tools"):
self.agents_config[agent_name]["tools"] = [
tool_functions[tool]() for tool in tools
]
if function_calling_llm := agent_info.get("function_calling_llm"):
self.agents_config[agent_name]["function_calling_llm"] = agents[
function_calling_llm
]()
if step_callback := agent_info.get("step_callback"):
self.agents_config[agent_name]["step_callback"] = callbacks[
step_callback
]()
if cache_handler := agent_info.get("cache_handler"):
self.agents_config[agent_name]["cache_handler"] = (
cache_handler_functions[cache_handler]()
)
def map_all_task_variables(self) -> None:
all_functions = self._get_all_functions()
agents = self._filter_functions(all_functions, "is_agent")
tasks = self._filter_functions(all_functions, "is_task")
output_json_functions = self._filter_functions(
all_functions, "is_output_json"
)
tool_functions = self._filter_functions(all_functions, "is_tool")
callback_functions = self._filter_functions(all_functions, "is_callback")
output_pydantic_functions = self._filter_functions(
all_functions, "is_output_pydantic"
)
for task_name, task_info in self.tasks_config.items():
self._map_task_variables(
task_name,
task_info,
agents,
tasks,
output_json_functions,
tool_functions,
callback_functions,
output_pydantic_functions,
)
def _map_task_variables(
self,
task_name: str,
task_info: Dict[str, Any],
agents: Dict[str, Callable],
tasks: Dict[str, Callable],
output_json_functions: Dict[str, Callable],
tool_functions: Dict[str, Callable],
callback_functions: Dict[str, Callable],
output_pydantic_functions: Dict[str, Callable],
) -> None:
if context_list := task_info.get("context"):
self.tasks_config[task_name]["context"] = [
tasks[context_task_name]() for context_task_name in context_list
]
if tools := task_info.get("tools"):
self.tasks_config[task_name]["tools"] = [
tool_functions[tool]() for tool in tools
]
if agent_name := task_info.get("agent"):
self.tasks_config[task_name]["agent"] = agents[agent_name]()
if output_json := task_info.get("output_json"):
self.tasks_config[task_name]["output_json"] = output_json_functions[
output_json
]
if output_pydantic := task_info.get("output_pydantic"):
self.tasks_config[task_name]["output_pydantic"] = (
output_pydantic_functions[output_pydantic]
)
if callbacks := task_info.get("callbacks"):
self.tasks_config[task_name]["callbacks"] = [
callback_functions[callback]() for callback in callbacks
]
return WrappedClass
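
The net effect: string values in agents.yaml/tasks.yaml name decorated methods, and the mapping code swaps them for the methods' return values. A self-contained sketch of the resolution step, re-implementing `_filter_functions` for illustration:

```python
from typing import Callable, Dict

def filter_functions(functions: Dict[str, Callable], attribute: str) -> Dict[str, Callable]:
    # Same shape as CrewBase._filter_functions above.
    return {name: func for name, func in functions.items() if hasattr(func, attribute)}

def fast_llm():
    return "llm-handle"
fast_llm.is_llm = True  # what the @llm decorator sets

def plain_helper():
    return None

candidates = {"fast_llm": fast_llm, "plain_helper": plain_helper}
llms = filter_functions(candidates, "is_llm")
# Mirrors how an agents.yaml entry `llm: fast_llm` resolves to a call:
print(llms["fast_llm"]())  # "llm-handle"
```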

View File

@@ -1,6 +1,5 @@
import json
import os
import re
import threading
import uuid
from concurrent.futures import Future
@@ -8,7 +7,6 @@ from copy import copy
from hashlib import md5
from typing import Any, Dict, List, Optional, Tuple, Type, Union
from langchain_openai import ChatOpenAI
from opentelemetry.trace import Span
from pydantic import UUID4, BaseModel, Field, field_validator, model_validator
from pydantic_core import PydanticCustomError
@@ -17,10 +15,8 @@ from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.tasks.output_format import OutputFormat
from crewai.tasks.task_output import TaskOutput
from crewai.telemetry.telemetry import Telemetry
from crewai.utilities.converter import Converter, ConverterError
from crewai.utilities.converter import Converter, convert_to_model
from crewai.utilities.i18n import I18N
from crewai.utilities.printer import Printer
from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
class Task(BaseModel):
@@ -50,6 +46,7 @@ class Task(BaseModel):
tools_errors: int = 0
delegations: int = 0
i18n: I18N = I18N()
name: Optional[str] = Field(default=None)
prompt_context: Optional[str] = None
description: str = Field(description="Description of the actual task.")
expected_output: str = Field(
@@ -126,7 +123,7 @@ class Task(BaseModel):
@field_validator("output_file")
@classmethod
def output_file_validattion(cls, value: str) -> str:
def output_file_validation(cls, value: str) -> str:
"""Validate the output file path by removing the / from the beginning of the path."""
if value.startswith("/"):
return value[1:]
@@ -254,9 +251,7 @@ class Task(BaseModel):
content = (
json_output
if json_output
else pydantic_output.model_dump_json()
if pydantic_output
else result
else pydantic_output.model_dump_json() if pydantic_output else result
)
self._save_file(content)
@@ -326,18 +321,6 @@ class Task(BaseModel):
return copied_task
def _create_converter(self, *args, **kwargs) -> Converter:
"""Create a converter instance."""
if self.agent and not self.converter_cls:
converter = self.agent.get_output_converter(*args, **kwargs)
elif self.converter_cls:
converter = self.converter_cls(*args, **kwargs)
if not converter:
raise Exception("No output converter found or set.")
return converter
def _export_output(
self, result: str
) -> Tuple[Optional[BaseModel], Optional[Dict[str, Any]]]:
@@ -345,75 +328,26 @@ class Task(BaseModel):
json_output: Optional[Dict[str, Any]] = None
if self.output_pydantic or self.output_json:
model_output = self._convert_to_model(result)
pydantic_output = (
model_output if isinstance(model_output, BaseModel) else None
model_output = convert_to_model(
result,
self.output_pydantic,
self.output_json,
self.agent,
self.converter_cls,
)
if isinstance(model_output, str):
if isinstance(model_output, BaseModel):
pydantic_output = model_output
elif isinstance(model_output, dict):
json_output = model_output
elif isinstance(model_output, str):
try:
json_output = json.loads(model_output)
except json.JSONDecodeError:
json_output = None
else:
json_output = model_output if isinstance(model_output, dict) else None
return pydantic_output, json_output
def _convert_to_model(self, result: str) -> Union[dict, BaseModel, str]:
model = self.output_pydantic or self.output_json
if model is None:
return result
try:
return self._validate_model(result, model)
except Exception:
return self._handle_partial_json(result, model)
def _validate_model(
self, result: str, model: Type[BaseModel]
) -> Union[dict, BaseModel]:
exported_result = model.model_validate_json(result)
if self.output_json:
return exported_result.model_dump()
return exported_result
def _handle_partial_json(
self, result: str, model: Type[BaseModel]
) -> Union[dict, BaseModel, str]:
match = re.search(r"({.*})", result, re.DOTALL)
if match:
try:
exported_result = model.model_validate_json(match.group(0))
if self.output_json:
return exported_result.model_dump()
return exported_result
except Exception:
pass
return self._convert_with_instructions(result, model)
def _convert_with_instructions(
self, result: str, model: Type[BaseModel]
) -> Union[dict, BaseModel, str]:
llm = self.agent.function_calling_llm or self.agent.llm # type: ignore # Item "None" of "BaseAgent | None" has no attribute "function_calling_llm"
instructions = self._get_conversion_instructions(model, llm)
converter = self._create_converter(
llm=llm, text=result, model=model, instructions=instructions
)
exported_result = (
converter.to_pydantic() if self.output_pydantic else converter.to_json()
)
if isinstance(exported_result, ConverterError):
Printer().print(
content=f"{exported_result.message} Using raw output instead.",
color="red",
)
return result
return exported_result
def _get_output_format(self) -> OutputFormat:
if self.output_json:
return OutputFormat.JSON
@@ -421,34 +355,22 @@ class Task(BaseModel):
return OutputFormat.PYDANTIC
return OutputFormat.RAW
def _get_conversion_instructions(self, model: Type[BaseModel], llm: Any) -> str:
instructions = "I'm gonna convert this raw text into valid JSON."
if not self._is_gpt(llm):
model_schema = PydanticSchemaParser(model=model).get_schema()
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
return instructions
def _save_output(self, content: str) -> None:
if not self.output_file:
raise Exception("Output file path is not set.")
directory = os.path.dirname(self.output_file)
if directory and not os.path.exists(directory):
os.makedirs(directory)
with open(self.output_file, "w", encoding="utf-8") as file:
file.write(content)
def _is_gpt(self, llm) -> bool:
return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
def _save_file(self, result: Any) -> None:
if self.output_file is None:
raise ValueError("output_file is not set.")
directory = os.path.dirname(self.output_file) # type: ignore # Value of type variable "AnyOrLiteralStr" of "dirname" cannot be "str | None"
if directory and not os.path.exists(directory):
os.makedirs(directory)
with open(self.output_file, "w", encoding="utf-8") as file: # type: ignore # Argument 1 to "open" has incompatible type "str | None"; expected "int | str | bytes | PathLike[str] | PathLike[bytes]"
file.write(result)
with open(self.output_file, "w", encoding="utf-8") as file:
if isinstance(result, dict):
import json
json.dump(result, file, ensure_ascii=False, indent=2)
else:
file.write(str(result))
return None
def __repr__(self):

View File

@@ -86,7 +86,8 @@ class ToolUsage:
) -> str:
if isinstance(calling, ToolUsageErrorException):
error = calling.message
self._printer.print(content=f"\n\n{error}\n", color="red")
if self.agent.verbose:
self._printer.print(content=f"\n\n{error}\n", color="red")
self.task.increment_tools_errors()
return error
@@ -96,7 +97,8 @@ class ToolUsage:
except Exception as e:
error = getattr(e, "message", str(e))
self.task.increment_tools_errors()
self._printer.print(content=f"\n\n{error}\n", color="red")
if self.agent.verbose:
self._printer.print(content=f"\n\n{error}\n", color="red")
return error
return f"{self._use(tool_string=tool_string, tool=tool, calling=calling)}" # type: ignore # BUG?: "_use" of "ToolUsage" does not return a value (it only ever returns None)
@@ -112,7 +114,8 @@ class ToolUsage:
result = self._i18n.errors("task_repeated_usage").format(
tool_names=self.tools_names
)
self._printer.print(content=f"\n\n{result}\n", color="purple")
if self.agent.verbose:
self._printer.print(content=f"\n\n{result}\n", color="purple")
self._telemetry.tool_repeated_usage(
llm=self.function_calling_llm,
tool_name=tool.name,
@@ -168,7 +171,10 @@ class ToolUsage:
f'\n{error_message}.\nMoving on then. {self._i18n.slice("format").format(tool_names=self.tools_names)}'
).message
self.task.increment_tools_errors()
self._printer.print(content=f"\n\n{error_message}\n", color="red")
if self.agent.verbose:
self._printer.print(
content=f"\n\n{error_message}\n", color="red"
)
return error # type: ignore # No return value expected
self.task.increment_tools_errors()
@@ -192,7 +198,8 @@ class ToolUsage:
calling=calling, output=result, should_cache=should_cache
)
self._printer.print(content=f"\n\n{result}\n", color="purple")
if self.agent.verbose:
self._printer.print(content=f"\n\n{result}\n", color="purple")
if agentops:
agentops.record(tool_event)
self._telemetry.tool_usage(
@@ -346,7 +353,8 @@ class ToolUsage:
if self._run_attempts > self._max_parsing_attempts:
self._telemetry.tool_usage_error(llm=self.function_calling_llm)
self.task.increment_tools_errors()
self._printer.print(content=f"\n\n{e}\n", color="red")
if self.agent.verbose:
self._printer.print(content=f"\n\n{e}\n", color="red")
return ToolUsageErrorException( # type: ignore # Incompatible return value type (got "ToolUsageErrorException", expected "ToolCalling | InstructorToolCalling")
f'{self._i18n.errors("tool_usage_error").format(error=e)}\nMoving on then. {self._i18n.slice("format").format(tool_names=self.tools_names)}'
)

View File

@@ -0,0 +1,36 @@
from pydantic import BaseModel, Field
class UsageMetrics(BaseModel):
"""
Model to track usage metrics for the crew's execution.
Attributes:
total_tokens: Total number of tokens used.
prompt_tokens: Number of tokens used in prompts.
completion_tokens: Number of tokens used in completions.
successful_requests: Number of successful requests made.
"""
total_tokens: int = Field(default=0, description="Total number of tokens used.")
prompt_tokens: int = Field(
default=0, description="Number of tokens used in prompts."
)
completion_tokens: int = Field(
default=0, description="Number of tokens used in completions."
)
successful_requests: int = Field(
default=0, description="Number of successful requests made."
)
def add_usage_metrics(self, usage_metrics: "UsageMetrics"):
"""
Add the usage metrics from another UsageMetrics object.
Args:
usage_metrics (UsageMetrics): The usage metrics to add.
"""
self.total_tokens += usage_metrics.total_tokens
self.prompt_tokens += usage_metrics.prompt_tokens
self.completion_tokens += usage_metrics.completion_tokens
self.successful_requests += usage_metrics.successful_requests
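
A quick sketch of the aggregation pattern this model enables, which crew.py now uses everywhere in place of key-wise dict sums:

```python
from crewai.types.usage_metrics import UsageMetrics

a = UsageMetrics(total_tokens=120, prompt_tokens=90, completion_tokens=30, successful_requests=1)
b = UsageMetrics(total_tokens=80, prompt_tokens=50, completion_tokens=30, successful_requests=1)

total = UsageMetrics()
total.add_usage_metrics(a)
total.add_usage_metrics(b)
print(total.total_tokens, total.successful_requests)  # 200 2
print(total.model_dump())  # still serializes to the old dict shape when needed
```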

View File

@@ -1,9 +1,14 @@
import json
import re
from typing import Any, Optional, Type, Union
from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, ValidationError
from crewai.agents.agent_builder.utilities.base_output_converter import OutputConverter
from crewai.utilities.printer import Printer
from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
class ConverterError(Exception):
@@ -72,3 +77,153 @@ class Converter(OutputConverter):
def is_gpt(self) -> bool:
"""Return if llm provided is of gpt from openai."""
return isinstance(self.llm, ChatOpenAI) and self.llm.openai_api_base is None
def convert_to_model(
result: str,
output_pydantic: Optional[Type[BaseModel]],
output_json: Optional[Type[BaseModel]],
agent: Any,
converter_cls: Optional[Type[Converter]] = None,
) -> Union[dict, BaseModel, str]:
model = output_pydantic or output_json
if model is None:
return result
try:
escaped_result = json.dumps(json.loads(result, strict=False))
return validate_model(escaped_result, model, bool(output_json))
except json.JSONDecodeError as e:
Printer().print(
content=f"Error parsing JSON: {e}. Attempting to handle partial JSON.",
color="yellow",
)
return handle_partial_json(
result, model, bool(output_json), agent, converter_cls
)
except ValidationError as e:
Printer().print(
content=f"Pydantic validation error: {e}. Attempting to handle partial JSON.",
color="yellow",
)
return handle_partial_json(
result, model, bool(output_json), agent, converter_cls
)
except Exception as e:
Printer().print(
content=f"Unexpected error during model conversion: {type(e).__name__}: {e}. Returning original result.",
color="red",
)
return result
def validate_model(
result: str, model: Type[BaseModel], is_json_output: bool
) -> Union[dict, BaseModel]:
exported_result = model.model_validate_json(result)
if is_json_output:
return exported_result.model_dump()
return exported_result
def handle_partial_json(
result: str,
model: Type[BaseModel],
is_json_output: bool,
agent: Any,
converter_cls: Optional[Type[Converter]] = None,
) -> Union[dict, BaseModel, str]:
match = re.search(r"({.*})", result, re.DOTALL)
if match:
try:
exported_result = model.model_validate_json(match.group(0))
if is_json_output:
return exported_result.model_dump()
return exported_result
except json.JSONDecodeError as e:
Printer().print(
content=f"Error parsing JSON: {e}. The extracted JSON-like string is not valid JSON. Attempting alternative conversion method.",
color="yellow",
)
except ValidationError as e:
Printer().print(
content=f"Pydantic validation error: {e}. The JSON structure doesn't match the expected model. Attempting alternative conversion method.",
color="yellow",
)
except Exception as e:
Printer().print(
content=f"Unexpected error during partial JSON handling: {type(e).__name__}: {e}. Attempting alternative conversion method.",
color="red",
)
return convert_with_instructions(
result, model, is_json_output, agent, converter_cls
)
def convert_with_instructions(
result: str,
model: Type[BaseModel],
is_json_output: bool,
agent: Any,
converter_cls: Optional[Type[Converter]] = None,
) -> Union[dict, BaseModel, str]:
llm = agent.function_calling_llm or agent.llm
instructions = get_conversion_instructions(model, llm)
converter = create_converter(
agent=agent,
converter_cls=converter_cls,
llm=llm,
text=result,
model=model,
instructions=instructions,
)
exported_result = (
converter.to_pydantic() if not is_json_output else converter.to_json()
)
if isinstance(exported_result, ConverterError):
Printer().print(
content=f"{exported_result.message} Using raw output instead.",
color="red",
)
return result
return exported_result
def get_conversion_instructions(model: Type[BaseModel], llm: Any) -> str:
instructions = "I'm gonna convert this raw text into valid JSON."
if not is_gpt(llm):
model_schema = PydanticSchemaParser(model=model).get_schema()
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
return instructions
def is_gpt(llm: Any) -> bool:
from langchain_openai import ChatOpenAI
return isinstance(llm, ChatOpenAI) and llm.openai_api_base is None
def create_converter(
agent: Optional[Any] = None,
converter_cls: Optional[Type[Converter]] = None,
*args,
**kwargs,
) -> Converter:
if agent and not converter_cls:
if hasattr(agent, "get_output_converter"):
converter = agent.get_output_converter(*args, **kwargs)
else:
raise AttributeError("Agent does not have a 'get_output_converter' method")
elif converter_cls:
converter = converter_cls(*args, **kwargs)
else:
raise ValueError("Either agent or converter_cls must be provided")
if not converter:
raise Exception("No output converter found or set.")
return converter
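
The module-level `convert_to_model` replaces the private Task methods removed above. A sketch of the two happy paths; `agent` is only consulted on the fallback path, so `None` is safe for well-formed JSON:

```python
from pydantic import BaseModel

from crewai.utilities.converter import convert_to_model

class Topic(BaseModel):
    name: str
    score: int

# output_pydantic set: valid JSON is validated straight into the model.
as_model = convert_to_model('{"name": "AI", "score": 9}', Topic, None, agent=None)
print(type(as_model).__name__, as_model.score)  # Topic 9

# output_json set instead: the same input comes back as a plain dict.
as_dict = convert_to_model('{"name": "AI", "score": 9}', None, Topic, agent=None)
print(as_dict)  # {'name': 'AI', 'score': 9}
```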

View File

@@ -1,5 +1,5 @@
import json
from typing import Any, List, Type, Union
from typing import Any, List, Type
import regex
from langchain.output_parsers import PydanticOutputParser
@@ -7,29 +7,24 @@ from langchain_core.exceptions import OutputParserException
from langchain_core.outputs import Generation
from langchain_core.pydantic_v1 import ValidationError
from pydantic import BaseModel
from pydantic.v1 import BaseModel as V1BaseModel
class CrewPydanticOutputParser(PydanticOutputParser):
"""Parses the text into pydantic models"""
pydantic_object: Union[Type[BaseModel], Type[V1BaseModel]]
pydantic_object: Type[BaseModel]
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
def parse_result(self, result: List[Generation]) -> Any:
result[0].text = self._transform_in_valid_json(result[0].text)
# Treating edge case of function calling llm returning the name instead of tool_name
json_object = json.loads(result[0].text)
json_object["tool_name"] = (
json_object["name"]
if "tool_name" not in json_object
else json_object["tool_name"]
)
if "tool_name" not in json_object:
json_object["tool_name"] = json_object.get("name", "")
result[0].text = json.dumps(json_object)
json_object = super().parse_result(result)
try:
return self.pydantic_object.parse_obj(json_object)
return self.pydantic_object.model_validate(json_object)
except ValidationError as e:
name = self.pydantic_object.__name__
msg = f"Failed to parse {name} from completion {json_object}. Got: {e}"

View File

@@ -0,0 +1,149 @@
from collections import defaultdict
from langchain_openai import ChatOpenAI
from pydantic import BaseModel, Field
from rich.console import Console
from rich.table import Table
from crewai.agent import Agent
from crewai.task import Task
from crewai.tasks.task_output import TaskOutput
class TaskEvaluationPydanticOutput(BaseModel):
quality: float = Field(
description="A score from 1 to 10 evaluating on completion, quality, and overall performance from the task_description and task_expected_output to the actual Task Output."
)
class CrewEvaluator:
"""
A class to evaluate the performance of the agents in the crew based on the tasks they have performed.
Attributes:
crew (Crew): The crew of agents to evaluate.
openai_model_name (str): The model to use for evaluating the performance of the agents (for now ONLY OpenAI accepted).
tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task.
iteration (int): The current iteration of the evaluation.
"""
tasks_scores: defaultdict = defaultdict(list)
iteration: int = 0
def __init__(self, crew, openai_model_name: str):
self.crew = crew
self.openai_model_name = openai_model_name
self._setup_for_evaluating()
def _setup_for_evaluating(self) -> None:
"""Sets up the crew for evaluating."""
for task in self.crew.tasks:
task.callback = self.evaluate
def set_iteration(self, iteration: int) -> None:
self.iteration = iteration
def _evaluator_agent(self):
return Agent(
role="Task Execution Evaluator",
goal=(
"Your goal is to evaluate the performance of the agents in the crew based on the tasks they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
),
backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
verbose=False,
llm=ChatOpenAI(model=self.openai_model_name),
)
def _evaluation_task(
self, evaluator_agent: Agent, task_to_evaluate: Task, task_output: str
) -> Task:
return Task(
description=(
"Based on the task description and the expected output, compare and evaluate the performance of the agents in the crew based on the Task Output they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
f"task_description: {task_to_evaluate.description} "
f"task_expected_output: {task_to_evaluate.expected_output} "
f"agent: {task_to_evaluate.agent.role if task_to_evaluate.agent else None} "
f"agent_goal: {task_to_evaluate.agent.goal if task_to_evaluate.agent else None} "
f"Task Output: {task_output}"
),
expected_output="Evaluation Score from 1 to 10 based on the performance of the agents on the tasks",
agent=evaluator_agent,
output_pydantic=TaskEvaluationPydanticOutput,
)
def print_crew_evaluation_result(self) -> None:
"""
Prints the evaluation result of the crew in a table.
A Crew with 2 tasks using the command crewai test -n 2
will output the following table:
Task Scores
(1-10 Higher is better)
┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃
┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
│ Task 1 │ 10.0 │ 9.0 │ 9.5 │
│ Task 2 │ 9.0 │ 9.0 │ 9.0 │
│ Crew │ 9.5 │ 9.0 │ 9.2 │
└────────────┴───────┴───────┴────────────┘
"""
task_averages = [
sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values())
]
crew_average = sum(task_averages) / len(task_averages)
# Create a table
table = Table(title="Tasks Scores \n (1-10 Higher is better)")
# Add columns for the table
table.add_column("Tasks/Crew")
for run in range(1, len(self.tasks_scores) + 1):
table.add_column(f"Run {run}")
table.add_column("Avg. Total")
# Add rows for each task
for task_index in range(len(task_averages)):
task_scores = [
self.tasks_scores[run][task_index]
for run in range(1, len(self.tasks_scores) + 1)
]
avg_score = task_averages[task_index]
table.add_row(
f"Task {task_index + 1}", *map(str, task_scores), f"{avg_score:.1f}"
)
# Add a row for the crew average
crew_scores = [
sum(self.tasks_scores[run]) / len(self.tasks_scores[run])
for run in range(1, len(self.tasks_scores) + 1)
]
table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
# Display the table in the terminal
console = Console()
console.print(table)
def evaluate(self, task_output: TaskOutput):
"""Evaluates the performance of the agents in the crew based on the tasks they have performed."""
current_task = None
for task in self.crew.tasks:
if task.description == task_output.description:
current_task = task
break
if not current_task or not task_output:
raise ValueError(
"Task to evaluate and task output are required for evaluation"
)
evaluator_agent = self._evaluator_agent()
evaluation_task = self._evaluation_task(
evaluator_agent, current_task, task_output.raw
)
evaluation_result = evaluation_task.execute_sync()
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
else:
raise ValueError("Evaluation result is not in the expected format")

View File

@@ -54,23 +54,23 @@ class TaskEvaluator:
def __init__(self, original_agent):
self.llm = original_agent.llm
def evaluate(self, task, ouput) -> TaskEvaluation:
def evaluate(self, task, output) -> TaskEvaluation:
evaluation_query = (
f"Assess the quality of the task completed based on the description, expected output, and actual results.\n\n"
f"Task Description:\n{task.description}\n\n"
f"Expected Output:\n{task.expected_output}\n\n"
f"Actual Output:\n{ouput}\n\n"
f"Actual Output:\n{output}\n\n"
"Please provide:\n"
"- Bullet points suggestions to improve future similar tasks\n"
"- A score from 0 to 10 evaluating on completion, quality, and overall performance"
"- Entities extracted from the task output, if any, their type, description, and relationships"
)
instructions = "I'm gonna convert this raw text into valid JSON."
instructions = "Convert all responses into valid JSON output."
if not self._is_gpt(self.llm):
model_schema = PydanticSchemaParser(model=TaskEvaluation).get_schema()
instructions = f"{instructions}\n\nThe json should have the following structure, with the following keys:\n{model_schema}"
instructions = f"{instructions}\n\nReturn only valid JSON with the following schema:\n```json\n{model_schema}\n```"
converter = Converter(
llm=self.llm,

View File

@@ -1,17 +1,28 @@
import re
class YamlParser:
@staticmethod
def parse(file):
"""
Parses a YAML file, modifies specific patterns, and checks for unsupported 'context' usage.
Args:
file (file object): The YAML file to parse.
Returns:
str: The modified content of the YAML file.
Raises:
ValueError: If 'context:' is used incorrectly.
"""
content = file.read()
# Replace single { and } with doubled ones, while leaving already doubled ones intact and the other special characters {# and {%
modified_content = re.sub(r"(?<!\{){(?!\{)(?!\#)(?!\%)", "{{", content)
modified_content = re.sub(
r"(?<!\})(?<!\%)(?<!\#)\}(?!})", "}}", modified_content
)
modified_content = re.sub(r"(?<!\})(?<!\%)(?<!\#)\}(?!})", "}}", modified_content)
# Check for 'context:' not followed by '[' and raise an error
if re.search(r"context:(?!\s*\[)", modified_content):
raise ValueError(
"Context is currently only supported in code when creating a task. Please use the 'context' key in the task configuration."
"Context is currently only supported in code when creating a task. "
"Please use the 'context' key in the task configuration."
)
return modified_content
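
A minimal sketch of the brace-doubling behavior; the `crewai.utilities.yaml_parser` import path is an assumption, and any object with a `.read()` works as the file argument:

```python
import io

from crewai.utilities.yaml_parser import YamlParser  # assumed module path

raw = "description: Research {topic} thoroughly\n"
print(YamlParser.parse(io.StringIO(raw)))
# -> "description: Research {{topic}} thoroughly"; single braces are doubled,
#    while existing {{ }}, {# and {% sequences are left intact.
```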

View File

@@ -1,5 +1,6 @@
from typing import List, Optional
from typing import Any, List, Optional
from langchain_openai import ChatOpenAI
from pydantic import BaseModel
from crewai.agent import Agent
@@ -11,17 +12,27 @@ class PlannerTaskPydanticOutput(BaseModel):
class CrewPlanner:
def __init__(self, tasks: List[Task]):
def __init__(self, tasks: List[Task], planning_agent_llm: Optional[Any] = None):
self.tasks = tasks
def _handle_crew_planning(self) -> Optional[BaseModel]:
if planning_agent_llm is None:
self.planning_agent_llm = ChatOpenAI(model="gpt-4o-mini")
else:
self.planning_agent_llm = planning_agent_llm
def _handle_crew_planning(self) -> PlannerTaskPydanticOutput:
"""Handles the Crew planning by creating detailed step-by-step plans for each task."""
planning_agent = self._create_planning_agent()
tasks_summary = self._create_tasks_summary()
planner_task = self._create_planner_task(planning_agent, tasks_summary)
return planner_task.execute_sync().pydantic
result = planner_task.execute_sync()
if isinstance(result.pydantic, PlannerTaskPydanticOutput):
return result.pydantic
raise ValueError("Failed to get the Planning output")
def _create_planning_agent(self) -> Agent:
"""Creates the planning agent for the crew planning."""
@@ -32,6 +43,7 @@ class CrewPlanner:
"available to each agent so that they can perform the tasks in an exemplary manner"
),
backstory="Planner agent for crew planning",
llm=self.planning_agent_llm,
)
def _create_planner_task(self, planning_agent: Agent, tasks_summary: str) -> Task:

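The new `planning_llm` knob reaches the planner through `Crew`. A usage sketch (illustrative agent and task; `gpt-4o-mini` is the default the planner falls back to when nothing is passed):

```python
from langchain_openai import ChatOpenAI

from crewai import Agent, Crew, Task

writer = Agent(role="Writer", goal="Write about {topic}", backstory="Seasoned author")
writing = Task(description="Write about {topic}", expected_output="A short article", agent=writer)

crew = Crew(
    agents=[writer],
    tasks=[writing],
    planning=True,                            # runs CrewPlanner before kickoff
    planning_llm=ChatOpenAI(model="gpt-4o"),  # overrides the gpt-4o-mini default
)
```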
View File

@@ -16,11 +16,13 @@ class PydanticSchemaParser(BaseModel):
return self._get_model_schema(self.model)
def _get_model_schema(self, model, depth=0) -> str:
lines = []
indent = " " * depth
lines = [f"{indent}{{"]
for field_name, field in model.model_fields.items():
field_type_str = self._get_field_type(field, depth + 1)
lines.append(f"{' ' * 4 * depth}- {field_name}: {field_type_str}")
lines.append(f"{indent} {field_name}: {field_type_str},")
lines[-1] = lines[-1].rstrip(",") # Remove trailing comma from last item
lines.append(f"{indent}}}")
return "\n".join(lines)
def _get_field_type(self, field, depth) -> str:
@@ -35,6 +37,6 @@ class PydanticSchemaParser(BaseModel):
else:
return f"List[{list_item_type.__name__}]"
elif issubclass(field_type, BaseModel):
return f"\n{self._get_model_schema(field_type, depth)}"
return self._get_model_schema(field_type, depth)
else:
return field_type.__name__
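
The parser now renders nested models in a brace-delimited layout instead of the old bullet list. A sketch of the rough output shape:

```python
from pydantic import BaseModel

from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser

class Address(BaseModel):
    city: str

class Person(BaseModel):
    name: str
    address: Address

print(PydanticSchemaParser(model=Person).get_schema())
# Roughly:
# {
#     name: str,
#     address: {
#         city: str
#     }
# }
```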