Feature/kickoff consistent output (#847)

* Cleaned up task execution so async and sync execution now follow separate paths. Updated all kickoff functions to return CrewOutput (a usage sketch follows the commit list). WIP. Waiting for Joao's feedback on async task execution with task_output

* Consistently storing async and sync output for context

* Outline the tests I need to create going forward

* Major overhaul of TaskOutput and CrewOutput. Updated all tests to work with the new change. Still need to add a few final tricky async tests and a few more that verify the output types on TaskOutput and CrewOutput.

* Encountering issues with callback. Need to test on main. WIP

* Working on tests. WIP

* WIP. Figuring out disconnect issue.

* Cleaned up logs now that I've isolated the issue to the LLM

* More WIP.

* WIP. It looks like usage metrics have always been broken for async execution

* Update the parent crew that manages the for_each loop

* Merge main into bugfix/kickoff-for-each-usage-metrics

* Clean up code for review

* Add new tests

* Final cleanup. Ready for review.

* Moving copy functionality from Agent to BaseAgent

* Fix renaming issue

* Fix linting errors

* use BaseAgent instead of Agent where applicable

* Fixing missing function. Working on tests.

* WIP. Need the team to review the change

* Fixing issues brought about by merge

* WIP

* Implement major fixes from yesterday's group conversation. Now working on tests.

* The majority of tasks are working now. Need to fix the converter class

* Fix final failing test

* Fix linting and type-checker issues

* Add more tests to fully test CrewOutput and TaskOutput changes

* Add validation that async tasks cannot depend on other async tasks (a hypothetical sketch of the rule follows the commit list).

* Update validators and tests
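
For reviewers skimming the commit list: the sketch below condenses the new kickoff surface from the updated tests in the diff that follows. It is illustrative only; running it for real needs an OpenAI key, whereas the actual tests replay recorded VCR cassettes.

# Sketch only: condensed from test_output_pydantic_sequential in the diff below.
from pydantic import BaseModel

from crewai import Agent, Crew, Process, Task


class ScoreOutput(BaseModel):
    score: int


scorer = Agent(
    role="Scorer",
    goal="Score the title",
    backstory="You're an expert scorer, specialized in scoring titles.",
    allow_delegation=False,
)
task = Task(
    description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
    expected_output="The score of the title.",
    output_pydantic=ScoreOutput,
    agent=scorer,
)
crew = Crew(agents=[scorer], tasks=[task], process=Process.sequential)

result = crew.kickoff()   # kickoff now returns a CrewOutput instead of a bare string
print(result.pydantic)    # ScoreOutput(score=...) because output_pydantic is set
print(result.to_dict())   # e.g. {"score": 4}
# result.json and result.json_dict are populated when the task uses output_json,
# as exercised by test_output_json_sequential / test_output_json_dict_sequential.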
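
The validation mentioned near the end of the list (an async task may not depend on another async task) is not exercised in the test file shown below, so the snippet here is only a hypothetical illustration of the rule as stated; the class and function names are invented for the example and are not the validator that ships in this PR.

# Hypothetical illustration of "async tasks cannot depend on other async tasks".
from typing import List, Optional


class DemoTask:
    """Stand-in carrying only the two fields the rule cares about."""

    def __init__(self, name: str, async_execution: bool = False,
                 context: Optional[List["DemoTask"]] = None):
        self.name = name
        self.async_execution = async_execution
        self.context = context or []


def check_async_dependencies(tasks: List[DemoTask]) -> None:
    for task in tasks:
        if not task.async_execution:
            continue
        for dep in task.context:
            if dep.async_execution:
                raise ValueError(
                    f"Async task '{task.name}' cannot depend on async task '{dep.name}'."
                )


research = DemoTask("research", async_execution=True)
summary = DemoTask("summary", async_execution=True, context=[research])
check_async_dependencies([research])             # passes
# check_async_dependencies([research, summary])  # would raise ValueError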
Brandon Hancock (bhancock_ai)
2024-07-10 23:35:02 -04:00
committed by GitHub
parent 691b094a40
commit 7b53457ef3
30 changed files with 260509 additions and 697 deletions


@@ -81,7 +81,7 @@ def test_task_prompt_includes_expected_output():
with patch.object(Agent, "execute_task") as execute:
execute.return_value = "ok"
-task.execute()
+task.execute_sync()
execute.assert_called_once_with(task=task, context=None, tools=[])
@@ -104,11 +104,13 @@ def test_task_callback():
with patch.object(Agent, "execute_task") as execute:
execute.return_value = "ok"
-task.execute()
+task.execute_sync()
task_completed.assert_called_once_with(task.output)
def test_task_callback_returns_task_ouput():
from crewai.tasks.output_format import OutputFormat
researcher = Agent(
role="Researcher",
goal="Make the best research and analysis on content about AI and AI agents",
@@ -127,7 +129,7 @@ def test_task_callback_returns_task_ouput():
with patch.object(Agent, "execute_task") as execute:
execute.return_value = "exported_ok"
-task.execute()
+task.execute_sync()
# Ensure the callback is called with a TaskOutput object serialized to JSON
task_completed.assert_called_once()
callback_data = task_completed.call_args[0][0]
@@ -140,10 +142,12 @@ def test_task_callback_returns_task_ouput():
output_dict = json.loads(callback_data)
expected_output = {
"description": task.description,
"exported_output": "exported_ok",
"raw_output": "exported_ok",
"raw": "exported_ok",
"pydantic": None,
"json_dict": None,
"agent": researcher.role,
"summary": "Give me a list of 5 interesting ideas to explore...",
"output_format": OutputFormat.RAW,
}
assert output_dict == expected_output
@@ -162,7 +166,7 @@ def test_execute_with_agent():
)
with patch.object(Agent, "execute_task", return_value="ok") as execute:
-task.execute(agent=researcher)
+task.execute_sync(agent=researcher)
execute.assert_called_once_with(task=task, context=None, tools=[])
@@ -182,7 +186,7 @@ def test_async_execution():
)
with patch.object(Agent, "execute_task", return_value="ok") as execute:
-task.execute(agent=researcher)
+task.execute_async(agent=researcher)
execute.assert_called_once_with(task=task, context=None, tools=[])
@@ -200,7 +204,7 @@ def test_multiple_output_type_error():
@pytest.mark.vcr(filter_headers=["authorization"])
-def test_output_pydantic():
+def test_output_pydantic_sequential():
class ScoreOutput(BaseModel):
score: int
@@ -218,13 +222,46 @@ def test_output_pydantic():
agent=scorer,
)
-crew = Crew(agents=[scorer], tasks=[task])
+crew = Crew(agents=[scorer], tasks=[task], process=Process.sequential)
result = crew.kickoff()
-assert isinstance(result, ScoreOutput)
+assert isinstance(result.pydantic, ScoreOutput)
+assert result.to_dict() == {"score": 4}
@pytest.mark.vcr(filter_headers=["authorization"])
-def test_output_json():
def test_output_pydantic_hierarchical():
from langchain_openai import ChatOpenAI
class ScoreOutput(BaseModel):
score: int
scorer = Agent(
role="Scorer",
goal="Score the title",
backstory="You're an expert scorer, specialized in scoring titles.",
allow_delegation=False,
)
task = Task(
description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
expected_output="The score of the title.",
output_pydantic=ScoreOutput,
agent=scorer,
)
crew = Crew(
agents=[scorer],
tasks=[task],
process=Process.hierarchical,
manager_llm=ChatOpenAI(model="gpt-4o"),
)
result = crew.kickoff()
assert isinstance(result.pydantic, ScoreOutput)
assert result.to_dict() == {"score": 4}
@pytest.mark.vcr(filter_headers=["authorization"])
def test_output_json_sequential():
class ScoreOutput(BaseModel):
score: int
@@ -242,9 +279,126 @@ def test_output_json():
agent=scorer,
)
-crew = Crew(agents=[scorer], tasks=[task])
+crew = Crew(agents=[scorer], tasks=[task], process=Process.sequential)
result = crew.kickoff()
-assert '{\n "score": 4\n}' == result
+assert '{"score": 4}' == result.json
+assert result.to_dict() == {"score": 4}
@pytest.mark.vcr(filter_headers=["authorization"])
def test_output_json_hierarchical():
from langchain_openai import ChatOpenAI
class ScoreOutput(BaseModel):
score: int
scorer = Agent(
role="Scorer",
goal="Score the title",
backstory="You're an expert scorer, specialized in scoring titles.",
allow_delegation=False,
)
task = Task(
description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
expected_output="The score of the title.",
output_json=ScoreOutput,
agent=scorer,
)
crew = Crew(
agents=[scorer],
tasks=[task],
process=Process.hierarchical,
manager_llm=ChatOpenAI(model="gpt-4o"),
)
result = crew.kickoff()
assert '{"score": 4}' == result.json
assert result.to_dict() == {"score": 4}
def test_json_property_without_output_json():
class ScoreOutput(BaseModel):
score: int
scorer = Agent(
role="Scorer",
goal="Score the title",
backstory="You're an expert scorer, specialized in scoring titles.",
allow_delegation=False,
)
task = Task(
description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
expected_output="The score of the title.",
output_pydantic=ScoreOutput, # Using output_pydantic instead of output_json
agent=scorer,
)
crew = Crew(agents=[scorer], tasks=[task], process=Process.sequential)
result = crew.kickoff()
with pytest.raises(ValueError) as excinfo:
_ = result.json # Attempt to access the json property
assert "No JSON output found in the final task." in str(excinfo.value)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_output_json_dict_sequential():
class ScoreOutput(BaseModel):
score: int
scorer = Agent(
role="Scorer",
goal="Score the title",
backstory="You're an expert scorer, specialized in scoring titles.",
allow_delegation=False,
)
task = Task(
description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
expected_output="The score of the title.",
output_json=ScoreOutput,
agent=scorer,
)
crew = Crew(agents=[scorer], tasks=[task], process=Process.sequential)
result = crew.kickoff()
assert {"score": 4} == result.json_dict
assert result.to_dict() == {"score": 4}
@pytest.mark.vcr(filter_headers=["authorization"])
def test_output_json_dict_hierarchical():
from langchain_openai import ChatOpenAI
class ScoreOutput(BaseModel):
score: int
scorer = Agent(
role="Scorer",
goal="Score the title",
backstory="You're an expert scorer, specialized in scoring titles.",
allow_delegation=False,
)
task = Task(
description="Give me an integer score between 1-5 for the following title: 'The impact of AI in the future of work'",
expected_output="The score of the title.",
output_json=ScoreOutput,
agent=scorer,
)
crew = Crew(
agents=[scorer],
tasks=[task],
process=Process.hierarchical,
manager_llm=ChatOpenAI(model="gpt-4o"),
)
result = crew.kickoff()
assert {"score": 4} == result.json_dict
assert result.to_dict() == {"score": 4}
@pytest.mark.vcr(filter_headers=["authorization"])
@@ -280,7 +434,11 @@ def test_output_pydantic_to_another_task():
crew = Crew(agents=[scorer], tasks=[task1, task2], verbose=2)
result = crew.kickoff()
-assert 5 == result.score
+pydantic_result = result.pydantic
+assert isinstance(
+pydantic_result, ScoreOutput
+), "Expected pydantic result to be of type ScoreOutput"
+assert 5 == pydantic_result.score
@pytest.mark.vcr(filter_headers=["authorization"])
@@ -311,7 +469,7 @@ def test_output_json_to_another_task():
crew = Crew(agents=[scorer], tasks=[task1, task2])
result = crew.kickoff()
-assert '{\n "score": 5\n}' == result
+assert '{"score": 5}' == result.json
@pytest.mark.vcr(filter_headers=["authorization"])
@@ -363,7 +521,9 @@ def test_save_task_json_output():
with patch.object(Task, "_save_file") as save_file:
save_file.return_value = None
crew.kickoff()
-save_file.assert_called_once_with('{\n "score": 4\n}')
+save_file.assert_called_once_with(
+{"score": 4}
+) # TODO: @Joao, should this be a dict or a json string?
@pytest.mark.vcr(filter_headers=["authorization"])
@@ -556,3 +716,80 @@ def test_interpolate_inputs():
== "Give me a list of 5 interesting ideas about ML to explore for an article, what makes them unique and interesting."
)
assert task.expected_output == "Bullet point list of 5 interesting ideas about ML."
def test_task_output_str_with_pydantic():
from crewai.tasks.output_format import OutputFormat
class ScoreOutput(BaseModel):
score: int
score_output = ScoreOutput(score=4)
task_output = TaskOutput(
description="Test task",
agent="Test Agent",
pydantic=score_output,
output_format=OutputFormat.PYDANTIC,
)
assert str(task_output) == str(score_output)
def test_task_output_str_with_json_dict():
from crewai.tasks.output_format import OutputFormat
json_dict = {"score": 4}
task_output = TaskOutput(
description="Test task",
agent="Test Agent",
json_dict=json_dict,
output_format=OutputFormat.JSON,
)
assert str(task_output) == str(json_dict)
def test_task_output_str_with_raw():
from crewai.tasks.output_format import OutputFormat
raw_output = "Raw task output"
task_output = TaskOutput(
description="Test task",
agent="Test Agent",
raw=raw_output,
output_format=OutputFormat.RAW,
)
assert str(task_output) == raw_output
def test_task_output_str_with_pydantic_and_json_dict():
from crewai.tasks.output_format import OutputFormat
class ScoreOutput(BaseModel):
score: int
score_output = ScoreOutput(score=4)
json_dict = {"score": 4}
task_output = TaskOutput(
description="Test task",
agent="Test Agent",
pydantic=score_output,
json_dict=json_dict,
output_format=OutputFormat.PYDANTIC,
)
# When both pydantic and json_dict are present, pydantic should take precedence
assert str(task_output) == str(score_output)
def test_task_output_str_with_none():
from crewai.tasks.output_format import OutputFormat
task_output = TaskOutput(
description="Test task",
agent="Test Agent",
output_format=OutputFormat.RAW,
)
assert str(task_output) == ""
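
The five __str__ tests above pin down a precedence order: pydantic first, then json_dict, then raw, then an empty string. A minimal stand-in consistent with those assertions is sketched below; it is a reconstruction from the tests, not the actual TaskOutput source, which also carries description, agent, summary, and output_format fields that these tests show do not affect __str__.

# Reconstructed from the assertions above, not copied from crewai.
from typing import Any, Dict, Optional


class TaskOutputSketch:
    def __init__(self, raw: str = "", pydantic: Optional[Any] = None,
                 json_dict: Optional[Dict[str, Any]] = None):
        self.raw = raw
        self.pydantic = pydantic
        self.json_dict = json_dict

    def __str__(self) -> str:
        if self.pydantic is not None:    # pydantic wins even when json_dict is also set
            return str(self.pydantic)
        if self.json_dict is not None:
            return str(self.json_dict)
        return self.raw                  # empty string when nothing was produced


assert str(TaskOutputSketch(raw="Raw task output")) == "Raw task output"
assert str(TaskOutputSketch(json_dict={"score": 4})) == str({"score": 4})
assert str(TaskOutputSketch()) == ""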