crewAI/tests/agent_test.py
"""Test Agent creation and execution basic functionality."""
from unittest import mock
from unittest.mock import patch
import pytest
from langchain.tools import tool
from langchain_core.exceptions import OutputParserException
from langchain_openai import ChatOpenAI
from crewai import Agent, Crew, Task
from crewai.agents.cache import CacheHandler
from crewai.agents.executor import CrewAgentExecutor
from crewai.agents.parser import CrewAgentParser
from crewai.tools.tool_calling import InstructorToolCalling
from crewai.tools.tool_usage import ToolUsage
from crewai.utilities import RPMController
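
# NOTE: Tests marked @pytest.mark.vcr replay recorded HTTP interactions
# (VCR cassettes), which is why exact-string assertions on LLM output are
# deterministic; the Authorization header is filtered out of the recordings.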
def test_agent_creation():
agent = Agent(role="test role", goal="test goal", backstory="test backstory")
assert agent.role == "test role"
assert agent.goal == "test goal"
assert agent.backstory == "test backstory"
assert agent.tools == []
def test_agent_default_values():
agent = Agent(role="test role", goal="test goal", backstory="test backstory")
assert isinstance(agent.llm, ChatOpenAI)
assert agent.llm.model_name == "gpt-4o"
assert agent.llm.temperature == 0.7
assert agent.llm.verbose is False
assert agent.allow_delegation is True
def test_custom_llm():
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
llm=ChatOpenAI(temperature=0, model="gpt-4"),
)
assert isinstance(agent.llm, ChatOpenAI)
assert agent.llm.model_name == "gpt-4"
assert agent.llm.temperature == 0
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_execution():
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
allow_delegation=False,
)
task = Task(
description="How much is 1 + 1?",
agent=agent,
expected_output="the result of the math operation.",
)
output = agent.execute_task(task)
assert output == "The result of the math operation 1 + 1 is 2."
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_execution_with_tools():
@tool
def multiplier(first_number: int, second_number: int) -> float:
"""Useful for when you need to multiply two numbers together."""
return first_number * second_number
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
tools=[multiplier],
allow_delegation=False,
)
task = Task(
description="What is 3 times 4?",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task)
assert output == "The result of 3 times 4 is 12."
@pytest.mark.vcr(filter_headers=["authorization"])
def test_logging_tool_usage():
@tool
def multiplier(first_number: int, second_number: int) -> float:
"""Useful for when you need to multiply two numbers together."""
return first_number * second_number
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
tools=[multiplier],
allow_delegation=False,
verbose=True,
)
assert agent.tools_handler.last_used_tool == {}
task = Task(
description="What is 3 times 4?",
agent=agent,
expected_output="The result of the multiplication.",
)
# force a fresh cache so the tool call below actually runs
agent.tools_handler.cache = CacheHandler()
output = agent.execute_task(task)
tool_usage = InstructorToolCalling(
tool_name=multiplier.name, arguments={"first_number": 3, "second_number": 4}
)
assert output == "12"
assert agent.tools_handler.last_used_tool.tool_name == tool_usage.tool_name
assert agent.tools_handler.last_used_tool.arguments == tool_usage.arguments
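
# As the assertions below show, tool results are cached under keys of the
# form "<tool name>-<stringified arguments dict>", so a repeated call with
# identical input can be served from the cache instead of re-running the tool.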
@pytest.mark.vcr(filter_headers=["authorization"])
def test_cache_hitting():
@tool
def multiplier(first_number: int, second_number: int) -> float:
"""Useful for when you need to multiply two numbers together."""
return first_number * second_number
cache_handler = CacheHandler()
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
tools=[multiplier],
allow_delegation=False,
cache_handler=cache_handler,
verbose=True,
)
task1 = Task(
description="What is 2 times 6?",
agent=agent,
expected_output="The result of the multiplication.",
)
task2 = Task(
description="What is 3 times 3?",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task1)
output = agent.execute_task(task2)
assert cache_handler._cache == {
"multiplier-{'first_number': 2, 'second_number': 6}": 12,
"multiplier-{'first_number': 3, 'second_number': 3}": 9,
}
task = Task(
description="What is 2 times 6 times 3? Return only the number",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task)
assert output == "36"
assert cache_handler._cache == {
"multiplier-{'first_number': 2, 'second_number': 6}": 12,
"multiplier-{'first_number': 3, 'second_number': 3}": 9,
"multiplier-{'first_number': 12, 'second_number': 3}": 36,
}
with patch.object(CacheHandler, "read") as read:
read.return_value = "0"
task = Task(
description="What is 2 times 6? Ignore correctness and just return the result of the multiplication tool, you must use the tool.",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task)
assert output == "0"
read.assert_called_with(
tool="multiplier", input={"first_number": 2, "second_number": 6}
)
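
# With cache=False the cache must never be consulted: the cache contents
# are asserted *not* to match, and CacheHandler.read is expected to stay
# uncalled even for a repeated input.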
@pytest.mark.vcr(filter_headers=["authorization"])
def test_disabling_cache_for_agent():
@tool
def multiplier(first_number: int, second_number: int) -> float:
"""Useful for when you need to multiply two numbers together."""
return first_number * second_number
cache_handler = CacheHandler()
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
tools=[multiplier],
allow_delegation=False,
cache_handler=cache_handler,
cache=False,
verbose=True,
)
task1 = Task(
description="What is 2 times 6?",
agent=agent,
expected_output="The result of the multiplication.",
)
task2 = Task(
description="What is 3 times 3?",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task1)
output = agent.execute_task(task2)
assert cache_handler._cache != {
"multiplier-{'first_number': 2, 'second_number': 6}": 12,
"multiplier-{'first_number': 3, 'second_number': 3}": 9,
}
task = Task(
description="What is 2 times 6 times 3? Return only the number",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task)
assert output == "36"
assert cache_handler._cache != {
"multiplier-{'first_number': 2, 'second_number': 6}": 12,
"multiplier-{'first_number': 3, 'second_number': 3}": 9,
"multiplier-{'first_number': 12, 'second_number': 3}": 36,
}
with patch.object(CacheHandler, "read") as read:
read.return_value = "0"
task = Task(
description="What is 2 times 6? Ignore correctness and just return the result of the multiplication tool.",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task)
assert output == "12"
read.assert_not_called()
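
# Tools don't have to be attached to the Agent; they can also be supplied
# per call via execute_task(task=..., tools=[...]), as below.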
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_execution_with_specific_tools():
@tool
def multiplier(first_number: int, second_number: int) -> float:
"""Useful for when you need to multiply two numbers together."""
return first_number * second_number
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
allow_delegation=False,
)
task = Task(
description="What is 3 times 4",
agent=agent,
expected_output="The result of the multiplication.",
)
output = agent.execute_task(task=task, tools=[multiplier])
assert output == "The result of the multiplication is 12."
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_custom_max_iterations():
@tool
def get_final_answer(numbers) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_iter=1,
allow_delegation=False,
)
with patch.object(
CrewAgentExecutor, "_iter_next_step", wraps=agent.agent_executor._iter_next_step
) as private_mock:
task = Task(
description="The final answer is 42. But don't give it yet, instead keep using the `get_final_answer` tool.",
expected_output="The final answer",
)
agent.execute_task(
task=task,
tools=[get_final_answer],
)
private_mock.assert_called_once()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_repeated_tool_usage(capsys):
@tool
def get_final_answer(anything: str) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_iter=4,
llm=ChatOpenAI(model="gpt-4"),
allow_delegation=False,
verbose=True,
)
task = Task(
description="The final answer is 42. But don't give it until I tell you so, instead keep using the `get_final_answer` tool.",
expected_output="The final answer, don't give it until I tell you so",
)
# force a fresh cache so prior tool results can't be replayed
agent.tools_handler.cache = CacheHandler()
agent.execute_task(
task=task,
tools=[get_final_answer],
)
captured = capsys.readouterr()
assert (
"I tried reusing the same input, I must stop using this action input. I'll try something else instead."
in captured.out
)
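
# Same scenario with cache=False: the "I tried reusing the same input"
# nudge comes from the executor's repeated-usage guard, so it should fire
# even when result caching is disabled.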
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_repeated_tool_usage_check_even_with_disabled_cache(capsys):
@tool
def get_final_answer(anything: str) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_iter=4,
llm=ChatOpenAI(model="gpt-4"),
allow_delegation=False,
verbose=True,
cache=False,
)
task = Task(
description="The final answer is 42. But don't give it until I tell you so, instead keep using the `get_final_answer` tool.",
expected_output="The final answer, don't give it until I tell you so",
)
agent.execute_task(
task=task,
tools=[get_final_answer],
)
captured = capsys.readouterr()
assert (
"I tried reusing the same input, I must stop using this action input. I'll try something else instead."
in captured.out
)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_moved_on_after_max_iterations():
@tool
def get_final_answer(numbers) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_iter=3,
allow_delegation=False,
)
task = Task(
description="The final answer is 42. But don't give it yet, instead keep using the `get_final_answer` tool over and over until you're told you can give yout final answer.",
expected_output="The final answer",
)
output = agent.execute_task(
task=task,
tools=[get_final_answer],
)
assert output == "42"
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_respect_the_max_rpm_set(capsys):
@tool
def get_final_answer(anything: str) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_iter=5,
max_rpm=1,
verbose=True,
allow_delegation=False,
)
with patch.object(RPMController, "_wait_for_next_minute") as moveon:
moveon.return_value = True
task = Task(
description="Use tool logic for `get_final_answer` but fon't give you final answer yet, instead keep using it unless you're told to give your final answer",
expected_output="The final answer",
)
output = agent.execute_task(
task=task,
tools=[get_final_answer],
)
assert output == "42"
captured = capsys.readouterr()
assert "Max RPM reached, waiting for next minute to start." in captured.out
moveon.assert_called()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_respect_the_max_rpm_set_over_crew_rpm(capsys):
@tool
def get_final_answer(numbers) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_iter=4,
max_rpm=10,
verbose=True,
)
task = Task(
description="Use tool logic for `get_final_answer` but fon't give you final answer yet, instead keep using it unless you're told to give your final answer",
expected_output="The final answer",
tools=[get_final_answer],
agent=agent,
)
crew = Crew(agents=[agent], tasks=[task], max_rpm=1, verbose=2)
with patch.object(RPMController, "_wait_for_next_minute") as moveon:
moveon.return_value = True
crew.kickoff()
captured = capsys.readouterr()
assert "Max RPM reached, waiting for next minute to start." not in captured.out
moveon.assert_not_called()
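
# The test above shows an agent-level max_rpm taking precedence over the
# crew's limit; the next one shows agents without their own max_rpm falling
# back to the crew-wide limit.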
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_without_max_rpm_respects_crew_rpm(capsys):
@tool
def get_final_answer(numbers) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent1 = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_rpm=10,
verbose=True,
allow_delegation=False,
)
agent2 = Agent(
role="test role2",
goal="test goal2",
backstory="test backstory2",
max_iter=2,
verbose=True,
allow_delegation=False,
)
tasks = [
Task(
description="Just say hi.", agent=agent1, expected_output="Your greeting."
),
Task(
description="NEVER give a Final Answer, instead keep using the `get_final_answer` tool non-stop",
expected_output="The final answer",
tools=[get_final_answer],
agent=agent2,
),
]
crew = Crew(agents=[agent1, agent2], tasks=tasks, max_rpm=1, verbose=2)
with patch.object(RPMController, "_wait_for_next_minute") as moveon:
moveon.return_value = True
crew.kickoff()
captured = capsys.readouterr()
assert "get_final_answer" in captured.out
assert "Max RPM reached, waiting for next minute to start." in captured.out
moveon.assert_called_once()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_error_on_parsing_tool(capsys):
@tool
def get_final_answer() -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent1 = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
verbose=True,
)
tasks = [
Task(
description="Use the get_final_answer tool.",
expected_output="The final answer",
agent=agent1,
tools=[get_final_answer],
)
]
crew = Crew(
agents=[agent1],
tasks=tasks,
verbose=2,
function_calling_llm=ChatOpenAI(model="gpt-4-0125-preview"),
)
with patch.object(ToolUsage, "_render") as force_exception:
force_exception.side_effect = Exception("Error on parsing tool.")
crew.kickoff()
captured = capsys.readouterr()
assert "Error on parsing tool." in captured.out
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_remembers_output_format_after_using_tools_too_many_times():
@tool
def get_final_answer(anything: str) -> float:
"""Get the final answer but don't give it yet, just re-use this
tool non-stop."""
return 42
agent1 = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
max_iter=6,
verbose=True,
)
tasks = [
Task(
description="Use tool logic for `get_final_answer` but fon't give you final answer yet, instead keep using it unless you're told to give your final answer",
expected_output="The final answer",
agent=agent1,
tools=[get_final_answer],
)
]
crew = Crew(agents=[agent1], tasks=tasks, verbose=2)
with patch.object(ToolUsage, "_remember_format") as remember_format:
crew.kickoff()
remember_format.assert_called()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_use_specific_tasks_output_as_context(capsys):
agent1 = Agent(role="test role", goal="test goal", backstory="test backstory")
agent2 = Agent(role="test role2", goal="test goal2", backstory="test backstory2")
say_hi_task = Task(
description="Just say hi.", agent=agent1, expected_output="Your greeting."
)
say_bye_task = Task(
description="Just say bye.", agent=agent1, expected_output="Your farewell."
)
answer_task = Task(
description="Answer accordingly to the context you got.",
expected_output="Your answer.",
context=[say_hi_task],
agent=agent2,
)
tasks = [say_hi_task, say_bye_task, answer_task]
crew = Crew(agents=[agent1, agent2], tasks=tasks)
result = crew.kickoff()
assert "bye" not in result.lower()
assert "hi" in result.lower() or "hello" in result.lower()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_step_callback():
class StepCallback:
def callback(self, step):
print(step)
with patch.object(StepCallback, "callback") as callback:
@tool
def learn_about_AI(topic) -> str:
"""Useful for when you need to learn about AI to write a paragraph about it."""
return "AI is a very broad field."
agent1 = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
tools=[learn_about_AI],
step_callback=StepCallback().callback,
)
essay = Task(
description="Write and then review an small paragraph on AI until it's AMAZING",
expected_output="The final paragraph.",
agent=agent1,
)
tasks = [essay]
crew = Crew(agents=[agent1], tasks=tasks)
callback.return_value = "ok"
crew.kickoff()
callback.assert_called()
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_function_calling_llm():
llm = ChatOpenAI(model="gpt-3.5-turbo-0125")
with patch.object(llm.client, "create", wraps=llm.client.create) as private_mock:
@tool
def learn_about_AI(topic) -> str:
"""Useful for when you need to learn about AI to write a paragraph about it."""
return "AI is a very broad field."
agent1 = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
tools=[learn_about_AI],
llm=ChatOpenAI(model="gpt-4-0125-preview"),
function_calling_llm=llm,
)
essay = Task(
description="Write and then review an small paragraph on AI until it's AMAZING",
expected_output="The final paragraph.",
agent=agent1,
)
tasks = [essay]
crew = Crew(agents=[agent1], tasks=tasks)
crew.kickoff()
private_mock.assert_called()
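
# CrewAgentParser raises OutputParserException on malformed output and is
# expected to bump its agent's formatting-error counter via
# increment_formatting_errors.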
def test_agent_count_formatting_error():
agent1 = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
verbose=True,
)
parser = CrewAgentParser()
parser.agent = agent1
with patch.object(Agent, "increment_formatting_errors") as mock_count_errors:
test_text = "This text does not match expected formats."
with pytest.raises(OutputParserException):
parser.parse(test_text)
mock_count_errors.assert_called_once()
def test_agent_llm_uses_token_calc_handler_when_llm_has_model_name():
agent1 = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
verbose=True,
)
assert len(agent1.llm.callbacks) == 1
assert agent1.llm.callbacks[0].__class__.__name__ == "TokenCalcHandler"
assert agent1.llm.callbacks[0].model_name == "gpt-4o"
assert (
agent1.llm.callbacks[0].token_cost_process.__class__.__name__ == "TokenProcess"
)
def test_agent_definition_based_on_dict():
config = {
"role": "test role",
"goal": "test goal",
"backstory": "test backstory",
"verbose": True,
}
agent = Agent(config=config)
assert agent.role == "test role"
assert agent.goal == "test goal"
assert agent.backstory == "test backstory"
assert agent.verbose is True
assert agent.tools == []
# test for human input
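# human_input=True makes the executor pause for feedback; patching
# _ask_human_input simulates the human reply without blocking on stdin.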
@pytest.mark.vcr(filter_headers=["authorization"])
def test_agent_human_input():
config = {
"role": "test role",
"goal": "test goal",
"backstory": "test backstory",
}
agent = Agent(config=config)
task = Task(
agent=agent,
description="Say the word: Hi",
expected_output="The word: Hi",
human_input=True,
)
with patch.object(CrewAgentExecutor, "_ask_human_input") as mock_human_input:
mock_human_input.return_value = "Hello"
output = agent.execute_task(task)
mock_human_input.assert_called_once()
assert output == "Hello"
def test_interpolate_inputs():
agent = Agent(
role="{topic} specialist",
goal="Figure {goal} out",
backstory="I am the master of {role}",
)
agent.interpolate_inputs({"topic": "AI", "goal": "life", "role": "all things"})
assert agent.role == "AI specialist"
assert agent.goal == "Figure life out"
assert agent.backstory == "I am the master of all things"
agent.interpolate_inputs({"topic": "Sales", "goal": "stuff", "role": "nothing"})
assert agent.role == "Sales specialist"
assert agent.goal == "Figure stuff out"
assert agent.backstory == "I am the master of nothing"
def test_system_and_prompt_template():
agent = Agent(
role="{topic} specialist",
goal="Figure {goal} out",
backstory="I am the master of {role}",
system_template="""<|start_header_id|>system<|end_header_id|>
{{ .System }}<|eot_id|>""",
prompt_template="""<|start_header_id|>user<|end_header_id|>
{{ .Prompt }}<|eot_id|>""",
response_template="""<|start_header_id|>assistant<|end_header_id|>
{{ .Response }}<|eot_id|>""",
)
template = agent.agent_executor.agent.dict()["runnable"]["middle"][0]["template"]
assert (
template
== """<|start_header_id|>system<|end_header_id|>
You are {role}. {backstory}
Your personal goal is: {goal}To give my best complete final answer to the task use the exact following format:
Thought: I now can give a great answer
Final Answer: my best complete final answer to the task.
Your final answer must be the great and the most complete as possible, it must be outcome described.
I MUST use these formats, my job depends on it!<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Current Task: {input}
Begin! This is VERY important to you, use the tools available and give your best Final Answer, your job depends on it!
Thought:
{agent_scratchpad}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
)
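
# The two tests below patch crewai.agent.CrewTrainingHandler: during
# training, human feedback is loaded from training_data.pkl; afterwards,
# consolidated suggestions come from trained_agents_data.pkl. Both are
# appended to the task prompt.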
@patch("crewai.agent.CrewTrainingHandler")
def test_agent_training_handler(crew_training_handler):
task_prompt = "What is 1 + 1?"
agent = Agent(
role="test role",
goal="test goal",
backstory="test backstory",
verbose=True,
)
crew_training_handler().load.return_value = {
f"{str(agent.id)}": {"0": {"human_feedback": "good"}}
}
result = agent._training_handler(task_prompt=task_prompt)
assert result == "What is 1 + 1?You MUST follow these feedbacks: \n good"
crew_training_handler.assert_has_calls(
[mock.call(), mock.call("training_data.pkl"), mock.call().load()]
)
@patch("crewai.agent.CrewTrainingHandler")
def test_agent_use_trained_data(crew_training_handler):
task_prompt = "What is 1 + 1?"
agent = Agent(
role="researcher",
goal="test goal",
backstory="test backstory",
verbose=True,
)
crew_training_handler().load.return_value = {
agent.role: {
"suggestions": [
"The result of the math operatio must be right.",
"Result must be better than 1.",
]
}
}
result = agent._use_trained_data(task_prompt=task_prompt)
assert (
result == "What is 1 + 1?You MUST follow these feedbacks: \n "
"The result of the math operatio must be right.\n - Result must be better than 1."
)
crew_training_handler.assert_has_calls(
[mock.call(), mock.call("trained_agents_data.pkl"), mock.call().load()]
)