mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 04:18:35 +00:00
* Add reasoning attribute to Agent class Co-Authored-By: Joe Moura <joao@crewai.com> * Address PR feedback: improve type hints, error handling, refactor reasoning handler, and enhance tests and docs Co-Authored-By: Joe Moura <joao@crewai.com> * Implement function calling for reasoning and move prompts to translations Co-Authored-By: Joe Moura <joao@crewai.com> * Simplify function calling implementation with better error handling Co-Authored-By: Joe Moura <joao@crewai.com> * Enhance system prompts to leverage agent context (role, goal, backstory) Co-Authored-By: Joe Moura <joao@crewai.com> * Fix lint and type-checker issues Co-Authored-By: Joe Moura <joao@crewai.com> * Enhance system prompts to better leverage agent context Co-Authored-By: Joe Moura <joao@crewai.com> * Fix backstory access in reasoning handler for Python 3.12 compatibility Co-Authored-By: Joe Moura <joao@crewai.com> --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura <joao@crewai.com> Co-authored-by: João Moura <joaomdmoura@gmail.com>
262 lines
8.3 KiB
Python
262 lines
8.3 KiB
Python
"""Tests for reasoning in agents."""
|
|
|
|
import json
|
|
import pytest
|
|
|
|
from crewai import Agent, Task
|
|
from crewai.llm import LLM
|
|
from crewai.utilities.reasoning_handler import AgentReasoning
|
|
|
|
|
|
@pytest.fixture
def mock_llm_responses():
    """Supply canned LLM reply strings for the reasoning tests.

    Returns:
        A dict with the keys ``ready``, ``not_ready``,
        ``ready_after_refine`` and ``execution``, each mapped to the
        reply text that a stubbed LLM call hands back.
    """
    # Plan text that carries the "READY:" marker straight away.
    ready_reply = "I'll solve this simple math problem.\n\nREADY: I am ready to execute the task.\n\n"
    # Plan text that carries the "NOT READY:" marker.
    not_ready_reply = "I need to think about derivatives.\n\nNOT READY: I need to refine my plan because I'm not sure about the derivative rules."
    # Plan text used for the follow-up attempt; carries "READY:" again.
    refined_reply = "I'll use the power rule for derivatives where d/dx(x^n) = n*x^(n-1).\n\nREADY: I am ready to execute the task."
    # Plain answer returned for the non-reasoning (execution) call.
    execution_reply = "4"

    return {
        "ready": ready_reply,
        "not_ready": not_ready_reply,
        "ready_after_refine": refined_reply,
        "execution": execution_reply,
    }
|
|
|
|
|
|
def test_agent_with_reasoning(mock_llm_responses):
    """Run a reasoning-enabled agent against a stubbed LLM call.

    The stub returns the ``ready`` reply for any call whose messages contain
    "create a detailed plan" and the ``execution`` reply otherwise.  The test
    then checks the value returned by ``execute_task`` and that the task
    description contains "Reasoning Plan:".
    """
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    def fake_call(messages, *args, **kwargs):
        # A planning prompt anywhere in the conversation selects the plan reply.
        for message in messages:
            if "create a detailed plan" in message.get("content", ""):
                return mock_llm_responses["ready"]
        return mock_llm_responses["execution"]

    # Replace the real LLM call so no network request is attempted.
    agent.llm.call = fake_call

    outcome = agent.execute_task(task)

    assert outcome == mock_llm_responses["execution"]
    assert "Reasoning Plan:" in task.description
|
|
|
|
|
|
def test_agent_with_reasoning_not_ready_initially(mock_llm_responses):
    """Check the path where the first plan is not ready and a second one is.

    The stubbed LLM call counts every call whose messages contain
    "create a detailed plan" or "refine your plan".  The first such call gets
    the ``not_ready`` reply, later ones the ``ready_after_refine`` reply; any
    other call gets "2x".  The test expects exactly two counted calls.
    """
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        max_reasoning_attempts=2,
        verbose=True,
    )
    task = Task(
        description="Complex math task: What's the derivative of x²?",
        expected_output="The answer should be a mathematical expression.",
        agent=agent,
    )

    reasoning_calls = 0

    def mock_llm_call(messages, *args, **kwargs):
        nonlocal reasoning_calls

        def mentions(phrase):
            # True when any message's content contains the given phrase.
            return any(phrase in msg.get("content", "") for msg in messages)

        if not (mentions("create a detailed plan") or mentions("refine your plan")):
            return "2x"

        reasoning_calls += 1
        if reasoning_calls == 1:
            return mock_llm_responses["not_ready"]
        return mock_llm_responses["ready_after_refine"]

    # Replace the real LLM call with the counting stub.
    agent.llm.call = mock_llm_call

    outcome = agent.execute_task(task)

    assert outcome == "2x"
    assert reasoning_calls == 2  # Should have made 2 reasoning calls
    assert "Reasoning Plan:" in task.description
|
|
|
|
|
|
def test_agent_with_reasoning_max_attempts_reached():
    """Check the path where every reasoning reply says "NOT READY".

    The agent is built with ``max_reasoning_attempts=2``.  The stubbed LLM
    call answers every planning/refinement prompt with a numbered
    "NOT READY" reply and every other prompt with a fixed sentence.  The test
    expects exactly two counted reasoning calls and the fixed sentence as
    the task result.
    """
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        max_reasoning_attempts=2,
        verbose=True,
    )
    task = Task(
        description="Complex math task: Solve the Riemann hypothesis.",
        expected_output="A proof or disproof of the hypothesis.",
        agent=agent,
    )

    attempts = 0

    def mock_llm_call(messages, *args, **kwargs):
        nonlocal attempts

        def mentions(phrase):
            # True when any message's content contains the given phrase.
            return any(phrase in msg.get("content", "") for msg in messages)

        if not (mentions("create a detailed plan") or mentions("refine your plan")):
            return "This is an unsolved problem in mathematics."

        attempts += 1
        return f"Attempt {attempts}: I need more time to think.\n\nNOT READY: I need to refine my plan further."

    # Replace the real LLM call with the counting stub.
    agent.llm.call = mock_llm_call

    outcome = agent.execute_task(task)

    assert outcome == "This is an unsolved problem in mathematics."
    assert attempts == 2  # Should have made exactly 2 reasoning calls (max_attempts)
    assert "Reasoning Plan:" in task.description
|
|
|
|
|
|
def test_agent_reasoning_input_validation():
    """Check that ``AgentReasoning`` rejects a missing task or agent.

    Constructing ``AgentReasoning`` with ``task=None`` or with ``agent=None``
    is expected to raise ``ValueError`` whose message matches
    "Both task and agent must be provided".
    """
    expected_error = "Both task and agent must be provided"

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
    )

    # Missing task.
    with pytest.raises(ValueError, match=expected_error):
        AgentReasoning(task=None, agent=agent)

    task = Task(
        description="Simple task",
        expected_output="Simple output",
    )

    # Missing agent.
    with pytest.raises(ValueError, match=expected_error):
        AgentReasoning(task=task, agent=None)
|
|
|
|
|
|
def test_agent_reasoning_error_handling():
    """Check that task execution still completes when early LLM calls fail.

    The stubbed LLM call raises on its first two invocations and returns a
    fixed string from the third invocation onward.  The test expects that
    fixed string as the task result and more than two stub invocations.
    """
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
    )
    task = Task(
        description="Task that will cause an error",
        expected_output="Output that will never be generated",
        agent=agent,
    )

    llm_calls = 0

    def mock_llm_call_error(*args, **kwargs):
        nonlocal llm_calls
        llm_calls += 1
        if llm_calls > 2:
            # Return a value for task execution.
            return "Fallback execution result"
        # The first calls are expected to come from the reasoning step.
        raise Exception("LLM error during reasoning")

    # Replace the real LLM call with the failing stub.
    agent.llm.call = mock_llm_call_error

    outcome = agent.execute_task(task)

    assert outcome == "Fallback execution result"
    assert llm_calls > 2  # Ensure we called the mock multiple times
|
|
|
|
|
|
def test_agent_with_function_calling():
    """Check reasoning when the LLM reports function-calling support.

    ``supports_function_calling`` is stubbed to return ``True``.  The stubbed
    LLM call returns a JSON document with ``plan`` and ``ready`` fields when
    it receives a ``tools`` keyword argument, and "4" otherwise.  The test
    expects the plan text to appear in the task description.
    """
    plan_text = "I'll solve this simple math problem: 2+2=4."

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    def always_supported():
        return True

    agent.llm.supports_function_calling = always_supported

    def mock_function_call(messages, *args, **kwargs):
        # Calls without a ``tools`` keyword are answered with the plain result.
        if "tools" not in kwargs:
            return "4"
        payload = {
            "plan": plan_text,
            "ready": True,
        }
        return json.dumps(payload)

    # Replace the real LLM call with the stub.
    agent.llm.call = mock_function_call

    outcome = agent.execute_task(task)

    assert outcome == "4"
    assert "Reasoning Plan:" in task.description
    assert plan_text in task.description
|
|
|
|
|
|
def test_agent_with_function_calling_fallback():
    """Check reasoning when the function-calling reply is not valid JSON.

    ``supports_function_calling`` is stubbed to return ``True``, but the
    stubbed LLM call answers ``tools`` requests with plain text that contains
    a "READY:" marker.  The test expects that text to still end up in the
    task description under "Reasoning Plan:".
    """
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    def always_supported():
        return True

    agent.llm.supports_function_calling = always_supported

    def mock_function_call(messages, *args, **kwargs):
        # Calls without a ``tools`` keyword are answered with the plain result.
        if "tools" not in kwargs:
            return "4"
        # Deliberately not JSON, so the caller has to handle a parse failure.
        return "Invalid JSON that will trigger fallback. READY: I am ready to execute the task."

    # Replace the real LLM call with the stub.
    agent.llm.call = mock_function_call

    outcome = agent.execute_task(task)

    assert outcome == "4"
    assert "Reasoning Plan:" in task.description
    assert "Invalid JSON that will trigger fallback" in task.description
|