Mirror of https://github.com/crewAIInc/crewAI.git (synced 2026-01-13 10:08:29 +00:00)
- Add reasoning parameter to LLM.__init__() method
- Implement logic in _prepare_completion_params to handle reasoning=False
- Add comprehensive tests for reasoning parameter functionality
- Ensure reasoning=False overrides reasoning_effort parameter
- Add integration test with Agent class

The fix ensures that when reasoning=False is set, the reasoning_effort parameter is not included in the LLM completion call, effectively disabling reasoning mode for models like Qwen and Cogito with Ollama.

Co-Authored-By: João <joao@crewai.com>
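A minimal usage sketch of the behavior described above (the "ollama/qwen2.5" model id is only an example, and the call to _prepare_completion_params assumes it accepts the messages list directly, which this page does not show):

from crewai.llm import LLM

# Sketch: reasoning=False should win even when reasoning_effort is also set.
llm = LLM("ollama/qwen2.5", reasoning=False, reasoning_effort="high")

# Hypothetical check; assumes _prepare_completion_params takes the messages list.
params = llm._prepare_completion_params(messages=[{"role": "user", "content": "What is 2+2?"}])
assert "reasoning_effort" not in params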
291 lines · 9.1 KiB · Python
"""Tests for reasoning in agents."""
|
|
|
|
import json
|
|
import pytest
|
|
from unittest.mock import patch
|
|
|
|
from crewai import Agent, Task
|
|
from crewai.llm import LLM
|
|
from crewai.utilities.reasoning_handler import AgentReasoning
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_llm_responses():
|
|
"""Fixture for mock LLM responses."""
|
|
return {
|
|
"ready": "I'll solve this simple math problem.\n\nREADY: I am ready to execute the task.\n\n",
|
|
"not_ready": "I need to think about derivatives.\n\nNOT READY: I need to refine my plan because I'm not sure about the derivative rules.",
|
|
"ready_after_refine": "I'll use the power rule for derivatives where d/dx(x^n) = n*x^(n-1).\n\nREADY: I am ready to execute the task.",
|
|
"execution": "4"
|
|
}
|
|
|
|
|
|
def test_agent_with_reasoning(mock_llm_responses):
|
|
"""Test agent with reasoning."""
|
|
llm = LLM("gpt-3.5-turbo")
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the reasoning feature",
|
|
backstory="I am a test agent created to verify the reasoning feature works correctly.",
|
|
llm=llm,
|
|
reasoning=True,
|
|
verbose=True
|
|
)
|
|
|
|
task = Task(
|
|
description="Simple math task: What's 2+2?",
|
|
expected_output="The answer should be a number.",
|
|
agent=agent
|
|
)
|
|
|
|
agent.llm.call = lambda messages, *args, **kwargs: (
|
|
mock_llm_responses["ready"]
|
|
if any("create a detailed plan" in msg.get("content", "") for msg in messages)
|
|
else mock_llm_responses["execution"]
|
|
)
|
|
|
|
result = agent.execute_task(task)
|
|
|
|
assert result == mock_llm_responses["execution"]
|
|
assert "Reasoning Plan:" in task.description
|
|
|
|
|
|
def test_agent_with_reasoning_not_ready_initially(mock_llm_responses):
|
|
"""Test agent with reasoning that requires refinement."""
|
|
llm = LLM("gpt-3.5-turbo")
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the reasoning feature",
|
|
backstory="I am a test agent created to verify the reasoning feature works correctly.",
|
|
llm=llm,
|
|
reasoning=True,
|
|
max_reasoning_attempts=2,
|
|
verbose=True
|
|
)
|
|
|
|
task = Task(
|
|
description="Complex math task: What's the derivative of x²?",
|
|
expected_output="The answer should be a mathematical expression.",
|
|
agent=agent
|
|
)
|
|
|
|
call_count = [0]
|
|
|
|
def mock_llm_call(messages, *args, **kwargs):
|
|
if any("create a detailed plan" in msg.get("content", "") for msg in messages) or any("refine your plan" in msg.get("content", "") for msg in messages):
|
|
call_count[0] += 1
|
|
if call_count[0] == 1:
|
|
return mock_llm_responses["not_ready"]
|
|
else:
|
|
return mock_llm_responses["ready_after_refine"]
|
|
else:
|
|
return "2x"
|
|
|
|
agent.llm.call = mock_llm_call
|
|
|
|
result = agent.execute_task(task)
|
|
|
|
assert result == "2x"
|
|
assert call_count[0] == 2 # Should have made 2 reasoning calls
|
|
assert "Reasoning Plan:" in task.description
|
|
|
|
|
|
def test_agent_with_reasoning_max_attempts_reached():
|
|
"""Test agent with reasoning that reaches max attempts without being ready."""
|
|
llm = LLM("gpt-3.5-turbo")
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the reasoning feature",
|
|
backstory="I am a test agent created to verify the reasoning feature works correctly.",
|
|
llm=llm,
|
|
reasoning=True,
|
|
max_reasoning_attempts=2,
|
|
verbose=True
|
|
)
|
|
|
|
task = Task(
|
|
description="Complex math task: Solve the Riemann hypothesis.",
|
|
expected_output="A proof or disproof of the hypothesis.",
|
|
agent=agent
|
|
)
|
|
|
|
call_count = [0]
|
|
|
|
def mock_llm_call(messages, *args, **kwargs):
|
|
if any("create a detailed plan" in msg.get("content", "") for msg in messages) or any("refine your plan" in msg.get("content", "") for msg in messages):
|
|
call_count[0] += 1
|
|
return f"Attempt {call_count[0]}: I need more time to think.\n\nNOT READY: I need to refine my plan further."
|
|
else:
|
|
return "This is an unsolved problem in mathematics."
|
|
|
|
agent.llm.call = mock_llm_call
|
|
|
|
result = agent.execute_task(task)
|
|
|
|
assert result == "This is an unsolved problem in mathematics."
|
|
assert call_count[0] == 2 # Should have made exactly 2 reasoning calls (max_attempts)
|
|
assert "Reasoning Plan:" in task.description
|
|
|
|
|
|
def test_agent_reasoning_input_validation():
|
|
"""Test input validation in AgentReasoning."""
|
|
llm = LLM("gpt-3.5-turbo")
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the reasoning feature",
|
|
backstory="I am a test agent created to verify the reasoning feature works correctly.",
|
|
llm=llm,
|
|
reasoning=True
|
|
)
|
|
|
|
with pytest.raises(ValueError, match="Both task and agent must be provided"):
|
|
AgentReasoning(task=None, agent=agent)
|
|
|
|
task = Task(
|
|
description="Simple task",
|
|
expected_output="Simple output"
|
|
)
|
|
with pytest.raises(ValueError, match="Both task and agent must be provided"):
|
|
AgentReasoning(task=task, agent=None)
|
|
|
|
|
|
def test_agent_reasoning_error_handling():
|
|
"""Test error handling during the reasoning process."""
|
|
llm = LLM("gpt-3.5-turbo")
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the reasoning feature",
|
|
backstory="I am a test agent created to verify the reasoning feature works correctly.",
|
|
llm=llm,
|
|
reasoning=True
|
|
)
|
|
|
|
task = Task(
|
|
description="Task that will cause an error",
|
|
expected_output="Output that will never be generated",
|
|
agent=agent
|
|
)
|
|
|
|
call_count = [0]
|
|
|
|
def mock_llm_call_error(*args, **kwargs):
|
|
call_count[0] += 1
|
|
if call_count[0] <= 2: # First calls are for reasoning
|
|
raise Exception("LLM error during reasoning")
|
|
return "Fallback execution result" # Return a value for task execution
|
|
|
|
agent.llm.call = mock_llm_call_error
|
|
|
|
result = agent.execute_task(task)
|
|
|
|
assert result == "Fallback execution result"
|
|
assert call_count[0] > 2 # Ensure we called the mock multiple times
|
|
|
|
|
|
def test_agent_with_function_calling():
|
|
"""Test agent with reasoning using function calling."""
|
|
llm = LLM("gpt-3.5-turbo")
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the reasoning feature",
|
|
backstory="I am a test agent created to verify the reasoning feature works correctly.",
|
|
llm=llm,
|
|
reasoning=True,
|
|
verbose=True
|
|
)
|
|
|
|
task = Task(
|
|
description="Simple math task: What's 2+2?",
|
|
expected_output="The answer should be a number.",
|
|
agent=agent
|
|
)
|
|
|
|
agent.llm.supports_function_calling = lambda: True
|
|
|
|
def mock_function_call(messages, *args, **kwargs):
|
|
if "tools" in kwargs:
|
|
return json.dumps({
|
|
"plan": "I'll solve this simple math problem: 2+2=4.",
|
|
"ready": True
|
|
})
|
|
else:
|
|
return "4"
|
|
|
|
agent.llm.call = mock_function_call
|
|
|
|
result = agent.execute_task(task)
|
|
|
|
assert result == "4"
|
|
assert "Reasoning Plan:" in task.description
|
|
assert "I'll solve this simple math problem: 2+2=4." in task.description
|
|
|
|
|
|
def test_agent_with_function_calling_fallback():
|
|
"""Test agent with reasoning using function calling that falls back to text parsing."""
|
|
llm = LLM("gpt-3.5-turbo")
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the reasoning feature",
|
|
backstory="I am a test agent created to verify the reasoning feature works correctly.",
|
|
llm=llm,
|
|
reasoning=True,
|
|
verbose=True
|
|
)
|
|
|
|
task = Task(
|
|
description="Simple math task: What's 2+2?",
|
|
expected_output="The answer should be a number.",
|
|
agent=agent
|
|
)
|
|
|
|
agent.llm.supports_function_calling = lambda: True
|
|
|
|
def mock_function_call(messages, *args, **kwargs):
|
|
if "tools" in kwargs:
|
|
return "Invalid JSON that will trigger fallback. READY: I am ready to execute the task."
|
|
else:
|
|
return "4"
|
|
|
|
agent.llm.call = mock_function_call
|
|
|
|
result = agent.execute_task(task)
|
|
|
|
assert result == "4"
|
|
assert "Reasoning Plan:" in task.description
|
|
assert "Invalid JSON that will trigger fallback" in task.description
|
|
|
|
|
|
def test_agent_with_llm_reasoning_disabled():
|
|
"""Test agent with LLM reasoning disabled."""
|
|
llm = LLM("gpt-3.5-turbo", reasoning=False)
|
|
|
|
agent = Agent(
|
|
role="Test Agent",
|
|
goal="To test the LLM reasoning parameter",
|
|
backstory="I am a test agent created to verify the LLM reasoning parameter works correctly.",
|
|
llm=llm,
|
|
reasoning=False,
|
|
verbose=True
|
|
)
|
|
|
|
task = Task(
|
|
description="Simple math task: What's 3+3?",
|
|
expected_output="The answer should be a number.",
|
|
agent=agent
|
|
)
|
|
|
|
with patch.object(agent.llm, 'call') as mock_call:
|
|
mock_call.return_value = "6"
|
|
|
|
result = agent.execute_task(task)
|
|
|
|
assert result == "6"
|
|
assert "Reasoning Plan:" not in task.description
|