"""Tests for reasoning in agents.""" import json import pytest from crewai import Agent, Task from crewai.llm import LLM from crewai.utilities.reasoning_handler import AgentReasoning @pytest.fixture def mock_llm_responses(): """Fixture for mock LLM responses.""" return { "ready": "I'll solve this simple math problem.\n\nREADY: I am ready to execute the task.\n\n", "not_ready": "I need to think about derivatives.\n\nNOT READY: I need to refine my plan because I'm not sure about the derivative rules.", "ready_after_refine": "I'll use the power rule for derivatives where d/dx(x^n) = n*x^(n-1).\n\nREADY: I am ready to execute the task.", "execution": "4" } def test_agent_with_reasoning(mock_llm_responses): """Test agent with reasoning.""" llm = LLM("gpt-3.5-turbo") agent = Agent( role="Test Agent", goal="To test the reasoning feature", backstory="I am a test agent created to verify the reasoning feature works correctly.", llm=llm, reasoning=True, verbose=True ) task = Task( description="Simple math task: What's 2+2?", expected_output="The answer should be a number.", agent=agent ) agent.llm.call = lambda messages, *args, **kwargs: ( mock_llm_responses["ready"] if any("create a detailed plan" in msg.get("content", "") for msg in messages) else mock_llm_responses["execution"] ) result = agent.execute_task(task) assert result == mock_llm_responses["execution"] assert "Reasoning Plan:" in task.description def test_agent_with_reasoning_not_ready_initially(mock_llm_responses): """Test agent with reasoning that requires refinement.""" llm = LLM("gpt-3.5-turbo") agent = Agent( role="Test Agent", goal="To test the reasoning feature", backstory="I am a test agent created to verify the reasoning feature works correctly.", llm=llm, reasoning=True, max_reasoning_attempts=2, verbose=True ) task = Task( description="Complex math task: What's the derivative of x²?", expected_output="The answer should be a mathematical expression.", agent=agent ) call_count = [0] def mock_llm_call(messages, *args, **kwargs): if any("create a detailed plan" in msg.get("content", "") for msg in messages) or any("refine your plan" in msg.get("content", "") for msg in messages): call_count[0] += 1 if call_count[0] == 1: return mock_llm_responses["not_ready"] else: return mock_llm_responses["ready_after_refine"] else: return "2x" agent.llm.call = mock_llm_call result = agent.execute_task(task) assert result == "2x" assert call_count[0] == 2 # Should have made 2 reasoning calls assert "Reasoning Plan:" in task.description def test_agent_with_reasoning_max_attempts_reached(): """Test agent with reasoning that reaches max attempts without being ready.""" llm = LLM("gpt-3.5-turbo") agent = Agent( role="Test Agent", goal="To test the reasoning feature", backstory="I am a test agent created to verify the reasoning feature works correctly.", llm=llm, reasoning=True, max_reasoning_attempts=2, verbose=True ) task = Task( description="Complex math task: Solve the Riemann hypothesis.", expected_output="A proof or disproof of the hypothesis.", agent=agent ) call_count = [0] def mock_llm_call(messages, *args, **kwargs): if any("create a detailed plan" in msg.get("content", "") for msg in messages) or any("refine your plan" in msg.get("content", "") for msg in messages): call_count[0] += 1 return f"Attempt {call_count[0]}: I need more time to think.\n\nNOT READY: I need to refine my plan further." else: return "This is an unsolved problem in mathematics." 
    agent.llm.call = mock_llm_call

    result = agent.execute_task(task)

    assert result == "This is an unsolved problem in mathematics."
    assert call_count[0] == 2  # Should have made exactly 2 reasoning calls (max_attempts)
    assert "Reasoning Plan:" in task.description


def test_agent_reasoning_input_validation():
    """Test input validation in AgentReasoning."""
    llm = LLM("gpt-3.5-turbo")
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=llm,
        reasoning=True,
    )

    with pytest.raises(ValueError, match="Both task and agent must be provided"):
        AgentReasoning(task=None, agent=agent)

    task = Task(description="Simple task", expected_output="Simple output")

    with pytest.raises(ValueError, match="Both task and agent must be provided"):
        AgentReasoning(task=task, agent=None)


def test_agent_reasoning_error_handling():
    """Test error handling during the reasoning process."""
    llm = LLM("gpt-3.5-turbo")
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=llm,
        reasoning=True,
    )
    task = Task(
        description="Task that will cause an error",
        expected_output="Output that will never be generated",
        agent=agent,
    )

    call_count = [0]

    def mock_llm_call_error(*args, **kwargs):
        call_count[0] += 1
        if call_count[0] <= 2:  # First calls are for reasoning
            raise Exception("LLM error during reasoning")
        return "Fallback execution result"  # Return a value for task execution

    agent.llm.call = mock_llm_call_error

    result = agent.execute_task(task)

    assert result == "Fallback execution result"
    assert call_count[0] > 2  # Ensure we called the mock multiple times


def test_agent_with_function_calling():
    """Test agent with reasoning using function calling."""
    llm = LLM("gpt-3.5-turbo")
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=llm,
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    agent.llm.supports_function_calling = lambda: True

    def mock_function_call(messages, *args, **kwargs):
        if "tools" in kwargs:
            return json.dumps(
                {"plan": "I'll solve this simple math problem: 2+2=4.", "ready": True}
            )
        else:
            return "4"

    agent.llm.call = mock_function_call

    result = agent.execute_task(task)

    assert result == "4"
    assert "Reasoning Plan:" in task.description
    assert "I'll solve this simple math problem: 2+2=4." in task.description


def test_agent_with_function_calling_fallback():
    """Test agent with reasoning using function calling that falls back to text parsing."""
    llm = LLM("gpt-3.5-turbo")
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=llm,
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    agent.llm.supports_function_calling = lambda: True

    def mock_function_call(messages, *args, **kwargs):
        if "tools" in kwargs:
            return "Invalid JSON that will trigger fallback. READY: I am ready to execute the task."
        else:
            return "4"

    agent.llm.call = mock_function_call

    result = agent.execute_task(task)

    assert result == "4"
    assert "Reasoning Plan:" in task.description
    assert "Invalid JSON that will trigger fallback" in task.description
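

# The sketch below is illustrative and was not part of the original suite. It
# exercises the reasoning handler directly rather than through execute_task().
# The AgentReasoning(task=..., agent=...) constructor is confirmed by the
# validation test above; the `handle_agent_reasoning()` method name and the
# `output.plan.ready` shape are ASSUMPTIONS and may differ in the real handler.
def test_agent_reasoning_direct_invocation_sketch(mock_llm_responses):
    """Sketch: drive AgentReasoning directly with a mocked text-parsing LLM."""
    llm = LLM("gpt-3.5-turbo")
    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=llm,
        reasoning=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    # Force the text-parsing path and always answer with the READY plan.
    agent.llm.supports_function_calling = lambda: False
    agent.llm.call = lambda messages, *args, **kwargs: mock_llm_responses["ready"]

    # Hypothetical entry point and output shape; adjust if the handler's
    # public API differs.
    output = AgentReasoning(task=task, agent=agent).handle_agent_reasoning()
    assert output.plan.ready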