# crewAI/tests/agents/test_agent_reasoning.py

"""Tests for reasoning in agents."""

import json

import pytest

from crewai import Agent, Task
from crewai.llm import LLM


@pytest.fixture
def mock_llm_responses():
    """Provide canned LLM replies keyed by the stage they stand in for.

    Keys:
        ready: planning text that contains a ``READY:`` marker.
        not_ready: planning text that contains a ``NOT READY:`` marker.
        ready_after_refine: refined planning text that contains ``READY:``.
        execution: the final answer string for the task itself.
    """
    ready_reply = "I'll solve this simple math problem.\n\nREADY: I am ready to execute the task.\n\n"
    not_ready_reply = "I need to think about derivatives.\n\nNOT READY: I need to refine my plan because I'm not sure about the derivative rules."
    refined_reply = "I'll use the power rule for derivatives where d/dx(x^n) = n*x^(n-1).\n\nREADY: I am ready to execute the task."
    execution_reply = "4"

    return dict(
        ready=ready_reply,
        not_ready=not_ready_reply,
        ready_after_refine=refined_reply,
        execution=execution_reply,
    )


def test_agent_with_reasoning(mock_llm_responses):
    """Verify a reasoning-enabled agent returns the execution reply.

    The LLM call is replaced with a stub that answers planning prompts with
    the canned "ready" reply and every other prompt with the canned
    "execution" reply. The test then checks the returned result and that the
    task description contains a "Reasoning Plan:" section.
    """
    planning_marker = "create a detailed plan"

    def fake_llm_call(messages, *args, **kwargs):
        # A planning prompt is recognised by its marker text in any message.
        for msg in messages:
            if planning_marker in msg.get("content", ""):
                return mock_llm_responses["ready"]
        return mock_llm_responses["execution"]

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    # Swap the real LLM call for the stub so no network request is made.
    agent.llm.call = fake_llm_call

    outcome = agent.execute_task(task)

    expected_answer = mock_llm_responses["execution"]
    assert outcome == expected_answer
    assert "Reasoning Plan:" in task.description


def test_agent_with_reasoning_not_ready_initially(mock_llm_responses):
    """Verify the agent refines its plan when the first plan is not ready.

    The stubbed LLM answers the first reasoning prompt with the canned
    "not_ready" reply and later reasoning prompts with "ready_after_refine".
    Any non-reasoning prompt gets the answer "2x". The test expects exactly
    two reasoning calls and a "Reasoning Plan:" section in the description.
    """
    reasoning_markers = ("create a detailed plan", "refine your plan")
    reasoning_calls = 0

    def mock_llm_call(messages, *args, **kwargs):
        nonlocal reasoning_calls
        # Scan all messages for the first marker, then for the second.
        is_reasoning_prompt = any(
            marker in msg.get("content", "")
            for marker in reasoning_markers
            for msg in messages
        )
        if not is_reasoning_prompt:
            return "2x"

        reasoning_calls += 1
        reply_key = "not_ready" if reasoning_calls == 1 else "ready_after_refine"
        return mock_llm_responses[reply_key]

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        max_reasoning_attempts=2,
        verbose=True,
    )
    task = Task(
        description="Complex math task: What's the derivative of x²?",
        expected_output="The answer should be a mathematical expression.",
        agent=agent,
    )

    agent.llm.call = mock_llm_call

    outcome = agent.execute_task(task)

    assert outcome == "2x"
    # One initial planning call plus one refinement call.
    assert reasoning_calls == 2
    assert "Reasoning Plan:" in task.description


def test_agent_with_reasoning_max_attempts_reached():
    """Verify execution still happens when the plan never becomes ready.

    The stubbed LLM answers every reasoning prompt with a "NOT READY" reply.
    With ``max_reasoning_attempts=2`` the test expects exactly two reasoning
    calls, the stub's execution answer as the result, and a
    "Reasoning Plan:" section in the task description.
    """
    reasoning_markers = ("create a detailed plan", "refine your plan")
    final_answer = "This is an unsolved problem in mathematics."
    reasoning_calls = 0

    def mock_llm_call(messages, *args, **kwargs):
        nonlocal reasoning_calls
        # Scan all messages for the first marker, then for the second.
        is_reasoning_prompt = any(
            marker in msg.get("content", "")
            for marker in reasoning_markers
            for msg in messages
        )
        if not is_reasoning_prompt:
            return final_answer

        reasoning_calls += 1
        return f"Attempt {reasoning_calls}: I need more time to think.\n\nNOT READY: I need to refine my plan further."

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        max_reasoning_attempts=2,
        verbose=True,
    )
    task = Task(
        description="Complex math task: Solve the Riemann hypothesis.",
        expected_output="A proof or disproof of the hypothesis.",
        agent=agent,
    )

    agent.llm.call = mock_llm_call

    outcome = agent.execute_task(task)

    assert outcome == final_answer
    # The number of reasoning calls must equal max_reasoning_attempts.
    assert reasoning_calls == 2
    assert "Reasoning Plan:" in task.description


def test_agent_reasoning_error_handling():
    """Verify the task still executes when early LLM calls raise.

    The stubbed LLM raises on its first two invocations (presumably the
    reasoning calls) and returns a fixed string afterwards. The test expects
    that string as the result and more than two invocations overall.
    """
    failing_calls = 2
    fallback_answer = "Fallback execution result"
    total_calls = 0

    def mock_llm_call_error(*args, **kwargs):
        nonlocal total_calls
        total_calls += 1
        if total_calls > failing_calls:
            # Past the failing window: behave like a normal execution call.
            return fallback_answer
        raise Exception("LLM error during reasoning")

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
    )
    task = Task(
        description="Task that will cause an error",
        expected_output="Output that will never be generated",
        agent=agent,
    )

    agent.llm.call = mock_llm_call_error

    outcome = agent.execute_task(task)

    assert outcome == fallback_answer
    # The stub must have been called again after the two failures.
    assert total_calls > 2


def test_agent_with_function_calling():
    """Verify reasoning works when the LLM reports function-calling support.

    ``supports_function_calling`` is stubbed to return True. The stubbed LLM
    call returns a JSON plan whenever a ``tools`` keyword argument is passed
    and the answer "4" otherwise. The test expects that answer as the result
    and the plan text inside the task description.
    """
    plan_text = "I'll solve this simple math problem: 2+2=4."

    def always_supported():
        return True

    def mock_function_call(messages, *args, **kwargs):
        if "tools" not in kwargs:
            return "4"
        # A call carrying tools gets a structured plan back.
        structured_plan = {"plan": plan_text, "ready": True}
        return json.dumps(structured_plan)

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    agent.llm.supports_function_calling = always_supported
    agent.llm.call = mock_function_call

    outcome = agent.execute_task(task)

    assert outcome == "4"
    updated_description = task.description
    assert "Reasoning Plan:" in updated_description
    assert plan_text in updated_description


def test_agent_with_function_calling_fallback():
    """Verify reasoning copes with a non-JSON reply to a function call.

    ``supports_function_calling`` is stubbed to return True. The stubbed LLM
    call returns plain text, not JSON, whenever a ``tools`` keyword argument
    is passed and the answer "4" otherwise. The test expects that answer as
    the result and the plain text inside the task description.
    """
    non_json_reply = "Invalid JSON that will trigger fallback. READY: I am ready to execute the task."

    def always_supported():
        return True

    def mock_function_call(messages, *args, **kwargs):
        if "tools" not in kwargs:
            return "4"
        # Deliberately not JSON, so structured parsing cannot succeed.
        return non_json_reply

    agent = Agent(
        role="Test Agent",
        goal="To test the reasoning feature",
        backstory="I am a test agent created to verify the reasoning feature works correctly.",
        llm=LLM("gpt-3.5-turbo"),
        reasoning=True,
        verbose=True,
    )
    task = Task(
        description="Simple math task: What's 2+2?",
        expected_output="The answer should be a number.",
        agent=agent,
    )

    agent.llm.supports_function_calling = always_supported
    agent.llm.call = mock_function_call

    outcome = agent.execute_task(task)

    assert outcome == "4"
    updated_description = task.description
    assert "Reasoning Plan:" in updated_description
    assert "Invalid JSON that will trigger fallback" in updated_description