Add reasoning attribute to Agent class (#2866)

* Add reasoning attribute to Agent class Co-Authored-By: Joe Moura <joao@crewai.com> * Address PR feedback: improve type hints, error handling, refactor reasoning handler, and enhance tests and docs Co-Authored-By: Joe Moura <joao@crewai.com> * Implement function calling for reasoning and move prompts to translations Co-Authored-By: Joe Moura <joao@crewai.com> * Simplify function calling implementation with better error handling Co-Authored-By: Joe Moura <joao@crewai.com> * Enhance system prompts to leverage agent context (role, goal, backstory) Co-Authored-By: Joe Moura <joao@crewai.com> * Fix lint and type-checker issues Co-Authored-By: Joe Moura <joao@crewai.com> * Enhance system prompts to better leverage agent context Co-Authored-By: Joe Moura <joao@crewai.com> * Fix backstory access in reasoning handler for Python 3.12 compatibility Co-Authored-By: Joe Moura <joao@crewai.com> --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura <joao@crewai.com> Co-authored-by: João Moura <joaomdmoura@gmail.com>
2026-01-08 23:58:34 +00:00 · 2025-05-20 07:40:40 -07:00
parent 227b521f9e
commit 1ef22131e6
5 changed files with 741 additions and 0 deletions
--- a/tests/agent_reasoning_test.py
+++ b/tests/agent_reasoning_test.py
@@ -0,0 +1,261 @@
+"""Tests for reasoning in agents."""
+
+import json
+import pytest
+
+from crewai import Agent, Task
+from crewai.llm import LLM
+from crewai.utilities.reasoning_handler import AgentReasoning
+
+
+@pytest.fixture
+def mock_llm_responses():
+    """Fixture for mock LLM responses."""
+    return {
+        "ready": "I'll solve this simple math problem.\n\nREADY: I am ready to execute the task.\n\n",
+        "not_ready": "I need to think about derivatives.\n\nNOT READY: I need to refine my plan because I'm not sure about the derivative rules.",
+        "ready_after_refine": "I'll use the power rule for derivatives where d/dx(x^n) = n*x^(n-1).\n\nREADY: I am ready to execute the task.",
+        "execution": "4"
+    }
+
+
+def test_agent_with_reasoning(mock_llm_responses):
+    """Test agent with reasoning."""
+    llm = LLM("gpt-3.5-turbo")
+    
+    agent = Agent(
+        role="Test Agent",
+        goal="To test the reasoning feature",
+        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        llm=llm,
+        reasoning=True,
+        verbose=True
+    )
+    
+    task = Task(
+        description="Simple math task: What's 2+2?",
+        expected_output="The answer should be a number.",
+        agent=agent
+    )
+    
+    agent.llm.call = lambda messages, *args, **kwargs: (
+        mock_llm_responses["ready"]
+        if any("create a detailed plan" in msg.get("content", "") for msg in messages)
+        else mock_llm_responses["execution"]
+    )
+    
+    result = agent.execute_task(task)
+    
+    assert result == mock_llm_responses["execution"]
+    assert "Reasoning Plan:" in task.description
+
+
+def test_agent_with_reasoning_not_ready_initially(mock_llm_responses):
+    """Test agent with reasoning that requires refinement."""
+    llm = LLM("gpt-3.5-turbo")
+    
+    agent = Agent(
+        role="Test Agent",
+        goal="To test the reasoning feature",
+        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        llm=llm,
+        reasoning=True,
+        max_reasoning_attempts=2,
+        verbose=True
+    )
+    
+    task = Task(
+        description="Complex math task: What's the derivative of x²?",
+        expected_output="The answer should be a mathematical expression.",
+        agent=agent
+    )
+    
+    call_count = [0]
+    
+    def mock_llm_call(messages, *args, **kwargs):
+        if any("create a detailed plan" in msg.get("content", "") for msg in messages) or any("refine your plan" in msg.get("content", "") for msg in messages):
+            call_count[0] += 1
+            if call_count[0] == 1:
+                return mock_llm_responses["not_ready"]
+            else:
+                return mock_llm_responses["ready_after_refine"]
+        else:
+            return "2x"
+    
+    agent.llm.call = mock_llm_call
+    
+    result = agent.execute_task(task)
+    
+    assert result == "2x"
+    assert call_count[0] == 2  # Should have made 2 reasoning calls
+    assert "Reasoning Plan:" in task.description
+
+
+def test_agent_with_reasoning_max_attempts_reached():
+    """Test agent with reasoning that reaches max attempts without being ready."""
+    llm = LLM("gpt-3.5-turbo")
+    
+    agent = Agent(
+        role="Test Agent",
+        goal="To test the reasoning feature",
+        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        llm=llm,
+        reasoning=True,
+        max_reasoning_attempts=2,
+        verbose=True
+    )
+    
+    task = Task(
+        description="Complex math task: Solve the Riemann hypothesis.",
+        expected_output="A proof or disproof of the hypothesis.",
+        agent=agent
+    )
+    
+    call_count = [0]
+    
+    def mock_llm_call(messages, *args, **kwargs):
+        if any("create a detailed plan" in msg.get("content", "") for msg in messages) or any("refine your plan" in msg.get("content", "") for msg in messages):
+            call_count[0] += 1
+            return f"Attempt {call_count[0]}: I need more time to think.\n\nNOT READY: I need to refine my plan further."
+        else:
+            return "This is an unsolved problem in mathematics."
+    
+    agent.llm.call = mock_llm_call
+    
+    result = agent.execute_task(task)
+    
+    assert result == "This is an unsolved problem in mathematics."
+    assert call_count[0] == 2  # Should have made exactly 2 reasoning calls (max_attempts)
+    assert "Reasoning Plan:" in task.description
+
+
+def test_agent_reasoning_input_validation():
+    """Test input validation in AgentReasoning."""
+    llm = LLM("gpt-3.5-turbo")
+    
+    agent = Agent(
+        role="Test Agent",
+        goal="To test the reasoning feature",
+        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        llm=llm,
+        reasoning=True
+    )
+    
+    with pytest.raises(ValueError, match="Both task and agent must be provided"):
+        AgentReasoning(task=None, agent=agent)
+    
+    task = Task(
+        description="Simple task",
+        expected_output="Simple output"
+    )
+    with pytest.raises(ValueError, match="Both task and agent must be provided"):
+        AgentReasoning(task=task, agent=None)
+
+
+def test_agent_reasoning_error_handling():
+    """Test error handling during the reasoning process."""
+    llm = LLM("gpt-3.5-turbo")
+    
+    agent = Agent(
+        role="Test Agent",
+        goal="To test the reasoning feature",
+        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        llm=llm,
+        reasoning=True
+    )
+    
+    task = Task(
+        description="Task that will cause an error",
+        expected_output="Output that will never be generated",
+        agent=agent
+    )
+    
+    call_count = [0]
+    
+    def mock_llm_call_error(*args, **kwargs):
+        call_count[0] += 1
+        if call_count[0] <= 2:  # First calls are for reasoning
+            raise Exception("LLM error during reasoning")
+        return "Fallback execution result"  # Return a value for task execution
+    
+    agent.llm.call = mock_llm_call_error
+    
+    result = agent.execute_task(task)
+    
+    assert result == "Fallback execution result"
+    assert call_count[0] > 2  # Ensure we called the mock multiple times
+
+
+def test_agent_with_function_calling():
+    """Test agent with reasoning using function calling."""
+    llm = LLM("gpt-3.5-turbo")
+    
+    agent = Agent(
+        role="Test Agent",
+        goal="To test the reasoning feature",
+        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        llm=llm,
+        reasoning=True,
+        verbose=True
+    )
+    
+    task = Task(
+        description="Simple math task: What's 2+2?",
+        expected_output="The answer should be a number.",
+        agent=agent
+    )
+    
+    agent.llm.supports_function_calling = lambda: True
+    
+    def mock_function_call(messages, *args, **kwargs):
+        if "tools" in kwargs:
+            return json.dumps({
+                "plan": "I'll solve this simple math problem: 2+2=4.",
+                "ready": True
+            })
+        else:
+            return "4"
+    
+    agent.llm.call = mock_function_call
+    
+    result = agent.execute_task(task)
+    
+    assert result == "4"
+    assert "Reasoning Plan:" in task.description
+    assert "I'll solve this simple math problem: 2+2=4." in task.description
+
+
+def test_agent_with_function_calling_fallback():
+    """Test agent with reasoning using function calling that falls back to text parsing."""
+    llm = LLM("gpt-3.5-turbo")
+    
+    agent = Agent(
+        role="Test Agent",
+        goal="To test the reasoning feature",
+        backstory="I am a test agent created to verify the reasoning feature works correctly.",
+        llm=llm,
+        reasoning=True,
+        verbose=True
+    )
+    
+    task = Task(
+        description="Simple math task: What's 2+2?",
+        expected_output="The answer should be a number.",
+        agent=agent
+    )
+    
+    agent.llm.supports_function_calling = lambda: True
+    
+    def mock_function_call(messages, *args, **kwargs):
+        if "tools" in kwargs:
+            return "Invalid JSON that will trigger fallback. READY: I am ready to execute the task."
+        else:
+            return "4"
+    
+    agent.llm.call = mock_function_call
+    
+    result = agent.execute_task(task)
+    
+    assert result == "4"
+    assert "Reasoning Plan:" in task.description
+    assert "Invalid JSON that will trigger fallback" in task.description