Agent State Step 1

João Moura
2025-05-31 23:15:39 -07:00
parent e3cd7209ad
commit 7009a6b7a0
5 changed files with 579 additions and 54 deletions

View File

@@ -71,6 +71,7 @@ class Agent(BaseAgent):
"""
_times_executed: int = PrivateAttr(default=0)
_last_reasoning_output: Optional[Any] = PrivateAttr(default=None)
max_execution_time: Optional[int] = Field(
default=None,
description="Maximum execution time for an agent to execute a task",
@@ -388,6 +389,9 @@ class Agent(BaseAgent):
reasoning_output: AgentReasoningOutput = reasoning_handler.handle_agent_reasoning()
# Store the reasoning output for the executor to use
self._last_reasoning_output = reasoning_output
plan_text = reasoning_output.plan.plan
internal_plan_msg = (
@@ -483,6 +487,10 @@ class Agent(BaseAgent):
self,
event=AgentExecutionCompletedEvent(agent=self, task=task, output=result),
)
# Clean up reasoning output after task completion
self._last_reasoning_output = None
return result
def _execute_with_timeout(self, task_prompt: str, task: Task, timeout: int) -> str:
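
The hunks above implement a one-shot handoff: the agent stashes its reasoning output on a private attribute, the executor consumes it once, and the attribute is cleared when the task completes. A minimal standalone sketch of that lifecycle, using simplified stand-in names rather than the actual crewAI classes:

from typing import Any, Optional
from pydantic import BaseModel, PrivateAttr

class SketchAgent(BaseModel):
    """Stand-in for Agent; illustrates only the PrivateAttr handoff."""
    _last_reasoning_output: Optional[Any] = PrivateAttr(default=None)

    def reason(self) -> None:
        # In the real code this comes from AgentReasoning.handle_agent_reasoning()
        self._last_reasoning_output = {"plan": "1. Research topic\n2. Draft answer"}

    def execute_task(self) -> str:
        # The executor reads the stashed output exactly once
        output = self._last_reasoning_output
        try:
            return f"executed with: {output}"
        finally:
            # Mirrors the cleanup after AgentExecutionCompletedEvent above
            self._last_reasoning_output = None

agent = SketchAgent()
agent.reason()
print(agent.execute_task())
print(agent._last_reasoning_output)  # None: state does not leak into the next task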

View File

@@ -0,0 +1,200 @@
"""Agent state management for long-running tasks."""
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, Field
from datetime import datetime
class ToolUsage(BaseModel):
"""Record of a single tool usage."""
tool_name: str = Field(description="Name of the tool used")
arguments: Dict[str, Any] = Field(description="Arguments passed to the tool (may be truncated)")
result_summary: Optional[str] = Field(default=None, description="Brief summary of the tool's result")
timestamp: datetime = Field(default_factory=datetime.now, description="When the tool was used")
step_number: int = Field(description="Which execution step this tool was used in")
class AgentState(BaseModel):
"""Persistent state object for agent task execution.
This state object helps agents maintain coherence during long-running tasks
by tracking plans, progress, and intermediate results without relying solely
on conversation history.
"""
# Core fields
completed: bool = Field(
default=False,
description="Whether the current task is finished"
)
original_plan: List[str] = Field(
default_factory=list,
description="The initial plan from first reasoning pass. Never overwrite unless user requests complete replan"
)
last_plan: List[str] = Field(
default_factory=list,
description="The most recent plan (original or mid-execution update)"
)
acceptance_criteria: List[str] = Field(
default_factory=list,
description="Concrete goals to satisfy before marking completed=true"
)
scratchpad: Dict[str, Any] = Field(
default_factory=dict,
description="Agent-defined storage for intermediate results and metadata"
)
tool_usage_history: List[ToolUsage] = Field(
default_factory=list,
description="Detailed history of tool usage including arguments and results"
)
# Additional tracking fields
task_id: Optional[str] = Field(
default=None,
description="ID of the current task being executed"
)
created_at: datetime = Field(
default_factory=datetime.now,
description="When this state was created"
)
last_updated: datetime = Field(
default_factory=datetime.now,
description="When this state was last modified"
)
steps_completed: int = Field(
default=0,
description="Number of execution steps completed"
)
def update_last_plan(self, new_plan: List[str]) -> None:
"""Update the last plan and timestamp."""
self.last_plan = new_plan
self.last_updated = datetime.now()
def set_original_plan(self, plan: List[str]) -> None:
"""Set the original plan (only if not already set)."""
if not self.original_plan:
self.original_plan = plan
self.last_plan = plan
self.last_updated = datetime.now()
def add_to_scratchpad(self, key: str, value: Any) -> None:
"""Add or update a value in the scratchpad."""
self.scratchpad[key] = value
self.last_updated = datetime.now()
def record_tool_usage(
self,
tool_name: str,
arguments: Dict[str, Any],
result_summary: Optional[str] = None,
max_arg_length: int = 200
) -> None:
"""Record a tool usage with truncated arguments.
Args:
tool_name: Name of the tool used
arguments: Arguments passed to the tool
result_summary: Optional brief summary of the result
max_arg_length: Maximum length for string arguments before truncation
"""
# Truncate long string arguments to prevent state bloat
truncated_args = {}
for key, value in arguments.items():
if isinstance(value, str) and len(value) > max_arg_length:
truncated_args[key] = value[:max_arg_length] + "..."
elif isinstance(value, (list, dict)):
# For complex types, store a summary
truncated_args[key] = f"<{type(value).__name__} with {len(value)} items>"
else:
truncated_args[key] = value
tool_usage = ToolUsage(
tool_name=tool_name,
arguments=truncated_args,
result_summary=result_summary,
step_number=self.steps_completed
)
self.tool_usage_history.append(tool_usage)
self.last_updated = datetime.now()
def increment_steps(self) -> None:
"""Increment the step counter."""
self.steps_completed += 1
self.last_updated = datetime.now()
def mark_completed(self) -> None:
"""Mark the task as completed."""
self.completed = True
self.last_updated = datetime.now()
def reset(self, task_id: Optional[str] = None) -> None:
"""Reset state for a new task."""
self.completed = False
self.original_plan = []
self.last_plan = []
self.acceptance_criteria = []
self.scratchpad = {}
self.tool_usage_history = []
self.task_id = task_id
self.created_at = datetime.now()
self.last_updated = datetime.now()
self.steps_completed = 0
def to_context_string(self) -> str:
"""Generate a concise string representation for LLM context."""
context = f"Current State (Step {self.steps_completed}):\n"
context += f"- Task ID: {self.task_id}\n"
context += f"- Completed: {self.completed}\n"
if self.acceptance_criteria:
context += "- Acceptance Criteria:\n"
for criterion in self.acceptance_criteria:
context += f"{criterion}\n"
if self.last_plan:
context += "- Current Plan:\n"
for i, step in enumerate(self.last_plan, 1):
context += f" {i}. {step}\n"
if self.tool_usage_history:
context += "- Recent Tool Usage:\n"
# Show last 5 tool uses
recent_tools = self.tool_usage_history[-5:]
for usage in recent_tools:
context += f" • Step {usage.step_number}: {usage.tool_name}"
if usage.arguments:
args_preview = ", ".join(f"{k}={v}" for k, v in list(usage.arguments.items())[:2])
context += f"({args_preview})"
context += "\n"
if self.scratchpad:
context += "- Scratchpad:\n"
for key, value in self.scratchpad.items():
context += f"{key}: {value}\n"
return context
def get_tools_summary(self) -> Dict[str, Any]:
"""Get a summary of tool usage statistics."""
if not self.tool_usage_history:
return {"total_tool_uses": 0, "unique_tools": 0, "tools_by_frequency": {}}
tool_counts = {}
for usage in self.tool_usage_history:
tool_counts[usage.tool_name] = tool_counts.get(usage.tool_name, 0) + 1
return {
"total_tool_uses": len(self.tool_usage_history),
"unique_tools": len(set(usage.tool_name for usage in self.tool_usage_history)),
"tools_by_frequency": dict(sorted(tool_counts.items(), key=lambda x: x[1], reverse=True))
}
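
Since agent_state.py is brand new in this commit, a quick usage sketch may help. It assumes the module path used by the executor import further down (crewai.agents.agent_state); illustrative only, not part of the diff:

from crewai.agents.agent_state import AgentState

state = AgentState(task_id="task-123")
state.set_original_plan(["Search for sources", "Summarize findings"])
state.acceptance_criteria = ["Summary cites at least two sources"]

state.increment_steps()
state.record_tool_usage(
    tool_name="web_search",
    arguments={"query": "x" * 500},  # >200 chars, stored truncated with "..."
    result_summary="Found 3 relevant articles",
)

print(state.to_context_string())  # compact snapshot injected into LLM context
print(state.get_tools_summary())  # {'total_tool_uses': 1, 'unique_tools': 1, ...}
state.mark_completed()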

View File

@@ -1,8 +1,8 @@
from collections import deque
from typing import Any, Callable, Dict, List, Optional, Union, cast
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.agents.agent_builder.base_agent_executor_mixin import CrewAgentExecutorMixin
from crewai.agents.agent_state import AgentState
from crewai.agents.parser import (
AgentAction,
AgentFinish,
@@ -84,8 +84,8 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.tool_name_to_tool_map: Dict[str, Union[CrewStructuredTool, BaseTool]] = {
tool.name: tool for tool in self.tools
}
self.tools_used: deque[str] = deque(maxlen=100) # Limit history size
self.steps_since_reasoning = 0
self.agent_state: AgentState = AgentState(task_id=str(task.id) if task else None)
existing_stop = self.llm.stop or []
self.llm.stop = list(
set(
@@ -96,6 +96,9 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
def invoke(self, inputs: Dict[str, str]) -> Dict[str, Any]:
# Reset agent state for new task execution
self.agent_state.reset(task_id=str(self.task.id) if self.task else None)
if "system" in self.prompt:
system_prompt = self._format_prompt(self.prompt.get("system", ""), inputs)
user_prompt = self._format_prompt(self.prompt.get("user", ""), inputs)
@@ -110,6 +113,10 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self.ask_for_human_input = bool(inputs.get("ask_for_human_input", False))
try:
# Populate agent state from reasoning output if available
if hasattr(self.agent, "reasoning") and self.agent.reasoning:
self._populate_state_from_reasoning()
formatted_answer = self._invoke_loop()
except AssertionError:
self._printer.print(
@@ -128,11 +135,52 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
if self.ask_for_human_input:
formatted_answer = self._handle_human_feedback(formatted_answer)
# Mark task as completed in agent state
self.agent_state.mark_completed()
self._create_short_term_memory(formatted_answer)
self._create_long_term_memory(formatted_answer)
self._create_external_memory(formatted_answer)
return {"output": formatted_answer.output}
def _populate_state_from_reasoning(self) -> None:
"""Populate agent state from the reasoning output if available."""
try:
# Check if the agent has reasoning output from the initial reasoning
if hasattr(self.agent, '_last_reasoning_output') and self.agent._last_reasoning_output:
reasoning_output = self.agent._last_reasoning_output
# Extract structured plan if available
if reasoning_output.plan.structured_plan:
self.agent_state.set_original_plan(reasoning_output.plan.structured_plan.steps)
self.agent_state.acceptance_criteria = reasoning_output.plan.structured_plan.acceptance_criteria
elif reasoning_output.plan.plan:
# Fallback: try to extract steps from unstructured plan
plan_lines = [line.strip() for line in reasoning_output.plan.plan.split('\n') if line.strip()]
# Take meaningful lines that look like steps (skip headers, empty lines, etc.)
steps = []
for line in plan_lines:
if line and not line.startswith('###') and not line.startswith('**'):
steps.append(line)
if len(steps) >= 10: # Limit to 10 steps
break
if steps:
self.agent_state.set_original_plan(steps)
# Add state context to messages for coherence
if self.agent_state.original_plan:
state_context = f"Initial plan loaded with {len(self.agent_state.original_plan)} steps."
self._append_message(state_context, role="assistant")
# Clear the reasoning output to avoid using it again
self.agent._last_reasoning_output = None
except Exception as e:
self._printer.print(
content=f"Error populating state from reasoning: {str(e)}",
color="yellow",
)
def _invoke_loop(self) -> AgentFinish:
"""
Main loop to invoke the agent's thought process until it reaches a conclusion
@@ -191,6 +239,37 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
formatted_answer, tool_result
)
# Record detailed tool usage in agent state
if hasattr(formatted_answer, 'tool') and formatted_answer.tool:
# Extract tool arguments from the agent action
tool_args = {}
if hasattr(formatted_answer, 'tool_input') and formatted_answer.tool_input:
if isinstance(formatted_answer.tool_input, dict):
tool_args = formatted_answer.tool_input
elif isinstance(formatted_answer.tool_input, str):
# Try to parse JSON if it's a string
try:
import json
tool_args = json.loads(formatted_answer.tool_input)
except (json.JSONDecodeError, TypeError):
tool_args = {"input": formatted_answer.tool_input}
# Truncate result for summary
result_summary = None
if tool_result and hasattr(tool_result, 'result'):
result_str = str(tool_result.result)
result_summary = result_str[:200] + "..." if len(result_str) > 200 else result_str
# Record the tool usage with arguments
self.agent_state.record_tool_usage(
tool_name=formatted_answer.tool,
arguments=tool_args,
result_summary=result_summary
)
# Increment steps in agent state
self.agent_state.increment_steps()
if self._should_trigger_reasoning():
self._handle_mid_execution_reasoning()
else:
@@ -242,10 +321,6 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
self, formatted_answer: AgentAction, tool_result: ToolResult
) -> Union[AgentAction, AgentFinish]:
"""Handle the AgentAction, execute tools, and process the results."""
if hasattr(formatted_answer, 'tool') and formatted_answer.tool:
if formatted_answer.tool not in self.tools_used:
self.tools_used.append(formatted_answer.tool)
# Special case for add_image_tool
add_image_tool = self._i18n.tools("add_image")
if (
@@ -485,12 +560,30 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
current_progress = self._summarize_current_progress()
# Build detailed tools used list from agent state
tools_used_detailed = []
for usage in self.agent_state.tool_usage_history:
tool_desc = f"{usage.tool_name}"
if usage.arguments:
args_preview = ", ".join(f"{k}={v}" for k, v in list(usage.arguments.items())[:2])
tool_desc += f"({args_preview})"
tools_used_detailed.append(tool_desc)
# Get tool usage statistics and patterns
tool_stats = self.agent_state.get_tools_summary()
# Detect patterns in tool usage
tool_patterns = self._detect_tool_patterns()
if tool_patterns:
tool_stats['recent_patterns'] = tool_patterns
reasoning_handler = AgentReasoning(task=self.task, agent=cast(Agent, self.agent))
return reasoning_handler.should_adaptive_reason_llm(
current_steps=self.iterations,
tools_used=list(self.tools_used),
tools_used=tools_used_detailed,
current_progress=current_progress,
tool_usage_stats=tool_stats
)
except Exception as e:
self._printer.print(
@@ -499,16 +592,47 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
)
return False
def _has_recent_errors(self) -> bool:
"""Check for error indicators in recent messages."""
error_indicators = ["error", "exception", "failed", "unable to", "couldn't"]
recent_messages = self.messages[-3:] if len(self.messages) >= 3 else self.messages
for message in recent_messages:
content = message.get("content", "").lower()
if any(indicator in content for indicator in error_indicators):
return True
return False
def _detect_tool_patterns(self) -> Optional[str]:
"""
Detect patterns in recent tool usage that might indicate issues.
Returns:
Optional[str]: Description of detected patterns, or None
"""
if not self.agent_state.tool_usage_history:
return None
patterns = []
# Check for repeated use of the same tool with similar arguments
recent_tools = self.agent_state.tool_usage_history[-5:] if len(self.agent_state.tool_usage_history) >= 5 else self.agent_state.tool_usage_history
# Count consecutive uses of the same tool
if len(recent_tools) >= 2:
consecutive_count = 1
for i in range(1, len(recent_tools)):
if recent_tools[i].tool_name == recent_tools[i-1].tool_name:
consecutive_count += 1
if consecutive_count >= 3:
patterns.append(f"Same tool ({recent_tools[i].tool_name}) used {consecutive_count} times consecutively")
else:
consecutive_count = 1
# Check for tools with empty or error results
error_count = 0
for usage in recent_tools:
if usage.result_summary and any(keyword in usage.result_summary.lower()
for keyword in ['error', 'failed', 'not found', 'empty']):
error_count += 1
if error_count >= 2:
patterns.append(f"{error_count} tools returned errors or empty results recently")
# Check for rapid tool switching (might indicate confusion)
if len(set(usage.tool_name for usage in recent_tools)) == len(recent_tools) and len(recent_tools) >= 4:
patterns.append("Rapid switching between different tools without repetition")
return "; ".join(patterns) if patterns else None
def _handle_mid_execution_reasoning(self) -> None:
"""
@@ -522,21 +646,51 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
current_progress = self._summarize_current_progress()
# Include agent state in progress summary
state_info = f"\n\n{self.agent_state.to_context_string()}"
current_progress += state_info
from crewai.agent import Agent
reasoning_handler = AgentReasoning(task=self.task, agent=cast(Agent, self.agent))
# Build detailed tools used list from agent state
tools_used_detailed = []
for usage in self.agent_state.tool_usage_history:
tool_desc = f"{usage.tool_name}"
if usage.arguments:
args_preview = ", ".join(f"{k}={v}" for k, v in list(usage.arguments.items())[:2])
tool_desc += f"({args_preview})"
tools_used_detailed.append(tool_desc)
reasoning_output = reasoning_handler.handle_mid_execution_reasoning(
current_steps=self.iterations,
tools_used=list(self.tools_used),
tools_used=tools_used_detailed,
current_progress=current_progress,
iteration_messages=self.messages
)
# Update agent state with new plan if available
if reasoning_output.plan.structured_plan:
self.agent_state.update_last_plan(reasoning_output.plan.structured_plan.steps)
# Update acceptance criteria if they changed
if reasoning_output.plan.structured_plan.acceptance_criteria:
self.agent_state.acceptance_criteria = reasoning_output.plan.structured_plan.acceptance_criteria
# Add a note about the reasoning update to scratchpad
self.agent_state.add_to_scratchpad(
f"reasoning_update_{self.iterations}",
{
"reason": "Mid-execution reasoning triggered",
"updated_plan": bool(reasoning_output.plan.structured_plan)
}
)
updated_plan_msg = (
self._i18n.retrieve("reasoning", "mid_execution_reasoning_update").format(
plan=reasoning_output.plan.plan
) +
f"\n\nUpdated State:\n{self.agent_state.to_context_string()}" +
"\n\nRemember: strictly follow the updated plan above and ensure the final answer fully meets the EXPECTED OUTPUT criteria."
)
@@ -561,9 +715,25 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
summary = f"After {self.iterations} steps, "
if self.tools_used:
unique_tools = set(self.tools_used)
summary += f"I've used {len(self.tools_used)} tools ({', '.join(unique_tools)}). "
# Use tool usage history from agent state for better context
if self.agent_state.tool_usage_history:
tool_summary = self.agent_state.get_tools_summary()
summary += f"I've used {tool_summary['total_tool_uses']} tools ({tool_summary['unique_tools']} unique). "
# Include most frequently used tools
if tool_summary['tools_by_frequency']:
top_tools = list(tool_summary['tools_by_frequency'].items())[:3]
tools_str = ", ".join(f"{tool} ({count}x)" for tool, count in top_tools)
summary += f"Most used: {tools_str}. "
# Include details of the last tool use
if self.agent_state.tool_usage_history:
last_tool = self.agent_state.tool_usage_history[-1]
summary += f"Last tool: {last_tool.tool_name}"
if last_tool.arguments:
args_str = ", ".join(f"{k}={v}" for k, v in list(last_tool.arguments.items())[:2])
summary += f" with args ({args_str})"
summary += ". "
else:
summary += "I haven't used any tools yet. "
@@ -574,3 +744,14 @@ class CrewAgentExecutor(CrewAgentExecutorMixin):
summary += f"Most recent action: {last_message}"
return summary
def _has_recent_errors(self) -> bool:
"""Check for error indicators in recent messages."""
error_indicators = ["error", "exception", "failed", "unable to", "couldn't"]
recent_messages = self.messages[-3:] if len(self.messages) >= 3 else self.messages
for message in recent_messages:
content = message.get("content", "").lower()
if any(indicator in content for indicator in error_indicators):
return True
return False
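
The interplay between record_tool_usage, _detect_tool_patterns, and the adaptive-reasoning trigger is easiest to see with synthetic history. A sketch that reproduces the "same tool, repeated errors" situation the heuristics look for (illustrative; the checks below mirror, rather than call, the private executor method):

from crewai.agents.agent_state import AgentState

state = AgentState()
for _ in range(3):
    state.increment_steps()
    state.record_tool_usage(
        tool_name="file_reader",
        arguments={"path": "data.csv"},
        result_summary="Error: file not found",
    )

# Three consecutive uses of one tool, all returning errors: the exact
# combination _detect_tool_patterns reports and should_adaptive_reason_llm
# then receives inside tool_usage_stats["recent_patterns"].
recent = state.tool_usage_history[-5:]
consecutive = len({u.tool_name for u in recent}) == 1 and len(recent) >= 3
error_count = sum(
    1
    for u in recent
    if u.result_summary and "error" in u.result_summary.lower()
)
print(consecutive, error_count)  # True 3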

View File

@@ -55,12 +55,12 @@
"reasoning": {
"initial_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are creating a strategic plan for a task that requires your expertise and unique perspective.",
"refine_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are refining a strategic plan for a task that requires your expertise and unique perspective.",
"create_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou have been assigned the following task:\n{description}\n\nExpected output:\n{expected_output}\n\nAvailable tools: {tools}\n\nBefore executing this task, create a detailed plan that leverages your expertise as {role} and outlines:\n1. Your understanding of the task from your professional perspective\n2. The key steps you'll take to complete it, drawing on your background and skills\n3. How you'll approach any challenges that might arise, considering your expertise\n4. How you'll strategically use the available tools based on your experience, exactly what tools to use and how to use them\n5. The expected outcome and how it aligns with your goal\n\nRemember: your ultimate objective is to produce the most COMPLETE Final Answer that fully meets the **Expected output** criteria.\n\nAfter creating your plan, assess whether you feel ready to execute the task or if you could do better.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan because [specific reason].\"",
"refine_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou created the following plan for this task:\n{current_plan}\n\nHowever, you indicated that you're not ready to execute the task yet.\n\nPlease refine your plan further, drawing on your expertise as {role} to address any gaps or uncertainties. As you refine your plan, be specific about which available tools you will use, how you will use them, and why they are the best choices for each step. Clearly outline your tool usage strategy as part of your improved plan.\n\nMake sure your refined strategy directly guides you toward producing the most COMPLETE Final Answer that fully satisfies the **Expected output**.\n\nAfter refining your plan, assess whether you feel ready to execute the task.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan further because [specific reason].\"",
"create_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou have been assigned the following task:\n{description}\n\nExpected output:\n{expected_output}\n\nAvailable tools: {tools}\n\nBefore executing this task, create a detailed plan that leverages your expertise as {role} and outlines:\n1. Your understanding of the task from your professional perspective\n2. The key steps you'll take to complete it, drawing on your background and skills\n3. How you'll approach any challenges that might arise, considering your expertise\n4. How you'll strategically use the available tools based on your experience, exactly what tools to use and how to use them\n5. The expected outcome and how it aligns with your goal\n\nIMPORTANT: Structure your plan as follows:\n\nSTEPS:\n1. [First concrete action step]\n2. [Second concrete action step]\n3. [Continue with numbered steps...]\n\nACCEPTANCE CRITERIA:\n- [First criterion that must be met]\n- [Second criterion that must be met]\n- [Continue with criteria...]\n\nRemember: your ultimate objective is to produce the most COMPLETE Final Answer that fully meets the **Expected output** criteria.\n\nAfter creating your plan, assess whether you feel ready to execute the task or if you could do better.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan because [specific reason].\"",
"refine_plan_prompt": "You are {role} with this background: {backstory}\n\nYour primary goal is: {goal}\n\nYou created the following plan for this task:\n{current_plan}\n\nHowever, you indicated that you're not ready to execute the task yet.\n\nPlease refine your plan further, drawing on your expertise as {role} to address any gaps or uncertainties. As you refine your plan, be specific about which available tools you will use, how you will use them, and why they are the best choices for each step. Clearly outline your tool usage strategy as part of your improved plan.\n\nIMPORTANT: Structure your refined plan as follows:\n\nSTEPS:\n1. [First concrete action step]\n2. [Second concrete action step]\n3. [Continue with numbered steps...]\n\nACCEPTANCE CRITERIA:\n- [First criterion that must be met]\n- [Second criterion that must be met]\n- [Continue with criteria...]\n\nMake sure your refined strategy directly guides you toward producing the most COMPLETE Final Answer that fully satisfies the **Expected output**.\n\nAfter refining your plan, assess whether you feel ready to execute the task.\nConclude with one of these statements:\n- \"READY: I am ready to execute the task.\"\n- \"NOT READY: I need to refine my plan further because [specific reason].\"",
"adaptive_reasoning_decision": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are currently executing a task and need to decide whether to pause and reassess your plan based on the current context.",
"mid_execution_reasoning": "You are currently executing a task and need to reassess your plan based on progress so far.\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT PROGRESS:\nSteps completed: {current_steps}\nTools used: {tools_used}\nProgress summary: {current_progress}\n\nRECENT CONVERSATION:\n{recent_messages}\n\nYour reassessment MUST focus on steering the remaining work toward a FINAL ANSWER that is as complete as possible and perfectly matches the **Expected output**.\n\nBased on the current progress and context, please reassess your plan for completing this task.\nConsider what has been accomplished, what challenges you've encountered, and what steps remain.\nAdjust your strategy if needed or confirm your current approach is still optimal.\n\nProvide a detailed updated plan for completing the task.\nEnd with \"READY: I am ready to continue executing the task.\" if you're confident in your plan.",
"mid_execution_reasoning": "You are currently executing a task and need to reassess your plan based on progress so far.\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT PROGRESS:\nSteps completed: {current_steps}\nTools used: {tools_used}\nProgress summary: {current_progress}\n\nRECENT CONVERSATION:\n{recent_messages}\n\nYour reassessment MUST focus on steering the remaining work toward a FINAL ANSWER that is as complete as possible and perfectly matches the **Expected output**.\n\nBased on the current progress and context, please reassess your plan for completing this task.\nConsider what has been accomplished, what challenges you've encountered, and what steps remain.\nAdjust your strategy if needed or confirm your current approach is still optimal.\n\nIMPORTANT: Structure your updated plan as follows:\n\nREMAINING STEPS:\n1. [First remaining action step]\n2. [Second remaining action step]\n3. [Continue with numbered steps...]\n\nUPDATED ACCEPTANCE CRITERIA (if changed):\n- [First criterion that must be met]\n- [Second criterion that must be met]\n- [Continue with criteria...]\n\nProvide a detailed updated plan for completing the task.\nEnd with \"READY: I am ready to continue executing the task.\" if you're confident in your plan.",
"mid_execution_plan": "You are {role}, a professional with the following background: {backstory}\n\nYour primary goal is: {goal}\n\nAs {role}, you are reassessing your plan during task execution based on the progress made so far.",
"mid_execution_reasoning_update": "I've reassessed my approach based on progress so far. Updated plan:\n\n{plan}",
"adaptive_reasoning_context": "\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT EXECUTION CONTEXT:\n- Steps completed: {current_steps}\n- Tools used: {tools_used}\n- Progress summary: {current_progress}\n\nConsider whether the current approach is optimal or if a strategic pause to reassess would be beneficial. You should reason when:\n- You might be approaching the task inefficiently\n- The context suggests a different strategy might be better\n- You're uncertain about the next steps\n- The progress suggests you need to reconsider your approach\n\nDecide whether reasoning/re-planning is needed at this point."
"adaptive_reasoning_context": "\n\nTASK DESCRIPTION:\n{description}\n\nEXPECTED OUTPUT:\n{expected_output}\n\nCURRENT EXECUTION CONTEXT:\n- Steps completed: {current_steps}\n- Tools used: {tools_used}\n- Progress summary: {current_progress}\n\nConsider whether the current approach is optimal or if a strategic pause to reassess would be beneficial. You should reason when:\n- You might be approaching the task inefficiently\n- The context suggests a different strategy might be better\n- You're uncertain about the next steps\n- The progress suggests you need to reconsider your approach\n- Tool usage patterns indicate issues (e.g., repeated failures, same tool used many times, rapid switching)\n- Multiple tools have returned errors or empty results\n- You're using the same tool repeatedly without making progress\n\nPay special attention to the TOOL USAGE STATISTICS section if present, as it reveals patterns that might not be obvious from the tool list alone.\n\nDecide whether reasoning/re-planning is needed at this point."
}
}
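
For reference, a response following the new STEPS / ACCEPTANCE CRITERIA layout these prompts request would look something like this (an invented example, not actual model output):

STEPS:
1. Gather the three most recent quarterly reports
2. Extract the revenue figures into a comparison table
3. Draft the summary and check it against the expected output

ACCEPTANCE CRITERIA:
- The table covers all three quarters
- The summary states the overall revenue trend

READY: I am ready to execute the task.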

View File

@@ -1,6 +1,6 @@
import logging
import json
from typing import Tuple, cast
from typing import Tuple, cast, List, Optional, Dict, Any
from pydantic import BaseModel, Field
@@ -16,10 +16,17 @@ from crewai.utilities.events.reasoning_events import (
)
class StructuredPlan(BaseModel):
"""Structured representation of a task plan."""
steps: List[str] = Field(description="List of steps to complete the task")
acceptance_criteria: List[str] = Field(description="Criteria that must be met before task is considered complete")
class ReasoningPlan(BaseModel):
"""Model representing a reasoning plan for a task."""
plan: str = Field(description="The detailed reasoning plan for the task.")
ready: bool = Field(description="Whether the agent is ready to execute the task.")
structured_plan: Optional[StructuredPlan] = Field(default=None, description="Structured version of the plan")
class AgentReasoningOutput(BaseModel):
@@ -31,6 +38,8 @@ class ReasoningFunction(BaseModel):
"""Model for function calling with reasoning."""
plan: str = Field(description="The detailed reasoning plan for the task.")
ready: bool = Field(description="Whether the agent is ready to execute the task.")
steps: Optional[List[str]] = Field(default=None, description="List of steps to complete the task")
acceptance_criteria: Optional[List[str]] = Field(default=None, description="Criteria that must be met before task is complete")
class AgentReasoning:
@@ -119,25 +128,25 @@ class AgentReasoning:
Returns:
AgentReasoningOutput: The output of the agent reasoning process.
"""
plan, ready = self.__create_initial_plan()
plan, ready, structured_plan = self.__create_initial_plan()
plan, ready = self.__refine_plan_if_needed(plan, ready)
plan, ready, structured_plan = self.__refine_plan_if_needed(plan, ready, structured_plan)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready, structured_plan=structured_plan)
return AgentReasoningOutput(plan=reasoning_plan)
def __create_initial_plan(self) -> Tuple[str, bool]:
def __create_initial_plan(self) -> Tuple[str, bool, Optional[StructuredPlan]]:
"""
Creates the initial reasoning plan for the task.
Returns:
Tuple[str, bool]: The initial plan and whether the agent is ready to execute the task.
Tuple[str, bool, Optional[StructuredPlan]]: The initial plan, whether the agent is ready, and structured plan.
"""
reasoning_prompt = self.__create_reasoning_prompt()
if self.llm.supports_function_calling():
plan, ready = self.__call_with_function(reasoning_prompt, "initial_plan")
return plan, ready
plan, ready, structured_plan = self.__call_with_function(reasoning_prompt, "initial_plan")
return plan, ready, structured_plan
else:
system_prompt = self.i18n.retrieve("reasoning", "initial_plan").format(
role=self.agent.role,
@@ -152,18 +161,21 @@ class AgentReasoning:
]
)
return self.__parse_reasoning_response(str(response))
plan, ready = self.__parse_reasoning_response(str(response))
structured_plan = self.__extract_structured_plan(plan)
return plan, ready, structured_plan
def __refine_plan_if_needed(self, plan: str, ready: bool) -> Tuple[str, bool]:
def __refine_plan_if_needed(self, plan: str, ready: bool, structured_plan: Optional[StructuredPlan]) -> Tuple[str, bool, Optional[StructuredPlan]]:
"""
Refines the reasoning plan if the agent is not ready to execute the task.
Args:
plan: The current reasoning plan.
ready: Whether the agent is ready to execute the task.
structured_plan: The current structured plan.
Returns:
Tuple[str, bool]: The refined plan and whether the agent is ready to execute the task.
Tuple[str, bool, Optional[StructuredPlan]]: The refined plan, ready status, and structured plan.
"""
attempt = 1
max_attempts = self.agent.max_reasoning_attempts
@@ -185,7 +197,7 @@ class AgentReasoning:
refine_prompt = self.__create_refine_prompt(plan)
if self.llm.supports_function_calling():
plan, ready = self.__call_with_function(refine_prompt, "refine_plan")
plan, ready, structured_plan = self.__call_with_function(refine_prompt, "refine_plan")
else:
system_prompt = self.i18n.retrieve("reasoning", "refine_plan").format(
role=self.agent.role,
@@ -200,6 +212,7 @@ class AgentReasoning:
]
)
plan, ready = self.__parse_reasoning_response(str(response))
structured_plan = self.__extract_structured_plan(plan)
attempt += 1
@@ -209,9 +222,9 @@ class AgentReasoning:
)
break
return plan, ready
return plan, ready, structured_plan
def __call_with_function(self, prompt: str, prompt_type: str) -> Tuple[str, bool]:
def __call_with_function(self, prompt: str, prompt_type: str) -> Tuple[str, bool, Optional[StructuredPlan]]:
"""
Calls the LLM with function calling to get a reasoning plan.
@@ -220,7 +233,7 @@ class AgentReasoning:
prompt_type: The type of prompt (initial_plan or refine_plan).
Returns:
Tuple[str, bool]: A tuple containing the plan and whether the agent is ready.
Tuple[str, bool, Optional[StructuredPlan]]: A tuple containing the plan, ready status, and structured plan.
"""
self.logger.debug(f"Using function calling for {prompt_type} reasoning")
@@ -239,6 +252,16 @@ class AgentReasoning:
"ready": {
"type": "boolean",
"description": "Whether the agent is ready to execute the task."
},
"steps": {
"type": "array",
"items": {"type": "string"},
"description": "List of steps to complete the task"
},
"acceptance_criteria": {
"type": "array",
"items": {"type": "string"},
"description": "Criteria that must be met before task is considered complete"
}
},
"required": ["plan", "ready"]
@@ -254,9 +277,14 @@ class AgentReasoning:
)
# Prepare a simple callable that just returns the tool arguments as JSON
def _create_reasoning_plan(plan: str, ready: bool): # noqa: N802
def _create_reasoning_plan(plan: str, ready: bool, steps: Optional[List[str]] = None, acceptance_criteria: Optional[List[str]] = None): # noqa: N802
"""Return the reasoning plan result in JSON string form."""
return json.dumps({"plan": plan, "ready": ready})
return json.dumps({
"plan": plan,
"ready": ready,
"steps": steps,
"acceptance_criteria": acceptance_criteria
})
response = self.llm.call(
[
@@ -272,12 +300,19 @@ class AgentReasoning:
try:
result = json.loads(response)
if "plan" in result and "ready" in result:
return result["plan"], result["ready"]
structured_plan = None
if result.get("steps") or result.get("acceptance_criteria"):
structured_plan = StructuredPlan(
steps=result.get("steps", []),
acceptance_criteria=result.get("acceptance_criteria", [])
)
return result["plan"], result["ready"], structured_plan
except (json.JSONDecodeError, KeyError):
pass
response_str = str(response)
return response_str, "READY: I am ready to execute the task." in response_str
structured_plan = self.__extract_structured_plan(response_str)
return response_str, "READY: I am ready to execute the task." in response_str, structured_plan
except Exception as e:
self.logger.warning(f"Error during function calling: {str(e)}. Falling back to text parsing.")
@@ -297,10 +332,11 @@ class AgentReasoning:
)
fallback_str = str(fallback_response)
return fallback_str, "READY: I am ready to execute the task." in fallback_str
structured_plan = self.__extract_structured_plan(fallback_str)
return fallback_str, "READY: I am ready to execute the task." in fallback_str, structured_plan
except Exception as inner_e:
self.logger.error(f"Error during fallback text parsing: {str(inner_e)}")
return "Failed to generate a plan due to an error.", True # Default to ready to avoid getting stuck
return "Failed to generate a plan due to an error.", True, None # Default to ready to avoid getting stuck
def __get_agent_backstory(self) -> str:
"""
@@ -496,7 +532,7 @@ class AgentReasoning:
)
if self.llm.supports_function_calling():
plan, ready = self.__call_with_function(mid_execution_prompt, "mid_execution_plan")
plan, ready, structured_plan = self.__call_with_function(mid_execution_prompt, "mid_execution_plan")
else:
# Use the same prompt for system context
system_prompt = self.i18n.retrieve("reasoning", "mid_execution_plan").format(
@@ -513,8 +549,9 @@ class AgentReasoning:
)
plan, ready = self.__parse_reasoning_response(str(response))
structured_plan = self.__extract_structured_plan(plan)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready)
reasoning_plan = ReasoningPlan(plan=plan, ready=ready, structured_plan=structured_plan)
return AgentReasoningOutput(plan=reasoning_plan)
def __create_mid_execution_prompt(
@@ -560,7 +597,8 @@ class AgentReasoning:
self,
current_steps: int,
tools_used: list,
current_progress: str
current_progress: str,
tool_usage_stats: Optional[Dict[str, Any]] = None
) -> bool:
"""
Use LLM function calling to determine if adaptive reasoning should be triggered.
@@ -569,13 +607,14 @@ class AgentReasoning:
current_steps: Number of steps executed so far
tools_used: List of tools that have been used
current_progress: Summary of progress made so far
tool_usage_stats: Optional statistics about tool usage patterns
Returns:
bool: True if reasoning should be triggered, False otherwise.
"""
try:
decision_prompt = self.__create_adaptive_reasoning_decision_prompt(
current_steps, tools_used, current_progress
current_steps, tools_used, current_progress, tool_usage_stats
)
if self.llm.supports_function_calling():
@@ -618,6 +657,11 @@ class AgentReasoning:
"reasoning": {
"type": "string",
"description": "Brief explanation of why reasoning is or isn't needed."
},
"detected_issues": {
"type": "array",
"items": {"type": "string"},
"description": "List of specific issues detected (e.g., 'repeated tool failures', 'no progress', 'inefficient approach')"
}
},
"required": ["should_reason", "reasoning"]
@@ -625,9 +669,18 @@ class AgentReasoning:
}
}
def _decide_reasoning_need(should_reason: bool, reasoning: str):
def _decide_reasoning_need(should_reason: bool, reasoning: str, detected_issues: Optional[List[str]] = None):
"""Return the reasoning decision result in JSON string form."""
return json.dumps({"should_reason": should_reason, "reasoning": reasoning})
result = {
"should_reason": should_reason,
"reasoning": reasoning
}
if detected_issues:
result["detected_issues"] = detected_issues
# Append detected issues to reasoning explanation
issues_str = ", ".join(detected_issues)
result["reasoning"] = f"{reasoning} Detected issues: {issues_str}"
return json.dumps(result)
system_prompt = self.i18n.retrieve("reasoning", "adaptive_reasoning_decision").format(
role=self.agent.role,
@@ -646,7 +699,11 @@ class AgentReasoning:
try:
result = json.loads(response)
return result.get("should_reason", False), result.get("reasoning", "No explanation provided")
reasoning_text = result.get("reasoning", "No explanation provided")
if result.get("detected_issues"):
# Include detected issues in the reasoning text for logging
self.logger.debug(f"Adaptive reasoning detected issues: {result['detected_issues']}")
return result.get("should_reason", False), reasoning_text
except (json.JSONDecodeError, KeyError):
return False, "No explanation provided"
@@ -669,11 +726,27 @@ class AgentReasoning:
self,
current_steps: int,
tools_used: list,
current_progress: str
current_progress: str,
tool_usage_stats: Optional[Dict[str, Any]] = None
) -> str:
"""Create prompt for adaptive reasoning decision."""
tools_used_str = ", ".join(tools_used) if tools_used else "No tools used yet"
# Add tool usage statistics to the prompt
tool_stats_str = ""
if tool_usage_stats:
tool_stats_str = f"\n\nTOOL USAGE STATISTICS:\n"
tool_stats_str += f"- Total tool invocations: {tool_usage_stats.get('total_tool_uses', 0)}\n"
tool_stats_str += f"- Unique tools used: {tool_usage_stats.get('unique_tools', 0)}\n"
if tool_usage_stats.get('tools_by_frequency'):
tool_stats_str += "- Tool frequency:\n"
for tool, count in tool_usage_stats['tools_by_frequency'].items():
tool_stats_str += f"{tool}: {count} times\n"
if tool_usage_stats.get('recent_patterns'):
tool_stats_str += f"- Recent patterns: {tool_usage_stats['recent_patterns']}\n"
# Use the prompt from i18n and format it with the current context
base_prompt = self.i18n.retrieve("reasoning", "adaptive_reasoning_decision").format(
role=self.agent.role,
@@ -686,9 +759,72 @@ class AgentReasoning:
expected_output=self.task.expected_output,
current_steps=current_steps,
tools_used=tools_used_str,
current_progress=current_progress
current_progress=current_progress + tool_stats_str
)
prompt = base_prompt + context_prompt
return prompt
def __extract_structured_plan(self, plan: str) -> Optional[StructuredPlan]:
"""
Extracts a structured plan from the given plan text.
Args:
plan: The plan text.
Returns:
Optional[StructuredPlan]: The extracted structured plan or None if no plan was found.
"""
if not plan:
return None
import re
steps = []
acceptance_criteria = []
# Look for numbered steps (1., 2., etc.)
step_pattern = r'^\s*(?:\d+\.|\-|\*)\s*(.+)$'
# Look for acceptance criteria section
in_acceptance_section = False
lines = plan.split('\n')
for line in lines:
line = line.strip()
# Check if we're entering acceptance criteria section
if any(marker in line.lower() for marker in ['acceptance criteria', 'success criteria', 'completion criteria']):
in_acceptance_section = True
continue
# Skip empty lines
if not line:
continue
# Extract steps or criteria
match = re.match(step_pattern, line, re.MULTILINE)
if match:
content = match.group(1).strip()
if in_acceptance_section:
acceptance_criteria.append(content)
else:
steps.append(content)
elif line and not line.endswith(':'): # Non-empty line that's not a header
if in_acceptance_section:
acceptance_criteria.append(line)
else:
# Check if it looks like a step (starts with action verb)
action_verbs = ['create', 'implement', 'design', 'build', 'test', 'verify', 'check', 'ensure', 'analyze', 'review']
if any(line.lower().startswith(verb) for verb in action_verbs):
steps.append(line)
# If we found steps or criteria, return structured plan
if steps or acceptance_criteria:
return StructuredPlan(
steps=steps,
acceptance_criteria=acceptance_criteria
)
return None
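
A quick standalone check of the extraction heuristic, reusing the same step regex and section marker (illustrative; the real method is private to AgentReasoning):

import re

plan = """STEPS:
1. Collect the raw data
2. Clean and deduplicate it

ACCEPTANCE CRITERIA:
- Output file contains no duplicate rows
"""

step_pattern = r'^\s*(?:\d+\.|\-|\*)\s*(.+)$'
steps, criteria, in_criteria = [], [], False
for line in plan.split("\n"):
    line = line.strip()
    if "acceptance criteria" in line.lower():
        in_criteria = True
        continue
    match = re.match(step_pattern, line)
    if match:
        (criteria if in_criteria else steps).append(match.group(1).strip())

print(steps)     # ['Collect the raw data', 'Clean and deduplicate it']
print(criteria)  # ['Output file contains no duplicate rows']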