diff --git a/lib/crewai/src/crewai/agents/planner_observer.py b/lib/crewai/src/crewai/agents/planner_observer.py
new file mode 100644
index 000000000..492c374c4
--- /dev/null
+++ b/lib/crewai/src/crewai/agents/planner_observer.py
@@ -0,0 +1,355 @@
+"""PlannerObserver: Observation phase after each step execution.
+
+Implements the "Observe" phase from PLAN-AND-ACT (Section 3.3). After every
+step execution, the Planner analyzes what happened, what new information was
+learned, and whether the remaining plan is still valid.
+
+This is NOT an error detector — it runs on every step, including successes,
+to incorporate runtime observations into the remaining plan.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING, Any
+
+from crewai.events.event_bus import crewai_event_bus
+from crewai.events.types.observation_events import (
+    StepObservationCompletedEvent,
+    StepObservationFailedEvent,
+    StepObservationStartedEvent,
+)
+from crewai.utilities.llm_utils import create_llm
+from crewai.utilities.planning_types import StepObservation, TodoItem
+from crewai.utilities.types import LLMMessage
+
+
+if TYPE_CHECKING:
+    from crewai.agent import Agent
+    from crewai.task import Task
+
+logger = logging.getLogger(__name__)
+
+
+class PlannerObserver:
+    """Observes step execution results and decides on plan continuation.
+
+    After EVERY step execution, this class:
+    1. Analyzes what the step accomplished
+    2. Identifies new information learned
+    3. Decides if the remaining plan is still valid
+    4. Suggests lightweight refinements or triggers full replanning
+
+    LLM resolution (magical fallback):
+    - If ``agent.planning_config.llm`` is explicitly set → use that
+    - Otherwise → fall back to ``agent.llm`` (same LLM for everything)
+
+    Args:
+        agent: The agent instance (for LLM resolution and config).
+        task: Optional task context (for description and expected output).
+    """
+
+    def __init__(self, agent: Agent, task: Task | None = None) -> None:
+        """Keep the agent and optional task, then resolve the LLM to use."""
+        self.agent, self.task = agent, task
+        self.llm = self._resolve_llm()
+
+    def _resolve_llm(self) -> Any:
+        """Pick the LLM used for observation and refinement calls.
+
+        An explicitly configured ``agent.planning_config.llm`` takes priority;
+        anything that is not already an ``LLM`` instance is passed through
+        ``create_llm``. Without such a setting, ``agent.llm`` is used.
+
+        Returns:
+            The LLM object to call for this observer.
+        """
+        from crewai.llm import LLM
+
+        planning_config = getattr(self.agent, "planning_config", None)
+        configured = None if planning_config is None else planning_config.llm
+        if configured is None:
+            return self.agent.llm
+        return configured if isinstance(configured, LLM) else create_llm(configured)
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def observe(
+        self,
+        completed_step: TodoItem,
+        result: str,
+        all_completed: list[TodoItem],
+        remaining_todos: list[TodoItem],
+    ) -> StepObservation:
+        """Run the Planner's observation pass for a step that just executed.
+
+        Invoked after every step, not only failed ones. A started event is
+        emitted, the LLM is asked for a structured ``StepObservation``, and a
+        completed event carrying the observation's fields is emitted. If the
+        LLM call or the completed-event emission raises, the error is logged,
+        a failed event is emitted, and a permissive default observation
+        (step succeeded, remaining plan still valid) is returned instead.
+
+        Args:
+            completed_step: The todo item that was just executed.
+            result: Final result text produced by that step.
+            all_completed: Previously completed todos, used as prompt context.
+            remaining_todos: Pending todos still in the plan.
+
+        Returns:
+            The ``StepObservation`` returned by the LLM. When the LLM replies
+            with anything else, or the call fails, a default observation that
+            lets execution continue.
+        """
+        # Keyword arguments common to the started/completed/failed events.
+        shared_fields: dict[str, Any] = {
+            "agent_role": self.agent.role if self.agent else "unknown",
+            "step_number": completed_step.step_number,
+            "step_description": completed_step.description,
+            "from_task": self.task,
+            "from_agent": self.agent,
+        }
+
+        # Announce that observation of this step has begun.
+        crewai_event_bus.emit(
+            self.agent,
+            event=StepObservationStartedEvent(**shared_fields),
+        )
+
+        # Prompt: task, completed steps, this step's result, remaining plan.
+        prompt = self._build_observation_messages(
+            completed_step, result, all_completed, remaining_todos
+        )
+
+        try:
+            llm_output = self.llm.call(
+                prompt,
+                response_model=StepObservation,
+                from_task=self.task,
+                from_agent=self.agent,
+            )
+
+            # Unstructured reply: keep its text as the learned information and
+            # assume the step worked and the plan still holds.
+            observation = (
+                llm_output
+                if isinstance(llm_output, StepObservation)
+                else StepObservation(
+                    step_completed_successfully=True,
+                    key_information_learned=str(llm_output) if llm_output else "",
+                    remaining_plan_still_valid=True,
+                )
+            )
+
+            # Publish the observation so listeners can see the verdict.
+            crewai_event_bus.emit(
+                self.agent,
+                event=StepObservationCompletedEvent(
+                    **shared_fields,
+                    step_completed_successfully=observation.step_completed_successfully,
+                    key_information_learned=observation.key_information_learned,
+                    remaining_plan_still_valid=observation.remaining_plan_still_valid,
+                    needs_full_replan=observation.needs_full_replan,
+                    replan_reason=observation.replan_reason,
+                    goal_already_achieved=observation.goal_already_achieved,
+                    suggested_refinements=observation.suggested_refinements,
+                ),
+            )
+
+            return observation
+
+        except Exception as e:
+            logger.warning(f"Observation LLM call failed: {e}. Defaulting to continue.")
+
+            # Report the failure to listeners before falling back.
+            crewai_event_bus.emit(
+                self.agent,
+                event=StepObservationFailedEvent(**shared_fields, error=str(e)),
+            )
+
+            # Best effort: an observer failure must not stop plan execution.
+            return StepObservation(
+                step_completed_successfully=True,
+                key_information_learned="",
+                remaining_plan_still_valid=True,
+            )
+
+    def refine_todos(
+        self,
+        observation: StepObservation,
+        remaining_todos: list[TodoItem],
+    ) -> list[TodoItem]:
+        """Sharpen pending todo descriptions using what a step just revealed.
+
+        Lightweight alternative to a full replan: the LLM is asked to rewrite
+        the descriptions of the pending todos in light of the observation,
+        and the rewritten text is applied to the todo objects in place. Step
+        numbers and the list itself are left untouched.
+
+        Example: after step 1 reports "3 products: A, B, C", step 2 may change
+        from "Select the best product" to "Select product B (highest rated)".
+
+        With no suggested refinements the LLM is not called; a failed call is
+        logged and the descriptions are kept as they were.
+
+        Args:
+            observation: Observation whose ``suggested_refinements`` and
+                ``key_information_learned`` drive the rewrite.
+            remaining_todos: Pending todos whose descriptions may be updated.
+
+        Returns:
+            The same ``remaining_todos`` list that was passed in.
+        """
+        if observation.suggested_refinements:
+            prompt = self._build_refinement_messages(observation, remaining_todos)
+
+            try:
+                reply = self.llm.call(
+                    prompt,
+                    from_task=self.task,
+                    from_agent=self.agent,
+                )
+                if reply:
+                    self._apply_refinements(str(reply), remaining_todos)
+            except Exception as e:
+                logger.warning(
+                    f"Refinement LLM call failed: {e}. Keeping original descriptions."
+                )
+
+        return remaining_todos
+
+    # ------------------------------------------------------------------
+    # Internal: Message building
+    # ------------------------------------------------------------------
+
+    def _build_observation_messages(
+        self,
+        completed_step: TodoItem,
+        result: str,
+        all_completed: list[TodoItem],
+        remaining_todos: list[TodoItem],
+    ) -> list[LLMMessage]:
+        """Assemble the system/user message pair for the observation call.
+
+        The user message contains the task description and expected output
+        (both empty when no task is set), the previously completed steps with
+        each result cut to 200 characters, the step that just ran together
+        with its full result, and the steps still pending.
+
+        Returns:
+            A two-item list: the system message, then the user message.
+        """
+        task_desc = (self.task.description or "") if self.task else ""
+        task_goal = (self.task.expected_output or "") if self.task else ""
+
+        # Instructions that frame the LLM as the observer of the plan.
+        system_prompt = (
+            "You are a Planning Agent observing execution progress. "
+            "After each step completes, you analyze what happened and decide "
+            "whether the remaining plan is still valid.\n\n"
+            "Reason step-by-step about:\n"
+            "1. What new information was learned from this step's result\n"
+            "2. Whether the remaining steps still make sense given this new information\n"
+            "3. What refinements, if any, are needed for upcoming steps\n"
+            "4. Whether the overall goal has already been achieved\n\n"
+            "Be conservative about triggering full replans — only do so when the "
+            "remaining plan is fundamentally wrong, not just suboptimal."
+        )
+
+        # Earlier steps, each with a shortened preview of its result.
+        completed_summary = ""
+        if all_completed:
+            completed_summary = "\n## Previously completed steps:\n" + "\n".join(
+                f"  Step {done.step_number}: {done.description}\n"
+                f"    Result: {(done.result or '')[:200]}"
+                for done in all_completed
+            )
+
+        # Steps that have not been executed yet.
+        remaining_summary = ""
+        if remaining_todos:
+            remaining_summary = "\n## Remaining plan steps:\n" + "\n".join(
+                f"  Step {pending.step_number}: {pending.description}"
+                for pending in remaining_todos
+            )
+
+        # Joined verbatim; every piece carries its own newlines.
+        user_sections = [
+            f"## Original task\n{task_desc}\n\n",
+            f"## Expected output\n{task_goal}\n",
+            f"{completed_summary}\n",
+            f"\n## Just completed step {completed_step.step_number}\n",
+            f"Description: {completed_step.description}\n",
+            f"Result: {result}\n",
+            f"{remaining_summary}\n\n",
+            "Analyze this step's result and provide your observation.",
+        ]
+
+        return [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": "".join(user_sections)},
+        ]
+
+    def _build_refinement_messages(
+        self,
+        observation: StepObservation,
+        remaining_todos: list[TodoItem],
+    ) -> list[LLMMessage]:
+        """Build the system/user messages for the step-refinement LLM call."""
+        # The reply format requested here is the one _apply_refinements parses.
+        system_prompt = (
+            "You are refining upcoming plan steps based on new information. "
+            "Update the step descriptions to be more specific and actionable "
+            "given what was learned. Keep the same step numbers.\n\n"
+            "Respond with one line per step in the format:\n"
+            "Step N: <refined description>"
+        )
+
+        pending_steps = [
+            f"Step {todo.step_number}: {todo.description}" for todo in remaining_todos
+        ]
+        user_sections = [
+            f"## New information learned\n{observation.key_information_learned}",
+            "## Suggested refinements\n"
+            + "\n".join(observation.suggested_refinements or []),
+            "## Current pending steps\n" + "\n".join(pending_steps),
+            "Update the step descriptions to incorporate the new information.",
+        ]
+
+        return [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": "\n\n".join(user_sections)},
+        ]
+
+    def _apply_refinements(
+        self,
+        llm_response: str,
+        remaining_todos: list[TodoItem],
+    ) -> None:
+        """Parse the LLM's refinement reply and update todo descriptions in place.
+
+        Expects "Step N: <description>" per line. The label is matched
+        case-insensitively and may follow a bullet ("- ", "* ", "• "), since
+        replies often arrive as lists. Lines that do not parse, name no
+        pending step, or have an empty description are skipped.
+        """
+        todo_by_step: dict[int, TodoItem] = {t.step_number: t for t in remaining_todos}
+
+        for line in llm_response.strip().split("\n"):
+            line = line.strip()
+            if line[:1] in ("-", "*", "•") and line[1:2].isspace():
+                line = line[2:].lstrip()  # drop one leading list marker
+            if not line.lower().startswith("step "):
+                continue
+
+            label, separator, description = line.partition(":")
+            description = description.strip()
+            try:
+                step_num = int(label[4:])
+            except ValueError:
+                continue
+
+            if separator and description and step_num in todo_by_step:
+                todo_by_step[step_num].description = description
diff --git a/lib/crewai/src/crewai/agents/step_executor.py b/lib/crewai/src/crewai/agents/step_executor.py
new file mode 100644
index 000000000..a9d05246a
--- /dev/null
+++ b/lib/crewai/src/crewai/agents/step_executor.py
@@ -0,0 +1,703 @@
+"""StepExecutor: Isolated executor for a single plan step.
+
+Implements a bounded ReAct loop scoped to ONE todo item. The tool execution
+machinery (native function calling, text-parsed tools, caching, hooks) lives
+here — moved from AgentExecutor so the outer Plan-and-Execute loop stays clean.
+
+Based on PLAN-AND-ACT (Section 3.2): The Executor translates high-level plan
+steps into concrete environment actions.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from datetime import datetime
+import json
+import time
+from typing import TYPE_CHECKING, Any
+
+from pydantic import BaseModel
+
+from crewai.agents.parser import (
+    AgentAction,
+    AgentFinish,
+)
+from crewai.events.event_bus import crewai_event_bus
+from crewai.events.types.tool_usage_events import (
+    ToolUsageErrorEvent,
+    ToolUsageFinishedEvent,
+    ToolUsageStartedEvent,
+)
+from crewai.hooks.tool_hooks import (
+    ToolCallHookContext,
+    get_after_tool_call_hooks,
+    get_before_tool_call_hooks,
+)
+from crewai.utilities.agent_utils import (
+    convert_tools_to_openai_schema,
+    enforce_rpm_limit,
+    extract_tool_call_info,
+    format_message_for_llm,
+    process_llm_response,
+    track_delegation_if_needed,
+)
+from crewai.utilities.i18n import I18N, get_i18n
+from crewai.utilities.planning_types import TodoItem
+from crewai.utilities.printer import Printer
+from crewai.utilities.step_execution_context import StepExecutionContext, StepResult
+from crewai.utilities.string_utils import sanitize_tool_name
+from crewai.utilities.tool_utils import execute_tool_and_check_finality
+from crewai.utilities.types import LLMMessage
+
+
+if TYPE_CHECKING:
+    from crewai.agent import Agent
+    from crewai.agents.tools_handler import ToolsHandler
+    from crewai.crew import Crew
+    from crewai.llms.base_llm import BaseLLM
+    from crewai.task import Task
+    from crewai.tools.base_tool import BaseTool
+    from crewai.tools.structured_tool import CrewStructuredTool
+
+
+# Cap on ReAct loop iterations per step; once exhausted, a final answer is forced
+_MAX_STEP_ITERATIONS: int = 10
+
+
+class StepExecutor:
+    """Executes a SINGLE todo item in isolation using a bounded ReAct loop.
+
+    The StepExecutor owns its own message list per invocation. It never reads
+    or writes the AgentExecutor's state. Results flow back via StepResult.
+
+    The internal loop:
+        1. Build messages from todo + context
+        2. Call LLM (with or without native tools)
+        3. If tool call → execute tool, append result, loop back to 2
+        4. If final answer → return StepResult
+        5. If max iterations → force final answer
+
+    Args:
+        llm: The language model to use for execution.
+        tools: Structured tools available to the executor.
+        agent: The agent instance (for role/goal/verbose/config).
+        original_tools: Original BaseTool instances (needed for native tool schema).
+        tools_handler: Optional tools handler for caching and delegation tracking.
+        task: Optional task context.
+        crew: Optional crew context.
+        function_calling_llm: Optional separate LLM for function calling.
+        request_within_rpm_limit: Optional RPM limit function.
+        callbacks: Optional list of callbacks.
+    """
+
+    def __init__(
+        self,
+        llm: BaseLLM,
+        tools: list[CrewStructuredTool],
+        agent: Agent,
+        original_tools: list[BaseTool] | None = None,
+        tools_handler: ToolsHandler | None = None,
+        task: Task | None = None,
+        crew: Crew | None = None,
+        function_calling_llm: BaseLLM | Any | None = None,
+        request_within_rpm_limit: Callable[[], bool] | None = None,
+        callbacks: list[Any] | None = None,
+        i18n: I18N | None = None,
+    ) -> None:
+        """Store collaborators; ``i18n`` defaults to ``get_i18n()`` if not given."""
+        self.llm = llm
+        self.tools = tools
+        self.agent = agent
+        self.original_tools = original_tools or []
+        self.tools_handler = tools_handler
+        self.task = task
+        self.crew = crew
+        self.function_calling_llm = function_calling_llm
+        self.request_within_rpm_limit = request_within_rpm_limit
+        self.callbacks = callbacks or []
+        self._i18n: I18N = i18n or get_i18n()
+        self._printer: Printer = Printer()
+        # Native tool support is decided once, here at construction time
+        self._use_native_tools = self._check_native_tool_support()
+        self._openai_tools: list[dict[str, Any]] = []
+        self._available_functions: dict[str, Callable[..., Any]] = {}
+        if self._use_native_tools:
+            self._setup_native_tools()
+
+    # ------------------------------------------------------------------
+    # Public API
+    # ------------------------------------------------------------------
+
+    def execute(self, todo: TodoItem, context: StepExecutionContext) -> StepResult:
+        """Run one todo item through its own bounded ReAct loop.
+
+        A fresh message list is built for the step and handed to the loop.
+        Exceptions raised while building messages or looping are not
+        propagated; they are reported through a failed ``StepResult``.
+
+        Args:
+            todo: The todo item to execute.
+            context: Task information and dependency results for the step.
+
+        Returns:
+            A ``StepResult`` holding the answer text on success or the error
+            message on failure, plus the tools invoked and the elapsed time.
+        """
+        started_at = time.monotonic()
+        invoked_tools: list[str] = []
+
+        try:
+            prompt = self._build_isolated_messages(todo, context)
+            answer = self._run_react_loop(todo, prompt, invoked_tools)
+            return StepResult(
+                success=True,
+                result=answer,
+                tool_calls_made=invoked_tools,
+                execution_time=time.monotonic() - started_at,
+            )
+        except Exception as e:
+            # Tools recorded before the failure are still reported.
+            return StepResult(
+                success=False,
+                result="",
+                error=str(e),
+                tool_calls_made=invoked_tools,
+                execution_time=time.monotonic() - started_at,
+            )
+
+    # ------------------------------------------------------------------
+    # Internal: Message building
+    # ------------------------------------------------------------------
+
+    def _build_isolated_messages(
+        self, todo: TodoItem, context: StepExecutionContext
+    ) -> list[LLMMessage]:
+        """Create the two-message conversation a step starts from.
+
+        The system message comes from ``_build_system_prompt``; the user
+        message comes from ``_build_user_prompt`` and carries the step
+        description, suggested tool, and dependency results. A new list is
+        returned on every call.
+        """
+        # Both prompts are built before either one is formatted.
+        prompts = (
+            ("system", self._build_system_prompt()),
+            ("user", self._build_user_prompt(todo, context)),
+        )
+        return [format_message_for_llm(text, role=role) for role, text in prompts]
+
+    def _build_system_prompt(self) -> str:
+        """Compose the system prompt for executing a single plan step.
+
+        Covers role, backstory and goal, a "this step only" instruction, and,
+        for text-parsed tool calling, the tool names and reply format.
+        """
+        role = self.agent.role if self.agent else "Assistant"
+        goal = self.agent.goal if self.agent else "Complete tasks efficiently"
+        backstory = getattr(self.agent, "backstory", "") or ""
+        tools_section = ""
+        if self.tools and not self._use_native_tools:
+            tool_names = ", ".join(sanitize_tool_name(t.name) for t in self.tools)
+            tools_section = (
+                f"\n\nAvailable tools: {tool_names}"
+                "\n\nTo use a tool, respond with:\nThought: <your reasoning>\nAction: <tool_name>\nAction Input: <input>"
+                "\n\nWhen you have the final answer, respond with:\nThought: <your reasoning>\nFinal Answer: <your answer>"
+            )
+
+        return (
+            f"You are {role}. {backstory}\n\n"
+            f"Your goal: {goal}\n\n"
+            "You are executing a specific step in a multi-step plan. Focus ONLY on completing\n"
+            "the current step. Do not plan ahead or worry about future steps.\n\n"
+            "Before acting, briefly reason about what you need to do and which approach\n"
+            f"or tool would be most helpful for this specific step.{tools_section}"
+        )
+
+    def _build_user_prompt(self, todo: TodoItem, context: StepExecutionContext) -> str:
+        """Compose the user prompt describing the step to carry out.
+
+        Contains the step description, the suggested tool when one is set,
+        and the dependency results ordered by step number.
+        """
+        sections = [f"## Current Step\n{todo.description}"]
+        if todo.tool_to_use:
+            sections.append(f"\nSuggested tool: {todo.tool_to_use}")
+
+        if context.dependency_results:
+            sections.append("\n## Context from previous steps:")
+            sections.extend(
+                f"Step {number} result: {output}"
+                for number, output in sorted(context.dependency_results.items())
+            )
+        sections.append("\nComplete this step and provide your result.")
+        return "\n".join(sections)
+
+    # ------------------------------------------------------------------
+    # Internal: Bounded ReAct loop
+    # ------------------------------------------------------------------
+
+    def _run_react_loop(
+        self,
+        todo: TodoItem,
+        messages: list[LLMMessage],
+        tool_calls_made: list[str],
+    ) -> str:
+        """Drive the step's ReAct loop until an answer is produced.
+
+        At most ``_MAX_STEP_ITERATIONS`` iterations run, each preceded by the
+        RPM-limit check. An iteration that returns a string ends the loop
+        with that answer; ``None`` means a tool was used and the loop goes
+        on with the updated ``messages``. When the budget runs out, the
+        answer comes from ``_force_final_answer``.
+        """
+        for _ in range(_MAX_STEP_ITERATIONS):
+            enforce_rpm_limit(self.request_within_rpm_limit)
+            run_iteration = (
+                self._native_tool_iteration
+                if self._use_native_tools
+                else self._text_parsed_iteration
+            )
+            final_answer = run_iteration(messages, tool_calls_made)
+            if final_answer is not None:
+                return final_answer
+
+        return self._force_final_answer(messages)
+
+    def _text_parsed_iteration(
+        self,
+        messages: list[LLMMessage],
+        tool_calls_made: list[str],
+    ) -> str | None:
+        """Run one loop iteration using text-parsed (ReAct-style) tool calls.
+
+        A parsed final answer ends the step. A parsed tool action is executed
+        and its observation appended to ``messages`` (mutated in place), then
+        a post-tool reasoning prompt is queued for the next iteration.
+
+        Returns:
+            The final answer text, or ``None`` when the loop should continue.
+
+        Raises:
+            ValueError: If the LLM returns an empty response.
+        """
+        answer = self.llm.call(
+            messages,
+            callbacks=self.callbacks,
+            from_task=self.task,
+            from_agent=self.agent,
+        )
+        if not answer:
+            raise ValueError("Empty response from LLM")
+
+        answer_str = str(answer)
+        use_stop_words = self.llm.supports_stop_words() if self.llm else False
+        formatted = process_llm_response(answer_str, use_stop_words)
+
+        if isinstance(formatted, AgentFinish):
+            return str(formatted.output)
+        if not isinstance(formatted, AgentAction):
+            # Neither a finish nor an action: treat the raw text as the answer.
+            return answer_str
+
+        tool_calls_made.append(formatted.tool)
+
+        fingerprint_context = {}
+        if (
+            self.agent
+            and hasattr(self.agent, "security_config")
+            and hasattr(self.agent.security_config, "fingerprint")
+        ):
+            fingerprint_context = {
+                "agent_fingerprint": str(self.agent.security_config.fingerprint)
+            }
+
+        tool_result = execute_tool_and_check_finality(
+            agent_action=formatted,
+            fingerprint_context=fingerprint_context,
+            tools=self.tools,
+            i18n=self._i18n,
+            agent_key=self.agent.key if self.agent else None,
+            agent_role=self.agent.role if self.agent else None,
+            tools_handler=self.tools_handler,
+            task=self.task,
+            agent=self.agent,
+            function_calling_llm=self.function_calling_llm,
+            crew=self.crew,
+        )
+
+        messages.append(
+            format_message_for_llm(
+                formatted.text + f"\nObservation: {tool_result.result}",
+                role="assistant",
+            )
+        )
+
+        if tool_result.result_as_answer:
+            return str(tool_result.result)
+
+        # Ask the LLM to reason about the observation on the next iteration.
+        reasoning_prompt = self._i18n.slice("post_tool_reasoning")
+        messages.append(format_message_for_llm(reasoning_prompt, role="user"))
+        return None
+
+    def _native_tool_iteration(
+        self,
+        messages: list[LLMMessage],
+        tool_calls_made: list[str],
+    ) -> str | None:
+        """Run one loop iteration using native function calling.
+
+        The LLM is called with the prepared tool schemas. A reply recognised
+        as a list of tool calls is executed via ``_execute_native_tool_calls``;
+        any other reply is treated as the step's final answer.
+
+        Returns:
+            The final answer text (pydantic models are dumped to JSON, other
+            objects go through ``str``), or whatever
+            ``_execute_native_tool_calls`` returns for a tool-call reply.
+
+        Raises:
+            ValueError: If the LLM returns an empty response.
+        """
+        answer = self.llm.call(
+            messages,
+            tools=self._openai_tools,
+            callbacks=self.callbacks,
+            from_task=self.task,
+            from_agent=self.agent,
+        )
+        if not answer:
+            raise ValueError("Empty response from LLM")
+
+        if isinstance(answer, list) and answer and self._is_tool_call_list(answer):
+            return self._execute_native_tool_calls(answer, messages, tool_calls_made)
+        if isinstance(answer, str):
+            return answer
+        if isinstance(answer, BaseModel):
+            return answer.model_dump_json()
+        return str(answer)
+
+    def _execute_native_tool_calls(
+        self,
+        tool_calls: list[Any],
+        messages: list[LLMMessage],
+        tool_calls_made: list[str],
+    ) -> str | None:
+        """Execute a batch of native tool calls and append results to messages.
+
+        Returns final answer string if a tool has result_as_answer, else None.
+        """
+        # Build assistant message with tool calls
+        tool_calls_to_report: list[dict[str, Any]] = []
+        for tool_call in tool_calls:
+            info = extract_tool_call_info(tool_call)
+            if not info:
+                continue
+            call_id, func_name, func_args = info
+            tool_calls_to_report.append(
+                {
+                    "id": call_id,
+                    "type": "function",
+                    "function": {
+                        "name": func_name,
+                        "arguments": func_args
+                        if isinstance(func_args, str)
+                        else json.dumps(func_args),
+                    },
+                }
+            )
+
+        if tool_calls_to_report:
+            assistant_message: LLMMessage = {
+                "role": "assistant",
+                "content": None,
+                "tool_calls": tool_calls_to_report,
+            }
+            # Preserve raw parts for Gemini compatibility
+            if all(type(tc).__qualname__ == "Part" for tc in tool_calls):
+                assistant_message["raw_tool_call_parts"] = list(tool_calls)
+            messages.append(assistant_message)
+
+        # Execute each tool call
+        final_answer: str | None = None
+        for tool_call in tool_calls:
+            info = extract_tool_call_info(tool_call)
+            if not info:
+                continue
+
+            call_id, func_name, func_args = info
+            tool_calls_made.append(func_name)
+
+            # Parse arguments
+            if isinstance(func_args, str):
+                try:
+                    args_dict = json.loads(func_args)
+                except json.JSONDecodeError:
+                    args_dict = {}
+            else:
+                args_dict = func_args
+
+            agent_key = (
+                getattr(self.agent, "key", "unknown") if self.agent else "unknown"
+            )
+
+            # Find original tool for cache_function and result_as_answer
+            original_tool = None
+            for tool in self.original_tools:
+                if sanitize_tool_name(tool.name) == func_name:
+                    original_tool = tool
+                    break
+
+            # Check max usage count
+            max_usage_reached = False
+            if (
+                original_tool
+                and original_tool.max_usage_count is not None
+                and original_tool.current_usage_count >= original_tool.max_usage_count
+            ):
+                max_usage_reached = True
+
+            # Check cache
+            from_cache = False
+            input_str = json.dumps(args_dict) if args_dict else ""
+            result = "Tool not found"
+
+            if self.tools_handler and self.tools_handler.cache:
+                cached_result = self.tools_handler.cache.read(
+                    tool=func_name, input=input_str
+                )
+                if cached_result is not None:
+                    result = (
+                        str(cached_result)
+                        if not isinstance(cached_result, str)
+                        else cached_result
+                    )
+                    from_cache = True
+
+            # Emit tool started event
+            started_at = datetime.now()
+            crewai_event_bus.emit(
+                self,
+                event=ToolUsageStartedEvent(
+                    tool_name=func_name,
+                    tool_args=args_dict,
+                    from_agent=self.agent,
+                    from_task=self.task,
+                    agent_key=agent_key,
+                ),
+            )
+
+            track_delegation_if_needed(func_name, args_dict, self.task)
+
+            # Find structured tool for hooks
+            structured_tool: CrewStructuredTool | None = None
+            for structured in self.tools or []:
+                if sanitize_tool_name(structured.name) == func_name:
+                    structured_tool = structured
+                    break
+
+            # Before hooks
+            hook_blocked = False
+            before_hook_context = ToolCallHookContext(
+                tool_name=func_name,
+                tool_input=args_dict,
+                tool=structured_tool,  # type: ignore[arg-type]
+                agent=self.agent,
+                task=self.task,
+                crew=self.crew,
+            )
+            try:
+                for hook in get_before_tool_call_hooks():
+                    if hook(before_hook_context) is False:
+                        hook_blocked = True
+                        break
+            except Exception:
+                pass
+
+            if hook_blocked:
+                result = f"Tool execution blocked by hook. Tool: {func_name}"
+            elif not from_cache and not max_usage_reached:
+                if func_name in self._available_functions:
+                    try:
+                        tool_func = self._available_functions[func_name]
+                        raw_result = tool_func(**args_dict)
+
+                        # Cache result
+                        if self.tools_handler and self.tools_handler.cache:
+                            should_cache = True
+                            if original_tool:
+                                should_cache = original_tool.cache_function(
+                                    args_dict, raw_result
+                                )
+                            if should_cache:
+                                self.tools_handler.cache.add(
+                                    tool=func_name, input=input_str, output=raw_result
+                                )
+
+                        result = (
+                            str(raw_result)
+                            if not isinstance(raw_result, str)
+                            else raw_result
+                        )
+                    except Exception as e:
+                        result = f"Error executing tool: {e}"
+                        if self.task:
+                            self.task.increment_tools_errors()
+                        crewai_event_bus.emit(
+                            self,
+                            event=ToolUsageErrorEvent(
+                                tool_name=func_name,
+                                tool_args=args_dict,
+                                from_agent=self.agent,
+                                from_task=self.task,
+                                agent_key=agent_key,
+                                error=e,
+                            ),
+                        )
+            elif max_usage_reached and original_tool:
+                result = (
+                    f"Tool '{func_name}' has reached its usage limit of "
+                    f"{original_tool.max_usage_count} times and cannot be used anymore."
+                )
+
+            # After hooks
+            after_hook_context = ToolCallHookContext(
+                tool_name=func_name,
+                tool_input=args_dict,
+                tool=structured_tool,  # type: ignore[arg-type]
+                agent=self.agent,
+                task=self.task,
+                crew=self.crew,
+                tool_result=result,
+            )
+            try:
+                for after_hook in get_after_tool_call_hooks():
+                    hook_result = after_hook(after_hook_context)
+                    if hook_result is not None:
+                        result = hook_result
+                        after_hook_context.tool_result = result
+            except Exception:
+                pass
+
+            # Emit tool finished event
+            crewai_event_bus.emit(
+                self,
+                event=ToolUsageFinishedEvent(
+                    output=result,
+                    tool_name=func_name,
+                    tool_args=args_dict,
+                    from_agent=self.agent,
+                    from_task=self.task,
+                    agent_key=agent_key,
+                    started_at=started_at,
+                    finished_at=datetime.now(),
+                ),
+            )
+
+            # Append tool result message
+            tool_message: LLMMessage = {
+                "role": "tool",
+                "tool_call_id": call_id,
+                "name": func_name,
+                "content": result,
+            }
+            messages.append(tool_message)
+
+            if self.agent and self.agent.verbose:
+                cache_info = " (from cache)" if from_cache else ""
+                self._printer.print(
+                    content=f"Tool {func_name} executed with result{cache_info}: {result[:200]}...",
+                    color="green",
+                )
+
+            # Check result_as_answer
+            if (
+                original_tool
+                and hasattr(original_tool, "result_as_answer")
+                and original_tool.result_as_answer
+            ):
+                final_answer = result
+
+        if final_answer is not None:
+            return final_answer
+
+        return None  # Continue looping
+
+    def _force_final_answer(self, messages: list[LLMMessage]) -> str:
+        """Force the LLM to provide a final answer when max iterations reached.
+
+        A user message asking for a final answer is appended to ``messages``
+        (the caller's list is mutated in place), then the LLM is called once
+        more.
+
+        Args:
+            messages: Conversation accumulated for the current step.
+
+        Returns:
+            The text following the last ``"Final Answer:"`` marker when the
+            response contains one, otherwise the whole response as a string.
+            If the call raises or returns an empty response, a fixed fallback
+            sentence is returned instead.
+        """
+        import logging  # local import: does not rely on the file's import block
+
+        force_prompt = (
+            "You have used the maximum number of tool calls for this step. "
+            "Based on the information gathered so far, provide your final answer now."
+        )
+        if not self._use_native_tools:
+            # Without native tools, end the prompt with the same marker that
+            # is split out of the response below.
+            force_prompt += "\n\nFinal Answer: "
+
+        messages.append(format_message_for_llm(force_prompt, role="user"))
+
+        try:
+            answer = self.llm.call(
+                messages,
+                callbacks=self.callbacks,
+                from_task=self.task,
+                from_agent=self.agent,
+            )
+            if answer:
+                answer_str = str(answer)
+                # Try to extract just the final answer portion
+                if "Final Answer:" in answer_str:
+                    return answer_str.split("Final Answer:")[-1].strip()
+                return answer_str
+        except Exception:
+            # Best effort: a failed call must not abort the step, but it must
+            # not vanish silently either. Record it, then use the fallback.
+            logging.getLogger(__name__).warning(
+                "Forced final-answer LLM call failed; returning fallback text",
+                exc_info=True,
+            )
+
+        return "Step could not be completed within the iteration limit."
+
+    # ------------------------------------------------------------------
+    # Internal: Native tool support
+    # ------------------------------------------------------------------
+
+    def _check_native_tool_support(self) -> bool:
+        """Tell whether native function calling can be used.
+
+        Requires ``self.llm`` to expose a callable ``supports_function_calling``
+        whose result is truthy, and ``self.original_tools`` to be non-empty.
+        """
+        supports = getattr(self.llm, "supports_function_calling", None)
+        if not callable(supports):
+            return False
+        return supports() and bool(self.original_tools)
+
+    def _setup_native_tools(self) -> None:
+        """Prepare the OpenAI-schema tool definitions for native function calling.
+
+        Does nothing when ``self.original_tools`` is empty. Otherwise stores the
+        two values returned by ``convert_tools_to_openai_schema`` on
+        ``self._openai_tools`` and ``self._available_functions``.
+        """
+        if not self.original_tools:
+            return
+        schemas, functions = convert_tools_to_openai_schema(self.original_tools)
+        self._openai_tools = schemas
+        self._available_functions = functions
+
+    def _is_tool_call_list(self, response: list[Any]) -> bool:
+        """Report whether ``response`` looks like a list of native tool calls.
+
+        Only the first element is examined. It counts as a tool call when it
+        matches any of the provider shapes checked below; an empty
+        ``response`` never does.
+        """
+        if not response:
+            return False
+
+        head = response[0]
+        is_mapping = isinstance(head, dict)
+        return bool(
+            # OpenAI-style: object or dict exposing ``function``
+            hasattr(head, "function")
+            or (is_mapping and "function" in head)
+            # Anthropic-style (ToolUseBlock): ``type == "tool_use"`` or name + input
+            or getattr(head, "type", None) == "tool_use"
+            or (hasattr(head, "name") and hasattr(head, "input"))
+            # Bedrock-style: dict carrying ``name`` and ``input``
+            or (is_mapping and "name" in head and "input" in head)
+            # Gemini-style: a truthy ``function_call`` attribute
+            or getattr(head, "function_call", None)
+        )
diff --git a/lib/crewai/src/crewai/events/event_listener.py b/lib/crewai/src/crewai/events/event_listener.py
index 5f22d0188..80f808e96 100644
--- a/lib/crewai/src/crewai/events/event_listener.py
+++ b/lib/crewai/src/crewai/events/event_listener.py
@@ -74,6 +74,14 @@ from crewai.events.types.mcp_events import (
     MCPToolExecutionFailedEvent,
     MCPToolExecutionStartedEvent,
 )
+from crewai.events.types.observation_events import (
+    GoalAchievedEarlyEvent,
+    PlanRefinementEvent,
+    PlanReplanTriggeredEvent,
+    StepObservationCompletedEvent,
+    StepObservationFailedEvent,
+    StepObservationStartedEvent,
+)
 from crewai.events.types.reasoning_events import (
     AgentReasoningCompletedEvent,
     AgentReasoningFailedEvent,
@@ -534,6 +542,64 @@ class EventListener(BaseEventListener):
                 event.error,
             )
 
+        # ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
+
+        @crewai_event_bus.on(StepObservationStartedEvent)
+        def on_step_observation_started(
+            _: Any, event: StepObservationStartedEvent
+        ) -> None:
+            """Relay the event's agent role and step details to the formatter."""
+            self.formatter.handle_observation_started(
+                agent_role=event.agent_role,
+                step_number=event.step_number,
+                step_description=event.step_description,
+            )
+
+        @crewai_event_bus.on(StepObservationCompletedEvent)
+        def on_step_observation_completed(
+            _: Any, event: StepObservationCompletedEvent
+        ) -> None:
+            """Relay the observation outcome flags and summary to the formatter."""
+            # Keyword arguments keep the four boolean flags from being
+            # silently swapped.
+            self.formatter.handle_observation_completed(
+                agent_role=event.agent_role,
+                step_number=event.step_number,
+                step_completed=event.step_completed_successfully,
+                plan_valid=event.remaining_plan_still_valid,
+                key_info=event.key_information_learned,
+                needs_replan=event.needs_full_replan,
+                goal_achieved=event.goal_already_achieved,
+            )
+
+        @crewai_event_bus.on(StepObservationFailedEvent)
+        def on_step_observation_failed(
+            _: Any, event: StepObservationFailedEvent
+        ) -> None:
+            """Relay the failed observation's step number and error to the formatter."""
+            self.formatter.handle_observation_failed(
+                step_number=event.step_number,
+                error=event.error,
+            )
+
+        @crewai_event_bus.on(PlanRefinementEvent)
+        def on_plan_refinement(_: Any, event: PlanRefinementEvent) -> None:
+            """Relay the refinement count and refinement texts to the formatter."""
+            self.formatter.handle_plan_refinement(
+                step_number=event.step_number,
+                refined_count=event.refined_step_count,
+                refinements=event.refinements,
+            )
+
+        @crewai_event_bus.on(PlanReplanTriggeredEvent)
+        def on_plan_replan_triggered(_: Any, event: PlanReplanTriggeredEvent) -> None:
+            """Relay the replan reason and counters to the formatter."""
+            self.formatter.handle_plan_replan(
+                reason=event.replan_reason,
+                replan_count=event.replan_count,
+                preserved_count=event.completed_steps_preserved,
+            )
+
+        @crewai_event_bus.on(GoalAchievedEarlyEvent)
+        def on_goal_achieved_early(_: Any, event: GoalAchievedEarlyEvent) -> None:
+            """Relay the completed and remaining step counts to the formatter."""
+            self.formatter.handle_goal_achieved_early(
+                steps_completed=event.steps_completed,
+                steps_remaining=event.steps_remaining,
+            )
+
         # ----------- AGENT LOGGING EVENTS -----------
 
         @crewai_event_bus.on(AgentLogsStartedEvent)
diff --git a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py
index ee337d7fd..a9f10d484 100644
--- a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py
+++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py
@@ -93,6 +93,14 @@ from crewai.events.types.memory_events import (
     MemorySaveFailedEvent,
     MemorySaveStartedEvent,
 )
+from crewai.events.types.observation_events import (
+    GoalAchievedEarlyEvent,
+    PlanRefinementEvent,
+    PlanReplanTriggeredEvent,
+    StepObservationCompletedEvent,
+    StepObservationFailedEvent,
+    StepObservationStartedEvent,
+)
 from crewai.events.types.reasoning_events import (
     AgentReasoningCompletedEvent,
     AgentReasoningFailedEvent,
@@ -437,6 +445,39 @@ class TraceCollectionListener(BaseEventListener):
         ) -> None:
             self._handle_action_event("agent_reasoning_failed", source, event)
 
+        # Observation events (Plan-and-Execute)
+        @event_bus.on(StepObservationStartedEvent)
+        def on_step_observation_started(
+            source: Any, event: StepObservationStartedEvent
+        ) -> None:
+            """Route StepObservationStartedEvent through ``_handle_action_event``."""
+            self._handle_action_event("step_observation_started", source, event)
+
+        @event_bus.on(StepObservationCompletedEvent)
+        def on_step_observation_completed(
+            source: Any, event: StepObservationCompletedEvent
+        ) -> None:
+            """Route StepObservationCompletedEvent through ``_handle_action_event``."""
+            self._handle_action_event("step_observation_completed", source, event)
+
+        @event_bus.on(StepObservationFailedEvent)
+        def on_step_observation_failed(
+            source: Any, event: StepObservationFailedEvent
+        ) -> None:
+            """Route StepObservationFailedEvent through ``_handle_action_event``."""
+            self._handle_action_event("step_observation_failed", source, event)
+
+        @event_bus.on(PlanRefinementEvent)
+        def on_plan_refinement(source: Any, event: PlanRefinementEvent) -> None:
+            """Route PlanRefinementEvent through ``_handle_action_event``."""
+            self._handle_action_event("plan_refinement", source, event)
+
+        @event_bus.on(PlanReplanTriggeredEvent)
+        def on_plan_replan_triggered(
+            source: Any, event: PlanReplanTriggeredEvent
+        ) -> None:
+            """Route PlanReplanTriggeredEvent through ``_handle_action_event``."""
+            self._handle_action_event("plan_replan_triggered", source, event)
+
+        @event_bus.on(GoalAchievedEarlyEvent)
+        def on_goal_achieved_early(source: Any, event: GoalAchievedEarlyEvent) -> None:
+            """Route GoalAchievedEarlyEvent through ``_handle_action_event``."""
+            self._handle_action_event("goal_achieved_early", source, event)
+
         @event_bus.on(KnowledgeRetrievalStartedEvent)
         def on_knowledge_retrieval_started(
             source: Any, event: KnowledgeRetrievalStartedEvent
diff --git a/lib/crewai/src/crewai/events/types/observation_events.py b/lib/crewai/src/crewai/events/types/observation_events.py
new file mode 100644
index 000000000..2c95f3ae0
--- /dev/null
+++ b/lib/crewai/src/crewai/events/types/observation_events.py
@@ -0,0 +1,99 @@
+"""Observation events for the Plan-and-Execute architecture.
+
+Emitted during the Observation phase (PLAN-AND-ACT Section 3.3) when the
+PlannerObserver analyzes step execution results and decides on plan
+continuation, refinement, or replanning.
+"""
+
+from typing import Any
+
+from crewai.events.base_events import BaseEvent
+
+
+class ObservationEvent(BaseEvent):
+    """Base event for observation phase events.
+
+    Declares the fields shared by the observation events in this module.
+    ``type`` has no default here; each subclass below supplies its own.
+
+    Attributes:
+        type: Event type identifier string.
+        agent_role: Role string of the agent associated with the event (required).
+        step_number: Number of the plan step the event refers to (required).
+        step_description: Description of that step; defaults to ``""``.
+        from_task: Optional task reference; defaults to ``None``.
+        from_agent: Optional agent reference; defaults to ``None``.
+    """
+
+    type: str
+    agent_role: str
+    step_number: int
+    step_description: str = ""
+    from_task: Any | None = None
+    from_agent: Any | None = None
+
+    def __init__(self, **data: Any) -> None:
+        super().__init__(**data)
+        # Pass the raw keyword data to the inherited helpers.
+        # NOTE(review): presumably they derive task/agent metadata from
+        # ``from_task`` / ``from_agent`` — confirm in BaseEvent.
+        self._set_task_params(data)
+        self._set_agent_params(data)
+
+
+class StepObservationStartedEvent(ObservationEvent):
+    """Emitted when the Planner begins observing a step's result.
+
+    Fires after every step execution, before the observation LLM call.
+    Adds no fields of its own; it only fixes ``type`` to
+    ``"step_observation_started"``.
+    """
+
+    type: str = "step_observation_started"
+
+
+class StepObservationCompletedEvent(ObservationEvent):
+    """Emitted when the Planner finishes observing a step's result.
+
+    Contains the full observation analysis: what was learned, whether
+    the plan is still valid, and what action to take next.
+
+    Attributes:
+        step_completed_successfully: Whether the step succeeded; defaults to ``True``.
+        key_information_learned: Free-text summary of what was learned;
+            defaults to ``""``.
+        remaining_plan_still_valid: Whether the rest of the plan still holds;
+            defaults to ``True``.
+        needs_full_replan: Whether a full replan is requested; defaults to ``False``.
+        replan_reason: Optional reason for the replan; defaults to ``None``.
+        goal_already_achieved: Whether the goal is already met; defaults to ``False``.
+        suggested_refinements: Optional list of refinement texts; defaults to ``None``.
+    """
+
+    type: str = "step_observation_completed"
+    step_completed_successfully: bool = True
+    key_information_learned: str = ""
+    remaining_plan_still_valid: bool = True
+    needs_full_replan: bool = False
+    replan_reason: str | None = None
+    goal_already_achieved: bool = False
+    suggested_refinements: list[str] | None = None
+
+
+class StepObservationFailedEvent(ObservationEvent):
+    """Emitted when the observation LLM call itself fails.
+
+    The system defaults to continuing the plan when this happens,
+    but the event allows monitoring/alerting on observation failures.
+
+    Attributes:
+        error: Error text describing the failure; defaults to ``""``.
+    """
+
+    type: str = "step_observation_failed"
+    error: str = ""
+
+
+class PlanRefinementEvent(ObservationEvent):
+    """Emitted when the Planner refines upcoming step descriptions.
+
+    This is the lightweight refinement path — no full replan, just
+    sharpening pending todo descriptions based on new information.
+
+    Attributes:
+        refined_step_count: Count of refined steps supplied by the emitter;
+            defaults to ``0``.
+        refinements: Optional list of refinement texts; defaults to ``None``.
+    """
+
+    type: str = "plan_refinement"
+    refined_step_count: int = 0
+    refinements: list[str] | None = None
+
+
+class PlanReplanTriggeredEvent(ObservationEvent):
+    """Emitted when the Planner triggers a full replan.
+
+    The remaining plan was deemed fundamentally wrong and will be
+    regenerated from scratch, preserving completed step results.
+
+    Attributes:
+        replan_reason: Reason text for the replan; defaults to ``""`` (unlike
+            ``StepObservationCompletedEvent.replan_reason``, it is not optional).
+        replan_count: Replan counter supplied by the emitter; defaults to ``0``.
+        completed_steps_preserved: Count of completed steps kept across the
+            replan, as supplied by the emitter; defaults to ``0``.
+    """
+
+    type: str = "plan_replan_triggered"
+    replan_reason: str = ""
+    replan_count: int = 0
+    completed_steps_preserved: int = 0
+
+
+class GoalAchievedEarlyEvent(ObservationEvent):
+    """Emitted when the Planner detects the goal was achieved early.
+
+    Remaining steps will be skipped and execution will finalize.
+
+    Attributes:
+        steps_remaining: Count of steps left unexecuted, as supplied by the
+            emitter; defaults to ``0``.
+        steps_completed: Count of steps already completed, as supplied by the
+            emitter; defaults to ``0``.
+    """
+
+    type: str = "goal_achieved_early"
+    steps_remaining: int = 0
+    steps_completed: int = 0
diff --git a/lib/crewai/src/crewai/events/utils/console_formatter.py b/lib/crewai/src/crewai/events/utils/console_formatter.py
index ac6caabcf..8ffcc868f 100644
--- a/lib/crewai/src/crewai/events/utils/console_formatter.py
+++ b/lib/crewai/src/crewai/events/utils/console_formatter.py
@@ -851,6 +851,152 @@ To enable tracing, do any one of these:
         )
         self.print_panel(error_content, "❌ Reasoning Error", "red")
 
+    # ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
+
+    def handle_observation_started(
+        self,
+        agent_role: str,
+        step_number: int,
+        step_description: str,
+    ) -> None:
+        """Print the cyan panel announcing that a step result is being observed.
+
+        Nothing is printed unless ``self.verbose`` is true. A non-empty
+        ``step_description`` is shown cut to 80 characters, with ``...``
+        appended when it was longer.
+        """
+        if not self.verbose:
+            return
+
+        panel = Text()
+        panel.append("Observation Started\n", style="cyan bold")
+        for label, value in (("Agent: ", agent_role), ("Step: ", step_number)):
+            panel.append(label, style="white")
+            panel.append(f"{value}\n", style="cyan")
+
+        if step_description:
+            ellipsis = "..." if len(step_description) > 80 else ""
+            panel.append("Description: ", style="white")
+            panel.append(f"{step_description[:80]}{ellipsis}\n", style="cyan")
+
+        self.print_panel(panel, "🔍 Observing Step Result", "cyan")
+
+    def handle_observation_completed(
+        self,
+        agent_role: str,
+        step_number: int,
+        step_completed: bool,
+        plan_valid: bool,
+        key_info: str,
+        needs_replan: bool,
+        goal_achieved: bool,
+    ) -> None:
+        """Handle step observation completed event.
+
+        Prints a panel summarising the observation; nothing is printed unless
+        ``self.verbose`` is true. The status line is the first match of:
+        goal achieved, replan needed, step failed, plan valid, plan no longer
+        valid.
+
+        Args:
+            agent_role: Role of the agent; accepted but not displayed.
+            step_number: Number of the observed step.
+            step_completed: Whether the step itself completed successfully.
+            plan_valid: Whether the remaining plan is still considered valid.
+            key_info: Summary of what was learned; shown cut to 120 characters.
+            needs_replan: Whether a full replan was requested.
+            goal_achieved: Whether the goal was already achieved.
+        """
+        if not self.verbose:
+            return
+
+        if goal_achieved:
+            style, status = "green", "Goal Achieved Early"
+        elif needs_replan:
+            style, status = "yellow", "Replan Needed"
+        elif not step_completed:
+            # Failure is reported from the step outcome itself, not inferred
+            # from plan validity.
+            style, status = "red", "Step Failed"
+        elif plan_valid:
+            style, status = "green", "Plan Valid — Continue"
+        else:
+            # The step succeeded but the remaining plan is flagged invalid.
+            style, status = "yellow", "Plan No Longer Valid"
+
+        content = Text()
+        content.append("Observation Complete\n", style=f"{style} bold")
+        content.append("Step: ", style="white")
+        content.append(f"{step_number}\n", style=style)
+        content.append("Status: ", style="white")
+        content.append(f"{status}\n", style=style)
+        if key_info:
+            info_preview = key_info[:120] + ("..." if len(key_info) > 120 else "")
+            content.append("Learned: ", style="white")
+            content.append(f"{info_preview}\n", style=style)
+
+        self.print_panel(content, "🔍 Observation Result", style)
+
+    def handle_observation_failed(
+        self,
+        step_number: int,
+        error: str,
+    ) -> None:
+        """Print the red error panel for an observation that failed.
+
+        Nothing is printed unless ``self.verbose`` is true. The panel body is
+        built by ``self.create_status_content`` from the step number (as a
+        string) and the error text.
+        """
+        if self.verbose:
+            panel = self.create_status_content(
+                "Observation Failed",
+                "Error",
+                "red",
+                Step=str(step_number),
+                Error=error,
+            )
+            self.print_panel(panel, "❌ Observation Error", "red")
+
+    def handle_plan_refinement(
+        self,
+        step_number: int,
+        refined_count: int,
+        refinements: list[str] | None,
+    ) -> None:
+        """Print the cyan panel describing a lightweight plan refinement.
+
+        Nothing is printed unless ``self.verbose`` is true. At most the first
+        three refinements are listed, each cut to 80 characters.
+        """
+        if not self.verbose:
+            return
+
+        panel = Text()
+        panel.append("Plan Refined\n", style="cyan bold")
+        rows = (("After Step: ", step_number), ("Steps Updated: ", refined_count))
+        for label, value in rows:
+            panel.append(label, style="white")
+            panel.append(f"{value}\n", style="cyan")
+
+        for item in (refinements or [])[:3]:
+            panel.append(f"  • {item[:80]}\n", style="white")
+
+        self.print_panel(panel, "✏️ Plan Refinement", "cyan")
+
+    def handle_plan_replan(
+        self,
+        reason: str,
+        replan_count: int,
+        preserved_count: int,
+    ) -> None:
+        """Print the yellow panel announcing a full replan.
+
+        Nothing is printed unless ``self.verbose`` is true. The panel lists
+        the reason, the replan number and the count of preserved steps.
+        """
+        if not self.verbose:
+            return
+
+        panel = Text()
+        panel.append("Full Replan Triggered\n", style="yellow bold")
+        rows = (
+            ("Reason: ", reason),
+            ("Replan #: ", replan_count),
+            ("Preserved Steps: ", preserved_count),
+        )
+        for label, value in rows:
+            panel.append(label, style="white")
+            panel.append(f"{value}\n", style="yellow")
+
+        self.print_panel(panel, "🔄 Dynamic Replan", "yellow")
+
+    def handle_goal_achieved_early(
+        self,
+        steps_completed: int,
+        steps_remaining: int,
+    ) -> None:
+        """Print the green panel reporting that the goal was reached early.
+
+        Nothing is printed unless ``self.verbose`` is true. The panel shows
+        how many steps were completed and how many remaining steps are skipped.
+        """
+        if not self.verbose:
+            return
+
+        panel = Text()
+        panel.append("Goal Achieved Early!\n", style="green bold")
+        rows = (
+            ("Completed: ", f"{steps_completed} steps\n"),
+            ("Skipped: ", f"{steps_remaining} remaining steps\n"),
+        )
+        for label, line in rows:
+            panel.append(label, style="white")
+            panel.append(line, style="green")
+
+        self.print_panel(panel, "🎯 Early Goal Achievement", "green")
+
     # ----------- AGENT LOGGING EVENTS -----------
 
     def handle_agent_logs_started(
diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py
index adee3a8c1..c26b42783 100644
--- a/lib/crewai/src/crewai/experimental/agent_executor.py
+++ b/lib/crewai/src/crewai/experimental/agent_executor.py
@@ -27,6 +27,11 @@ from crewai.events.types.logging_events import (
     AgentLogsExecutionEvent,
     AgentLogsStartedEvent,
 )
+from crewai.events.types.observation_events import (
+    GoalAchievedEarlyEvent,
+    PlanRefinementEvent,
+    PlanReplanTriggeredEvent,
+)
 from crewai.events.types.tool_usage_events import (
     ToolUsageErrorEvent,
     ToolUsageFinishedEvent,
@@ -62,8 +67,14 @@ from crewai.utilities.agent_utils import (
 )
 from crewai.utilities.constants import TRAINING_DATA_FILE
 from crewai.utilities.i18n import I18N, get_i18n
-from crewai.utilities.planning_types import PlanStep, TodoItem, TodoList
+from crewai.utilities.planning_types import (
+    PlanStep,
+    StepObservation,
+    TodoItem,
+    TodoList,
+)
 from crewai.utilities.printer import Printer
+from crewai.utilities.step_execution_context import StepExecutionContext
 from crewai.utilities.string_utils import sanitize_tool_name
 from crewai.utilities.tool_utils import execute_tool_and_check_finality
 from crewai.utilities.training_handler import CrewTrainingHandler
@@ -109,6 +120,14 @@ class AgentReActState(BaseModel):
     last_replan_reason: str | None = Field(
         default=None, description="Reason for the last replan, if any"
     )
+    observations: dict[int, StepObservation] = Field(
+        default_factory=dict,
+        description="Planner's observation per step (keyed by step_number)",
+    )
+    execution_log: list[dict[str, Any]] = Field(
+        default_factory=list,
+        description="Audit trail for debugging (NOT used for LLM calls)",
+    )
 
 
 class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
@@ -222,6 +241,11 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
             )
         self._state = AgentReActState()
 
+        # Plan-and-Execute components (Phase 2)
+        # Lazy-imported to avoid circular imports during module load
+        self._step_executor: Any = None
+        self._planner_observer: Any = None
+
     def _ensure_flow_initialized(self) -> None:
         """Ensure Flow.__init__() has been called.
 
@@ -396,6 +420,331 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
 
         self.state.todos = TodoList(items=todos)
 
+    # -------------------------------------------------------------------------
+    # Plan-and-Execute: Component Initialization
+    # -------------------------------------------------------------------------
+
+    def _ensure_step_executor(self) -> Any:
+        """Return the cached StepExecutor, building it on first use.
+
+        The executor is constructed from this executor's own LLM, tools, agent,
+        task, crew and callback settings, then stored on
+        ``self._step_executor``.
+        """
+        if self._step_executor is not None:
+            return self._step_executor
+
+        # Imported here rather than at module level to avoid a circular import.
+        from crewai.agents.step_executor import StepExecutor
+
+        executor = StepExecutor(
+            llm=self.llm,
+            tools=self.tools,
+            agent=self.agent,
+            original_tools=self.original_tools,
+            tools_handler=self.tools_handler,
+            task=self.task,
+            crew=self.crew,
+            function_calling_llm=self.function_calling_llm,
+            request_within_rpm_limit=self.request_within_rpm_limit,
+            callbacks=self.callbacks,
+            i18n=self._i18n,
+        )
+        self._step_executor = executor
+        return executor
+
+    def _ensure_planner_observer(self) -> Any:
+        """Return the cached PlannerObserver, building it on first use.
+
+        The observer is created with this executor's agent and task and stored
+        on ``self._planner_observer``.
+        """
+        if self._planner_observer is not None:
+            return self._planner_observer
+
+        # Imported here rather than at module level to avoid a circular import.
+        from crewai.agents.planner_observer import PlannerObserver
+
+        observer = PlannerObserver(agent=self.agent, task=self.task)
+        self._planner_observer = observer
+        return observer
+
+    def _build_context_for_todo(self, todo: TodoItem) -> StepExecutionContext:
+        """Assemble the isolated context a single todo is executed with.
+
+        Only the stored ``result`` of each dependency is passed along — no
+        execution traces, tool calls or LLM message history. Dependencies that
+        cannot be found, or whose result is empty, are left out.
+
+        Args:
+            todo: The todo item to build context for.
+
+        Returns:
+            A StepExecutionContext holding the task description, the task goal
+            and the collected dependency results.
+        """
+        dependency_results: dict[int, str] = {
+            dep_num: dep.result
+            for dep_num in todo.depends_on
+            if (dep := self.state.todos.get_by_step_number(dep_num)) and dep.result
+        }
+
+        if self.task:
+            description = self.task.description or ""
+            goal = self.task.expected_output or ""
+        else:
+            # No task attached: fall back to the kickoff input when it exists.
+            description = getattr(self, "_kickoff_input", "")
+            goal = "Complete the task successfully"
+
+        return StepExecutionContext(
+            task_description=description,
+            task_goal=goal,
+            dependency_results=dependency_results,
+        )
+
+    # -------------------------------------------------------------------------
+    # Plan-and-Execute: New Observation-Driven Flow Methods
+    # -------------------------------------------------------------------------
+
+    @listen("step_executed")
+    def observe_step_result(self) -> Literal["step_observed"]:
+        """Run the Planner's observation pass over the step that just executed.
+
+        Runs after every step execution, successes included (the Observe phase
+        of PLAN-AND-ACT Section 3.3); it is not an error handler. The
+        observation is stored in ``state.observations`` under the step number
+        and summarised in ``state.execution_log``. With no current todo,
+        nothing is observed.
+
+        Returns:
+            Always ``"step_observed"``.
+        """
+        todos = self.state.todos
+        step = todos.current_todo
+        if not step:
+            return "step_observed"
+
+        observer = self._ensure_planner_observer()
+        observation = observer.observe(
+            completed_step=step,
+            result=step.result or "",
+            all_completed=todos.get_completed_todos(),
+            remaining_todos=todos.get_pending_todos(),
+        )
+
+        step_number = step.step_number
+        self.state.observations[step_number] = observation
+
+        # Audit-trail entry for debugging: copy the headline observation fields.
+        logged_fields = (
+            "step_completed_successfully",
+            "key_information_learned",
+            "remaining_plan_still_valid",
+            "needs_full_replan",
+            "goal_already_achieved",
+        )
+        entry: dict[str, Any] = {"type": "observation", "step_number": step_number}
+        for field_name in logged_fields:
+            entry[field_name] = getattr(observation, field_name)
+        self.state.execution_log.append(entry)
+
+        if self.agent.verbose:
+            learned = observation.key_information_learned[:80]
+            message = (
+                f"[Observe] Step {step_number}: "
+                f"success={observation.step_completed_successfully}, "
+                f"plan_valid={observation.remaining_plan_still_valid}, "
+                f"learned={learned}..."
+            )
+            self._printer.print(content=message, color="cyan")
+
+        return "step_observed"
+
+    @router("step_observed")
+    def decide_next_action(
+        self,
+    ) -> Literal[
+        "goal_achieved",
+        "replan_now",
+        "refine_and_continue",
+        "continue_plan",
+    ]:
+        """Route based on the Planner's observation (replaces _should_replan()).
+
+        Priority: goal achieved, full replan, failed step (replan), refine,
+        continue. The todo is marked completed on all routes except "replan_now".
+        """
+        current_todo = self.state.todos.current_todo
+        if not current_todo:
+            return "continue_plan"
+
+        observation = self.state.observations.get(current_todo.step_number)
+        if not observation:
+            # No observation — continue, but keep the result stored on the todo
+            self.state.todos.mark_completed(
+                current_todo.step_number, result=current_todo.result
+            )
+            return "continue_plan"
+
+        # Goal already achieved — early termination
+        if observation.goal_already_achieved:
+            self.state.todos.mark_completed(
+                current_todo.step_number, result=current_todo.result
+            )
+            if self.agent.verbose:
+                self._printer.print(
+                    content="[Decide] Goal achieved early — finalizing",
+                    color="green",
+                )
+            return "goal_achieved"
+
+        # Full replan needed
+        if observation.needs_full_replan:
+            if self.agent.verbose:
+                self._printer.print(
+                    content=f"[Decide] Full replan needed: {observation.replan_reason}",
+                    color="yellow",
+                )
+            self.state.last_replan_reason = observation.replan_reason
+            return "replan_now"
+
+        # Step failed — also trigger replan
+        if not observation.step_completed_successfully:
+            if self.agent.verbose:
+                self._printer.print(
+                    content="[Decide] Step failed — triggering replan",
+                    color="yellow",
+                )
+            self.state.last_replan_reason = "Step did not complete successfully"
+            return "replan_now"
+
+        # Step succeeded and no replan is needed: complete it for both routes below
+        self.state.todos.mark_completed(
+            current_todo.step_number, result=current_todo.result
+        )
+        # Plan still valid but needs refinement
+        if observation.remaining_plan_still_valid and observation.suggested_refinements:
+            if self.agent.verbose:
+                self._printer.print(
+                    content="[Decide] Plan valid but refining upcoming steps",
+                    color="cyan",
+                )
+            return "refine_and_continue"
+
+        # Plan still valid, no refinements needed — just continue
+        if self.agent.verbose:
+            completed = self.state.todos.completed_count
+            total = len(self.state.todos.items)
+            self._printer.print(
+                content=f"[Decide] Continue plan ({completed}/{total} done)",
+                color="green",
+            )
+        return "continue_plan"
+
+    @listen("refine_and_continue")
+    def handle_refine_and_continue(self) -> Literal["has_todos", "all_todos_complete"]:
+        """Lightweight plan refinement — update pending todo descriptions.
+
+        The Planner sharpens upcoming step descriptions based on what was
+        learned, without regenerating the entire plan. Reports completion
+        instead of "has_todos" when no todo is left after the refined step.
+        """
+        # Use the observation with the highest step number (treated as most recent)
+        recent_observation: StepObservation | None = None
+        last_step: int = 0
+        if self.state.observations:
+            last_step = max(self.state.observations)
+            recent_observation = self.state.observations[last_step]
+
+        if recent_observation and recent_observation.suggested_refinements:
+            observer = self._ensure_planner_observer()
+            remaining = self.state.todos.get_pending_todos()
+            observer.refine_todos(recent_observation, remaining)
+
+            crewai_event_bus.emit(
+                self.agent,
+                event=PlanRefinementEvent(
+                    agent_role=self.agent.role,
+                    step_number=last_step,
+                    step_description="",
+                    refined_step_count=len(remaining),
+                    refinements=recent_observation.suggested_refinements,
+                    from_task=self.task,
+                    from_agent=self.agent,
+                ),
+            )
+
+            if self.agent.verbose:
+                self._printer.print(
+                    content=f"[Refine] Updated {len(remaining)} pending step(s)",
+                    color="cyan",
+                )
+
+        # Same exit check as handle_continue_plan: the step just completed may be the last
+        return "all_todos_complete" if self.state.todos.is_complete else "has_todos"
+
+    @listen("continue_plan")
+    def handle_continue_plan(self) -> Literal["has_todos", "all_todos_complete"]:
+        """Pick the follow-up label: complete when the todo list is, else has_todos."""
+        finished = self.state.todos.is_complete
+        # Truthiness of is_complete decides the label, exactly as a plain `if` would
+        return "all_todos_complete" if finished else "has_todos"
+
+    @listen("goal_achieved")
+    def handle_goal_achieved(self) -> Literal["all_todos_complete"]:
+        """Emit the goal-achieved-early event and return "all_todos_complete"."""
+        todo_list = self.state.todos
+        done_steps = todo_list.get_completed_todos()
+        skipped_steps = todo_list.get_pending_todos()
+        # Report the last completed step, or 0 when nothing has completed
+        last_step_number = done_steps[-1].step_number if done_steps else 0
+
+        early_event = GoalAchievedEarlyEvent(
+            agent_role=self.agent.role,
+            step_number=last_step_number,
+            step_description="",
+            steps_completed=len(done_steps),
+            steps_remaining=len(skipped_steps),
+            from_task=self.task,
+            from_agent=self.agent,
+        )
+        crewai_event_bus.emit(self.agent, event=early_event)
+
+        if self.agent.verbose:
+            self._printer.print(
+                content="Goal achieved early — skipping remaining steps", color="green"
+            )
+
+        return "all_todos_complete"
+
+    @listen("replan_now")
+    def handle_replan_now(
+        self,
+    ) -> Literal["has_todos", "all_todos_complete"]:
+        """Handle a replan request, stopping once the replan limit is exceeded.
+
+        The counter is incremented before the limit check. Otherwise a
+        PlanReplanTriggeredEvent is emitted and ``_trigger_replan`` is called.
+        """
+        replan_limit = 3
+        self.state.replan_count += 1
+        over_limit = self.state.replan_count > replan_limit
+
+        if over_limit and self.agent.verbose:
+            self._printer.print(
+                content=f"Max replans ({replan_limit}) reached — finalizing with current results",
+                color="yellow",
+            )
+        if over_limit:
+            return "all_todos_complete"
+
+        why = self.state.last_replan_reason or "Dynamic replan triggered"
+        done_steps = self.state.todos.get_completed_todos()
+        last_done_number = done_steps[-1].step_number if done_steps else 0
+
+        replan_event = PlanReplanTriggeredEvent(
+            agent_role=self.agent.role,
+            step_number=last_done_number,
+            step_description="",
+            replan_reason=why,
+            replan_count=self.state.replan_count,
+            completed_steps_preserved=len(done_steps),
+            from_task=self.task,
+            from_agent=self.agent,
+        )
+        crewai_event_bus.emit(self.agent, event=replan_event)
+
+        self._trigger_replan(why)
+
+        # Route on whether the replan left any pending todos
+        has_pending = bool(self.state.todos.get_pending_todos())
+        return "has_todos" if has_pending else "all_todos_complete"
+
     # -------------------------------------------------------------------------
     # Todo-Driven Execution Flow
     # -------------------------------------------------------------------------
@@ -460,28 +809,73 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
         return "multiple_todos_ready"
 
     @router("single_todo_ready")
-    def execute_todo_sequential(self) -> Literal["todo_injected"]:
-        """Prepare to execute a single todo by injecting its context.
+    def execute_todo_sequential(
+        self,
+    ) -> Literal["step_executed", "todo_injected"]:
+        """Execute a single todo using StepExecutor (Plan-and-Execute mode)
+        or fall back to the old ReAct injection (legacy mode).
 
-        Adds a focused prompt for the current todo to the conversation,
-        guiding the agent to complete this specific step.
+        Plan-and-Execute mode (``agent.planning_enabled`` truthy): runs the step
+        via StepExecutor, stores the result on the todo, returns "step_executed".
+
+        Legacy mode: injects the todo prompt into the shared message list and
+        returns "todo_injected" (also returned, uninjected, when no todo is current).
         """
         current = self.state.todos.current_todo
+        if not current:
+            return "todo_injected"  # No current todo: nothing to execute or inject
 
-        # DEBUG: Trace starting todo execution
-        if self.agent.verbose:
-            self._printer.print(
-                content=f"[DEBUG] execute_todo_sequential: starting todo {current.step_number if current else None}",
-                color="cyan",
-            )
-            if current:
+        # Plan-and-Execute path: use StepExecutor for isolated execution
+        if getattr(self.agent, "planning_enabled", False):
+            if self.agent.verbose:
                 self._printer.print(
-                    content=f"[DEBUG]   Description: {current.description[:60]}...",
+                    content=(
+                        f"[Execute] Step {current.step_number}: "
+                        f"{current.description[:60]}..."
+                    ),
                     color="cyan",
                 )
 
-        if current:
-            self._inject_todo_context(current)
+            step_executor = self._ensure_step_executor()
+            context = self._build_context_for_todo(current)
+            result = step_executor.execute(current, context)
+
+            # Store result on the todo (do NOT mark completed — observation decides)
+            current.result = result.result
+
+            # Record a summary in the execution log (result preview capped at 200 chars)
+            self.state.execution_log.append(
+                {
+                    "type": "step_execution",
+                    "step_number": current.step_number,
+                    "success": result.success,
+                    "result_preview": result.result[:200] if result.result else "",
+                    "error": result.error,
+                    "tool_calls": result.tool_calls_made,
+                    "execution_time": result.execution_time,
+                }
+            )
+
+            if self.agent.verbose:
+                status = "success" if result.success else "failed"
+                self._printer.print(
+                    content=(
+                        f"[Execute] Step {current.step_number} {status} "
+                        f"({result.execution_time:.1f}s, "
+                        f"{len(result.tool_calls_made)} tool calls)"
+                    ),
+                    color="green" if result.success else "red",
+                )
+
+            return "step_executed"
+
+        # Legacy path: inject context into shared messages for ReAct loop
+        if self.agent.verbose:
+            self._printer.print(
+                content=f"[DEBUG] execute_todo_sequential (legacy): starting todo {current.step_number}",
+                color="cyan",
+            )
+        self._inject_todo_context(current)
         return "todo_injected"
 
     def _inject_todo_context(self, todo: TodoItem) -> None:
@@ -490,18 +884,23 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
         Args:
             todo: The todo item to inject context for.
         """
-        prompt = self._build_todo_prompt(todo)
+        # Build focused task prompt. Context from previous steps is already
+        # in self.state.messages as SYSTEM messages (added by _mark_todo_as_completed)
+        prompt = self._build_todo_prompt(todo, include_dependencies=False)
         todo_message: LLMMessage = {
             "role": "user",
             "content": prompt,
         }
         self.state.messages.append(todo_message)
 
-    def _build_todo_prompt(self, todo: TodoItem) -> str:
+    def _build_todo_prompt(
+        self, todo: TodoItem, include_dependencies: bool = True
+    ) -> str:
         """Build a focused prompt for executing a single todo.
 
         Args:
             todo: The todo item to build a prompt for.
+            include_dependencies: Whether to include dependency results in this prompt.
 
         Returns:
             A prompt string focused on this specific step.
@@ -513,19 +912,13 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
         if todo.tool_to_use:
             parts.append(f"Suggested tool: {todo.tool_to_use}")
 
-        # Include results from completed dependencies
-        if todo.depends_on:
+        # Include results from completed dependencies if requested (used for parallel execution)
+        if include_dependencies and todo.depends_on:
             dep_results = []
             for dep_num in todo.depends_on:
                 dep = self.state.todos.get_by_step_number(dep_num)
                 if dep and dep.result:
-                    # Truncate long results
-                    result_preview = (
-                        dep.result[:500] + "..."
-                        if len(dep.result) > 500
-                        else dep.result
-                    )
-                    dep_results.append(f"Step {dep_num} result: {result_preview}")
+                    dep_results.append(f"Step {dep_num} result: {dep.result}")
             if dep_results:
                 parts.append("\nContext from previous steps:")
                 parts.extend(dep_results)
@@ -561,12 +954,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
                         color="red",
                     )
             else:
-                self.state.todos.mark_completed(todo.step_number, result=str(result))
-                if self.agent.verbose:
-                    self._printer.print(
-                        content=f"Todo {todo.step_number} completed",
-                        color="green",
-                    )
+                self._mark_todo_as_completed(todo.step_number, str(result))
 
         return "parallel_todos_complete"
 
@@ -580,12 +968,29 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
             The result of executing the todo.
         """
         # Build messages for this specific todo
-        todo_prompt = self._build_todo_prompt(todo)
         messages: list[LLMMessage] = [
             {"role": "system", "content": self._get_todo_system_prompt()},
-            {"role": "user", "content": todo_prompt},
         ]
 
+        # Inject context into messages for parallel execution (since history is empty)
+        if todo.depends_on:
+            dep_results = []
+            for dep_num in todo.depends_on:
+                dep = self.state.todos.get_by_step_number(dep_num)
+                if dep and dep.result:
+                    dep_results.append(f"Step {dep_num} result: {dep.result}")
+            if dep_results:
+                messages.append(
+                    {
+                        "role": "system",
+                        "content": "Context from previous steps:\n"
+                        + "\n".join(dep_results),
+                    }
+                )
+
+        todo_prompt = self._build_todo_prompt(todo, include_dependencies=False)
+        messages.append({"role": "user", "content": todo_prompt})
+
         # If the todo specifies a tool and we have native tool support
         if todo.tool_to_use and self.state.use_native_tools:
             try:
@@ -1415,22 +1820,49 @@ provide clear results that can be used by subsequent steps."""
                     or last_msg.get("role") == "assistant"
                 ):
                     result = str(last_msg.get("content", ""))
+        elif not self.state.current_answer and self.state.messages:
+            # For native tools, results are in the message history as 'tool' roles
+            # We take the content of the most recent tool results
+            tool_results = []
+            for msg in reversed(self.state.messages):
+                if msg.get("role") == "tool":
+                    tool_results.insert(0, str(msg.get("content", "")))
+                elif msg.get("role") == "assistant" and msg.get("tool_calls"):
+                    # Once we hit the assistant message that triggered the tools, we stop
+                    break
+            result = "\n".join(tool_results)
 
-        self.state.todos.mark_completed(current_todo.step_number, result=result)
+        self._mark_todo_as_completed(current_todo.step_number, result)
+
+        return "todo_marked"
+
+    def _mark_todo_as_completed(self, step_number: int, result: str) -> None:
+        """Mark a todo completed, log it when verbose, and add its result to history.
+
+        Args:
+            step_number: Step number of the todo to mark completed.
+            result: Result text; if non-empty, also appended as a system message.
+        """
+        self.state.todos.mark_completed(step_number, result=result)
 
         if self.agent.verbose:
             completed = self.state.todos.completed_count
             total = len(self.state.todos.items)
             self._printer.print(
-                content=f"✓ Todo {current_todo.step_number} completed ({completed}/{total})",
+                content=f"✓ Todo {step_number} completed ({completed}/{total})",
                 color="green",
             )
             self._printer.print(
-                content=f"[DEBUG] Marked todo {current_todo.step_number} as completed, result_len={len(result)}",
+                content=f"[DEBUG] Marked todo {step_number} as completed, result_len={len(result)}",
                 color="cyan",
             )
 
-        return "todo_marked"
+        # Skip empty results; otherwise add to history as a SYSTEM message for later steps
+        if result:
+            self._append_message_to_state(
+                f"**Step {step_number} result:**\n\n{result}",
+                role="system",
+            )
 
     @router(mark_todo_complete)
     def check_more_todos(
@@ -1500,22 +1932,28 @@ provide clear results that can be used by subsequent steps."""
         """Finalize execution and emit completion logs.
 
         If todos were used, synthesizes a final answer from all todo results.
+        Handles both the legacy ReAct path (current_answer already set) and
+        the Plan-and-Execute path (synthesize from completed todos).
         """
-        # DEBUG: Trace finalize being called
         if self.agent.verbose:
             self._printer.print(
-                content=f"[DEBUG] finalize called! todos_count={len(self.state.todos.items)}, todos_complete={self.state.todos.is_complete}",
+                content=f"[Finalize] todos_count={len(self.state.todos.items)}, todos_with_results={sum(1 for t in self.state.todos.items if t.result)}",
                 color="magenta",
             )
-            if self.state.todos.items:
-                for todo in self.state.todos.items:
-                    self._printer.print(
-                        content=f"[DEBUG]   Todo {todo.step_number}: status={todo.status}, desc={todo.description[:40]}...",
-                        color="magenta",
-                    )
 
-        # If we have completed todos, synthesize the final answer
-        if self.state.todos.items and self.state.todos.is_complete:
+        # Plan-and-Execute path: synthesize from completed todos
+        # Check for todos with results (even if not all marked "completed" —
+        # the goal_achieved path may skip marking some as completed)
+        todos_with_results = [t for t in self.state.todos.items if t.result]
+        if todos_with_results and self.state.current_answer is None:
+            self._synthesize_final_answer_from_todos()
+
+        # Legacy path: synthesize if todos are all formally complete
+        if (
+            self.state.todos.items
+            and self.state.todos.is_complete
+            and self.state.current_answer is None
+        ):
             self._synthesize_final_answer_from_todos()
 
         if self.state.current_answer is None:
@@ -1552,7 +1990,7 @@ provide clear results that can be used by subsequent steps."""
         results: list[str] = []
         for todo in self.state.todos.items:
             if todo.result:
-                results.append(f"**Step {todo.step_number}**: {todo.description}")
+                results.append(f"**Step {todo.step_number} result:**")
                 results.append(todo.result)
                 results.append("")  # Empty line for spacing
 
@@ -1703,14 +2141,9 @@ provide clear results that can be used by subsequent steps."""
         if completed:
             context_parts.append("Successfully completed steps:")
             for todo in completed:
-                result_preview = (
-                    todo.result[:200] + "..."
-                    if todo.result and len(todo.result) > 200
-                    else todo.result
-                )
                 context_parts.append(f"  - Step {todo.step_number}: {todo.description}")
-                if result_preview:
-                    context_parts.append(f"    Result: {result_preview}")
+                if todo.result:
+                    context_parts.append(f"    Result: {todo.result}")
 
         # Summarize failed todos
         failed = [
@@ -1858,6 +2291,8 @@ Consider:
             self.state.todos = TodoList()
             self.state.replan_count = 0
             self.state.last_replan_reason = None
+            self.state.observations = {}
+            self.state.execution_log = []
 
             self._kickoff_input = inputs.get("input", "")
 
@@ -1949,6 +2384,8 @@ Consider:
             self.state.todos = TodoList()
             self.state.replan_count = 0
             self.state.last_replan_reason = None
+            self.state.observations = {}
+            self.state.execution_log = []
 
             self._kickoff_input = inputs.get("input", "")
 
diff --git a/lib/crewai/src/crewai/utilities/planning_types.py b/lib/crewai/src/crewai/utilities/planning_types.py
index 69bc079cd..306b3b4fc 100644
--- a/lib/crewai/src/crewai/utilities/planning_types.py
+++ b/lib/crewai/src/crewai/utilities/planning_types.py
@@ -144,3 +144,86 @@ class TodoList(BaseModel):
     def running_count(self) -> int:
         """Count of currently running todos."""
         return sum(1 for item in self.items if item.status == "running")
+
+    def get_completed_todos(self) -> list[TodoItem]:
+        """Collect every todo whose status equals "completed", in list order."""
+        finished: list[TodoItem] = []
+        for todo in self.items:
+            if todo.status == "completed":
+                finished.append(todo)
+        return finished
+
+    def get_pending_todos(self) -> list[TodoItem]:
+        """Collect every todo whose status equals "pending", in list order."""
+        waiting: list[TodoItem] = []
+        for todo in self.items:
+            if todo.status == "pending":
+                waiting.append(todo)
+        return waiting
+
+    def replace_pending_todos(self, new_items: list[TodoItem]) -> None:
+        """Swap the pending todos for ``new_items``.
+
+        Every todo whose status is not "pending" is kept in its current order;
+        ``new_items`` are placed after them. ``self.items`` is rebound to a new list.
+
+        Args:
+            new_items: Todos that take the place of the pending ones.
+        """
+        kept = [todo for todo in self.items if todo.status != "pending"]
+        self.items = kept + new_items
+
+
+class StepObservation(BaseModel):
+    """Structured verdict the Planner produces once a step has been executed.
+
+    An observation is produced for EVERY step, successful or not, and drives
+    the choice between continuing, refining the plan, replanning, or stopping.
+
+    Follows the "Observe" phase of PLAN-AND-ACT (Section 3.3): the Planner looks
+    at what the Executor did and folds new information into the remaining plan.
+
+    Attributes:
+        step_completed_successfully: Whether the step did what it was asked.
+        key_information_learned: Free-text summary of what the step revealed
+            (e.g., "Found 3 products: A, B, C"). Defaults to an empty string.
+        remaining_plan_still_valid: True (the default) when the pending todos
+            still make sense. True does NOT rule out refinements.
+        suggested_refinements: Optional small tweaks to upcoming step
+            descriptions, applied in place rather than through a full replan.
+            Example: ["Step 3 should select product B instead of 'best product'"]
+        needs_full_replan: True when the remaining plan must be regenerated.
+            Expected to be False whenever remaining_plan_still_valid is True;
+            NOTE: no validator in this class enforces that.
+        replan_reason: Why a full replan is needed (None if not).
+        goal_already_achieved: True when the overall task goal is already
+            satisfied, so no further steps are needed.
+    """
+
+    step_completed_successfully: bool = Field(
+        description="Whether the step achieved what it was asked to do"
+    )
+    key_information_learned: str = Field(
+        description="What new information this step revealed",
+        default="",
+    )
+    remaining_plan_still_valid: bool = Field(
+        description="Whether the remaining pending todos still make sense given new information",
+        default=True,
+    )
+    suggested_refinements: list[str] | None = Field(
+        description="Minor tweaks to descriptions of upcoming steps (lightweight, no full replan)",
+        default=None,
+    )
+    needs_full_replan: bool = Field(
+        description="The remaining plan is fundamentally wrong and must be regenerated",
+        default=False,
+    )
+    replan_reason: str | None = Field(
+        description="Explanation of why a full replan is needed",
+        default=None,
+    )
+    goal_already_achieved: bool = Field(
+        description="The overall task goal has been satisfied early; no more steps needed",
+        default=False,
+    )
diff --git a/lib/crewai/src/crewai/utilities/step_execution_context.py b/lib/crewai/src/crewai/utilities/step_execution_context.py
new file mode 100644
index 000000000..170427948
--- /dev/null
+++ b/lib/crewai/src/crewai/utilities/step_execution_context.py
@@ -0,0 +1,64 @@
+"""Context and result types for isolated step execution in Plan-and-Execute architecture.
+
+These types mediate between the AgentExecutor (orchestrator) and StepExecutor (per-step worker).
+StepExecutionContext carries only final results from dependencies — never LLM message histories.
+StepResult carries only the outcome of a step — never internal execution traces.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass(frozen=True)
+class StepExecutionContext:
+    """Read-only bundle handed to a StepExecutor for one todo.
+
+    Holds the overall task description and goal plus the final results of the
+    steps the current todo depends on — no LLM message history and no
+    execution traces. ``frozen=True`` blocks attribute rebinding only; the
+    ``dependency_results`` dict itself can still be mutated by its holder.
+
+    Attributes:
+        task_description: The original task description (from Task or kickoff input).
+        task_goal: The expected output / goal of the overall task.
+        dependency_results: Final result string per dependency step number.
+    """
+
+    task_description: str
+    task_goal: str
+    dependency_results: dict[int, str] = field(default_factory=dict)
+
+    def get_dependency_result(self, step_number: int) -> str | None:
+        """Look up the final result recorded for a dependency step.
+
+        Args:
+            step_number: Step number of the dependency.
+
+        Returns:
+            The stored result, or None when that step has no entry.
+        """
+        return self.dependency_results.get(step_number)
+
+
+@dataclass
+class StepResult:
+    """Outcome a StepExecutor hands back after running a single todo.
+
+    Carries the final output plus metadata for debugging and metrics. The
+    tool-call list exists for audit logging only; it is NOT forwarded to
+    later steps or to the Planner.
+
+    Attributes:
+        success: True when the step completed successfully.
+        result: Final output string of the step.
+        error: Failure message; stays None on success.
+        tool_calls_made: Names of the tools invoked (debugging/logging only).
+        execution_time: Wall-clock duration of the step, in seconds.
+    """
+
+    success: bool
+    result: str
+    error: str | None = None
+    tool_calls_made: list[str] = field(default_factory=list)
+    execution_time: float = 0.0
diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml
index 97ca50aa2..f2b14ae14 100644
--- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml
@@ -4,18 +4,25 @@ interactions:
       Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
       a focused execution plan for the following task:\n\n## Task\nWhat is 2 + 2?\n\n##
       Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools
-      available\n\n## Instructions\nCreate ONLY the essential steps needed to complete
-      this task. Use the MINIMUM number of steps required - do NOT pad your plan with
-      unnecessary steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State
-      the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT
-      include:\n- Setup or preparation steps that are obvious\n- Verification steps
-      unless critical\n- Documentation or cleanup steps unless explicitly required\n-
-      Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan,
-      state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n-
-      \"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
-      or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
-      detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
-      the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
+      available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished,
+      not HOW. Group related actions into logical units. Fewer steps = better. Most
+      tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are
+      valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2.
+      **Output Step**: Synthesizes prior results into the final deliverable (usually
+      the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE
+      FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step,
+      not three\n- Combine all synthesis into ONE final output step\n- NO standalone
+      \"thinking\" steps (review, verify, confirm, refine, analyze) - these happen
+      naturally between steps\n\nFor each step: State the action, specify the tool
+      (if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
+      or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
+      brief summary of the overall plan."},"steps":{"type":"array","description":"List
+      of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
+      number (1-based)"},"description":{"type":"string","description":"What to do
+      in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
+      use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
+      numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
+      the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -28,7 +35,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '1541'
+      - '2315'
       content-type:
       - application/json
       host:
@@ -55,20 +62,24 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTTAh68P65LybtqkwNI3p2HXcRv\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078147,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FIyv2pfC7qKbZVvmJNjOVfge1F\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330972,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"## Execution Plan\\n\\n1. **Action:**
-        Perform the addition operation.  \\n   **Tool:** None (manually calculate).\\n\\n2.
-        **Action:** State the result.  \\n   **Tool:** None (manually output).\\n\\nREADY:
-        I am ready to execute the task.\",\n        \"refusal\": null,\n        \"annotations\":
-        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 281,\n    \"completion_tokens\":
-        56,\n    \"total_tokens\": 337,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_xEDChlUntYR0aSxQhkobswea\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"create_reasoning_plan\",\n
+        \             \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 2 +
+        2 and provide the result as the final output.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate
+        the sum of 2 + 2\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide
+        the result as final output\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n
+        \           }\n          }\n        ],\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 440,\n    \"completion_tokens\":
+        92,\n    \"total_tokens\": 532,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
         0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -77,7 +88,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:28 GMT
+      - Thu, 05 Feb 2026 22:36:13 GMT
       Server:
       - cloudflare
       Set-Cookie:
@@ -97,7 +108,7 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '1165'
+      - '1670'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
@@ -123,9 +134,13 @@ interactions:
       message: OK
 - request:
     body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
-      assistant that solves math problems step by step\nYour personal goal is: Help
-      solve simple math problems"},{"role":"user","content":"\nCurrent Task: What
-      is 2 + 2?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
+      assistant that solves math problems step by step\n\nYour goal: Help solve simple
+      math problems\n\nYou are executing a specific step in a multi-step plan. Focus
+      ONLY on completing\nthe current step. Do not plan ahead or worry about future
+      steps.\n\nBefore acting, briefly reason about what you need to do and which
+      approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
+      Current Step\nCalculate the sum of 2 + 2\n\nComplete this step and provide your
+      result."}],"model":"gpt-4o-mini"}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -138,7 +153,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '299'
+      - '597'
       content-type:
       - application/json
       cookie:
@@ -167,20 +182,18 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTVB9mdtq1YZrUVf1aSb6dVVQ8G\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078149,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FJ4ZEkHWSBMZA8bDbMqd7upzwY\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330973,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"To solve the problem of 2 + 2, we simply
-        perform the addition:\\n\\n1. Start with the first number: 2\\n2. Add the
-        second number: + 2\\n3. Combine the two: 2 + 2 = 4\\n\\nTherefore, the answer
-        is 4.\",\n        \"refusal\": null,\n        \"annotations\": []\n      },\n
-        \     \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n
-        \ \"usage\": {\n    \"prompt_tokens\": 54,\n    \"completion_tokens\": 62,\n
-        \   \"total_tokens\": 116,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        \"assistant\",\n        \"content\": \"To calculate the sum of 2 + 2, I simply
+        add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        115,\n    \"completion_tokens\": 33,\n    \"total_tokens\": 148,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -189,7 +202,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:30 GMT
+      - Thu, 05 Feb 2026 22:36:14 GMT
       Server:
       - cloudflare
       Strict-Transport-Security:
@@ -207,7 +220,155 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '1300'
+      - '614'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
+      observing execution progress. After each step completes, you analyze what happened
+      and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
+      about:\\n1. What new information was learned from this step's result\\n2. Whether
+      the remaining steps still make sense given this new information\\n3. What refinements,
+      if any, are needed for upcoming steps\\n4. Whether the overall goal has already
+      been achieved\\n\\nBe conservative about triggering full replans \u2014 only
+      do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
+      Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
+      Calculate the sum of 2 + 2\\nResult: To calculate the sum of 2 + 2, I simply
+      add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\\n\\n## Remaining
+      plan steps:\\n  Step 2: Provide the result as final output\\n\\nAnalyze this
+      step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
+      observation after a step execution completes.\\n\\nReturned by the PlannerObserver
+      after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
+      whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
+      3.3): the Planner observes what the Executor\\ndid and incorporates new information
+      into the remaining plan.\\n\\nAttributes:\\n    step_completed_successfully:
+      Whether the step achieved its objective.\\n    key_information_learned: New
+      information revealed by this step\\n        (e.g., \\\"Found 3 products: A,
+      B, C\\\"). Used to refine upcoming steps.\\n    remaining_plan_still_valid:
+      Whether pending todos still make sense\\n        given the new information.
+      True does NOT mean no refinement needed.\\n    suggested_refinements: Minor
+      tweaks to upcoming step descriptions.\\n        These are lightweight in-place
+      updates, not a full replan.\\n        Example: [\\\"Step 3 should select product
+      B instead of 'best product'\\\"]\\n    needs_full_replan: The remaining plan
+      is fundamentally wrong and must\\n        be regenerated from scratch. Mutually
+      exclusive with\\n        remaining_plan_still_valid (if this is True, that should
+      be False).\\n    replan_reason: Explanation of why a full replan is needed (None
+      if not).\\n    goal_already_achieved: The overall task goal has been satisfied
+      early.\\n        No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
+      the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
+      new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
+      the remaining pending todos still make sense given new information\",\"title\":\"Remaining
+      Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
+      tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
+      Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
+      remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
+      Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
+      of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
+      overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
+      Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '4024'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-helper-method:
+      - beta.chat.completions.parse
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FKmJpd8tlJ6Y3OChUQsoz2o5ps\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330974,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
+        calculation for 2 + 2 is 4.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        789,\n    \"completion_tokens\": 64,\n    \"total_tokens\": 853,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:15 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1181'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml
index 3ceb8fa34..a24b4277b 100644
--- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml
@@ -42,17 +42,17 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTTFxQ75llVmJv0ee902FIjXE8p\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078147,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FTKj39Y02oqJmQxpmC8sz2piEl\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330983,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"3 + 3 equals 6.\",\n        \"refusal\":
+        \"assistant\",\n        \"content\": \"The sum of 3 + 3 is 6.\",\n        \"refusal\":
         null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
         \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-        47,\n    \"completion_tokens\": 8,\n    \"total_tokens\": 55,\n    \"prompt_tokens_details\":
+        47,\n    \"completion_tokens\": 12,\n    \"total_tokens\": 59,\n    \"prompt_tokens_details\":
         {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -61,7 +61,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:27 GMT
+      - Thu, 05 Feb 2026 22:36:23 GMT
       Server:
       - cloudflare
       Set-Cookie:
@@ -81,7 +81,113 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '401'
+      - '361'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
+      assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
+      Task: What is 3 + 3?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '260'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FT7ELyytoJFkmjOtWysQA2Bfvy\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330983,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"The sum of 3 + 3 is 6.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        47,\n    \"completion_tokens\": 12,\n    \"total_tokens\": 59,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:23 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '362'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml
index 003471628..8aa857a0b 100644
--- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml
@@ -4,18 +4,25 @@ interactions:
       Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
       a focused execution plan for the following task:\n\n## Task\nWhat is 7 + 7?\n\n##
       Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools
-      available\n\n## Instructions\nCreate ONLY the essential steps needed to complete
-      this task. Use the MINIMUM number of steps required - do NOT pad your plan with
-      unnecessary steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State
-      the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT
-      include:\n- Setup or preparation steps that are obvious\n- Verification steps
-      unless critical\n- Documentation or cleanup steps unless explicitly required\n-
-      Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan,
-      state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n-
-      \"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
-      or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
-      detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
-      the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
+      available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished,
+      not HOW. Group related actions into logical units. Fewer steps = better. Most
+      tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are
+      valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2.
+      **Output Step**: Synthesizes prior results into the final deliverable (usually
+      the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE
+      FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step,
+      not three\n- Combine all synthesis into ONE final output step\n- NO standalone
+      \"thinking\" steps (review, verify, confirm, refine, analyze) - these happen
+      naturally between steps\n\nFor each step: State the action, specify the tool
+      (if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
+      or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
+      brief summary of the overall plan."},"steps":{"type":"array","description":"List
+      of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
+      number (1-based)"},"description":{"type":"string","description":"What to do
+      in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
+      use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
+      numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
+      the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -28,7 +35,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '1541'
+      - '2315'
       content-type:
       - application/json
       host:
@@ -55,18 +62,24 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTdqlxwWowSdLncBERFrCgxTvVj\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078157,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FN5xLKcEfF0ISjfbnezYLsZtma\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330977,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"## Execution Plan\\n\\n1. Calculate
-        the sum of 7 and 7.\\n   \\nREADY: I am ready to execute the task.\",\n        \"refusal\":
-        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
-        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-        281,\n    \"completion_tokens\": 28,\n    \"total_tokens\": 309,\n    \"prompt_tokens_details\":
-        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_rSNKBB5w6x6IXkm0fm2GN1hI\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"create_reasoning_plan\",\n
+        \             \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 7 +
+        7 and provide the result.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate
+        the sum of 7 + 7.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide
+        the final output of the calculation.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n
+        \           }\n          }\n        ],\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 440,\n    \"completion_tokens\":
+        89,\n    \"total_tokens\": 529,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -75,7 +88,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:38 GMT
+      - Thu, 05 Feb 2026 22:36:18 GMT
       Server:
       - cloudflare
       Set-Cookie:
@@ -95,7 +108,7 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '709'
+      - '1700'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
@@ -121,9 +134,13 @@ interactions:
       message: OK
 - request:
     body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
-      assistant that solves math problems step by step\nYour personal goal is: Help
-      solve simple math problems"},{"role":"user","content":"\nCurrent Task: What
-      is 7 + 7?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
+      assistant that solves math problems step by step\n\nYour goal: Help solve simple
+      math problems\n\nYou are executing a specific step in a multi-step plan. Focus
+      ONLY on completing\nthe current step. Do not plan ahead or worry about future
+      steps.\n\nBefore acting, briefly reason about what you need to do and which
+      approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
+      Current Step\nCalculate the sum of 7 + 7.\n\nComplete this step and provide
+      your result."}],"model":"gpt-4o-mini"}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -136,7 +153,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '299'
+      - '598'
       content-type:
       - application/json
       cookie:
@@ -165,18 +182,19 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTeB6Miecallw9SjSfLAXPjX2XD\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078158,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FOVRLtzvZr17sXJ05O6NTxw1rI\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330978,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"To find the sum of 7 and 7, you simply
-        add the two numbers together:\\n\\n7 + 7 = 14\\n\\nSo, the answer is 14.\",\n
-        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
-        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-        54,\n    \"completion_tokens\": 35,\n    \"total_tokens\": 89,\n    \"prompt_tokens_details\":
-        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        \"assistant\",\n        \"content\": \"To calculate the sum of 7 + 7, I need
+        to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, the result
+        is 14.\",\n        \"refusal\": null,\n        \"annotations\": []\n      },\n
+        \     \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n
+        \ \"usage\": {\n    \"prompt_tokens\": 115,\n    \"completion_tokens\": 38,\n
+        \   \"total_tokens\": 153,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -185,7 +203,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:38 GMT
+      - Thu, 05 Feb 2026 22:36:19 GMT
       Server:
       - cloudflare
       Strict-Transport-Security:
@@ -203,7 +221,418 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '733'
+      - '868'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
+      observing execution progress. After each step completes, you analyze what happened
+      and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
+      about:\\n1. What new information was learned from this step's result\\n2. Whether
+      the remaining steps still make sense given this new information\\n3. What refinements,
+      if any, are needed for upcoming steps\\n4. Whether the overall goal has already
+      been achieved\\n\\nBe conservative about triggering full replans \u2014 only
+      do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
+      Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
+      Calculate the sum of 7 + 7.\\nResult: To calculate the sum of 7 + 7, I need
+      to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, the result
+      is 14.\\n\\n## Remaining plan steps:\\n  Step 2: Provide the final output of
+      the calculation.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
+      observation after a step execution completes.\\n\\nReturned by the PlannerObserver
+      after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
+      whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
+      3.3): the Planner observes what the Executor\\ndid and incorporates new information
+      into the remaining plan.\\n\\nAttributes:\\n    step_completed_successfully:
+      Whether the step achieved its objective.\\n    key_information_learned: New
+      information revealed by this step\\n        (e.g., \\\"Found 3 products: A,
+      B, C\\\"). Used to refine upcoming steps.\\n    remaining_plan_still_valid:
+      Whether pending todos still make sense\\n        given the new information.
+      True does NOT mean no refinement needed.\\n    suggested_refinements: Minor
+      tweaks to upcoming step descriptions.\\n        These are lightweight in-place
+      updates, not a full replan.\\n        Example: [\\\"Step 3 should select product
+      B instead of 'best product'\\\"]\\n    needs_full_replan: The remaining plan
+      is fundamentally wrong and must\\n        be regenerated from scratch. Mutually
+      exclusive with\\n        remaining_plan_still_valid (if this is True, that should
+      be False).\\n    replan_reason: Explanation of why a full replan is needed (None
+      if not).\\n    goal_already_achieved: The overall task goal has been satisfied
+      early.\\n        No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
+      the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
+      new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
+      the remaining pending todos still make sense given new information\",\"title\":\"Remaining
+      Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
+      tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
+      Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
+      remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
+      Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
+      of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
+      overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
+      Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '4051'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-helper-method:
+      - beta.chat.completions.parse
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FPKZpmhdynDPftfUn6yxeNSmro\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330979,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
+        sum of 7 + 7 has been correctly calculated to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        795,\n    \"completion_tokens\": 69,\n    \"total_tokens\": 864,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:21 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1071'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
+      assistant that solves math problems step by step\n\nYour goal: Help solve simple
+      math problems\n\nYou are executing a specific step in a multi-step plan. Focus
+      ONLY on completing\nthe current step. Do not plan ahead or worry about future
+      steps.\n\nBefore acting, briefly reason about what you need to do and which
+      approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
+      Current Step\nProvide the final output of the calculation.\n\n## Context from
+      previous steps:\nStep 1 result: To calculate the sum of 7 + 7, I need to simply
+      add the two numbers together. \n\n7 + 7 = 14.\n\nSo, the result is 14.\n\nComplete
+      this step and provide your result."}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '785'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FRqSOxtg5k7zpUfvXk8XEZMz9x\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330981,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"The final output of the calculation
+        is 14.\",\n        \"refusal\": null,\n        \"annotations\": []\n      },\n
+        \     \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n
+        \ \"usage\": {\n    \"prompt_tokens\": 162,\n    \"completion_tokens\": 10,\n
+        \   \"total_tokens\": 172,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:21 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '446'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
+      observing execution progress. After each step completes, you analyze what happened
+      and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
+      about:\\n1. What new information was learned from this step's result\\n2. Whether
+      the remaining steps still make sense given this new information\\n3. What refinements,
+      if any, are needed for upcoming steps\\n4. Whether the overall goal has already
+      been achieved\\n\\nBe conservative about triggering full replans \u2014 only
+      do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
+      Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
+      \ Step 1: Calculate the sum of 7 + 7.\\n    Result: To calculate the sum of
+      7 + 7, I need to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo,
+      the result is 14.\\n\\n## Just completed step 2\\nDescription: Provide the final
+      output of the calculation.\\nResult: The final output of the calculation is
+      14.\\n\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
+      observation after a step execution completes.\\n\\nReturned by the PlannerObserver
+      after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
+      whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
+      3.3): the Planner observes what the Executor\\ndid and incorporates new information
+      into the remaining plan.\\n\\nAttributes:\\n    step_completed_successfully:
+      Whether the step achieved its objective.\\n    key_information_learned: New
+      information revealed by this step\\n        (e.g., \\\"Found 3 products: A,
+      B, C\\\"). Used to refine upcoming steps.\\n    remaining_plan_still_valid:
+      Whether pending todos still make sense\\n        given the new information.
+      True does NOT mean no refinement needed.\\n    suggested_refinements: Minor
+      tweaks to upcoming step descriptions.\\n        These are lightweight in-place
+      updates, not a full replan.\\n        Example: [\\\"Step 3 should select product
+      B instead of 'best product'\\\"]\\n    needs_full_replan: The remaining plan
+      is fundamentally wrong and must\\n        be regenerated from scratch. Mutually
+      exclusive with\\n        remaining_plan_still_valid (if this is True, that should
+      be False).\\n    replan_reason: Explanation of why a full replan is needed (None
+      if not).\\n    goal_already_achieved: The overall task goal has been satisfied
+      early.\\n        No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
+      the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
+      new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
+      the remaining pending todos still make sense given new information\",\"title\":\"Remaining
+      Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
+      tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
+      Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
+      remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
+      Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
+      of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
+      overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
+      Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '4113'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-helper-method:
+      - beta.chat.completions.parse
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FRITGCaSHqqF9f8FVEgkrZ36QL\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330981,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
+        final output of the calculation is confirmed to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        808,\n    \"completion_tokens\": 65,\n    \"total_tokens\": 873,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:22 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '924'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_config_disabled_skips_planning.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_config_disabled_skips_planning.yaml
deleted file mode 100644
index ccb9aee30..000000000
--- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_config_disabled_skips_planning.yaml
+++ /dev/null
@@ -1,108 +0,0 @@
-interactions:
-- request:
-    body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
-      assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
-      Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
-    headers:
-      User-Agent:
-      - X-USER-AGENT-XXX
-      accept:
-      - application/json
-      accept-encoding:
-      - ACCEPT-ENCODING-XXX
-      authorization:
-      - AUTHORIZATION-XXX
-      connection:
-      - keep-alive
-      content-length:
-      - '260'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-      x-stainless-arch:
-      - X-STAINLESS-ARCH-XXX
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - X-STAINLESS-OS-XXX
-      x-stainless-package-version:
-      - 1.83.0
-      x-stainless-read-timeout:
-      - X-STAINLESS-READ-TIMEOUT-XXX
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.13.3
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTf8T2iADffpPCJBZhntLlaoaSy\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078159,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"5 + 5 equals 10.\",\n        \"refusal\":
-        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
-        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-        47,\n    \"completion_tokens\": 8,\n    \"total_tokens\": 55,\n    \"prompt_tokens_details\":
-        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
-    headers:
-      CF-RAY:
-      - CF-RAY-XXX
-      Connection:
-      - keep-alive
-      Content-Type:
-      - application/json
-      Date:
-      - Tue, 03 Feb 2026 00:22:40 GMT
-      Server:
-      - cloudflare
-      Set-Cookie:
-      - SET-COOKIE-XXX
-      Strict-Transport-Security:
-      - STS-XXX
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - X-CONTENT-TYPE-XXX
-      access-control-expose-headers:
-      - ACCESS-CONTROL-XXX
-      alt-svc:
-      - h3=":443"; ma=86400
-      cf-cache-status:
-      - DYNAMIC
-      openai-organization:
-      - OPENAI-ORG-XXX
-      openai-processing-ms:
-      - '515'
-      openai-project:
-      - OPENAI-PROJECT-XXX
-      openai-version:
-      - '2020-10-01'
-      x-openai-proxy-wasm:
-      - v0.1
-      x-ratelimit-limit-requests:
-      - X-RATELIMIT-LIMIT-REQUESTS-XXX
-      x-ratelimit-limit-tokens:
-      - X-RATELIMIT-LIMIT-TOKENS-XXX
-      x-ratelimit-remaining-requests:
-      - X-RATELIMIT-REMAINING-REQUESTS-XXX
-      x-ratelimit-remaining-tokens:
-      - X-RATELIMIT-REMAINING-TOKENS-XXX
-      x-ratelimit-reset-requests:
-      - X-RATELIMIT-RESET-REQUESTS-XXX
-      x-ratelimit-reset-tokens:
-      - X-RATELIMIT-RESET-TOKENS-XXX
-      x-request-id:
-      - X-REQUEST-ID-XXX
-    status:
-      code: 200
-      message: OK
-version: 1
diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml
index 17307bdcc..5d2acbc90 100644
--- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml
@@ -1,23 +1,104 @@
 interactions:
+- request:
+    body: '{"trace_id": "869cae2c-e863-4e17-b6c7-e9cf6ba8835d", "execution_type":
+      "crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
+      "crew_name": "Unknown Crew", "flow_name": null, "crewai_version": "1.9.3", "privacy_level":
+      "standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
+      0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2026-02-05T22:35:59.859861+00:00"}}'
+    headers:
+      Accept:
+      - '*/*'
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '434'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - X-USER-AGENT-XXX
+      X-Crewai-Organization-Id:
+      - 3433f0ee-8a94-4aa4-822b-2ac71aa38b18
+      X-Crewai-Version:
+      - 1.9.3
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+    method: POST
+    uri: https://app.crewai.com/crewai_plus/api/v1/tracing/batches
+  response:
+    body:
+      string: '{"id":"d34854ac-4e95-420c-b08a-af182e63fc75","trace_id":"869cae2c-e863-4e17-b6c7-e9cf6ba8835d","execution_type":"crew","crew_name":"Unknown
+        Crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.9.3","privacy_level":"standard","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"Unknown
+        Crew","flow_name":null,"crewai_version":"1.9.3","privacy_level":"standard"},"created_at":"2026-02-05T22:36:00.450Z","updated_at":"2026-02-05T22:36:00.450Z"}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '492'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Thu, 05 Feb 2026 22:36:00 GMT
+      cache-control:
+      - no-store
+      content-security-policy:
+      - CSP-FILTERED
+      etag:
+      - ETAG-XXX
+      expires:
+      - '0'
+      permissions-policy:
+      - PERMISSIONS-POLICY-XXX
+      pragma:
+      - no-cache
+      referrer-policy:
+      - REFERRER-POLICY-XXX
+      strict-transport-security:
+      - STS-XXX
+      vary:
+      - Accept
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+      x-frame-options:
+      - X-FRAME-OPTIONS-XXX
+      x-permitted-cross-domain-policies:
+      - X-PERMITTED-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+      x-runtime:
+      - X-RUNTIME-XXX
+      x-xss-protection:
+      - X-XSS-PROTECTION-XXX
+    status:
+      code: 201
+      message: Created
 - request:
     body: '{"messages":[{"role":"system","content":"You are a strategic planning assistant.
       Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
       a focused execution plan for the following task:\n\n## Task\nCalculate the sum
       of the first 3 prime numbers, then multiply that result by 2. Show your work
       for each step.\n\n## Expected Output\nComplete the task successfully\n\n## Available
-      Tools\nNo tools available\n\n## Instructions\nCreate ONLY the essential steps
-      needed to complete this task. Use the MINIMUM number of steps required - do
-      NOT pad your plan with unnecessary steps. Most tasks need only 2-5 steps.\n\nFor
-      each step:\n- State the specific action to take\n- Specify which tool to use
-      (if any)\n\nDo NOT include:\n- Setup or preparation steps that are obvious\n-
-      Verification steps unless critical\n- Documentation or cleanup steps unless
-      explicitly required\n- Generic steps like \"review results\" or \"finalize output\"\n\nAfter
-      your plan, state:\n- \"READY: I am ready to execute the task.\" if the plan
-      is complete\n- \"NOT READY: I need to refine my plan because [reason].\" if
-      you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
-      or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
-      detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
-      the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
+      Tools\nNo tools available\n\n## Planning Principles\nFocus on WHAT needs to
+      be accomplished, not HOW. Group related actions into logical units. Fewer steps
+      = better. Most tasks need 3-6 steps. Hard limit: 10 steps.\n\n## Step Types
+      (only these are valid):\n1. **Tool Step**: Uses a tool to gather information
+      or take action\n2. **Output Step**: Synthesizes prior results into the final
+      deliverable (usually the last step)\n\n## Rules:\n- Each step must either USE
+      A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine related tool calls: \"Research
+      A, B, and C\" = ONE step, not three\n- Combine all synthesis into ONE final
+      output step\n- NO standalone \"thinking\" steps (review, verify, confirm, refine,
+      analyze) - these happen naturally between steps\n\nFor each step: State the
+      action, specify the tool (if any), and note dependencies.\n\nAfter your plan,
+      state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
+      or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
+      brief summary of the overall plan."},"steps":{"type":"array","description":"List
+      of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
+      number (1-based)"},"description":{"type":"string","description":"What to do
+      in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
+      use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
+      numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
+      the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -30,7 +111,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '1636'
+      - '2410'
       content-type:
       - application/json
       host:
@@ -57,20 +138,26 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTWa7FxCHkHwHF25AYXXeJDBOuY\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078150,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62F62rSoHIF6DpZZFowcKaVmb8Iu\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330960,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"## Execution Plan\\n\\n1. Identify
-        the first 3 prime numbers: 2, 3, and 5.\\n2. Calculate the sum: \\\\(2 + 3
-        + 5 = 10\\\\).\\n3. Multiply the sum by 2: \\\\(10 \\\\times 2 = 20\\\\).\\n\\nREADY:
-        I am ready to execute the task.\",\n        \"refusal\": null,\n        \"annotations\":
-        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 299,\n    \"completion_tokens\":
-        74,\n    \"total_tokens\": 373,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_bJJDEK5hizeG4PyxSUynX9x8\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"create_reasoning_plan\",\n
+        \             \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of the
+        first 3 prime numbers and multiply that sum by 2.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Identify
+        the first 3 prime numbers (2, 3, 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Calculate
+        the sum of the identified prime numbers (2 + 3 + 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Multiply
+        the sum by 2.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]},{\\\"step_number\\\":4,\\\"description\\\":\\\"Output
+        the final result.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[3]}],\\\"ready\\\":true}\"\n
+        \           }\n          }\n        ],\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 458,\n    \"completion_tokens\":
+        160,\n    \"total_tokens\": 618,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
         0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -79,7 +166,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:32 GMT
+      - Thu, 05 Feb 2026 22:36:03 GMT
       Server:
       - cloudflare
       Set-Cookie:
@@ -99,7 +186,7 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '1716'
+      - '2448'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
@@ -125,10 +212,13 @@ interactions:
       message: OK
 - request:
     body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert
-      math tutor who breaks down problems step by step\nYour personal goal is: Solve
-      multi-step math problems accurately"},{"role":"user","content":"\nCurrent Task:
-      Calculate the sum of the first 3 prime numbers, then multiply that result by
-      2. Show your work for each step.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
+      math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step
+      math problems accurately\n\nYou are executing a specific step in a multi-step
+      plan. Focus ONLY on completing\nthe current step. Do not plan ahead or worry
+      about future steps.\n\nBefore acting, briefly reason about what you need to
+      do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
+      Current Step\nIdentify the first 3 prime numbers (2, 3, 5).\n\nComplete this
+      step and provide your result."}],"model":"gpt-4o-mini"}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -141,7 +231,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '400'
+      - '622'
       content-type:
       - application/json
       cookie:
@@ -170,30 +260,24 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTYJgCZf2oY7wiPMZmN4QEQhHb5\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078152,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62F9RUZUkszp0yCWHbNJVTG16bx9\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330963,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"To solve the problem, let's break it
-        down into two main steps: \\n\\n1. Calculate the sum of the first 3 prime
-        numbers.\\n2. Multiply the result of that sum by 2.\\n\\n### Step 1: Identify
-        the first 3 prime numbers\\nPrime numbers are natural numbers greater than
-        1 that have no positive divisors other than 1 and themselves. \\n\\nThe first
-        three prime numbers are:\\n- 2\\n- 3\\n- 5\\n\\n### Step 2: Calculate the
-        sum of the first 3 prime numbers\\nNow, we add these prime numbers together:\\n\\n\\\\[\\n2
-        + 3 + 5\\n\\\\]\\n\\nCalculating this step-by-step:\\n- First, add 2 and 3:\\n
-        \ \\\\[\\n  2 + 3 = 5\\n  \\\\]\\n  \\n- Next, add this result to 5:\\n  \\\\[\\n
-        \ 5 + 5 = 10\\n  \\\\]\\n\\nSo, the sum of the first 3 prime numbers is \\\\(10\\\\).\\n\\n###
-        Step 3: Multiply the sum by 2\\nNext, we take the sum we calculated and multiply
-        it by 2:\\n\\n\\\\[\\n10 \\\\times 2\\n\\\\]\\n\\nCalculating this:\\n\\\\[\\n10
-        \\\\times 2 = 20\\n\\\\]\\n\\n### Final Answer\\nThus, the final result obtained
-        after performing all the steps is:\\n\\n\\\\[\\n\\\\boxed{20}\\n\\\\]\",\n
-        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
-        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-        74,\n    \"completion_tokens\": 288,\n    \"total_tokens\": 362,\n    \"prompt_tokens_details\":
-        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        \"assistant\",\n        \"content\": \"To identify the first three prime numbers,
+        we need to recall the definition of a prime number: it is a natural number
+        greater than 1 that has no positive divisors other than 1 and itself. \\n\\nStarting
+        from 2, we find:\\n1. The number **2** is prime (divisors are 1 and 2).\\n2.
+        The number **3** is prime (divisors are 1 and 3).\\n3. The number **4** is
+        not prime (divisors are 1, 2, and 4).\\n4. The number **5** is prime (divisors
+        are 1 and 5).\\n\\nThus, the first three prime numbers are **2, 3, and 5**.
+        \\n\\nResult: 2, 3, 5.\",\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 123,\n    \"completion_tokens\":
+        166,\n    \"total_tokens\": 289,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -202,7 +286,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:37 GMT
+      - Thu, 05 Feb 2026 22:36:06 GMT
       Server:
       - cloudflare
       Strict-Transport-Security:
@@ -220,7 +304,444 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '4751'
+      - '3090'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
+      observing execution progress. After each step completes, you analyze what happened
+      and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
+      about:\\n1. What new information was learned from this step's result\\n2. Whether
+      the remaining steps still make sense given this new information\\n3. What refinements,
+      if any, are needed for upcoming steps\\n4. Whether the overall goal has already
+      been achieved\\n\\nBe conservative about triggering full replans \u2014 only
+      do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
+      Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
+      Identify the first 3 prime numbers (2, 3, 5).\\nResult: To identify the first
+      three prime numbers, we need to recall the definition of a prime number: it
+      is a natural number greater than 1 that has no positive divisors other than
+      1 and itself. \\n\\nStarting from 2, we find:\\n1. The number **2** is prime
+      (divisors are 1 and 2).\\n2. The number **3** is prime (divisors are 1 and 3).\\n3.
+      The number **4** is not prime (divisors are 1, 2, and 4).\\n4. The number **5**
+      is prime (divisors are 1 and 5).\\n\\nThus, the first three prime numbers are
+      **2, 3, and 5**. \\n\\nResult: 2, 3, 5.\\n\\n## Remaining plan steps:\\n  Step
+      2: Calculate the sum of the identified prime numbers (2 + 3 + 5).\\n  Step 3:
+      Multiply the sum by 2.\\n  Step 4: Output the final result.\\n\\nAnalyze this
+      step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
+      observation after a step execution completes.\\n\\nReturned by the PlannerObserver
+      after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
+      whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
+      3.3): the Planner observes what the Executor\\ndid and incorporates new information
+      into the remaining plan.\\n\\nAttributes:\\n    step_completed_successfully:
+      Whether the step achieved its objective.\\n    key_information_learned: New
+      information revealed by this step\\n        (e.g., \\\"Found 3 products: A,
+      B, C\\\"). Used to refine upcoming steps.\\n    remaining_plan_still_valid:
+      Whether pending todos still make sense\\n        given the new information.
+      True does NOT mean no refinement needed.\\n    suggested_refinements: Minor
+      tweaks to upcoming step descriptions.\\n        These are lightweight in-place
+      updates, not a full replan.\\n        Example: [\\\"Step 3 should select product
+      B instead of 'best product'\\\"]\\n    needs_full_replan: The remaining plan
+      is fundamentally wrong and must\\n        be regenerated from scratch. Mutually
+      exclusive with\\n        remaining_plan_still_valid (if this is True, that should
+      be False).\\n    replan_reason: Explanation of why a full replan is needed (None
+      if not).\\n    goal_already_achieved: The overall task goal has been satisfied
+      early.\\n        No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
+      the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
+      new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
+      the remaining pending todos still make sense given new information\",\"title\":\"Remaining
+      Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
+      tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
+      Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
+      remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
+      Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
+      of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
+      overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
+      Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '4561'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-helper-method:
+      - beta.chat.completions.parse
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FCKhhkyZ4k2uH2KyhxsGnWEM7R\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330966,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
+        first three prime numbers have been correctly identified as 2, 3, and 5.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        960,\n    \"completion_tokens\": 72,\n    \"total_tokens\": 1032,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:07 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1058'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert
+      math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step
+      math problems accurately\n\nYou are executing a specific step in a multi-step
+      plan. Focus ONLY on completing\nthe current step. Do not plan ahead or worry
+      about future steps.\n\nBefore acting, briefly reason about what you need to
+      do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
+      Current Step\nCalculate the sum of the identified prime numbers (2 + 3 + 5).\n\n##
+      Context from previous steps:\nStep 1 result: To identify the first three prime
+      numbers, we need to recall the definition of a prime number: it is a natural
+      number greater than 1 that has no positive divisors other than 1 and itself.
+      \n\nStarting from 2, we find:\n1. The number **2** is prime (divisors are 1
+      and 2).\n2. The number **3** is prime (divisors are 1 and 3).\n3. The number
+      **4** is not prime (divisors are 1, 2, and 4).\n4. The number **5** is prime
+      (divisors are 1 and 5).\n\nThus, the first three prime numbers are **2, 3, and
+      5**. \n\nResult: 2, 3, 5.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1213'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FDWh7MhYTKIsLCnq6r5iXrbdrN\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330967,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"To calculate the sum of the identified
+        prime numbers (2 + 3 + 5), I will follow these steps:\\n\\n1. Add the first
+        two prime numbers: \\n   - \\\\( 2 + 3 = 5 \\\\)\\n\\n2. Then, add the result
+        to the third prime number:\\n   - \\\\( 5 + 5 = 10 \\\\)\\n\\nSo the sum of
+        the identified prime numbers (2 + 3 + 5) is **10**.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        301,\n    \"completion_tokens\": 95,\n    \"total_tokens\": 396,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:09 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1470'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
+      observing execution progress. After each step completes, you analyze what happened
+      and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
+      about:\\n1. What new information was learned from this step's result\\n2. Whether
+      the remaining steps still make sense given this new information\\n3. What refinements,
+      if any, are needed for upcoming steps\\n4. Whether the overall goal has already
+      been achieved\\n\\nBe conservative about triggering full replans \u2014 only
+      do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
+      Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
+      \ Step 1: Identify the first 3 prime numbers (2, 3, 5).\\n    Result: To identify
+      the first three prime numbers, we need to recall the definition of a prime number:
+      it is a natural number greater than 1 that has no positive divisors other than
+      1 and itself. \\n\\nStarting f\\n\\n## Just completed step 2\\nDescription:
+      Calculate the sum of the identified prime numbers (2 + 3 + 5).\\nResult: To
+      calculate the sum of the identified prime numbers (2 + 3 + 5), I will follow
+      these steps:\\n\\n1. Add the first two prime numbers: \\n   - \\\\( 2 + 3 =
+      5 \\\\)\\n\\n2. Then, add the result to the third prime number:\\n   - \\\\(
+      5 + 5 = 10 \\\\)\\n\\nSo the sum of the identified prime numbers (2 + 3 + 5)
+      is **10**.\\n\\n## Remaining plan steps:\\n  Step 3: Multiply the sum by 2.\\n
+      \ Step 4: Output the final result.\\n\\nAnalyze this step's result and provide
+      your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
+      observation after a step execution completes.\\n\\nReturned by the PlannerObserver
+      after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
+      whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
+      3.3): the Planner observes what the Executor\\ndid and incorporates new information
+      into the remaining plan.\\n\\nAttributes:\\n    step_completed_successfully:
+      Whether the step achieved its objective.\\n    key_information_learned: New
+      information revealed by this step\\n        (e.g., \\\"Found 3 products: A,
+      B, C\\\"). Used to refine upcoming steps.\\n    remaining_plan_still_valid:
+      Whether pending todos still make sense\\n        given the new information.
+      True does NOT mean no refinement needed.\\n    suggested_refinements: Minor
+      tweaks to upcoming step descriptions.\\n        These are lightweight in-place
+      updates, not a full replan.\\n        Example: [\\\"Step 3 should select product
+      B instead of 'best product'\\\"]\\n    needs_full_replan: The remaining plan
+      is fundamentally wrong and must\\n        be regenerated from scratch. Mutually
+      exclusive with\\n        remaining_plan_still_valid (if this is True, that should
+      be False).\\n    replan_reason: Explanation of why a full replan is needed (None
+      if not).\\n    goal_already_achieved: The overall task goal has been satisfied
+      early.\\n        No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
+      the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
+      new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
+      the remaining pending todos still make sense given new information\",\"title\":\"Remaining
+      Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
+      tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
+      Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
+      remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
+      Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
+      of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
+      overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
+      Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '4591'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-helper-method:
+      - beta.chat.completions.parse
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FFIa3JdCnNkh6sa0wz28i55ni1\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330969,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"{\\\"step_completed_successfully\\\":false,\\\"key_information_learned\\\":\\\"The
+        calculation for the sum of the identified prime numbers was incorrect; it
+        should be 2 + 3 + 5 = 10, but there was a typo where the last addition was
+        mistakenly written as 5 + 5 instead of 5 + 2.\\\",\\\"remaining_plan_still_valid\\\":false,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":true,\\\"replan_reason\\\":\\\"The
+        remaining steps are based on an incorrect sum, making them invalid. The calculations
+        must be restarted from the correct determination of the sum of the prime numbers.\\\",\\\"goal_already_achieved\\\":false}\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        942,\n    \"completion_tokens\": 135,\n    \"total_tokens\": 1077,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:11 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '2300'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml
index 88617c427..930d5134f 100644
--- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml
@@ -42,17 +42,17 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yXGD5IrieoUDSK5hDmJyA2gJtDc\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078382,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FLMJF1jiuD18qhDDxWFYzJxWk3\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330975,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"5 + 5 equals 10.\",\n        \"refusal\":
+        \"assistant\",\n        \"content\": \"The sum of 5 + 5 is 10.\",\n        \"refusal\":
         null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
         \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-        47,\n    \"completion_tokens\": 8,\n    \"total_tokens\": 55,\n    \"prompt_tokens_details\":
+        47,\n    \"completion_tokens\": 12,\n    \"total_tokens\": 59,\n    \"prompt_tokens_details\":
         {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -61,7 +61,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:26:23 GMT
+      - Thu, 05 Feb 2026 22:36:16 GMT
       Server:
       - cloudflare
       Set-Cookie:
@@ -81,7 +81,113 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '363'
+      - '342'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
+      assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
+      Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '260'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FM3zRv6CP5jgOiAWIaTukuPjwP\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330976,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"5 + 5 equals 10.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        47,\n    \"completion_tokens\": 8,\n    \"total_tokens\": 55,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:16 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '488'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml
index 35714d2cf..b0e7e4883 100644
--- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml
+++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml
@@ -5,18 +5,25 @@ interactions:
       a focused execution plan for the following task:\n\n## Task\nConvert 100 degrees
       Celsius to Fahrenheit, then round the result to the nearest 10.\n\n## Expected
       Output\nComplete the task successfully\n\n## Available Tools\nNo tools available\n\n##
-      Instructions\nCreate ONLY the essential steps needed to complete this task.
-      Use the MINIMUM number of steps required - do NOT pad your plan with unnecessary
-      steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State the specific
-      action to take\n- Specify which tool to use (if any)\n\nDo NOT include:\n- Setup
-      or preparation steps that are obvious\n- Verification steps unless critical\n-
-      Documentation or cleanup steps unless explicitly required\n- Generic steps like
-      \"review results\" or \"finalize output\"\n\nAfter your plan, state:\n- \"READY:
-      I am ready to execute the task.\" if the plan is complete\n- \"NOT READY: I
-      need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
-      or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
-      detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
-      the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
+      Planning Principles\nFocus on WHAT needs to be accomplished, not HOW. Group
+      related actions into logical units. Fewer steps = better. Most tasks need 3-6
+      steps. Hard limit: 10 steps.\n\n## Step Types (only these are valid):\n1. **Tool
+      Step**: Uses a tool to gather information or take action\n2. **Output Step**:
+      Synthesizes prior results into the final deliverable (usually the last step)\n\n##
+      Rules:\n- Each step must either USE A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine
+      related tool calls: \"Research A, B, and C\" = ONE step, not three\n- Combine
+      all synthesis into ONE final output step\n- NO standalone \"thinking\" steps
+      (review, verify, confirm, refine, analyze) - these happen naturally between
+      steps\n\nFor each step: State the action, specify the tool (if any), and note
+      dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
+      or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
+      brief summary of the overall plan."},"steps":{"type":"array","description":"List
+      of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
+      number (1-based)"},"description":{"type":"string","description":"What to do
+      in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
+      use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
+      numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
+      the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -29,7 +36,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '1610'
+      - '2384'
       content-type:
       - application/json
       host:
@@ -56,20 +63,25 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTN8fHOefyzzhvdUOHjxdFDR2HW\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078141,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FU2te4ww3DuIzbuySwWTIPTx6A\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330984,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"## Execution Plan\\n\\n1. Convert 100
-        degrees Celsius to Fahrenheit using the formula: \\\\( F = C \\\\times \\\\frac{9}{5}
-        + 32 \\\\).\\n2. Round the Fahrenheit result to the nearest 10.\\n\\nREADY:
-        I am ready to execute the task.\",\n        \"refusal\": null,\n        \"annotations\":
-        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 291,\n    \"completion_tokens\":
-        58,\n    \"total_tokens\": 349,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_0LXFaxnsqT2kFmUyanui30k0\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"create_reasoning_plan\",\n
+        \             \"arguments\": \"{\\\"plan\\\":\\\"Convert 100 degrees Celsius
+        to Fahrenheit and round the result to the nearest 10.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Convert
+        100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Round
+        the Fahrenheit result to the nearest 10.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Output
+        the final rounded temperature in Fahrenheit.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]}],\\\"ready\\\":true}\"\n
+        \           }\n          }\n        ],\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 450,\n    \"completion_tokens\":
+        133,\n    \"total_tokens\": 583,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
         0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -78,7 +90,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:22 GMT
+      - Thu, 05 Feb 2026 22:36:26 GMT
       Server:
       - cloudflare
       Set-Cookie:
@@ -98,7 +110,7 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '1089'
+      - '1976'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version:
@@ -124,10 +136,13 @@ interactions:
       message: OK
 - request:
     body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise
-      unit conversion specialist\nYour personal goal is: Accurately convert between
-      units and apply transformations"},{"role":"user","content":"\nCurrent Task:
-      Convert 100 degrees Celsius to Fahrenheit, then round the result to the nearest
-      10.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
+      unit conversion specialist\n\nYour goal: Accurately convert between units and
+      apply transformations\n\nYou are executing a specific step in a multi-step plan.
+      Focus ONLY on completing\nthe current step. Do not plan ahead or worry about
+      future steps.\n\nBefore acting, briefly reason about what you need to do and
+      which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
+      Current Step\nConvert 100 degrees Celsius to Fahrenheit using the formula (C
+      * 9/5) + 32.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}'
     headers:
       User-Agent:
       - X-USER-AGENT-XXX
@@ -140,7 +155,7 @@ interactions:
       connection:
       - keep-alive
       content-length:
-      - '373'
+      - '651'
       content-type:
       - application/json
       cookie:
@@ -169,26 +184,21 @@ interactions:
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-D4yTPQewXDyPdYHI4dHPH7YGHcRge\",\n  \"object\":
-        \"chat.completion\",\n  \"created\": 1770078143,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      string: "{\n  \"id\": \"chatcmpl-D62FWhREtHEudJMFFypgh33C8GLdH\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330986,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
         \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-        \"assistant\",\n        \"content\": \"To convert degrees Celsius to Fahrenheit,
-        you can use the formula:\\n\\n\\\\[ F = \\\\left( C \\\\times \\\\frac{9}{5}
-        \\\\right) + 32 \\\\]\\n\\nPlugging in 100 degrees Celsius:\\n\\n\\\\[ F =
-        \\\\left( 100 \\\\times \\\\frac{9}{5} \\\\right) + 32 \\\\]\\n\\nCalculating
-        that step-by-step:\\n\\n1. Multiply 100 by 9: \\n   \\\\[ 100 \\\\times 9
-        = 900 \\\\]\\n\\n2. Divide by 5:\\n   \\\\[ 900 \\\\div 5 = 180 \\\\]\\n\\n3.
-        Add 32:\\n   \\\\[ 180 + 32 = 212 \\\\]\\n\\nSo, 100 degrees Celsius is equal
-        to 212 degrees Fahrenheit.\\n\\nNow, rounding 212 to the nearest 10:\\n\\nThe
-        nearest multiple of 10 to 212 is 210.\\n\\nTherefore, the final result is
-        **210 degrees Fahrenheit**.\",\n        \"refusal\": null,\n        \"annotations\":
+        \"assistant\",\n        \"content\": \"To convert 100 degrees Celsius to Fahrenheit
+        using the formula (C * 9/5) + 32, we substitute C with 100:\\n\\n1. Multiply
+        100 by 9/5:\\n   \\\\[ 100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2.
+        Then add 32:\\n   \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore, 100 degrees Celsius
+        is equal to 212 degrees Fahrenheit.\",\n        \"refusal\": null,\n        \"annotations\":
         []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
-        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 63,\n    \"completion_tokens\":
-        191,\n    \"total_tokens\": 254,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 126,\n    \"completion_tokens\":
+        101,\n    \"total_tokens\": 227,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
         0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
         {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
         0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
-        \"default\",\n  \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
     headers:
       CF-RAY:
       - CF-RAY-XXX
@@ -197,7 +207,7 @@ interactions:
       Content-Type:
       - application/json
       Date:
-      - Tue, 03 Feb 2026 00:22:26 GMT
+      - Thu, 05 Feb 2026 22:36:27 GMT
       Server:
       - cloudflare
       Strict-Transport-Security:
@@ -215,7 +225,548 @@ interactions:
       openai-organization:
       - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '3736'
+      - '1505'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
+      observing execution progress. After each step completes, you analyze what happened
+      and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
+      about:\\n1. What new information was learned from this step's result\\n2. Whether
+      the remaining steps still make sense given this new information\\n3. What refinements,
+      if any, are needed for upcoming steps\\n4. Whether the overall goal has already
+      been achieved\\n\\nBe conservative about triggering full replans \u2014 only
+      do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
+      Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
+      Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\nResult:
+      To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32,
+      we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n   \\\\[ 100 * \\\\frac{9}{5}
+      = 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n   \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore,
+      100 degrees Celsius is equal to 212 degrees Fahrenheit.\\n\\n## Remaining plan
+      steps:\\n  Step 2: Round the Fahrenheit result to the nearest 10.\\n  Step 3:
+      Output the final rounded temperature in Fahrenheit.\\n\\nAnalyze this step's
+      result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
+      observation after a step execution completes.\\n\\nReturned by the PlannerObserver
+      after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
+      whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
+      3.3): the Planner observes what the Executor\\ndid and incorporates new information
+      into the remaining plan.\\n\\nAttributes:\\n    step_completed_successfully:
+      Whether the step achieved its objective.\\n    key_information_learned: New
+      information revealed by this step\\n        (e.g., \\\"Found 3 products: A,
+      B, C\\\"). Used to refine upcoming steps.\\n    remaining_plan_still_valid:
+      Whether pending todos still make sense\\n        given the new information.
+      True does NOT mean no refinement needed.\\n    suggested_refinements: Minor
+      tweaks to upcoming step descriptions.\\n        These are lightweight in-place
+      updates, not a full replan.\\n        Example: [\\\"Step 3 should select product
+      B instead of 'best product'\\\"]\\n    needs_full_replan: The remaining plan
+      is fundamentally wrong and must\\n        be regenerated from scratch. Mutually
+      exclusive with\\n        remaining_plan_still_valid (if this is True, that should
+      be False).\\n    replan_reason: Explanation of why a full replan is needed (None
+      if not).\\n    goal_already_achieved: The overall task goal has been satisfied
+      early.\\n        No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
+      the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
+      new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
+      the remaining pending todos still make sense given new information\",\"title\":\"Remaining
+      Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
+      tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
+      Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
+      remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
+      Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
+      of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
+      overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
+      Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '4342'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-helper-method:
+      - beta.chat.completions.parse
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FXF5UZlLp9eu5O7HsZvIvpC4My\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330987,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully
+        converted 100 degrees Celsius to 212 degrees Fahrenheit.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":[\\\"Step
+        2 should round 212 to the nearest 10, resulting in 210.\\\"],\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        885,\n    \"completion_tokens\": 81,\n    \"total_tokens\": 966,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:29 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '2195'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are refining upcoming plan
+      steps based on new information. Update the step descriptions to be more specific
+      and actionable given what was learned. Keep the same step numbers.\n\nRespond
+      with one line per step in the format:\nStep N: <refined description>"},{"role":"user","content":"##
+      New information learned\nSuccessfully converted 100 degrees Celsius to 212 degrees
+      Fahrenheit.\n\n## Suggested refinements\nStep 2 should round 212 to the nearest
+      10, resulting in 210.\n\n## Current pending steps\nStep 2: Round the Fahrenheit
+      result to the nearest 10.\nStep 3: Output the final rounded temperature in Fahrenheit.\n\nUpdate
+      the step descriptions to incorporate the new information."}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '754'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FaclC9tg2ClH7HU3pfMzmlPJpB\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330990,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Step 2: Round the Fahrenheit result
+        of 212 degrees to the nearest 10, resulting in 210 degrees.  \\nStep 3: Output
+        the final rounded temperature as 210 degrees Fahrenheit.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        142,\n    \"completion_tokens\": 40,\n    \"total_tokens\": 182,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:30 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '706'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise
+      unit conversion specialist\n\nYour goal: Accurately convert between units and
+      apply transformations\n\nYou are executing a specific step in a multi-step plan.
+      Focus ONLY on completing\nthe current step. Do not plan ahead or worry about
+      future steps.\n\nBefore acting, briefly reason about what you need to do and
+      which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
+      Current Step\nRound the Fahrenheit result of 212 degrees to the nearest 10,
+      resulting in 210 degrees.\n\n## Context from previous steps:\nStep 1 result:
+      To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32,
+      we substitute C with 100:\n\n1. Multiply 100 by 9/5:\n   \\[ 100 * \\frac{9}{5}
+      = 100 * 1.8 = 180 \\]\n\n2. Then add 32:\n   \\[ 180 + 32 = 212 \\]\n\nTherefore,
+      100 degrees Celsius is equal to 212 degrees Fahrenheit.\n\nComplete this step
+      and provide your result."}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1011'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62Fb9PlGlUIcZRS2v2Lp9S62brRP\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330991,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"To complete this step, I will round
+        the Fahrenheit result of 212 degrees to the nearest 10. When rounding, since
+        212 is closer to 210 than it is to 220, I will round it down to 210 degrees.\\n\\nResult:
+        210 degrees Fahrenheit.\",\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 236,\n    \"completion_tokens\":
+        56,\n    \"total_tokens\": 292,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:32 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1187'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
+      observing execution progress. After each step completes, you analyze what happened
+      and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
+      about:\\n1. What new information was learned from this step's result\\n2. Whether
+      the remaining steps still make sense given this new information\\n3. What refinements,
+      if any, are needed for upcoming steps\\n4. Whether the overall goal has already
+      been achieved\\n\\nBe conservative about triggering full replans \u2014 only
+      do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
+      Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
+      \ Step 1: Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5)
+      + 32.\\n    Result: To convert 100 degrees Celsius to Fahrenheit using the formula
+      (C * 9/5) + 32, we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n   \\\\[
+      100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n   \\\\[
+      18\\n\\n## Just completed step 2\\nDescription: Round the Fahrenheit result
+      of 212 degrees to the nearest 10, resulting in 210 degrees.\\nResult: To complete
+      this step, I will round the Fahrenheit result of 212 degrees to the nearest
+      10. When rounding, since 212 is closer to 210 than it is to 220, I will round
+      it down to 210 degrees.\\n\\nResult: 210 degrees Fahrenheit.\\n\\n## Remaining
+      plan steps:\\n  Step 3: Output the final rounded temperature as 210 degrees
+      Fahrenheit.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
+      observation after a step execution completes.\\n\\nReturned by the PlannerObserver
+      after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
+      whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
+      3.3): the Planner observes what the Executor\\ndid and incorporates new information
+      into the remaining plan.\\n\\nAttributes:\\n    step_completed_successfully:
+      Whether the step achieved its objective.\\n    key_information_learned: New
+      information revealed by this step\\n        (e.g., \\\"Found 3 products: A,
+      B, C\\\"). Used to refine upcoming steps.\\n    remaining_plan_still_valid:
+      Whether pending todos still make sense\\n        given the new information.
+      True does NOT mean no refinement needed.\\n    suggested_refinements: Minor
+      tweaks to upcoming step descriptions.\\n        These are lightweight in-place
+      updates, not a full replan.\\n        Example: [\\\"Step 3 should select product
+      B instead of 'best product'\\\"]\\n    needs_full_replan: The remaining plan
+      is fundamentally wrong and must\\n        be regenerated from scratch. Mutually
+      exclusive with\\n        remaining_plan_still_valid (if this is True, that should
+      be False).\\n    replan_reason: Explanation of why a full replan is needed (None
+      if not).\\n    goal_already_achieved: The overall task goal has been satisfied
+      early.\\n        No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
+      the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
+      new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
+      the remaining pending todos still make sense given new information\",\"title\":\"Remaining
+      Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
+      tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
+      Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
+      remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
+      Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
+      of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
+      overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
+      Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '4579'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-helper-method:
+      - beta.chat.completions.parse
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.3
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D62FctLDvklBSvOY641JCvwFaTugO\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1770330992,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully
+        rounded the Fahrenheit result of 212 degrees down to 210 degrees.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        941,\n    \"completion_tokens\": 67,\n    \"total_tokens\": 1008,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 05 Feb 2026 22:36:33 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1208'
       openai-project:
       - OPENAI-PROJECT-XXX
       openai-version: