feat: introduce PlannerObserver and StepExecutor for enhanced plan execution

This commit adds the PlannerObserver and StepExecutor classes to the CrewAI framework, implementing the observation phase of the Plan-and-Execute architecture. The PlannerObserver analyzes step execution results, determines plan validity, and suggests refinements, while the StepExecutor executes individual todo items in isolation. These additions improve the overall planning and execution process, allowing for more dynamic and responsive agent behavior. Additionally, new observation events have been defined to facilitate monitoring and logging of the planning process.
This commit is contained in:
lorenzejay
2026-02-05 15:46:21 -08:00
parent 81d9fd4ab3
commit 8e1474d371
16 changed files with 4120 additions and 360 deletions

View File

@@ -0,0 +1,355 @@
"""PlannerObserver: Observation phase after each step execution.
Implements the "Observe" phase from PLAN-AND-ACT (Section 3.3). After every
step execution, the Planner analyzes what happened, what new information was
learned, and whether the remaining plan is still valid.
This is NOT an error detector — it runs on every step, including successes,
to incorporate runtime observations into the remaining plan.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.observation_events import (
StepObservationCompletedEvent,
StepObservationFailedEvent,
StepObservationStartedEvent,
)
from crewai.utilities.llm_utils import create_llm
from crewai.utilities.planning_types import StepObservation, TodoItem
from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai.agent import Agent
from crewai.task import Task
logger = logging.getLogger(__name__)
class PlannerObserver:
    """Observes step execution results and decides on plan continuation.

    After EVERY step execution, this class:
    1. Analyzes what the step accomplished
    2. Identifies new information learned
    3. Decides if the remaining plan is still valid
    4. Suggests lightweight refinements or triggers full replanning

    LLM resolution (magical fallback):
    - If ``agent.planning_config.llm`` is explicitly set → use that
    - Otherwise → fall back to ``agent.llm`` (same LLM for everything)

    Args:
        agent: The agent instance (for LLM resolution and config).
        task: Optional task context (for description and expected output).
    """

    def __init__(self, agent: Agent, task: Task | None = None) -> None:
        self.agent = agent
        self.task = task
        # Resolved once at construction; observe()/refine_todos() reuse it.
        self.llm = self._resolve_llm()

    def _resolve_llm(self) -> Any:
        """Resolve which LLM to use for observation/planning.

        Mirrors AgentReasoning._resolve_llm(): uses planning_config.llm
        if explicitly set, otherwise falls back to agent.llm.

        Returns:
            The resolved LLM instance.
        """
        # Local import to avoid a module-level import cycle with crewai.llm.
        from crewai.llm import LLM

        config = getattr(self.agent, "planning_config", None)
        if config is not None and config.llm is not None:
            if isinstance(config.llm, LLM):
                return config.llm
            # planning_config.llm may be a model name / spec — build an LLM.
            return create_llm(config.llm)
        return self.agent.llm

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def observe(
        self,
        completed_step: TodoItem,
        result: str,
        all_completed: list[TodoItem],
        remaining_todos: list[TodoItem],
    ) -> StepObservation:
        """Observe a step's result and decide on plan continuation.

        This runs after EVERY step execution — not just failures.

        Args:
            completed_step: The todo item that was just executed.
            result: The final result string from the step.
            all_completed: All previously completed todos (for context).
            remaining_todos: The pending todos still in the plan.

        Returns:
            StepObservation with the Planner's analysis. If the LLM call
            fails, a permissive default ("continue as planned") is returned
            so observation never blocks execution.
        """
        agent_role = self.agent.role if self.agent else "unknown"
        # Emit observation started event
        crewai_event_bus.emit(
            self.agent,
            event=StepObservationStartedEvent(
                agent_role=agent_role,
                step_number=completed_step.step_number,
                step_description=completed_step.description,
                from_task=self.task,
                from_agent=self.agent,
            ),
        )
        messages = self._build_observation_messages(
            completed_step, result, all_completed, remaining_todos
        )
        try:
            response = self.llm.call(
                messages,
                response_model=StepObservation,
                from_task=self.task,
                from_agent=self.agent,
            )
            if isinstance(response, StepObservation):
                observation = response
            else:
                # If the LLM returned raw text instead of structured output,
                # parse it conservatively: assume success and a valid plan,
                # keep the raw text as "information learned".
                observation = StepObservation(
                    step_completed_successfully=True,
                    key_information_learned=str(response) if response else "",
                    remaining_plan_still_valid=True,
                )
            # Emit observation completed event
            crewai_event_bus.emit(
                self.agent,
                event=StepObservationCompletedEvent(
                    agent_role=agent_role,
                    step_number=completed_step.step_number,
                    step_description=completed_step.description,
                    step_completed_successfully=observation.step_completed_successfully,
                    key_information_learned=observation.key_information_learned,
                    remaining_plan_still_valid=observation.remaining_plan_still_valid,
                    needs_full_replan=observation.needs_full_replan,
                    replan_reason=observation.replan_reason,
                    goal_already_achieved=observation.goal_already_achieved,
                    suggested_refinements=observation.suggested_refinements,
                    from_task=self.task,
                    from_agent=self.agent,
                ),
            )
            return observation
        except Exception as e:
            logger.warning(f"Observation LLM call failed: {e}. Defaulting to continue.")
            # Emit observation failed event
            crewai_event_bus.emit(
                self.agent,
                event=StepObservationFailedEvent(
                    agent_role=agent_role,
                    step_number=completed_step.step_number,
                    step_description=completed_step.description,
                    error=str(e),
                    from_task=self.task,
                    from_agent=self.agent,
                ),
            )
            # Best-effort default: do not derail execution because the
            # observation itself failed.
            return StepObservation(
                step_completed_successfully=True,
                key_information_learned="",
                remaining_plan_still_valid=True,
            )

    def refine_todos(
        self,
        observation: StepObservation,
        remaining_todos: list[TodoItem],
    ) -> list[TodoItem]:
        """Refine pending todo descriptions based on observation.

        This is a LIGHTWEIGHT operation — no full replan. It updates the
        description field of pending todos based on new information learned.

        Example: Step 1 found "3 products: A, B, C" → Step 2 changes from
        "Select the best product" to "Select product B (highest rated)"

        Args:
            observation: The observation with suggested refinements.
            remaining_todos: The pending todos to refine.

        Returns:
            The refined todo list (same objects, updated descriptions).
        """
        if not observation.suggested_refinements:
            return remaining_todos
        # Ask the LLM to apply the refinements to the todo descriptions
        messages = self._build_refinement_messages(observation, remaining_todos)
        try:
            response = self.llm.call(
                messages,
                from_task=self.task,
                from_agent=self.agent,
            )
            if response:
                # Parse the LLM's refined descriptions and apply them in place
                self._apply_refinements(str(response), remaining_todos)
        except Exception as e:
            # Refinement is optional polish; failure keeps the plan usable.
            logger.warning(
                f"Refinement LLM call failed: {e}. Keeping original descriptions."
            )
        return remaining_todos

    # ------------------------------------------------------------------
    # Internal: Message building
    # ------------------------------------------------------------------
    def _build_observation_messages(
        self,
        completed_step: TodoItem,
        result: str,
        all_completed: list[TodoItem],
        remaining_todos: list[TodoItem],
    ) -> list[LLMMessage]:
        """Build messages for the observation LLM call."""
        task_desc = ""
        task_goal = ""
        if self.task:
            task_desc = self.task.description or ""
            task_goal = self.task.expected_output or ""
        system_prompt = (
            "You are a Planning Agent observing execution progress. "
            "After each step completes, you analyze what happened and decide "
            "whether the remaining plan is still valid.\n\n"
            "Reason step-by-step about:\n"
            "1. What new information was learned from this step's result\n"
            "2. Whether the remaining steps still make sense given this new information\n"
            "3. What refinements, if any, are needed for upcoming steps\n"
            "4. Whether the overall goal has already been achieved\n\n"
            "Be conservative about triggering full replans — only do so when the "
            "remaining plan is fundamentally wrong, not just suboptimal."
        )
        # Build context of what's been done
        completed_summary = ""
        if all_completed:
            completed_lines = []
            for todo in all_completed:
                # Truncate each prior result so the prompt stays bounded.
                result_preview = (todo.result or "")[:200]
                completed_lines.append(
                    f"  Step {todo.step_number}: {todo.description}\n"
                    f"    Result: {result_preview}"
                )
            completed_summary = "\n## Previously completed steps:\n" + "\n".join(
                completed_lines
            )
        # Build remaining plan
        remaining_summary = ""
        if remaining_todos:
            remaining_lines = [
                f"  Step {todo.step_number}: {todo.description}"
                for todo in remaining_todos
            ]
            remaining_summary = "\n## Remaining plan steps:\n" + "\n".join(
                remaining_lines
            )
        user_prompt = (
            f"## Original task\n{task_desc}\n\n"
            f"## Expected output\n{task_goal}\n"
            f"{completed_summary}\n"
            f"\n## Just completed step {completed_step.step_number}\n"
            f"Description: {completed_step.description}\n"
            f"Result: {result}\n"
            f"{remaining_summary}\n\n"
            "Analyze this step's result and provide your observation."
        )
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    def _build_refinement_messages(
        self,
        observation: StepObservation,
        remaining_todos: list[TodoItem],
    ) -> list[LLMMessage]:
        """Build messages for the refinement LLM call."""
        system_prompt = (
            "You are refining upcoming plan steps based on new information. "
            "Update the step descriptions to be more specific and actionable "
            "given what was learned. Keep the same step numbers.\n\n"
            "Respond with one line per step in the format:\n"
            "Step N: <refined description>"
        )
        refinements = "\n".join(observation.suggested_refinements or [])
        todo_lines = "\n".join(
            f"Step {t.step_number}: {t.description}" for t in remaining_todos
        )
        user_prompt = (
            f"## New information learned\n{observation.key_information_learned}\n\n"
            f"## Suggested refinements\n{refinements}\n\n"
            f"## Current pending steps\n{todo_lines}\n\n"
            "Update the step descriptions to incorporate the new information."
        )
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    def _apply_refinements(
        self,
        llm_response: str,
        remaining_todos: list[TodoItem],
    ) -> None:
        """Parse LLM refinement response and update todo descriptions in place.

        Expects one refinement per line in the format ``Step N: <description>``.
        Tolerates common LLM formatting drift: leading markdown bullets
        (``-``, ``*``, ``•``) and a lowercase ``step`` prefix. Lines that do
        not parse, or reference unknown step numbers, are skipped silently —
        refinement is best-effort.
        """
        # Build lookup for quick access
        todo_by_step: dict[int, TodoItem] = {t.step_number: t for t in remaining_todos}
        for raw_line in llm_response.strip().split("\n"):
            # Strip whitespace plus any leading markdown bullet/emphasis chars.
            line = raw_line.strip().lstrip("-*•").strip()
            if not line.lower().startswith("step"):
                continue
            # Parse "Step N: description"
            try:
                parts = line.split(":", 1)
                if len(parts) < 2:
                    continue
                step_part = parts[0].strip()  # e.g. "Step N" / "step N"
                description = parts[1].strip()
                # Drop the 4-char "step" prefix (any case), keep the number.
                step_num = int(step_part[4:].strip())
                if step_num in todo_by_step and description:
                    todo_by_step[step_num].description = description
            except (ValueError, IndexError):
                continue

View File

@@ -0,0 +1,703 @@
"""StepExecutor: Isolated executor for a single plan step.
Implements a bounded ReAct loop scoped to ONE todo item. The tool execution
machinery (native function calling, text-parsed tools, caching, hooks) lives
here — moved from AgentExecutor so the outer Plan-and-Execute loop stays clean.
Based on PLAN-AND-ACT (Section 3.2): The Executor translates high-level plan
steps into concrete environment actions.
"""
from __future__ import annotations
from collections.abc import Callable
from datetime import datetime
import json
import time
from typing import TYPE_CHECKING, Any
from pydantic import BaseModel
from crewai.agents.parser import (
AgentAction,
AgentFinish,
)
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.tool_usage_events import (
ToolUsageErrorEvent,
ToolUsageFinishedEvent,
ToolUsageStartedEvent,
)
from crewai.hooks.tool_hooks import (
ToolCallHookContext,
get_after_tool_call_hooks,
get_before_tool_call_hooks,
)
from crewai.utilities.agent_utils import (
convert_tools_to_openai_schema,
enforce_rpm_limit,
extract_tool_call_info,
format_message_for_llm,
process_llm_response,
track_delegation_if_needed,
)
from crewai.utilities.i18n import I18N, get_i18n
from crewai.utilities.planning_types import TodoItem
from crewai.utilities.printer import Printer
from crewai.utilities.step_execution_context import StepExecutionContext, StepResult
from crewai.utilities.string_utils import sanitize_tool_name
from crewai.utilities.tool_utils import execute_tool_and_check_finality
from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai.agent import Agent
from crewai.agents.tools_handler import ToolsHandler
from crewai.crew import Crew
from crewai.llms.base_llm import BaseLLM
from crewai.task import Task
from crewai.tools.base_tool import BaseTool
from crewai.tools.structured_tool import CrewStructuredTool
# Maximum number of tool-call iterations within a single step; when reached,
# the executor stops looping and forces a final answer from the LLM.
_MAX_STEP_ITERATIONS: int = 10
class StepExecutor:
    """Executes a SINGLE todo item in isolation using a bounded ReAct loop.

    The StepExecutor owns its own message list per invocation. It never reads
    or writes the AgentExecutor's state. Results flow back via StepResult.

    The internal loop:
    1. Build messages from todo + context
    2. Call LLM (with or without native tools)
    3. If tool call → execute tool, append result, loop back to 2
    4. If final answer → return StepResult
    5. If max iterations → force final answer

    Args:
        llm: The language model to use for execution.
        tools: Structured tools available to the executor.
        agent: The agent instance (for role/goal/verbose/config).
        original_tools: Original BaseTool instances (needed for native tool schema).
        tools_handler: Optional tools handler for caching and delegation tracking.
        task: Optional task context.
        crew: Optional crew context.
        function_calling_llm: Optional separate LLM for function calling.
        request_within_rpm_limit: Optional RPM limit function.
        callbacks: Optional list of callbacks.
        i18n: Optional translations container; defaults to the global one.
    """

    def __init__(
        self,
        llm: BaseLLM,
        tools: list[CrewStructuredTool],
        agent: Agent,
        original_tools: list[BaseTool] | None = None,
        tools_handler: ToolsHandler | None = None,
        task: Task | None = None,
        crew: Crew | None = None,
        function_calling_llm: BaseLLM | Any | None = None,
        request_within_rpm_limit: Callable[[], bool] | None = None,
        callbacks: list[Any] | None = None,
        i18n: I18N | None = None,
    ) -> None:
        self.llm = llm
        self.tools = tools
        self.agent = agent
        self.original_tools = original_tools or []
        self.tools_handler = tools_handler
        self.task = task
        self.crew = crew
        self.function_calling_llm = function_calling_llm
        self.request_within_rpm_limit = request_within_rpm_limit
        self.callbacks = callbacks or []
        self._i18n: I18N = i18n or get_i18n()
        self._printer: Printer = Printer()
        # Native tool support — set up once; falls back to text-parsed ReAct
        # when the LLM lacks function calling or no original tools were given.
        self._use_native_tools = self._check_native_tool_support()
        self._openai_tools: list[dict[str, Any]] = []
        self._available_functions: dict[str, Callable[..., Any]] = {}
        if self._use_native_tools:
            self._setup_native_tools()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def execute(self, todo: TodoItem, context: StepExecutionContext) -> StepResult:
        """Execute a single todo item in isolation.

        Builds a fresh message list, runs a bounded ReAct loop, and returns
        the result. Never touches external state.

        Args:
            todo: The todo item to execute.
            context: Immutable context with task info and dependency results.

        Returns:
            StepResult with the outcome.
        """
        # monotonic clock: immune to wall-clock adjustments while timing
        start_time = time.monotonic()
        tool_calls_made: list[str] = []
        try:
            messages = self._build_isolated_messages(todo, context)
            result_text = self._run_react_loop(todo, messages, tool_calls_made)
            elapsed = time.monotonic() - start_time
            return StepResult(
                success=True,
                result=result_text,
                tool_calls_made=tool_calls_made,
                execution_time=elapsed,
            )
        except Exception as e:
            # Failures are captured in the StepResult rather than raised so
            # the outer Plan-and-Execute loop can decide how to proceed.
            elapsed = time.monotonic() - start_time
            return StepResult(
                success=False,
                result="",
                error=str(e),
                tool_calls_made=tool_calls_made,
                execution_time=elapsed,
            )

    # ------------------------------------------------------------------
    # Internal: Message building
    # ------------------------------------------------------------------
    def _build_isolated_messages(
        self, todo: TodoItem, context: StepExecutionContext
    ) -> list[LLMMessage]:
        """Build a fresh message list for this step's execution.

        System prompt tells the LLM it is an Executor focused on one step.
        User prompt provides the step description, dependencies, and tools.
        """
        system_prompt = self._build_system_prompt()
        user_prompt = self._build_user_prompt(todo, context)
        messages: list[LLMMessage] = [
            format_message_for_llm(system_prompt, role="system"),
            format_message_for_llm(user_prompt, role="user"),
        ]
        return messages

    def _build_system_prompt(self) -> str:
        """Build the Executor's system prompt.

        Emphasizes: complete THIS step only. Do not plan ahead.
        Includes CoT reasoning instruction (per PLAN-AND-ACT Section 3.4).
        """
        role = self.agent.role if self.agent else "Assistant"
        goal = self.agent.goal if self.agent else "Complete tasks efficiently"
        backstory = getattr(self.agent, "backstory", "") or ""
        # Text-mode tool instructions are only needed when native function
        # calling is unavailable; native mode passes schemas to the LLM call.
        tools_section = ""
        if self.tools and not self._use_native_tools:
            tool_names = ", ".join(sanitize_tool_name(t.name) for t in self.tools)
            tools_section = f"\n\nAvailable tools: {tool_names}"
            tools_section += "\n\nTo use a tool, respond with:\nThought: <your reasoning>\nAction: <tool_name>\nAction Input: <input>"
            tools_section += "\n\nWhen you have the final answer, respond with:\nThought: <your reasoning>\nFinal Answer: <your answer>"
        return f"""You are {role}. {backstory}
Your goal: {goal}
You are executing a specific step in a multi-step plan. Focus ONLY on completing
the current step. Do not plan ahead or worry about future steps.
Before acting, briefly reason about what you need to do and which approach
or tool would be most helpful for this specific step.{tools_section}"""

    def _build_user_prompt(self, todo: TodoItem, context: StepExecutionContext) -> str:
        """Build the user prompt for this specific step."""
        parts: list[str] = []
        parts.append(f"## Current Step\n{todo.description}")
        if todo.tool_to_use:
            parts.append(f"\nSuggested tool: {todo.tool_to_use}")
        # Include dependency results (final results only, no traces)
        if context.dependency_results:
            parts.append("\n## Context from previous steps:")
            for step_num, result in sorted(context.dependency_results.items()):
                parts.append(f"Step {step_num} result: {result}")
        parts.append("\nComplete this step and provide your result.")
        return "\n".join(parts)

    # ------------------------------------------------------------------
    # Internal: Bounded ReAct loop
    # ------------------------------------------------------------------
    def _run_react_loop(
        self,
        todo: TodoItem,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str:
        """Run a bounded ReAct loop for a single step.

        Returns the final answer text.

        NOTE(review): ``todo`` is currently unused here (the step details are
        already baked into ``messages``) — confirm whether it is kept for
        future use or can be dropped.
        """
        for iteration in range(_MAX_STEP_ITERATIONS):
            enforce_rpm_limit(self.request_within_rpm_limit)
            if self._use_native_tools:
                result = self._native_tool_iteration(messages, tool_calls_made)
            else:
                result = self._text_parsed_iteration(messages, tool_calls_made)
            if result is not None:
                # Got a final answer
                return result
            # No final answer yet — loop continues with updated messages
        # Max iterations reached — force a final answer
        return self._force_final_answer(messages)

    def _text_parsed_iteration(
        self,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Single iteration using text-parsed tool calling.

        Returns final answer string if done, None to continue looping.
        """
        # NOTE(review): this try/except only re-raises — it is a no-op and
        # could be removed.
        try:
            answer = self.llm.call(
                messages,
                callbacks=self.callbacks,
                from_task=self.task,
                from_agent=self.agent,
            )
        except Exception:
            raise
        if not answer:
            raise ValueError("Empty response from LLM")
        answer_str = str(answer)
        use_stop_words = self.llm.supports_stop_words() if self.llm else False
        formatted = process_llm_response(answer_str, use_stop_words)
        if isinstance(formatted, AgentFinish):
            return str(formatted.output)
        if isinstance(formatted, AgentAction):
            # Execute the tool
            tool_calls_made.append(formatted.tool)
            fingerprint_context = {}
            if (
                self.agent
                and hasattr(self.agent, "security_config")
                and hasattr(self.agent.security_config, "fingerprint")
            ):
                fingerprint_context = {
                    "agent_fingerprint": str(self.agent.security_config.fingerprint)
                }
            tool_result = execute_tool_and_check_finality(
                agent_action=formatted,
                fingerprint_context=fingerprint_context,
                tools=self.tools,
                i18n=self._i18n,
                agent_key=self.agent.key if self.agent else None,
                agent_role=self.agent.role if self.agent else None,
                tools_handler=self.tools_handler,
                task=self.task,
                agent=self.agent,
                function_calling_llm=self.function_calling_llm,
                crew=self.crew,
            )
            # Append observation to messages
            observation = f"Observation: {tool_result.result}"
            messages.append(
                format_message_for_llm(
                    formatted.text + f"\n{observation}",
                    role="assistant",
                )
            )
            # A tool flagged result_as_answer short-circuits the loop.
            if tool_result.result_as_answer:
                return str(tool_result.result)
            # Add reasoning prompt for next iteration
            reasoning_prompt = self._i18n.slice("post_tool_reasoning")
            messages.append(format_message_for_llm(reasoning_prompt, role="user"))
            return None  # Continue looping
        return answer_str  # Fallback: treat as final answer

    def _native_tool_iteration(
        self,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Single iteration using native function calling.

        Returns final answer string if done, None to continue looping.
        """
        # NOTE(review): this try/except only re-raises — it is a no-op and
        # could be removed.
        try:
            answer = self.llm.call(
                messages,
                tools=self._openai_tools,
                callbacks=self.callbacks,
                from_task=self.task,
                from_agent=self.agent,
            )
        except Exception:
            raise
        if not answer:
            raise ValueError("Empty response from LLM")
        # Check if the response is a list of tool calls
        if isinstance(answer, list) and answer and self._is_tool_call_list(answer):
            return self._execute_native_tool_calls(answer, messages, tool_calls_made)
        # Text response — this is the final answer
        if isinstance(answer, str):
            return answer
        # BaseModel response (structured output) — serialize it as the answer
        if isinstance(answer, BaseModel):
            return answer.model_dump_json()
        return str(answer)

    def _execute_native_tool_calls(
        self,
        tool_calls: list[Any],
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Execute a batch of native tool calls and append results to messages.

        Returns final answer string if a tool has result_as_answer, else None.
        """
        # Build assistant message with tool calls
        tool_calls_to_report: list[dict[str, Any]] = []
        for tool_call in tool_calls:
            info = extract_tool_call_info(tool_call)
            if not info:
                continue
            call_id, func_name, func_args = info
            tool_calls_to_report.append(
                {
                    "id": call_id,
                    "type": "function",
                    "function": {
                        "name": func_name,
                        # Providers may deliver args pre-serialized or as a dict
                        "arguments": func_args
                        if isinstance(func_args, str)
                        else json.dumps(func_args),
                    },
                }
            )
        if tool_calls_to_report:
            assistant_message: LLMMessage = {
                "role": "assistant",
                "content": None,
                "tool_calls": tool_calls_to_report,
            }
            # Preserve raw parts for Gemini compatibility
            if all(type(tc).__qualname__ == "Part" for tc in tool_calls):
                assistant_message["raw_tool_call_parts"] = list(tool_calls)
            messages.append(assistant_message)
        # Execute each tool call
        final_answer: str | None = None
        for tool_call in tool_calls:
            info = extract_tool_call_info(tool_call)
            if not info:
                continue
            call_id, func_name, func_args = info
            tool_calls_made.append(func_name)
            # Parse arguments; unparseable JSON degrades to empty kwargs
            if isinstance(func_args, str):
                try:
                    args_dict = json.loads(func_args)
                except json.JSONDecodeError:
                    args_dict = {}
            else:
                args_dict = func_args
            agent_key = (
                getattr(self.agent, "key", "unknown") if self.agent else "unknown"
            )
            # Find original tool for cache_function and result_as_answer
            original_tool = None
            for tool in self.original_tools:
                if sanitize_tool_name(tool.name) == func_name:
                    original_tool = tool
                    break
            # Check max usage count
            max_usage_reached = False
            if (
                original_tool
                and original_tool.max_usage_count is not None
                and original_tool.current_usage_count >= original_tool.max_usage_count
            ):
                max_usage_reached = True
            # Check cache; a hit skips execution below but still fires events
            from_cache = False
            input_str = json.dumps(args_dict) if args_dict else ""
            result = "Tool not found"
            if self.tools_handler and self.tools_handler.cache:
                cached_result = self.tools_handler.cache.read(
                    tool=func_name, input=input_str
                )
                if cached_result is not None:
                    result = (
                        str(cached_result)
                        if not isinstance(cached_result, str)
                        else cached_result
                    )
                    from_cache = True
            # Emit tool started event
            started_at = datetime.now()
            crewai_event_bus.emit(
                self,
                event=ToolUsageStartedEvent(
                    tool_name=func_name,
                    tool_args=args_dict,
                    from_agent=self.agent,
                    from_task=self.task,
                    agent_key=agent_key,
                ),
            )
            track_delegation_if_needed(func_name, args_dict, self.task)
            # Find structured tool for hooks
            structured_tool: CrewStructuredTool | None = None
            for structured in self.tools or []:
                if sanitize_tool_name(structured.name) == func_name:
                    structured_tool = structured
                    break
            # Before hooks: any hook returning False blocks the call;
            # hook errors are deliberately swallowed (hooks are best-effort)
            hook_blocked = False
            before_hook_context = ToolCallHookContext(
                tool_name=func_name,
                tool_input=args_dict,
                tool=structured_tool,  # type: ignore[arg-type]
                agent=self.agent,
                task=self.task,
                crew=self.crew,
            )
            try:
                for hook in get_before_tool_call_hooks():
                    if hook(before_hook_context) is False:
                        hook_blocked = True
                        break
            except Exception:
                pass
            if hook_blocked:
                result = f"Tool execution blocked by hook. Tool: {func_name}"
            elif not from_cache and not max_usage_reached:
                if func_name in self._available_functions:
                    try:
                        tool_func = self._available_functions[func_name]
                        raw_result = tool_func(**args_dict)
                        # Cache result (tool's cache_function may veto caching)
                        if self.tools_handler and self.tools_handler.cache:
                            should_cache = True
                            if original_tool:
                                should_cache = original_tool.cache_function(
                                    args_dict, raw_result
                                )
                            if should_cache:
                                self.tools_handler.cache.add(
                                    tool=func_name, input=input_str, output=raw_result
                                )
                        result = (
                            str(raw_result)
                            if not isinstance(raw_result, str)
                            else raw_result
                        )
                    except Exception as e:
                        # Tool failure becomes an observation for the LLM,
                        # not a crash of the step.
                        result = f"Error executing tool: {e}"
                        if self.task:
                            self.task.increment_tools_errors()
                        crewai_event_bus.emit(
                            self,
                            event=ToolUsageErrorEvent(
                                tool_name=func_name,
                                tool_args=args_dict,
                                from_agent=self.agent,
                                from_task=self.task,
                                agent_key=agent_key,
                                error=e,
                            ),
                        )
            elif max_usage_reached and original_tool:
                result = (
                    f"Tool '{func_name}' has reached its usage limit of "
                    f"{original_tool.max_usage_count} times and cannot be used anymore."
                )
            # After hooks: may transform the result; errors are swallowed
            after_hook_context = ToolCallHookContext(
                tool_name=func_name,
                tool_input=args_dict,
                tool=structured_tool,  # type: ignore[arg-type]
                agent=self.agent,
                task=self.task,
                crew=self.crew,
                tool_result=result,
            )
            try:
                for after_hook in get_after_tool_call_hooks():
                    hook_result = after_hook(after_hook_context)
                    if hook_result is not None:
                        result = hook_result
                        after_hook_context.tool_result = result
            except Exception:
                pass
            # Emit tool finished event
            crewai_event_bus.emit(
                self,
                event=ToolUsageFinishedEvent(
                    output=result,
                    tool_name=func_name,
                    tool_args=args_dict,
                    from_agent=self.agent,
                    from_task=self.task,
                    agent_key=agent_key,
                    started_at=started_at,
                    finished_at=datetime.now(),
                ),
            )
            # Append tool result message
            tool_message: LLMMessage = {
                "role": "tool",
                "tool_call_id": call_id,
                "name": func_name,
                "content": result,
            }
            messages.append(tool_message)
            if self.agent and self.agent.verbose:
                cache_info = " (from cache)" if from_cache else ""
                self._printer.print(
                    content=f"Tool {func_name} executed with result{cache_info}: {result[:200]}...",
                    color="green",
                )
            # Check result_as_answer — the LAST such tool in the batch wins
            if (
                original_tool
                and hasattr(original_tool, "result_as_answer")
                and original_tool.result_as_answer
            ):
                final_answer = result
        if final_answer is not None:
            return final_answer
        return None  # Continue looping

    def _force_final_answer(self, messages: list[LLMMessage]) -> str:
        """Force the LLM to provide a final answer when max iterations reached."""
        force_prompt = (
            "You have used the maximum number of tool calls for this step. "
            "Based on the information gathered so far, provide your final answer now."
        )
        # In text-parsed mode, prime the ReAct "Final Answer:" format.
        if not self._use_native_tools:
            force_prompt += "\n\nFinal Answer: "
        messages.append(format_message_for_llm(force_prompt, role="user"))
        try:
            answer = self.llm.call(
                messages,
                callbacks=self.callbacks,
                from_task=self.task,
                from_agent=self.agent,
            )
            if answer:
                answer_str = str(answer)
                # Try to extract just the final answer portion
                if "Final Answer:" in answer_str:
                    return answer_str.split("Final Answer:")[-1].strip()
                return answer_str
        except Exception:
            pass
        # Last resort when even the forced call fails or returns nothing.
        return "Step could not be completed within the iteration limit."

    # ------------------------------------------------------------------
    # Internal: Native tool support
    # ------------------------------------------------------------------
    def _check_native_tool_support(self) -> bool:
        """Check if LLM supports native function calling.

        Requires both a capable LLM and at least one original tool to
        convert into a schema.
        """
        return (
            hasattr(self.llm, "supports_function_calling")
            and callable(getattr(self.llm, "supports_function_calling", None))
            and self.llm.supports_function_calling()
            and bool(self.original_tools)
        )

    def _setup_native_tools(self) -> None:
        """Convert tools to OpenAI schema format for native function calling."""
        if self.original_tools:
            self._openai_tools, self._available_functions = (
                convert_tools_to_openai_schema(self.original_tools)
            )

    def _is_tool_call_list(self, response: list[Any]) -> bool:
        """Check if a response is a list of tool calls.

        Probes for the shapes used by the major providers; only the first
        item is inspected (batches are assumed homogeneous).
        """
        if not response:
            return False
        first_item = response[0]
        # OpenAI-style
        if hasattr(first_item, "function") or (
            isinstance(first_item, dict) and "function" in first_item
        ):
            return True
        # Anthropic-style (ToolUseBlock)
        if (
            hasattr(first_item, "type")
            and getattr(first_item, "type", None) == "tool_use"
        ):
            return True
        if hasattr(first_item, "name") and hasattr(first_item, "input"):
            return True
        # Bedrock-style
        if (
            isinstance(first_item, dict)
            and "name" in first_item
            and "input" in first_item
        ):
            return True
        # Gemini-style
        if hasattr(first_item, "function_call") and first_item.function_call:
            return True
        return False

View File

@@ -74,6 +74,14 @@ from crewai.events.types.mcp_events import (
MCPToolExecutionFailedEvent,
MCPToolExecutionStartedEvent,
)
from crewai.events.types.observation_events import (
GoalAchievedEarlyEvent,
PlanRefinementEvent,
PlanReplanTriggeredEvent,
StepObservationCompletedEvent,
StepObservationFailedEvent,
StepObservationStartedEvent,
)
from crewai.events.types.reasoning_events import (
AgentReasoningCompletedEvent,
AgentReasoningFailedEvent,
@@ -534,6 +542,64 @@ class EventListener(BaseEventListener):
event.error,
)
# ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
# Console rendering for the observation phase: each handler forwards the
# event's fields to the formatter. Registered as closures over `self`.

@crewai_event_bus.on(StepObservationStartedEvent)
def on_step_observation_started(
    _: Any, event: StepObservationStartedEvent
) -> None:
    # Show that the planner is observing the just-finished step.
    self.formatter.handle_observation_started(
        event.agent_role,
        event.step_number,
        event.step_description,
    )

@crewai_event_bus.on(StepObservationCompletedEvent)
def on_step_observation_completed(
    _: Any, event: StepObservationCompletedEvent
) -> None:
    # Render the observation verdict (success, plan validity, learnings).
    self.formatter.handle_observation_completed(
        event.agent_role,
        event.step_number,
        event.step_completed_successfully,
        event.remaining_plan_still_valid,
        event.key_information_learned,
        event.needs_full_replan,
        event.goal_already_achieved,
    )

@crewai_event_bus.on(StepObservationFailedEvent)
def on_step_observation_failed(
    _: Any, event: StepObservationFailedEvent
) -> None:
    # Surface observation failures (execution itself continues).
    self.formatter.handle_observation_failed(
        event.step_number,
        event.error,
    )

@crewai_event_bus.on(PlanRefinementEvent)
def on_plan_refinement(_: Any, event: PlanRefinementEvent) -> None:
    # Show lightweight in-place refinements of pending steps.
    self.formatter.handle_plan_refinement(
        event.step_number,
        event.refined_step_count,
        event.refinements,
    )

@crewai_event_bus.on(PlanReplanTriggeredEvent)
def on_plan_replan_triggered(_: Any, event: PlanReplanTriggeredEvent) -> None:
    # Show that a full replan was triggered and why.
    self.formatter.handle_plan_replan(
        event.replan_reason,
        event.replan_count,
        event.completed_steps_preserved,
    )

@crewai_event_bus.on(GoalAchievedEarlyEvent)
def on_goal_achieved_early(_: Any, event: GoalAchievedEarlyEvent) -> None:
    # Show early termination when the goal was reached before the plan ended.
    self.formatter.handle_goal_achieved_early(
        event.steps_completed,
        event.steps_remaining,
    )
# ----------- AGENT LOGGING EVENTS -----------
@crewai_event_bus.on(AgentLogsStartedEvent)

View File

@@ -93,6 +93,14 @@ from crewai.events.types.memory_events import (
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.events.types.observation_events import (
GoalAchievedEarlyEvent,
PlanRefinementEvent,
PlanReplanTriggeredEvent,
StepObservationCompletedEvent,
StepObservationFailedEvent,
StepObservationStartedEvent,
)
from crewai.events.types.reasoning_events import (
AgentReasoningCompletedEvent,
AgentReasoningFailedEvent,
@@ -437,6 +445,39 @@ class TraceCollectionListener(BaseEventListener):
) -> None:
self._handle_action_event("agent_reasoning_failed", source, event)
# Observation events (Plan-and-Execute)
# Each handler forwards the raw event into the shared trace pipeline,
# tagged with a stable snake_case action-name string.

@event_bus.on(StepObservationStartedEvent)
def on_step_observation_started(
    source: Any, event: StepObservationStartedEvent
) -> None:
    # Planner began analyzing a step's execution result.
    self._handle_action_event("step_observation_started", source, event)

@event_bus.on(StepObservationCompletedEvent)
def on_step_observation_completed(
    source: Any, event: StepObservationCompletedEvent
) -> None:
    # Planner finished its analysis (success, validity, next action).
    self._handle_action_event("step_observation_completed", source, event)

@event_bus.on(StepObservationFailedEvent)
def on_step_observation_failed(
    source: Any, event: StepObservationFailedEvent
) -> None:
    # The observation LLM call itself failed; useful for alerting.
    self._handle_action_event("step_observation_failed", source, event)

@event_bus.on(PlanRefinementEvent)
def on_plan_refinement(source: Any, event: PlanRefinementEvent) -> None:
    # Lightweight refinement of upcoming step descriptions.
    self._handle_action_event("plan_refinement", source, event)

@event_bus.on(PlanReplanTriggeredEvent)
def on_plan_replan_triggered(
    source: Any, event: PlanReplanTriggeredEvent
) -> None:
    # Full regeneration of the remaining plan was triggered.
    self._handle_action_event("plan_replan_triggered", source, event)

@event_bus.on(GoalAchievedEarlyEvent)
def on_goal_achieved_early(source: Any, event: GoalAchievedEarlyEvent) -> None:
    # Goal satisfied before all planned steps ran.
    self._handle_action_event("goal_achieved_early", source, event)
@event_bus.on(KnowledgeRetrievalStartedEvent)
def on_knowledge_retrieval_started(
source: Any, event: KnowledgeRetrievalStartedEvent

View File

@@ -0,0 +1,99 @@
"""Observation events for the Plan-and-Execute architecture.
Emitted during the Observation phase (PLAN-AND-ACT Section 3.3) when the
PlannerObserver analyzes step execution results and decides on plan
continuation, refinement, or replanning.
"""
from typing import Any
from crewai.events.base_events import BaseEvent
class ObservationEvent(BaseEvent):
    """Base event for observation phase events.

    Carries the fields common to every observation-phase event: which
    agent and plan step the observation concerns, plus optional
    task/agent provenance.
    """

    # Discriminator string; each concrete subclass overrides the default.
    type: str
    # Role of the agent whose step is being observed.
    agent_role: str
    # Number of the plan step this observation refers to.
    step_number: int
    # Human-readable description of the step (may be empty).
    step_description: str = ""
    # Originating task/agent; typed loosely (Any) to avoid import cycles.
    from_task: Any | None = None
    from_agent: Any | None = None

    def __init__(self, **data: Any) -> None:
        """Initialize the event and extract task/agent parameters.

        Args:
            **data: Field values forwarded to the BaseEvent constructor.
        """
        super().__init__(**data)
        # BaseEvent helpers derive event metadata from the provided
        # from_task / from_agent values.
        self._set_task_params(data)
        self._set_agent_params(data)
class StepObservationStartedEvent(ObservationEvent):
    """Emitted when the Planner begins observing a step's result.

    Fires after every step execution, before the observation LLM call.
    """

    type: str = "step_observation_started"
class StepObservationCompletedEvent(ObservationEvent):
    """Emitted when the Planner finishes observing a step's result.

    Contains the full observation analysis: what was learned, whether
    the plan is still valid, and what action to take next.
    """

    type: str = "step_observation_completed"
    # Whether the step achieved its objective.
    step_completed_successfully: bool = True
    # New information this step revealed (used to refine later steps).
    key_information_learned: str = ""
    # Whether the remaining pending todos still make sense.
    remaining_plan_still_valid: bool = True
    # True when the remaining plan must be regenerated from scratch.
    needs_full_replan: bool = False
    # Why a full replan is needed (None when needs_full_replan is False).
    replan_reason: str | None = None
    # True when the overall goal was satisfied before the plan ran out.
    goal_already_achieved: bool = False
    # Lightweight tweaks to upcoming step descriptions (no full replan).
    suggested_refinements: list[str] | None = None
class StepObservationFailedEvent(ObservationEvent):
    """Emitted when the observation LLM call itself fails.

    The system defaults to continuing the plan when this happens,
    but the event allows monitoring/alerting on observation failures.
    """

    type: str = "step_observation_failed"
    # Human-readable description of the failure.
    error: str = ""
class PlanRefinementEvent(ObservationEvent):
    """Emitted when the Planner refines upcoming step descriptions.

    This is the lightweight refinement path — no full replan, just
    sharpening pending todo descriptions based on new information.
    """

    type: str = "plan_refinement"
    # How many pending steps had their descriptions updated.
    refined_step_count: int = 0
    # The refinement notes that were applied, if any.
    refinements: list[str] | None = None
class PlanReplanTriggeredEvent(ObservationEvent):
    """Emitted when the Planner triggers a full replan.

    The remaining plan was deemed fundamentally wrong and will be
    regenerated from scratch, preserving completed step results.
    """

    type: str = "plan_replan_triggered"
    # Explanation of why the replan was triggered.
    replan_reason: str = ""
    # How many replans have occurred so far in this execution.
    replan_count: int = 0
    # Number of completed steps whose results are carried forward.
    completed_steps_preserved: int = 0
class GoalAchievedEarlyEvent(ObservationEvent):
    """Emitted when the Planner detects the goal was achieved early.

    Remaining steps will be skipped and execution will finalize.
    """

    type: str = "goal_achieved_early"
    # Number of planned steps that will be skipped.
    steps_remaining: int = 0
    # Number of steps that were executed before the goal was reached.
    steps_completed: int = 0

View File

@@ -851,6 +851,152 @@ To enable tracing, do any one of these:
)
self.print_panel(error_content, "❌ Reasoning Error", "red")
# ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
def handle_observation_started(
    self,
    agent_role: str,
    step_number: int,
    step_description: str,
) -> None:
    """Render a panel announcing that a step observation has begun.

    Args:
        agent_role: Role of the agent whose step is being observed.
        step_number: Number of the observed plan step.
        step_description: Description of the step (truncated to 80 chars).
    """
    if not self.verbose:
        return
    segments: list[tuple[str, str]] = [
        ("Observation Started\n", "cyan bold"),
        ("Agent: ", "white"),
        (f"{agent_role}\n", "cyan"),
        ("Step: ", "white"),
        (f"{step_number}\n", "cyan"),
    ]
    if step_description:
        # Keep the panel compact: show at most 80 characters.
        preview = step_description[:80]
        if len(step_description) > 80:
            preview += "..."
        segments.append(("Description: ", "white"))
        segments.append((f"{preview}\n", "cyan"))
    content = Text()
    for text, text_style in segments:
        content.append(text, style=text_style)
    self.print_panel(content, "🔍 Observing Step Result", "cyan")
def handle_observation_completed(
    self,
    agent_role: str,
    step_number: int,
    step_completed: bool,
    plan_valid: bool,
    key_info: str,
    needs_replan: bool,
    goal_achieved: bool,
) -> None:
    """Render a panel summarizing a finished step observation.

    Status precedence mirrors the executor's router: early goal
    achievement, then full replan, then a still-valid plan, then the
    failure/invalid case.

    Args:
        agent_role: Role of the observing agent (kept for signature
            parity with handle_observation_started; not rendered).
        step_number: Number of the observed plan step.
        step_completed: Whether the step achieved its objective.
        plan_valid: Whether the remaining plan is still valid.
        key_info: New information learned (truncated to 120 chars).
        needs_replan: Whether a full replan was requested.
        goal_achieved: Whether the overall goal was achieved early.
    """
    if not self.verbose:
        return
    if goal_achieved:
        style = "green"
        status = "Goal Achieved Early"
    elif needs_replan:
        style = "yellow"
        status = "Replan Needed"
    elif plan_valid:
        style = "green"
        status = "Plan Valid — Continue"
    else:
        # Fix: the original ignored `step_completed` and labeled every
        # invalid-plan observation "Step Failed" even when the step
        # itself succeeded. Distinguish the two cases.
        style = "red"
        status = "Step Failed" if not step_completed else "Plan Invalid"
    content = Text()
    content.append("Observation Complete\n", style=f"{style} bold")
    content.append("Step: ", style="white")
    content.append(f"{step_number}\n", style=style)
    content.append("Status: ", style="white")
    content.append(f"{status}\n", style=style)
    if key_info:
        info_preview = key_info[:120] + ("..." if len(key_info) > 120 else "")
        content.append("Learned: ", style="white")
        content.append(f"{info_preview}\n", style=style)
    self.print_panel(content, "🔍 Observation Result", style)
def handle_observation_failed(
    self,
    step_number: int,
    error: str,
) -> None:
    """Render an error panel for a failed observation LLM call.

    Args:
        step_number: Number of the step whose observation failed.
        error: Description of the failure.
    """
    if not self.verbose:
        return
    panel_body = self.create_status_content(
        "Observation Failed",
        "Error",
        "red",
        Step=str(step_number),
        Error=error,
    )
    self.print_panel(panel_body, "❌ Observation Error", "red")
def handle_plan_refinement(
self,
step_number: int,
refined_count: int,
refinements: list[str] | None,
) -> None:
"""Handle plan refinement event."""
if not self.verbose:
return
content = Text()
content.append("Plan Refined\n", style="cyan bold")
content.append("After Step: ", style="white")
content.append(f"{step_number}\n", style="cyan")
content.append("Steps Updated: ", style="white")
content.append(f"{refined_count}\n", style="cyan")
if refinements:
for r in refinements[:3]:
content.append(f"{r[:80]}\n", style="white")
self.print_panel(content, "✏️ Plan Refinement", "cyan")
def handle_plan_replan(
    self,
    reason: str,
    replan_count: int,
    preserved_count: int,
) -> None:
    """Render a panel describing a full dynamic replan.

    Args:
        reason: Why the replan was triggered.
        replan_count: Which replan this is (1-based count).
        preserved_count: How many completed steps were preserved.
    """
    if not self.verbose:
        return
    body = Text()
    for text, text_style in (
        ("Full Replan Triggered\n", "yellow bold"),
        ("Reason: ", "white"),
        (f"{reason}\n", "yellow"),
        ("Replan #: ", "white"),
        (f"{replan_count}\n", "yellow"),
        ("Preserved Steps: ", "white"),
        (f"{preserved_count}\n", "yellow"),
    ):
        body.append(text, style=text_style)
    self.print_panel(body, "🔄 Dynamic Replan", "yellow")
def handle_goal_achieved_early(
    self,
    steps_completed: int,
    steps_remaining: int,
) -> None:
    """Render a panel announcing early goal achievement.

    Args:
        steps_completed: Number of steps that were executed.
        steps_remaining: Number of planned steps that were skipped.
    """
    if not self.verbose:
        return
    body = Text()
    for text, text_style in (
        ("Goal Achieved Early!\n", "green bold"),
        ("Completed: ", "white"),
        (f"{steps_completed} steps\n", "green"),
        ("Skipped: ", "white"),
        (f"{steps_remaining} remaining steps\n", "green"),
    ):
        body.append(text, style=text_style)
    self.print_panel(body, "🎯 Early Goal Achievement", "green")
# ----------- AGENT LOGGING EVENTS -----------
def handle_agent_logs_started(

View File

@@ -27,6 +27,11 @@ from crewai.events.types.logging_events import (
AgentLogsExecutionEvent,
AgentLogsStartedEvent,
)
from crewai.events.types.observation_events import (
GoalAchievedEarlyEvent,
PlanRefinementEvent,
PlanReplanTriggeredEvent,
)
from crewai.events.types.tool_usage_events import (
ToolUsageErrorEvent,
ToolUsageFinishedEvent,
@@ -62,8 +67,14 @@ from crewai.utilities.agent_utils import (
)
from crewai.utilities.constants import TRAINING_DATA_FILE
from crewai.utilities.i18n import I18N, get_i18n
from crewai.utilities.planning_types import PlanStep, TodoItem, TodoList
from crewai.utilities.planning_types import (
PlanStep,
StepObservation,
TodoItem,
TodoList,
)
from crewai.utilities.printer import Printer
from crewai.utilities.step_execution_context import StepExecutionContext
from crewai.utilities.string_utils import sanitize_tool_name
from crewai.utilities.tool_utils import execute_tool_and_check_finality
from crewai.utilities.training_handler import CrewTrainingHandler
@@ -109,6 +120,14 @@ class AgentReActState(BaseModel):
last_replan_reason: str | None = Field(
default=None, description="Reason for the last replan, if any"
)
observations: dict[int, StepObservation] = Field(
default_factory=dict,
description="Planner's observation per step (keyed by step_number)",
)
execution_log: list[dict[str, Any]] = Field(
default_factory=list,
description="Audit trail for debugging (NOT used for LLM calls)",
)
class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
@@ -222,6 +241,11 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
)
self._state = AgentReActState()
# Plan-and-Execute components (Phase 2)
# Lazy-imported to avoid circular imports during module load
self._step_executor: Any = None
self._planner_observer: Any = None
def _ensure_flow_initialized(self) -> None:
"""Ensure Flow.__init__() has been called.
@@ -396,6 +420,331 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
self.state.todos = TodoList(items=todos)
# -------------------------------------------------------------------------
# Plan-and-Execute: Component Initialization
# -------------------------------------------------------------------------
def _ensure_step_executor(self) -> Any:
"""Lazily create the StepExecutor (avoids circular imports)."""
if self._step_executor is None:
from crewai.agents.step_executor import StepExecutor
self._step_executor = StepExecutor(
llm=self.llm,
tools=self.tools,
agent=self.agent,
original_tools=self.original_tools,
tools_handler=self.tools_handler,
task=self.task,
crew=self.crew,
function_calling_llm=self.function_calling_llm,
request_within_rpm_limit=self.request_within_rpm_limit,
callbacks=self.callbacks,
i18n=self._i18n,
)
return self._step_executor
def _ensure_planner_observer(self) -> Any:
"""Lazily create the PlannerObserver (avoids circular imports)."""
if self._planner_observer is None:
from crewai.agents.planner_observer import PlannerObserver
self._planner_observer = PlannerObserver(
agent=self.agent,
task=self.task,
)
return self._planner_observer
def _build_context_for_todo(self, todo: TodoItem) -> StepExecutionContext:
    """Build an isolated execution context for a single todo.

    Passes only final results from completed dependencies — never
    execution traces, tool calls, or LLM message history.

    Args:
        todo: The todo item to build context for.

    Returns:
        Immutable StepExecutionContext with dependency results.
    """
    dependency_results: dict[int, str] = {}
    for dep_num in todo.depends_on:
        dependency = self.state.todos.get_by_step_number(dep_num)
        if dependency and dependency.result:
            dependency_results[dep_num] = dependency.result

    if self.task:
        description = self.task.description or ""
        goal = self.task.expected_output or ""
    else:
        # No formal Task — fall back to the raw kickoff input.
        description = getattr(self, "_kickoff_input", "")
        goal = "Complete the task successfully"

    return StepExecutionContext(
        task_description=description,
        task_goal=goal,
        dependency_results=dependency_results,
    )
# -------------------------------------------------------------------------
# Plan-and-Execute: New Observation-Driven Flow Methods
# -------------------------------------------------------------------------
@listen("step_executed")
def observe_step_result(self) -> Literal["step_observed"]:
"""THE OBSERVATION STEP — runs after EVERY step execution.
This is the Planner's opportunity to incorporate new information
learned during execution. It is NOT an error handler — it runs on
every step, including successes.
Based on PLAN-AND-ACT Section 3.3.
"""
current_todo = self.state.todos.current_todo
if not current_todo:
return "step_observed"
observer = self._ensure_planner_observer()
all_completed = self.state.todos.get_completed_todos()
remaining = self.state.todos.get_pending_todos()
observation = observer.observe(
completed_step=current_todo,
result=current_todo.result or "",
all_completed=all_completed,
remaining_todos=remaining,
)
self.state.observations[current_todo.step_number] = observation
# Log observation for debugging
self.state.execution_log.append(
{
"type": "observation",
"step_number": current_todo.step_number,
"step_completed_successfully": observation.step_completed_successfully,
"key_information_learned": observation.key_information_learned,
"remaining_plan_still_valid": observation.remaining_plan_still_valid,
"needs_full_replan": observation.needs_full_replan,
"goal_already_achieved": observation.goal_already_achieved,
}
)
if self.agent.verbose:
self._printer.print(
content=(
f"[Observe] Step {current_todo.step_number}: "
f"success={observation.step_completed_successfully}, "
f"plan_valid={observation.remaining_plan_still_valid}, "
f"learned={observation.key_information_learned[:80]}..."
),
color="cyan",
)
return "step_observed"
@router("step_observed")
def decide_next_action(
self,
) -> Literal[
"goal_achieved",
"replan_now",
"refine_and_continue",
"continue_plan",
]:
"""Route based on the Planner's observation.
This replaces the old reactive _should_replan() heuristics with
proactive, LLM-driven decisions.
"""
current_todo = self.state.todos.current_todo
if not current_todo:
return "continue_plan"
observation = self.state.observations.get(current_todo.step_number)
if not observation:
# No observation available — default to continue
self.state.todos.mark_completed(current_todo.step_number)
return "continue_plan"
# Goal already achieved — early termination
if observation.goal_already_achieved:
self.state.todos.mark_completed(
current_todo.step_number, result=current_todo.result
)
if self.agent.verbose:
self._printer.print(
content="[Decide] Goal achieved early — finalizing",
color="green",
)
return "goal_achieved"
# Full replan needed
if observation.needs_full_replan:
if self.agent.verbose:
self._printer.print(
content=f"[Decide] Full replan needed: {observation.replan_reason}",
color="yellow",
)
self.state.last_replan_reason = observation.replan_reason
return "replan_now"
# Step failed — also trigger replan
if not observation.step_completed_successfully:
if self.agent.verbose:
self._printer.print(
content="[Decide] Step failed — triggering replan",
color="yellow",
)
self.state.last_replan_reason = "Step did not complete successfully"
return "replan_now"
# Plan still valid but needs refinement
if observation.remaining_plan_still_valid and observation.suggested_refinements:
self.state.todos.mark_completed(
current_todo.step_number, result=current_todo.result
)
if self.agent.verbose:
self._printer.print(
content="[Decide] Plan valid but refining upcoming steps",
color="cyan",
)
return "refine_and_continue"
# Plan still valid, no refinements needed — just continue
self.state.todos.mark_completed(
current_todo.step_number, result=current_todo.result
)
if self.agent.verbose:
completed = self.state.todos.completed_count
total = len(self.state.todos.items)
self._printer.print(
content=f"[Decide] Continue plan ({completed}/{total} done)",
color="green",
)
return "continue_plan"
@listen("refine_and_continue")
def handle_refine_and_continue(self) -> Literal["has_todos"]:
"""Lightweight plan refinement — update pending todo descriptions.
The Planner sharpens upcoming step descriptions based on what was
learned, without regenerating the entire plan.
"""
# Find the most recent observation with refinements
recent_observation: StepObservation | None = None
last_step: int = 0
if self.state.observations:
last_step = max(self.state.observations.keys())
recent_observation = self.state.observations[last_step]
if recent_observation and recent_observation.suggested_refinements:
observer = self._ensure_planner_observer()
remaining = self.state.todos.get_pending_todos()
observer.refine_todos(recent_observation, remaining)
# Emit refinement event
crewai_event_bus.emit(
self.agent,
event=PlanRefinementEvent(
agent_role=self.agent.role,
step_number=last_step,
step_description="",
refined_step_count=len(remaining),
refinements=recent_observation.suggested_refinements,
from_task=self.task,
from_agent=self.agent,
),
)
if self.agent.verbose:
self._printer.print(
content=f"[Refine] Updated {len(remaining)} pending step(s)",
color="cyan",
)
return "has_todos"
@listen("continue_plan")
def handle_continue_plan(self) -> Literal["has_todos", "all_todos_complete"]:
"""Continue to the next todo after a successful step."""
if self.state.todos.is_complete:
return "all_todos_complete"
return "has_todos"
@listen("goal_achieved")
def handle_goal_achieved(self) -> Literal["all_todos_complete"]:
"""Handle early goal achievement — skip remaining todos."""
completed = self.state.todos.get_completed_todos()
remaining = self.state.todos.get_pending_todos()
# Emit goal achieved early event
crewai_event_bus.emit(
self.agent,
event=GoalAchievedEarlyEvent(
agent_role=self.agent.role,
step_number=completed[-1].step_number if completed else 0,
step_description="",
steps_completed=len(completed),
steps_remaining=len(remaining),
from_task=self.task,
from_agent=self.agent,
),
)
if self.agent.verbose:
self._printer.print(
content="Goal achieved early — skipping remaining steps",
color="green",
)
return "all_todos_complete"
@listen("replan_now")
def handle_replan_now(
self,
) -> Literal["has_todos", "all_todos_complete"]:
"""Handle full replanning — regenerate the remaining plan.
Preserves completed todo results and replaces only pending steps.
"""
max_replans = 3
self.state.replan_count += 1
if self.state.replan_count > max_replans:
if self.agent.verbose:
self._printer.print(
content=f"Max replans ({max_replans}) reached — finalizing with current results",
color="yellow",
)
return "all_todos_complete"
reason = self.state.last_replan_reason or "Dynamic replan triggered"
completed = self.state.todos.get_completed_todos()
# Emit replan triggered event
crewai_event_bus.emit(
self.agent,
event=PlanReplanTriggeredEvent(
agent_role=self.agent.role,
step_number=completed[-1].step_number if completed else 0,
step_description="",
replan_reason=reason,
replan_count=self.state.replan_count,
completed_steps_preserved=len(completed),
from_task=self.task,
from_agent=self.agent,
),
)
self._trigger_replan(reason)
if self.state.todos.get_pending_todos():
return "has_todos"
return "all_todos_complete"
# -------------------------------------------------------------------------
# Todo-Driven Execution Flow
# -------------------------------------------------------------------------
@@ -460,28 +809,73 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
return "multiple_todos_ready"
@router("single_todo_ready")
def execute_todo_sequential(self) -> Literal["todo_injected"]:
"""Prepare to execute a single todo by injecting its context.
def execute_todo_sequential(
self,
) -> Literal["step_executed", "todo_injected"]:
"""Execute a single todo using StepExecutor (Plan-and-Execute mode)
or fall back to the old ReAct injection (legacy mode).
Adds a focused prompt for the current todo to the conversation,
guiding the agent to complete this specific step.
In Plan-and-Execute mode: executes the step in isolation via
StepExecutor, stores the result, and routes to the observation step.
In legacy mode: injects context into the shared message list and
routes to the ReAct loop.
"""
current = self.state.todos.current_todo
if not current:
return "todo_injected" # Fall through to legacy
# DEBUG: Trace starting todo execution
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] execute_todo_sequential: starting todo {current.step_number if current else None}",
color="cyan",
)
if current:
# Plan-and-Execute path: use StepExecutor for isolated execution
if getattr(self.agent, "planning_enabled", False):
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] Description: {current.description[:60]}...",
content=(
f"[Execute] Step {current.step_number}: "
f"{current.description[:60]}..."
),
color="cyan",
)
if current:
self._inject_todo_context(current)
step_executor = self._ensure_step_executor()
context = self._build_context_for_todo(current)
result = step_executor.execute(current, context)
# Store result on the todo (do NOT mark completed — observation decides)
current.result = result.result
# Log to audit trail
self.state.execution_log.append(
{
"type": "step_execution",
"step_number": current.step_number,
"success": result.success,
"result_preview": result.result[:200] if result.result else "",
"error": result.error,
"tool_calls": result.tool_calls_made,
"execution_time": result.execution_time,
}
)
if self.agent.verbose:
status = "success" if result.success else "failed"
self._printer.print(
content=(
f"[Execute] Step {current.step_number} {status} "
f"({result.execution_time:.1f}s, "
f"{len(result.tool_calls_made)} tool calls)"
),
color="green" if result.success else "red",
)
return "step_executed"
# Legacy path: inject context into shared messages for ReAct loop
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] execute_todo_sequential (legacy): starting todo {current.step_number}",
color="cyan",
)
self._inject_todo_context(current)
return "todo_injected"
def _inject_todo_context(self, todo: TodoItem) -> None:
@@ -490,18 +884,23 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
Args:
todo: The todo item to inject context for.
"""
prompt = self._build_todo_prompt(todo)
# Build focused task prompt. Context from previous steps is already
# in self.state.messages as SYSTEM messages (added by _mark_todo_as_completed)
prompt = self._build_todo_prompt(todo, include_dependencies=False)
todo_message: LLMMessage = {
"role": "user",
"content": prompt,
}
self.state.messages.append(todo_message)
def _build_todo_prompt(self, todo: TodoItem) -> str:
def _build_todo_prompt(
self, todo: TodoItem, include_dependencies: bool = True
) -> str:
"""Build a focused prompt for executing a single todo.
Args:
todo: The todo item to build a prompt for.
include_dependencies: Whether to include dependency results in this prompt.
Returns:
A prompt string focused on this specific step.
@@ -513,19 +912,13 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
if todo.tool_to_use:
parts.append(f"Suggested tool: {todo.tool_to_use}")
# Include results from completed dependencies
if todo.depends_on:
# Include results from completed dependencies if requested (used for parallel execution)
if include_dependencies and todo.depends_on:
dep_results = []
for dep_num in todo.depends_on:
dep = self.state.todos.get_by_step_number(dep_num)
if dep and dep.result:
# Truncate long results
result_preview = (
dep.result[:500] + "..."
if len(dep.result) > 500
else dep.result
)
dep_results.append(f"Step {dep_num} result: {result_preview}")
dep_results.append(f"Step {dep_num} result: {dep.result}")
if dep_results:
parts.append("\nContext from previous steps:")
parts.extend(dep_results)
@@ -561,12 +954,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
color="red",
)
else:
self.state.todos.mark_completed(todo.step_number, result=str(result))
if self.agent.verbose:
self._printer.print(
content=f"Todo {todo.step_number} completed",
color="green",
)
self._mark_todo_as_completed(todo.step_number, str(result))
return "parallel_todos_complete"
@@ -580,12 +968,29 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
The result of executing the todo.
"""
# Build messages for this specific todo
todo_prompt = self._build_todo_prompt(todo)
messages: list[LLMMessage] = [
{"role": "system", "content": self._get_todo_system_prompt()},
{"role": "user", "content": todo_prompt},
]
# Inject context into messages for parallel execution (since history is empty)
if todo.depends_on:
dep_results = []
for dep_num in todo.depends_on:
dep = self.state.todos.get_by_step_number(dep_num)
if dep and dep.result:
dep_results.append(f"Step {dep_num} result: {dep.result}")
if dep_results:
messages.append(
{
"role": "system",
"content": "Context from previous steps:\n"
+ "\n".join(dep_results),
}
)
todo_prompt = self._build_todo_prompt(todo, include_dependencies=False)
messages.append({"role": "user", "content": todo_prompt})
# If the todo specifies a tool and we have native tool support
if todo.tool_to_use and self.state.use_native_tools:
try:
@@ -1415,22 +1820,49 @@ provide clear results that can be used by subsequent steps."""
or last_msg.get("role") == "assistant"
):
result = str(last_msg.get("content", ""))
elif not self.state.current_answer and self.state.messages:
# For native tools, results are in the message history as 'tool' roles
# We take the content of the most recent tool results
tool_results = []
for msg in reversed(self.state.messages):
if msg.get("role") == "tool":
tool_results.insert(0, str(msg.get("content", "")))
elif msg.get("role") == "assistant" and msg.get("tool_calls"):
# Once we hit the assistant message that triggered the tools, we stop
break
result = "\n".join(tool_results)
self.state.todos.mark_completed(current_todo.step_number, result=result)
self._mark_todo_as_completed(current_todo.step_number, result)
return "todo_marked"
def _mark_todo_as_completed(self, step_number: int, result: str) -> None:
"""Helper to mark a todo as completed and update history.
Args:
step_number: The step number to mark.
result: The result of the todo.
"""
self.state.todos.mark_completed(step_number, result=result)
if self.agent.verbose:
completed = self.state.todos.completed_count
total = len(self.state.todos.items)
self._printer.print(
content=f"✓ Todo {current_todo.step_number} completed ({completed}/{total})",
content=f"✓ Todo {step_number} completed ({completed}/{total})",
color="green",
)
self._printer.print(
content=f"[DEBUG] Marked todo {current_todo.step_number} as completed, result_len={len(result)}",
content=f"[DEBUG] Marked todo {step_number} as completed, result_len={len(result)}",
color="cyan",
)
return "todo_marked"
# Add to history as a SYSTEM message for subsequent steps
if result:
self._append_message_to_state(
f"**Step {step_number} result:**\n\n{result}",
role="system",
)
@router(mark_todo_complete)
def check_more_todos(
@@ -1500,22 +1932,28 @@ provide clear results that can be used by subsequent steps."""
"""Finalize execution and emit completion logs.
If todos were used, synthesizes a final answer from all todo results.
Handles both the legacy ReAct path (current_answer already set) and
the Plan-and-Execute path (synthesize from completed todos).
"""
# DEBUG: Trace finalize being called
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] finalize called! todos_count={len(self.state.todos.items)}, todos_complete={self.state.todos.is_complete}",
content=f"[Finalize] todos_count={len(self.state.todos.items)}, todos_with_results={sum(1 for t in self.state.todos.items if t.result)}",
color="magenta",
)
if self.state.todos.items:
for todo in self.state.todos.items:
self._printer.print(
content=f"[DEBUG] Todo {todo.step_number}: status={todo.status}, desc={todo.description[:40]}...",
color="magenta",
)
# If we have completed todos, synthesize the final answer
if self.state.todos.items and self.state.todos.is_complete:
# Plan-and-Execute path: synthesize from completed todos
# Check for todos with results (even if not all marked "completed" —
# the goal_achieved path may skip marking some as completed)
todos_with_results = [t for t in self.state.todos.items if t.result]
if todos_with_results and self.state.current_answer is None:
self._synthesize_final_answer_from_todos()
# Legacy path: synthesize if todos are all formally complete
if (
self.state.todos.items
and self.state.todos.is_complete
and self.state.current_answer is None
):
self._synthesize_final_answer_from_todos()
if self.state.current_answer is None:
@@ -1552,7 +1990,7 @@ provide clear results that can be used by subsequent steps."""
results: list[str] = []
for todo in self.state.todos.items:
if todo.result:
results.append(f"**Step {todo.step_number}**: {todo.description}")
results.append(f"**Step {todo.step_number} result:**")
results.append(todo.result)
results.append("") # Empty line for spacing
@@ -1703,14 +2141,9 @@ provide clear results that can be used by subsequent steps."""
if completed:
context_parts.append("Successfully completed steps:")
for todo in completed:
result_preview = (
todo.result[:200] + "..."
if todo.result and len(todo.result) > 200
else todo.result
)
context_parts.append(f" - Step {todo.step_number}: {todo.description}")
if result_preview:
context_parts.append(f" Result: {result_preview}")
if todo.result:
context_parts.append(f" Result: {todo.result}")
# Summarize failed todos
failed = [
@@ -1858,6 +2291,8 @@ Consider:
self.state.todos = TodoList()
self.state.replan_count = 0
self.state.last_replan_reason = None
self.state.observations = {}
self.state.execution_log = []
self._kickoff_input = inputs.get("input", "")
@@ -1949,6 +2384,8 @@ Consider:
self.state.todos = TodoList()
self.state.replan_count = 0
self.state.last_replan_reason = None
self.state.observations = {}
self.state.execution_log = []
self._kickoff_input = inputs.get("input", "")

View File

@@ -144,3 +144,86 @@ class TodoList(BaseModel):
def running_count(self) -> int:
"""Count of currently running todos."""
return sum(1 for item in self.items if item.status == "running")
def get_completed_todos(self) -> list[TodoItem]:
    """Return every todo whose status is "completed", in plan order.

    Returns:
        List of completed TodoItem objects.
    """
    return list(filter(lambda todo: todo.status == "completed", self.items))
def get_pending_todos(self) -> list[TodoItem]:
    """Return every todo whose status is "pending", in plan order.

    Returns:
        List of pending TodoItem objects.
    """
    return list(filter(lambda todo: todo.status == "pending", self.items))
def replace_pending_todos(self, new_items: list[TodoItem]) -> None:
    """Replace all pending todos with new items.

    Preserves completed and running todos, replaces only pending ones.
    Used during replanning to swap in a new plan for remaining work.

    Args:
        new_items: The new todo items to replace pending ones.
    """
    kept = [todo for todo in self.items if todo.status != "pending"]
    self.items = [*kept, *new_items]
class StepObservation(BaseModel):
    """Planner's observation after a step execution completes.

    Returned by the PlannerObserver after EVERY step — not just failures.
    The Planner uses this to decide whether to continue, refine, or replan.

    Based on PLAN-AND-ACT (Section 3.3): the Planner observes what the Executor
    did and incorporates new information into the remaining plan.

    Attributes:
        step_completed_successfully: Whether the step achieved its objective.
        key_information_learned: New information revealed by this step
            (e.g., "Found 3 products: A, B, C"). Used to refine upcoming steps.
        remaining_plan_still_valid: Whether pending todos still make sense
            given the new information. True does NOT mean no refinement needed.
        suggested_refinements: Minor tweaks to upcoming step descriptions.
            These are lightweight in-place updates, not a full replan.
            Example: ["Step 3 should select product B instead of 'best product'"]
        needs_full_replan: The remaining plan is fundamentally wrong and must
            be regenerated from scratch. Mutually exclusive with
            remaining_plan_still_valid (if this is True, that should be False).
        replan_reason: Explanation of why a full replan is needed (None if not).
        goal_already_achieved: The overall task goal has been satisfied early.
            No more steps needed — skip remaining todos and finalize.
    """

    # NOTE: This model is used as an LLM structured-output schema
    # (response_format json_schema with strict=true); the class docstring and
    # every Field description below are serialized verbatim into the request
    # sent to the model. Editing any of this text changes what the LLM sees
    # and will invalidate recorded test cassettes that embed the schema.
    step_completed_successfully: bool = Field(
        description="Whether the step achieved what it was asked to do"
    )
    # Defaults to "" (not None) so callers can always treat it as a string.
    key_information_learned: str = Field(
        default="",
        description="What new information this step revealed",
    )
    # Defaults optimistic: the plan is assumed valid unless the observer
    # explicitly says otherwise.
    remaining_plan_still_valid: bool = Field(
        default=True,
        description="Whether the remaining pending todos still make sense given new information",
    )
    suggested_refinements: list[str] | None = Field(
        default=None,
        description="Minor tweaks to descriptions of upcoming steps (lightweight, no full replan)",
    )
    needs_full_replan: bool = Field(
        default=False,
        description="The remaining plan is fundamentally wrong and must be regenerated",
    )
    replan_reason: str | None = Field(
        default=None,
        description="Explanation of why a full replan is needed",
    )
    goal_already_achieved: bool = Field(
        default=False,
        description="The overall task goal has been satisfied early; no more steps needed",
    )

View File

@@ -0,0 +1,64 @@
"""Context and result types for isolated step execution in Plan-and-Execute architecture.
These types mediate between the AgentExecutor (orchestrator) and StepExecutor (per-step worker).
StepExecutionContext carries only final results from dependencies — never LLM message histories.
StepResult carries only the outcome of a step — never internal execution traces.
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass(frozen=True)
class StepExecutionContext:
    """Immutable per-step context handed to a StepExecutor for one todo.

    Carries only what a single step needs: the overall task description
    and goal, plus the final result strings of its completed dependency
    steps. Deliberately excludes LLM message histories, execution traces,
    and any shared mutable state.

    Attributes:
        task_description: The original task description (from Task or kickoff input).
        task_goal: The expected output / goal of the overall task.
        dependency_results: Maps step_number to the final result string of
            each completed dependency of the current step.
    """

    task_description: str
    task_goal: str
    dependency_results: dict[int, str] = field(default_factory=dict)

    def get_dependency_result(self, step_number: int) -> str | None:
        """Look up the final result of a dependency step.

        Args:
            step_number: The step number whose result is wanted.

        Returns:
            The dependency's result string, or None when no result is recorded.
        """
        try:
            return self.dependency_results[step_number]
        except KeyError:
            return None
@dataclass
class StepResult:
    """Outcome of a single todo executed by a StepExecutor.

    Holds the step's final output plus metadata for debugging and
    metrics. Tool-call names are recorded purely for audit logging —
    they are never passed on to subsequent steps or to the Planner.

    Attributes:
        success: True when the step completed successfully.
        result: The final output string produced by the step.
        error: Error message when the step failed; None on success.
        tool_calls_made: Names of tools invoked (debug/log use only).
        execution_time: Wall-clock duration of the step, in seconds.
    """

    success: bool
    result: str
    error: str | None = None
    tool_calls_made: list[str] = field(default_factory=list)
    execution_time: float = 0.0

View File

@@ -4,18 +4,25 @@ interactions:
Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
a focused execution plan for the following task:\n\n## Task\nWhat is 2 + 2?\n\n##
Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools
available\n\n## Instructions\nCreate ONLY the essential steps needed to complete
this task. Use the MINIMUM number of steps required - do NOT pad your plan with
unnecessary steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State
the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT
include:\n- Setup or preparation steps that are obvious\n- Verification steps
unless critical\n- Documentation or cleanup steps unless explicitly required\n-
Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan,
state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n-
\"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished,
not HOW. Group related actions into logical units. Fewer steps = better. Most
tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are
valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2.
**Output Step**: Synthesizes prior results into the final deliverable (usually
the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE
FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step,
not three\n- Combine all synthesis into ONE final output step\n- NO standalone
\"thinking\" steps (review, verify, confirm, refine, analyze) - these happen
naturally between steps\n\nFor each step: State the action, specify the tool
(if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -28,7 +35,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1541'
- '2315'
content-type:
- application/json
host:
@@ -55,20 +62,24 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTTAh68P65LybtqkwNI3p2HXcRv\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078147,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FIyv2pfC7qKbZVvmJNjOVfge1F\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330972,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. **Action:**
Perform the addition operation. \\n **Tool:** None (manually calculate).\\n\\n2.
**Action:** State the result. \\n **Tool:** None (manually output).\\n\\nREADY:
I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 281,\n \"completion_tokens\":
56,\n \"total_tokens\": 337,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_xEDChlUntYR0aSxQhkobswea\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 2 +
2 and provide the result as the final output.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate
the sum of 2 + 2\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide
the result as final output\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 440,\n \"completion_tokens\":
92,\n \"total_tokens\": 532,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -77,7 +88,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:28 GMT
- Thu, 05 Feb 2026 22:36:13 GMT
Server:
- cloudflare
Set-Cookie:
@@ -97,7 +108,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1165'
- '1670'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -123,9 +134,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant that solves math problems step by step\nYour personal goal is: Help
solve simple math problems"},{"role":"user","content":"\nCurrent Task: What
is 2 + 2?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
assistant that solves math problems step by step\n\nYour goal: Help solve simple
math problems\n\nYou are executing a specific step in a multi-step plan. Focus
ONLY on completing\nthe current step. Do not plan ahead or worry about future
steps.\n\nBefore acting, briefly reason about what you need to do and which
approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nCalculate the sum of 2 + 2\n\nComplete this step and provide your
result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -138,7 +153,7 @@ interactions:
connection:
- keep-alive
content-length:
- '299'
- '597'
content-type:
- application/json
cookie:
@@ -167,20 +182,18 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTVB9mdtq1YZrUVf1aSb6dVVQ8G\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078149,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FJ4ZEkHWSBMZA8bDbMqd7upzwY\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330973,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To solve the problem of 2 + 2, we simply
perform the addition:\\n\\n1. Start with the first number: 2\\n2. Add the
second number: + 2\\n3. Combine the two: 2 + 2 = 4\\n\\nTherefore, the answer
is 4.\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 54,\n \"completion_tokens\": 62,\n
\ \"total_tokens\": 116,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": \"To calculate the sum of 2 + 2, I simply
add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
115,\n \"completion_tokens\": 33,\n \"total_tokens\": 148,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -189,7 +202,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:30 GMT
- Thu, 05 Feb 2026 22:36:14 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -207,7 +220,155 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1300'
- '614'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Calculate the sum of 2 + 2\\nResult: To calculate the sum of 2 + 2, I simply
add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\\n\\n## Remaining
plan steps:\\n Step 2: Provide the result as final output\\n\\nAnalyze this
step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4024'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FKmJpd8tlJ6Y3OChUQsoz2o5ps\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330974,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
calculation for 2 + 2 is 4.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
789,\n \"completion_tokens\": 64,\n \"total_tokens\": 853,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:15 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1181'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -42,17 +42,17 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTTFxQ75llVmJv0ee902FIjXE8p\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078147,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FTKj39Y02oqJmQxpmC8sz2piEl\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330983,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"3 + 3 equals 6.\",\n \"refusal\":
\"assistant\",\n \"content\": \"The sum of 3 + 3 is 6.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -61,7 +61,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:27 GMT
- Thu, 05 Feb 2026 22:36:23 GMT
Server:
- cloudflare
Set-Cookie:
@@ -81,7 +81,113 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '401'
- '361'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
Task: What is 3 + 3?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '260'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FT7ELyytoJFkmjOtWysQA2Bfvy\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330983,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The sum of 3 + 3 is 6.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:23 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '362'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -4,18 +4,25 @@ interactions:
Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
a focused execution plan for the following task:\n\n## Task\nWhat is 7 + 7?\n\n##
Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools
available\n\n## Instructions\nCreate ONLY the essential steps needed to complete
this task. Use the MINIMUM number of steps required - do NOT pad your plan with
unnecessary steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State
the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT
include:\n- Setup or preparation steps that are obvious\n- Verification steps
unless critical\n- Documentation or cleanup steps unless explicitly required\n-
Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan,
state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n-
\"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished,
not HOW. Group related actions into logical units. Fewer steps = better. Most
tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are
valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2.
**Output Step**: Synthesizes prior results into the final deliverable (usually
the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE
FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step,
not three\n- Combine all synthesis into ONE final output step\n- NO standalone
\"thinking\" steps (review, verify, confirm, refine, analyze) - these happen
naturally between steps\n\nFor each step: State the action, specify the tool
(if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -28,7 +35,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1541'
- '2315'
content-type:
- application/json
host:
@@ -55,18 +62,24 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTdqlxwWowSdLncBERFrCgxTvVj\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078157,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FN5xLKcEfF0ISjfbnezYLsZtma\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330977,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Calculate
the sum of 7 and 7.\\n \\nREADY: I am ready to execute the task.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
281,\n \"completion_tokens\": 28,\n \"total_tokens\": 309,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_rSNKBB5w6x6IXkm0fm2GN1hI\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 7 +
7 and provide the result.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate
the sum of 7 + 7.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide
the final output of the calculation.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 440,\n \"completion_tokens\":
89,\n \"total_tokens\": 529,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -75,7 +88,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:38 GMT
- Thu, 05 Feb 2026 22:36:18 GMT
Server:
- cloudflare
Set-Cookie:
@@ -95,7 +108,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '709'
- '1700'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -121,9 +134,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant that solves math problems step by step\nYour personal goal is: Help
solve simple math problems"},{"role":"user","content":"\nCurrent Task: What
is 7 + 7?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
assistant that solves math problems step by step\n\nYour goal: Help solve simple
math problems\n\nYou are executing a specific step in a multi-step plan. Focus
ONLY on completing\nthe current step. Do not plan ahead or worry about future
steps.\n\nBefore acting, briefly reason about what you need to do and which
approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nCalculate the sum of 7 + 7.\n\nComplete this step and provide
your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -136,7 +153,7 @@ interactions:
connection:
- keep-alive
content-length:
- '299'
- '598'
content-type:
- application/json
cookie:
@@ -165,18 +182,19 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTeB6Miecallw9SjSfLAXPjX2XD\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078158,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FOVRLtzvZr17sXJ05O6NTxw1rI\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330978,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To find the sum of 7 and 7, you simply
add the two numbers together:\\n\\n7 + 7 = 14\\n\\nSo, the answer is 14.\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
54,\n \"completion_tokens\": 35,\n \"total_tokens\": 89,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": \"To calculate the sum of 7 + 7, I need
to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, the result
is 14.\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 115,\n \"completion_tokens\": 38,\n
\ \"total_tokens\": 153,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -185,7 +203,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:38 GMT
- Thu, 05 Feb 2026 22:36:19 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -203,7 +221,418 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '733'
- '868'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Calculate the sum of 7 + 7.\\nResult: To calculate the sum of 7 + 7, I need
to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, the result
is 14.\\n\\n## Remaining plan steps:\\n Step 2: Provide the final output of
the calculation.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4051'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FPKZpmhdynDPftfUn6yxeNSmro\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330979,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
sum of 7 + 7 has been correctly calculated to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
795,\n \"completion_tokens\": 69,\n \"total_tokens\": 864,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:21 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1071'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant that solves math problems step by step\n\nYour goal: Help solve simple
math problems\n\nYou are executing a specific step in a multi-step plan. Focus
ONLY on completing\nthe current step. Do not plan ahead or worry about future
steps.\n\nBefore acting, briefly reason about what you need to do and which
approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nProvide the final output of the calculation.\n\n## Context from
previous steps:\nStep 1 result: To calculate the sum of 7 + 7, I need to simply
add the two numbers together. \n\n7 + 7 = 14.\n\nSo, the result is 14.\n\nComplete
this step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '785'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FRqSOxtg5k7zpUfvXk8XEZMz9x\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330981,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The final output of the calculation
is 14.\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 162,\n \"completion_tokens\": 10,\n
\ \"total_tokens\": 172,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:21 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '446'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
\ Step 1: Calculate the sum of 7 + 7.\\n Result: To calculate the sum of
7 + 7, I need to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo,
the result is 14.\\n\\n## Just completed step 2\\nDescription: Provide the final
output of the calculation.\\nResult: The final output of the calculation is
14.\\n\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4113'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FRITGCaSHqqF9f8FVEgkrZ36QL\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330981,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
final output of the calculation is confirmed to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
808,\n \"completion_tokens\": 65,\n \"total_tokens\": 873,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:22 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '924'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -1,108 +0,0 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '260'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTf8T2iADffpPCJBZhntLlaoaSy\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078159,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:40 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '515'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -1,23 +1,104 @@
interactions:
- request:
body: '{"trace_id": "869cae2c-e863-4e17-b6c7-e9cf6ba8835d", "execution_type":
"crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
"crew_name": "Unknown Crew", "flow_name": null, "crewai_version": "1.9.3", "privacy_level":
"standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2026-02-05T22:35:59.859861+00:00"}}'
headers:
Accept:
- '*/*'
Connection:
- keep-alive
Content-Length:
- '434'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
X-Crewai-Organization-Id:
- 3433f0ee-8a94-4aa4-822b-2ac71aa38b18
X-Crewai-Version:
- 1.9.3
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
method: POST
uri: https://app.crewai.com/crewai_plus/api/v1/tracing/batches
response:
body:
string: '{"id":"d34854ac-4e95-420c-b08a-af182e63fc75","trace_id":"869cae2c-e863-4e17-b6c7-e9cf6ba8835d","execution_type":"crew","crew_name":"Unknown
Crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.9.3","privacy_level":"standard","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"Unknown
Crew","flow_name":null,"crewai_version":"1.9.3","privacy_level":"standard"},"created_at":"2026-02-05T22:36:00.450Z","updated_at":"2026-02-05T22:36:00.450Z"}'
headers:
Connection:
- keep-alive
Content-Length:
- '492'
Content-Type:
- application/json; charset=utf-8
Date:
- Thu, 05 Feb 2026 22:36:00 GMT
cache-control:
- no-store
content-security-policy:
- CSP-FILTERED
etag:
- ETAG-XXX
expires:
- '0'
permissions-policy:
- PERMISSIONS-POLICY-XXX
pragma:
- no-cache
referrer-policy:
- REFERRER-POLICY-XXX
strict-transport-security:
- STS-XXX
vary:
- Accept
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-frame-options:
- X-FRAME-OPTIONS-XXX
x-permitted-cross-domain-policies:
- X-PERMITTED-XXX
x-request-id:
- X-REQUEST-ID-XXX
x-runtime:
- X-RUNTIME-XXX
x-xss-protection:
- X-XSS-PROTECTION-XXX
status:
code: 201
message: Created
- request:
body: '{"messages":[{"role":"system","content":"You are a strategic planning assistant.
Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
a focused execution plan for the following task:\n\n## Task\nCalculate the sum
of the first 3 prime numbers, then multiply that result by 2. Show your work
for each step.\n\n## Expected Output\nComplete the task successfully\n\n## Available
Tools\nNo tools available\n\n## Instructions\nCreate ONLY the essential steps
needed to complete this task. Use the MINIMUM number of steps required - do
NOT pad your plan with unnecessary steps. Most tasks need only 2-5 steps.\n\nFor
each step:\n- State the specific action to take\n- Specify which tool to use
(if any)\n\nDo NOT include:\n- Setup or preparation steps that are obvious\n-
Verification steps unless critical\n- Documentation or cleanup steps unless
explicitly required\n- Generic steps like \"review results\" or \"finalize output\"\n\nAfter
your plan, state:\n- \"READY: I am ready to execute the task.\" if the plan
is complete\n- \"NOT READY: I need to refine my plan because [reason].\" if
you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
Tools\nNo tools available\n\n## Planning Principles\nFocus on WHAT needs to
be accomplished, not HOW. Group related actions into logical units. Fewer steps
= better. Most tasks need 3-6 steps. Hard limit: 10 steps.\n\n## Step Types
(only these are valid):\n1. **Tool Step**: Uses a tool to gather information
or take action\n2. **Output Step**: Synthesizes prior results into the final
deliverable (usually the last step)\n\n## Rules:\n- Each step must either USE
A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine related tool calls: \"Research
A, B, and C\" = ONE step, not three\n- Combine all synthesis into ONE final
output step\n- NO standalone \"thinking\" steps (review, verify, confirm, refine,
analyze) - these happen naturally between steps\n\nFor each step: State the
action, specify the tool (if any), and note dependencies.\n\nAfter your plan,
state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -30,7 +111,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1636'
- '2410'
content-type:
- application/json
host:
@@ -57,20 +138,26 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTWa7FxCHkHwHF25AYXXeJDBOuY\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078150,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62F62rSoHIF6DpZZFowcKaVmb8Iu\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330960,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Identify
the first 3 prime numbers: 2, 3, and 5.\\n2. Calculate the sum: \\\\(2 + 3
+ 5 = 10\\\\).\\n3. Multiply the sum by 2: \\\\(10 \\\\times 2 = 20\\\\).\\n\\nREADY:
I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 299,\n \"completion_tokens\":
74,\n \"total_tokens\": 373,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_bJJDEK5hizeG4PyxSUynX9x8\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of the
first 3 prime numbers and multiply that sum by 2.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Identify
the first 3 prime numbers (2, 3, 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Calculate
the sum of the identified prime numbers (2 + 3 + 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Multiply
the sum by 2.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]},{\\\"step_number\\\":4,\\\"description\\\":\\\"Output
the final result.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[3]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 458,\n \"completion_tokens\":
160,\n \"total_tokens\": 618,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -79,7 +166,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:32 GMT
- Thu, 05 Feb 2026 22:36:03 GMT
Server:
- cloudflare
Set-Cookie:
@@ -99,7 +186,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1716'
- '2448'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -125,10 +212,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert
math tutor who breaks down problems step by step\nYour personal goal is: Solve
multi-step math problems accurately"},{"role":"user","content":"\nCurrent Task:
Calculate the sum of the first 3 prime numbers, then multiply that result by
2. Show your work for each step.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step
math problems accurately\n\nYou are executing a specific step in a multi-step
plan. Focus ONLY on completing\nthe current step. Do not plan ahead or worry
about future steps.\n\nBefore acting, briefly reason about what you need to
do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nIdentify the first 3 prime numbers (2, 3, 5).\n\nComplete this
step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -141,7 +231,7 @@ interactions:
connection:
- keep-alive
content-length:
- '400'
- '622'
content-type:
- application/json
cookie:
@@ -170,30 +260,24 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTYJgCZf2oY7wiPMZmN4QEQhHb5\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078152,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62F9RUZUkszp0yCWHbNJVTG16bx9\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330963,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To solve the problem, let's break it
down into two main steps: \\n\\n1. Calculate the sum of the first 3 prime
numbers.\\n2. Multiply the result of that sum by 2.\\n\\n### Step 1: Identify
the first 3 prime numbers\\nPrime numbers are natural numbers greater than
1 that have no positive divisors other than 1 and themselves. \\n\\nThe first
three prime numbers are:\\n- 2\\n- 3\\n- 5\\n\\n### Step 2: Calculate the
sum of the first 3 prime numbers\\nNow, we add these prime numbers together:\\n\\n\\\\[\\n2
+ 3 + 5\\n\\\\]\\n\\nCalculating this step-by-step:\\n- First, add 2 and 3:\\n
\ \\\\[\\n 2 + 3 = 5\\n \\\\]\\n \\n- Next, add this result to 5:\\n \\\\[\\n
\ 5 + 5 = 10\\n \\\\]\\n\\nSo, the sum of the first 3 prime numbers is \\\\(10\\\\).\\n\\n###
Step 3: Multiply the sum by 2\\nNext, we take the sum we calculated and multiply
it by 2:\\n\\n\\\\[\\n10 \\\\times 2\\n\\\\]\\n\\nCalculating this:\\n\\\\[\\n10
\\\\times 2 = 20\\n\\\\]\\n\\n### Final Answer\\nThus, the final result obtained
after performing all the steps is:\\n\\n\\\\[\\n\\\\boxed{20}\\n\\\\]\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
74,\n \"completion_tokens\": 288,\n \"total_tokens\": 362,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": \"To identify the first three prime numbers,
we need to recall the definition of a prime number: it is a natural number
greater than 1 that has no positive divisors other than 1 and itself. \\n\\nStarting
from 2, we find:\\n1. The number **2** is prime (divisors are 1 and 2).\\n2.
The number **3** is prime (divisors are 1 and 3).\\n3. The number **4** is
not prime (divisors are 1, 2, and 4).\\n4. The number **5** is prime (divisors
are 1 and 5).\\n\\nThus, the first three prime numbers are **2, 3, and 5**.
\\n\\nResult: 2, 3, 5.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 123,\n \"completion_tokens\":
166,\n \"total_tokens\": 289,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -202,7 +286,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:37 GMT
- Thu, 05 Feb 2026 22:36:06 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -220,7 +304,444 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '4751'
- '3090'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Identify the first 3 prime numbers (2, 3, 5).\\nResult: To identify the first
three prime numbers, we need to recall the definition of a prime number: it
is a natural number greater than 1 that has no positive divisors other than
1 and itself. \\n\\nStarting from 2, we find:\\n1. The number **2** is prime
(divisors are 1 and 2).\\n2. The number **3** is prime (divisors are 1 and 3).\\n3.
The number **4** is not prime (divisors are 1, 2, and 4).\\n4. The number **5**
is prime (divisors are 1 and 5).\\n\\nThus, the first three prime numbers are
**2, 3, and 5**. \\n\\nResult: 2, 3, 5.\\n\\n## Remaining plan steps:\\n Step
2: Calculate the sum of the identified prime numbers (2 + 3 + 5).\\n Step 3:
Multiply the sum by 2.\\n Step 4: Output the final result.\\n\\nAnalyze this
step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4561'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FCKhhkyZ4k2uH2KyhxsGnWEM7R\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330966,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
first three prime numbers have been correctly identified as 2, 3, and 5.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
960,\n \"completion_tokens\": 72,\n \"total_tokens\": 1032,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:07 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1058'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert
math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step
math problems accurately\n\nYou are executing a specific step in a multi-step
plan. Focus ONLY on completing\nthe current step. Do not plan ahead or worry
about future steps.\n\nBefore acting, briefly reason about what you need to
do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nCalculate the sum of the identified prime numbers (2 + 3 + 5).\n\n##
Context from previous steps:\nStep 1 result: To identify the first three prime
numbers, we need to recall the definition of a prime number: it is a natural
number greater than 1 that has no positive divisors other than 1 and itself.
\n\nStarting from 2, we find:\n1. The number **2** is prime (divisors are 1
and 2).\n2. The number **3** is prime (divisors are 1 and 3).\n3. The number
**4** is not prime (divisors are 1, 2, and 4).\n4. The number **5** is prime
(divisors are 1 and 5).\n\nThus, the first three prime numbers are **2, 3, and
5**. \n\nResult: 2, 3, 5.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1213'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FDWh7MhYTKIsLCnq6r5iXrbdrN\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330967,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To calculate the sum of the identified
prime numbers (2 + 3 + 5), I will follow these steps:\\n\\n1. Add the first
two prime numbers: \\n - \\\\( 2 + 3 = 5 \\\\)\\n\\n2. Then, add the result
to the third prime number:\\n - \\\\( 5 + 5 = 10 \\\\)\\n\\nSo the sum of
the identified prime numbers (2 + 3 + 5) is **10**.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
301,\n \"completion_tokens\": 95,\n \"total_tokens\": 396,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:09 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1470'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
\ Step 1: Identify the first 3 prime numbers (2, 3, 5).\\n Result: To identify
the first three prime numbers, we need to recall the definition of a prime number:
it is a natural number greater than 1 that has no positive divisors other than
1 and itself. \\n\\nStarting f\\n\\n## Just completed step 2\\nDescription:
Calculate the sum of the identified prime numbers (2 + 3 + 5).\\nResult: To
calculate the sum of the identified prime numbers (2 + 3 + 5), I will follow
these steps:\\n\\n1. Add the first two prime numbers: \\n - \\\\( 2 + 3 =
5 \\\\)\\n\\n2. Then, add the result to the third prime number:\\n - \\\\(
5 + 5 = 10 \\\\)\\n\\nSo the sum of the identified prime numbers (2 + 3 + 5)
is **10**.\\n\\n## Remaining plan steps:\\n Step 3: Multiply the sum by 2.\\n
\ Step 4: Output the final result.\\n\\nAnalyze this step's result and provide
your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4591'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FFIa3JdCnNkh6sa0wz28i55ni1\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330969,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":false,\\\"key_information_learned\\\":\\\"The
calculation for the sum of the identified prime numbers was incorrect; it
should be 2 + 3 + 5 = 10, but there was a typo where the last addition was
mistakenly written as 5 + 5 instead of 5 + 2.\\\",\\\"remaining_plan_still_valid\\\":false,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":true,\\\"replan_reason\\\":\\\"The
remaining steps are based on an incorrect sum, making them invalid. The calculations
must be restarted from the correct determination of the sum of the prime numbers.\\\",\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
942,\n \"completion_tokens\": 135,\n \"total_tokens\": 1077,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:11 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '2300'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -42,17 +42,17 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yXGD5IrieoUDSK5hDmJyA2gJtDc\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078382,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FLMJF1jiuD18qhDDxWFYzJxWk3\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330975,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\":
\"assistant\",\n \"content\": \"The sum of 5 + 5 is 10.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -61,7 +61,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:26:23 GMT
- Thu, 05 Feb 2026 22:36:16 GMT
Server:
- cloudflare
Set-Cookie:
@@ -81,7 +81,113 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '363'
- '342'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '260'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FM3zRv6CP5jgOiAWIaTukuPjwP\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330976,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:16 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '488'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -5,18 +5,25 @@ interactions:
a focused execution plan for the following task:\n\n## Task\nConvert 100 degrees
Celsius to Fahrenheit, then round the result to the nearest 10.\n\n## Expected
Output\nComplete the task successfully\n\n## Available Tools\nNo tools available\n\n##
Instructions\nCreate ONLY the essential steps needed to complete this task.
Use the MINIMUM number of steps required - do NOT pad your plan with unnecessary
steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State the specific
action to take\n- Specify which tool to use (if any)\n\nDo NOT include:\n- Setup
or preparation steps that are obvious\n- Verification steps unless critical\n-
Documentation or cleanup steps unless explicitly required\n- Generic steps like
\"review results\" or \"finalize output\"\n\nAfter your plan, state:\n- \"READY:
I am ready to execute the task.\" if the plan is complete\n- \"NOT READY: I
need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
Planning Principles\nFocus on WHAT needs to be accomplished, not HOW. Group
related actions into logical units. Fewer steps = better. Most tasks need 3-6
steps. Hard limit: 10 steps.\n\n## Step Types (only these are valid):\n1. **Tool
Step**: Uses a tool to gather information or take action\n2. **Output Step**:
Synthesizes prior results into the final deliverable (usually the last step)\n\n##
Rules:\n- Each step must either USE A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine
related tool calls: \"Research A, B, and C\" = ONE step, not three\n- Combine
all synthesis into ONE final output step\n- NO standalone \"thinking\" steps
(review, verify, confirm, refine, analyze) - these happen naturally between
steps\n\nFor each step: State the action, specify the tool (if any), and note
dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -29,7 +36,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1610'
- '2384'
content-type:
- application/json
host:
@@ -56,20 +63,25 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTN8fHOefyzzhvdUOHjxdFDR2HW\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078141,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FU2te4ww3DuIzbuySwWTIPTx6A\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330984,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Convert 100
degrees Celsius to Fahrenheit using the formula: \\\\( F = C \\\\times \\\\frac{9}{5}
+ 32 \\\\).\\n2. Round the Fahrenheit result to the nearest 10.\\n\\nREADY:
I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 291,\n \"completion_tokens\":
58,\n \"total_tokens\": 349,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_0LXFaxnsqT2kFmUyanui30k0\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Convert 100 degrees Celsius
to Fahrenheit and round the result to the nearest 10.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Convert
100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Round
the Fahrenheit result to the nearest 10.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Output
the final rounded temperature in Fahrenheit.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 450,\n \"completion_tokens\":
133,\n \"total_tokens\": 583,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -78,7 +90,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:22 GMT
- Thu, 05 Feb 2026 22:36:26 GMT
Server:
- cloudflare
Set-Cookie:
@@ -98,7 +110,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1089'
- '1976'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -124,10 +136,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise
unit conversion specialist\nYour personal goal is: Accurately convert between
units and apply transformations"},{"role":"user","content":"\nCurrent Task:
Convert 100 degrees Celsius to Fahrenheit, then round the result to the nearest
10.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
unit conversion specialist\n\nYour goal: Accurately convert between units and
apply transformations\n\nYou are executing a specific step in a multi-step plan.
Focus ONLY on completing\nthe current step. Do not plan ahead or worry about
future steps.\n\nBefore acting, briefly reason about what you need to do and
which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nConvert 100 degrees Celsius to Fahrenheit using the formula (C
* 9/5) + 32.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -140,7 +155,7 @@ interactions:
connection:
- keep-alive
content-length:
- '373'
- '651'
content-type:
- application/json
cookie:
@@ -169,26 +184,21 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTPQewXDyPdYHI4dHPH7YGHcRge\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078143,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FWhREtHEudJMFFypgh33C8GLdH\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330986,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To convert degrees Celsius to Fahrenheit,
you can use the formula:\\n\\n\\\\[ F = \\\\left( C \\\\times \\\\frac{9}{5}
\\\\right) + 32 \\\\]\\n\\nPlugging in 100 degrees Celsius:\\n\\n\\\\[ F =
\\\\left( 100 \\\\times \\\\frac{9}{5} \\\\right) + 32 \\\\]\\n\\nCalculating
that step-by-step:\\n\\n1. Multiply 100 by 9: \\n \\\\[ 100 \\\\times 9
= 900 \\\\]\\n\\n2. Divide by 5:\\n \\\\[ 900 \\\\div 5 = 180 \\\\]\\n\\n3.
Add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nSo, 100 degrees Celsius is equal
to 212 degrees Fahrenheit.\\n\\nNow, rounding 212 to the nearest 10:\\n\\nThe
nearest multiple of 10 to 212 is 210.\\n\\nTherefore, the final result is
**210 degrees Fahrenheit**.\",\n \"refusal\": null,\n \"annotations\":
\"assistant\",\n \"content\": \"To convert 100 degrees Celsius to Fahrenheit
using the formula (C * 9/5) + 32, we substitute C with 100:\\n\\n1. Multiply
100 by 9/5:\\n \\\\[ 100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2.
Then add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore, 100 degrees Celsius
is equal to 212 degrees Fahrenheit.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 63,\n \"completion_tokens\":
191,\n \"total_tokens\": 254,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 126,\n \"completion_tokens\":
101,\n \"total_tokens\": 227,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -197,7 +207,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:26 GMT
- Thu, 05 Feb 2026 22:36:27 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -215,7 +225,548 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '3736'
- '1505'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\nResult:
To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32,
we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n \\\\[ 100 * \\\\frac{9}{5}
= 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore,
100 degrees Celsius is equal to 212 degrees Fahrenheit.\\n\\n## Remaining plan
steps:\\n Step 2: Round the Fahrenheit result to the nearest 10.\\n Step 3:
Output the final rounded temperature in Fahrenheit.\\n\\nAnalyze this step's
result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4342'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FXF5UZlLp9eu5O7HsZvIvpC4My\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330987,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully
converted 100 degrees Celsius to 212 degrees Fahrenheit.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":[\\\"Step
2 should round 212 to the nearest 10, resulting in 210.\\\"],\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
885,\n \"completion_tokens\": 81,\n \"total_tokens\": 966,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:29 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '2195'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are refining upcoming plan
steps based on new information. Update the step descriptions to be more specific
and actionable given what was learned. Keep the same step numbers.\n\nRespond
with one line per step in the format:\nStep N: <refined description>"},{"role":"user","content":"##
New information learned\nSuccessfully converted 100 degrees Celsius to 212 degrees
Fahrenheit.\n\n## Suggested refinements\nStep 2 should round 212 to the nearest
10, resulting in 210.\n\n## Current pending steps\nStep 2: Round the Fahrenheit
result to the nearest 10.\nStep 3: Output the final rounded temperature in Fahrenheit.\n\nUpdate
the step descriptions to incorporate the new information."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '754'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FaclC9tg2ClH7HU3pfMzmlPJpB\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330990,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Step 2: Round the Fahrenheit result
of 212 degrees to the nearest 10, resulting in 210 degrees. \\nStep 3: Output
the final rounded temperature as 210 degrees Fahrenheit.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
142,\n \"completion_tokens\": 40,\n \"total_tokens\": 182,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:30 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '706'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise
unit conversion specialist\n\nYour goal: Accurately convert between units and
apply transformations\n\nYou are executing a specific step in a multi-step plan.
Focus ONLY on completing\nthe current step. Do not plan ahead or worry about
future steps.\n\nBefore acting, briefly reason about what you need to do and
which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nRound the Fahrenheit result of 212 degrees to the nearest 10,
resulting in 210 degrees.\n\n## Context from previous steps:\nStep 1 result:
To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32,
we substitute C with 100:\n\n1. Multiply 100 by 9/5:\n \\[ 100 * \\frac{9}{5}
= 100 * 1.8 = 180 \\]\n\n2. Then add 32:\n \\[ 180 + 32 = 212 \\]\n\nTherefore,
100 degrees Celsius is equal to 212 degrees Fahrenheit.\n\nComplete this step
and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1011'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62Fb9PlGlUIcZRS2v2Lp9S62brRP\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330991,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To complete this step, I will round
the Fahrenheit result of 212 degrees to the nearest 10. When rounding, since
212 is closer to 210 than it is to 220, I will round it down to 210 degrees.\\n\\nResult:
210 degrees Fahrenheit.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 236,\n \"completion_tokens\":
56,\n \"total_tokens\": 292,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:32 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1187'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
\ Step 1: Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5)
+ 32.\\n Result: To convert 100 degrees Celsius to Fahrenheit using the formula
(C * 9/5) + 32, we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n \\\\[
100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n \\\\[
18\\n\\n## Just completed step 2\\nDescription: Round the Fahrenheit result
of 212 degrees to the nearest 10, resulting in 210 degrees.\\nResult: To complete
this step, I will round the Fahrenheit result of 212 degrees to the nearest
10. When rounding, since 212 is closer to 210 than it is to 220, I will round
it down to 210 degrees.\\n\\nResult: 210 degrees Fahrenheit.\\n\\n## Remaining
plan steps:\\n Step 3: Output the final rounded temperature as 210 degrees
Fahrenheit.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4579'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FctLDvklBSvOY641JCvwFaTugO\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330992,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully
rounded the Fahrenheit result of 212 degrees down to 210 degrees.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
941,\n \"completion_tokens\": 67,\n \"total_tokens\": 1008,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:33 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1208'
openai-project:
- OPENAI-PROJECT-XXX
openai-version: