class PlannerObserver:
    """Observes step execution results and decides on plan continuation.

    After EVERY step execution (successes included), this class:
    1. Analyzes what the step accomplished
    2. Identifies new information learned
    3. Decides if the remaining plan is still valid
    4. Suggests lightweight refinements or triggers full replanning

    LLM resolution:
    - If ``agent.planning_config.llm`` is explicitly set → use that
    - Otherwise → fall back to ``agent.llm`` (same LLM for everything)

    Args:
        agent: The agent instance (for LLM resolution and config).
        task: Optional task context (for description and expected output).
    """

    def __init__(self, agent: Agent, task: Task | None = None) -> None:
        self.agent = agent
        self.task = task
        self.llm = self._resolve_llm()

    def _resolve_llm(self) -> Any:
        """Resolve which LLM to use for observation/planning.

        Uses ``planning_config.llm`` when the agent has a planning config with
        an LLM set, otherwise falls back to ``agent.llm``.

        Returns:
            The resolved LLM instance.
        """
        # Imported locally, as in the original implementation.
        from crewai.llm import LLM

        config = getattr(self.agent, "planning_config", None)
        if config is None or config.llm is None:
            return self.agent.llm
        if isinstance(config.llm, LLM):
            return config.llm
        # Anything that is not already an LLM instance goes through create_llm.
        return create_llm(config.llm)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def observe(
        self,
        completed_step: TodoItem,
        result: str,
        all_completed: list[TodoItem],
        remaining_todos: list[TodoItem],
    ) -> StepObservation:
        """Observe a step's result and decide on plan continuation.

        This runs after EVERY step execution — not just failures. Emits a
        started event, then either a completed or a failed event.

        Args:
            completed_step: The todo item that was just executed.
            result: The final result string from the step.
            all_completed: All previously completed todos (for context).
            remaining_todos: The pending todos still in the plan.

        Returns:
            StepObservation with the Planner's analysis. If the LLM call (or
            anything else inside the observation) raises, a permissive
            "continue" observation is returned instead of propagating.
        """
        agent_role = self.agent.role if self.agent else "unknown"

        crewai_event_bus.emit(
            self.agent,
            event=StepObservationStartedEvent(
                agent_role=agent_role,
                step_number=completed_step.step_number,
                step_description=completed_step.description,
                from_task=self.task,
                from_agent=self.agent,
            ),
        )

        messages = self._build_observation_messages(
            completed_step, result, all_completed, remaining_todos
        )

        try:
            response = self.llm.call(
                messages,
                response_model=StepObservation,
                from_task=self.task,
                from_agent=self.agent,
            )

            if isinstance(response, StepObservation):
                observation = response
            else:
                # The LLM returned raw text instead of structured output:
                # keep the text as learned information and continue the plan.
                observation = StepObservation(
                    step_completed_successfully=True,
                    key_information_learned=str(response) if response else "",
                    remaining_plan_still_valid=True,
                )

            crewai_event_bus.emit(
                self.agent,
                event=StepObservationCompletedEvent(
                    agent_role=agent_role,
                    step_number=completed_step.step_number,
                    step_description=completed_step.description,
                    step_completed_successfully=observation.step_completed_successfully,
                    key_information_learned=observation.key_information_learned,
                    remaining_plan_still_valid=observation.remaining_plan_still_valid,
                    needs_full_replan=observation.needs_full_replan,
                    replan_reason=observation.replan_reason,
                    goal_already_achieved=observation.goal_already_achieved,
                    suggested_refinements=observation.suggested_refinements,
                    from_task=self.task,
                    from_agent=self.agent,
                ),
            )

            return observation

        except Exception as e:
            # Best-effort by design: a failed observation must not stop the plan.
            logger.warning(
                "Observation LLM call failed: %s. Defaulting to continue.", e
            )

            crewai_event_bus.emit(
                self.agent,
                event=StepObservationFailedEvent(
                    agent_role=agent_role,
                    step_number=completed_step.step_number,
                    step_description=completed_step.description,
                    error=str(e),
                    from_task=self.task,
                    from_agent=self.agent,
                ),
            )

            return StepObservation(
                step_completed_successfully=True,
                key_information_learned="",
                remaining_plan_still_valid=True,
            )

    def refine_todos(
        self,
        observation: StepObservation,
        remaining_todos: list[TodoItem],
    ) -> list[TodoItem]:
        """Refine pending todo descriptions based on observation.

        This is a LIGHTWEIGHT operation — no full replan. It updates the
        description field of pending todos based on new information learned.

        Example: Step 1 found "3 products: A, B, C" → Step 2 changes from
        "Select the best product" to "Select product B (highest rated)"

        Args:
            observation: The observation with suggested refinements.
            remaining_todos: The pending todos to refine.

        Returns:
            The same ``remaining_todos`` list; matching items have their
            descriptions updated in place. Unchanged when the observation has
            no suggested refinements or the LLM call fails.
        """
        if not observation.suggested_refinements:
            return remaining_todos

        messages = self._build_refinement_messages(observation, remaining_todos)

        try:
            response = self.llm.call(
                messages,
                from_task=self.task,
                from_agent=self.agent,
            )
            if response:
                self._apply_refinements(str(response), remaining_todos)
        except Exception as e:
            # Best-effort: keep the plan as it was if refinement fails.
            logger.warning(
                "Refinement LLM call failed: %s. Keeping original descriptions.", e
            )

        return remaining_todos

    # ------------------------------------------------------------------
    # Internal: Message building
    # ------------------------------------------------------------------

    def _build_observation_messages(
        self,
        completed_step: TodoItem,
        result: str,
        all_completed: list[TodoItem],
        remaining_todos: list[TodoItem],
    ) -> list[LLMMessage]:
        """Build the system/user message pair for the observation LLM call.

        Args:
            completed_step: The step that just finished.
            result: That step's result text (included in full).
            all_completed: Earlier completed steps; each result is previewed
                with at most 200 characters.
            remaining_todos: Steps still pending in the plan.

        Returns:
            A two-element list: the system message followed by the user message.
        """
        task_desc = ""
        task_goal = ""
        if self.task:
            task_desc = self.task.description or ""
            task_goal = self.task.expected_output or ""

        system_prompt = (
            "You are a Planning Agent observing execution progress. "
            "After each step completes, you analyze what happened and decide "
            "whether the remaining plan is still valid.\n\n"
            "Reason step-by-step about:\n"
            "1. What new information was learned from this step's result\n"
            "2. Whether the remaining steps still make sense given this new information\n"
            "3. What refinements, if any, are needed for upcoming steps\n"
            "4. Whether the overall goal has already been achieved\n\n"
            "Be conservative about triggering full replans — only do so when the "
            "remaining plan is fundamentally wrong, not just suboptimal."
        )

        # Context of what has been done so far (results truncated to 200 chars).
        completed_summary = ""
        if all_completed:
            completed_lines = [
                f" Step {todo.step_number}: {todo.description}\n"
                f" Result: {(todo.result or '')[:200]}"
                for todo in all_completed
            ]
            completed_summary = "\n## Previously completed steps:\n" + "\n".join(
                completed_lines
            )

        # The plan that is still ahead.
        remaining_summary = ""
        if remaining_todos:
            remaining_lines = [
                f" Step {todo.step_number}: {todo.description}"
                for todo in remaining_todos
            ]
            remaining_summary = "\n## Remaining plan steps:\n" + "\n".join(
                remaining_lines
            )

        user_prompt = (
            f"## Original task\n{task_desc}\n\n"
            f"## Expected output\n{task_goal}\n"
            f"{completed_summary}\n"
            f"\n## Just completed step {completed_step.step_number}\n"
            f"Description: {completed_step.description}\n"
            f"Result: {result}\n"
            f"{remaining_summary}\n\n"
            "Analyze this step's result and provide your observation."
        )

        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    def _build_refinement_messages(
        self,
        observation: StepObservation,
        remaining_todos: list[TodoItem],
    ) -> list[LLMMessage]:
        """Build the system/user message pair for the refinement LLM call.

        Args:
            observation: Supplies the learned information and the suggested
                refinements (one per line in the prompt).
            remaining_todos: Pending steps, rendered as ``Step N: description``.

        Returns:
            A two-element list: the system message followed by the user message.
        """
        system_prompt = (
            "You are refining upcoming plan steps based on new information. "
            "Update the step descriptions to be more specific and actionable "
            "given what was learned. Keep the same step numbers.\n\n"
            "Respond with one line per step in the format:\n"
            "Step N: "
        )

        refinements = "\n".join(observation.suggested_refinements or [])
        todo_lines = "\n".join(
            f"Step {t.step_number}: {t.description}" for t in remaining_todos
        )

        user_prompt = (
            f"## New information learned\n{observation.key_information_learned}\n\n"
            f"## Suggested refinements\n{refinements}\n\n"
            f"## Current pending steps\n{todo_lines}\n\n"
            "Update the step descriptions to incorporate the new information."
        )

        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    def _apply_refinements(
        self,
        llm_response: str,
        remaining_todos: list[TodoItem],
    ) -> None:
        """Parse the LLM refinement response and update todo descriptions.

        Each line of the form ``Step N: text`` replaces the description of the
        pending todo whose ``step_number`` is ``N``. Matching is tolerant of
        the decorations LLMs commonly add: a leading bullet/quote/heading
        marker, bold markers around the label (``**Step 2:**``) and any
        letter case (``step 2:``). Lines that do not match, that reference an
        unknown step, or that carry an empty description are ignored.

        Args:
            llm_response: Raw text returned by the refinement LLM call.
            remaining_todos: Pending todos; mutated in place.
        """
        # Local import: keeps this block self-contained.
        import re

        step_line = re.compile(
            r"^(?:[-*+>#•]+\s*)?(?:\*\*|__)?\s*step\s+(\d+)\s*(?:\*\*|__)?\s*:"
            r"\s*(?:\*\*|__)?\s*(.*)$",
            re.IGNORECASE,
        )

        # step_number -> todo, for O(1) lookup per response line.
        todo_by_step: dict[int, TodoItem] = {
            t.step_number: t for t in remaining_todos
        }

        for raw_line in llm_response.strip().split("\n"):
            match = step_line.match(raw_line.strip())
            if match is None:
                continue

            description = match.group(2).strip()
            todo = todo_by_step.get(int(match.group(1)))
            if todo is not None and description:
                todo.description = description
class StepExecutor:
    """Executes a SINGLE todo item in isolation using a bounded ReAct loop.

    The StepExecutor owns its own message list per invocation. It never reads
    or writes the AgentExecutor's state. Results flow back via StepResult.

    The internal loop:
    1. Build messages from todo + context
    2. Call LLM (with or without native tools)
    3. If tool call → execute tool, append result, loop back to 2
    4. If final answer → return StepResult
    5. If max iterations → force final answer

    Args:
        llm: The language model to use for execution.
        tools: Structured tools available to the executor.
        agent: The agent instance (for role/goal/verbose/config).
        original_tools: Original BaseTool instances (needed for native tool schema).
        tools_handler: Optional tools handler for caching and delegation tracking.
        task: Optional task context.
        crew: Optional crew context.
        function_calling_llm: Optional separate LLM for function calling.
        request_within_rpm_limit: Optional RPM limit function.
        callbacks: Optional list of callbacks.
        i18n: Optional I18N instance; defaults to ``get_i18n()``.
    """

    def __init__(
        self,
        llm: BaseLLM,
        tools: list[CrewStructuredTool],
        agent: Agent,
        original_tools: list[BaseTool] | None = None,
        tools_handler: ToolsHandler | None = None,
        task: Task | None = None,
        crew: Crew | None = None,
        function_calling_llm: BaseLLM | Any | None = None,
        request_within_rpm_limit: Callable[[], bool] | None = None,
        callbacks: list[Any] | None = None,
        i18n: I18N | None = None,
    ) -> None:
        self.llm = llm
        self.tools = tools
        self.agent = agent
        self.original_tools = original_tools or []
        self.tools_handler = tools_handler
        self.task = task
        self.crew = crew
        self.function_calling_llm = function_calling_llm
        self.request_within_rpm_limit = request_within_rpm_limit
        self.callbacks = callbacks or []
        self._i18n: I18N = i18n or get_i18n()
        self._printer: Printer = Printer()

        # Native tool support — decided and set up once per executor.
        self._use_native_tools = self._check_native_tool_support()
        self._openai_tools: list[dict[str, Any]] = []
        self._available_functions: dict[str, Callable[..., Any]] = {}
        if self._use_native_tools:
            self._setup_native_tools()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def execute(self, todo: TodoItem, context: StepExecutionContext) -> StepResult:
        """Execute a single todo item in isolation.

        Builds a fresh message list, runs a bounded ReAct loop, and returns
        the result. Any exception raised while executing is converted into a
        failed StepResult rather than propagated.

        Args:
            todo: The todo item to execute.
            context: Context with task info and dependency results.

        Returns:
            StepResult with the outcome, the tool names called so far and the
            elapsed wall time (monotonic clock).
        """
        start_time = time.monotonic()
        tool_calls_made: list[str] = []

        try:
            messages = self._build_isolated_messages(todo, context)
            result_text = self._run_react_loop(todo, messages, tool_calls_made)
        except Exception as e:
            # Step boundary: report the failure through the result object.
            return StepResult(
                success=False,
                result="",
                error=str(e),
                tool_calls_made=tool_calls_made,
                execution_time=time.monotonic() - start_time,
            )

        return StepResult(
            success=True,
            result=result_text,
            tool_calls_made=tool_calls_made,
            execution_time=time.monotonic() - start_time,
        )

    # ------------------------------------------------------------------
    # Internal: Message building
    # ------------------------------------------------------------------

    def _build_isolated_messages(
        self, todo: TodoItem, context: StepExecutionContext
    ) -> list[LLMMessage]:
        """Build a fresh message list for this step's execution.

        System prompt tells the LLM it is an Executor focused on one step.
        User prompt provides the step description and dependency results.
        """
        return [
            format_message_for_llm(self._build_system_prompt(), role="system"),
            format_message_for_llm(
                self._build_user_prompt(todo, context), role="user"
            ),
        ]

    def _build_system_prompt(self) -> str:
        """Build the Executor's system prompt.

        Emphasizes: complete THIS step only. Do not plan ahead. When tools are
        available but native function calling is not used, the text-based
        tool-calling format is appended.
        """
        role = self.agent.role if self.agent else "Assistant"
        goal = self.agent.goal if self.agent else "Complete tasks efficiently"
        backstory = getattr(self.agent, "backstory", "") or ""

        tools_section = ""
        if self.tools and not self._use_native_tools:
            tool_names = ", ".join(sanitize_tool_name(t.name) for t in self.tools)
            tools_section = f"\n\nAvailable tools: {tool_names}"
            tools_section += "\n\nTo use a tool, respond with:\nThought: \nAction: \nAction Input: "
            tools_section += "\n\nWhen you have the final answer, respond with:\nThought: \nFinal Answer: "

        return f"""You are {role}. {backstory}

Your goal: {goal}

You are executing a specific step in a multi-step plan. Focus ONLY on completing
the current step. Do not plan ahead or worry about future steps.

Before acting, briefly reason about what you need to do and which approach
or tool would be most helpful for this specific step.{tools_section}"""

    def _build_user_prompt(self, todo: TodoItem, context: StepExecutionContext) -> str:
        """Build the user prompt for this specific step.

        Contains the step description, the suggested tool (if any) and the
        results of dependency steps, ordered by step number.
        """
        parts: list[str] = [f"## Current Step\n{todo.description}"]

        if todo.tool_to_use:
            parts.append(f"\nSuggested tool: {todo.tool_to_use}")

        # Dependency results only (final results, no execution traces).
        if context.dependency_results:
            parts.append("\n## Context from previous steps:")
            parts.extend(
                f"Step {step_num} result: {result}"
                for step_num, result in sorted(context.dependency_results.items())
            )

        parts.append("\nComplete this step and provide your result.")
        return "\n".join(parts)

    # ------------------------------------------------------------------
    # Internal: Bounded ReAct loop
    # ------------------------------------------------------------------

    def _run_react_loop(
        self,
        todo: TodoItem,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str:
        """Run a bounded ReAct loop for a single step.

        Args:
            todo: The step being executed (kept for interface compatibility).
            messages: Conversation for this step; extended in place.
            tool_calls_made: Accumulator of tool names; extended in place.

        Returns:
            The final answer text, forced if the iteration limit is reached.
        """
        iterate = (
            self._native_tool_iteration
            if self._use_native_tools
            else self._text_parsed_iteration
        )

        for _ in range(_MAX_STEP_ITERATIONS):
            enforce_rpm_limit(self.request_within_rpm_limit)
            result = iterate(messages, tool_calls_made)
            if result is not None:
                return result
            # No final answer yet — continue with the updated messages.

        return self._force_final_answer(messages)

    def _text_parsed_iteration(
        self,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Single iteration using text-parsed tool calling.

        Returns:
            The final answer string if done, None to continue looping.

        Raises:
            ValueError: If the LLM returns an empty response.
        """
        answer = self.llm.call(
            messages,
            callbacks=self.callbacks,
            from_task=self.task,
            from_agent=self.agent,
        )
        if not answer:
            raise ValueError("Empty response from LLM")

        answer_str = str(answer)
        use_stop_words = self.llm.supports_stop_words() if self.llm else False
        formatted = process_llm_response(answer_str, use_stop_words)

        if isinstance(formatted, AgentFinish):
            return str(formatted.output)

        if not isinstance(formatted, AgentAction):
            # Neither a finish nor an action: treat the raw text as the answer.
            return answer_str

        tool_calls_made.append(formatted.tool)

        fingerprint_context: dict[str, str] = {}
        if (
            self.agent
            and hasattr(self.agent, "security_config")
            and hasattr(self.agent.security_config, "fingerprint")
        ):
            fingerprint_context = {
                "agent_fingerprint": str(self.agent.security_config.fingerprint)
            }

        tool_result = execute_tool_and_check_finality(
            agent_action=formatted,
            fingerprint_context=fingerprint_context,
            tools=self.tools,
            i18n=self._i18n,
            agent_key=self.agent.key if self.agent else None,
            agent_role=self.agent.role if self.agent else None,
            tools_handler=self.tools_handler,
            task=self.task,
            agent=self.agent,
            function_calling_llm=self.function_calling_llm,
            crew=self.crew,
        )

        # Record the action together with its observation.
        observation = f"Observation: {tool_result.result}"
        messages.append(
            format_message_for_llm(
                formatted.text + f"\n{observation}",
                role="assistant",
            )
        )

        if tool_result.result_as_answer:
            return str(tool_result.result)

        # Ask the model to reason about the tool output on the next turn.
        reasoning_prompt = self._i18n.slice("post_tool_reasoning")
        messages.append(format_message_for_llm(reasoning_prompt, role="user"))
        return None

    def _native_tool_iteration(
        self,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Single iteration using native function calling.

        Returns:
            The final answer string if done, None to continue looping.

        Raises:
            ValueError: If the LLM returns an empty response.
        """
        answer = self.llm.call(
            messages,
            tools=self._openai_tools,
            callbacks=self.callbacks,
            from_task=self.task,
            from_agent=self.agent,
        )
        if not answer:
            raise ValueError("Empty response from LLM")

        # A list of tool calls: execute them and (usually) keep looping.
        if isinstance(answer, list) and answer and self._is_tool_call_list(answer):
            return self._execute_native_tool_calls(answer, messages, tool_calls_made)

        # Plain text is the final answer.
        if isinstance(answer, str):
            return answer

        # Structured (pydantic) responses are serialized to JSON.
        if isinstance(answer, BaseModel):
            return answer.model_dump_json()

        return str(answer)

    @staticmethod
    def _parse_tool_args(func_args: Any) -> dict[str, Any]:
        """Normalize raw tool-call arguments into a keyword-argument dict.

        JSON strings are decoded; anything that is not (or does not decode to)
        a dict — invalid JSON, ``null``, lists, scalars, ``None`` — becomes an
        empty dict so it can be splatted into the tool function and attached
        to events safely.
        """
        parsed = func_args
        if isinstance(func_args, str):
            try:
                parsed = json.loads(func_args)
            except json.JSONDecodeError:
                return {}
        return parsed if isinstance(parsed, dict) else {}

    @staticmethod
    def _log_warning(message: str, *args: Any) -> None:
        """Emit a warning through the module logger (lazy %-formatting)."""
        # Local import keeps this block independent of the file's import list.
        import logging

        logging.getLogger(__name__).warning(message, *args)

    def _execute_native_tool_calls(
        self,
        tool_calls: list[Any],
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Execute a batch of native tool calls and append results to messages.

        For every recognizable tool call this reads the cache, emits the
        started event, runs before-hooks, executes the tool (unless blocked,
        cached or over its usage limit), runs after-hooks, emits the finished
        event and appends a ``tool`` message.

        Args:
            tool_calls: Provider-specific tool call objects.
            messages: Conversation for this step; extended in place.
            tool_calls_made: Accumulator of tool names; extended in place.

        Returns:
            The result of the last ``result_as_answer`` tool that actually
            produced a result (executed successfully or served from cache),
            else None. Error, blocked, not-found and usage-limit messages are
            never promoted to a final answer, so the model can react to them.
        """
        # Extract (call_id, name, args) once; unrecognized entries are skipped.
        parsed_calls = []
        for tool_call in tool_calls:
            info = extract_tool_call_info(tool_call)
            if info:
                parsed_calls.append(info)

        if parsed_calls:
            assistant_message: LLMMessage = {
                "role": "assistant",
                "content": None,
                "tool_calls": [
                    {
                        "id": call_id,
                        "type": "function",
                        "function": {
                            "name": func_name,
                            "arguments": func_args
                            if isinstance(func_args, str)
                            else json.dumps(func_args),
                        },
                    }
                    for call_id, func_name, func_args in parsed_calls
                ],
            }
            # Preserve raw parts for Gemini compatibility
            if all(type(tc).__qualname__ == "Part" for tc in tool_calls):
                assistant_message["raw_tool_call_parts"] = list(tool_calls)
            messages.append(assistant_message)

        final_answer: str | None = None
        for call_id, func_name, func_args in parsed_calls:
            tool_calls_made.append(func_name)
            args_dict = self._parse_tool_args(func_args)

            agent_key = (
                getattr(self.agent, "key", "unknown") if self.agent else "unknown"
            )

            # Original tool: source of cache_function, usage limits and
            # result_as_answer.
            original_tool = next(
                (
                    tool
                    for tool in self.original_tools
                    if sanitize_tool_name(tool.name) == func_name
                ),
                None,
            )

            max_usage_reached = bool(
                original_tool
                and original_tool.max_usage_count is not None
                and original_tool.current_usage_count >= original_tool.max_usage_count
            )

            # Cache lookup
            from_cache = False
            input_str = json.dumps(args_dict) if args_dict else ""
            result = "Tool not found"

            if self.tools_handler and self.tools_handler.cache:
                cached_result = self.tools_handler.cache.read(
                    tool=func_name, input=input_str
                )
                if cached_result is not None:
                    result = (
                        str(cached_result)
                        if not isinstance(cached_result, str)
                        else cached_result
                    )
                    from_cache = True

            started_at = datetime.now()
            crewai_event_bus.emit(
                self,
                event=ToolUsageStartedEvent(
                    tool_name=func_name,
                    tool_args=args_dict,
                    from_agent=self.agent,
                    from_task=self.task,
                    agent_key=agent_key,
                ),
            )

            track_delegation_if_needed(func_name, args_dict, self.task)

            # Structured tool: handed to the hooks.
            structured_tool: CrewStructuredTool | None = next(
                (
                    structured
                    for structured in self.tools or []
                    if sanitize_tool_name(structured.name) == func_name
                ),
                None,
            )

            # Before hooks: a hook returning False blocks execution.
            hook_blocked = False
            before_hook_context = ToolCallHookContext(
                tool_name=func_name,
                tool_input=args_dict,
                tool=structured_tool,  # type: ignore[arg-type]
                agent=self.agent,
                task=self.task,
                crew=self.crew,
            )
            try:
                for hook in get_before_tool_call_hooks():
                    if hook(before_hook_context) is False:
                        hook_blocked = True
                        break
            except Exception as hook_error:
                # Hooks stay best-effort, but failures are no longer silent.
                self._log_warning(
                    "before_tool_call hook failed for tool %s: %s",
                    func_name,
                    hook_error,
                )

            # True only when `result` holds real tool output.
            tool_succeeded = False

            if hook_blocked:
                result = f"Tool execution blocked by hook. Tool: {func_name}"
            elif not from_cache and not max_usage_reached:
                if func_name in self._available_functions:
                    try:
                        tool_func = self._available_functions[func_name]
                        raw_result = tool_func(**args_dict)

                        if self.tools_handler and self.tools_handler.cache:
                            should_cache = True
                            if original_tool:
                                should_cache = original_tool.cache_function(
                                    args_dict, raw_result
                                )
                            if should_cache:
                                self.tools_handler.cache.add(
                                    tool=func_name, input=input_str, output=raw_result
                                )

                        result = (
                            str(raw_result)
                            if not isinstance(raw_result, str)
                            else raw_result
                        )
                        tool_succeeded = True
                    except Exception as e:
                        result = f"Error executing tool: {e}"
                        if self.task:
                            self.task.increment_tools_errors()
                        crewai_event_bus.emit(
                            self,
                            event=ToolUsageErrorEvent(
                                tool_name=func_name,
                                tool_args=args_dict,
                                from_agent=self.agent,
                                from_task=self.task,
                                agent_key=agent_key,
                                error=e,
                            ),
                        )
            elif max_usage_reached and original_tool:
                result = (
                    f"Tool '{func_name}' has reached its usage limit of "
                    f"{original_tool.max_usage_count} times and cannot be used anymore."
                )
            else:
                # Remaining case: a cache hit that was not blocked or limited.
                tool_succeeded = from_cache

            # After hooks: a non-None return value replaces the result.
            after_hook_context = ToolCallHookContext(
                tool_name=func_name,
                tool_input=args_dict,
                tool=structured_tool,  # type: ignore[arg-type]
                agent=self.agent,
                task=self.task,
                crew=self.crew,
                tool_result=result,
            )
            try:
                for after_hook in get_after_tool_call_hooks():
                    hook_result = after_hook(after_hook_context)
                    if hook_result is not None:
                        result = hook_result
                        after_hook_context.tool_result = result
            except Exception as hook_error:
                self._log_warning(
                    "after_tool_call hook failed for tool %s: %s",
                    func_name,
                    hook_error,
                )

            crewai_event_bus.emit(
                self,
                event=ToolUsageFinishedEvent(
                    output=result,
                    tool_name=func_name,
                    tool_args=args_dict,
                    from_agent=self.agent,
                    from_task=self.task,
                    agent_key=agent_key,
                    started_at=started_at,
                    finished_at=datetime.now(),
                ),
            )

            tool_message: LLMMessage = {
                "role": "tool",
                "tool_call_id": call_id,
                "name": func_name,
                "content": result,
            }
            messages.append(tool_message)

            if self.agent and self.agent.verbose:
                cache_info = " (from cache)" if from_cache else ""
                # str() guards the slice when a hook returned a non-string.
                self._printer.print(
                    content=f"Tool {func_name} executed with result{cache_info}: {str(result)[:200]}...",
                    color="green",
                )

            # Promote to final answer only for genuine tool output.
            if (
                tool_succeeded
                and original_tool
                and hasattr(original_tool, "result_as_answer")
                and original_tool.result_as_answer
            ):
                final_answer = result

        return final_answer  # None means: continue looping

    def _force_final_answer(self, messages: list[LLMMessage]) -> str:
        """Force the LLM to provide a final answer when max iterations reached.

        Appends a forcing prompt to ``messages`` and calls the LLM once more,
        without tools. If the answer contains ``Final Answer:``, only the text
        after its last occurrence is returned. If the call fails or returns
        nothing, a fixed fallback sentence is returned.
        """
        force_prompt = (
            "You have used the maximum number of tool calls for this step. "
            "Based on the information gathered so far, provide your final answer now."
        )
        if not self._use_native_tools:
            force_prompt += "\n\nFinal Answer: "

        messages.append(format_message_for_llm(force_prompt, role="user"))

        try:
            answer = self.llm.call(
                messages,
                callbacks=self.callbacks,
                from_task=self.task,
                from_agent=self.agent,
            )
            if answer:
                answer_str = str(answer)
                if "Final Answer:" in answer_str:
                    return answer_str.split("Final Answer:")[-1].strip()
                return answer_str
        except Exception as e:
            # Best-effort: fall through to the fixed fallback, but leave a trace.
            self._log_warning("Forced final answer LLM call failed: %s", e)

        return "Step could not be completed within the iteration limit."

    # ------------------------------------------------------------------
    # Internal: Native tool support
    # ------------------------------------------------------------------

    def _check_native_tool_support(self) -> bool:
        """Check if native function calling should be used.

        True only when the LLM exposes a callable ``supports_function_calling``
        that returns a truthy value AND original tools were provided.
        """
        supports = getattr(self.llm, "supports_function_calling", None)
        return bool(callable(supports) and supports() and self.original_tools)

    def _setup_native_tools(self) -> None:
        """Convert tools to OpenAI schema format for native function calling."""
        if self.original_tools:
            self._openai_tools, self._available_functions = (
                convert_tools_to_openai_schema(self.original_tools)
            )

    def _is_tool_call_list(self, response: list[Any]) -> bool:
        """Check if a response is a list of tool calls.

        Only the first item is inspected, against the shapes used by the
        supported providers.
        """
        if not response:
            return False
        first_item = response[0]
        is_dict = isinstance(first_item, dict)

        # OpenAI-style
        if hasattr(first_item, "function") or (is_dict and "function" in first_item):
            return True
        # Anthropic-style (ToolUseBlock)
        if getattr(first_item, "type", None) == "tool_use":
            return True
        if hasattr(first_item, "name") and hasattr(first_item, "input"):
            return True
        # Bedrock-style
        if is_dict and "name" in first_item and "input" in first_item:
            return True
        # Gemini-style
        return bool(getattr(first_item, "function_call", None))
self.formatter.handle_plan_refinement( + event.step_number, + event.refined_step_count, + event.refinements, + ) + + @crewai_event_bus.on(PlanReplanTriggeredEvent) + def on_plan_replan_triggered(_: Any, event: PlanReplanTriggeredEvent) -> None: + self.formatter.handle_plan_replan( + event.replan_reason, + event.replan_count, + event.completed_steps_preserved, + ) + + @crewai_event_bus.on(GoalAchievedEarlyEvent) + def on_goal_achieved_early(_: Any, event: GoalAchievedEarlyEvent) -> None: + self.formatter.handle_goal_achieved_early( + event.steps_completed, + event.steps_remaining, + ) + # ----------- AGENT LOGGING EVENTS ----------- @crewai_event_bus.on(AgentLogsStartedEvent) diff --git a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py index ee337d7fd..a9f10d484 100644 --- a/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py +++ b/lib/crewai/src/crewai/events/listeners/tracing/trace_listener.py @@ -93,6 +93,14 @@ from crewai.events.types.memory_events import ( MemorySaveFailedEvent, MemorySaveStartedEvent, ) +from crewai.events.types.observation_events import ( + GoalAchievedEarlyEvent, + PlanRefinementEvent, + PlanReplanTriggeredEvent, + StepObservationCompletedEvent, + StepObservationFailedEvent, + StepObservationStartedEvent, +) from crewai.events.types.reasoning_events import ( AgentReasoningCompletedEvent, AgentReasoningFailedEvent, @@ -437,6 +445,39 @@ class TraceCollectionListener(BaseEventListener): ) -> None: self._handle_action_event("agent_reasoning_failed", source, event) + # Observation events (Plan-and-Execute) + @event_bus.on(StepObservationStartedEvent) + def on_step_observation_started( + source: Any, event: StepObservationStartedEvent + ) -> None: + self._handle_action_event("step_observation_started", source, event) + + @event_bus.on(StepObservationCompletedEvent) + def on_step_observation_completed( + source: Any, event: StepObservationCompletedEvent 
+ ) -> None: + self._handle_action_event("step_observation_completed", source, event) + + @event_bus.on(StepObservationFailedEvent) + def on_step_observation_failed( + source: Any, event: StepObservationFailedEvent + ) -> None: + self._handle_action_event("step_observation_failed", source, event) + + @event_bus.on(PlanRefinementEvent) + def on_plan_refinement(source: Any, event: PlanRefinementEvent) -> None: + self._handle_action_event("plan_refinement", source, event) + + @event_bus.on(PlanReplanTriggeredEvent) + def on_plan_replan_triggered( + source: Any, event: PlanReplanTriggeredEvent + ) -> None: + self._handle_action_event("plan_replan_triggered", source, event) + + @event_bus.on(GoalAchievedEarlyEvent) + def on_goal_achieved_early(source: Any, event: GoalAchievedEarlyEvent) -> None: + self._handle_action_event("goal_achieved_early", source, event) + @event_bus.on(KnowledgeRetrievalStartedEvent) def on_knowledge_retrieval_started( source: Any, event: KnowledgeRetrievalStartedEvent diff --git a/lib/crewai/src/crewai/events/types/observation_events.py b/lib/crewai/src/crewai/events/types/observation_events.py new file mode 100644 index 000000000..2c95f3ae0 --- /dev/null +++ b/lib/crewai/src/crewai/events/types/observation_events.py @@ -0,0 +1,99 @@ +"""Observation events for the Plan-and-Execute architecture. + +Emitted during the Observation phase (PLAN-AND-ACT Section 3.3) when the +PlannerObserver analyzes step execution results and decides on plan +continuation, refinement, or replanning. 
+""" + +from typing import Any + +from crewai.events.base_events import BaseEvent + + +class ObservationEvent(BaseEvent): + """Base event for observation phase events.""" + + type: str + agent_role: str + step_number: int + step_description: str = "" + from_task: Any | None = None + from_agent: Any | None = None + + def __init__(self, **data: Any) -> None: + super().__init__(**data) + self._set_task_params(data) + self._set_agent_params(data) + + +class StepObservationStartedEvent(ObservationEvent): + """Emitted when the Planner begins observing a step's result. + + Fires after every step execution, before the observation LLM call. + """ + + type: str = "step_observation_started" + + +class StepObservationCompletedEvent(ObservationEvent): + """Emitted when the Planner finishes observing a step's result. + + Contains the full observation analysis: what was learned, whether + the plan is still valid, and what action to take next. + """ + + type: str = "step_observation_completed" + step_completed_successfully: bool = True + key_information_learned: str = "" + remaining_plan_still_valid: bool = True + needs_full_replan: bool = False + replan_reason: str | None = None + goal_already_achieved: bool = False + suggested_refinements: list[str] | None = None + + +class StepObservationFailedEvent(ObservationEvent): + """Emitted when the observation LLM call itself fails. + + The system defaults to continuing the plan when this happens, + but the event allows monitoring/alerting on observation failures. + """ + + type: str = "step_observation_failed" + error: str = "" + + +class PlanRefinementEvent(ObservationEvent): + """Emitted when the Planner refines upcoming step descriptions. + + This is the lightweight refinement path — no full replan, just + sharpening pending todo descriptions based on new information. 
+ """ + + type: str = "plan_refinement" + refined_step_count: int = 0 + refinements: list[str] | None = None + + +class PlanReplanTriggeredEvent(ObservationEvent): + """Emitted when the Planner triggers a full replan. + + The remaining plan was deemed fundamentally wrong and will be + regenerated from scratch, preserving completed step results. + """ + + type: str = "plan_replan_triggered" + replan_reason: str = "" + replan_count: int = 0 + completed_steps_preserved: int = 0 + + +class GoalAchievedEarlyEvent(ObservationEvent): + """Emitted when the Planner detects the goal was achieved early. + + Remaining steps will be skipped and execution will finalize. + """ + + type: str = "goal_achieved_early" + steps_remaining: int = 0 + steps_completed: int = 0 diff --git a/lib/crewai/src/crewai/events/utils/console_formatter.py b/lib/crewai/src/crewai/events/utils/console_formatter.py index ac6caabcf..8ffcc868f 100644 --- a/lib/crewai/src/crewai/events/utils/console_formatter.py +++ b/lib/crewai/src/crewai/events/utils/console_formatter.py @@ -851,6 +851,152 @@ To enable tracing, do any one of these: ) self.print_panel(error_content, "❌ Reasoning Error", "red") + # ----------- OBSERVATION EVENTS (Plan-and-Execute) ----------- + + def handle_observation_started( + self, + agent_role: str, + step_number: int, + step_description: str, + ) -> None: + """Handle step observation started event.""" + if not self.verbose: + return + + content = Text() + content.append("Observation Started\n", style="cyan bold") + content.append("Agent: ", style="white") + content.append(f"{agent_role}\n", style="cyan") + content.append("Step: ", style="white") + content.append(f"{step_number}\n", style="cyan") + if step_description: + desc_preview = step_description[:80] + ( + "..." 
def handle_observation_completed(
    self,
    agent_role: str,
    step_number: int,
    step_completed: bool,
    plan_valid: bool,
    key_info: str,
    needs_replan: bool,
    goal_achieved: bool,
) -> None:
    """Render the result panel for a finished step observation.

    Nothing is printed unless ``self.verbose`` is truthy.

    The status line is chosen by the first matching condition:

    1. ``goal_achieved``      -> "Goal Achieved Early"   (green)
    2. ``needs_replan``       -> "Replan Needed"         (yellow)
    3. not ``step_completed`` -> "Step Failed"           (red)
    4. ``plan_valid``         -> "Plan Valid — Continue" (green)
    5. otherwise              -> "Plan Needs Adjustment" (yellow)

    Args:
        agent_role: Role of the observing agent, shown in the panel.
        step_number: Number of the step that was observed.
        step_completed: Whether the step achieved its objective.
        plan_valid: Whether the remaining plan is still considered valid.
        key_info: Information learned from the step; previewed up to
            120 characters. Omitted from the panel when empty.
        needs_replan: Whether a full replan was requested.
        goal_achieved: Whether the overall goal is already satisfied.
    """
    if not self.verbose:
        return

    if goal_achieved:
        style, status = "green", "Goal Achieved Early"
    elif needs_replan:
        style, status = "yellow", "Replan Needed"
    elif not step_completed:
        # A failed step must be reported as failed even when the rest of
        # the plan is still valid; previously this flag was never read.
        style, status = "red", "Step Failed"
    elif plan_valid:
        style, status = "green", "Plan Valid — Continue"
    else:
        # The step succeeded but the remaining plan is no longer valid,
        # which is not a step failure.
        style, status = "yellow", "Plan Needs Adjustment"

    content = Text()
    content.append("Observation Complete\n", style=f"{style} bold")
    content.append("Agent: ", style="white")
    content.append(f"{agent_role}\n", style=style)
    content.append("Step: ", style="white")
    content.append(f"{step_number}\n", style=style)
    content.append("Status: ", style="white")
    content.append(f"{status}\n", style=style)
    if key_info:
        # Keep the panel compact: show at most 120 characters.
        info_preview = key_info[:120] + ("..." if len(key_info) > 120 else "")
        content.append("Learned: ", style="white")
        content.append(f"{info_preview}\n", style=style)

    self.print_panel(content, "🔍 Observation Result", style)
return + + content = Text() + content.append("Goal Achieved Early!\n", style="green bold") + content.append("Completed: ", style="white") + content.append(f"{steps_completed} steps\n", style="green") + content.append("Skipped: ", style="white") + content.append(f"{steps_remaining} remaining steps\n", style="green") + + self.print_panel(content, "🎯 Early Goal Achievement", "green") + # ----------- AGENT LOGGING EVENTS ----------- def handle_agent_logs_started( diff --git a/lib/crewai/src/crewai/experimental/agent_executor.py b/lib/crewai/src/crewai/experimental/agent_executor.py index adee3a8c1..c26b42783 100644 --- a/lib/crewai/src/crewai/experimental/agent_executor.py +++ b/lib/crewai/src/crewai/experimental/agent_executor.py @@ -27,6 +27,11 @@ from crewai.events.types.logging_events import ( AgentLogsExecutionEvent, AgentLogsStartedEvent, ) +from crewai.events.types.observation_events import ( + GoalAchievedEarlyEvent, + PlanRefinementEvent, + PlanReplanTriggeredEvent, +) from crewai.events.types.tool_usage_events import ( ToolUsageErrorEvent, ToolUsageFinishedEvent, @@ -62,8 +67,14 @@ from crewai.utilities.agent_utils import ( ) from crewai.utilities.constants import TRAINING_DATA_FILE from crewai.utilities.i18n import I18N, get_i18n -from crewai.utilities.planning_types import PlanStep, TodoItem, TodoList +from crewai.utilities.planning_types import ( + PlanStep, + StepObservation, + TodoItem, + TodoList, +) from crewai.utilities.printer import Printer +from crewai.utilities.step_execution_context import StepExecutionContext from crewai.utilities.string_utils import sanitize_tool_name from crewai.utilities.tool_utils import execute_tool_and_check_finality from crewai.utilities.training_handler import CrewTrainingHandler @@ -109,6 +120,14 @@ class AgentReActState(BaseModel): last_replan_reason: str | None = Field( default=None, description="Reason for the last replan, if any" ) + observations: dict[int, StepObservation] = Field( + default_factory=dict, + 
def _build_context_for_todo(self, todo: TodoItem) -> StepExecutionContext:
    """Assemble the isolated execution context handed to one todo.

    Only the final ``result`` strings of the todo's dependencies are
    forwarded. Dependencies that cannot be found, or that have no result
    yet, are left out.

    The task description and goal come from ``self.task`` when one is
    set. Otherwise the description falls back to ``self._kickoff_input``
    (empty string if absent) and the goal to a generic instruction.

    Args:
        todo: The todo item to build context for.

    Returns:
        A StepExecutionContext carrying the task description, the task
        goal and the collected dependency results.
    """
    find_step = self.state.todos.get_by_step_number
    resolved = ((number, find_step(number)) for number in todo.depends_on)
    dependency_results: dict[int, str] = {
        number: dependency.result
        for number, dependency in resolved
        if dependency and dependency.result
    }

    if self.task:
        description = self.task.description or ""
        goal = self.task.expected_output or ""
    else:
        description = getattr(self, "_kickoff_input", "")
        goal = "Complete the task successfully"

    return StepExecutionContext(
        task_description=description,
        task_goal=goal,
        dependency_results=dependency_results,
    )
@router("step_observed")
def decide_next_action(
    self,
) -> Literal[
    "goal_achieved",
    "replan_now",
    "refine_and_continue",
    "continue_plan",
]:
    """Route to the next flow event based on the stored observation.

    Reads the observation recorded for the current todo in
    ``self.state.observations`` and picks the first matching route:

    * no current todo                       -> ``"continue_plan"``
    * no observation stored for the step    -> mark completed,
      ``"continue_plan"``
    * ``goal_already_achieved``             -> mark completed,
      ``"goal_achieved"``
    * ``needs_full_replan``                 -> ``"replan_now"``
    * step did not complete successfully    -> ``"replan_now"``
    * plan valid with suggested refinements -> mark completed,
      ``"refine_and_continue"``
    * anything else                         -> mark completed,
      ``"continue_plan"``

    Both replan routes store a reason in ``self.state.last_replan_reason``
    and leave the current todo unmarked.

    Returns:
        The name of the flow event to emit next.
    """
    current_todo = self.state.todos.current_todo
    if not current_todo:
        return "continue_plan"

    observation = self.state.observations.get(current_todo.step_number)
    if not observation:
        # No observation available — default to continue. The stored step
        # result is passed along like in every other completion path, so
        # the fallback does not hand over a missing result.
        self.state.todos.mark_completed(
            current_todo.step_number, result=current_todo.result
        )
        return "continue_plan"

    # Goal already achieved — early termination
    if observation.goal_already_achieved:
        self.state.todos.mark_completed(
            current_todo.step_number, result=current_todo.result
        )
        if self.agent.verbose:
            self._printer.print(
                content="[Decide] Goal achieved early — finalizing",
                color="green",
            )
        return "goal_achieved"

    # Full replan needed
    if observation.needs_full_replan:
        if self.agent.verbose:
            self._printer.print(
                content=f"[Decide] Full replan needed: {observation.replan_reason}",
                color="yellow",
            )
        self.state.last_replan_reason = observation.replan_reason
        return "replan_now"

    # Step failed — also trigger replan
    if not observation.step_completed_successfully:
        if self.agent.verbose:
            self._printer.print(
                content="[Decide] Step failed — triggering replan",
                color="yellow",
            )
        self.state.last_replan_reason = "Step did not complete successfully"
        return "replan_now"

    # Plan still valid but needs refinement
    if observation.remaining_plan_still_valid and observation.suggested_refinements:
        self.state.todos.mark_completed(
            current_todo.step_number, result=current_todo.result
        )
        if self.agent.verbose:
            self._printer.print(
                content="[Decide] Plan valid but refining upcoming steps",
                color="cyan",
            )
        return "refine_and_continue"

    # Plan still valid, no refinements needed — just continue
    self.state.todos.mark_completed(
        current_todo.step_number, result=current_todo.result
    )
    if self.agent.verbose:
        completed = self.state.todos.completed_count
        total = len(self.state.todos.items)
        self._printer.print(
            content=f"[Decide] Continue plan ({completed}/{total} done)",
            color="green",
        )
    return "continue_plan"
@listen("replan_now")
def handle_replan_now(
    self,
) -> Literal["has_todos", "all_todos_complete"]:
    """Run a full replan, bounded by a fixed number of attempts.

    Increments ``self.state.replan_count`` on every call. Once the count
    exceeds three, no further replan is attempted and the flow is told
    to finalize. Otherwise a PlanReplanTriggeredEvent is emitted,
    ``self._trigger_replan`` is invoked with the stored reason (or a
    generic one), and the route depends on whether pending todos exist
    afterwards.

    Returns:
        ``"has_todos"`` when pending todos exist after replanning,
        ``"all_todos_complete"`` otherwise or when the limit was hit.
    """
    replan_limit = 3
    self.state.replan_count += 1

    if self.state.replan_count > replan_limit:
        if self.agent.verbose:
            self._printer.print(
                content=f"Max replans ({replan_limit}) reached — finalizing with current results",
                color="yellow",
            )
        return "all_todos_complete"

    replan_reason = self.state.last_replan_reason or "Dynamic replan triggered"
    finished = self.state.todos.get_completed_todos()
    # The event is tagged with the last completed step, or 0 if none.
    last_finished_step = finished[-1].step_number if finished else 0

    replan_event = PlanReplanTriggeredEvent(
        agent_role=self.agent.role,
        step_number=last_finished_step,
        step_description="",
        replan_reason=replan_reason,
        replan_count=self.state.replan_count,
        completed_steps_preserved=len(finished),
        from_task=self.task,
        from_agent=self.agent,
    )
    crewai_event_bus.emit(self.agent, event=replan_event)

    self._trigger_replan(replan_reason)

    has_pending = bool(self.state.todos.get_pending_todos())
    return "has_todos" if has_pending else "all_todos_complete"
+ def execute_todo_sequential( + self, + ) -> Literal["step_executed", "todo_injected"]: + """Execute a single todo using StepExecutor (Plan-and-Execute mode) + or fall back to the old ReAct injection (legacy mode). - Adds a focused prompt for the current todo to the conversation, - guiding the agent to complete this specific step. + In Plan-and-Execute mode: executes the step in isolation via + StepExecutor, stores the result, and routes to the observation step. + + In legacy mode: injects context into the shared message list and + routes to the ReAct loop. """ current = self.state.todos.current_todo + if not current: + return "todo_injected" # Fall through to legacy - # DEBUG: Trace starting todo execution - if self.agent.verbose: - self._printer.print( - content=f"[DEBUG] execute_todo_sequential: starting todo {current.step_number if current else None}", - color="cyan", - ) - if current: + # Plan-and-Execute path: use StepExecutor for isolated execution + if getattr(self.agent, "planning_enabled", False): + if self.agent.verbose: self._printer.print( - content=f"[DEBUG] Description: {current.description[:60]}...", + content=( + f"[Execute] Step {current.step_number}: " + f"{current.description[:60]}..." 
+ ), color="cyan", ) - if current: - self._inject_todo_context(current) + step_executor = self._ensure_step_executor() + context = self._build_context_for_todo(current) + result = step_executor.execute(current, context) + + # Store result on the todo (do NOT mark completed — observation decides) + current.result = result.result + + # Log to audit trail + self.state.execution_log.append( + { + "type": "step_execution", + "step_number": current.step_number, + "success": result.success, + "result_preview": result.result[:200] if result.result else "", + "error": result.error, + "tool_calls": result.tool_calls_made, + "execution_time": result.execution_time, + } + ) + + if self.agent.verbose: + status = "success" if result.success else "failed" + self._printer.print( + content=( + f"[Execute] Step {current.step_number} {status} " + f"({result.execution_time:.1f}s, " + f"{len(result.tool_calls_made)} tool calls)" + ), + color="green" if result.success else "red", + ) + + return "step_executed" + + # Legacy path: inject context into shared messages for ReAct loop + if self.agent.verbose: + self._printer.print( + content=f"[DEBUG] execute_todo_sequential (legacy): starting todo {current.step_number}", + color="cyan", + ) + self._inject_todo_context(current) return "todo_injected" def _inject_todo_context(self, todo: TodoItem) -> None: @@ -490,18 +884,23 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin): Args: todo: The todo item to inject context for. """ - prompt = self._build_todo_prompt(todo) + # Build focused task prompt. 
Context from previous steps is already + # in self.state.messages as SYSTEM messages (added by _mark_todo_as_completed) + prompt = self._build_todo_prompt(todo, include_dependencies=False) todo_message: LLMMessage = { "role": "user", "content": prompt, } self.state.messages.append(todo_message) - def _build_todo_prompt(self, todo: TodoItem) -> str: + def _build_todo_prompt( + self, todo: TodoItem, include_dependencies: bool = True + ) -> str: """Build a focused prompt for executing a single todo. Args: todo: The todo item to build a prompt for. + include_dependencies: Whether to include dependency results in this prompt. Returns: A prompt string focused on this specific step. @@ -513,19 +912,13 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin): if todo.tool_to_use: parts.append(f"Suggested tool: {todo.tool_to_use}") - # Include results from completed dependencies - if todo.depends_on: + # Include results from completed dependencies if requested (used for parallel execution) + if include_dependencies and todo.depends_on: dep_results = [] for dep_num in todo.depends_on: dep = self.state.todos.get_by_step_number(dep_num) if dep and dep.result: - # Truncate long results - result_preview = ( - dep.result[:500] + "..." 
- if len(dep.result) > 500 - else dep.result - ) - dep_results.append(f"Step {dep_num} result: {result_preview}") + dep_results.append(f"Step {dep_num} result: {dep.result}") if dep_results: parts.append("\nContext from previous steps:") parts.extend(dep_results) @@ -561,12 +954,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin): color="red", ) else: - self.state.todos.mark_completed(todo.step_number, result=str(result)) - if self.agent.verbose: - self._printer.print( - content=f"Todo {todo.step_number} completed", - color="green", - ) + self._mark_todo_as_completed(todo.step_number, str(result)) return "parallel_todos_complete" @@ -580,12 +968,29 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin): The result of executing the todo. """ # Build messages for this specific todo - todo_prompt = self._build_todo_prompt(todo) messages: list[LLMMessage] = [ {"role": "system", "content": self._get_todo_system_prompt()}, - {"role": "user", "content": todo_prompt}, ] + # Inject context into messages for parallel execution (since history is empty) + if todo.depends_on: + dep_results = [] + for dep_num in todo.depends_on: + dep = self.state.todos.get_by_step_number(dep_num) + if dep and dep.result: + dep_results.append(f"Step {dep_num} result: {dep.result}") + if dep_results: + messages.append( + { + "role": "system", + "content": "Context from previous steps:\n" + + "\n".join(dep_results), + } + ) + + todo_prompt = self._build_todo_prompt(todo, include_dependencies=False) + messages.append({"role": "user", "content": todo_prompt}) + # If the todo specifies a tool and we have native tool support if todo.tool_to_use and self.state.use_native_tools: try: @@ -1415,22 +1820,49 @@ provide clear results that can be used by subsequent steps.""" or last_msg.get("role") == "assistant" ): result = str(last_msg.get("content", "")) + elif not self.state.current_answer and self.state.messages: + # For native tools, results are in the message history 
as 'tool' roles + # We take the content of the most recent tool results + tool_results = [] + for msg in reversed(self.state.messages): + if msg.get("role") == "tool": + tool_results.insert(0, str(msg.get("content", ""))) + elif msg.get("role") == "assistant" and msg.get("tool_calls"): + # Once we hit the assistant message that triggered the tools, we stop + break + result = "\n".join(tool_results) - self.state.todos.mark_completed(current_todo.step_number, result=result) + self._mark_todo_as_completed(current_todo.step_number, result) + + return "todo_marked" + + def _mark_todo_as_completed(self, step_number: int, result: str) -> None: + """Helper to mark a todo as completed and update history. + + Args: + step_number: The step number to mark. + result: The result of the todo. + """ + self.state.todos.mark_completed(step_number, result=result) if self.agent.verbose: completed = self.state.todos.completed_count total = len(self.state.todos.items) self._printer.print( - content=f"✓ Todo {current_todo.step_number} completed ({completed}/{total})", + content=f"✓ Todo {step_number} completed ({completed}/{total})", color="green", ) self._printer.print( - content=f"[DEBUG] Marked todo {current_todo.step_number} as completed, result_len={len(result)}", + content=f"[DEBUG] Marked todo {step_number} as completed, result_len={len(result)}", color="cyan", ) - return "todo_marked" + # Add to history as a SYSTEM message for subsequent steps + if result: + self._append_message_to_state( + f"**Step {step_number} result:**\n\n{result}", + role="system", + ) @router(mark_todo_complete) def check_more_todos( @@ -1500,22 +1932,28 @@ provide clear results that can be used by subsequent steps.""" """Finalize execution and emit completion logs. If todos were used, synthesizes a final answer from all todo results. + Handles both the legacy ReAct path (current_answer already set) and + the Plan-and-Execute path (synthesize from completed todos). 
""" - # DEBUG: Trace finalize being called if self.agent.verbose: self._printer.print( - content=f"[DEBUG] finalize called! todos_count={len(self.state.todos.items)}, todos_complete={self.state.todos.is_complete}", + content=f"[Finalize] todos_count={len(self.state.todos.items)}, todos_with_results={sum(1 for t in self.state.todos.items if t.result)}", color="magenta", ) - if self.state.todos.items: - for todo in self.state.todos.items: - self._printer.print( - content=f"[DEBUG] Todo {todo.step_number}: status={todo.status}, desc={todo.description[:40]}...", - color="magenta", - ) - # If we have completed todos, synthesize the final answer - if self.state.todos.items and self.state.todos.is_complete: + # Plan-and-Execute path: synthesize from completed todos + # Check for todos with results (even if not all marked "completed" — + # the goal_achieved path may skip marking some as completed) + todos_with_results = [t for t in self.state.todos.items if t.result] + if todos_with_results and self.state.current_answer is None: + self._synthesize_final_answer_from_todos() + + # Legacy path: synthesize if todos are all formally complete + if ( + self.state.todos.items + and self.state.todos.is_complete + and self.state.current_answer is None + ): self._synthesize_final_answer_from_todos() if self.state.current_answer is None: @@ -1552,7 +1990,7 @@ provide clear results that can be used by subsequent steps.""" results: list[str] = [] for todo in self.state.todos.items: if todo.result: - results.append(f"**Step {todo.step_number}**: {todo.description}") + results.append(f"**Step {todo.step_number} result:**") results.append(todo.result) results.append("") # Empty line for spacing @@ -1703,14 +2141,9 @@ provide clear results that can be used by subsequent steps.""" if completed: context_parts.append("Successfully completed steps:") for todo in completed: - result_preview = ( - todo.result[:200] + "..." 
- if todo.result and len(todo.result) > 200 - else todo.result - ) context_parts.append(f" - Step {todo.step_number}: {todo.description}") - if result_preview: - context_parts.append(f" Result: {result_preview}") + if todo.result: + context_parts.append(f" Result: {todo.result}") # Summarize failed todos failed = [ @@ -1858,6 +2291,8 @@ Consider: self.state.todos = TodoList() self.state.replan_count = 0 self.state.last_replan_reason = None + self.state.observations = {} + self.state.execution_log = [] self._kickoff_input = inputs.get("input", "") @@ -1949,6 +2384,8 @@ Consider: self.state.todos = TodoList() self.state.replan_count = 0 self.state.last_replan_reason = None + self.state.observations = {} + self.state.execution_log = [] self._kickoff_input = inputs.get("input", "") diff --git a/lib/crewai/src/crewai/utilities/planning_types.py b/lib/crewai/src/crewai/utilities/planning_types.py index 69bc079cd..306b3b4fc 100644 --- a/lib/crewai/src/crewai/utilities/planning_types.py +++ b/lib/crewai/src/crewai/utilities/planning_types.py @@ -144,3 +144,86 @@ class TodoList(BaseModel): def running_count(self) -> int: """Count of currently running todos.""" return sum(1 for item in self.items if item.status == "running") + + def get_completed_todos(self) -> list[TodoItem]: + """Get all completed todos. + + Returns: + List of completed TodoItem objects. + """ + return [item for item in self.items if item.status == "completed"] + + def get_pending_todos(self) -> list[TodoItem]: + """Get all pending todos. + + Returns: + List of pending TodoItem objects. + """ + return [item for item in self.items if item.status == "pending"] + + def replace_pending_todos(self, new_items: list[TodoItem]) -> None: + """Replace all pending todos with new items. + + Preserves completed and running todos, replaces only pending ones. + Used during replanning to swap in a new plan for remaining work. + + Args: + new_items: The new todo items to replace pending ones. 
+ """ + non_pending = [item for item in self.items if item.status != "pending"] + self.items = non_pending + new_items + + +class StepObservation(BaseModel): + """Planner's observation after a step execution completes. + + Returned by the PlannerObserver after EVERY step — not just failures. + The Planner uses this to decide whether to continue, refine, or replan. + + Based on PLAN-AND-ACT (Section 3.3): the Planner observes what the Executor + did and incorporates new information into the remaining plan. + + Attributes: + step_completed_successfully: Whether the step achieved its objective. + key_information_learned: New information revealed by this step + (e.g., "Found 3 products: A, B, C"). Used to refine upcoming steps. + remaining_plan_still_valid: Whether pending todos still make sense + given the new information. True does NOT mean no refinement needed. + suggested_refinements: Minor tweaks to upcoming step descriptions. + These are lightweight in-place updates, not a full replan. + Example: ["Step 3 should select product B instead of 'best product'"] + needs_full_replan: The remaining plan is fundamentally wrong and must + be regenerated from scratch. Mutually exclusive with + remaining_plan_still_valid (if this is True, that should be False). + replan_reason: Explanation of why a full replan is needed (None if not). + goal_already_achieved: The overall task goal has been satisfied early. + No more steps needed — skip remaining todos and finalize. 
+ """ + + step_completed_successfully: bool = Field( + description="Whether the step achieved what it was asked to do" + ) + key_information_learned: str = Field( + default="", + description="What new information this step revealed", + ) + remaining_plan_still_valid: bool = Field( + default=True, + description="Whether the remaining pending todos still make sense given new information", + ) + suggested_refinements: list[str] | None = Field( + default=None, + description="Minor tweaks to descriptions of upcoming steps (lightweight, no full replan)", + ) + needs_full_replan: bool = Field( + default=False, + description="The remaining plan is fundamentally wrong and must be regenerated", + ) + replan_reason: str | None = Field( + default=None, + description="Explanation of why a full replan is needed", + ) + goal_already_achieved: bool = Field( + default=False, + description="The overall task goal has been satisfied early; no more steps needed", + ) diff --git a/lib/crewai/src/crewai/utilities/step_execution_context.py b/lib/crewai/src/crewai/utilities/step_execution_context.py new file mode 100644 index 000000000..170427948 --- /dev/null +++ b/lib/crewai/src/crewai/utilities/step_execution_context.py @@ -0,0 +1,64 @@ +"""Context and result types for isolated step execution in Plan-and-Execute architecture. + +These types mediate between the AgentExecutor (orchestrator) and StepExecutor (per-step worker). +StepExecutionContext carries only final results from dependencies — never LLM message histories. +StepResult carries only the outcome of a step — never internal execution traces. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field + + +@dataclass(frozen=True) +class StepExecutionContext: + """Immutable context passed to a StepExecutor for a single todo. + + Contains only the information the Executor needs to complete one step: + the task description, goal, and final results from dependency steps. 
+ No LLM message history, no execution traces, no shared mutable state. + + Attributes: + task_description: The original task description (from Task or kickoff input). + task_goal: The expected output / goal of the overall task. + dependency_results: Mapping of step_number → final result string + for all completed dependencies of the current step. + """ + + task_description: str + task_goal: str + dependency_results: dict[int, str] = field(default_factory=dict) + + def get_dependency_result(self, step_number: int) -> str | None: + """Get the final result of a dependency step. + + Args: + step_number: The step number to look up. + + Returns: + The result string if available, None otherwise. + """ + return self.dependency_results.get(step_number) + + +@dataclass +class StepResult: + """Result returned by a StepExecutor after executing a single todo. + + Contains the final outcome and metadata for debugging/metrics. + Tool call details are for audit logging only — they are NOT passed + to subsequent steps or the Planner. + + Attributes: + success: Whether the step completed successfully. + result: The final output string from the step. + error: Error message if the step failed (None on success). + tool_calls_made: List of tool names invoked (for debugging/logging only). + execution_time: Wall-clock time in seconds for the step execution. 
+ """ + + success: bool + result: str + error: str | None = None + tool_calls_made: list[str] = field(default_factory=list) + execution_time: float = 0.0 diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml index 97ca50aa2..f2b14ae14 100644 --- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml +++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_with_planning_stores_plan_in_state.yaml @@ -4,18 +4,25 @@ interactions: Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create a focused execution plan for the following task:\n\n## Task\nWhat is 2 + 2?\n\n## Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools - available\n\n## Instructions\nCreate ONLY the essential steps needed to complete - this task. Use the MINIMUM number of steps required - do NOT pad your plan with - unnecessary steps. 
Most tasks need only 2-5 steps.\n\nFor each step:\n- State - the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT - include:\n- Setup or preparation steps that are obvious\n- Verification steps - unless critical\n- Documentation or cleanup steps unless explicitly required\n- - Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan, - state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n- - \"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create - or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The - detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether - the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}' + available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished, + not HOW. Group related actions into logical units. Fewer steps = better. Most + tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are + valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2. 
+ **Output Step**: Synthesizes prior results into the final deliverable (usually + the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE + FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step, + not three\n- Combine all synthesis into ONE final output step\n- NO standalone + \"thinking\" steps (review, verify, confirm, refine, analyze) - these happen + naturally between steps\n\nFor each step: State the action, specify the tool + (if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create + or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A + brief summary of the overall plan."},"steps":{"type":"array","description":"List + of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step + number (1-based)"},"description":{"type":"string","description":"What to do + in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to + use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step + numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether + the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}' headers: User-Agent: - X-USER-AGENT-XXX @@ -28,7 +35,7 @@ interactions: connection: - keep-alive content-length: - - '1541' + - '2315' content-type: - application/json host: @@ -55,20 +62,24 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": 
\"chatcmpl-D4yTTAh68P65LybtqkwNI3p2HXcRv\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078147,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62FIyv2pfC7qKbZVvmJNjOVfge1F\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330972,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. **Action:** - Perform the addition operation. \\n **Tool:** None (manually calculate).\\n\\n2. - **Action:** State the result. \\n **Tool:** None (manually output).\\n\\nREADY: - I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\": - []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n - \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 281,\n \"completion_tokens\": - 56,\n \"total_tokens\": 337,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_xEDChlUntYR0aSxQhkobswea\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n + \ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 2 + + 2 and provide the result as the final output.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate + the sum of 2 + 2\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide + the result as final output\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n + \ }\n }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 440,\n \"completion_tokens\": + 92,\n \"total_tokens\": 532,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n 
\"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -77,7 +88,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:28 GMT + - Thu, 05 Feb 2026 22:36:13 GMT Server: - cloudflare Set-Cookie: @@ -97,7 +108,7 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '1165' + - '1670' openai-project: - OPENAI-PROJECT-XXX openai-version: @@ -123,9 +134,13 @@ interactions: message: OK - request: body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful - assistant that solves math problems step by step\nYour personal goal is: Help - solve simple math problems"},{"role":"user","content":"\nCurrent Task: What - is 2 + 2?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}' + assistant that solves math problems step by step\n\nYour goal: Help solve simple + math problems\n\nYou are executing a specific step in a multi-step plan. Focus + ONLY on completing\nthe current step. 
Do not plan ahead or worry about future + steps.\n\nBefore acting, briefly reason about what you need to do and which + approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"## + Current Step\nCalculate the sum of 2 + 2\n\nComplete this step and provide your + result."}],"model":"gpt-4o-mini"}' headers: User-Agent: - X-USER-AGENT-XXX @@ -138,7 +153,7 @@ interactions: connection: - keep-alive content-length: - - '299' + - '597' content-type: - application/json cookie: @@ -167,20 +182,18 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": \"chatcmpl-D4yTVB9mdtq1YZrUVf1aSb6dVVQ8G\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078149,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62FJ4ZEkHWSBMZA8bDbMqd7upzwY\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330973,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"To solve the problem of 2 + 2, we simply - perform the addition:\\n\\n1. Start with the first number: 2\\n2. Add the - second number: + 2\\n3. 
Combine the two: 2 + 2 = 4\\n\\nTherefore, the answer - is 4.\",\n \"refusal\": null,\n \"annotations\": []\n },\n - \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n - \ \"usage\": {\n \"prompt_tokens\": 54,\n \"completion_tokens\": 62,\n - \ \"total_tokens\": 116,\n \"prompt_tokens_details\": {\n \"cached_tokens\": - 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + \"assistant\",\n \"content\": \"To calculate the sum of 2 + 2, I simply + add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 115,\n \"completion_tokens\": 33,\n \"total_tokens\": 148,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -189,7 +202,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:30 GMT + - Thu, 05 Feb 2026 22:36:14 GMT Server: - cloudflare Strict-Transport-Security: @@ -207,7 +220,155 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '1300' + - '614' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + 
x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent + observing execution progress. After each step completes, you analyze what happened + and decide whether the remaining plan is still valid.\\n\\nReason step-by-step + about:\\n1. What new information was learned from this step's result\\n2. Whether + the remaining steps still make sense given this new information\\n3. What refinements, + if any, are needed for upcoming steps\\n4. Whether the overall goal has already + been achieved\\n\\nBe conservative about triggering full replans \u2014 only + do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"## + Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription: + Calculate the sum of 2 + 2\\nResult: To calculate the sum of 2 + 2, I simply + add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\\n\\n## Remaining + plan steps:\\n Step 2: Provide the result as final output\\n\\nAnalyze this + step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's + observation after a step execution completes.\\n\\nReturned by the PlannerObserver + after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide + whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section + 3.3): the Planner observes what the Executor\\ndid and incorporates new information + into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully: + Whether the step achieved its objective.\\n key_information_learned: New + information revealed by this step\\n (e.g., \\\"Found 3 products: A, + B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid: + Whether pending todos still make sense\\n given the new information. 
+ True does NOT mean no refinement needed.\\n suggested_refinements: Minor + tweaks to upcoming step descriptions.\\n These are lightweight in-place + updates, not a full replan.\\n Example: [\\\"Step 3 should select product + B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan + is fundamentally wrong and must\\n be regenerated from scratch. Mutually + exclusive with\\n remaining_plan_still_valid (if this is True, that should + be False).\\n replan_reason: Explanation of why a full replan is needed (None + if not).\\n goal_already_achieved: The overall task goal has been satisfied + early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether + the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What + new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether + the remaining pending todos still make sense given new information\",\"title\":\"Remaining + Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor + tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested + Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The + remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs + Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation + of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The + overall task goal has been satisfied early; no more steps 
needed\",\"title\":\"Goal + Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}" + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '4024' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-helper-method: + - beta.chat.completions.parse + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FKmJpd8tlJ6Y3OChUQsoz2o5ps\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330974,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The + calculation for 2 + 2 is 4.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 789,\n \"completion_tokens\": 64,\n 
\"total_tokens\": 853,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:15 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '1181' openai-project: - OPENAI-PROJECT-XXX openai-version: diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml index 3ceb8fa34..a24b4277b 100644 --- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml +++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_agent_kickoff_without_planning_skips_plan_generation.yaml @@ -42,17 +42,17 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": \"chatcmpl-D4yTTFxQ75llVmJv0ee902FIjXE8p\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078147,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62FTKj39Y02oqJmQxpmC8sz2piEl\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330983,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n 
\"content\": \"3 + 3 equals 6.\",\n \"refusal\": + \"assistant\",\n \"content\": \"The sum of 3 + 3 is 6.\",\n \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": null,\n \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": - 47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\": + 47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -61,7 +61,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:27 GMT + - Thu, 05 Feb 2026 22:36:23 GMT Server: - cloudflare Set-Cookie: @@ -81,7 +81,113 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '401' + - '361' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are Math Assistant. 
A helpful + assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent + Task: What is 3 + 3?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '260' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FT7ELyytoJFkmjOtWysQA2Bfvy\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330983,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"The sum of 3 + 3 is 6.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:23 GMT + Server: + - cloudflare + 
Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '362' openai-project: - OPENAI-PROJECT-XXX openai-version: diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml index 003471628..8aa857a0b 100644 --- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml +++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_executor_state_contains_plan_after_planning.yaml @@ -4,18 +4,25 @@ interactions: Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create a focused execution plan for the following task:\n\n## Task\nWhat is 7 + 7?\n\n## Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools - available\n\n## Instructions\nCreate ONLY the essential steps needed to complete - this task. Use the MINIMUM number of steps required - do NOT pad your plan with - unnecessary steps. 
Most tasks need only 2-5 steps.\n\nFor each step:\n- State - the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT - include:\n- Setup or preparation steps that are obvious\n- Verification steps - unless critical\n- Documentation or cleanup steps unless explicitly required\n- - Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan, - state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n- - \"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create - or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The - detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether - the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}' + available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished, + not HOW. Group related actions into logical units. Fewer steps = better. Most + tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are + valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2. 
+ **Output Step**: Synthesizes prior results into the final deliverable (usually + the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE + FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step, + not three\n- Combine all synthesis into ONE final output step\n- NO standalone + \"thinking\" steps (review, verify, confirm, refine, analyze) - these happen + naturally between steps\n\nFor each step: State the action, specify the tool + (if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create + or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A + brief summary of the overall plan."},"steps":{"type":"array","description":"List + of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step + number (1-based)"},"description":{"type":"string","description":"What to do + in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to + use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step + numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether + the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}' headers: User-Agent: - X-USER-AGENT-XXX @@ -28,7 +35,7 @@ interactions: connection: - keep-alive content-length: - - '1541' + - '2315' content-type: - application/json host: @@ -55,18 +62,24 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": 
\"chatcmpl-D4yTdqlxwWowSdLncBERFrCgxTvVj\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078157,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62FN5xLKcEfF0ISjfbnezYLsZtma\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330977,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Calculate - the sum of 7 and 7.\\n \\nREADY: I am ready to execute the task.\",\n \"refusal\": - null,\n \"annotations\": []\n },\n \"logprobs\": null,\n - \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": - 281,\n \"completion_tokens\": 28,\n \"total_tokens\": 309,\n \"prompt_tokens_details\": - {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_rSNKBB5w6x6IXkm0fm2GN1hI\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n + \ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 7 + + 7 and provide the result.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate + the sum of 7 + 7.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide + the final output of the calculation.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n + \ }\n }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 440,\n \"completion_tokens\": + 89,\n \"total_tokens\": 529,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n 
\"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -75,7 +88,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:38 GMT + - Thu, 05 Feb 2026 22:36:18 GMT Server: - cloudflare Set-Cookie: @@ -95,7 +108,7 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '709' + - '1700' openai-project: - OPENAI-PROJECT-XXX openai-version: @@ -121,9 +134,13 @@ interactions: message: OK - request: body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful - assistant that solves math problems step by step\nYour personal goal is: Help - solve simple math problems"},{"role":"user","content":"\nCurrent Task: What - is 7 + 7?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}' + assistant that solves math problems step by step\n\nYour goal: Help solve simple + math problems\n\nYou are executing a specific step in a multi-step plan. Focus + ONLY on completing\nthe current step. 
Do not plan ahead or worry about future + steps.\n\nBefore acting, briefly reason about what you need to do and which + approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"## + Current Step\nCalculate the sum of 7 + 7.\n\nComplete this step and provide + your result."}],"model":"gpt-4o-mini"}' headers: User-Agent: - X-USER-AGENT-XXX @@ -136,7 +153,7 @@ interactions: connection: - keep-alive content-length: - - '299' + - '598' content-type: - application/json cookie: @@ -165,18 +182,19 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": \"chatcmpl-D4yTeB6Miecallw9SjSfLAXPjX2XD\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078158,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62FOVRLtzvZr17sXJ05O6NTxw1rI\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330978,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"To find the sum of 7 and 7, you simply - add the two numbers together:\\n\\n7 + 7 = 14\\n\\nSo, the answer is 14.\",\n - \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": - null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": - 54,\n \"completion_tokens\": 35,\n \"total_tokens\": 89,\n \"prompt_tokens_details\": - {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + \"assistant\",\n \"content\": \"To calculate the sum of 7 + 7, I need + to simply add the two numbers together. 
\\n\\n7 + 7 = 14.\\n\\nSo, the result + is 14.\",\n \"refusal\": null,\n \"annotations\": []\n },\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 115,\n \"completion_tokens\": 38,\n + \ \"total_tokens\": 153,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -185,7 +203,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:38 GMT + - Thu, 05 Feb 2026 22:36:19 GMT Server: - cloudflare Strict-Transport-Security: @@ -203,7 +221,418 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '733' + - '868' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent + observing execution progress. After each step completes, you analyze what happened + and decide whether the remaining plan is still valid.\\n\\nReason step-by-step + about:\\n1. What new information was learned from this step's result\\n2. Whether + the remaining steps still make sense given this new information\\n3. 
What refinements, + if any, are needed for upcoming steps\\n4. Whether the overall goal has already + been achieved\\n\\nBe conservative about triggering full replans \u2014 only + do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"## + Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription: + Calculate the sum of 7 + 7.\\nResult: To calculate the sum of 7 + 7, I need + to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, the result + is 14.\\n\\n## Remaining plan steps:\\n Step 2: Provide the final output of + the calculation.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's + observation after a step execution completes.\\n\\nReturned by the PlannerObserver + after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide + whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section + 3.3): the Planner observes what the Executor\\ndid and incorporates new information + into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully: + Whether the step achieved its objective.\\n key_information_learned: New + information revealed by this step\\n (e.g., \\\"Found 3 products: A, + B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid: + Whether pending todos still make sense\\n given the new information. + True does NOT mean no refinement needed.\\n suggested_refinements: Minor + tweaks to upcoming step descriptions.\\n These are lightweight in-place + updates, not a full replan.\\n Example: [\\\"Step 3 should select product + B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan + is fundamentally wrong and must\\n be regenerated from scratch. 
Mutually + exclusive with\\n remaining_plan_still_valid (if this is True, that should + be False).\\n replan_reason: Explanation of why a full replan is needed (None + if not).\\n goal_already_achieved: The overall task goal has been satisfied + early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether + the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What + new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether + the remaining pending todos still make sense given new information\",\"title\":\"Remaining + Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor + tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested + Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The + remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs + Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation + of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The + overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal + Already 
Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}" + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '4051' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-helper-method: + - beta.chat.completions.parse + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FPKZpmhdynDPftfUn6yxeNSmro\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330979,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The + sum of 7 + 7 has been correctly calculated to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 795,\n \"completion_tokens\": 69,\n \"total_tokens\": 
864,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:21 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '1071' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful + assistant that solves math problems step by step\n\nYour goal: Help solve simple + math problems\n\nYou are executing a specific step in a multi-step plan. Focus + ONLY on completing\nthe current step. 
Do not plan ahead or worry about future + steps.\n\nBefore acting, briefly reason about what you need to do and which + approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"## + Current Step\nProvide the final output of the calculation.\n\n## Context from + previous steps:\nStep 1 result: To calculate the sum of 7 + 7, I need to simply + add the two numbers together. \n\n7 + 7 = 14.\n\nSo, the result is 14.\n\nComplete + this step and provide your result."}],"model":"gpt-4o-mini"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '785' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FRqSOxtg5k7zpUfvXk8XEZMz9x\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330981,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"The final output of the calculation + is 14.\",\n \"refusal\": null,\n \"annotations\": []\n },\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 162,\n \"completion_tokens\": 10,\n + \ \"total_tokens\": 172,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n 
\"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:21 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '446' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent + observing execution progress. After each step completes, you analyze what happened + and decide whether the remaining plan is still valid.\\n\\nReason step-by-step + about:\\n1. What new information was learned from this step's result\\n2. Whether + the remaining steps still make sense given this new information\\n3. What refinements, + if any, are needed for upcoming steps\\n4. 
Whether the overall goal has already + been achieved\\n\\nBe conservative about triggering full replans \u2014 only + do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"## + Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n + \ Step 1: Calculate the sum of 7 + 7.\\n Result: To calculate the sum of + 7 + 7, I need to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, + the result is 14.\\n\\n## Just completed step 2\\nDescription: Provide the final + output of the calculation.\\nResult: The final output of the calculation is + 14.\\n\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's + observation after a step execution completes.\\n\\nReturned by the PlannerObserver + after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide + whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section + 3.3): the Planner observes what the Executor\\ndid and incorporates new information + into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully: + Whether the step achieved its objective.\\n key_information_learned: New + information revealed by this step\\n (e.g., \\\"Found 3 products: A, + B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid: + Whether pending todos still make sense\\n given the new information. + True does NOT mean no refinement needed.\\n suggested_refinements: Minor + tweaks to upcoming step descriptions.\\n These are lightweight in-place + updates, not a full replan.\\n Example: [\\\"Step 3 should select product + B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan + is fundamentally wrong and must\\n be regenerated from scratch. 
Mutually + exclusive with\\n remaining_plan_still_valid (if this is True, that should + be False).\\n replan_reason: Explanation of why a full replan is needed (None + if not).\\n goal_already_achieved: The overall task goal has been satisfied + early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether + the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What + new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether + the remaining pending todos still make sense given new information\",\"title\":\"Remaining + Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor + tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested + Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The + remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs + Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation + of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The + overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal + Already 
Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}" + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '4113' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-helper-method: + - beta.chat.completions.parse + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FRITGCaSHqqF9f8FVEgkrZ36QL\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330981,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The + final output of the calculation is confirmed to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 808,\n \"completion_tokens\": 65,\n \"total_tokens\": 
873,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:22 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '924' openai-project: - OPENAI-PROJECT-XXX openai-version: diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_config_disabled_skips_planning.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_config_disabled_skips_planning.yaml deleted file mode 100644 index ccb9aee30..000000000 --- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_config_disabled_skips_planning.yaml +++ /dev/null @@ -1,108 +0,0 @@ -interactions: -- request: - body: '{"messages":[{"role":"system","content":"You are Math Assistant. 
A helpful - assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent - Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}' - headers: - User-Agent: - - X-USER-AGENT-XXX - accept: - - application/json - accept-encoding: - - ACCEPT-ENCODING-XXX - authorization: - - AUTHORIZATION-XXX - connection: - - keep-alive - content-length: - - '260' - content-type: - - application/json - host: - - api.openai.com - x-stainless-arch: - - X-STAINLESS-ARCH-XXX - x-stainless-async: - - 'false' - x-stainless-lang: - - python - x-stainless-os: - - X-STAINLESS-OS-XXX - x-stainless-package-version: - - 1.83.0 - x-stainless-read-timeout: - - X-STAINLESS-READ-TIMEOUT-XXX - x-stainless-retry-count: - - '0' - x-stainless-runtime: - - CPython - x-stainless-runtime-version: - - 3.13.3 - method: POST - uri: https://api.openai.com/v1/chat/completions - response: - body: - string: "{\n \"id\": \"chatcmpl-D4yTf8T2iADffpPCJBZhntLlaoaSy\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078159,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n - \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\": - null,\n \"annotations\": []\n },\n \"logprobs\": null,\n - \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": - 47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\": - {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": - {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": - 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" - headers: - CF-RAY: - - CF-RAY-XXX - Connection: - - keep-alive - Content-Type: - - application/json - Date: - - Tue, 03 Feb 2026 00:22:40 GMT - Server: - - cloudflare - Set-Cookie: - - SET-COOKIE-XXX - 
Strict-Transport-Security: - - STS-XXX - Transfer-Encoding: - - chunked - X-Content-Type-Options: - - X-CONTENT-TYPE-XXX - access-control-expose-headers: - - ACCESS-CONTROL-XXX - alt-svc: - - h3=":443"; ma=86400 - cf-cache-status: - - DYNAMIC - openai-organization: - - OPENAI-ORG-XXX - openai-processing-ms: - - '515' - openai-project: - - OPENAI-PROJECT-XXX - openai-version: - - '2020-10-01' - x-openai-proxy-wasm: - - v0.1 - x-ratelimit-limit-requests: - - X-RATELIMIT-LIMIT-REQUESTS-XXX - x-ratelimit-limit-tokens: - - X-RATELIMIT-LIMIT-TOKENS-XXX - x-ratelimit-remaining-requests: - - X-RATELIMIT-REMAINING-REQUESTS-XXX - x-ratelimit-remaining-tokens: - - X-RATELIMIT-REMAINING-TOKENS-XXX - x-ratelimit-reset-requests: - - X-RATELIMIT-RESET-REQUESTS-XXX - x-ratelimit-reset-tokens: - - X-RATELIMIT-RESET-TOKENS-XXX - x-request-id: - - X-REQUEST-ID-XXX - status: - code: 200 - message: OK -version: 1 diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml index 17307bdcc..5d2acbc90 100644 --- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml +++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_creates_minimal_steps_for_multi_step_task.yaml @@ -1,23 +1,104 @@ interactions: +- request: + body: '{"trace_id": "869cae2c-e863-4e17-b6c7-e9cf6ba8835d", "execution_type": + "crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null, + "crew_name": "Unknown Crew", "flow_name": null, "crewai_version": "1.9.3", "privacy_level": + "standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count": + 0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2026-02-05T22:35:59.859861+00:00"}}' + headers: + Accept: + - '*/*' + Connection: + - 
keep-alive + Content-Length: + - '434' + Content-Type: + - application/json + User-Agent: + - X-USER-AGENT-XXX + X-Crewai-Organization-Id: + - 3433f0ee-8a94-4aa4-822b-2ac71aa38b18 + X-Crewai-Version: + - 1.9.3 + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + method: POST + uri: https://app.crewai.com/crewai_plus/api/v1/tracing/batches + response: + body: + string: '{"id":"d34854ac-4e95-420c-b08a-af182e63fc75","trace_id":"869cae2c-e863-4e17-b6c7-e9cf6ba8835d","execution_type":"crew","crew_name":"Unknown + Crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.9.3","privacy_level":"standard","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"Unknown + Crew","flow_name":null,"crewai_version":"1.9.3","privacy_level":"standard"},"created_at":"2026-02-05T22:36:00.450Z","updated_at":"2026-02-05T22:36:00.450Z"}' + headers: + Connection: + - keep-alive + Content-Length: + - '492' + Content-Type: + - application/json; charset=utf-8 + Date: + - Thu, 05 Feb 2026 22:36:00 GMT + cache-control: + - no-store + content-security-policy: + - CSP-FILTERED + etag: + - ETAG-XXX + expires: + - '0' + permissions-policy: + - PERMISSIONS-POLICY-XXX + pragma: + - no-cache + referrer-policy: + - REFERRER-POLICY-XXX + strict-transport-security: + - STS-XXX + vary: + - Accept + x-content-type-options: + - X-CONTENT-TYPE-XXX + x-frame-options: + - X-FRAME-OPTIONS-XXX + x-permitted-cross-domain-policies: + - X-PERMITTED-XXX + x-request-id: + - X-REQUEST-ID-XXX + x-runtime: + - X-RUNTIME-XXX + x-xss-protection: + - X-XSS-PROTECTION-XXX + status: + code: 201 + message: Created - request: body: '{"messages":[{"role":"system","content":"You are a strategic planning assistant. Create minimal, effective execution plans. 
Prefer fewer steps over more."},{"role":"user","content":"Create a focused execution plan for the following task:\n\n## Task\nCalculate the sum of the first 3 prime numbers, then multiply that result by 2. Show your work for each step.\n\n## Expected Output\nComplete the task successfully\n\n## Available - Tools\nNo tools available\n\n## Instructions\nCreate ONLY the essential steps - needed to complete this task. Use the MINIMUM number of steps required - do - NOT pad your plan with unnecessary steps. Most tasks need only 2-5 steps.\n\nFor - each step:\n- State the specific action to take\n- Specify which tool to use - (if any)\n\nDo NOT include:\n- Setup or preparation steps that are obvious\n- - Verification steps unless critical\n- Documentation or cleanup steps unless - explicitly required\n- Generic steps like \"review results\" or \"finalize output\"\n\nAfter - your plan, state:\n- \"READY: I am ready to execute the task.\" if the plan - is complete\n- \"NOT READY: I need to refine my plan because [reason].\" if - you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create - or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The - detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether - the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}' + Tools\nNo tools available\n\n## Planning Principles\nFocus on WHAT needs to + be accomplished, not HOW. Group related actions into logical units. Fewer steps + = better. Most tasks need 3-6 steps. Hard limit: 10 steps.\n\n## Step Types + (only these are valid):\n1. **Tool Step**: Uses a tool to gather information + or take action\n2. 
**Output Step**: Synthesizes prior results into the final + deliverable (usually the last step)\n\n## Rules:\n- Each step must either USE + A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine related tool calls: \"Research + A, B, and C\" = ONE step, not three\n- Combine all synthesis into ONE final + output step\n- NO standalone \"thinking\" steps (review, verify, confirm, refine, + analyze) - these happen naturally between steps\n\nFor each step: State the + action, specify the tool (if any), and note dependencies.\n\nAfter your plan, + state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create + or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A + brief summary of the overall plan."},"steps":{"type":"array","description":"List + of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step + number (1-based)"},"description":{"type":"string","description":"What to do + in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to + use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step + numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether + the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}' headers: User-Agent: - X-USER-AGENT-XXX @@ -30,7 +111,7 @@ interactions: connection: - keep-alive content-length: - - '1636' + - '2410' content-type: - application/json host: @@ -57,20 +138,26 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": 
\"chatcmpl-D4yTWa7FxCHkHwHF25AYXXeJDBOuY\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078150,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62F62rSoHIF6DpZZFowcKaVmb8Iu\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330960,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Identify - the first 3 prime numbers: 2, 3, and 5.\\n2. Calculate the sum: \\\\(2 + 3 - + 5 = 10\\\\).\\n3. Multiply the sum by 2: \\\\(10 \\\\times 2 = 20\\\\).\\n\\nREADY: - I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\": - []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n - \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 299,\n \"completion_tokens\": - 74,\n \"total_tokens\": 373,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_bJJDEK5hizeG4PyxSUynX9x8\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n + \ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of the + first 3 prime numbers and multiply that sum by 2.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Identify + the first 3 prime numbers (2, 3, 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Calculate + the sum of the identified prime numbers (2 + 3 + 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Multiply + the sum by 2.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]},{\\\"step_number\\\":4,\\\"description\\\":\\\"Output + the final result.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[3]}],\\\"ready\\\":true}\"\n + \ }\n }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": 
{\n \"prompt_tokens\": 458,\n \"completion_tokens\": + 160,\n \"total_tokens\": 618,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -79,7 +166,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:32 GMT + - Thu, 05 Feb 2026 22:36:03 GMT Server: - cloudflare Set-Cookie: @@ -99,7 +186,7 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '1716' + - '2448' openai-project: - OPENAI-PROJECT-XXX openai-version: @@ -125,10 +212,13 @@ interactions: message: OK - request: body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert - math tutor who breaks down problems step by step\nYour personal goal is: Solve - multi-step math problems accurately"},{"role":"user","content":"\nCurrent Task: - Calculate the sum of the first 3 prime numbers, then multiply that result by - 2. Show your work for each step.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}' + math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step + math problems accurately\n\nYou are executing a specific step in a multi-step + plan. Focus ONLY on completing\nthe current step. 
Do not plan ahead or worry + about future steps.\n\nBefore acting, briefly reason about what you need to + do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"## + Current Step\nIdentify the first 3 prime numbers (2, 3, 5).\n\nComplete this + step and provide your result."}],"model":"gpt-4o-mini"}' headers: User-Agent: - X-USER-AGENT-XXX @@ -141,7 +231,7 @@ interactions: connection: - keep-alive content-length: - - '400' + - '622' content-type: - application/json cookie: @@ -170,30 +260,24 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": \"chatcmpl-D4yTYJgCZf2oY7wiPMZmN4QEQhHb5\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078152,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62F9RUZUkszp0yCWHbNJVTG16bx9\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330963,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"To solve the problem, let's break it - down into two main steps: \\n\\n1. Calculate the sum of the first 3 prime - numbers.\\n2. Multiply the result of that sum by 2.\\n\\n### Step 1: Identify - the first 3 prime numbers\\nPrime numbers are natural numbers greater than - 1 that have no positive divisors other than 1 and themselves. 
\\n\\nThe first - three prime numbers are:\\n- 2\\n- 3\\n- 5\\n\\n### Step 2: Calculate the - sum of the first 3 prime numbers\\nNow, we add these prime numbers together:\\n\\n\\\\[\\n2 - + 3 + 5\\n\\\\]\\n\\nCalculating this step-by-step:\\n- First, add 2 and 3:\\n - \ \\\\[\\n 2 + 3 = 5\\n \\\\]\\n \\n- Next, add this result to 5:\\n \\\\[\\n - \ 5 + 5 = 10\\n \\\\]\\n\\nSo, the sum of the first 3 prime numbers is \\\\(10\\\\).\\n\\n### - Step 3: Multiply the sum by 2\\nNext, we take the sum we calculated and multiply - it by 2:\\n\\n\\\\[\\n10 \\\\times 2\\n\\\\]\\n\\nCalculating this:\\n\\\\[\\n10 - \\\\times 2 = 20\\n\\\\]\\n\\n### Final Answer\\nThus, the final result obtained - after performing all the steps is:\\n\\n\\\\[\\n\\\\boxed{20}\\n\\\\]\",\n - \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": - null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": - 74,\n \"completion_tokens\": 288,\n \"total_tokens\": 362,\n \"prompt_tokens_details\": - {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + \"assistant\",\n \"content\": \"To identify the first three prime numbers, + we need to recall the definition of a prime number: it is a natural number + greater than 1 that has no positive divisors other than 1 and itself. \\n\\nStarting + from 2, we find:\\n1. The number **2** is prime (divisors are 1 and 2).\\n2. + The number **3** is prime (divisors are 1 and 3).\\n3. The number **4** is + not prime (divisors are 1, 2, and 4).\\n4. The number **5** is prime (divisors + are 1 and 5).\\n\\nThus, the first three prime numbers are **2, 3, and 5**. 
+ \\n\\nResult: 2, 3, 5.\",\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 123,\n \"completion_tokens\": + 166,\n \"total_tokens\": 289,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -202,7 +286,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:37 GMT + - Thu, 05 Feb 2026 22:36:06 GMT Server: - cloudflare Strict-Transport-Security: @@ -220,7 +304,444 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '4751' + - '3090' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent + observing execution progress. After each step completes, you analyze what happened + and decide whether the remaining plan is still valid.\\n\\nReason step-by-step + about:\\n1. What new information was learned from this step's result\\n2. Whether + the remaining steps still make sense given this new information\\n3. 
What refinements, + if any, are needed for upcoming steps\\n4. Whether the overall goal has already + been achieved\\n\\nBe conservative about triggering full replans \u2014 only + do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"## + Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription: + Identify the first 3 prime numbers (2, 3, 5).\\nResult: To identify the first + three prime numbers, we need to recall the definition of a prime number: it + is a natural number greater than 1 that has no positive divisors other than + 1 and itself. \\n\\nStarting from 2, we find:\\n1. The number **2** is prime + (divisors are 1 and 2).\\n2. The number **3** is prime (divisors are 1 and 3).\\n3. + The number **4** is not prime (divisors are 1, 2, and 4).\\n4. The number **5** + is prime (divisors are 1 and 5).\\n\\nThus, the first three prime numbers are + **2, 3, and 5**. \\n\\nResult: 2, 3, 5.\\n\\n## Remaining plan steps:\\n Step + 2: Calculate the sum of the identified prime numbers (2 + 3 + 5).\\n Step 3: + Multiply the sum by 2.\\n Step 4: Output the final result.\\n\\nAnalyze this + step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's + observation after a step execution completes.\\n\\nReturned by the PlannerObserver + after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide + whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section + 3.3): the Planner observes what the Executor\\ndid and incorporates new information + into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully: + Whether the step achieved its objective.\\n key_information_learned: New + information revealed by this step\\n (e.g., \\\"Found 3 products: A, + B, C\\\"). 
Used to refine upcoming steps.\\n remaining_plan_still_valid: + Whether pending todos still make sense\\n given the new information. + True does NOT mean no refinement needed.\\n suggested_refinements: Minor + tweaks to upcoming step descriptions.\\n These are lightweight in-place + updates, not a full replan.\\n Example: [\\\"Step 3 should select product + B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan + is fundamentally wrong and must\\n be regenerated from scratch. Mutually + exclusive with\\n remaining_plan_still_valid (if this is True, that should + be False).\\n replan_reason: Explanation of why a full replan is needed (None + if not).\\n goal_already_achieved: The overall task goal has been satisfied + early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether + the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What + new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether + the remaining pending todos still make sense given new information\",\"title\":\"Remaining + Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor + tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested + Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The + remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs + Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation + of why a full replan is needed\",\"title\":\"Replan 
Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The + overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal + Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}" + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '4561' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-helper-method: + - beta.chat.completions.parse + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FCKhhkyZ4k2uH2KyhxsGnWEM7R\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330966,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The + first three prime numbers have been correctly identified as 2, 3, and 5.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n + \ \"refusal\": 
null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 960,\n \"completion_tokens\": 72,\n \"total_tokens\": 1032,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:07 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '1058' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert + math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step + math problems accurately\n\nYou are executing a specific step in a multi-step + plan. Focus ONLY on completing\nthe current step. 
Do not plan ahead or worry + about future steps.\n\nBefore acting, briefly reason about what you need to + do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"## + Current Step\nCalculate the sum of the identified prime numbers (2 + 3 + 5).\n\n## + Context from previous steps:\nStep 1 result: To identify the first three prime + numbers, we need to recall the definition of a prime number: it is a natural + number greater than 1 that has no positive divisors other than 1 and itself. + \n\nStarting from 2, we find:\n1. The number **2** is prime (divisors are 1 + and 2).\n2. The number **3** is prime (divisors are 1 and 3).\n3. The number + **4** is not prime (divisors are 1, 2, and 4).\n4. The number **5** is prime + (divisors are 1 and 5).\n\nThus, the first three prime numbers are **2, 3, and + 5**. \n\nResult: 2, 3, 5.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '1213' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FDWh7MhYTKIsLCnq6r5iXrbdrN\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330967,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"To 
calculate the sum of the identified + prime numbers (2 + 3 + 5), I will follow these steps:\\n\\n1. Add the first + two prime numbers: \\n - \\\\( 2 + 3 = 5 \\\\)\\n\\n2. Then, add the result + to the third prime number:\\n - \\\\( 5 + 5 = 10 \\\\)\\n\\nSo the sum of + the identified prime numbers (2 + 3 + 5) is **10**.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 301,\n \"completion_tokens\": 95,\n \"total_tokens\": 396,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:09 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '1470' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: 
"{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent + observing execution progress. After each step completes, you analyze what happened + and decide whether the remaining plan is still valid.\\n\\nReason step-by-step + about:\\n1. What new information was learned from this step's result\\n2. Whether + the remaining steps still make sense given this new information\\n3. What refinements, + if any, are needed for upcoming steps\\n4. Whether the overall goal has already + been achieved\\n\\nBe conservative about triggering full replans \u2014 only + do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"## + Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n + \ Step 1: Identify the first 3 prime numbers (2, 3, 5).\\n Result: To identify + the first three prime numbers, we need to recall the definition of a prime number: + it is a natural number greater than 1 that has no positive divisors other than + 1 and itself. \\n\\nStarting f\\n\\n## Just completed step 2\\nDescription: + Calculate the sum of the identified prime numbers (2 + 3 + 5).\\nResult: To + calculate the sum of the identified prime numbers (2 + 3 + 5), I will follow + these steps:\\n\\n1. Add the first two prime numbers: \\n - \\\\( 2 + 3 = + 5 \\\\)\\n\\n2. 
Then, add the result to the third prime number:\\n - \\\\( + 5 + 5 = 10 \\\\)\\n\\nSo the sum of the identified prime numbers (2 + 3 + 5) + is **10**.\\n\\n## Remaining plan steps:\\n Step 3: Multiply the sum by 2.\\n + \ Step 4: Output the final result.\\n\\nAnalyze this step's result and provide + your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's + observation after a step execution completes.\\n\\nReturned by the PlannerObserver + after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide + whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section + 3.3): the Planner observes what the Executor\\ndid and incorporates new information + into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully: + Whether the step achieved its objective.\\n key_information_learned: New + information revealed by this step\\n (e.g., \\\"Found 3 products: A, + B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid: + Whether pending todos still make sense\\n given the new information. + True does NOT mean no refinement needed.\\n suggested_refinements: Minor + tweaks to upcoming step descriptions.\\n These are lightweight in-place + updates, not a full replan.\\n Example: [\\\"Step 3 should select product + B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan + is fundamentally wrong and must\\n be regenerated from scratch. 
Mutually + exclusive with\\n remaining_plan_still_valid (if this is True, that should + be False).\\n replan_reason: Explanation of why a full replan is needed (None + if not).\\n goal_already_achieved: The overall task goal has been satisfied + early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether + the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What + new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether + the remaining pending todos still make sense given new information\",\"title\":\"Remaining + Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor + tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested + Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The + remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs + Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation + of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The + overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal + Already 
Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}" + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '4591' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-helper-method: + - beta.chat.completions.parse + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FFIa3JdCnNkh6sa0wz28i55ni1\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330969,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":false,\\\"key_information_learned\\\":\\\"The + calculation for the sum of the identified prime numbers was incorrect; it + should be 2 + 3 + 5 = 10, but there was a typo where the last addition was + mistakenly written as 5 + 5 instead of 5 + 2.\\\",\\\"remaining_plan_still_valid\\\":false,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":true,\\\"replan_reason\\\":\\\"The + remaining steps are based on an incorrect sum, making them invalid. 
The calculations + must be restarted from the correct determination of the sum of the prime numbers.\\\",\\\"goal_already_achieved\\\":false}\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 942,\n \"completion_tokens\": 135,\n \"total_tokens\": 1077,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:11 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '2300' openai-project: - OPENAI-PROJECT-XXX openai-version: diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml index 88617c427..930d5134f 100644 --- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml +++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_disabled_skips_planning.yaml @@ -42,17 +42,17 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": \"chatcmpl-D4yXGD5IrieoUDSK5hDmJyA2gJtDc\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078382,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: 
"{\n \"id\": \"chatcmpl-D62FLMJF1jiuD18qhDDxWFYzJxWk3\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330975,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\": + \"assistant\",\n \"content\": \"The sum of 5 + 5 is 10.\",\n \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": null,\n \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": - 47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\": + 47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -61,7 +61,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:26:23 GMT + - Thu, 05 Feb 2026 22:36:16 GMT Server: - cloudflare Set-Cookie: @@ -81,7 +81,113 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '363' + - '342' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: 
'{"messages":[{"role":"system","content":"You are Math Assistant. A helpful + assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent + Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '260' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FM3zRv6CP5jgOiAWIaTukuPjwP\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330976,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 
22:36:16 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '488' openai-project: - OPENAI-PROJECT-XXX openai-version: diff --git a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml index 35714d2cf..b0e7e4883 100644 --- a/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml +++ b/lib/crewai/tests/cassettes/agents/TestAgentExecutorPlanning.test_planning_handles_sequential_dependency_task.yaml @@ -5,18 +5,25 @@ interactions: a focused execution plan for the following task:\n\n## Task\nConvert 100 degrees Celsius to Fahrenheit, then round the result to the nearest 10.\n\n## Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools available\n\n## - Instructions\nCreate ONLY the essential steps needed to complete this task. - Use the MINIMUM number of steps required - do NOT pad your plan with unnecessary - steps. 
Most tasks need only 2-5 steps.\n\nFor each step:\n- State the specific - action to take\n- Specify which tool to use (if any)\n\nDo NOT include:\n- Setup - or preparation steps that are obvious\n- Verification steps unless critical\n- - Documentation or cleanup steps unless explicitly required\n- Generic steps like - \"review results\" or \"finalize output\"\n\nAfter your plan, state:\n- \"READY: - I am ready to execute the task.\" if the plan is complete\n- \"NOT READY: I - need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create - or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The - detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether - the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}' + Planning Principles\nFocus on WHAT needs to be accomplished, not HOW. Group + related actions into logical units. Fewer steps = better. Most tasks need 3-6 + steps. Hard limit: 10 steps.\n\n## Step Types (only these are valid):\n1. **Tool + Step**: Uses a tool to gather information or take action\n2. 
**Output Step**: + Synthesizes prior results into the final deliverable (usually the last step)\n\n## + Rules:\n- Each step must either USE A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine + related tool calls: \"Research A, B, and C\" = ONE step, not three\n- Combine + all synthesis into ONE final output step\n- NO standalone \"thinking\" steps + (review, verify, confirm, refine, analyze) - these happen naturally between + steps\n\nFor each step: State the action, specify the tool (if any), and note + dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create + or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A + brief summary of the overall plan."},"steps":{"type":"array","description":"List + of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step + number (1-based)"},"description":{"type":"string","description":"What to do + in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to + use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step + numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether + the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}' headers: User-Agent: - X-USER-AGENT-XXX @@ -29,7 +36,7 @@ interactions: connection: - keep-alive content-length: - - '1610' + - '2384' content-type: - application/json host: @@ -56,20 +63,25 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": 
\"chatcmpl-D4yTN8fHOefyzzhvdUOHjxdFDR2HW\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078141,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62FU2te4ww3DuIzbuySwWTIPTx6A\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330984,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Convert 100 - degrees Celsius to Fahrenheit using the formula: \\\\( F = C \\\\times \\\\frac{9}{5} - + 32 \\\\).\\n2. Round the Fahrenheit result to the nearest 10.\\n\\nREADY: - I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\": - []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n - \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 291,\n \"completion_tokens\": - 58,\n \"total_tokens\": 349,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_0LXFaxnsqT2kFmUyanui30k0\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n + \ \"arguments\": \"{\\\"plan\\\":\\\"Convert 100 degrees Celsius + to Fahrenheit and round the result to the nearest 10.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Convert + 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Round + the Fahrenheit result to the nearest 10.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Output + the final rounded temperature in Fahrenheit.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]}],\\\"ready\\\":true}\"\n + \ }\n }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 450,\n \"completion_tokens\": + 133,\n 
\"total_tokens\": 583,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -78,7 +90,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:22 GMT + - Thu, 05 Feb 2026 22:36:26 GMT Server: - cloudflare Set-Cookie: @@ -98,7 +110,7 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '1089' + - '1976' openai-project: - OPENAI-PROJECT-XXX openai-version: @@ -124,10 +136,13 @@ interactions: message: OK - request: body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise - unit conversion specialist\nYour personal goal is: Accurately convert between - units and apply transformations"},{"role":"user","content":"\nCurrent Task: - Convert 100 degrees Celsius to Fahrenheit, then round the result to the nearest - 10.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}' + unit conversion specialist\n\nYour goal: Accurately convert between units and + apply transformations\n\nYou are executing a specific step in a multi-step plan. + Focus ONLY on completing\nthe current step. 
Do not plan ahead or worry about + future steps.\n\nBefore acting, briefly reason about what you need to do and + which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"## + Current Step\nConvert 100 degrees Celsius to Fahrenheit using the formula (C + * 9/5) + 32.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}' headers: User-Agent: - X-USER-AGENT-XXX @@ -140,7 +155,7 @@ interactions: connection: - keep-alive content-length: - - '373' + - '651' content-type: - application/json cookie: @@ -169,26 +184,21 @@ interactions: uri: https://api.openai.com/v1/chat/completions response: body: - string: "{\n \"id\": \"chatcmpl-D4yTPQewXDyPdYHI4dHPH7YGHcRge\",\n \"object\": - \"chat.completion\",\n \"created\": 1770078143,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + string: "{\n \"id\": \"chatcmpl-D62FWhREtHEudJMFFypgh33C8GLdH\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330986,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": - \"assistant\",\n \"content\": \"To convert degrees Celsius to Fahrenheit, - you can use the formula:\\n\\n\\\\[ F = \\\\left( C \\\\times \\\\frac{9}{5} - \\\\right) + 32 \\\\]\\n\\nPlugging in 100 degrees Celsius:\\n\\n\\\\[ F = - \\\\left( 100 \\\\times \\\\frac{9}{5} \\\\right) + 32 \\\\]\\n\\nCalculating - that step-by-step:\\n\\n1. Multiply 100 by 9: \\n \\\\[ 100 \\\\times 9 - = 900 \\\\]\\n\\n2. Divide by 5:\\n \\\\[ 900 \\\\div 5 = 180 \\\\]\\n\\n3. - Add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nSo, 100 degrees Celsius is equal - to 212 degrees Fahrenheit.\\n\\nNow, rounding 212 to the nearest 10:\\n\\nThe - nearest multiple of 10 to 212 is 210.\\n\\nTherefore, the final result is - **210 degrees Fahrenheit**.\",\n \"refusal\": null,\n \"annotations\": + \"assistant\",\n \"content\": \"To convert 100 degrees Celsius to Fahrenheit + using the formula (C * 9/5) + 32, we substitute C with 100:\\n\\n1. 
Multiply + 100 by 9/5:\\n \\\\[ 100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2. + Then add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore, 100 degrees Celsius + is equal to 212 degrees Fahrenheit.\",\n \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n - \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 63,\n \"completion_tokens\": - 191,\n \"total_tokens\": 254,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 126,\n \"completion_tokens\": + 101,\n \"total_tokens\": 227,\n \"prompt_tokens_details\": {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": - \"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n" + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" headers: CF-RAY: - CF-RAY-XXX @@ -197,7 +207,7 @@ interactions: Content-Type: - application/json Date: - - Tue, 03 Feb 2026 00:22:26 GMT + - Thu, 05 Feb 2026 22:36:27 GMT Server: - cloudflare Strict-Transport-Security: @@ -215,7 +225,548 @@ interactions: openai-organization: - OPENAI-ORG-XXX openai-processing-ms: - - '3736' + - '1505' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent + observing 
execution progress. After each step completes, you analyze what happened + and decide whether the remaining plan is still valid.\\n\\nReason step-by-step + about:\\n1. What new information was learned from this step's result\\n2. Whether + the remaining steps still make sense given this new information\\n3. What refinements, + if any, are needed for upcoming steps\\n4. Whether the overall goal has already + been achieved\\n\\nBe conservative about triggering full replans \u2014 only + do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"## + Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription: + Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\nResult: + To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32, + we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n \\\\[ 100 * \\\\frac{9}{5} + = 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore, + 100 degrees Celsius is equal to 212 degrees Fahrenheit.\\n\\n## Remaining plan + steps:\\n Step 2: Round the Fahrenheit result to the nearest 10.\\n Step 3: + Output the final rounded temperature in Fahrenheit.\\n\\nAnalyze this step's + result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's + observation after a step execution completes.\\n\\nReturned by the PlannerObserver + after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide + whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section + 3.3): the Planner observes what the Executor\\ndid and incorporates new information + into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully: + Whether the step achieved its objective.\\n key_information_learned: New + information revealed by this step\\n (e.g., \\\"Found 3 
products: A, + B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid: + Whether pending todos still make sense\\n given the new information. + True does NOT mean no refinement needed.\\n suggested_refinements: Minor + tweaks to upcoming step descriptions.\\n These are lightweight in-place + updates, not a full replan.\\n Example: [\\\"Step 3 should select product + B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan + is fundamentally wrong and must\\n be regenerated from scratch. Mutually + exclusive with\\n remaining_plan_still_valid (if this is True, that should + be False).\\n replan_reason: Explanation of why a full replan is needed (None + if not).\\n goal_already_achieved: The overall task goal has been satisfied + early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether + the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What + new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether + the remaining pending todos still make sense given new information\",\"title\":\"Remaining + Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor + tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested + Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The + remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs + Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation + of why a full replan is 
needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The + overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal + Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}" + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '4342' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-helper-method: + - beta.chat.completions.parse + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FXF5UZlLp9eu5O7HsZvIvpC4My\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330987,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully + converted 100 degrees Celsius to 212 degrees Fahrenheit.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":[\\\"Step + 2 should round 212 to the nearest 10, resulting in 
210.\\\"],\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 885,\n \"completion_tokens\": 81,\n \"total_tokens\": 966,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:29 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '2195' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are refining upcoming plan + steps based on new information. Update the step descriptions to be more specific + and actionable given what was learned. 
Keep the same step numbers.\n\nRespond + with one line per step in the format:\nStep N: "},{"role":"user","content":"## + New information learned\nSuccessfully converted 100 degrees Celsius to 212 degrees + Fahrenheit.\n\n## Suggested refinements\nStep 2 should round 212 to the nearest + 10, resulting in 210.\n\n## Current pending steps\nStep 2: Round the Fahrenheit + result to the nearest 10.\nStep 3: Output the final rounded temperature in Fahrenheit.\n\nUpdate + the step descriptions to incorporate the new information."}],"model":"gpt-4o-mini"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '754' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FaclC9tg2ClH7HU3pfMzmlPJpB\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330990,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Step 2: Round the Fahrenheit result + of 212 degrees to the nearest 10, resulting in 210 degrees. 
\\nStep 3: Output + the final rounded temperature as 210 degrees Fahrenheit.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 142,\n \"completion_tokens\": 40,\n \"total_tokens\": 182,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:30 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '706' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise + unit conversion specialist\n\nYour goal: Accurately convert between units and + apply transformations\n\nYou are executing a specific step in a multi-step plan. 
+ Focus ONLY on completing\nthe current step. Do not plan ahead or worry about + future steps.\n\nBefore acting, briefly reason about what you need to do and + which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"## + Current Step\nRound the Fahrenheit result of 212 degrees to the nearest 10, + resulting in 210 degrees.\n\n## Context from previous steps:\nStep 1 result: + To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32, + we substitute C with 100:\n\n1. Multiply 100 by 9/5:\n \\[ 100 * \\frac{9}{5} + = 100 * 1.8 = 180 \\]\n\n2. Then add 32:\n \\[ 180 + 32 = 212 \\]\n\nTherefore, + 100 degrees Celsius is equal to 212 degrees Fahrenheit.\n\nComplete this step + and provide your result."}],"model":"gpt-4o-mini"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '1011' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62Fb9PlGlUIcZRS2v2Lp9S62brRP\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330991,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"To complete this step, I will round + the Fahrenheit result of 212 degrees to the nearest 10. 
When rounding, since + 212 is closer to 210 than it is to 220, I will round it down to 210 degrees.\\n\\nResult: + 210 degrees Fahrenheit.\",\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 236,\n \"completion_tokens\": + 56,\n \"total_tokens\": 292,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:32 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '1187' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent + observing execution progress. 
After each step completes, you analyze what happened + and decide whether the remaining plan is still valid.\\n\\nReason step-by-step + about:\\n1. What new information was learned from this step's result\\n2. Whether + the remaining steps still make sense given this new information\\n3. What refinements, + if any, are needed for upcoming steps\\n4. Whether the overall goal has already + been achieved\\n\\nBe conservative about triggering full replans \u2014 only + do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"## + Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n + \ Step 1: Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + + 32.\\n Result: To convert 100 degrees Celsius to Fahrenheit using the formula + (C * 9/5) + 32, we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n \\\\[ + 100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n \\\\[ + 18\\n\\n## Just completed step 2\\nDescription: Round the Fahrenheit result + of 212 degrees to the nearest 10, resulting in 210 degrees.\\nResult: To complete + this step, I will round the Fahrenheit result of 212 degrees to the nearest + 10. 
When rounding, since 212 is closer to 210 than it is to 220, I will round + it down to 210 degrees.\\n\\nResult: 210 degrees Fahrenheit.\\n\\n## Remaining + plan steps:\\n Step 3: Output the final rounded temperature as 210 degrees + Fahrenheit.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's + observation after a step execution completes.\\n\\nReturned by the PlannerObserver + after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide + whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section + 3.3): the Planner observes what the Executor\\ndid and incorporates new information + into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully: + Whether the step achieved its objective.\\n key_information_learned: New + information revealed by this step\\n (e.g., \\\"Found 3 products: A, + B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid: + Whether pending todos still make sense\\n given the new information. + True does NOT mean no refinement needed.\\n suggested_refinements: Minor + tweaks to upcoming step descriptions.\\n These are lightweight in-place + updates, not a full replan.\\n Example: [\\\"Step 3 should select product + B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan + is fundamentally wrong and must\\n be regenerated from scratch. 
Mutually + exclusive with\\n remaining_plan_still_valid (if this is True, that should + be False).\\n replan_reason: Explanation of why a full replan is needed (None + if not).\\n goal_already_achieved: The overall task goal has been satisfied + early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether + the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What + new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether + the remaining pending todos still make sense given new information\",\"title\":\"Remaining + Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor + tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested + Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The + remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs + Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation + of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The + overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal + Already 
Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}" + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '4579' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-helper-method: + - beta.chat.completions.parse + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D62FctLDvklBSvOY641JCvwFaTugO\",\n \"object\": + \"chat.completion\",\n \"created\": 1770330992,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully + rounded the Fahrenheit result of 212 degrees down to 210 degrees.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 941,\n \"completion_tokens\": 67,\n 
\"total_tokens\": 1008,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 05 Feb 2026 22:36:33 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '1208' openai-project: - OPENAI-PROJECT-XXX openai-version: