feat: introduce PlannerObserver and StepExecutor for enhanced plan execution

This commit adds the PlannerObserver and StepExecutor classes to the CrewAI framework, implementing the observation phase of the Plan-and-Execute architecture. The PlannerObserver analyzes step execution results, determines plan validity, and suggests refinements, while the StepExecutor executes individual todo items in isolation. These additions improve the overall planning and execution process, allowing for more dynamic and responsive agent behavior. Additionally, new observation events have been defined to facilitate monitoring and logging of the planning process.
This commit is contained in:
lorenzejay
2026-02-05 15:46:21 -08:00
parent 81d9fd4ab3
commit 8e1474d371
16 changed files with 4120 additions and 360 deletions

View File

@@ -0,0 +1,355 @@
"""PlannerObserver: Observation phase after each step execution.
Implements the "Observe" phase from PLAN-AND-ACT (Section 3.3). After every
step execution, the Planner analyzes what happened, what new information was
learned, and whether the remaining plan is still valid.
This is NOT an error detector — it runs on every step, including successes,
to incorporate runtime observations into the remaining plan.
"""
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Any
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.observation_events import (
StepObservationCompletedEvent,
StepObservationFailedEvent,
StepObservationStartedEvent,
)
from crewai.utilities.llm_utils import create_llm
from crewai.utilities.planning_types import StepObservation, TodoItem
from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai.agent import Agent
from crewai.task import Task
logger = logging.getLogger(__name__)
class PlannerObserver:
    """Observes step execution results and decides on plan continuation.

    After EVERY step execution, this class:
    1. Analyzes what the step accomplished
    2. Identifies new information learned
    3. Decides if the remaining plan is still valid
    4. Suggests lightweight refinements or triggers full replanning

    LLM resolution (magical fallback):
    - If ``agent.planning_config.llm`` is explicitly set → use that
    - Otherwise → fall back to ``agent.llm`` (same LLM for everything)

    Args:
        agent: The agent instance (for LLM resolution and config).
        task: Optional task context (for description and expected output).
    """

    def __init__(self, agent: Agent, task: Task | None = None) -> None:
        self.agent = agent
        self.task = task
        # Resolved once at construction; observe()/refine_todos() reuse it.
        self.llm = self._resolve_llm()

    def _resolve_llm(self) -> Any:
        """Resolve which LLM to use for observation/planning.

        Mirrors AgentReasoning._resolve_llm(): uses planning_config.llm
        if explicitly set, otherwise falls back to agent.llm.

        Returns:
            The resolved LLM instance.
        """
        # Local import to avoid a module-level import cycle with crewai.llm.
        from crewai.llm import LLM

        config = getattr(self.agent, "planning_config", None)
        if config is not None and config.llm is not None:
            if isinstance(config.llm, LLM):
                return config.llm
            # planning_config.llm may be a model name / spec — build an LLM.
            return create_llm(config.llm)
        return self.agent.llm

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def observe(
        self,
        completed_step: TodoItem,
        result: str,
        all_completed: list[TodoItem],
        remaining_todos: list[TodoItem],
    ) -> StepObservation:
        """Observe a step's result and decide on plan continuation.

        This runs after EVERY step execution — not just failures.

        Args:
            completed_step: The todo item that was just executed.
            result: The final result string from the step.
            all_completed: All previously completed todos (for context).
            remaining_todos: The pending todos still in the plan.

        Returns:
            StepObservation with the Planner's analysis. If the LLM call
            fails, a permissive default ("continue as planned") is returned
            so observation never blocks execution.
        """
        agent_role = self.agent.role if self.agent else "unknown"
        # Emit observation started event
        crewai_event_bus.emit(
            self.agent,
            event=StepObservationStartedEvent(
                agent_role=agent_role,
                step_number=completed_step.step_number,
                step_description=completed_step.description,
                from_task=self.task,
                from_agent=self.agent,
            ),
        )
        messages = self._build_observation_messages(
            completed_step, result, all_completed, remaining_todos
        )
        try:
            response = self.llm.call(
                messages,
                response_model=StepObservation,
                from_task=self.task,
                from_agent=self.agent,
            )
            if isinstance(response, StepObservation):
                observation = response
            else:
                # If the LLM returned raw text instead of structured output,
                # parse it conservatively: assume success and a valid plan,
                # keep the raw text as "information learned".
                observation = StepObservation(
                    step_completed_successfully=True,
                    key_information_learned=str(response) if response else "",
                    remaining_plan_still_valid=True,
                )
            # Emit observation completed event
            crewai_event_bus.emit(
                self.agent,
                event=StepObservationCompletedEvent(
                    agent_role=agent_role,
                    step_number=completed_step.step_number,
                    step_description=completed_step.description,
                    step_completed_successfully=observation.step_completed_successfully,
                    key_information_learned=observation.key_information_learned,
                    remaining_plan_still_valid=observation.remaining_plan_still_valid,
                    needs_full_replan=observation.needs_full_replan,
                    replan_reason=observation.replan_reason,
                    goal_already_achieved=observation.goal_already_achieved,
                    suggested_refinements=observation.suggested_refinements,
                    from_task=self.task,
                    from_agent=self.agent,
                ),
            )
            return observation
        except Exception as e:
            logger.warning(f"Observation LLM call failed: {e}. Defaulting to continue.")
            # Emit observation failed event
            crewai_event_bus.emit(
                self.agent,
                event=StepObservationFailedEvent(
                    agent_role=agent_role,
                    step_number=completed_step.step_number,
                    step_description=completed_step.description,
                    error=str(e),
                    from_task=self.task,
                    from_agent=self.agent,
                ),
            )
            # Best-effort default: do not derail execution because the
            # observation itself failed.
            return StepObservation(
                step_completed_successfully=True,
                key_information_learned="",
                remaining_plan_still_valid=True,
            )

    def refine_todos(
        self,
        observation: StepObservation,
        remaining_todos: list[TodoItem],
    ) -> list[TodoItem]:
        """Refine pending todo descriptions based on observation.

        This is a LIGHTWEIGHT operation — no full replan. It updates the
        description field of pending todos based on new information learned.

        Example: Step 1 found "3 products: A, B, C" → Step 2 changes from
        "Select the best product" to "Select product B (highest rated)"

        Args:
            observation: The observation with suggested refinements.
            remaining_todos: The pending todos to refine.

        Returns:
            The refined todo list (same objects, updated descriptions).
        """
        if not observation.suggested_refinements:
            return remaining_todos
        # Ask the LLM to apply the refinements to the todo descriptions
        messages = self._build_refinement_messages(observation, remaining_todos)
        try:
            response = self.llm.call(
                messages,
                from_task=self.task,
                from_agent=self.agent,
            )
            if response:
                # Parse the LLM's refined descriptions and apply them in place
                self._apply_refinements(str(response), remaining_todos)
        except Exception as e:
            # Refinement is optional polish; failure keeps the plan usable.
            logger.warning(
                f"Refinement LLM call failed: {e}. Keeping original descriptions."
            )
        return remaining_todos

    # ------------------------------------------------------------------
    # Internal: Message building
    # ------------------------------------------------------------------
    def _build_observation_messages(
        self,
        completed_step: TodoItem,
        result: str,
        all_completed: list[TodoItem],
        remaining_todos: list[TodoItem],
    ) -> list[LLMMessage]:
        """Build messages for the observation LLM call."""
        task_desc = ""
        task_goal = ""
        if self.task:
            task_desc = self.task.description or ""
            task_goal = self.task.expected_output or ""
        system_prompt = (
            "You are a Planning Agent observing execution progress. "
            "After each step completes, you analyze what happened and decide "
            "whether the remaining plan is still valid.\n\n"
            "Reason step-by-step about:\n"
            "1. What new information was learned from this step's result\n"
            "2. Whether the remaining steps still make sense given this new information\n"
            "3. What refinements, if any, are needed for upcoming steps\n"
            "4. Whether the overall goal has already been achieved\n\n"
            "Be conservative about triggering full replans — only do so when the "
            "remaining plan is fundamentally wrong, not just suboptimal."
        )
        # Build context of what's been done
        completed_summary = ""
        if all_completed:
            completed_lines = []
            for todo in all_completed:
                # Truncate each prior result so the prompt stays bounded.
                result_preview = (todo.result or "")[:200]
                completed_lines.append(
                    f"  Step {todo.step_number}: {todo.description}\n"
                    f"    Result: {result_preview}"
                )
            completed_summary = "\n## Previously completed steps:\n" + "\n".join(
                completed_lines
            )
        # Build remaining plan
        remaining_summary = ""
        if remaining_todos:
            remaining_lines = [
                f"  Step {todo.step_number}: {todo.description}"
                for todo in remaining_todos
            ]
            remaining_summary = "\n## Remaining plan steps:\n" + "\n".join(
                remaining_lines
            )
        user_prompt = (
            f"## Original task\n{task_desc}\n\n"
            f"## Expected output\n{task_goal}\n"
            f"{completed_summary}\n"
            f"\n## Just completed step {completed_step.step_number}\n"
            f"Description: {completed_step.description}\n"
            f"Result: {result}\n"
            f"{remaining_summary}\n\n"
            "Analyze this step's result and provide your observation."
        )
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    def _build_refinement_messages(
        self,
        observation: StepObservation,
        remaining_todos: list[TodoItem],
    ) -> list[LLMMessage]:
        """Build messages for the refinement LLM call."""
        system_prompt = (
            "You are refining upcoming plan steps based on new information. "
            "Update the step descriptions to be more specific and actionable "
            "given what was learned. Keep the same step numbers.\n\n"
            "Respond with one line per step in the format:\n"
            "Step N: <refined description>"
        )
        refinements = "\n".join(observation.suggested_refinements or [])
        todo_lines = "\n".join(
            f"Step {t.step_number}: {t.description}" for t in remaining_todos
        )
        user_prompt = (
            f"## New information learned\n{observation.key_information_learned}\n\n"
            f"## Suggested refinements\n{refinements}\n\n"
            f"## Current pending steps\n{todo_lines}\n\n"
            "Update the step descriptions to incorporate the new information."
        )
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]

    def _apply_refinements(
        self,
        llm_response: str,
        remaining_todos: list[TodoItem],
    ) -> None:
        """Parse LLM refinement response and update todo descriptions in place.

        Expects one refinement per line in the format ``Step N: <description>``.
        Tolerates common LLM formatting drift: leading markdown bullets
        (``-``, ``*``, ``•``) and a lowercase ``step`` prefix. Lines that do
        not parse, or reference unknown step numbers, are skipped silently —
        refinement is best-effort.
        """
        # Build lookup for quick access
        todo_by_step: dict[int, TodoItem] = {t.step_number: t for t in remaining_todos}
        for raw_line in llm_response.strip().split("\n"):
            # Strip whitespace plus any leading markdown bullet/emphasis chars.
            line = raw_line.strip().lstrip("-*•").strip()
            if not line.lower().startswith("step"):
                continue
            # Parse "Step N: description"
            try:
                parts = line.split(":", 1)
                if len(parts) < 2:
                    continue
                step_part = parts[0].strip()  # e.g. "Step N" / "step N"
                description = parts[1].strip()
                # Drop the 4-char "step" prefix (any case), keep the number.
                step_num = int(step_part[4:].strip())
                if step_num in todo_by_step and description:
                    todo_by_step[step_num].description = description
            except (ValueError, IndexError):
                continue

View File

@@ -0,0 +1,703 @@
"""StepExecutor: Isolated executor for a single plan step.
Implements a bounded ReAct loop scoped to ONE todo item. The tool execution
machinery (native function calling, text-parsed tools, caching, hooks) lives
here — moved from AgentExecutor so the outer Plan-and-Execute loop stays clean.
Based on PLAN-AND-ACT (Section 3.2): The Executor translates high-level plan
steps into concrete environment actions.
"""
from __future__ import annotations
from collections.abc import Callable
from datetime import datetime
import json
import time
from typing import TYPE_CHECKING, Any
from pydantic import BaseModel
from crewai.agents.parser import (
AgentAction,
AgentFinish,
)
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.tool_usage_events import (
ToolUsageErrorEvent,
ToolUsageFinishedEvent,
ToolUsageStartedEvent,
)
from crewai.hooks.tool_hooks import (
ToolCallHookContext,
get_after_tool_call_hooks,
get_before_tool_call_hooks,
)
from crewai.utilities.agent_utils import (
convert_tools_to_openai_schema,
enforce_rpm_limit,
extract_tool_call_info,
format_message_for_llm,
process_llm_response,
track_delegation_if_needed,
)
from crewai.utilities.i18n import I18N, get_i18n
from crewai.utilities.planning_types import TodoItem
from crewai.utilities.printer import Printer
from crewai.utilities.step_execution_context import StepExecutionContext, StepResult
from crewai.utilities.string_utils import sanitize_tool_name
from crewai.utilities.tool_utils import execute_tool_and_check_finality
from crewai.utilities.types import LLMMessage
if TYPE_CHECKING:
from crewai.agent import Agent
from crewai.agents.tools_handler import ToolsHandler
from crewai.crew import Crew
from crewai.llms.base_llm import BaseLLM
from crewai.task import Task
from crewai.tools.base_tool import BaseTool
from crewai.tools.structured_tool import CrewStructuredTool
# Maximum number of tool-call iterations within a single step; when reached,
# the executor stops looping and forces a final answer from the LLM.
_MAX_STEP_ITERATIONS: int = 10
class StepExecutor:
    """Executes a SINGLE todo item in isolation using a bounded ReAct loop.

    The StepExecutor owns its own message list per invocation. It never reads
    or writes the AgentExecutor's state. Results flow back via StepResult.

    The internal loop:
    1. Build messages from todo + context
    2. Call LLM (with or without native tools)
    3. If tool call → execute tool, append result, loop back to 2
    4. If final answer → return StepResult
    5. If max iterations → force final answer

    Args:
        llm: The language model to use for execution.
        tools: Structured tools available to the executor.
        agent: The agent instance (for role/goal/verbose/config).
        original_tools: Original BaseTool instances (needed for native tool schema).
        tools_handler: Optional tools handler for caching and delegation tracking.
        task: Optional task context.
        crew: Optional crew context.
        function_calling_llm: Optional separate LLM for function calling.
        request_within_rpm_limit: Optional RPM limit function.
        callbacks: Optional list of callbacks.
        i18n: Optional translations container; defaults to the global one.
    """

    def __init__(
        self,
        llm: BaseLLM,
        tools: list[CrewStructuredTool],
        agent: Agent,
        original_tools: list[BaseTool] | None = None,
        tools_handler: ToolsHandler | None = None,
        task: Task | None = None,
        crew: Crew | None = None,
        function_calling_llm: BaseLLM | Any | None = None,
        request_within_rpm_limit: Callable[[], bool] | None = None,
        callbacks: list[Any] | None = None,
        i18n: I18N | None = None,
    ) -> None:
        self.llm = llm
        self.tools = tools
        self.agent = agent
        self.original_tools = original_tools or []
        self.tools_handler = tools_handler
        self.task = task
        self.crew = crew
        self.function_calling_llm = function_calling_llm
        self.request_within_rpm_limit = request_within_rpm_limit
        self.callbacks = callbacks or []
        self._i18n: I18N = i18n or get_i18n()
        self._printer: Printer = Printer()
        # Native tool support — set up once; falls back to text-parsed ReAct
        # when the LLM lacks function calling or no original tools were given.
        self._use_native_tools = self._check_native_tool_support()
        self._openai_tools: list[dict[str, Any]] = []
        self._available_functions: dict[str, Callable[..., Any]] = {}
        if self._use_native_tools:
            self._setup_native_tools()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def execute(self, todo: TodoItem, context: StepExecutionContext) -> StepResult:
        """Execute a single todo item in isolation.

        Builds a fresh message list, runs a bounded ReAct loop, and returns
        the result. Never touches external state.

        Args:
            todo: The todo item to execute.
            context: Immutable context with task info and dependency results.

        Returns:
            StepResult with the outcome.
        """
        # monotonic clock: immune to wall-clock adjustments while timing
        start_time = time.monotonic()
        tool_calls_made: list[str] = []
        try:
            messages = self._build_isolated_messages(todo, context)
            result_text = self._run_react_loop(todo, messages, tool_calls_made)
            elapsed = time.monotonic() - start_time
            return StepResult(
                success=True,
                result=result_text,
                tool_calls_made=tool_calls_made,
                execution_time=elapsed,
            )
        except Exception as e:
            # Failures are captured in the StepResult rather than raised so
            # the outer Plan-and-Execute loop can decide how to proceed.
            elapsed = time.monotonic() - start_time
            return StepResult(
                success=False,
                result="",
                error=str(e),
                tool_calls_made=tool_calls_made,
                execution_time=elapsed,
            )

    # ------------------------------------------------------------------
    # Internal: Message building
    # ------------------------------------------------------------------
    def _build_isolated_messages(
        self, todo: TodoItem, context: StepExecutionContext
    ) -> list[LLMMessage]:
        """Build a fresh message list for this step's execution.

        System prompt tells the LLM it is an Executor focused on one step.
        User prompt provides the step description, dependencies, and tools.
        """
        system_prompt = self._build_system_prompt()
        user_prompt = self._build_user_prompt(todo, context)
        messages: list[LLMMessage] = [
            format_message_for_llm(system_prompt, role="system"),
            format_message_for_llm(user_prompt, role="user"),
        ]
        return messages

    def _build_system_prompt(self) -> str:
        """Build the Executor's system prompt.

        Emphasizes: complete THIS step only. Do not plan ahead.
        Includes CoT reasoning instruction (per PLAN-AND-ACT Section 3.4).
        """
        role = self.agent.role if self.agent else "Assistant"
        goal = self.agent.goal if self.agent else "Complete tasks efficiently"
        backstory = getattr(self.agent, "backstory", "") or ""
        # Text-mode tool instructions are only needed when native function
        # calling is unavailable; native mode passes schemas to the LLM call.
        tools_section = ""
        if self.tools and not self._use_native_tools:
            tool_names = ", ".join(sanitize_tool_name(t.name) for t in self.tools)
            tools_section = f"\n\nAvailable tools: {tool_names}"
            tools_section += "\n\nTo use a tool, respond with:\nThought: <your reasoning>\nAction: <tool_name>\nAction Input: <input>"
            tools_section += "\n\nWhen you have the final answer, respond with:\nThought: <your reasoning>\nFinal Answer: <your answer>"
        return f"""You are {role}. {backstory}
Your goal: {goal}
You are executing a specific step in a multi-step plan. Focus ONLY on completing
the current step. Do not plan ahead or worry about future steps.
Before acting, briefly reason about what you need to do and which approach
or tool would be most helpful for this specific step.{tools_section}"""

    def _build_user_prompt(self, todo: TodoItem, context: StepExecutionContext) -> str:
        """Build the user prompt for this specific step."""
        parts: list[str] = []
        parts.append(f"## Current Step\n{todo.description}")
        if todo.tool_to_use:
            parts.append(f"\nSuggested tool: {todo.tool_to_use}")
        # Include dependency results (final results only, no traces)
        if context.dependency_results:
            parts.append("\n## Context from previous steps:")
            for step_num, result in sorted(context.dependency_results.items()):
                parts.append(f"Step {step_num} result: {result}")
        parts.append("\nComplete this step and provide your result.")
        return "\n".join(parts)

    # ------------------------------------------------------------------
    # Internal: Bounded ReAct loop
    # ------------------------------------------------------------------
    def _run_react_loop(
        self,
        todo: TodoItem,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str:
        """Run a bounded ReAct loop for a single step.

        Returns the final answer text.

        NOTE(review): ``todo`` is currently unused here (the step details are
        already baked into ``messages``) — confirm whether it is kept for
        future use or can be dropped.
        """
        for iteration in range(_MAX_STEP_ITERATIONS):
            enforce_rpm_limit(self.request_within_rpm_limit)
            if self._use_native_tools:
                result = self._native_tool_iteration(messages, tool_calls_made)
            else:
                result = self._text_parsed_iteration(messages, tool_calls_made)
            if result is not None:
                # Got a final answer
                return result
            # No final answer yet — loop continues with updated messages
        # Max iterations reached — force a final answer
        return self._force_final_answer(messages)

    def _text_parsed_iteration(
        self,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Single iteration using text-parsed tool calling.

        Returns final answer string if done, None to continue looping.
        """
        # NOTE(review): this try/except only re-raises — it is a no-op and
        # could be removed.
        try:
            answer = self.llm.call(
                messages,
                callbacks=self.callbacks,
                from_task=self.task,
                from_agent=self.agent,
            )
        except Exception:
            raise
        if not answer:
            raise ValueError("Empty response from LLM")
        answer_str = str(answer)
        use_stop_words = self.llm.supports_stop_words() if self.llm else False
        formatted = process_llm_response(answer_str, use_stop_words)
        if isinstance(formatted, AgentFinish):
            return str(formatted.output)
        if isinstance(formatted, AgentAction):
            # Execute the tool
            tool_calls_made.append(formatted.tool)
            fingerprint_context = {}
            if (
                self.agent
                and hasattr(self.agent, "security_config")
                and hasattr(self.agent.security_config, "fingerprint")
            ):
                fingerprint_context = {
                    "agent_fingerprint": str(self.agent.security_config.fingerprint)
                }
            tool_result = execute_tool_and_check_finality(
                agent_action=formatted,
                fingerprint_context=fingerprint_context,
                tools=self.tools,
                i18n=self._i18n,
                agent_key=self.agent.key if self.agent else None,
                agent_role=self.agent.role if self.agent else None,
                tools_handler=self.tools_handler,
                task=self.task,
                agent=self.agent,
                function_calling_llm=self.function_calling_llm,
                crew=self.crew,
            )
            # Append observation to messages
            observation = f"Observation: {tool_result.result}"
            messages.append(
                format_message_for_llm(
                    formatted.text + f"\n{observation}",
                    role="assistant",
                )
            )
            # A tool flagged result_as_answer short-circuits the loop.
            if tool_result.result_as_answer:
                return str(tool_result.result)
            # Add reasoning prompt for next iteration
            reasoning_prompt = self._i18n.slice("post_tool_reasoning")
            messages.append(format_message_for_llm(reasoning_prompt, role="user"))
            return None  # Continue looping
        return answer_str  # Fallback: treat as final answer

    def _native_tool_iteration(
        self,
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Single iteration using native function calling.

        Returns final answer string if done, None to continue looping.
        """
        # NOTE(review): this try/except only re-raises — it is a no-op and
        # could be removed.
        try:
            answer = self.llm.call(
                messages,
                tools=self._openai_tools,
                callbacks=self.callbacks,
                from_task=self.task,
                from_agent=self.agent,
            )
        except Exception:
            raise
        if not answer:
            raise ValueError("Empty response from LLM")
        # Check if the response is a list of tool calls
        if isinstance(answer, list) and answer and self._is_tool_call_list(answer):
            return self._execute_native_tool_calls(answer, messages, tool_calls_made)
        # Text response — this is the final answer
        if isinstance(answer, str):
            return answer
        # BaseModel response (structured output) — serialize it as the answer
        if isinstance(answer, BaseModel):
            return answer.model_dump_json()
        return str(answer)

    def _execute_native_tool_calls(
        self,
        tool_calls: list[Any],
        messages: list[LLMMessage],
        tool_calls_made: list[str],
    ) -> str | None:
        """Execute a batch of native tool calls and append results to messages.

        Returns final answer string if a tool has result_as_answer, else None.
        """
        # Build assistant message with tool calls
        tool_calls_to_report: list[dict[str, Any]] = []
        for tool_call in tool_calls:
            info = extract_tool_call_info(tool_call)
            if not info:
                continue
            call_id, func_name, func_args = info
            tool_calls_to_report.append(
                {
                    "id": call_id,
                    "type": "function",
                    "function": {
                        "name": func_name,
                        # Providers may deliver args pre-serialized or as a dict
                        "arguments": func_args
                        if isinstance(func_args, str)
                        else json.dumps(func_args),
                    },
                }
            )
        if tool_calls_to_report:
            assistant_message: LLMMessage = {
                "role": "assistant",
                "content": None,
                "tool_calls": tool_calls_to_report,
            }
            # Preserve raw parts for Gemini compatibility
            if all(type(tc).__qualname__ == "Part" for tc in tool_calls):
                assistant_message["raw_tool_call_parts"] = list(tool_calls)
            messages.append(assistant_message)
        # Execute each tool call
        final_answer: str | None = None
        for tool_call in tool_calls:
            info = extract_tool_call_info(tool_call)
            if not info:
                continue
            call_id, func_name, func_args = info
            tool_calls_made.append(func_name)
            # Parse arguments; unparseable JSON degrades to empty kwargs
            if isinstance(func_args, str):
                try:
                    args_dict = json.loads(func_args)
                except json.JSONDecodeError:
                    args_dict = {}
            else:
                args_dict = func_args
            agent_key = (
                getattr(self.agent, "key", "unknown") if self.agent else "unknown"
            )
            # Find original tool for cache_function and result_as_answer
            original_tool = None
            for tool in self.original_tools:
                if sanitize_tool_name(tool.name) == func_name:
                    original_tool = tool
                    break
            # Check max usage count
            max_usage_reached = False
            if (
                original_tool
                and original_tool.max_usage_count is not None
                and original_tool.current_usage_count >= original_tool.max_usage_count
            ):
                max_usage_reached = True
            # Check cache; a hit skips execution below but still fires events
            from_cache = False
            input_str = json.dumps(args_dict) if args_dict else ""
            result = "Tool not found"
            if self.tools_handler and self.tools_handler.cache:
                cached_result = self.tools_handler.cache.read(
                    tool=func_name, input=input_str
                )
                if cached_result is not None:
                    result = (
                        str(cached_result)
                        if not isinstance(cached_result, str)
                        else cached_result
                    )
                    from_cache = True
            # Emit tool started event
            started_at = datetime.now()
            crewai_event_bus.emit(
                self,
                event=ToolUsageStartedEvent(
                    tool_name=func_name,
                    tool_args=args_dict,
                    from_agent=self.agent,
                    from_task=self.task,
                    agent_key=agent_key,
                ),
            )
            track_delegation_if_needed(func_name, args_dict, self.task)
            # Find structured tool for hooks
            structured_tool: CrewStructuredTool | None = None
            for structured in self.tools or []:
                if sanitize_tool_name(structured.name) == func_name:
                    structured_tool = structured
                    break
            # Before hooks: any hook returning False blocks the call;
            # hook errors are deliberately swallowed (hooks are best-effort)
            hook_blocked = False
            before_hook_context = ToolCallHookContext(
                tool_name=func_name,
                tool_input=args_dict,
                tool=structured_tool,  # type: ignore[arg-type]
                agent=self.agent,
                task=self.task,
                crew=self.crew,
            )
            try:
                for hook in get_before_tool_call_hooks():
                    if hook(before_hook_context) is False:
                        hook_blocked = True
                        break
            except Exception:
                pass
            if hook_blocked:
                result = f"Tool execution blocked by hook. Tool: {func_name}"
            elif not from_cache and not max_usage_reached:
                if func_name in self._available_functions:
                    try:
                        tool_func = self._available_functions[func_name]
                        raw_result = tool_func(**args_dict)
                        # Cache result (tool's cache_function may veto caching)
                        if self.tools_handler and self.tools_handler.cache:
                            should_cache = True
                            if original_tool:
                                should_cache = original_tool.cache_function(
                                    args_dict, raw_result
                                )
                            if should_cache:
                                self.tools_handler.cache.add(
                                    tool=func_name, input=input_str, output=raw_result
                                )
                        result = (
                            str(raw_result)
                            if not isinstance(raw_result, str)
                            else raw_result
                        )
                    except Exception as e:
                        # Tool failure becomes an observation for the LLM,
                        # not a crash of the step.
                        result = f"Error executing tool: {e}"
                        if self.task:
                            self.task.increment_tools_errors()
                        crewai_event_bus.emit(
                            self,
                            event=ToolUsageErrorEvent(
                                tool_name=func_name,
                                tool_args=args_dict,
                                from_agent=self.agent,
                                from_task=self.task,
                                agent_key=agent_key,
                                error=e,
                            ),
                        )
            elif max_usage_reached and original_tool:
                result = (
                    f"Tool '{func_name}' has reached its usage limit of "
                    f"{original_tool.max_usage_count} times and cannot be used anymore."
                )
            # After hooks: may transform the result; errors are swallowed
            after_hook_context = ToolCallHookContext(
                tool_name=func_name,
                tool_input=args_dict,
                tool=structured_tool,  # type: ignore[arg-type]
                agent=self.agent,
                task=self.task,
                crew=self.crew,
                tool_result=result,
            )
            try:
                for after_hook in get_after_tool_call_hooks():
                    hook_result = after_hook(after_hook_context)
                    if hook_result is not None:
                        result = hook_result
                        after_hook_context.tool_result = result
            except Exception:
                pass
            # Emit tool finished event
            crewai_event_bus.emit(
                self,
                event=ToolUsageFinishedEvent(
                    output=result,
                    tool_name=func_name,
                    tool_args=args_dict,
                    from_agent=self.agent,
                    from_task=self.task,
                    agent_key=agent_key,
                    started_at=started_at,
                    finished_at=datetime.now(),
                ),
            )
            # Append tool result message
            tool_message: LLMMessage = {
                "role": "tool",
                "tool_call_id": call_id,
                "name": func_name,
                "content": result,
            }
            messages.append(tool_message)
            if self.agent and self.agent.verbose:
                cache_info = " (from cache)" if from_cache else ""
                self._printer.print(
                    content=f"Tool {func_name} executed with result{cache_info}: {result[:200]}...",
                    color="green",
                )
            # Check result_as_answer — the LAST such tool in the batch wins
            if (
                original_tool
                and hasattr(original_tool, "result_as_answer")
                and original_tool.result_as_answer
            ):
                final_answer = result
        if final_answer is not None:
            return final_answer
        return None  # Continue looping

    def _force_final_answer(self, messages: list[LLMMessage]) -> str:
        """Force the LLM to provide a final answer when max iterations reached."""
        force_prompt = (
            "You have used the maximum number of tool calls for this step. "
            "Based on the information gathered so far, provide your final answer now."
        )
        # In text-parsed mode, prime the ReAct "Final Answer:" format.
        if not self._use_native_tools:
            force_prompt += "\n\nFinal Answer: "
        messages.append(format_message_for_llm(force_prompt, role="user"))
        try:
            answer = self.llm.call(
                messages,
                callbacks=self.callbacks,
                from_task=self.task,
                from_agent=self.agent,
            )
            if answer:
                answer_str = str(answer)
                # Try to extract just the final answer portion
                if "Final Answer:" in answer_str:
                    return answer_str.split("Final Answer:")[-1].strip()
                return answer_str
        except Exception:
            pass
        # Last resort when even the forced call fails or returns nothing.
        return "Step could not be completed within the iteration limit."

    # ------------------------------------------------------------------
    # Internal: Native tool support
    # ------------------------------------------------------------------
    def _check_native_tool_support(self) -> bool:
        """Check if LLM supports native function calling.

        Requires both a capable LLM and at least one original tool to
        convert into a schema.
        """
        return (
            hasattr(self.llm, "supports_function_calling")
            and callable(getattr(self.llm, "supports_function_calling", None))
            and self.llm.supports_function_calling()
            and bool(self.original_tools)
        )

    def _setup_native_tools(self) -> None:
        """Convert tools to OpenAI schema format for native function calling."""
        if self.original_tools:
            self._openai_tools, self._available_functions = (
                convert_tools_to_openai_schema(self.original_tools)
            )

    def _is_tool_call_list(self, response: list[Any]) -> bool:
        """Check if a response is a list of tool calls.

        Probes for the shapes used by the major providers; only the first
        item is inspected (batches are assumed homogeneous).
        """
        if not response:
            return False
        first_item = response[0]
        # OpenAI-style
        if hasattr(first_item, "function") or (
            isinstance(first_item, dict) and "function" in first_item
        ):
            return True
        # Anthropic-style (ToolUseBlock)
        if (
            hasattr(first_item, "type")
            and getattr(first_item, "type", None) == "tool_use"
        ):
            return True
        if hasattr(first_item, "name") and hasattr(first_item, "input"):
            return True
        # Bedrock-style
        if (
            isinstance(first_item, dict)
            and "name" in first_item
            and "input" in first_item
        ):
            return True
        # Gemini-style
        if hasattr(first_item, "function_call") and first_item.function_call:
            return True
        return False

View File

@@ -74,6 +74,14 @@ from crewai.events.types.mcp_events import (
MCPToolExecutionFailedEvent,
MCPToolExecutionStartedEvent,
)
from crewai.events.types.observation_events import (
GoalAchievedEarlyEvent,
PlanRefinementEvent,
PlanReplanTriggeredEvent,
StepObservationCompletedEvent,
StepObservationFailedEvent,
StepObservationStartedEvent,
)
from crewai.events.types.reasoning_events import (
AgentReasoningCompletedEvent,
AgentReasoningFailedEvent,
@@ -534,6 +542,64 @@ class EventListener(BaseEventListener):
event.error,
)
# ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
# Console rendering for the observation phase: each handler forwards the
# event's fields to the formatter. Registered as closures over `self`.

@crewai_event_bus.on(StepObservationStartedEvent)
def on_step_observation_started(
    _: Any, event: StepObservationStartedEvent
) -> None:
    # Show that the planner is observing the just-finished step.
    self.formatter.handle_observation_started(
        event.agent_role,
        event.step_number,
        event.step_description,
    )

@crewai_event_bus.on(StepObservationCompletedEvent)
def on_step_observation_completed(
    _: Any, event: StepObservationCompletedEvent
) -> None:
    # Render the observation verdict (success, plan validity, learnings).
    self.formatter.handle_observation_completed(
        event.agent_role,
        event.step_number,
        event.step_completed_successfully,
        event.remaining_plan_still_valid,
        event.key_information_learned,
        event.needs_full_replan,
        event.goal_already_achieved,
    )

@crewai_event_bus.on(StepObservationFailedEvent)
def on_step_observation_failed(
    _: Any, event: StepObservationFailedEvent
) -> None:
    # Surface observation failures (execution itself continues).
    self.formatter.handle_observation_failed(
        event.step_number,
        event.error,
    )

@crewai_event_bus.on(PlanRefinementEvent)
def on_plan_refinement(_: Any, event: PlanRefinementEvent) -> None:
    # Show lightweight in-place refinements of pending steps.
    self.formatter.handle_plan_refinement(
        event.step_number,
        event.refined_step_count,
        event.refinements,
    )

@crewai_event_bus.on(PlanReplanTriggeredEvent)
def on_plan_replan_triggered(_: Any, event: PlanReplanTriggeredEvent) -> None:
    # Show that a full replan was triggered and why.
    self.formatter.handle_plan_replan(
        event.replan_reason,
        event.replan_count,
        event.completed_steps_preserved,
    )

@crewai_event_bus.on(GoalAchievedEarlyEvent)
def on_goal_achieved_early(_: Any, event: GoalAchievedEarlyEvent) -> None:
    # Show early termination when the goal was reached before the plan ended.
    self.formatter.handle_goal_achieved_early(
        event.steps_completed,
        event.steps_remaining,
    )
# ----------- AGENT LOGGING EVENTS -----------
@crewai_event_bus.on(AgentLogsStartedEvent)

View File

@@ -93,6 +93,14 @@ from crewai.events.types.memory_events import (
MemorySaveFailedEvent,
MemorySaveStartedEvent,
)
from crewai.events.types.observation_events import (
GoalAchievedEarlyEvent,
PlanRefinementEvent,
PlanReplanTriggeredEvent,
StepObservationCompletedEvent,
StepObservationFailedEvent,
StepObservationStartedEvent,
)
from crewai.events.types.reasoning_events import (
AgentReasoningCompletedEvent,
AgentReasoningFailedEvent,
@@ -437,6 +445,39 @@ class TraceCollectionListener(BaseEventListener):
) -> None:
self._handle_action_event("agent_reasoning_failed", source, event)
# Observation events (Plan-and-Execute)
# Each handler forwards the raw event into the shared trace pipeline,
# tagged with a stable snake_case action-name string.

@event_bus.on(StepObservationStartedEvent)
def on_step_observation_started(
    source: Any, event: StepObservationStartedEvent
) -> None:
    # Planner began analyzing a step's execution result.
    self._handle_action_event("step_observation_started", source, event)

@event_bus.on(StepObservationCompletedEvent)
def on_step_observation_completed(
    source: Any, event: StepObservationCompletedEvent
) -> None:
    # Planner finished its analysis (success, validity, next action).
    self._handle_action_event("step_observation_completed", source, event)

@event_bus.on(StepObservationFailedEvent)
def on_step_observation_failed(
    source: Any, event: StepObservationFailedEvent
) -> None:
    # The observation LLM call itself failed; useful for alerting.
    self._handle_action_event("step_observation_failed", source, event)

@event_bus.on(PlanRefinementEvent)
def on_plan_refinement(source: Any, event: PlanRefinementEvent) -> None:
    # Lightweight refinement of upcoming step descriptions.
    self._handle_action_event("plan_refinement", source, event)

@event_bus.on(PlanReplanTriggeredEvent)
def on_plan_replan_triggered(
    source: Any, event: PlanReplanTriggeredEvent
) -> None:
    # Full regeneration of the remaining plan was triggered.
    self._handle_action_event("plan_replan_triggered", source, event)

@event_bus.on(GoalAchievedEarlyEvent)
def on_goal_achieved_early(source: Any, event: GoalAchievedEarlyEvent) -> None:
    # Goal satisfied before all planned steps ran.
    self._handle_action_event("goal_achieved_early", source, event)
@event_bus.on(KnowledgeRetrievalStartedEvent)
def on_knowledge_retrieval_started(
source: Any, event: KnowledgeRetrievalStartedEvent

View File

@@ -0,0 +1,99 @@
"""Observation events for the Plan-and-Execute architecture.
Emitted during the Observation phase (PLAN-AND-ACT Section 3.3) when the
PlannerObserver analyzes step execution results and decides on plan
continuation, refinement, or replanning.
"""
from typing import Any
from crewai.events.base_events import BaseEvent
class ObservationEvent(BaseEvent):
    """Base event for observation phase events.

    Carries the fields common to every observation-phase event: which
    agent and plan step the observation concerns, plus optional
    task/agent provenance.
    """

    # Discriminator string; each concrete subclass overrides the default.
    type: str
    # Role of the agent whose step is being observed.
    agent_role: str
    # Number of the plan step this observation refers to.
    step_number: int
    # Human-readable description of the step (may be empty).
    step_description: str = ""
    # Originating task/agent; typed loosely (Any) to avoid import cycles.
    from_task: Any | None = None
    from_agent: Any | None = None

    def __init__(self, **data: Any) -> None:
        """Initialize the event and extract task/agent parameters.

        Args:
            **data: Field values forwarded to the BaseEvent constructor.
        """
        super().__init__(**data)
        # BaseEvent helpers derive event metadata from the provided
        # from_task / from_agent values.
        self._set_task_params(data)
        self._set_agent_params(data)
class StepObservationStartedEvent(ObservationEvent):
    """Emitted when the Planner begins observing a step's result.

    Fires after every step execution, before the observation LLM call.
    """

    type: str = "step_observation_started"
class StepObservationCompletedEvent(ObservationEvent):
    """Emitted when the Planner finishes observing a step's result.

    Contains the full observation analysis: what was learned, whether
    the plan is still valid, and what action to take next.
    """

    type: str = "step_observation_completed"
    # Whether the step achieved its objective.
    step_completed_successfully: bool = True
    # New information this step revealed (used to refine later steps).
    key_information_learned: str = ""
    # Whether the remaining pending todos still make sense.
    remaining_plan_still_valid: bool = True
    # True when the remaining plan must be regenerated from scratch.
    needs_full_replan: bool = False
    # Why a full replan is needed (None when needs_full_replan is False).
    replan_reason: str | None = None
    # True when the overall goal was satisfied before the plan ran out.
    goal_already_achieved: bool = False
    # Lightweight tweaks to upcoming step descriptions (no full replan).
    suggested_refinements: list[str] | None = None
class StepObservationFailedEvent(ObservationEvent):
    """Emitted when the observation LLM call itself fails.

    The system defaults to continuing the plan when this happens,
    but the event allows monitoring/alerting on observation failures.
    """

    type: str = "step_observation_failed"
    # Human-readable description of the failure.
    error: str = ""
class PlanRefinementEvent(ObservationEvent):
    """Emitted when the Planner refines upcoming step descriptions.

    This is the lightweight refinement path — no full replan, just
    sharpening pending todo descriptions based on new information.
    """

    type: str = "plan_refinement"
    # How many pending steps had their descriptions updated.
    refined_step_count: int = 0
    # The refinement notes that were applied, if any.
    refinements: list[str] | None = None
class PlanReplanTriggeredEvent(ObservationEvent):
    """Emitted when the Planner triggers a full replan.

    The remaining plan was deemed fundamentally wrong and will be
    regenerated from scratch, preserving completed step results.
    """

    type: str = "plan_replan_triggered"
    # Explanation of why the replan was triggered.
    replan_reason: str = ""
    # How many replans have occurred so far in this execution.
    replan_count: int = 0
    # Number of completed steps whose results are carried forward.
    completed_steps_preserved: int = 0
class GoalAchievedEarlyEvent(ObservationEvent):
    """Emitted when the Planner detects the goal was achieved early.

    Remaining steps will be skipped and execution will finalize.
    """

    type: str = "goal_achieved_early"
    # Number of planned steps that will be skipped.
    steps_remaining: int = 0
    # Number of steps that were executed before the goal was reached.
    steps_completed: int = 0

View File

@@ -851,6 +851,152 @@ To enable tracing, do any one of these:
)
self.print_panel(error_content, "❌ Reasoning Error", "red")
# ----------- OBSERVATION EVENTS (Plan-and-Execute) -----------
def handle_observation_started(
    self,
    agent_role: str,
    step_number: int,
    step_description: str,
) -> None:
    """Render a panel announcing that a step observation has begun.

    Args:
        agent_role: Role of the agent whose step is being observed.
        step_number: Number of the observed plan step.
        step_description: Description of the step (truncated to 80 chars).
    """
    if not self.verbose:
        return
    segments: list[tuple[str, str]] = [
        ("Observation Started\n", "cyan bold"),
        ("Agent: ", "white"),
        (f"{agent_role}\n", "cyan"),
        ("Step: ", "white"),
        (f"{step_number}\n", "cyan"),
    ]
    if step_description:
        # Keep the panel compact: show at most 80 characters.
        preview = step_description[:80]
        if len(step_description) > 80:
            preview += "..."
        segments.append(("Description: ", "white"))
        segments.append((f"{preview}\n", "cyan"))
    content = Text()
    for text, text_style in segments:
        content.append(text, style=text_style)
    self.print_panel(content, "🔍 Observing Step Result", "cyan")
def handle_observation_completed(
    self,
    agent_role: str,
    step_number: int,
    step_completed: bool,
    plan_valid: bool,
    key_info: str,
    needs_replan: bool,
    goal_achieved: bool,
) -> None:
    """Render a panel summarizing a finished step observation.

    Status precedence mirrors the executor's router: early goal
    achievement, then full replan, then a still-valid plan, then the
    failure/invalid case.

    Args:
        agent_role: Role of the observing agent (kept for signature
            parity with handle_observation_started; not rendered).
        step_number: Number of the observed plan step.
        step_completed: Whether the step achieved its objective.
        plan_valid: Whether the remaining plan is still valid.
        key_info: New information learned (truncated to 120 chars).
        needs_replan: Whether a full replan was requested.
        goal_achieved: Whether the overall goal was achieved early.
    """
    if not self.verbose:
        return
    if goal_achieved:
        style = "green"
        status = "Goal Achieved Early"
    elif needs_replan:
        style = "yellow"
        status = "Replan Needed"
    elif plan_valid:
        style = "green"
        status = "Plan Valid — Continue"
    else:
        # Fix: the original ignored `step_completed` and labeled every
        # invalid-plan observation "Step Failed" even when the step
        # itself succeeded. Distinguish the two cases.
        style = "red"
        status = "Step Failed" if not step_completed else "Plan Invalid"
    content = Text()
    content.append("Observation Complete\n", style=f"{style} bold")
    content.append("Step: ", style="white")
    content.append(f"{step_number}\n", style=style)
    content.append("Status: ", style="white")
    content.append(f"{status}\n", style=style)
    if key_info:
        info_preview = key_info[:120] + ("..." if len(key_info) > 120 else "")
        content.append("Learned: ", style="white")
        content.append(f"{info_preview}\n", style=style)
    self.print_panel(content, "🔍 Observation Result", style)
def handle_observation_failed(
    self,
    step_number: int,
    error: str,
) -> None:
    """Render an error panel for a failed observation LLM call.

    Args:
        step_number: Number of the step whose observation failed.
        error: Description of the failure.
    """
    if not self.verbose:
        return
    panel_body = self.create_status_content(
        "Observation Failed",
        "Error",
        "red",
        Step=str(step_number),
        Error=error,
    )
    self.print_panel(panel_body, "❌ Observation Error", "red")
def handle_plan_refinement(
self,
step_number: int,
refined_count: int,
refinements: list[str] | None,
) -> None:
"""Handle plan refinement event."""
if not self.verbose:
return
content = Text()
content.append("Plan Refined\n", style="cyan bold")
content.append("After Step: ", style="white")
content.append(f"{step_number}\n", style="cyan")
content.append("Steps Updated: ", style="white")
content.append(f"{refined_count}\n", style="cyan")
if refinements:
for r in refinements[:3]:
content.append(f"{r[:80]}\n", style="white")
self.print_panel(content, "✏️ Plan Refinement", "cyan")
def handle_plan_replan(
    self,
    reason: str,
    replan_count: int,
    preserved_count: int,
) -> None:
    """Render a panel describing a full dynamic replan.

    Args:
        reason: Why the replan was triggered.
        replan_count: Which replan this is (1-based count).
        preserved_count: How many completed steps were preserved.
    """
    if not self.verbose:
        return
    body = Text()
    for text, text_style in (
        ("Full Replan Triggered\n", "yellow bold"),
        ("Reason: ", "white"),
        (f"{reason}\n", "yellow"),
        ("Replan #: ", "white"),
        (f"{replan_count}\n", "yellow"),
        ("Preserved Steps: ", "white"),
        (f"{preserved_count}\n", "yellow"),
    ):
        body.append(text, style=text_style)
    self.print_panel(body, "🔄 Dynamic Replan", "yellow")
def handle_goal_achieved_early(
    self,
    steps_completed: int,
    steps_remaining: int,
) -> None:
    """Render a panel announcing early goal achievement.

    Args:
        steps_completed: Number of steps that were executed.
        steps_remaining: Number of planned steps that were skipped.
    """
    if not self.verbose:
        return
    body = Text()
    for text, text_style in (
        ("Goal Achieved Early!\n", "green bold"),
        ("Completed: ", "white"),
        (f"{steps_completed} steps\n", "green"),
        ("Skipped: ", "white"),
        (f"{steps_remaining} remaining steps\n", "green"),
    ):
        body.append(text, style=text_style)
    self.print_panel(body, "🎯 Early Goal Achievement", "green")
# ----------- AGENT LOGGING EVENTS -----------
def handle_agent_logs_started(

View File

@@ -27,6 +27,11 @@ from crewai.events.types.logging_events import (
AgentLogsExecutionEvent,
AgentLogsStartedEvent,
)
from crewai.events.types.observation_events import (
GoalAchievedEarlyEvent,
PlanRefinementEvent,
PlanReplanTriggeredEvent,
)
from crewai.events.types.tool_usage_events import (
ToolUsageErrorEvent,
ToolUsageFinishedEvent,
@@ -62,8 +67,14 @@ from crewai.utilities.agent_utils import (
)
from crewai.utilities.constants import TRAINING_DATA_FILE
from crewai.utilities.i18n import I18N, get_i18n
from crewai.utilities.planning_types import PlanStep, TodoItem, TodoList
from crewai.utilities.planning_types import (
PlanStep,
StepObservation,
TodoItem,
TodoList,
)
from crewai.utilities.printer import Printer
from crewai.utilities.step_execution_context import StepExecutionContext
from crewai.utilities.string_utils import sanitize_tool_name
from crewai.utilities.tool_utils import execute_tool_and_check_finality
from crewai.utilities.training_handler import CrewTrainingHandler
@@ -109,6 +120,14 @@ class AgentReActState(BaseModel):
last_replan_reason: str | None = Field(
default=None, description="Reason for the last replan, if any"
)
observations: dict[int, StepObservation] = Field(
default_factory=dict,
description="Planner's observation per step (keyed by step_number)",
)
execution_log: list[dict[str, Any]] = Field(
default_factory=list,
description="Audit trail for debugging (NOT used for LLM calls)",
)
class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
@@ -222,6 +241,11 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
)
self._state = AgentReActState()
# Plan-and-Execute components (Phase 2)
# Lazy-imported to avoid circular imports during module load
self._step_executor: Any = None
self._planner_observer: Any = None
def _ensure_flow_initialized(self) -> None:
"""Ensure Flow.__init__() has been called.
@@ -396,6 +420,331 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
self.state.todos = TodoList(items=todos)
# -------------------------------------------------------------------------
# Plan-and-Execute: Component Initialization
# -------------------------------------------------------------------------
def _ensure_step_executor(self) -> Any:
"""Lazily create the StepExecutor (avoids circular imports)."""
if self._step_executor is None:
from crewai.agents.step_executor import StepExecutor
self._step_executor = StepExecutor(
llm=self.llm,
tools=self.tools,
agent=self.agent,
original_tools=self.original_tools,
tools_handler=self.tools_handler,
task=self.task,
crew=self.crew,
function_calling_llm=self.function_calling_llm,
request_within_rpm_limit=self.request_within_rpm_limit,
callbacks=self.callbacks,
i18n=self._i18n,
)
return self._step_executor
def _ensure_planner_observer(self) -> Any:
"""Lazily create the PlannerObserver (avoids circular imports)."""
if self._planner_observer is None:
from crewai.agents.planner_observer import PlannerObserver
self._planner_observer = PlannerObserver(
agent=self.agent,
task=self.task,
)
return self._planner_observer
def _build_context_for_todo(self, todo: TodoItem) -> StepExecutionContext:
    """Build an isolated execution context for a single todo.

    Passes only final results from completed dependencies — never
    execution traces, tool calls, or LLM message history.

    Args:
        todo: The todo item to build context for.

    Returns:
        Immutable StepExecutionContext with dependency results.
    """
    dependency_results: dict[int, str] = {}
    for dep_num in todo.depends_on:
        dependency = self.state.todos.get_by_step_number(dep_num)
        if dependency and dependency.result:
            dependency_results[dep_num] = dependency.result

    if self.task:
        description = self.task.description or ""
        goal = self.task.expected_output or ""
    else:
        # No formal Task — fall back to the raw kickoff input.
        description = getattr(self, "_kickoff_input", "")
        goal = "Complete the task successfully"

    return StepExecutionContext(
        task_description=description,
        task_goal=goal,
        dependency_results=dependency_results,
    )
# -------------------------------------------------------------------------
# Plan-and-Execute: New Observation-Driven Flow Methods
# -------------------------------------------------------------------------
@listen("step_executed")
def observe_step_result(self) -> Literal["step_observed"]:
"""THE OBSERVATION STEP — runs after EVERY step execution.
This is the Planner's opportunity to incorporate new information
learned during execution. It is NOT an error handler — it runs on
every step, including successes.
Based on PLAN-AND-ACT Section 3.3.
"""
current_todo = self.state.todos.current_todo
if not current_todo:
return "step_observed"
observer = self._ensure_planner_observer()
all_completed = self.state.todos.get_completed_todos()
remaining = self.state.todos.get_pending_todos()
observation = observer.observe(
completed_step=current_todo,
result=current_todo.result or "",
all_completed=all_completed,
remaining_todos=remaining,
)
self.state.observations[current_todo.step_number] = observation
# Log observation for debugging
self.state.execution_log.append(
{
"type": "observation",
"step_number": current_todo.step_number,
"step_completed_successfully": observation.step_completed_successfully,
"key_information_learned": observation.key_information_learned,
"remaining_plan_still_valid": observation.remaining_plan_still_valid,
"needs_full_replan": observation.needs_full_replan,
"goal_already_achieved": observation.goal_already_achieved,
}
)
if self.agent.verbose:
self._printer.print(
content=(
f"[Observe] Step {current_todo.step_number}: "
f"success={observation.step_completed_successfully}, "
f"plan_valid={observation.remaining_plan_still_valid}, "
f"learned={observation.key_information_learned[:80]}..."
),
color="cyan",
)
return "step_observed"
@router("step_observed")
def decide_next_action(
self,
) -> Literal[
"goal_achieved",
"replan_now",
"refine_and_continue",
"continue_plan",
]:
"""Route based on the Planner's observation.
This replaces the old reactive _should_replan() heuristics with
proactive, LLM-driven decisions.
"""
current_todo = self.state.todos.current_todo
if not current_todo:
return "continue_plan"
observation = self.state.observations.get(current_todo.step_number)
if not observation:
# No observation available — default to continue
self.state.todos.mark_completed(current_todo.step_number)
return "continue_plan"
# Goal already achieved — early termination
if observation.goal_already_achieved:
self.state.todos.mark_completed(
current_todo.step_number, result=current_todo.result
)
if self.agent.verbose:
self._printer.print(
content="[Decide] Goal achieved early — finalizing",
color="green",
)
return "goal_achieved"
# Full replan needed
if observation.needs_full_replan:
if self.agent.verbose:
self._printer.print(
content=f"[Decide] Full replan needed: {observation.replan_reason}",
color="yellow",
)
self.state.last_replan_reason = observation.replan_reason
return "replan_now"
# Step failed — also trigger replan
if not observation.step_completed_successfully:
if self.agent.verbose:
self._printer.print(
content="[Decide] Step failed — triggering replan",
color="yellow",
)
self.state.last_replan_reason = "Step did not complete successfully"
return "replan_now"
# Plan still valid but needs refinement
if observation.remaining_plan_still_valid and observation.suggested_refinements:
self.state.todos.mark_completed(
current_todo.step_number, result=current_todo.result
)
if self.agent.verbose:
self._printer.print(
content="[Decide] Plan valid but refining upcoming steps",
color="cyan",
)
return "refine_and_continue"
# Plan still valid, no refinements needed — just continue
self.state.todos.mark_completed(
current_todo.step_number, result=current_todo.result
)
if self.agent.verbose:
completed = self.state.todos.completed_count
total = len(self.state.todos.items)
self._printer.print(
content=f"[Decide] Continue plan ({completed}/{total} done)",
color="green",
)
return "continue_plan"
@listen("refine_and_continue")
def handle_refine_and_continue(self) -> Literal["has_todos"]:
"""Lightweight plan refinement — update pending todo descriptions.
The Planner sharpens upcoming step descriptions based on what was
learned, without regenerating the entire plan.
"""
# Find the most recent observation with refinements
recent_observation: StepObservation | None = None
last_step: int = 0
if self.state.observations:
last_step = max(self.state.observations.keys())
recent_observation = self.state.observations[last_step]
if recent_observation and recent_observation.suggested_refinements:
observer = self._ensure_planner_observer()
remaining = self.state.todos.get_pending_todos()
observer.refine_todos(recent_observation, remaining)
# Emit refinement event
crewai_event_bus.emit(
self.agent,
event=PlanRefinementEvent(
agent_role=self.agent.role,
step_number=last_step,
step_description="",
refined_step_count=len(remaining),
refinements=recent_observation.suggested_refinements,
from_task=self.task,
from_agent=self.agent,
),
)
if self.agent.verbose:
self._printer.print(
content=f"[Refine] Updated {len(remaining)} pending step(s)",
color="cyan",
)
return "has_todos"
@listen("continue_plan")
def handle_continue_plan(self) -> Literal["has_todos", "all_todos_complete"]:
"""Continue to the next todo after a successful step."""
if self.state.todos.is_complete:
return "all_todos_complete"
return "has_todos"
@listen("goal_achieved")
def handle_goal_achieved(self) -> Literal["all_todos_complete"]:
"""Handle early goal achievement — skip remaining todos."""
completed = self.state.todos.get_completed_todos()
remaining = self.state.todos.get_pending_todos()
# Emit goal achieved early event
crewai_event_bus.emit(
self.agent,
event=GoalAchievedEarlyEvent(
agent_role=self.agent.role,
step_number=completed[-1].step_number if completed else 0,
step_description="",
steps_completed=len(completed),
steps_remaining=len(remaining),
from_task=self.task,
from_agent=self.agent,
),
)
if self.agent.verbose:
self._printer.print(
content="Goal achieved early — skipping remaining steps",
color="green",
)
return "all_todos_complete"
@listen("replan_now")
def handle_replan_now(
self,
) -> Literal["has_todos", "all_todos_complete"]:
"""Handle full replanning — regenerate the remaining plan.
Preserves completed todo results and replaces only pending steps.
"""
max_replans = 3
self.state.replan_count += 1
if self.state.replan_count > max_replans:
if self.agent.verbose:
self._printer.print(
content=f"Max replans ({max_replans}) reached — finalizing with current results",
color="yellow",
)
return "all_todos_complete"
reason = self.state.last_replan_reason or "Dynamic replan triggered"
completed = self.state.todos.get_completed_todos()
# Emit replan triggered event
crewai_event_bus.emit(
self.agent,
event=PlanReplanTriggeredEvent(
agent_role=self.agent.role,
step_number=completed[-1].step_number if completed else 0,
step_description="",
replan_reason=reason,
replan_count=self.state.replan_count,
completed_steps_preserved=len(completed),
from_task=self.task,
from_agent=self.agent,
),
)
self._trigger_replan(reason)
if self.state.todos.get_pending_todos():
return "has_todos"
return "all_todos_complete"
# -------------------------------------------------------------------------
# Todo-Driven Execution Flow
# -------------------------------------------------------------------------
@@ -460,28 +809,73 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
return "multiple_todos_ready"
@router("single_todo_ready")
def execute_todo_sequential(self) -> Literal["todo_injected"]:
"""Prepare to execute a single todo by injecting its context.
def execute_todo_sequential(
self,
) -> Literal["step_executed", "todo_injected"]:
"""Execute a single todo using StepExecutor (Plan-and-Execute mode)
or fall back to the old ReAct injection (legacy mode).
Adds a focused prompt for the current todo to the conversation,
guiding the agent to complete this specific step.
In Plan-and-Execute mode: executes the step in isolation via
StepExecutor, stores the result, and routes to the observation step.
In legacy mode: injects context into the shared message list and
routes to the ReAct loop.
"""
current = self.state.todos.current_todo
if not current:
return "todo_injected" # Fall through to legacy
# DEBUG: Trace starting todo execution
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] execute_todo_sequential: starting todo {current.step_number if current else None}",
color="cyan",
)
if current:
# Plan-and-Execute path: use StepExecutor for isolated execution
if getattr(self.agent, "planning_enabled", False):
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] Description: {current.description[:60]}...",
content=(
f"[Execute] Step {current.step_number}: "
f"{current.description[:60]}..."
),
color="cyan",
)
if current:
self._inject_todo_context(current)
step_executor = self._ensure_step_executor()
context = self._build_context_for_todo(current)
result = step_executor.execute(current, context)
# Store result on the todo (do NOT mark completed — observation decides)
current.result = result.result
# Log to audit trail
self.state.execution_log.append(
{
"type": "step_execution",
"step_number": current.step_number,
"success": result.success,
"result_preview": result.result[:200] if result.result else "",
"error": result.error,
"tool_calls": result.tool_calls_made,
"execution_time": result.execution_time,
}
)
if self.agent.verbose:
status = "success" if result.success else "failed"
self._printer.print(
content=(
f"[Execute] Step {current.step_number} {status} "
f"({result.execution_time:.1f}s, "
f"{len(result.tool_calls_made)} tool calls)"
),
color="green" if result.success else "red",
)
return "step_executed"
# Legacy path: inject context into shared messages for ReAct loop
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] execute_todo_sequential (legacy): starting todo {current.step_number}",
color="cyan",
)
self._inject_todo_context(current)
return "todo_injected"
def _inject_todo_context(self, todo: TodoItem) -> None:
@@ -490,18 +884,23 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
Args:
todo: The todo item to inject context for.
"""
prompt = self._build_todo_prompt(todo)
# Build focused task prompt. Context from previous steps is already
# in self.state.messages as SYSTEM messages (added by _mark_todo_as_completed)
prompt = self._build_todo_prompt(todo, include_dependencies=False)
todo_message: LLMMessage = {
"role": "user",
"content": prompt,
}
self.state.messages.append(todo_message)
def _build_todo_prompt(self, todo: TodoItem) -> str:
def _build_todo_prompt(
self, todo: TodoItem, include_dependencies: bool = True
) -> str:
"""Build a focused prompt for executing a single todo.
Args:
todo: The todo item to build a prompt for.
include_dependencies: Whether to include dependency results in this prompt.
Returns:
A prompt string focused on this specific step.
@@ -513,19 +912,13 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
if todo.tool_to_use:
parts.append(f"Suggested tool: {todo.tool_to_use}")
# Include results from completed dependencies
if todo.depends_on:
# Include results from completed dependencies if requested (used for parallel execution)
if include_dependencies and todo.depends_on:
dep_results = []
for dep_num in todo.depends_on:
dep = self.state.todos.get_by_step_number(dep_num)
if dep and dep.result:
# Truncate long results
result_preview = (
dep.result[:500] + "..."
if len(dep.result) > 500
else dep.result
)
dep_results.append(f"Step {dep_num} result: {result_preview}")
dep_results.append(f"Step {dep_num} result: {dep.result}")
if dep_results:
parts.append("\nContext from previous steps:")
parts.extend(dep_results)
@@ -561,12 +954,7 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
color="red",
)
else:
self.state.todos.mark_completed(todo.step_number, result=str(result))
if self.agent.verbose:
self._printer.print(
content=f"Todo {todo.step_number} completed",
color="green",
)
self._mark_todo_as_completed(todo.step_number, str(result))
return "parallel_todos_complete"
@@ -580,12 +968,29 @@ class AgentExecutor(Flow[AgentReActState], CrewAgentExecutorMixin):
The result of executing the todo.
"""
# Build messages for this specific todo
todo_prompt = self._build_todo_prompt(todo)
messages: list[LLMMessage] = [
{"role": "system", "content": self._get_todo_system_prompt()},
{"role": "user", "content": todo_prompt},
]
# Inject context into messages for parallel execution (since history is empty)
if todo.depends_on:
dep_results = []
for dep_num in todo.depends_on:
dep = self.state.todos.get_by_step_number(dep_num)
if dep and dep.result:
dep_results.append(f"Step {dep_num} result: {dep.result}")
if dep_results:
messages.append(
{
"role": "system",
"content": "Context from previous steps:\n"
+ "\n".join(dep_results),
}
)
todo_prompt = self._build_todo_prompt(todo, include_dependencies=False)
messages.append({"role": "user", "content": todo_prompt})
# If the todo specifies a tool and we have native tool support
if todo.tool_to_use and self.state.use_native_tools:
try:
@@ -1415,22 +1820,49 @@ provide clear results that can be used by subsequent steps."""
or last_msg.get("role") == "assistant"
):
result = str(last_msg.get("content", ""))
elif not self.state.current_answer and self.state.messages:
# For native tools, results are in the message history as 'tool' roles
# We take the content of the most recent tool results
tool_results = []
for msg in reversed(self.state.messages):
if msg.get("role") == "tool":
tool_results.insert(0, str(msg.get("content", "")))
elif msg.get("role") == "assistant" and msg.get("tool_calls"):
# Once we hit the assistant message that triggered the tools, we stop
break
result = "\n".join(tool_results)
self.state.todos.mark_completed(current_todo.step_number, result=result)
self._mark_todo_as_completed(current_todo.step_number, result)
return "todo_marked"
def _mark_todo_as_completed(self, step_number: int, result: str) -> None:
"""Helper to mark a todo as completed and update history.
Args:
step_number: The step number to mark.
result: The result of the todo.
"""
self.state.todos.mark_completed(step_number, result=result)
if self.agent.verbose:
completed = self.state.todos.completed_count
total = len(self.state.todos.items)
self._printer.print(
content=f"✓ Todo {current_todo.step_number} completed ({completed}/{total})",
content=f"✓ Todo {step_number} completed ({completed}/{total})",
color="green",
)
self._printer.print(
content=f"[DEBUG] Marked todo {current_todo.step_number} as completed, result_len={len(result)}",
content=f"[DEBUG] Marked todo {step_number} as completed, result_len={len(result)}",
color="cyan",
)
return "todo_marked"
# Add to history as a SYSTEM message for subsequent steps
if result:
self._append_message_to_state(
f"**Step {step_number} result:**\n\n{result}",
role="system",
)
@router(mark_todo_complete)
def check_more_todos(
@@ -1500,22 +1932,28 @@ provide clear results that can be used by subsequent steps."""
"""Finalize execution and emit completion logs.
If todos were used, synthesizes a final answer from all todo results.
Handles both the legacy ReAct path (current_answer already set) and
the Plan-and-Execute path (synthesize from completed todos).
"""
# DEBUG: Trace finalize being called
if self.agent.verbose:
self._printer.print(
content=f"[DEBUG] finalize called! todos_count={len(self.state.todos.items)}, todos_complete={self.state.todos.is_complete}",
content=f"[Finalize] todos_count={len(self.state.todos.items)}, todos_with_results={sum(1 for t in self.state.todos.items if t.result)}",
color="magenta",
)
if self.state.todos.items:
for todo in self.state.todos.items:
self._printer.print(
content=f"[DEBUG] Todo {todo.step_number}: status={todo.status}, desc={todo.description[:40]}...",
color="magenta",
)
# If we have completed todos, synthesize the final answer
if self.state.todos.items and self.state.todos.is_complete:
# Plan-and-Execute path: synthesize from completed todos
# Check for todos with results (even if not all marked "completed" —
# the goal_achieved path may skip marking some as completed)
todos_with_results = [t for t in self.state.todos.items if t.result]
if todos_with_results and self.state.current_answer is None:
self._synthesize_final_answer_from_todos()
# Legacy path: synthesize if todos are all formally complete
if (
self.state.todos.items
and self.state.todos.is_complete
and self.state.current_answer is None
):
self._synthesize_final_answer_from_todos()
if self.state.current_answer is None:
@@ -1552,7 +1990,7 @@ provide clear results that can be used by subsequent steps."""
results: list[str] = []
for todo in self.state.todos.items:
if todo.result:
results.append(f"**Step {todo.step_number}**: {todo.description}")
results.append(f"**Step {todo.step_number} result:**")
results.append(todo.result)
results.append("") # Empty line for spacing
@@ -1703,14 +2141,9 @@ provide clear results that can be used by subsequent steps."""
if completed:
context_parts.append("Successfully completed steps:")
for todo in completed:
result_preview = (
todo.result[:200] + "..."
if todo.result and len(todo.result) > 200
else todo.result
)
context_parts.append(f" - Step {todo.step_number}: {todo.description}")
if result_preview:
context_parts.append(f" Result: {result_preview}")
if todo.result:
context_parts.append(f" Result: {todo.result}")
# Summarize failed todos
failed = [
@@ -1858,6 +2291,8 @@ Consider:
self.state.todos = TodoList()
self.state.replan_count = 0
self.state.last_replan_reason = None
self.state.observations = {}
self.state.execution_log = []
self._kickoff_input = inputs.get("input", "")
@@ -1949,6 +2384,8 @@ Consider:
self.state.todos = TodoList()
self.state.replan_count = 0
self.state.last_replan_reason = None
self.state.observations = {}
self.state.execution_log = []
self._kickoff_input = inputs.get("input", "")

View File

@@ -144,3 +144,86 @@ class TodoList(BaseModel):
def running_count(self) -> int:
"""Count of currently running todos."""
return sum(1 for item in self.items if item.status == "running")
def get_completed_todos(self) -> list[TodoItem]:
    """Return every todo whose status is "completed", in plan order.

    Returns:
        List of completed TodoItem objects.
    """
    return list(filter(lambda todo: todo.status == "completed", self.items))
def get_pending_todos(self) -> list[TodoItem]:
    """Return every todo whose status is "pending", in plan order.

    Returns:
        List of pending TodoItem objects.
    """
    return list(filter(lambda todo: todo.status == "pending", self.items))
def replace_pending_todos(self, new_items: list[TodoItem]) -> None:
    """Replace all pending todos with new items.

    Preserves completed and running todos, replaces only pending ones.
    Used during replanning to swap in a new plan for remaining work.

    Args:
        new_items: The new todo items to replace pending ones.
    """
    kept = [todo for todo in self.items if todo.status != "pending"]
    self.items = [*kept, *new_items]
class StepObservation(BaseModel):
    """Planner's observation after a step execution completes.

    Returned by the PlannerObserver after EVERY step — not just failures.
    The Planner uses this to decide whether to continue, refine, or replan.

    Based on PLAN-AND-ACT (Section 3.3): the Planner observes what the Executor
    did and incorporates new information into the remaining plan.

    Attributes:
        step_completed_successfully: Whether the step achieved its objective.
        key_information_learned: New information revealed by this step
            (e.g., "Found 3 products: A, B, C"). Used to refine upcoming steps.
        remaining_plan_still_valid: Whether pending todos still make sense
            given the new information. True does NOT mean no refinement needed.
        suggested_refinements: Minor tweaks to upcoming step descriptions.
            These are lightweight in-place updates, not a full replan.
            Example: ["Step 3 should select product B instead of 'best product'"]
        needs_full_replan: The remaining plan is fundamentally wrong and must
            be regenerated from scratch. Mutually exclusive with
            remaining_plan_still_valid (if this is True, that should be False).
        replan_reason: Explanation of why a full replan is needed (None if not).
        goal_already_achieved: The overall task goal has been satisfied early.
            No more steps needed — skip remaining todos and finalize.
    """

    # NOTE: This model is used as an LLM structured-output schema
    # (response_format json_schema with strict=true); the class docstring and
    # every Field description below are serialized verbatim into the request
    # sent to the model. Editing any of this text changes what the LLM sees
    # and will invalidate recorded test cassettes that embed the schema.
    step_completed_successfully: bool = Field(
        description="Whether the step achieved what it was asked to do"
    )
    # Defaults to "" (not None) so callers can always treat it as a string.
    key_information_learned: str = Field(
        default="",
        description="What new information this step revealed",
    )
    # Defaults optimistic: the plan is assumed valid unless the observer
    # explicitly says otherwise.
    remaining_plan_still_valid: bool = Field(
        default=True,
        description="Whether the remaining pending todos still make sense given new information",
    )
    suggested_refinements: list[str] | None = Field(
        default=None,
        description="Minor tweaks to descriptions of upcoming steps (lightweight, no full replan)",
    )
    needs_full_replan: bool = Field(
        default=False,
        description="The remaining plan is fundamentally wrong and must be regenerated",
    )
    replan_reason: str | None = Field(
        default=None,
        description="Explanation of why a full replan is needed",
    )
    goal_already_achieved: bool = Field(
        default=False,
        description="The overall task goal has been satisfied early; no more steps needed",
    )

View File

@@ -0,0 +1,64 @@
"""Context and result types for isolated step execution in Plan-and-Execute architecture.
These types mediate between the AgentExecutor (orchestrator) and StepExecutor (per-step worker).
StepExecutionContext carries only final results from dependencies — never LLM message histories.
StepResult carries only the outcome of a step — never internal execution traces.
"""
from __future__ import annotations
from dataclasses import dataclass, field
@dataclass(frozen=True)
class StepExecutionContext:
    """Immutable per-step context handed to a StepExecutor for one todo.

    Carries only what a single step needs: the overall task description
    and goal, plus the final result strings of its completed dependency
    steps. Deliberately excludes LLM message histories, execution traces,
    and any shared mutable state.

    Attributes:
        task_description: The original task description (from Task or kickoff input).
        task_goal: The expected output / goal of the overall task.
        dependency_results: Maps step_number to the final result string of
            each completed dependency of the current step.
    """

    task_description: str
    task_goal: str
    dependency_results: dict[int, str] = field(default_factory=dict)

    def get_dependency_result(self, step_number: int) -> str | None:
        """Look up the final result of a dependency step.

        Args:
            step_number: The step number whose result is wanted.

        Returns:
            The dependency's result string, or None when no result is recorded.
        """
        try:
            return self.dependency_results[step_number]
        except KeyError:
            return None
@dataclass
class StepResult:
    """Outcome of a single todo executed by a StepExecutor.

    Holds the step's final output plus metadata for debugging and
    metrics. Tool-call names are recorded purely for audit logging —
    they are never passed on to subsequent steps or to the Planner.

    Attributes:
        success: True when the step completed successfully.
        result: The final output string produced by the step.
        error: Error message when the step failed; None on success.
        tool_calls_made: Names of tools invoked (debug/log use only).
        execution_time: Wall-clock duration of the step, in seconds.
    """

    success: bool
    result: str
    error: str | None = None
    tool_calls_made: list[str] = field(default_factory=list)
    execution_time: float = 0.0

View File

@@ -4,18 +4,25 @@ interactions:
Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
a focused execution plan for the following task:\n\n## Task\nWhat is 2 + 2?\n\n##
Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools
available\n\n## Instructions\nCreate ONLY the essential steps needed to complete
this task. Use the MINIMUM number of steps required - do NOT pad your plan with
unnecessary steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State
the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT
include:\n- Setup or preparation steps that are obvious\n- Verification steps
unless critical\n- Documentation or cleanup steps unless explicitly required\n-
Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan,
state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n-
\"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished,
not HOW. Group related actions into logical units. Fewer steps = better. Most
tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are
valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2.
**Output Step**: Synthesizes prior results into the final deliverable (usually
the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE
FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step,
not three\n- Combine all synthesis into ONE final output step\n- NO standalone
\"thinking\" steps (review, verify, confirm, refine, analyze) - these happen
naturally between steps\n\nFor each step: State the action, specify the tool
(if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -28,7 +35,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1541'
- '2315'
content-type:
- application/json
host:
@@ -55,20 +62,24 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTTAh68P65LybtqkwNI3p2HXcRv\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078147,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FIyv2pfC7qKbZVvmJNjOVfge1F\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330972,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. **Action:**
Perform the addition operation. \\n **Tool:** None (manually calculate).\\n\\n2.
**Action:** State the result. \\n **Tool:** None (manually output).\\n\\nREADY:
I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 281,\n \"completion_tokens\":
56,\n \"total_tokens\": 337,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_xEDChlUntYR0aSxQhkobswea\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 2 +
2 and provide the result as the final output.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate
the sum of 2 + 2\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide
the result as final output\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 440,\n \"completion_tokens\":
92,\n \"total_tokens\": 532,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -77,7 +88,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:28 GMT
- Thu, 05 Feb 2026 22:36:13 GMT
Server:
- cloudflare
Set-Cookie:
@@ -97,7 +108,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1165'
- '1670'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -123,9 +134,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant that solves math problems step by step\nYour personal goal is: Help
solve simple math problems"},{"role":"user","content":"\nCurrent Task: What
is 2 + 2?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
assistant that solves math problems step by step\n\nYour goal: Help solve simple
math problems\n\nYou are executing a specific step in a multi-step plan. Focus
ONLY on completing\nthe current step. Do not plan ahead or worry about future
steps.\n\nBefore acting, briefly reason about what you need to do and which
approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nCalculate the sum of 2 + 2\n\nComplete this step and provide your
result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -138,7 +153,7 @@ interactions:
connection:
- keep-alive
content-length:
- '299'
- '597'
content-type:
- application/json
cookie:
@@ -167,20 +182,18 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTVB9mdtq1YZrUVf1aSb6dVVQ8G\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078149,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FJ4ZEkHWSBMZA8bDbMqd7upzwY\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330973,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To solve the problem of 2 + 2, we simply
perform the addition:\\n\\n1. Start with the first number: 2\\n2. Add the
second number: + 2\\n3. Combine the two: 2 + 2 = 4\\n\\nTherefore, the answer
is 4.\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 54,\n \"completion_tokens\": 62,\n
\ \"total_tokens\": 116,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": \"To calculate the sum of 2 + 2, I simply
add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
115,\n \"completion_tokens\": 33,\n \"total_tokens\": 148,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -189,7 +202,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:30 GMT
- Thu, 05 Feb 2026 22:36:14 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -207,7 +220,155 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1300'
- '614'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Calculate the sum of 2 + 2\\nResult: To calculate the sum of 2 + 2, I simply
add the two numbers together.\\n\\n2 + 2 = 4\\n\\nThe result is 4.\\n\\n## Remaining
plan steps:\\n Step 2: Provide the result as final output\\n\\nAnalyze this
step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4024'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FKmJpd8tlJ6Y3OChUQsoz2o5ps\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330974,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
calculation for 2 + 2 is 4.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
789,\n \"completion_tokens\": 64,\n \"total_tokens\": 853,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:15 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1181'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -42,17 +42,17 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTTFxQ75llVmJv0ee902FIjXE8p\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078147,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FTKj39Y02oqJmQxpmC8sz2piEl\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330983,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"3 + 3 equals 6.\",\n \"refusal\":
\"assistant\",\n \"content\": \"The sum of 3 + 3 is 6.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -61,7 +61,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:27 GMT
- Thu, 05 Feb 2026 22:36:23 GMT
Server:
- cloudflare
Set-Cookie:
@@ -81,7 +81,113 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '401'
- '361'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
Task: What is 3 + 3?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '260'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FT7ELyytoJFkmjOtWysQA2Bfvy\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330983,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The sum of 3 + 3 is 6.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:23 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '362'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -4,18 +4,25 @@ interactions:
Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
a focused execution plan for the following task:\n\n## Task\nWhat is 7 + 7?\n\n##
Expected Output\nComplete the task successfully\n\n## Available Tools\nNo tools
available\n\n## Instructions\nCreate ONLY the essential steps needed to complete
this task. Use the MINIMUM number of steps required - do NOT pad your plan with
unnecessary steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State
the specific action to take\n- Specify which tool to use (if any)\n\nDo NOT
include:\n- Setup or preparation steps that are obvious\n- Verification steps
unless critical\n- Documentation or cleanup steps unless explicitly required\n-
Generic steps like \"review results\" or \"finalize output\"\n\nAfter your plan,
state:\n- \"READY: I am ready to execute the task.\" if the plan is complete\n-
\"NOT READY: I need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
available\n\n## Planning Principles\nFocus on WHAT needs to be accomplished,
not HOW. Group related actions into logical units. Fewer steps = better. Most
tasks need 3-6 steps. Hard limit: 20 steps.\n\n## Step Types (only these are
valid):\n1. **Tool Step**: Uses a tool to gather information or take action\n2.
**Output Step**: Synthesizes prior results into the final deliverable (usually
the last step)\n\n## Rules:\n- Each step must either USE A TOOL or PRODUCE THE
FINAL OUTPUT\n- Combine related tool calls: \"Research A, B, and C\" = ONE step,
not three\n- Combine all synthesis into ONE final output step\n- NO standalone
\"thinking\" steps (review, verify, confirm, refine, analyze) - these happen
naturally between steps\n\nFor each step: State the action, specify the tool
(if any), and note dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -28,7 +35,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1541'
- '2315'
content-type:
- application/json
host:
@@ -55,18 +62,24 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTdqlxwWowSdLncBERFrCgxTvVj\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078157,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FN5xLKcEfF0ISjfbnezYLsZtma\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330977,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Calculate
the sum of 7 and 7.\\n \\nREADY: I am ready to execute the task.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
281,\n \"completion_tokens\": 28,\n \"total_tokens\": 309,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_rSNKBB5w6x6IXkm0fm2GN1hI\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of 7 +
7 and provide the result.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Calculate
the sum of 7 + 7.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Provide
the final output of the calculation.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 440,\n \"completion_tokens\":
89,\n \"total_tokens\": 529,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -75,7 +88,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:38 GMT
- Thu, 05 Feb 2026 22:36:18 GMT
Server:
- cloudflare
Set-Cookie:
@@ -95,7 +108,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '709'
- '1700'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -121,9 +134,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant that solves math problems step by step\nYour personal goal is: Help
solve simple math problems"},{"role":"user","content":"\nCurrent Task: What
is 7 + 7?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
assistant that solves math problems step by step\n\nYour goal: Help solve simple
math problems\n\nYou are executing a specific step in a multi-step plan. Focus
ONLY on completing\nthe current step. Do not plan ahead or worry about future
steps.\n\nBefore acting, briefly reason about what you need to do and which
approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nCalculate the sum of 7 + 7.\n\nComplete this step and provide
your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -136,7 +153,7 @@ interactions:
connection:
- keep-alive
content-length:
- '299'
- '598'
content-type:
- application/json
cookie:
@@ -165,18 +182,19 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTeB6Miecallw9SjSfLAXPjX2XD\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078158,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FOVRLtzvZr17sXJ05O6NTxw1rI\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330978,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To find the sum of 7 and 7, you simply
add the two numbers together:\\n\\n7 + 7 = 14\\n\\nSo, the answer is 14.\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
54,\n \"completion_tokens\": 35,\n \"total_tokens\": 89,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": \"To calculate the sum of 7 + 7, I need
to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, the result
is 14.\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 115,\n \"completion_tokens\": 38,\n
\ \"total_tokens\": 153,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -185,7 +203,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:38 GMT
- Thu, 05 Feb 2026 22:36:19 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -203,7 +221,418 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '733'
- '868'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Calculate the sum of 7 + 7.\\nResult: To calculate the sum of 7 + 7, I need
to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo, the result
is 14.\\n\\n## Remaining plan steps:\\n Step 2: Provide the final output of
the calculation.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4051'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FPKZpmhdynDPftfUn6yxeNSmro\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330979,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
sum of 7 + 7 has been correctly calculated to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
795,\n \"completion_tokens\": 69,\n \"total_tokens\": 864,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:21 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1071'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant that solves math problems step by step\n\nYour goal: Help solve simple
math problems\n\nYou are executing a specific step in a multi-step plan. Focus
ONLY on completing\nthe current step. Do not plan ahead or worry about future
steps.\n\nBefore acting, briefly reason about what you need to do and which
approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nProvide the final output of the calculation.\n\n## Context from
previous steps:\nStep 1 result: To calculate the sum of 7 + 7, I need to simply
add the two numbers together. \n\n7 + 7 = 14.\n\nSo, the result is 14.\n\nComplete
this step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '785'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FRqSOxtg5k7zpUfvXk8XEZMz9x\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330981,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The final output of the calculation
is 14.\",\n \"refusal\": null,\n \"annotations\": []\n },\n
\ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n
\ \"usage\": {\n \"prompt_tokens\": 162,\n \"completion_tokens\": 10,\n
\ \"total_tokens\": 172,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:21 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '446'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
\ Step 1: Calculate the sum of 7 + 7.\\n Result: To calculate the sum of
7 + 7, I need to simply add the two numbers together. \\n\\n7 + 7 = 14.\\n\\nSo,
the result is 14.\\n\\n## Just completed step 2\\nDescription: Provide the final
output of the calculation.\\nResult: The final output of the calculation is
14.\\n\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4113'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FRITGCaSHqqF9f8FVEgkrZ36QL\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330981,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
final output of the calculation is confirmed to be 14.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
808,\n \"completion_tokens\": 65,\n \"total_tokens\": 873,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:22 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '924'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -1,108 +0,0 @@
interactions:
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '260'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTf8T2iADffpPCJBZhntLlaoaSy\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078159,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:40 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '515'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -1,23 +1,104 @@
interactions:
- request:
body: '{"trace_id": "869cae2c-e863-4e17-b6c7-e9cf6ba8835d", "execution_type":
"crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
"crew_name": "Unknown Crew", "flow_name": null, "crewai_version": "1.9.3", "privacy_level":
"standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2026-02-05T22:35:59.859861+00:00"}}'
headers:
Accept:
- '*/*'
Connection:
- keep-alive
Content-Length:
- '434'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
X-Crewai-Organization-Id:
- 3433f0ee-8a94-4aa4-822b-2ac71aa38b18
X-Crewai-Version:
- 1.9.3
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
method: POST
uri: https://app.crewai.com/crewai_plus/api/v1/tracing/batches
response:
body:
string: '{"id":"d34854ac-4e95-420c-b08a-af182e63fc75","trace_id":"869cae2c-e863-4e17-b6c7-e9cf6ba8835d","execution_type":"crew","crew_name":"Unknown
Crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.9.3","privacy_level":"standard","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"Unknown
Crew","flow_name":null,"crewai_version":"1.9.3","privacy_level":"standard"},"created_at":"2026-02-05T22:36:00.450Z","updated_at":"2026-02-05T22:36:00.450Z"}'
headers:
Connection:
- keep-alive
Content-Length:
- '492'
Content-Type:
- application/json; charset=utf-8
Date:
- Thu, 05 Feb 2026 22:36:00 GMT
cache-control:
- no-store
content-security-policy:
- CSP-FILTERED
etag:
- ETAG-XXX
expires:
- '0'
permissions-policy:
- PERMISSIONS-POLICY-XXX
pragma:
- no-cache
referrer-policy:
- REFERRER-POLICY-XXX
strict-transport-security:
- STS-XXX
vary:
- Accept
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-frame-options:
- X-FRAME-OPTIONS-XXX
x-permitted-cross-domain-policies:
- X-PERMITTED-XXX
x-request-id:
- X-REQUEST-ID-XXX
x-runtime:
- X-RUNTIME-XXX
x-xss-protection:
- X-XSS-PROTECTION-XXX
status:
code: 201
message: Created
- request:
body: '{"messages":[{"role":"system","content":"You are a strategic planning assistant.
Create minimal, effective execution plans. Prefer fewer steps over more."},{"role":"user","content":"Create
a focused execution plan for the following task:\n\n## Task\nCalculate the sum
of the first 3 prime numbers, then multiply that result by 2. Show your work
for each step.\n\n## Expected Output\nComplete the task successfully\n\n## Available
Tools\nNo tools available\n\n## Instructions\nCreate ONLY the essential steps
needed to complete this task. Use the MINIMUM number of steps required - do
NOT pad your plan with unnecessary steps. Most tasks need only 2-5 steps.\n\nFor
each step:\n- State the specific action to take\n- Specify which tool to use
(if any)\n\nDo NOT include:\n- Setup or preparation steps that are obvious\n-
Verification steps unless critical\n- Documentation or cleanup steps unless
explicitly required\n- Generic steps like \"review results\" or \"finalize output\"\n\nAfter
your plan, state:\n- \"READY: I am ready to execute the task.\" if the plan
is complete\n- \"NOT READY: I need to refine my plan because [reason].\" if
you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
Tools\nNo tools available\n\n## Planning Principles\nFocus on WHAT needs to
be accomplished, not HOW. Group related actions into logical units. Fewer steps
= better. Most tasks need 3-6 steps. Hard limit: 10 steps.\n\n## Step Types
(only these are valid):\n1. **Tool Step**: Uses a tool to gather information
or take action\n2. **Output Step**: Synthesizes prior results into the final
deliverable (usually the last step)\n\n## Rules:\n- Each step must either USE
A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine related tool calls: \"Research
A, B, and C\" = ONE step, not three\n- Combine all synthesis into ONE final
output step\n- NO standalone \"thinking\" steps (review, verify, confirm, refine,
analyze) - these happen naturally between steps\n\nFor each step: State the
action, specify the tool (if any), and note dependencies.\n\nAfter your plan,
state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -30,7 +111,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1636'
- '2410'
content-type:
- application/json
host:
@@ -57,20 +138,26 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTWa7FxCHkHwHF25AYXXeJDBOuY\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078150,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62F62rSoHIF6DpZZFowcKaVmb8Iu\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330960,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Identify
the first 3 prime numbers: 2, 3, and 5.\\n2. Calculate the sum: \\\\(2 + 3
+ 5 = 10\\\\).\\n3. Multiply the sum by 2: \\\\(10 \\\\times 2 = 20\\\\).\\n\\nREADY:
I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 299,\n \"completion_tokens\":
74,\n \"total_tokens\": 373,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_bJJDEK5hizeG4PyxSUynX9x8\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Calculate the sum of the
first 3 prime numbers and multiply that sum by 2.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Identify
the first 3 prime numbers (2, 3, 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Calculate
the sum of the identified prime numbers (2 + 3 + 5).\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Multiply
the sum by 2.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]},{\\\"step_number\\\":4,\\\"description\\\":\\\"Output
the final result.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[3]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 458,\n \"completion_tokens\":
160,\n \"total_tokens\": 618,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -79,7 +166,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:32 GMT
- Thu, 05 Feb 2026 22:36:03 GMT
Server:
- cloudflare
Set-Cookie:
@@ -99,7 +186,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1716'
- '2448'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -125,10 +212,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert
math tutor who breaks down problems step by step\nYour personal goal is: Solve
multi-step math problems accurately"},{"role":"user","content":"\nCurrent Task:
Calculate the sum of the first 3 prime numbers, then multiply that result by
2. Show your work for each step.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step
math problems accurately\n\nYou are executing a specific step in a multi-step
plan. Focus ONLY on completing\nthe current step. Do not plan ahead or worry
about future steps.\n\nBefore acting, briefly reason about what you need to
do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nIdentify the first 3 prime numbers (2, 3, 5).\n\nComplete this
step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -141,7 +231,7 @@ interactions:
connection:
- keep-alive
content-length:
- '400'
- '622'
content-type:
- application/json
cookie:
@@ -170,30 +260,24 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTYJgCZf2oY7wiPMZmN4QEQhHb5\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078152,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62F9RUZUkszp0yCWHbNJVTG16bx9\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330963,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To solve the problem, let's break it
down into two main steps: \\n\\n1. Calculate the sum of the first 3 prime
numbers.\\n2. Multiply the result of that sum by 2.\\n\\n### Step 1: Identify
the first 3 prime numbers\\nPrime numbers are natural numbers greater than
1 that have no positive divisors other than 1 and themselves. \\n\\nThe first
three prime numbers are:\\n- 2\\n- 3\\n- 5\\n\\n### Step 2: Calculate the
sum of the first 3 prime numbers\\nNow, we add these prime numbers together:\\n\\n\\\\[\\n2
+ 3 + 5\\n\\\\]\\n\\nCalculating this step-by-step:\\n- First, add 2 and 3:\\n
\ \\\\[\\n 2 + 3 = 5\\n \\\\]\\n \\n- Next, add this result to 5:\\n \\\\[\\n
\ 5 + 5 = 10\\n \\\\]\\n\\nSo, the sum of the first 3 prime numbers is \\\\(10\\\\).\\n\\n###
Step 3: Multiply the sum by 2\\nNext, we take the sum we calculated and multiply
it by 2:\\n\\n\\\\[\\n10 \\\\times 2\\n\\\\]\\n\\nCalculating this:\\n\\\\[\\n10
\\\\times 2 = 20\\n\\\\]\\n\\n### Final Answer\\nThus, the final result obtained
after performing all the steps is:\\n\\n\\\\[\\n\\\\boxed{20}\\n\\\\]\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
74,\n \"completion_tokens\": 288,\n \"total_tokens\": 362,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
\"assistant\",\n \"content\": \"To identify the first three prime numbers,
we need to recall the definition of a prime number: it is a natural number
greater than 1 that has no positive divisors other than 1 and itself. \\n\\nStarting
from 2, we find:\\n1. The number **2** is prime (divisors are 1 and 2).\\n2.
The number **3** is prime (divisors are 1 and 3).\\n3. The number **4** is
not prime (divisors are 1, 2, and 4).\\n4. The number **5** is prime (divisors
are 1 and 5).\\n\\nThus, the first three prime numbers are **2, 3, and 5**.
\\n\\nResult: 2, 3, 5.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 123,\n \"completion_tokens\":
166,\n \"total_tokens\": 289,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -202,7 +286,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:37 GMT
- Thu, 05 Feb 2026 22:36:06 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -220,7 +304,444 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '4751'
- '3090'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Identify the first 3 prime numbers (2, 3, 5).\\nResult: To identify the first
three prime numbers, we need to recall the definition of a prime number: it
is a natural number greater than 1 that has no positive divisors other than
1 and itself. \\n\\nStarting from 2, we find:\\n1. The number **2** is prime
(divisors are 1 and 2).\\n2. The number **3** is prime (divisors are 1 and 3).\\n3.
The number **4** is not prime (divisors are 1, 2, and 4).\\n4. The number **5**
is prime (divisors are 1 and 5).\\n\\nThus, the first three prime numbers are
**2, 3, and 5**. \\n\\nResult: 2, 3, 5.\\n\\n## Remaining plan steps:\\n Step
2: Calculate the sum of the identified prime numbers (2 + 3 + 5).\\n Step 3:
Multiply the sum by 2.\\n Step 4: Output the final result.\\n\\nAnalyze this
step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4561'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FCKhhkyZ4k2uH2KyhxsGnWEM7R\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330966,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"The
first three prime numbers have been correctly identified as 2, 3, and 5.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
960,\n \"completion_tokens\": 72,\n \"total_tokens\": 1032,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:07 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1058'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Tutor. An expert
math tutor who breaks down problems step by step\n\nYour goal: Solve multi-step
math problems accurately\n\nYou are executing a specific step in a multi-step
plan. Focus ONLY on completing\nthe current step. Do not plan ahead or worry
about future steps.\n\nBefore acting, briefly reason about what you need to
do and which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nCalculate the sum of the identified prime numbers (2 + 3 + 5).\n\n##
Context from previous steps:\nStep 1 result: To identify the first three prime
numbers, we need to recall the definition of a prime number: it is a natural
number greater than 1 that has no positive divisors other than 1 and itself.
\n\nStarting from 2, we find:\n1. The number **2** is prime (divisors are 1
and 2).\n2. The number **3** is prime (divisors are 1 and 3).\n3. The number
**4** is not prime (divisors are 1, 2, and 4).\n4. The number **5** is prime
(divisors are 1 and 5).\n\nThus, the first three prime numbers are **2, 3, and
5**. \n\nResult: 2, 3, 5.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1213'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FDWh7MhYTKIsLCnq6r5iXrbdrN\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330967,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To calculate the sum of the identified
prime numbers (2 + 3 + 5), I will follow these steps:\\n\\n1. Add the first
two prime numbers: \\n - \\\\( 2 + 3 = 5 \\\\)\\n\\n2. Then, add the result
to the third prime number:\\n - \\\\( 5 + 5 = 10 \\\\)\\n\\nSo the sum of
the identified prime numbers (2 + 3 + 5) is **10**.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
301,\n \"completion_tokens\": 95,\n \"total_tokens\": 396,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:09 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1470'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
\ Step 1: Identify the first 3 prime numbers (2, 3, 5).\\n Result: To identify
the first three prime numbers, we need to recall the definition of a prime number:
it is a natural number greater than 1 that has no positive divisors other than
1 and itself. \\n\\nStarting f\\n\\n## Just completed step 2\\nDescription:
Calculate the sum of the identified prime numbers (2 + 3 + 5).\\nResult: To
calculate the sum of the identified prime numbers (2 + 3 + 5), I will follow
these steps:\\n\\n1. Add the first two prime numbers: \\n - \\\\( 2 + 3 =
5 \\\\)\\n\\n2. Then, add the result to the third prime number:\\n - \\\\(
5 + 5 = 10 \\\\)\\n\\nSo the sum of the identified prime numbers (2 + 3 + 5)
is **10**.\\n\\n## Remaining plan steps:\\n Step 3: Multiply the sum by 2.\\n
\ Step 4: Output the final result.\\n\\nAnalyze this step's result and provide
your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4591'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FFIa3JdCnNkh6sa0wz28i55ni1\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330969,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":false,\\\"key_information_learned\\\":\\\"The
calculation for the sum of the identified prime numbers was incorrect; it
should be 2 + 3 + 5 = 10, but there was a typo where the last addition was
mistakenly written as 5 + 5 instead of 5 + 2.\\\",\\\"remaining_plan_still_valid\\\":false,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":true,\\\"replan_reason\\\":\\\"The
remaining steps are based on an incorrect sum, making them invalid. The calculations
must be restarted from the correct determination of the sum of the prime numbers.\\\",\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
942,\n \"completion_tokens\": 135,\n \"total_tokens\": 1077,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:11 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '2300'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -42,17 +42,17 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yXGD5IrieoUDSK5hDmJyA2gJtDc\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078382,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FLMJF1jiuD18qhDDxWFYzJxWk3\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330975,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\":
\"assistant\",\n \"content\": \"The sum of 5 + 5 is 10.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
47,\n \"completion_tokens\": 12,\n \"total_tokens\": 59,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -61,7 +61,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:26:23 GMT
- Thu, 05 Feb 2026 22:36:16 GMT
Server:
- cloudflare
Set-Cookie:
@@ -81,7 +81,113 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '363'
- '342'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Math Assistant. A helpful
assistant\nYour personal goal is: Help solve simple math problems"},{"role":"user","content":"\nCurrent
Task: What is 5 + 5?\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '260'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FM3zRv6CP5jgOiAWIaTukuPjwP\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330976,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"5 + 5 equals 10.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
47,\n \"completion_tokens\": 8,\n \"total_tokens\": 55,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:16 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '488'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:

View File

@@ -5,18 +5,25 @@ interactions:
a focused execution plan for the following task:\n\n## Task\nConvert 100 degrees
Celsius to Fahrenheit, then round the result to the nearest 10.\n\n## Expected
Output\nComplete the task successfully\n\n## Available Tools\nNo tools available\n\n##
Instructions\nCreate ONLY the essential steps needed to complete this task.
Use the MINIMUM number of steps required - do NOT pad your plan with unnecessary
steps. Most tasks need only 2-5 steps.\n\nFor each step:\n- State the specific
action to take\n- Specify which tool to use (if any)\n\nDo NOT include:\n- Setup
or preparation steps that are obvious\n- Verification steps unless critical\n-
Documentation or cleanup steps unless explicitly required\n- Generic steps like
\"review results\" or \"finalize output\"\n\nAfter your plan, state:\n- \"READY:
I am ready to execute the task.\" if the plan is complete\n- \"NOT READY: I
need to refine my plan because [reason].\" if you need more thinking"}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"The
detailed reasoning plan for the task."},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","ready"],"additionalProperties":false}}}]}'
Planning Principles\nFocus on WHAT needs to be accomplished, not HOW. Group
related actions into logical units. Fewer steps = better. Most tasks need 3-6
steps. Hard limit: 10 steps.\n\n## Step Types (only these are valid):\n1. **Tool
Step**: Uses a tool to gather information or take action\n2. **Output Step**:
Synthesizes prior results into the final deliverable (usually the last step)\n\n##
Rules:\n- Each step must either USE A TOOL or PRODUCE THE FINAL OUTPUT\n- Combine
related tool calls: \"Research A, B, and C\" = ONE step, not three\n- Combine
all synthesis into ONE final output step\n- NO standalone \"thinking\" steps
(review, verify, confirm, refine, analyze) - these happen naturally between
steps\n\nFor each step: State the action, specify the tool (if any), and note
dependencies.\n\nAfter your plan, state READY or NOT READY."}],"model":"gpt-4o-mini","tool_choice":"auto","tools":[{"type":"function","function":{"name":"create_reasoning_plan","description":"Create
or refine a reasoning plan for a task with structured steps","strict":true,"parameters":{"type":"object","properties":{"plan":{"type":"string","description":"A
brief summary of the overall plan."},"steps":{"type":"array","description":"List
of discrete steps to execute the plan","items":{"type":"object","properties":{"step_number":{"type":"integer","description":"Step
number (1-based)"},"description":{"type":"string","description":"What to do
in this step"},"tool_to_use":{"type":["string","null"],"description":"Tool to
use for this step, or null if no tool needed"},"depends_on":{"type":"array","items":{"type":"integer"},"description":"Step
numbers this step depends on (empty array if none)"}},"required":["step_number","description","tool_to_use","depends_on"],"additionalProperties":false}},"ready":{"type":"boolean","description":"Whether
the agent is ready to execute the task."}},"required":["plan","steps","ready"],"additionalProperties":false}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -29,7 +36,7 @@ interactions:
connection:
- keep-alive
content-length:
- '1610'
- '2384'
content-type:
- application/json
host:
@@ -56,20 +63,25 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTN8fHOefyzzhvdUOHjxdFDR2HW\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078141,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FU2te4ww3DuIzbuySwWTIPTx6A\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330984,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"## Execution Plan\\n\\n1. Convert 100
degrees Celsius to Fahrenheit using the formula: \\\\( F = C \\\\times \\\\frac{9}{5}
+ 32 \\\\).\\n2. Round the Fahrenheit result to the nearest 10.\\n\\nREADY:
I am ready to execute the task.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 291,\n \"completion_tokens\":
58,\n \"total_tokens\": 349,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n
\ \"id\": \"call_0LXFaxnsqT2kFmUyanui30k0\",\n \"type\":
\"function\",\n \"function\": {\n \"name\": \"create_reasoning_plan\",\n
\ \"arguments\": \"{\\\"plan\\\":\\\"Convert 100 degrees Celsius
to Fahrenheit and round the result to the nearest 10.\\\",\\\"steps\\\":[{\\\"step_number\\\":1,\\\"description\\\":\\\"Convert
100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[]},{\\\"step_number\\\":2,\\\"description\\\":\\\"Round
the Fahrenheit result to the nearest 10.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[1]},{\\\"step_number\\\":3,\\\"description\\\":\\\"Output
the final rounded temperature in Fahrenheit.\\\",\\\"tool_to_use\\\":null,\\\"depends_on\\\":[2]}],\\\"ready\\\":true}\"\n
\ }\n }\n ],\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 450,\n \"completion_tokens\":
133,\n \"total_tokens\": 583,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -78,7 +90,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:22 GMT
- Thu, 05 Feb 2026 22:36:26 GMT
Server:
- cloudflare
Set-Cookie:
@@ -98,7 +110,7 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1089'
- '1976'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
@@ -124,10 +136,13 @@ interactions:
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise
unit conversion specialist\nYour personal goal is: Accurately convert between
units and apply transformations"},{"role":"user","content":"\nCurrent Task:
Convert 100 degrees Celsius to Fahrenheit, then round the result to the nearest
10.\n\nProvide your complete response:"}],"model":"gpt-4o-mini"}'
unit conversion specialist\n\nYour goal: Accurately convert between units and
apply transformations\n\nYou are executing a specific step in a multi-step plan.
Focus ONLY on completing\nthe current step. Do not plan ahead or worry about
future steps.\n\nBefore acting, briefly reason about what you need to do and
which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nConvert 100 degrees Celsius to Fahrenheit using the formula (C
* 9/5) + 32.\n\nComplete this step and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
@@ -140,7 +155,7 @@ interactions:
connection:
- keep-alive
content-length:
- '373'
- '651'
content-type:
- application/json
cookie:
@@ -169,26 +184,21 @@ interactions:
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D4yTPQewXDyPdYHI4dHPH7YGHcRge\",\n \"object\":
\"chat.completion\",\n \"created\": 1770078143,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
string: "{\n \"id\": \"chatcmpl-D62FWhREtHEudJMFFypgh33C8GLdH\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330986,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To convert degrees Celsius to Fahrenheit,
you can use the formula:\\n\\n\\\\[ F = \\\\left( C \\\\times \\\\frac{9}{5}
\\\\right) + 32 \\\\]\\n\\nPlugging in 100 degrees Celsius:\\n\\n\\\\[ F =
\\\\left( 100 \\\\times \\\\frac{9}{5} \\\\right) + 32 \\\\]\\n\\nCalculating
that step-by-step:\\n\\n1. Multiply 100 by 9: \\n \\\\[ 100 \\\\times 9
= 900 \\\\]\\n\\n2. Divide by 5:\\n \\\\[ 900 \\\\div 5 = 180 \\\\]\\n\\n3.
Add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nSo, 100 degrees Celsius is equal
to 212 degrees Fahrenheit.\\n\\nNow, rounding 212 to the nearest 10:\\n\\nThe
nearest multiple of 10 to 212 is 210.\\n\\nTherefore, the final result is
**210 degrees Fahrenheit**.\",\n \"refusal\": null,\n \"annotations\":
\"assistant\",\n \"content\": \"To convert 100 degrees Celsius to Fahrenheit
using the formula (C * 9/5) + 32, we substitute C with 100:\\n\\n1. Multiply
100 by 9/5:\\n \\\\[ 100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2.
Then add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore, 100 degrees Celsius
is equal to 212 degrees Fahrenheit.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 63,\n \"completion_tokens\":
191,\n \"total_tokens\": 254,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 126,\n \"completion_tokens\":
101,\n \"total_tokens\": 227,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_1590f93f9d\"\n}\n"
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
@@ -197,7 +207,7 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 03 Feb 2026 00:22:26 GMT
- Thu, 05 Feb 2026 22:36:27 GMT
Server:
- cloudflare
Strict-Transport-Security:
@@ -215,7 +225,548 @@ interactions:
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '3736'
- '1505'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n\\n## Just completed step 1\\nDescription:
Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32.\\nResult:
To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32,
we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n \\\\[ 100 * \\\\frac{9}{5}
= 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n \\\\[ 180 + 32 = 212 \\\\]\\n\\nTherefore,
100 degrees Celsius is equal to 212 degrees Fahrenheit.\\n\\n## Remaining plan
steps:\\n Step 2: Round the Fahrenheit result to the nearest 10.\\n Step 3:
Output the final rounded temperature in Fahrenheit.\\n\\nAnalyze this step's
result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4342'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FXF5UZlLp9eu5O7HsZvIvpC4My\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330987,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully
converted 100 degrees Celsius to 212 degrees Fahrenheit.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":[\\\"Step
2 should round 212 to the nearest 10, resulting in 210.\\\"],\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":false}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
885,\n \"completion_tokens\": 81,\n \"total_tokens\": 966,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:29 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '2195'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are refining upcoming plan
steps based on new information. Update the step descriptions to be more specific
and actionable given what was learned. Keep the same step numbers.\n\nRespond
with one line per step in the format:\nStep N: <refined description>"},{"role":"user","content":"##
New information learned\nSuccessfully converted 100 degrees Celsius to 212 degrees
Fahrenheit.\n\n## Suggested refinements\nStep 2 should round 212 to the nearest
10, resulting in 210.\n\n## Current pending steps\nStep 2: Round the Fahrenheit
result to the nearest 10.\nStep 3: Output the final rounded temperature in Fahrenheit.\n\nUpdate
the step descriptions to incorporate the new information."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '754'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FaclC9tg2ClH7HU3pfMzmlPJpB\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330990,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"Step 2: Round the Fahrenheit result
of 212 degrees to the nearest 10, resulting in 210 degrees. \\nStep 3: Output
the final rounded temperature as 210 degrees Fahrenheit.\",\n \"refusal\":
null,\n \"annotations\": []\n },\n \"logprobs\": null,\n
\ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
142,\n \"completion_tokens\": 40,\n \"total_tokens\": 182,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:30 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '706'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: '{"messages":[{"role":"system","content":"You are Unit Converter. A precise
unit conversion specialist\n\nYour goal: Accurately convert between units and
apply transformations\n\nYou are executing a specific step in a multi-step plan.
Focus ONLY on completing\nthe current step. Do not plan ahead or worry about
future steps.\n\nBefore acting, briefly reason about what you need to do and
which approach\nor tool would be most helpful for this specific step."},{"role":"user","content":"##
Current Step\nRound the Fahrenheit result of 212 degrees to the nearest 10,
resulting in 210 degrees.\n\n## Context from previous steps:\nStep 1 result:
To convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5) + 32,
we substitute C with 100:\n\n1. Multiply 100 by 9/5:\n \\[ 100 * \\frac{9}{5}
= 100 * 1.8 = 180 \\]\n\n2. Then add 32:\n \\[ 180 + 32 = 212 \\]\n\nTherefore,
100 degrees Celsius is equal to 212 degrees Fahrenheit.\n\nComplete this step
and provide your result."}],"model":"gpt-4o-mini"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '1011'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62Fb9PlGlUIcZRS2v2Lp9S62brRP\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330991,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"To complete this step, I will round
the Fahrenheit result of 212 degrees to the nearest 10. When rounding, since
212 is closer to 210 than it is to 220, I will round it down to 210 degrees.\\n\\nResult:
210 degrees Fahrenheit.\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 236,\n \"completion_tokens\":
56,\n \"total_tokens\": 292,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:32 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1187'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
- request:
body: "{\"messages\":[{\"role\":\"system\",\"content\":\"You are a Planning Agent
observing execution progress. After each step completes, you analyze what happened
and decide whether the remaining plan is still valid.\\n\\nReason step-by-step
about:\\n1. What new information was learned from this step's result\\n2. Whether
the remaining steps still make sense given this new information\\n3. What refinements,
if any, are needed for upcoming steps\\n4. Whether the overall goal has already
been achieved\\n\\nBe conservative about triggering full replans \u2014 only
do so when the remaining plan is fundamentally wrong, not just suboptimal.\"},{\"role\":\"user\",\"content\":\"##
Original task\\n\\n\\n## Expected output\\n\\n\\n## Previously completed steps:\\n
\ Step 1: Convert 100 degrees Celsius to Fahrenheit using the formula (C * 9/5)
+ 32.\\n Result: To convert 100 degrees Celsius to Fahrenheit using the formula
(C * 9/5) + 32, we substitute C with 100:\\n\\n1. Multiply 100 by 9/5:\\n \\\\[
100 * \\\\frac{9}{5} = 100 * 1.8 = 180 \\\\]\\n\\n2. Then add 32:\\n \\\\[
18\\n\\n## Just completed step 2\\nDescription: Round the Fahrenheit result
of 212 degrees to the nearest 10, resulting in 210 degrees.\\nResult: To complete
this step, I will round the Fahrenheit result of 212 degrees to the nearest
10. When rounding, since 212 is closer to 210 than it is to 220, I will round
it down to 210 degrees.\\n\\nResult: 210 degrees Fahrenheit.\\n\\n## Remaining
plan steps:\\n Step 3: Output the final rounded temperature as 210 degrees
Fahrenheit.\\n\\nAnalyze this step's result and provide your observation.\"}],\"model\":\"gpt-4o-mini\",\"response_format\":{\"type\":\"json_schema\",\"json_schema\":{\"schema\":{\"description\":\"Planner's
observation after a step execution completes.\\n\\nReturned by the PlannerObserver
after EVERY step \u2014 not just failures.\\nThe Planner uses this to decide
whether to continue, refine, or replan.\\n\\nBased on PLAN-AND-ACT (Section
3.3): the Planner observes what the Executor\\ndid and incorporates new information
into the remaining plan.\\n\\nAttributes:\\n step_completed_successfully:
Whether the step achieved its objective.\\n key_information_learned: New
information revealed by this step\\n (e.g., \\\"Found 3 products: A,
B, C\\\"). Used to refine upcoming steps.\\n remaining_plan_still_valid:
Whether pending todos still make sense\\n given the new information.
True does NOT mean no refinement needed.\\n suggested_refinements: Minor
tweaks to upcoming step descriptions.\\n These are lightweight in-place
updates, not a full replan.\\n Example: [\\\"Step 3 should select product
B instead of 'best product'\\\"]\\n needs_full_replan: The remaining plan
is fundamentally wrong and must\\n be regenerated from scratch. Mutually
exclusive with\\n remaining_plan_still_valid (if this is True, that should
be False).\\n replan_reason: Explanation of why a full replan is needed (None
if not).\\n goal_already_achieved: The overall task goal has been satisfied
early.\\n No more steps needed \u2014 skip remaining todos and finalize.\",\"properties\":{\"step_completed_successfully\":{\"description\":\"Whether
the step achieved what it was asked to do\",\"title\":\"Step Completed Successfully\",\"type\":\"boolean\"},\"key_information_learned\":{\"default\":\"\",\"description\":\"What
new information this step revealed\",\"title\":\"Key Information Learned\",\"type\":\"string\"},\"remaining_plan_still_valid\":{\"default\":true,\"description\":\"Whether
the remaining pending todos still make sense given new information\",\"title\":\"Remaining
Plan Still Valid\",\"type\":\"boolean\"},\"suggested_refinements\":{\"anyOf\":[{\"items\":{\"type\":\"string\"},\"type\":\"array\"},{\"type\":\"null\"}],\"description\":\"Minor
tweaks to descriptions of upcoming steps (lightweight, no full replan)\",\"title\":\"Suggested
Refinements\"},\"needs_full_replan\":{\"default\":false,\"description\":\"The
remaining plan is fundamentally wrong and must be regenerated\",\"title\":\"Needs
Full Replan\",\"type\":\"boolean\"},\"replan_reason\":{\"anyOf\":[{\"type\":\"string\"},{\"type\":\"null\"}],\"description\":\"Explanation
of why a full replan is needed\",\"title\":\"Replan Reason\"},\"goal_already_achieved\":{\"default\":false,\"description\":\"The
overall task goal has been satisfied early; no more steps needed\",\"title\":\"Goal
Already Achieved\",\"type\":\"boolean\"}},\"required\":[\"step_completed_successfully\",\"key_information_learned\",\"remaining_plan_still_valid\",\"suggested_refinements\",\"needs_full_replan\",\"replan_reason\",\"goal_already_achieved\"],\"title\":\"StepObservation\",\"type\":\"object\",\"additionalProperties\":false},\"name\":\"StepObservation\",\"strict\":true}},\"stream\":false}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '4579'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-helper-method:
- beta.chat.completions.parse
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-D62FctLDvklBSvOY641JCvwFaTugO\",\n \"object\":
\"chat.completion\",\n \"created\": 1770330992,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\\"step_completed_successfully\\\":true,\\\"key_information_learned\\\":\\\"Successfully
rounded the Fahrenheit result of 212 degrees down to 210 degrees.\\\",\\\"remaining_plan_still_valid\\\":true,\\\"suggested_refinements\\\":null,\\\"needs_full_replan\\\":false,\\\"replan_reason\\\":null,\\\"goal_already_achieved\\\":true}\",\n
\ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
941,\n \"completion_tokens\": 67,\n \"total_tokens\": 1008,\n \"prompt_tokens_details\":
{\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_f4ae844694\"\n}\n"
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 05 Feb 2026 22:36:33 GMT
Server:
- cloudflare
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '1208'
openai-project:
- OPENAI-PROJECT-XXX
openai-version: