From 7c9ce9ccd87089c9f6f385647c76c62f8b18c4ac Mon Sep 17 00:00:00 2001
From: Greyson LaLonde <greyson.r.lalonde@gmail.com>
Date: Fri, 23 Jan 2026 01:53:15 -0500
Subject: [PATCH] feat(openai): add Responses API support with auto-chaining
 and ZDR compliance

- Add full OpenAI Responses API support alongside existing Chat Completions API
- Implement auto_chain parameter to automatically track and pass previous_response_id
- Add auto_chain_reasoning for encrypted reasoning in ZDR (Zero Data Retention) scenarios
- Parse built-in tool outputs: web_search, file_search, computer_use, code_interpreter
- Support all Responses API parameters: reasoning, include, tools, truncation, etc.
- Add streaming support for Responses API with proper event handling
- Include 67 tests covering all new functionality
---
 .../llms/providers/openai/completion.py       | 1231 ++++++++++++++++-
 ...der_without_explicit_llm_set_on_agent.yaml |  334 ++++-
 ..._responses_api_auto_chain_integration.yaml |  229 +++
 ...i_responses_api_auto_chain_with_reset.yaml |  230 +++
 .../test_openai_responses_api_basic_call.yaml |  118 ++
 ...ses_api_parse_tool_outputs_basic_call.yaml |  115 ++
 ...i_responses_api_returns_usage_metrics.yaml |  115 ++
 .../test_openai_responses_api_streaming.yaml  |  165 +++
 ...responses_api_with_parse_tool_outputs.yaml |  133 ++
 ..._responses_api_with_structured_output.yaml |  127 ++
 ...es_api_with_system_message_extraction.yaml |  117 ++
 ..._openai_responses_api_with_web_search.yaml |  139 ++
 lib/crewai/tests/llms/openai/test_openai.py   |  775 ++++++++++-
 13 files changed, 3751 insertions(+), 77 deletions(-)
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_integration.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_with_reset.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_basic_call.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_parse_tool_outputs_basic_call.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_returns_usage_metrics.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_streaming.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_parse_tool_outputs.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_structured_output.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_system_message_extraction.yaml
 create mode 100644 lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_web_search.yaml

diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py
index 2788df84f..45239af0f 100644
--- a/lib/crewai/src/crewai/llms/providers/openai/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py
@@ -1,10 +1,11 @@
 from __future__ import annotations
 
 from collections.abc import AsyncIterator
+from dataclasses import dataclass, field
 import json
 import logging
 import os
-from typing import TYPE_CHECKING, Any
+from typing import TYPE_CHECKING, Any, ClassVar, Literal, TypedDict
 
 import httpx
 from openai import APIConnectionError, AsyncOpenAI, NotFoundError, OpenAI, Stream
@@ -12,6 +13,7 @@ from openai.lib.streaming.chat import ChatCompletionStream
 from openai.types.chat import ChatCompletion, ChatCompletionChunk
 from openai.types.chat.chat_completion import Choice
 from openai.types.chat.chat_completion_chunk import ChoiceDelta
+from openai.types.responses import Response
 from pydantic import BaseModel
 
 from crewai.events.types.llm_events import LLMCallType
@@ -32,13 +34,155 @@ if TYPE_CHECKING:
     from crewai.tools.base_tool import BaseTool
 
 
+class WebSearchResult(TypedDict, total=False):
+    """Result from web search built-in tool."""
+
+    id: str | None
+    status: str | None
+    type: str
+
+
+class FileSearchResultItem(TypedDict, total=False):
+    """Individual file search result."""
+
+    file_id: str | None
+    filename: str | None
+    text: str | None
+    score: float | None
+    attributes: dict[str, str | float | bool] | None
+
+
+class FileSearchResult(TypedDict, total=False):
+    """Result from file search built-in tool."""
+
+    id: str | None
+    status: str | None
+    type: str
+    queries: list[str]
+    results: list[FileSearchResultItem]
+
+
+class CodeInterpreterLogResult(TypedDict):
+    """Log output from code interpreter."""
+
+    type: str
+    logs: str
+
+
+class CodeInterpreterFileResult(TypedDict):
+    """File output from code interpreter."""
+
+    type: str
+    files: list[dict[str, Any]]
+
+
+class CodeInterpreterResult(TypedDict, total=False):
+    """Result from code interpreter built-in tool."""
+
+    id: str | None
+    status: str | None
+    type: str
+    code: str | None
+    container_id: str | None
+    results: list[CodeInterpreterLogResult | CodeInterpreterFileResult]
+
+
+class ComputerUseResult(TypedDict, total=False):
+    """Result from computer use built-in tool."""
+
+    id: str | None
+    status: str | None
+    type: str
+    call_id: str | None
+    action: dict[str, Any]
+    pending_safety_checks: list[dict[str, Any]]
+
+
+class ReasoningSummary(TypedDict, total=False):
+    """Summary from model reasoning."""
+
+    id: str | None
+    status: str | None
+    type: str
+    summary: list[dict[str, Any]]
+    encrypted_content: str | None
+
+
+@dataclass
+class ResponsesAPIResult:
+    """Result from OpenAI Responses API including text and tool outputs.
+
+    Attributes:
+        text: The text content from the response.
+        web_search_results: Results from web_search built-in tool calls.
+        file_search_results: Results from file_search built-in tool calls.
+        code_interpreter_results: Results from code_interpreter built-in tool calls.
+        computer_use_results: Results from computer_use built-in tool calls.
+        reasoning_summaries: Reasoning/thinking summaries from the model.
+        function_calls: Custom function tool calls.
+        response_id: The response ID for multi-turn conversations.
+    """
+
+    text: str = ""
+    web_search_results: list[WebSearchResult] = field(default_factory=list)
+    file_search_results: list[FileSearchResult] = field(default_factory=list)
+    code_interpreter_results: list[CodeInterpreterResult] = field(default_factory=list)
+    computer_use_results: list[ComputerUseResult] = field(default_factory=list)
+    reasoning_summaries: list[ReasoningSummary] = field(default_factory=list)
+    function_calls: list[dict[str, Any]] = field(default_factory=list)
+    response_id: str | None = None
+
+    def has_tool_outputs(self) -> bool:
+        """Check if there are any built-in tool outputs."""
+        return bool(
+            self.web_search_results
+            or self.file_search_results
+            or self.code_interpreter_results
+            or self.computer_use_results
+        )
+
+    def has_reasoning(self) -> bool:
+        """Check if there are reasoning summaries."""
+        return bool(self.reasoning_summaries)
+
+
 class OpenAICompletion(BaseLLM):
     """OpenAI native completion implementation.
 
     This class provides direct integration with the OpenAI Python SDK,
-    offering native structured outputs, function calling, and streaming support.
+    supporting both Chat Completions API and Responses API.
+
+    The Responses API is OpenAI's newer API primitive with built-in tools
+    (web search, file search, code interpreter), stateful conversations,
+    and improved reasoning model support.
+
+    Args:
+        api: Which OpenAI API to use - "completions" (default) or "responses".
+        instructions: System-level instructions (Responses API only).
+        store: Whether to store responses for multi-turn (Responses API only).
+        previous_response_id: ID of previous response for multi-turn (Responses API only).
+        include: Additional data to include in response (Responses API only).
+        builtin_tools: List of OpenAI built-in tools to enable (Responses API only).
+            Supported: "web_search", "file_search", "code_interpreter", "computer_use".
+        parse_tool_outputs: Whether to return structured ResponsesAPIResult with
+            parsed built-in tool outputs instead of just text (Responses API only).
+        auto_chain: Automatically track and use response IDs for multi-turn
+            conversations (Responses API only). When True, each response ID is saved
+            and used as previous_response_id in subsequent calls.
+        auto_chain_reasoning: Automatically track and pass encrypted reasoning items
+            for ZDR (Zero Data Retention) compliance (Responses API only). When True,
+            adds "reasoning.encrypted_content" to include, captures reasoning items
+            from responses, and passes them back in subsequent calls to preserve
+            chain-of-thought without storing data on OpenAI servers.
     """
 
+    BUILTIN_TOOL_TYPES: ClassVar[dict[str, str]] = {
+        "web_search": "web_search_preview",
+        "file_search": "file_search",
+        "code_interpreter": "code_interpreter",
+        "computer_use": "computer_use_preview",
+    }
+
     def __init__(
         self,
         model: str = "gpt-4o",
@@ -65,9 +209,18 @@ class OpenAICompletion(BaseLLM):
         reasoning_effort: str | None = None,
         provider: str | None = None,
         interceptor: BaseInterceptor[httpx.Request, httpx.Response] | None = None,
+        api: Literal["completions", "responses"] = "completions",
+        instructions: str | None = None,
+        store: bool | None = None,
+        previous_response_id: str | None = None,
+        include: list[str] | None = None,
+        builtin_tools: list[str] | None = None,
+        parse_tool_outputs: bool = False,
+        auto_chain: bool = False,
+        auto_chain_reasoning: bool = False,
         **kwargs: Any,
     ) -> None:
-        """Initialize OpenAI chat completion client."""
+        """Initialize OpenAI completion client."""
 
         if provider is None:
             provider = kwargs.pop("provider", "openai")
@@ -125,6 +278,57 @@ class OpenAICompletion(BaseLLM):
         self.is_o1_model = "o1" in model.lower()
         self.is_gpt4_model = "gpt-4" in model.lower()
 
+        # API selection and Responses API parameters
+        self.api = api
+        self.instructions = instructions
+        self.store = store
+        self.previous_response_id = previous_response_id
+        self.include = include
+        self.builtin_tools = builtin_tools
+        self.parse_tool_outputs = parse_tool_outputs
+        self.auto_chain = auto_chain
+        self.auto_chain_reasoning = auto_chain_reasoning
+        self._last_response_id: str | None = None
+        self._last_reasoning_items: list[Any] | None = None
+
+    @property
+    def last_response_id(self) -> str | None:
+        """Get the last response ID from auto-chaining.
+
+        Returns:
+            The response ID from the most recent Responses API call,
+            or None if no calls have been made or auto_chain is disabled.
+        """
+        return self._last_response_id
+
+    def reset_chain(self) -> None:
+        """Reset the auto-chain state to start a new conversation.
+
+        Clears the stored response ID so the next call starts fresh
+        without linking to previous responses.
+        """
+        self._last_response_id = None
+
+    @property
+    def last_reasoning_items(self) -> list[Any] | None:
+        """Get the last reasoning items from auto-chain reasoning.
+
+        Returns:
+            The reasoning items from the most recent Responses API call
+            containing encrypted content, or None if no calls have been made
+            or auto_chain_reasoning is disabled.
+        """
+        return self._last_reasoning_items
+
+    def reset_reasoning_chain(self) -> None:
+        """Reset the reasoning chain state to start fresh.
+
+        Clears the stored reasoning items so the next call starts without
+        preserving previous chain-of-thought context. Useful when starting
+        a new reasoning task that shouldn't reference previous reasoning.
+        """
+        self._last_reasoning_items = None
+
     def _get_client_params(self) -> dict[str, Any]:
         """Get OpenAI client parameters."""
 
@@ -164,19 +368,19 @@ class OpenAICompletion(BaseLLM):
         from_agent: Agent | None = None,
         response_model: type[BaseModel] | None = None,
     ) -> str | Any:
-        """Call OpenAI chat completion API.
+        """Call OpenAI API (Chat Completions or Responses based on api setting).
 
         Args:
-            messages: Input messages for the chat completion
-            tools: list of tool/function definitions
-            callbacks: Callback functions (not used in native implementation)
-            available_functions: Available functions for tool calling
-            from_task: Task that initiated the call
-            from_agent: Agent that initiated the call
+            messages: Input messages for the completion.
+            tools: List of tool/function definitions.
+            callbacks: Callback functions (not used in native implementation).
+            available_functions: Available functions for tool calling.
+            from_task: Task that initiated the call.
+            from_agent: Agent that initiated the call.
             response_model: Response model for structured output.
 
         Returns:
-            Chat completion response or tool call result
+            Completion response or tool call result.
         """
         try:
             self._emit_call_started_event(
@@ -193,21 +397,19 @@ class OpenAICompletion(BaseLLM):
             if not self._invoke_before_llm_call_hooks(formatted_messages, from_agent):
                 raise ValueError("LLM call blocked by before_llm_call hook")
 
-            completion_params = self._prepare_completion_params(
-                messages=formatted_messages, tools=tools
-            )
-
-            if self.stream:
-                return self._handle_streaming_completion(
-                    params=completion_params,
+            if self.api == "responses":
+                return self._call_responses(
+                    messages=formatted_messages,
+                    tools=tools,
                     available_functions=available_functions,
                     from_task=from_task,
                     from_agent=from_agent,
                     response_model=response_model,
                 )
 
-            return self._handle_completion(
-                params=completion_params,
+            return self._call_completions(
+                messages=formatted_messages,
+                tools=tools,
                 available_functions=available_functions,
                 from_task=from_task,
                 from_agent=from_agent,
@@ -222,6 +424,37 @@ class OpenAICompletion(BaseLLM):
             )
             raise
 
+    def _call_completions(
+        self,
+        messages: list[LLMMessage],
+        tools: list[dict[str, BaseTool]] | None = None,
+        available_functions: dict[str, Any] | None = None,
+        from_task: Task | None = None,
+        from_agent: Agent | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | Any:
+        """Call OpenAI Chat Completions API."""
+        completion_params = self._prepare_completion_params(
+            messages=messages, tools=tools
+        )
+
+        if self.stream:
+            return self._handle_streaming_completion(
+                params=completion_params,
+                available_functions=available_functions,
+                from_task=from_task,
+                from_agent=from_agent,
+                response_model=response_model,
+            )
+
+        return self._handle_completion(
+            params=completion_params,
+            available_functions=available_functions,
+            from_task=from_task,
+            from_agent=from_agent,
+            response_model=response_model,
+        )
+
     async def acall(
         self,
         messages: str | list[LLMMessage],
@@ -232,19 +465,19 @@ class OpenAICompletion(BaseLLM):
         from_agent: Agent | None = None,
         response_model: type[BaseModel] | None = None,
     ) -> str | Any:
-        """Async call to OpenAI chat completion API.
+        """Async call to OpenAI API (Chat Completions or Responses).
 
         Args:
-            messages: Input messages for the chat completion
-            tools: list of tool/function definitions
-            callbacks: Callback functions (not used in native implementation)
-            available_functions: Available functions for tool calling
-            from_task: Task that initiated the call
-            from_agent: Agent that initiated the call
+            messages: Input messages for the completion.
+            tools: List of tool/function definitions.
+            callbacks: Callback functions (not used in native implementation).
+            available_functions: Available functions for tool calling.
+            from_task: Task that initiated the call.
+            from_agent: Agent that initiated the call.
             response_model: Response model for structured output.
 
         Returns:
-            Chat completion response or tool call result
+            Completion response or tool call result.
         """
         try:
             self._emit_call_started_event(
@@ -258,21 +491,19 @@ class OpenAICompletion(BaseLLM):
 
             formatted_messages = self._format_messages(messages)
 
-            completion_params = self._prepare_completion_params(
-                messages=formatted_messages, tools=tools
-            )
-
-            if self.stream:
-                return await self._ahandle_streaming_completion(
-                    params=completion_params,
+            if self.api == "responses":
+                return await self._acall_responses(
+                    messages=formatted_messages,
+                    tools=tools,
                     available_functions=available_functions,
                     from_task=from_task,
                     from_agent=from_agent,
                     response_model=response_model,
                 )
 
-            return await self._ahandle_completion(
-                params=completion_params,
+            return await self._acall_completions(
+                messages=formatted_messages,
+                tools=tools,
                 available_functions=available_functions,
                 from_task=from_task,
                 from_agent=from_agent,
@@ -287,6 +518,926 @@ class OpenAICompletion(BaseLLM):
             )
             raise
 
+    async def _acall_completions(
+        self,
+        messages: list[LLMMessage],
+        tools: list[dict[str, BaseTool]] | None = None,
+        available_functions: dict[str, Any] | None = None,
+        from_task: Task | None = None,
+        from_agent: Agent | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | Any:
+        """Async call to OpenAI Chat Completions API."""
+        completion_params = self._prepare_completion_params(
+            messages=messages, tools=tools
+        )
+
+        if self.stream:
+            return await self._ahandle_streaming_completion(
+                params=completion_params,
+                available_functions=available_functions,
+                from_task=from_task,
+                from_agent=from_agent,
+                response_model=response_model,
+            )
+
+        return await self._ahandle_completion(
+            params=completion_params,
+            available_functions=available_functions,
+            from_task=from_task,
+            from_agent=from_agent,
+            response_model=response_model,
+        )
+
+    def _call_responses(
+        self,
+        messages: list[LLMMessage],
+        tools: list[dict[str, BaseTool]] | None = None,
+        available_functions: dict[str, Any] | None = None,
+        from_task: Task | None = None,
+        from_agent: Agent | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | Any:
+        """Call OpenAI Responses API."""
+        params = self._prepare_responses_params(
+            messages=messages, tools=tools, response_model=response_model
+        )
+
+        if self.stream:
+            return self._handle_streaming_responses(
+                params=params,
+                available_functions=available_functions,
+                from_task=from_task,
+                from_agent=from_agent,
+                response_model=response_model,
+            )
+
+        return self._handle_responses(
+            params=params,
+            available_functions=available_functions,
+            from_task=from_task,
+            from_agent=from_agent,
+            response_model=response_model,
+        )
+
+    async def _acall_responses(
+        self,
+        messages: list[LLMMessage],
+        tools: list[dict[str, BaseTool]] | None = None,
+        available_functions: dict[str, Any] | None = None,
+        from_task: Task | None = None,
+        from_agent: Agent | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | Any:
+        """Async call to OpenAI Responses API."""
+        params = self._prepare_responses_params(
+            messages=messages, tools=tools, response_model=response_model
+        )
+
+        if self.stream:
+            return await self._ahandle_streaming_responses(
+                params=params,
+                available_functions=available_functions,
+                from_task=from_task,
+                from_agent=from_agent,
+                response_model=response_model,
+            )
+
+        return await self._ahandle_responses(
+            params=params,
+            available_functions=available_functions,
+            from_task=from_task,
+            from_agent=from_agent,
+            response_model=response_model,
+        )
+
+    def _prepare_responses_params(
+        self,
+        messages: list[LLMMessage],
+        tools: list[dict[str, BaseTool]] | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> dict[str, Any]:
+        """Prepare parameters for OpenAI Responses API.
+
+        The Responses API uses a different structure than Chat Completions:
+        - `input` instead of `messages`
+        - `instructions` for system-level guidance (extracted from system messages)
+        - `text.format` instead of `response_format` for structured outputs
+        - Internally-tagged tool format (flat structure)
+        """
+        instructions: str | None = self.instructions
+        input_messages: list[LLMMessage] = []
+
+        for message in messages:
+            if message.get("role") == "system":
+                content = message.get("content", "")
+                # System messages should always have string content
+                content_str = content if isinstance(content, str) else str(content)
+                if instructions:
+                    instructions = f"{instructions}\n\n{content_str}"
+                else:
+                    instructions = content_str
+            else:
+                input_messages.append(message)
+
+        # Prepare input with optional reasoning items for ZDR chaining
+        final_input: list[Any] = []
+        if self.auto_chain_reasoning and self._last_reasoning_items:
+            final_input.extend(self._last_reasoning_items)
+        final_input.extend(input_messages if input_messages else messages)
+
+        params: dict[str, Any] = {
+            "model": self.model,
+            "input": final_input,
+        }
+
+        if instructions:
+            params["instructions"] = instructions
+
+        if self.stream:
+            params["stream"] = True
+
+        if self.store is not None:
+            params["store"] = self.store
+
+        # Handle response chaining: explicit previous_response_id takes precedence
+        if self.previous_response_id:
+            params["previous_response_id"] = self.previous_response_id
+        elif self.auto_chain and self._last_response_id:
+            params["previous_response_id"] = self._last_response_id
+
+        # Handle include parameter with auto_chain_reasoning support
+        include_items: list[str] = list(self.include) if self.include else []
+        if self.auto_chain_reasoning:
+            if "reasoning.encrypted_content" not in include_items:
+                include_items.append("reasoning.encrypted_content")
+        if include_items:
+            params["include"] = include_items
+
+        params.update(self.additional_params)
+
+        if self.temperature is not None:
+            params["temperature"] = self.temperature
+        if self.top_p is not None:
+            params["top_p"] = self.top_p
+        if self.max_completion_tokens is not None:
+            params["max_output_tokens"] = self.max_completion_tokens
+        elif self.max_tokens is not None:
+            params["max_output_tokens"] = self.max_tokens
+        if self.seed is not None:
+            params["seed"] = self.seed
+
+        if self.reasoning_effort:
+            params["reasoning"] = {"effort": self.reasoning_effort}
+
+        if response_model or self.response_format:
+            format_model = response_model or self.response_format
+            if isinstance(format_model, type) and issubclass(format_model, BaseModel):
+                schema = format_model.model_json_schema()
+                schema["additionalProperties"] = False
+                params["text"] = {
+                    "format": {
+                        "type": "json_schema",
+                        "name": format_model.__name__,
+                        "strict": True,
+                        "schema": schema,
+                    }
+                }
+            elif isinstance(format_model, dict):
+                params["text"] = {"format": format_model}
+
+        all_tools: list[dict[str, Any]] = []
+
+        if self.builtin_tools:
+            for tool_name in self.builtin_tools:
+                tool_type = self.BUILTIN_TOOL_TYPES.get(tool_name, tool_name)
+                all_tools.append({"type": tool_type})
+
+        if tools:
+            all_tools.extend(self._convert_tools_for_responses(tools))
+
+        if all_tools:
+            params["tools"] = all_tools
+
+        crewai_specific_params = {
+            "callbacks",
+            "available_functions",
+            "from_task",
+            "from_agent",
+            "provider",
+            "api_key",
+            "base_url",
+            "api_base",
+            "timeout",
+        }
+
+        return {k: v for k, v in params.items() if k not in crewai_specific_params}
+
+    def _convert_tools_for_responses(
+        self, tools: list[dict[str, BaseTool]]
+    ) -> list[dict[str, Any]]:
+        """Convert CrewAI tools to Responses API format.
+
+        Responses API uses internally-tagged format (flat structure):
+        {
+            "type": "function",
+            "name": "get_weather",
+            "description": "...",
+            "parameters": {...}
+        }
+
+        Unlike Chat Completions which uses externally-tagged:
+        {
+            "type": "function",
+            "function": {"name": "...", "description": "...", "parameters": {...}}
+        }
+        """
+        from crewai.llms.providers.utils.common import safe_tool_conversion
+
+        responses_tools = []
+
+        for tool in tools:
+            name, description, parameters = safe_tool_conversion(tool, "OpenAI")
+
+            responses_tool: dict[str, Any] = {
+                "type": "function",
+                "name": name,
+                "description": description,
+            }
+
+            if parameters:
+                if isinstance(parameters, dict):
+                    responses_tool["parameters"] = parameters
+                else:
+                    responses_tool["parameters"] = dict(parameters)
+
+            responses_tools.append(responses_tool)
+
+        return responses_tools
+
+    def _handle_responses(
+        self,
+        params: dict[str, Any],
+        available_functions: dict[str, Any] | None = None,
+        from_task: Any | None = None,
+        from_agent: Any | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | ResponsesAPIResult | Any:
+        """Handle non-streaming Responses API call."""
+        try:
+            response: Response = self.client.responses.create(**params)
+
+            # Track response ID for auto-chaining
+            if self.auto_chain and response.id:
+                self._last_response_id = response.id
+
+            # Track reasoning items for ZDR auto-chaining
+            if self.auto_chain_reasoning:
+                reasoning_items = self._extract_reasoning_items(response)
+                if reasoning_items:
+                    self._last_reasoning_items = reasoning_items
+
+            usage = self._extract_responses_token_usage(response)
+            self._track_token_usage_internal(usage)
+
+            # If parse_tool_outputs is enabled, return structured result
+            if self.parse_tool_outputs:
+                parsed_result = self._extract_builtin_tool_outputs(response)
+                parsed_result.text = self._apply_stop_words(parsed_result.text)
+
+                self._emit_call_completed_event(
+                    response=parsed_result.text,
+                    call_type=LLMCallType.LLM_CALL,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                    messages=params.get("input", []),
+                )
+
+                return parsed_result
+
+            function_calls = self._extract_function_calls_from_response(response)
+            if function_calls and not available_functions:
+                self._emit_call_completed_event(
+                    response=function_calls,
+                    call_type=LLMCallType.TOOL_CALL,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                    messages=params.get("input", []),
+                )
+                return function_calls
+
+            if function_calls and available_functions:
+                for call in function_calls:
+                    function_name = call.get("name", "")
+                    function_args = call.get("arguments", {})
+                    if isinstance(function_args, str):
+                        try:
+                            function_args = json.loads(function_args)
+                        except json.JSONDecodeError:
+                            function_args = {}
+
+                    result = self._handle_tool_execution(
+                        function_name=function_name,
+                        function_args=function_args,
+                        available_functions=available_functions,
+                        from_task=from_task,
+                        from_agent=from_agent,
+                    )
+
+                    if result is not None:
+                        return result
+
+            content = response.output_text or ""
+            content = self._apply_stop_words(content)
+
+            if response_model:
+                try:
+                    structured_result = self._validate_structured_output(
+                        content, response_model
+                    )
+                    self._emit_call_completed_event(
+                        response=structured_result,
+                        call_type=LLMCallType.LLM_CALL,
+                        from_task=from_task,
+                        from_agent=from_agent,
+                        messages=params.get("input", []),
+                    )
+                    return structured_result
+                except ValueError as e:
+                    logging.warning(f"Structured output validation failed: {e}")
+
+            self._emit_call_completed_event(
+                response=content,
+                call_type=LLMCallType.LLM_CALL,
+                from_task=from_task,
+                from_agent=from_agent,
+                messages=params.get("input", []),
+            )
+
+            content = self._invoke_after_llm_call_hooks(
+                params.get("input", []), content, from_agent
+            )
+
+        except NotFoundError as e:
+            error_msg = f"Model {self.model} not found: {e}"
+            logging.error(error_msg)
+            self._emit_call_failed_event(
+                error=error_msg, from_task=from_task, from_agent=from_agent
+            )
+            raise ValueError(error_msg) from e
+        except APIConnectionError as e:
+            error_msg = f"Failed to connect to OpenAI API: {e}"
+            logging.error(error_msg)
+            self._emit_call_failed_event(
+                error=error_msg, from_task=from_task, from_agent=from_agent
+            )
+            raise ConnectionError(error_msg) from e
+        except Exception as e:
+            if is_context_length_exceeded(e):
+                logging.error(f"Context window exceeded: {e}")
+                raise LLMContextLengthExceededError(str(e)) from e
+
+            error_msg = f"OpenAI Responses API call failed: {e!s}"
+            logging.error(error_msg)
+            self._emit_call_failed_event(
+                error=error_msg, from_task=from_task, from_agent=from_agent
+            )
+            raise
+
+        return content
+
+    async def _ahandle_responses(
+        self,
+        params: dict[str, Any],
+        available_functions: dict[str, Any] | None = None,
+        from_task: Any | None = None,
+        from_agent: Any | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | ResponsesAPIResult | Any:
+        """Handle async non-streaming Responses API call."""
+        try:
+            response: Response = await self.async_client.responses.create(**params)
+
+            # Track response ID for auto-chaining
+            if self.auto_chain and response.id:
+                self._last_response_id = response.id
+
+            # Track reasoning items for ZDR auto-chaining
+            if self.auto_chain_reasoning:
+                reasoning_items = self._extract_reasoning_items(response)
+                if reasoning_items:
+                    self._last_reasoning_items = reasoning_items
+
+            usage = self._extract_responses_token_usage(response)
+            self._track_token_usage_internal(usage)
+
+            # If parse_tool_outputs is enabled, return structured result
+            if self.parse_tool_outputs:
+                parsed_result = self._extract_builtin_tool_outputs(response)
+                parsed_result.text = self._apply_stop_words(parsed_result.text)
+
+                self._emit_call_completed_event(
+                    response=parsed_result.text,
+                    call_type=LLMCallType.LLM_CALL,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                    messages=params.get("input", []),
+                )
+
+                return parsed_result
+
+            function_calls = self._extract_function_calls_from_response(response)
+            if function_calls and not available_functions:
+                self._emit_call_completed_event(
+                    response=function_calls,
+                    call_type=LLMCallType.TOOL_CALL,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                    messages=params.get("input", []),
+                )
+                return function_calls
+
+            if function_calls and available_functions:
+                for call in function_calls:
+                    function_name = call.get("name", "")
+                    function_args = call.get("arguments", {})
+                    if isinstance(function_args, str):
+                        try:
+                            function_args = json.loads(function_args)
+                        except json.JSONDecodeError:
+                            function_args = {}
+
+                    result = self._handle_tool_execution(
+                        function_name=function_name,
+                        function_args=function_args,
+                        available_functions=available_functions,
+                        from_task=from_task,
+                        from_agent=from_agent,
+                    )
+
+                    if result is not None:
+                        return result
+
+            content = response.output_text or ""
+            content = self._apply_stop_words(content)
+
+            if response_model:
+                try:
+                    structured_result = self._validate_structured_output(
+                        content, response_model
+                    )
+                    self._emit_call_completed_event(
+                        response=structured_result,
+                        call_type=LLMCallType.LLM_CALL,
+                        from_task=from_task,
+                        from_agent=from_agent,
+                        messages=params.get("input", []),
+                    )
+                    return structured_result
+                except ValueError as e:
+                    logging.warning(f"Structured output validation failed: {e}")
+
+            self._emit_call_completed_event(
+                response=content,
+                call_type=LLMCallType.LLM_CALL,
+                from_task=from_task,
+                from_agent=from_agent,
+                messages=params.get("input", []),
+            )
+
+        except NotFoundError as e:
+            error_msg = f"Model {self.model} not found: {e}"
+            logging.error(error_msg)
+            self._emit_call_failed_event(
+                error=error_msg, from_task=from_task, from_agent=from_agent
+            )
+            raise ValueError(error_msg) from e
+        except APIConnectionError as e:
+            error_msg = f"Failed to connect to OpenAI API: {e}"
+            logging.error(error_msg)
+            self._emit_call_failed_event(
+                error=error_msg, from_task=from_task, from_agent=from_agent
+            )
+            raise ConnectionError(error_msg) from e
+        except Exception as e:
+            if is_context_length_exceeded(e):
+                logging.error(f"Context window exceeded: {e}")
+                raise LLMContextLengthExceededError(str(e)) from e
+
+            error_msg = f"OpenAI Responses API call failed: {e!s}"
+            logging.error(error_msg)
+            self._emit_call_failed_event(
+                error=error_msg, from_task=from_task, from_agent=from_agent
+            )
+            raise
+
+        return content
+
+    def _handle_streaming_responses(
+        self,
+        params: dict[str, Any],
+        available_functions: dict[str, Any] | None = None,
+        from_task: Any | None = None,
+        from_agent: Any | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | ResponsesAPIResult | Any:
+        """Handle streaming Responses API call."""
+        full_response = ""
+        function_calls: list[dict[str, Any]] = []
+        final_response: Response | None = None
+
+        stream = self.client.responses.create(**params)
+
+        for event in stream:
+            if event.type == "response.output_text.delta":
+                delta_text = event.delta or ""
+                full_response += delta_text
+                self._emit_stream_chunk_event(
+                    chunk=delta_text,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                )
+
+            elif event.type == "response.function_call_arguments.delta":
+                pass
+
+            elif event.type == "response.output_item.done":
+                item = event.item
+                if item.type == "function_call":
+                    function_calls.append(
+                        {
+                            "id": item.call_id,
+                            "name": item.name,
+                            "arguments": item.arguments,
+                        }
+                    )
+
+            elif event.type == "response.completed":
+                final_response = event.response
+                # Track response ID for auto-chaining
+                if self.auto_chain and event.response and event.response.id:
+                    self._last_response_id = event.response.id
+                # Track reasoning items for ZDR auto-chaining
+                if self.auto_chain_reasoning and event.response:
+                    reasoning_items = self._extract_reasoning_items(event.response)
+                    if reasoning_items:
+                        self._last_reasoning_items = reasoning_items
+                if event.response and event.response.usage:
+                    usage = {
+                        "prompt_tokens": event.response.usage.input_tokens,
+                        "completion_tokens": event.response.usage.output_tokens,
+                        "total_tokens": event.response.usage.total_tokens,
+                    }
+                    self._track_token_usage_internal(usage)
+
+        # If parse_tool_outputs is enabled, return structured result
+        if self.parse_tool_outputs and final_response:
+            parsed_result = self._extract_builtin_tool_outputs(final_response)
+            parsed_result.text = self._apply_stop_words(parsed_result.text)
+
+            self._emit_call_completed_event(
+                response=parsed_result.text,
+                call_type=LLMCallType.LLM_CALL,
+                from_task=from_task,
+                from_agent=from_agent,
+                messages=params.get("input", []),
+            )
+
+            return parsed_result
+
+        if function_calls and available_functions:
+            for call in function_calls:
+                function_name = call.get("name", "")
+                function_args = call.get("arguments", {})
+                if isinstance(function_args, str):
+                    try:
+                        function_args = json.loads(function_args)
+                    except json.JSONDecodeError:
+                        function_args = {}
+
+                result = self._handle_tool_execution(
+                    function_name=function_name,
+                    function_args=function_args,
+                    available_functions=available_functions,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                )
+
+                if result is not None:
+                    return result
+
+        full_response = self._apply_stop_words(full_response)
+
+        if response_model:
+            try:
+                structured_result = self._validate_structured_output(
+                    full_response, response_model
+                )
+                self._emit_call_completed_event(
+                    response=structured_result,
+                    call_type=LLMCallType.LLM_CALL,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                    messages=params.get("input", []),
+                )
+                return structured_result
+            except ValueError as e:
+                logging.warning(f"Structured output validation failed: {e}")
+
+        self._emit_call_completed_event(
+            response=full_response,
+            call_type=LLMCallType.LLM_CALL,
+            from_task=from_task,
+            from_agent=from_agent,
+            messages=params.get("input", []),
+        )
+
+        return self._invoke_after_llm_call_hooks(
+            params.get("input", []), full_response, from_agent
+        )
+
+    async def _ahandle_streaming_responses(
+        self,
+        params: dict[str, Any],
+        available_functions: dict[str, Any] | None = None,
+        from_task: Any | None = None,
+        from_agent: Any | None = None,
+        response_model: type[BaseModel] | None = None,
+    ) -> str | ResponsesAPIResult | Any:
+        """Handle async streaming Responses API call."""
+        full_response = ""
+        function_calls: list[dict[str, Any]] = []
+        final_response: Response | None = None
+
+        stream = await self.async_client.responses.create(**params)
+
+        async for event in stream:
+            if event.type == "response.output_text.delta":
+                delta_text = event.delta or ""
+                full_response += delta_text
+                self._emit_stream_chunk_event(
+                    chunk=delta_text,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                )
+
+            elif event.type == "response.function_call_arguments.delta":
+                pass
+
+            elif event.type == "response.output_item.done":
+                item = event.item
+                if item.type == "function_call":
+                    function_calls.append(
+                        {
+                            "id": item.call_id,
+                            "name": item.name,
+                            "arguments": item.arguments,
+                        }
+                    )
+
+            elif event.type == "response.completed":
+                final_response = event.response
+                # Track response ID for auto-chaining
+                if self.auto_chain and event.response and event.response.id:
+                    self._last_response_id = event.response.id
+                # Track reasoning items for ZDR auto-chaining
+                if self.auto_chain_reasoning and event.response:
+                    reasoning_items = self._extract_reasoning_items(event.response)
+                    if reasoning_items:
+                        self._last_reasoning_items = reasoning_items
+                if event.response and event.response.usage:
+                    usage = {
+                        "prompt_tokens": event.response.usage.input_tokens,
+                        "completion_tokens": event.response.usage.output_tokens,
+                        "total_tokens": event.response.usage.total_tokens,
+                    }
+                    self._track_token_usage_internal(usage)
+
+        # If parse_tool_outputs is enabled, return structured result
+        if self.parse_tool_outputs and final_response:
+            parsed_result = self._extract_builtin_tool_outputs(final_response)
+            parsed_result.text = self._apply_stop_words(parsed_result.text)
+
+            self._emit_call_completed_event(
+                response=parsed_result.text,
+                call_type=LLMCallType.LLM_CALL,
+                from_task=from_task,
+                from_agent=from_agent,
+                messages=params.get("input", []),
+            )
+
+            return parsed_result
+
+        if function_calls and available_functions:
+            for call in function_calls:
+                function_name = call.get("name", "")
+                function_args = call.get("arguments", {})
+                if isinstance(function_args, str):
+                    try:
+                        function_args = json.loads(function_args)
+                    except json.JSONDecodeError:
+                        function_args = {}
+
+                result = self._handle_tool_execution(
+                    function_name=function_name,
+                    function_args=function_args,
+                    available_functions=available_functions,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                )
+
+                if result is not None:
+                    return result
+
+        full_response = self._apply_stop_words(full_response)
+
+        if response_model:
+            try:
+                structured_result = self._validate_structured_output(
+                    full_response, response_model
+                )
+                self._emit_call_completed_event(
+                    response=structured_result,
+                    call_type=LLMCallType.LLM_CALL,
+                    from_task=from_task,
+                    from_agent=from_agent,
+                    messages=params.get("input", []),
+                )
+                return structured_result
+            except ValueError as e:
+                logging.warning(f"Structured output validation failed: {e}")
+
+        self._emit_call_completed_event(
+            response=full_response,
+            call_type=LLMCallType.LLM_CALL,
+            from_task=from_task,
+            from_agent=from_agent,
+            messages=params.get("input", []),
+        )
+
+        return full_response
+
+    def _extract_function_calls_from_response(
+        self, response: Response
+    ) -> list[dict[str, Any]]:
+        """Extract function calls from Responses API output."""
+        return [
+            {
+                "id": item.call_id,
+                "name": item.name,
+                "arguments": item.arguments,
+            }
+            for item in response.output
+            if item.type == "function_call"
+        ]
+
+    def _extract_responses_token_usage(self, response: Response) -> dict[str, Any]:
+        """Extract token usage from Responses API response."""
+        if response.usage:
+            return {
+                "prompt_tokens": response.usage.input_tokens,
+                "completion_tokens": response.usage.output_tokens,
+                "total_tokens": response.usage.total_tokens,
+            }
+        return {"total_tokens": 0}
+
+    def _extract_builtin_tool_outputs(self, response: Response) -> ResponsesAPIResult:
+        """Extract and parse all built-in tool outputs from Responses API.
+
+        Parses web_search, file_search, code_interpreter, computer_use,
+        and reasoning outputs into structured types.
+
+        Args:
+            response: The OpenAI Response object.
+
+        Returns:
+            ResponsesAPIResult containing parsed outputs.
+        """
+        result = ResponsesAPIResult(
+            text=response.output_text or "",
+            response_id=response.id,
+        )
+
+        for item in response.output:
+            item_type = item.type
+
+            if item_type == "web_search_call":
+                result.web_search_results.append(
+                    WebSearchResult(
+                        id=item.id,
+                        status=item.status,  # type: ignore[union-attr]
+                        type=item_type,
+                    )
+                )
+
+            elif item_type == "file_search_call":
+                file_results: list[FileSearchResultItem] = (
+                    [
+                        FileSearchResultItem(
+                            file_id=r.file_id,  # type: ignore[union-attr]
+                            filename=r.filename,  # type: ignore[union-attr]
+                            text=r.text,  # type: ignore[union-attr]
+                            score=r.score,  # type: ignore[union-attr]
+                            attributes=r.attributes,  # type: ignore[union-attr]
+                        )
+                        for r in item.results  # type: ignore[union-attr]
+                    ]
+                    if item.results  # type: ignore[union-attr]
+                    else []
+                )
+                result.file_search_results.append(
+                    FileSearchResult(
+                        id=item.id,
+                        status=item.status,  # type: ignore[union-attr]
+                        type=item_type,
+                        queries=list(item.queries),  # type: ignore[union-attr]
+                        results=file_results,
+                    )
+                )
+
+            elif item_type == "code_interpreter_call":
+                code_results: list[
+                    CodeInterpreterLogResult | CodeInterpreterFileResult
+                ] = []
+                for r in item.results:  # type: ignore[union-attr]
+                    if r.type == "logs":  # type: ignore[union-attr]
+                        code_results.append(
+                            CodeInterpreterLogResult(type="logs", logs=r.logs)  # type: ignore[union-attr]
+                        )
+                    elif r.type == "files":  # type: ignore[union-attr]
+                        files_data = [
+                            {"file_id": f.file_id, "mime_type": f.mime_type}
+                            for f in r.files  # type: ignore[union-attr]
+                        ]
+                        code_results.append(
+                            CodeInterpreterFileResult(type="files", files=files_data)
+                        )
+                result.code_interpreter_results.append(
+                    CodeInterpreterResult(
+                        id=item.id,
+                        status=item.status,  # type: ignore[union-attr]
+                        type=item_type,
+                        code=item.code,  # type: ignore[union-attr]
+                        container_id=item.container_id,  # type: ignore[union-attr]
+                        results=code_results,
+                    )
+                )
+
+            elif item_type == "computer_call":
+                action_dict = item.action.model_dump() if item.action else {}  # type: ignore[union-attr]
+                safety_checks = [
+                    {"id": c.id, "code": c.code, "message": c.message}
+                    for c in item.pending_safety_checks  # type: ignore[union-attr]
+                ]
+                result.computer_use_results.append(
+                    ComputerUseResult(
+                        id=item.id,
+                        status=item.status,  # type: ignore[union-attr]
+                        type=item_type,
+                        call_id=item.call_id,  # type: ignore[union-attr]
+                        action=action_dict,
+                        pending_safety_checks=safety_checks,
+                    )
+                )
+
+            elif item_type == "reasoning":
+                summaries = [{"type": s.type, "text": s.text} for s in item.summary]  # type: ignore[union-attr]
+                result.reasoning_summaries.append(
+                    ReasoningSummary(
+                        id=item.id,
+                        status=item.status,  # type: ignore[union-attr]
+                        type=item_type,
+                        summary=summaries,
+                        encrypted_content=item.encrypted_content,  # type: ignore[union-attr]
+                    )
+                )
+
+            elif item_type == "function_call":
+                result.function_calls.append(
+                    {
+                        "id": item.call_id,  # type: ignore[union-attr]
+                        "name": item.name,  # type: ignore[union-attr]
+                        "arguments": item.arguments,  # type: ignore[union-attr]
+                    }
+                )
+
+        return result
+
+    def _extract_reasoning_items(self, response: Response) -> list[Any]:
+        """Extract reasoning items with encrypted content from response.
+
+        Used for ZDR (Zero Data Retention) compliance to capture encrypted
+        reasoning tokens that can be passed back in subsequent requests.
+
+        Args:
+            response: The OpenAI Response object.
+
+        Returns:
+            List of reasoning items from the response output that have
+            encrypted_content, suitable for passing back in future requests.
+        """
+        return [item for item in response.output if item.type == "reasoning"]
+
     def _prepare_completion_params(
         self, messages: list[LLMMessage], tools: list[dict[str, BaseTool]] | None = None
     ) -> dict[str, Any]:
@@ -1031,6 +2182,9 @@ class OpenAICompletion(BaseLLM):
             "gpt-4.1": 1047576,
             "gpt-4.1-mini-2025-04-14": 1047576,
             "gpt-4.1-nano-2025-04-14": 1047576,
+            "gpt-5": 1047576,
+            "gpt-5-mini": 1047576,
+            "gpt-5-nano": 1047576,
             "o1-preview": 128000,
             "o1-mini": 128000,
             "o3-mini": 200000,
@@ -1078,7 +2232,7 @@ class OpenAICompletion(BaseLLM):
     def supports_multimodal(self) -> bool:
         """Check if the model supports multimodal inputs.
 
-        OpenAI vision-enabled models include GPT-4o, GPT-4.1, and o-series.
+        OpenAI vision-enabled models include GPT-4o, GPT-4.1, GPT-5, and o-series.
 
         Returns:
             True if the model supports images.
@@ -1088,6 +2242,7 @@ class OpenAICompletion(BaseLLM):
             "gpt-4.1",
             "gpt-4-turbo",
             "gpt-4-vision",
+            "gpt-5",
             "o1",
             "o3",
             "o4",
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_is_default_provider_without_explicit_llm_set_on_agent.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_is_default_provider_without_explicit_llm_set_on_agent.yaml
index 21516b4a0..055315612 100644
--- a/lib/crewai/tests/cassettes/llms/openai/test_openai_is_default_provider_without_explicit_llm_set_on_agent.yaml
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_is_default_provider_without_explicit_llm_set_on_agent.yaml
@@ -1,103 +1,363 @@
 interactions:
 - request:
-    body: '{"messages": [{"role": "system", "content": "You are Research Assistant. You are a helpful research assistant.\nYour personal goal is: Find information about the population of Tokyo\nTo give my best complete final answer to the task respond using the exact following format:\n\nThought: I now can give a great answer\nFinal Answer: Your final answer must be the great and the most complete as possible, it must be outcome described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent Task: Find information about the population of Tokyo\n\nThis is the expected criteria for your final answer: The population of Tokyo is 10 million\nyou MUST return the actual complete content as the final answer, not a summary.\n\nBegin! This is VERY important to you, use the tools available and give your best Final Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stream": false}'
+    body: '{"messages":[{"role":"system","content":"You are Research Assistant. You
+      are a helpful research assistant.\nYour personal goal is: Find information about
+      the population of Tokyo\nTo give my best complete final answer to the task respond
+      using the exact following format:\n\nThought: I now can give a great answer\nFinal
+      Answer: Your final answer must be the great and the most complete as possible,
+      it must be outcome described.\n\nI MUST use these formats, my job depends on
+      it!"},{"role":"user","content":"\nCurrent Task: Find information about the population
+      of Tokyo\n\nThis is the expected criteria for your final answer: The population
+      of Tokyo is 10 million\nyou MUST return the actual complete content as the final
+      answer, not a summary.\n\nBegin! This is VERY important to you, use the tools
+      available and give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-4.1-mini"}'
     headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
       accept:
       - application/json
       accept-encoding:
-      - gzip, deflate
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
       connection:
       - keep-alive
       content-length:
-      - '932'
+      - '906'
       content-type:
       - application/json
       host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.109.1
+      - crewai-azure-openai.openai.azure.com
       x-stainless-arch:
-      - arm64
+      - X-STAINLESS-ARCH-XXX
       x-stainless-async:
       - 'false'
       x-stainless-lang:
       - python
       x-stainless-os:
-      - MacOS
+      - X-STAINLESS-OS-XXX
       x-stainless-package-version:
-      - 1.109.1
+      - 1.83.0
       x-stainless-read-timeout:
-      - '600'
+      - X-STAINLESS-READ-TIMEOUT-XXX
       x-stainless-retry-count:
       - '0'
       x-stainless-runtime:
       - CPython
       x-stainless-runtime-version:
-      - 3.13.3
+      - 3.12.10
+    method: POST
+    uri: https://fake-azure-endpoint.openai.azure.com/chat/completions
+  response:
+    body:
+      string: '{"error":{"code":"404","message": "Resource not found"}}'
+    headers:
+      Content-Length:
+      - '56'
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 05:53:31 GMT
+      Strict-Transport-Security:
+      - STS-XXX
+      apim-request-id:
+      - APIM-REQUEST-ID-XXX
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+    status:
+      code: 404
+      message: Resource Not Found
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Research Assistant. You
+      are a helpful research assistant.\nYour personal goal is: Find information about
+      the population of Tokyo\nTo give my best complete final answer to the task respond
+      using the exact following format:\n\nThought: I now can give a great answer\nFinal
+      Answer: Your final answer must be the great and the most complete as possible,
+      it must be outcome described.\n\nI MUST use these formats, my job depends on
+      it!"},{"role":"user","content":"\nCurrent Task: Find information about the population
+      of Tokyo\n\nThis is the expected criteria for your final answer: The population
+      of Tokyo is 10 million\nyou MUST return the actual complete content as the final
+      answer, not a summary.\n\nBegin! This is VERY important to you, use the tools
+      available and give your best Final Answer, your job depends on it!\n\nThought:"},{"role":"system","content":"You
+      are Research Assistant. You are a helpful research assistant.\nYour personal
+      goal is: Find information about the population of Tokyo\nTo give my best complete
+      final answer to the task respond using the exact following format:\n\nThought:
+      I now can give a great answer\nFinal Answer: Your final answer must be the great
+      and the most complete as possible, it must be outcome described.\n\nI MUST use
+      these formats, my job depends on it!"},{"role":"user","content":"\nCurrent Task:
+      Find information about the population of Tokyo\n\nThis is the expected criteria
+      for your final answer: The population of Tokyo is 10 million\nyou MUST return
+      the actual complete content as the final answer, not a summary.\n\nBegin! This
+      is VERY important to you, use the tools available and give your best Final Answer,
+      your job depends on it!\n\nThought:"}],"model":"gpt-4.1-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '1775'
+      content-type:
+      - application/json
+      host:
+      - crewai-azure-openai.openai.azure.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://fake-azure-endpoint.openai.azure.com/chat/completions
+  response:
+    body:
+      string: '{"error":{"code":"404","message": "Resource not found"}}'
+    headers:
+      Content-Length:
+      - '56'
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 05:53:31 GMT
+      Strict-Transport-Security:
+      - STS-XXX
+      apim-request-id:
+      - APIM-REQUEST-ID-XXX
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+    status:
+      code: 404
+      message: Resource Not Found
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Research Assistant. You
+      are a helpful research assistant.\nYour personal goal is: Find information about
+      the population of Tokyo\nTo give my best complete final answer to the task respond
+      using the exact following format:\n\nThought: I now can give a great answer\nFinal
+      Answer: Your final answer must be the great and the most complete as possible,
+      it must be outcome described.\n\nI MUST use these formats, my job depends on
+      it!"},{"role":"user","content":"\nCurrent Task: Find information about the population
+      of Tokyo\n\nThis is the expected criteria for your final answer: The population
+      of Tokyo is 10 million\nyou MUST return the actual complete content as the final
+      answer, not a summary.\n\nBegin! This is VERY important to you, use the tools
+      available and give your best Final Answer, your job depends on it!\n\nThought:"},{"role":"system","content":"You
+      are Research Assistant. You are a helpful research assistant.\nYour personal
+      goal is: Find information about the population of Tokyo\nTo give my best complete
+      final answer to the task respond using the exact following format:\n\nThought:
+      I now can give a great answer\nFinal Answer: Your final answer must be the great
+      and the most complete as possible, it must be outcome described.\n\nI MUST use
+      these formats, my job depends on it!"},{"role":"user","content":"\nCurrent Task:
+      Find information about the population of Tokyo\n\nThis is the expected criteria
+      for your final answer: The population of Tokyo is 10 million\nyou MUST return
+      the actual complete content as the final answer, not a summary.\n\nBegin! This
+      is VERY important to you, use the tools available and give your best Final Answer,
+      your job depends on it!\n\nThought:"},{"role":"system","content":"You are Research
+      Assistant. You are a helpful research assistant.\nYour personal goal is: Find
+      information about the population of Tokyo\nTo give my best complete final answer
+      to the task respond using the exact following format:\n\nThought: I now can
+      give a great answer\nFinal Answer: Your final answer must be the great and the
+      most complete as possible, it must be outcome described.\n\nI MUST use these
+      formats, my job depends on it!"},{"role":"user","content":"\nCurrent Task: Find
+      information about the population of Tokyo\n\nThis is the expected criteria for
+      your final answer: The population of Tokyo is 10 million\nyou MUST return the
+      actual complete content as the final answer, not a summary.\n\nBegin! This is
+      VERY important to you, use the tools available and give your best Final Answer,
+      your job depends on it!\n\nThought:"}],"model":"gpt-4.1-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '2644'
+      content-type:
+      - application/json
+      host:
+      - crewai-azure-openai.openai.azure.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://fake-azure-endpoint.openai.azure.com/chat/completions
+  response:
+    body:
+      string: '{"error":{"code":"404","message": "Resource not found"}}'
+    headers:
+      Content-Length:
+      - '56'
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 05:53:31 GMT
+      Strict-Transport-Security:
+      - STS-XXX
+      apim-request-id:
+      - APIM-REQUEST-ID-XXX
+      x-content-type-options:
+      - X-CONTENT-TYPE-XXX
+    status:
+      code: 404
+      message: Resource Not Found
+- request:
+    body: '{"messages":[{"role":"system","content":"You are Research Assistant. You
+      are a helpful research assistant.\nYour personal goal is: Find information about
+      the population of Tokyo\nTo give my best complete final answer to the task respond
+      using the exact following format:\n\nThought: I now can give a great answer\nFinal
+      Answer: Your final answer must be the great and the most complete as possible,
+      it must be outcome described.\n\nI MUST use these formats, my job depends on
+      it!"},{"role":"user","content":"\nCurrent Task: Find information about the population
+      of Tokyo\n\nThis is the expected criteria for your final answer: The population
+      of Tokyo is 10 million\nyou MUST return the actual complete content as the final
+      answer, not a summary.\n\nBegin! This is VERY important to you, use the tools
+      available and give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '905'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
     method: POST
     uri: https://api.openai.com/v1/chat/completions
   response:
     body:
-      string: "{\n  \"id\": \"chatcmpl-CQQ5CBZ2V7R7cHju9WwEXAzZlAz4i\",\n  \"object\": \"chat.completion\",\n  \"created\": 1760412826,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\": \"assistant\",\n        \"content\": \"I now can give a great answer  \\nFinal Answer: As of my last update in October 2023, the population of Tokyo is approximately 14 million people in the central area, while the Greater Tokyo Area, which includes surrounding prefectures, has a staggering population of over 37 million, making it the most populous metropolitan area in the world. The city of Tokyo itself is renowned for its vibrant culture, diverse economy, and significant global influence. The population figures may fluctuate due to various factors such as migration, urbanization, and demographic trends, but generally, it remains one of the largest urban agglomerations globally.\",\n        \"refusal\": null,\n        \"annotations\"\
-        : []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 173,\n    \"completion_tokens\": 125,\n    \"total_tokens\": 298,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\": 0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\": \"default\",\n  \"system_fingerprint\": \"fp_560af6e559\"\n}\n"
+      string: "{\n  \"id\": \"chatcmpl-D14RS0FWG5BiSFl5yB0HiQOqcGsB3\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1769147774,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"I now can give a great answer  \\nFinal
+        Answer: As of October 2023, the population of Tokyo is approximately 14 million
+        people in the 23 special wards area, and around 37 million people in the Greater
+        Tokyo Area, making it one of the most populous metropolitan areas in the world.
+        Tokyo is renowned for its diverse neighborhoods, rich cultural heritage, and
+        dynamic economy. The population density in the city is exceptionally high,
+        leading to efficient public transport systems and urban infrastructure. Population
+        fluctuations occur due to various factors, including birth rates, migration,
+        and economic opportunities. The city continues to be a global hub for business,
+        technology, and tourism, attracting people from all over the world.\",\n        \"refusal\":
+        null,\n        \"annotations\": []\n      },\n      \"logprobs\": null,\n
+        \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        173,\n    \"completion_tokens\": 139,\n    \"total_tokens\": 312,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_29330a9688\"\n}\n"
     headers:
       CF-RAY:
-      - 98e404605874fad2-SJC
+      - CF-RAY-XXX
       Connection:
       - keep-alive
       Content-Type:
       - application/json
       Date:
-      - Tue, 14 Oct 2025 03:33:48 GMT
+      - Fri, 23 Jan 2026 05:56:17 GMT
       Server:
       - cloudflare
       Set-Cookie:
-      - __cf_bm=o5Vy5q.qstP73vjTrIb7GX6EjMltWq26Vk1ctm8rrcQ-1760412828-1.0.1.1-6PmDQhWH5.60C02WBN9ENJiBEZ0hYXY1YJ6TKxTAflRETSCaMVA2j1.xE2KPFpUrsSsmbkopxQ1p2NYmLzuRy08dingIYyz5HZGz8ghl.nM; path=/; expires=Tue, 14-Oct-25 04:03:48 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
-      - _cfuvid=TkrzMwZH3VZy7i4ED_kVxlx4MUrHeXnluoFfmeqTT2w-1760412828927-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      - SET-COOKIE-XXX
       Strict-Transport-Security:
-      - max-age=31536000; includeSubDomains; preload
+      - STS-XXX
       Transfer-Encoding:
       - chunked
       X-Content-Type-Options:
-      - nosniff
+      - X-CONTENT-TYPE-XXX
       access-control-expose-headers:
-      - X-Request-ID
+      - ACCESS-CONTROL-XXX
       alt-svc:
       - h3=":443"; ma=86400
       cf-cache-status:
       - DYNAMIC
       openai-organization:
-      - crewai-iuxna1
+      - OPENAI-ORG-XXX
       openai-processing-ms:
-      - '2644'
+      - '2984'
       openai-project:
-      - proj_xitITlrFeen7zjNSzML82h9x
+      - OPENAI-PROJECT-XXX
       openai-version:
       - '2020-10-01'
       x-envoy-upstream-service-time:
-      - '2793'
+      - '3225'
       x-openai-proxy-wasm:
       - v0.1
-      x-ratelimit-limit-project-tokens:
-      - '150000000'
       x-ratelimit-limit-requests:
-      - '30000'
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
       x-ratelimit-limit-tokens:
-      - '150000000'
-      x-ratelimit-remaining-project-tokens:
-      - '149999797'
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
       x-ratelimit-remaining-requests:
-      - '29999'
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
       x-ratelimit-remaining-tokens:
-      - '149999797'
-      x-ratelimit-reset-project-tokens:
-      - 0s
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
       x-ratelimit-reset-requests:
-      - 2ms
+      - X-RATELIMIT-RESET-REQUESTS-XXX
       x-ratelimit-reset-tokens:
-      - 0s
+      - X-RATELIMIT-RESET-TOKENS-XXX
       x-request-id:
-      - req_5c4fad6d3e4743d1a43ab65bd333b477
+      - X-REQUEST-ID-XXX
     status:
       code: 200
       message: OK
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_integration.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_integration.yaml
new file mode 100644
index 000000000..d8d345047
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_integration.yaml
@@ -0,0 +1,229 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"My name is Alice. Remember this."}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '94'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_0941b08f06efed9e00697312fcce9c819080f2ec731d0d34ed\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769149180,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769149181,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"msg_0941b08f06efed9e00697312fd74bc8190811ee3d10ac0beca\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"Got it, Alice! How
+        can I assist you today?\"\n        }\n      ],\n      \"role\": \"assistant\"\n
+        \   }\n  ],\n  \"parallel_tool_calls\": true,\n  \"presence_penalty\": 0.0,\n
+        \ \"previous_response_id\": null,\n  \"prompt_cache_key\": null,\n  \"prompt_cache_retention\":
+        null,\n  \"reasoning\": {\n    \"effort\": null,\n    \"summary\": null\n
+        \ },\n  \"safety_identifier\": null,\n  \"service_tier\": \"default\",\n  \"store\":
+        true,\n  \"temperature\": 1.0,\n  \"text\": {\n    \"format\": {\n      \"type\":
+        \"text\"\n    },\n    \"verbosity\": \"medium\"\n  },\n  \"tool_choice\":
+        \"auto\",\n  \"tools\": [],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\":
+        \"disabled\",\n  \"usage\": {\n    \"input_tokens\": 15,\n    \"input_tokens_details\":
+        {\n      \"cached_tokens\": 0\n    },\n    \"output_tokens\": 13,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 28\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 06:19:41 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '875'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '878'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"input":[{"role":"user","content":"What is my name?"}],"model":"gpt-4o-mini","previous_response_id":"resp_0941b08f06efed9e00697312fcce9c819080f2ec731d0d34ed"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '159'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_0941b08f06efed9e00697312fdc3b88190b48287f703659623\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769149181,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769149182,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"msg_0941b08f06efed9e00697312fe20348190a3b15a4bc2438e0c\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"Your name is Alice.
+        How can I help you today?\"\n        }\n      ],\n      \"role\": \"assistant\"\n
+        \   }\n  ],\n  \"parallel_tool_calls\": true,\n  \"presence_penalty\": 0.0,\n
+        \ \"previous_response_id\": \"resp_0941b08f06efed9e00697312fcce9c819080f2ec731d0d34ed\",\n
+        \ \"prompt_cache_key\": null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\":
+        {\n    \"effort\": null,\n    \"summary\": null\n  },\n  \"safety_identifier\":
+        null,\n  \"service_tier\": \"default\",\n  \"store\": true,\n  \"temperature\":
+        1.0,\n  \"text\": {\n    \"format\": {\n      \"type\": \"text\"\n    },\n
+        \   \"verbosity\": \"medium\"\n  },\n  \"tool_choice\": \"auto\",\n  \"tools\":
+        [],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n
+        \ \"usage\": {\n    \"input_tokens\": 40,\n    \"input_tokens_details\": {\n
+        \     \"cached_tokens\": 0\n    },\n    \"output_tokens\": 13,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 53\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 06:19:42 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '834'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '836'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_with_reset.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_with_reset.yaml
new file mode 100644
index 000000000..2ef1419b7
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_auto_chain_with_reset.yaml
@@ -0,0 +1,230 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"My favorite color is blue."}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '88'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_0694f37e374b8ed200697312febfa48190bd8aefeb776f98ab\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769149182,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769149183,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"msg_0694f37e374b8ed200697312ff1720819097b11ea482439901\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"Blue is a beautiful
+        color! It often represents calmness, tranquility, and stability. Do you have
+        a favorite shade of blue, like sky blue, navy, or turquoise?\"\n        }\n
+        \     ],\n      \"role\": \"assistant\"\n    }\n  ],\n  \"parallel_tool_calls\":
+        true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\": null,\n  \"prompt_cache_key\":
+        null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\": {\n    \"effort\":
+        null,\n    \"summary\": null\n  },\n  \"safety_identifier\": null,\n  \"service_tier\":
+        \"default\",\n  \"store\": true,\n  \"temperature\": 1.0,\n  \"text\": {\n
+        \   \"format\": {\n      \"type\": \"text\"\n    },\n    \"verbosity\": \"medium\"\n
+        \ },\n  \"tool_choice\": \"auto\",\n  \"tools\": [],\n  \"top_logprobs\":
+        0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n  \"usage\": {\n    \"input_tokens\":
+        13,\n    \"input_tokens_details\": {\n      \"cached_tokens\": 0\n    },\n
+        \   \"output_tokens\": 36,\n    \"output_tokens_details\": {\n      \"reasoning_tokens\":
+        0\n    },\n    \"total_tokens\": 49\n  },\n  \"user\": null,\n  \"metadata\":
+        {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 06:19:43 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '932'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '934'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"input":[{"role":"user","content":"Hello!"}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '68'
+      content-type:
+      - application/json
+      cookie:
+      - COOKIE-XXX
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_0858c6d9a191c7aa00697312ffc09881979333f4c1fd7fb3e7\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769149183,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769149184,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"msg_0858c6d9a191c7aa006973130010288197a103879941455ea5\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"Hello! How can I assist
+        you today?\"\n        }\n      ],\n      \"role\": \"assistant\"\n    }\n
+        \ ],\n  \"parallel_tool_calls\": true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\":
+        null,\n  \"prompt_cache_key\": null,\n  \"prompt_cache_retention\": null,\n
+        \ \"reasoning\": {\n    \"effort\": null,\n    \"summary\": null\n  },\n  \"safety_identifier\":
+        null,\n  \"service_tier\": \"default\",\n  \"store\": true,\n  \"temperature\":
+        1.0,\n  \"text\": {\n    \"format\": {\n      \"type\": \"text\"\n    },\n
+        \   \"verbosity\": \"medium\"\n  },\n  \"tool_choice\": \"auto\",\n  \"tools\":
+        [],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n
+        \ \"usage\": {\n    \"input_tokens\": 9,\n    \"input_tokens_details\": {\n
+        \     \"cached_tokens\": 0\n    },\n    \"output_tokens\": 10,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 19\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 06:19:44 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '553'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '556'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_basic_call.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_basic_call.yaml
new file mode 100644
index 000000000..b6503803c
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_basic_call.yaml
@@ -0,0 +1,118 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"What is 2 + 2? Answer with just the
+      number."}],"model":"gpt-4o-mini","instructions":"You are a helpful assistant.
+      Be concise."}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '163'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_0cb795418d859a0c0069730cd9e4988195bf9d684fe6a8f839\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769147609,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769147610,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        \"You are a helpful assistant. Be concise.\",\n  \"max_output_tokens\": null,\n
+        \ \"max_tool_calls\": null,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n  \"output\":
+        [\n    {\n      \"id\": \"msg_0cb795418d859a0c0069730cda35788195906a301e3b3cd3f5\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"4\"\n        }\n      ],\n
+        \     \"role\": \"assistant\"\n    }\n  ],\n  \"parallel_tool_calls\": true,\n
+        \ \"presence_penalty\": 0.0,\n  \"previous_response_id\": null,\n  \"prompt_cache_key\":
+        null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\": {\n    \"effort\":
+        null,\n    \"summary\": null\n  },\n  \"safety_identifier\": null,\n  \"service_tier\":
+        \"default\",\n  \"store\": true,\n  \"temperature\": 1.0,\n  \"text\": {\n
+        \   \"format\": {\n      \"type\": \"text\"\n    },\n    \"verbosity\": \"medium\"\n
+        \ },\n  \"tool_choice\": \"auto\",\n  \"tools\": [],\n  \"top_logprobs\":
+        0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n  \"usage\": {\n    \"input_tokens\":
+        34,\n    \"input_tokens_details\": {\n      \"cached_tokens\": 0\n    },\n
+        \   \"output_tokens\": 2,\n    \"output_tokens_details\": {\n      \"reasoning_tokens\":
+        0\n    },\n    \"total_tokens\": 36\n  },\n  \"user\": null,\n  \"metadata\":
+        {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 05:53:30 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '486'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '489'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_parse_tool_outputs_basic_call.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_parse_tool_outputs_basic_call.yaml
new file mode 100644
index 000000000..bea2aeb70
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_parse_tool_outputs_basic_call.yaml
@@ -0,0 +1,115 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"Say hello in exactly 3 words."}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '91'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_004fa988af496dce0069731150cad081979659131a7fe57fb4\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769148752,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769148753,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"msg_004fa988af496dce006973115120dc8197872005ab71443ea5\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"Hello there, friend!\"\n
+        \       }\n      ],\n      \"role\": \"assistant\"\n    }\n  ],\n  \"parallel_tool_calls\":
+        true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\": null,\n  \"prompt_cache_key\":
+        null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\": {\n    \"effort\":
+        null,\n    \"summary\": null\n  },\n  \"safety_identifier\": null,\n  \"service_tier\":
+        \"default\",\n  \"store\": true,\n  \"temperature\": 1.0,\n  \"text\": {\n
+        \   \"format\": {\n      \"type\": \"text\"\n    },\n    \"verbosity\": \"medium\"\n
+        \ },\n  \"tool_choice\": \"auto\",\n  \"tools\": [],\n  \"top_logprobs\":
+        0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n  \"usage\": {\n    \"input_tokens\":
+        15,\n    \"input_tokens_details\": {\n      \"cached_tokens\": 0\n    },\n
+        \   \"output_tokens\": 6,\n    \"output_tokens_details\": {\n      \"reasoning_tokens\":
+        0\n    },\n    \"total_tokens\": 21\n  },\n  \"user\": null,\n  \"metadata\":
+        {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 06:12:33 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '530'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '533'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_returns_usage_metrics.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_returns_usage_metrics.yaml
new file mode 100644
index 000000000..bab994995
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_returns_usage_metrics.yaml
@@ -0,0 +1,115 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '71'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_0830504c7bf3e6c20069730cda854c81969d2fce8d9ddaf150\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769147610,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769147611,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"msg_0830504c7bf3e6c20069730cdae3fc8196a46d92b4e3249bb5\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"Hello! How can I assist
+        you today?\"\n        }\n      ],\n      \"role\": \"assistant\"\n    }\n
+        \ ],\n  \"parallel_tool_calls\": true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\":
+        null,\n  \"prompt_cache_key\": null,\n  \"prompt_cache_retention\": null,\n
+        \ \"reasoning\": {\n    \"effort\": null,\n    \"summary\": null\n  },\n  \"safety_identifier\":
+        null,\n  \"service_tier\": \"default\",\n  \"store\": true,\n  \"temperature\":
+        1.0,\n  \"text\": {\n    \"format\": {\n      \"type\": \"text\"\n    },\n
+        \   \"verbosity\": \"medium\"\n  },\n  \"tool_choice\": \"auto\",\n  \"tools\":
+        [],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n
+        \ \"usage\": {\n    \"input_tokens\": 9,\n    \"input_tokens_details\": {\n
+        \     \"cached_tokens\": 0\n    },\n    \"output_tokens\": 10,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 19\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 05:53:31 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '723'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '727'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_streaming.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_streaming.yaml
new file mode 100644
index 000000000..651a77d33
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_streaming.yaml
@@ -0,0 +1,165 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"Count from 1 to 3, separated by commas."}],"model":"gpt-4o-mini","instructions":"Be
+      very concise.","stream":true}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '149'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: 'event: response.created
+
+        data: {"type":"response.created","response":{"id":"resp_025a72b78bd7093b0069730cdc05188195861094aa74743c7a","object":"response","created_at":1769147612,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":"Be
+        very concise.","max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":0}
+
+
+        event: response.in_progress
+
+        data: {"type":"response.in_progress","response":{"id":"resp_025a72b78bd7093b0069730cdc05188195861094aa74743c7a","object":"response","created_at":1769147612,"status":"in_progress","background":false,"completed_at":null,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":"Be
+        very concise.","max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"auto","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":null,"user":null,"metadata":{}},"sequence_number":1}
+
+
+        event: response.output_item.added
+
+        data: {"type":"response.output_item.added","item":{"id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","type":"message","status":"in_progress","content":[],"role":"assistant"},"output_index":0,"sequence_number":2}
+
+
+        event: response.content_part.added
+
+        data: {"type":"response.content_part.added","content_index":0,"item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":""},"sequence_number":3}
+
+
+        event: response.output_text.delta
+
+        data: {"type":"response.output_text.delta","content_index":0,"delta":"1","item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"obfuscation":"HJJluOyapQpZ3rN","output_index":0,"sequence_number":4}
+
+
+        event: response.output_text.delta
+
+        data: {"type":"response.output_text.delta","content_index":0,"delta":",","item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"obfuscation":"jlmF1GrSWVxpg7E","output_index":0,"sequence_number":5}
+
+
+        event: response.output_text.delta
+
+        data: {"type":"response.output_text.delta","content_index":0,"delta":" ","item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"obfuscation":"6VGaQUute8jFvJL","output_index":0,"sequence_number":6}
+
+
+        event: response.output_text.delta
+
+        data: {"type":"response.output_text.delta","content_index":0,"delta":"2","item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"obfuscation":"26OBDAHaX06A3tO","output_index":0,"sequence_number":7}
+
+
+        event: response.output_text.delta
+
+        data: {"type":"response.output_text.delta","content_index":0,"delta":",","item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"obfuscation":"PXE29yQWZVNuFrG","output_index":0,"sequence_number":8}
+
+
+        event: response.output_text.delta
+
+        data: {"type":"response.output_text.delta","content_index":0,"delta":" ","item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"obfuscation":"vqA9FbYuAGelvTT","output_index":0,"sequence_number":9}
+
+
+        event: response.output_text.delta
+
+        data: {"type":"response.output_text.delta","content_index":0,"delta":"3","item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"obfuscation":"HociLl8grz5Y3Bk","output_index":0,"sequence_number":10}
+
+
+        event: response.output_text.done
+
+        data: {"type":"response.output_text.done","content_index":0,"item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","logprobs":[],"output_index":0,"sequence_number":11,"text":"1,
+        2, 3"}
+
+
+        event: response.content_part.done
+
+        data: {"type":"response.content_part.done","content_index":0,"item_id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","output_index":0,"part":{"type":"output_text","annotations":[],"logprobs":[],"text":"1,
+        2, 3"},"sequence_number":12}
+
+
+        event: response.output_item.done
+
+        data: {"type":"response.output_item.done","item":{"id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"1,
+        2, 3"}],"role":"assistant"},"output_index":0,"sequence_number":13}
+
+
+        event: response.completed
+
+        data: {"type":"response.completed","response":{"id":"resp_025a72b78bd7093b0069730cdc05188195861094aa74743c7a","object":"response","created_at":1769147612,"status":"completed","background":false,"completed_at":1769147612,"error":null,"frequency_penalty":0.0,"incomplete_details":null,"instructions":"Be
+        very concise.","max_output_tokens":null,"max_tool_calls":null,"model":"gpt-4o-mini-2024-07-18","output":[{"id":"msg_025a72b78bd7093b0069730cdc45388195aecc8dc40afc23b5","type":"message","status":"completed","content":[{"type":"output_text","annotations":[],"logprobs":[],"text":"1,
+        2, 3"}],"role":"assistant"}],"parallel_tool_calls":true,"presence_penalty":0.0,"previous_response_id":null,"prompt_cache_key":null,"prompt_cache_retention":null,"reasoning":{"effort":null,"summary":null},"safety_identifier":null,"service_tier":"default","store":true,"temperature":1.0,"text":{"format":{"type":"text"},"verbosity":"medium"},"tool_choice":"auto","tools":[],"top_logprobs":0,"top_p":1.0,"truncation":"disabled","usage":{"input_tokens":27,"input_tokens_details":{"cached_tokens":0},"output_tokens":8,"output_tokens_details":{"reasoning_tokens":0},"total_tokens":35},"user":null,"metadata":{}},"sequence_number":14}
+
+
+        '
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Fri, 23 Jan 2026 05:53:32 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '60'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '49'
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_parse_tool_outputs.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_parse_tool_outputs.yaml
new file mode 100644
index 000000000..6f8ce57ed
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_parse_tool_outputs.yaml
@@ -0,0 +1,133 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"What is the current population of Tokyo?
+      Be very brief."}],"model":"gpt-4o-mini","tools":[{"type":"web_search_preview"}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '157'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_032afd4ddbab8993006973114dd1b4819691b5d7306c6ca5c6\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769148749,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769148752,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"ws_032afd4ddbab8993006973114e536c819690ebb3728000ec00\",\n
+        \     \"type\": \"web_search_call\",\n      \"status\": \"completed\",\n      \"action\":
+        {\n        \"type\": \"search\",\n        \"queries\": [\n          \"current
+        population of Tokyo 2023\"\n        ],\n        \"query\": \"current population
+        of Tokyo 2023\"\n      }\n    },\n    {\n      \"id\": \"msg_032afd4ddbab8993006973114f81ac8196b8a98c55fb77181f\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [\n            {\n              \"type\": \"url_citation\",\n              \"end_index\":
+        187,\n              \"start_index\": 91,\n              \"title\": \"Tokyo,
+        Japan Metro Area Population (1950-2025) | MacroTrends\",\n              \"url\":
+        \"https://www.macrotrends.net/cities/21671/tokyo/population?utm_source=openai\"\n
+        \           },\n            {\n              \"type\": \"url_citation\",\n
+        \             \"end_index\": 352,\n              \"start_index\": 261,\n              \"title\":
+        \"Demographics of Tokyo\",\n              \"url\": \"https://en.wikipedia.org/wiki/Demographics_of_Tokyo?utm_source=openai\"\n
+        \           }\n          ],\n          \"logprobs\": [],\n          \"text\":
+        \"As of 2025, Tokyo's metropolitan area has a population of approximately
+        37 million people. ([macrotrends.net](https://www.macrotrends.net/cities/21671/tokyo/population?utm_source=openai))
+        However, the city proper has a population of about 14 million residents. ([en.wikipedia.org](https://en.wikipedia.org/wiki/Demographics_of_Tokyo?utm_source=openai))
+        \"\n        }\n      ],\n      \"role\": \"assistant\"\n    }\n  ],\n  \"parallel_tool_calls\":
+        true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\": null,\n  \"prompt_cache_key\":
+        null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\": {\n    \"effort\":
+        null,\n    \"summary\": null\n  },\n  \"safety_identifier\": null,\n  \"service_tier\":
+        \"default\",\n  \"store\": true,\n  \"temperature\": 1.0,\n  \"text\": {\n
+        \   \"format\": {\n      \"type\": \"text\"\n    },\n    \"verbosity\": \"medium\"\n
+        \ },\n  \"tool_choice\": \"auto\",\n  \"tools\": [\n    {\n      \"type\":
+        \"web_search_preview\",\n      \"search_context_size\": \"medium\",\n      \"user_location\":
+        {\n        \"type\": \"approximate\",\n        \"city\": null,\n        \"country\":
+        \"US\",\n        \"region\": null,\n        \"timezone\": null\n      }\n
+        \   }\n  ],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\":
+        \"disabled\",\n  \"usage\": {\n    \"input_tokens\": 313,\n    \"input_tokens_details\":
+        {\n      \"cached_tokens\": 0\n    },\n    \"output_tokens\": 108,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 421\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 06:12:32 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '2738'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '2742'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_structured_output.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_structured_output.yaml
new file mode 100644
index 000000000..0d15531a6
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_structured_output.yaml
@@ -0,0 +1,127 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"What is 5 * 7?"}],"model":"gpt-4o-mini","text":{"format":{"type":"json_schema","name":"MathAnswer","strict":true,"schema":{"description":"Structured
+      math answer.","properties":{"result":{"description":"The numerical result","title":"Result","type":"integer"},"explanation":{"description":"Brief
+      explanation","title":"Explanation","type":"string"}},"required":["result","explanation"],"title":"MathAnswer","type":"object","additionalProperties":false}}}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '489'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_06aa2adbbac5b2cc0069730cdcaa988195bd3d284445d2f4d2\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769147612,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769147613,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"msg_06aa2adbbac5b2cc0069730cdd0a9c8195a25cd9c472be0e97\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"{\\\"result\\\":35,\\\"explanation\\\":\\\"Multiplying
+        5 by 7 involves adding 5 together seven times, which equals 35.\\\"}\"\n        }\n
+        \     ],\n      \"role\": \"assistant\"\n    }\n  ],\n  \"parallel_tool_calls\":
+        true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\": null,\n  \"prompt_cache_key\":
+        null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\": {\n    \"effort\":
+        null,\n    \"summary\": null\n  },\n  \"safety_identifier\": null,\n  \"service_tier\":
+        \"default\",\n  \"store\": true,\n  \"temperature\": 1.0,\n  \"text\": {\n
+        \   \"format\": {\n      \"type\": \"json_schema\",\n      \"description\":
+        null,\n      \"name\": \"MathAnswer\",\n      \"schema\": {\n        \"description\":
+        \"Structured math answer.\",\n        \"properties\": {\n          \"result\":
+        {\n            \"description\": \"The numerical result\",\n            \"title\":
+        \"Result\",\n            \"type\": \"integer\"\n          },\n          \"explanation\":
+        {\n            \"description\": \"Brief explanation\",\n            \"title\":
+        \"Explanation\",\n            \"type\": \"string\"\n          }\n        },\n
+        \       \"required\": [\n          \"result\",\n          \"explanation\"\n
+        \       ],\n        \"title\": \"MathAnswer\",\n        \"type\": \"object\",\n
+        \       \"additionalProperties\": false\n      },\n      \"strict\": true\n
+        \   },\n    \"verbosity\": \"medium\"\n  },\n  \"tool_choice\": \"auto\",\n
+        \ \"tools\": [],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\":
+        \"disabled\",\n  \"usage\": {\n    \"input_tokens\": 76,\n    \"input_tokens_details\":
+        {\n      \"cached_tokens\": 0\n    },\n    \"output_tokens\": 30,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 106\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 05:53:33 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1187'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '1190'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_system_message_extraction.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_system_message_extraction.yaml
new file mode 100644
index 000000000..4f8b82f13
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_system_message_extraction.yaml
@@ -0,0 +1,117 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini","instructions":"You
+      always respond in uppercase letters only."}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '134'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_0258c170444ec7a50069730cd8e8e4819588e1b324aa40b858\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769147608,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769147609,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        \"You always respond in uppercase letters only.\",\n  \"max_output_tokens\":
+        null,\n  \"max_tool_calls\": null,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"output\": [\n    {\n      \"id\": \"msg_0258c170444ec7a50069730cd976b4819594c04c0626e273cb\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [],\n          \"logprobs\": [],\n          \"text\": \"HELLO! HOW CAN I HELP
+        YOU TODAY?\"\n        }\n      ],\n      \"role\": \"assistant\"\n    }\n
+        \ ],\n  \"parallel_tool_calls\": true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\":
+        null,\n  \"prompt_cache_key\": null,\n  \"prompt_cache_retention\": null,\n
+        \ \"reasoning\": {\n    \"effort\": null,\n    \"summary\": null\n  },\n  \"safety_identifier\":
+        null,\n  \"service_tier\": \"default\",\n  \"store\": true,\n  \"temperature\":
+        1.0,\n  \"text\": {\n    \"format\": {\n      \"type\": \"text\"\n    },\n
+        \   \"verbosity\": \"medium\"\n  },\n  \"tool_choice\": \"auto\",\n  \"tools\":
+        [],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\": \"disabled\",\n
+        \ \"usage\": {\n    \"input_tokens\": 21,\n    \"input_tokens_details\": {\n
+        \     \"cached_tokens\": 0\n    },\n    \"output_tokens\": 11,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 32\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 05:53:29 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '816'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '818'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_web_search.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_web_search.yaml
new file mode 100644
index 000000000..9fcecd08e
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_with_web_search.yaml
@@ -0,0 +1,139 @@
+interactions:
+- request:
+    body: '{"input":[{"role":"user","content":"What is the current population of Tokyo?
+      Be brief."}],"model":"gpt-4o-mini","tools":[{"type":"web_search_preview"}]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '152'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/responses
+  response:
+    body:
+      string: "{\n  \"id\": \"resp_00e3e58899415fd50069730e878c3481948af3ddbba6a770e5\",\n
+        \ \"object\": \"response\",\n  \"created_at\": 1769148039,\n  \"status\":
+        \"completed\",\n  \"background\": false,\n  \"billing\": {\n    \"payer\":
+        \"developer\"\n  },\n  \"completed_at\": 1769148042,\n  \"error\": null,\n
+        \ \"frequency_penalty\": 0.0,\n  \"incomplete_details\": null,\n  \"instructions\":
+        null,\n  \"max_output_tokens\": null,\n  \"max_tool_calls\": null,\n  \"model\":
+        \"gpt-4o-mini-2024-07-18\",\n  \"output\": [\n    {\n      \"id\": \"ws_00e3e58899415fd50069730e8811808194a111d2203570f498\",\n
+        \     \"type\": \"web_search_call\",\n      \"status\": \"completed\",\n      \"action\":
+        {\n        \"type\": \"search\",\n        \"queries\": [\n          \"current
+        population of Tokyo 2023\"\n        ],\n        \"query\": \"current population
+        of Tokyo 2023\"\n      }\n    },\n    {\n      \"id\": \"msg_00e3e58899415fd50069730e88e734819495450812cdddde0b\",\n
+        \     \"type\": \"message\",\n      \"status\": \"completed\",\n      \"content\":
+        [\n        {\n          \"type\": \"output_text\",\n          \"annotations\":
+        [\n            {\n              \"type\": \"url_citation\",\n              \"end_index\":
+        153,\n              \"start_index\": 62,\n              \"title\": \"Demographics
+        of Tokyo\",\n              \"url\": \"https://en.wikipedia.org/wiki/Demographics_of_Tokyo?utm_source=openai\"\n
+        \           },\n            {\n              \"type\": \"url_citation\",\n
+        \             \"end_index\": 366,\n              \"start_index\": 270,\n              \"title\":
+        \"Tokyo, Japan Metro Area Population (1950-2025) | MacroTrends\",\n              \"url\":
+        \"https://www.macrotrends.net/cities/21671/tokyo/population?utm_source=openai\"\n
+        \           },\n            {\n              \"type\": \"url_citation\",\n
+        \             \"end_index\": 614,\n              \"start_index\": 469,\n              \"title\":
+        \"Tokyo Third in UN Ranking of Global Megacities at 33.4 Million | Nippon.com\",\n
+        \             \"url\": \"https://www.nippon.com/en/japan-data/h02639/tokyo-third-in-un-ranking-of-global-megacities-at-33-4-million.html?utm_source=openai\"\n
+        \           }\n          ],\n          \"logprobs\": [],\n          \"text\":
+        \"As of 2025, Tokyo's population is approximately 14.2 million. ([en.wikipedia.org](https://en.wikipedia.org/wiki/Demographics_of_Tokyo?utm_source=openai))
+        However, the Tokyo metropolitan area, which includes surrounding prefectures,
+        has a population of about 37 million. ([macrotrends.net](https://www.macrotrends.net/cities/21671/tokyo/population?utm_source=openai))
+        In 2025, Tokyo was the third most populous urban agglomeration globally, following
+        Jakarta and Dhaka. ([nippon.com](https://www.nippon.com/en/japan-data/h02639/tokyo-third-in-un-ranking-of-global-megacities-at-33-4-million.html?utm_source=openai))
+        \"\n        }\n      ],\n      \"role\": \"assistant\"\n    }\n  ],\n  \"parallel_tool_calls\":
+        true,\n  \"presence_penalty\": 0.0,\n  \"previous_response_id\": null,\n  \"prompt_cache_key\":
+        null,\n  \"prompt_cache_retention\": null,\n  \"reasoning\": {\n    \"effort\":
+        null,\n    \"summary\": null\n  },\n  \"safety_identifier\": null,\n  \"service_tier\":
+        \"default\",\n  \"store\": true,\n  \"temperature\": 1.0,\n  \"text\": {\n
+        \   \"format\": {\n      \"type\": \"text\"\n    },\n    \"verbosity\": \"medium\"\n
+        \ },\n  \"tool_choice\": \"auto\",\n  \"tools\": [\n    {\n      \"type\":
+        \"web_search_preview\",\n      \"search_context_size\": \"medium\",\n      \"user_location\":
+        {\n        \"type\": \"approximate\",\n        \"city\": null,\n        \"country\":
+        \"US\",\n        \"region\": null,\n        \"timezone\": null\n      }\n
+        \   }\n  ],\n  \"top_logprobs\": 0,\n  \"top_p\": 1.0,\n  \"truncation\":
+        \"disabled\",\n  \"usage\": {\n    \"input_tokens\": 312,\n    \"input_tokens_details\":
+        {\n      \"cached_tokens\": 0\n    },\n    \"output_tokens\": 181,\n    \"output_tokens_details\":
+        {\n      \"reasoning_tokens\": 0\n    },\n    \"total_tokens\": 493\n  },\n
+        \ \"user\": null,\n  \"metadata\": {}\n}"
+    headers:
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 06:00:42 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '3147'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '3150'
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/llms/openai/test_openai.py b/lib/crewai/tests/llms/openai/test_openai.py
index 693bd3629..7230c8d77 100644
--- a/lib/crewai/tests/llms/openai/test_openai.py
+++ b/lib/crewai/tests/llms/openai/test_openai.py
@@ -6,7 +6,7 @@ import openai
 import pytest
 
 from crewai.llm import LLM
-from crewai.llms.providers.openai.completion import OpenAICompletion
+from crewai.llms.providers.openai.completion import OpenAICompletion, ResponsesAPIResult
 from crewai.crew import Crew
 from crewai.agent import Agent
 from crewai.task import Task
@@ -43,6 +43,7 @@ def test_openai_is_default_provider_without_explicit_llm_set_on_agent():
         role="Research Assistant",
         goal="Find information about the population of Tokyo",
         backstory="You are a helpful research assistant.",
+        llm=LLM(model="gpt-4o-mini"),
     )
     task = Task(
         description="Find information about the population of Tokyo",
@@ -52,7 +53,7 @@ def test_openai_is_default_provider_without_explicit_llm_set_on_agent():
     crew = Crew(agents=[agent], tasks=[task])
     crew.kickoff()
     assert crew.agents[0].llm.__class__.__name__ == "OpenAICompletion"
-    assert crew.agents[0].llm.model == DEFAULT_LLM_MODEL
+    assert crew.agents[0].llm.model == "gpt-4o-mini"
 
 
 
@@ -621,3 +622,773 @@ def test_openai_streaming_returns_usage_metrics():
     assert result.token_usage.prompt_tokens > 0
     assert result.token_usage.completion_tokens > 0
     assert result.token_usage.successful_requests >= 1
+
+
+def test_openai_responses_api_initialization():
+    """Test that OpenAI Responses API can be initialized with api='responses'."""
+    llm = OpenAICompletion(
+        model="gpt-5",
+        api="responses",
+        instructions="You are a helpful assistant.",
+        store=True,
+    )
+
+    assert llm.api == "responses"
+    assert llm.instructions == "You are a helpful assistant."
+    assert llm.store is True
+    assert llm.model == "gpt-5"
+
+
+def test_openai_responses_api_default_is_completions():
+    """Test that the default API is 'completions' for backward compatibility."""
+    llm = OpenAICompletion(model="gpt-4o")
+
+    assert llm.api == "completions"
+
+
+def test_openai_responses_api_prepare_params():
+    """Test that Responses API params are prepared correctly."""
+    llm = OpenAICompletion(
+        model="gpt-5",
+        api="responses",
+        instructions="Base instructions.",
+        store=True,
+        temperature=0.7,
+    )
+
+    messages = [
+        {"role": "system", "content": "System message."},
+        {"role": "user", "content": "Hello!"},
+    ]
+
+    params = llm._prepare_responses_params(messages)
+
+    assert params["model"] == "gpt-5"
+    assert "Base instructions." in params["instructions"]
+    assert "System message." in params["instructions"]
+    assert params["store"] is True
+    assert params["temperature"] == 0.7
+    assert params["input"] == [{"role": "user", "content": "Hello!"}]
+
+
+def test_openai_responses_api_tool_format():
+    """Test that tools are converted to Responses API format (internally-tagged)."""
+    llm = OpenAICompletion(model="gpt-5", api="responses")
+
+    tools = [
+        {
+            "name": "get_weather",
+            "description": "Get the weather for a location",
+            "parameters": {
+                "type": "object",
+                "properties": {"location": {"type": "string"}},
+                "required": ["location"],
+            },
+        }
+    ]
+
+    responses_tools = llm._convert_tools_for_responses(tools)
+
+    assert len(responses_tools) == 1
+    tool = responses_tools[0]
+    assert tool["type"] == "function"
+    assert tool["name"] == "get_weather"
+    assert tool["description"] == "Get the weather for a location"
+    assert "parameters" in tool
+    assert "function" not in tool
+
+
+def test_openai_completions_api_tool_format():
+    """Test that tools are converted to Chat Completions API format (externally-tagged)."""
+    llm = OpenAICompletion(model="gpt-4o", api="completions")
+
+    tools = [
+        {
+            "name": "get_weather",
+            "description": "Get the weather for a location",
+            "parameters": {
+                "type": "object",
+                "properties": {"location": {"type": "string"}},
+                "required": ["location"],
+            },
+        }
+    ]
+
+    completions_tools = llm._convert_tools_for_interference(tools)
+
+    assert len(completions_tools) == 1
+    tool = completions_tools[0]
+    assert tool["type"] == "function"
+    assert "function" in tool
+    assert tool["function"]["name"] == "get_weather"
+    assert tool["function"]["description"] == "Get the weather for a location"
+
+
+def test_openai_responses_api_structured_output_format():
+    """Test that structured outputs use text.format for Responses API."""
+    from pydantic import BaseModel
+
+    class Person(BaseModel):
+        name: str
+        age: int
+
+    llm = OpenAICompletion(model="gpt-5", api="responses")
+
+    messages = [{"role": "user", "content": "Extract: Jane, 25"}]
+    params = llm._prepare_responses_params(messages, response_model=Person)
+
+    assert "text" in params
+    assert "format" in params["text"]
+    assert params["text"]["format"]["type"] == "json_schema"
+    assert params["text"]["format"]["name"] == "Person"
+    assert params["text"]["format"]["strict"] is True
+
+
+def test_openai_responses_api_with_previous_response_id():
+    """Test that previous_response_id is passed for multi-turn conversations."""
+    llm = OpenAICompletion(
+        model="gpt-5",
+        api="responses",
+        previous_response_id="resp_abc123",
+        store=True,
+    )
+
+    messages = [{"role": "user", "content": "Continue our conversation."}]
+    params = llm._prepare_responses_params(messages)
+
+    assert params["previous_response_id"] == "resp_abc123"
+    assert params["store"] is True
+
+
+def test_openai_responses_api_call_routing():
+    """Test that call() routes to the correct API based on the api parameter."""
+    from unittest.mock import patch, MagicMock
+
+    llm_completions = OpenAICompletion(model="gpt-4o", api="completions")
+    llm_responses = OpenAICompletion(model="gpt-5", api="responses")
+
+    with patch.object(
+        llm_completions, "_call_completions", return_value="completions result"
+    ) as mock_completions:
+        result = llm_completions.call("Hello")
+        mock_completions.assert_called_once()
+        assert result == "completions result"
+
+    with patch.object(
+        llm_responses, "_call_responses", return_value="responses result"
+    ) as mock_responses:
+        result = llm_responses.call("Hello")
+        mock_responses.assert_called_once()
+        assert result == "responses result"
+
+
+# =============================================================================
+# VCR Integration Tests for Responses API
+# =============================================================================
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_basic_call():
+    """Test basic Responses API call with text generation."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+        instructions="You are a helpful assistant. Be concise.",
+    )
+
+    result = llm.call("What is 2 + 2? Answer with just the number.")
+
+    assert isinstance(result, str)
+    assert "4" in result
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_with_structured_output():
+    """Test Responses API with structured output using Pydantic model."""
+    from pydantic import BaseModel, Field
+
+    class MathAnswer(BaseModel):
+        """Structured math answer."""
+
+        result: int = Field(description="The numerical result")
+        explanation: str = Field(description="Brief explanation")
+
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+    )
+
+    result = llm.call("What is 5 * 7?", response_model=MathAnswer)
+
+    assert isinstance(result, MathAnswer)
+    assert result.result == 35
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_with_system_message_extraction():
+    """Test that system messages are properly extracted to instructions."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+    )
+
+    messages = [
+        {"role": "system", "content": "You always respond in uppercase letters only."},
+        {"role": "user", "content": "Say hello"},
+    ]
+
+    result = llm.call(messages)
+
+    assert isinstance(result, str)
+    assert result.isupper() or "HELLO" in result.upper()
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_streaming():
+    """Test Responses API with streaming enabled."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+        stream=True,
+        instructions="Be very concise.",
+    )
+
+    result = llm.call("Count from 1 to 3, separated by commas.")
+
+    assert isinstance(result, str)
+    assert "1" in result
+    assert "2" in result
+    assert "3" in result
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_returns_usage_metrics():
+    """Test that Responses API calls return proper token usage metrics."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+    )
+
+    llm.call("Say hello")
+
+    usage = llm.get_token_usage_summary()
+    assert usage.total_tokens > 0
+    assert usage.prompt_tokens > 0
+    assert usage.completion_tokens > 0
+
+
+def test_openai_responses_api_builtin_tools_param():
+    """Test that builtin_tools parameter is properly configured."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        builtin_tools=["web_search", "code_interpreter"],
+    )
+
+    assert llm.builtin_tools == ["web_search", "code_interpreter"]
+
+    messages = [{"role": "user", "content": "Test"}]
+    params = llm._prepare_responses_params(messages)
+
+    assert "tools" in params
+    tool_types = [t["type"] for t in params["tools"]]
+    assert "web_search_preview" in tool_types
+    assert "code_interpreter" in tool_types
+
+
+def test_openai_responses_api_builtin_tools_with_custom_tools():
+    """Test that builtin_tools can be combined with custom function tools."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        builtin_tools=["web_search"],
+    )
+
+    custom_tools = [
+        {
+            "name": "get_weather",
+            "description": "Get weather for a location",
+            "parameters": {"type": "object", "properties": {}},
+        }
+    ]
+
+    messages = [{"role": "user", "content": "Test"}]
+    params = llm._prepare_responses_params(messages, tools=custom_tools)
+
+    assert len(params["tools"]) == 2
+    tool_types = [t.get("type") for t in params["tools"]]
+    assert "web_search_preview" in tool_types
+    assert "function" in tool_types
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_with_web_search():
+    """Test Responses API with web_search built-in tool."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+        builtin_tools=["web_search"],
+    )
+
+    result = llm.call("What is the current population of Tokyo? Be brief.")
+
+    assert isinstance(result, str)
+    assert len(result) > 0
+
+
+def test_responses_api_result_dataclass():
+    """Test ResponsesAPIResult dataclass functionality."""
+    result = ResponsesAPIResult(
+        text="Hello, world!",
+        response_id="resp_123",
+    )
+
+    assert result.text == "Hello, world!"
+    assert result.response_id == "resp_123"
+    assert result.web_search_results == []
+    assert result.file_search_results == []
+    assert result.code_interpreter_results == []
+    assert result.computer_use_results == []
+    assert result.reasoning_summaries == []
+    assert result.function_calls == []
+    assert not result.has_tool_outputs()
+    assert not result.has_reasoning()
+
+
+def test_responses_api_result_has_tool_outputs():
+    """Test ResponsesAPIResult.has_tool_outputs() method."""
+    result_with_web = ResponsesAPIResult(
+        text="Test",
+        web_search_results=[{"id": "ws_1", "status": "completed", "type": "web_search_call"}],
+    )
+    assert result_with_web.has_tool_outputs()
+
+    result_with_file = ResponsesAPIResult(
+        text="Test",
+        file_search_results=[{"id": "fs_1", "status": "completed", "type": "file_search_call", "queries": [], "results": []}],
+    )
+    assert result_with_file.has_tool_outputs()
+
+
+def test_responses_api_result_has_reasoning():
+    """Test ResponsesAPIResult.has_reasoning() method."""
+    result_with_reasoning = ResponsesAPIResult(
+        text="Test",
+        reasoning_summaries=[{"id": "r_1", "type": "reasoning", "summary": []}],
+    )
+    assert result_with_reasoning.has_reasoning()
+
+    result_without = ResponsesAPIResult(text="Test")
+    assert not result_without.has_reasoning()
+
+
+def test_openai_responses_api_parse_tool_outputs_param():
+    """Test that parse_tool_outputs parameter is properly configured."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        parse_tool_outputs=True,
+    )
+
+    assert llm.parse_tool_outputs is True
+
+
+def test_openai_responses_api_parse_tool_outputs_default_false():
+    """Test that parse_tool_outputs defaults to False."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+    )
+
+    assert llm.parse_tool_outputs is False
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_with_parse_tool_outputs():
+    """Test Responses API with parse_tool_outputs enabled returns ResponsesAPIResult."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+        builtin_tools=["web_search"],
+        parse_tool_outputs=True,
+    )
+
+    result = llm.call("What is the current population of Tokyo? Be very brief.")
+
+    assert isinstance(result, ResponsesAPIResult)
+    assert len(result.text) > 0
+    assert result.response_id is not None
+    # Web search should have been used
+    assert len(result.web_search_results) > 0
+    assert result.has_tool_outputs()
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_parse_tool_outputs_basic_call():
+    """Test Responses API with parse_tool_outputs but no built-in tools."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+        parse_tool_outputs=True,
+    )
+
+    result = llm.call("Say hello in exactly 3 words.")
+
+    assert isinstance(result, ResponsesAPIResult)
+    assert len(result.text) > 0
+    assert result.response_id is not None
+    # No built-in tools used
+    assert not result.has_tool_outputs()
+
+
+# ============================================================================
+# Auto-Chaining Tests (Responses API)
+# ============================================================================
+
+
+def test_openai_responses_api_auto_chain_param():
+    """Test that auto_chain parameter is properly configured."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain=True,
+    )
+
+    assert llm.auto_chain is True
+    assert llm._last_response_id is None
+
+
+def test_openai_responses_api_auto_chain_default_false():
+    """Test that auto_chain defaults to False."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+    )
+
+    assert llm.auto_chain is False
+
+
+def test_openai_responses_api_last_response_id_property():
+    """Test last_response_id property."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain=True,
+    )
+
+    # Initially None
+    assert llm.last_response_id is None
+
+    # Simulate setting the internal value
+    llm._last_response_id = "resp_test_123"
+    assert llm.last_response_id == "resp_test_123"
+
+
+def test_openai_responses_api_reset_chain():
+    """Test reset_chain() method clears the response ID."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain=True,
+    )
+
+    # Set a response ID
+    llm._last_response_id = "resp_test_123"
+    assert llm.last_response_id == "resp_test_123"
+
+    # Reset the chain
+    llm.reset_chain()
+    assert llm.last_response_id is None
+
+
+def test_openai_responses_api_auto_chain_prepare_params():
+    """Test that _prepare_responses_params uses auto-chained response ID."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain=True,
+    )
+
+    # No previous response ID yet
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    assert "previous_response_id" not in params
+
+    # Set a previous response ID
+    llm._last_response_id = "resp_previous_123"
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    assert params.get("previous_response_id") == "resp_previous_123"
+
+
+def test_openai_responses_api_explicit_previous_response_id_takes_precedence():
+    """Test that explicit previous_response_id overrides auto-chained ID."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain=True,
+        previous_response_id="resp_explicit_456",
+    )
+
+    # Set an auto-chained response ID
+    llm._last_response_id = "resp_auto_123"
+
+    # Explicit should take precedence
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    assert params.get("previous_response_id") == "resp_explicit_456"
+
+
+def test_openai_responses_api_auto_chain_disabled_no_tracking():
+    """Test that response ID is not tracked when auto_chain is False."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain=False,
+    )
+
+    # Even with a "previous" response ID set internally, params shouldn't use it
+    llm._last_response_id = "resp_should_not_use"
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    assert "previous_response_id" not in params
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_auto_chain_integration():
+    """Test auto-chaining tracks response IDs across calls."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+        auto_chain=True,
+    )
+
+    # First call - should not have previous_response_id
+    assert llm.last_response_id is None
+    result1 = llm.call("My name is Alice. Remember this.")
+
+    # After first call, should have a response ID
+    assert llm.last_response_id is not None
+    first_response_id = llm.last_response_id
+    assert first_response_id.startswith("resp_")
+
+    # Second call - should use the first response ID
+    result2 = llm.call("What is my name?")
+
+    # Response ID should be updated
+    assert llm.last_response_id is not None
+    assert llm.last_response_id != first_response_id  # Should be a new ID
+
+    # The response should remember context (Alice)
+    assert isinstance(result1, str)
+    assert isinstance(result2, str)
+
+
+@pytest.mark.vcr()
+def test_openai_responses_api_auto_chain_with_reset():
+    """Test that reset_chain() properly starts a new conversation."""
+    llm = OpenAICompletion(
+        model="gpt-4o-mini",
+        api="responses",
+        auto_chain=True,
+    )
+
+    # First conversation
+    llm.call("My favorite color is blue.")
+    first_chain_id = llm.last_response_id
+    assert first_chain_id is not None
+
+    # Reset and start new conversation
+    llm.reset_chain()
+    assert llm.last_response_id is None
+
+    # New call should start fresh
+    llm.call("Hello!")
+    second_chain_id = llm.last_response_id
+    assert second_chain_id is not None
+    # New conversation, so different response ID
+    assert second_chain_id != first_chain_id
+
+
+# =============================================================================
+# Encrypted Reasoning for ZDR (Zero Data Retention) Tests
+# =============================================================================
+
+
+def test_openai_responses_api_auto_chain_reasoning_param():
+    """Test that auto_chain_reasoning parameter is properly configured."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=True,
+    )
+
+    assert llm.auto_chain_reasoning is True
+    assert llm._last_reasoning_items is None
+
+
+def test_openai_responses_api_auto_chain_reasoning_default_false():
+    """Test that auto_chain_reasoning defaults to False."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+    )
+
+    assert llm.auto_chain_reasoning is False
+
+
+def test_openai_responses_api_last_reasoning_items_property():
+    """Test last_reasoning_items property."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=True,
+    )
+
+    # Initially None
+    assert llm.last_reasoning_items is None
+
+    # Simulate setting the internal value
+    mock_items = [{"id": "rs_test_123", "type": "reasoning"}]
+    llm._last_reasoning_items = mock_items
+    assert llm.last_reasoning_items == mock_items
+
+
+def test_openai_responses_api_reset_reasoning_chain():
+    """Test reset_reasoning_chain() method clears reasoning items."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=True,
+    )
+
+    # Set reasoning items
+    mock_items = [{"id": "rs_test_123", "type": "reasoning"}]
+    llm._last_reasoning_items = mock_items
+    assert llm.last_reasoning_items == mock_items
+
+    # Reset the reasoning chain
+    llm.reset_reasoning_chain()
+    assert llm.last_reasoning_items is None
+
+
+def test_openai_responses_api_auto_chain_reasoning_adds_include():
+    """Test that auto_chain_reasoning adds reasoning.encrypted_content to include."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=True,
+    )
+
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    assert "include" in params
+    assert "reasoning.encrypted_content" in params["include"]
+
+
+def test_openai_responses_api_auto_chain_reasoning_preserves_existing_include():
+    """Test that auto_chain_reasoning preserves existing include items."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=True,
+        include=["file_search_call.results"],
+    )
+
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    assert "include" in params
+    assert "reasoning.encrypted_content" in params["include"]
+    assert "file_search_call.results" in params["include"]
+
+
+def test_openai_responses_api_auto_chain_reasoning_no_duplicate_include():
+    """Test that reasoning.encrypted_content is not duplicated if already in include."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=True,
+        include=["reasoning.encrypted_content"],
+    )
+
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    assert "include" in params
+    # Should only appear once
+    assert params["include"].count("reasoning.encrypted_content") == 1
+
+
+def test_openai_responses_api_auto_chain_reasoning_prepends_to_input():
+    """Test that stored reasoning items are prepended to input."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=True,
+    )
+
+    # Simulate stored reasoning items
+    mock_reasoning = MagicMock()
+    mock_reasoning.type = "reasoning"
+    mock_reasoning.id = "rs_test_123"
+    llm._last_reasoning_items = [mock_reasoning]
+
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+
+    # Input should have reasoning item first, then the message
+    assert len(params["input"]) == 2
+    assert params["input"][0] == mock_reasoning
+    assert params["input"][1]["role"] == "user"
+
+
+def test_openai_responses_api_auto_chain_reasoning_disabled_no_include():
+    """Test that reasoning.encrypted_content is not added when auto_chain_reasoning is False."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=False,
+    )
+
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+    # Should not have include at all (unless explicitly set)
+    assert "include" not in params or "reasoning.encrypted_content" not in params.get("include", [])
+
+
+def test_openai_responses_api_auto_chain_reasoning_disabled_no_prepend():
+    """Test that reasoning items are not prepended when auto_chain_reasoning is False."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain_reasoning=False,
+    )
+
+    # Even with stored reasoning items, they should not be prepended
+    mock_reasoning = MagicMock()
+    mock_reasoning.type = "reasoning"
+    llm._last_reasoning_items = [mock_reasoning]
+
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+
+    # Input should only have the message, not the reasoning item
+    assert len(params["input"]) == 1
+    assert params["input"][0]["role"] == "user"
+
+
+def test_openai_responses_api_both_auto_chains_work_together():
+    """Test that auto_chain and auto_chain_reasoning can be used together."""
+    llm = OpenAICompletion(
+        model="gpt-4o",
+        api="responses",
+        auto_chain=True,
+        auto_chain_reasoning=True,
+    )
+
+    assert llm.auto_chain is True
+    assert llm.auto_chain_reasoning is True
+    assert llm._last_response_id is None
+    assert llm._last_reasoning_items is None
+
+    # Set both internal values
+    llm._last_response_id = "resp_123"
+    mock_reasoning = MagicMock()
+    mock_reasoning.type = "reasoning"
+    llm._last_reasoning_items = [mock_reasoning]
+
+    params = llm._prepare_responses_params(messages=[{"role": "user", "content": "test"}])
+
+    # Both should be applied
+    assert params.get("previous_response_id") == "resp_123"
+    assert "reasoning.encrypted_content" in params["include"]
+    assert len(params["input"]) == 2  # Reasoning item + message