Compare commits

...

1 Commits

Author SHA1 Message Date
Devin AI
f4fa90dc04 feat: add Responses API support for Azure OpenAI provider
When api='responses' is specified for Azure, the provider creates an
internal OpenAICompletion instance configured with
AzureOpenAI/AsyncAzureOpenAI clients from the openai Python SDK, which
natively supports the Responses API on Azure.

Key changes:
- Extended AzureCompletion.__init__() with Responses API parameters
  (api, instructions, store, previous_response_id, include,
  builtin_tools, parse_tool_outputs, auto_chain, auto_chain_reasoning,
  seed, reasoning_effort, max_completion_tokens)
- Added _init_responses_delegate() method using composition/delegation
- Added delegation logic in call() and acall() methods
- Added pass-through properties: last_response_id, last_reasoning_items
- Added pass-through methods: reset_chain(), reset_reasoning_chain()
- Preserved base endpoint before validation for Azure client config
- Interceptors now allowed in responses mode (via OpenAI SDK)
- Added AZURE_RESPONSES_API_VERSION constant (2025-03-01-preview)
- Added 30+ comprehensive tests covering all new functionality

Closes #4974

Co-Authored-By: João <joao@crewai.com>
2026-03-20 07:22:40 +00:00
2 changed files with 836 additions and 18 deletions

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import json
import logging
import os
from typing import TYPE_CHECKING, Any, TypedDict
from typing import TYPE_CHECKING, Any, Literal, TypedDict
from pydantic import BaseModel
from typing_extensions import Self
@@ -69,11 +69,37 @@ class AzureCompletionParams(TypedDict, total=False):
tool_choice: str
# Default API version for Azure Responses API support
AZURE_RESPONSES_API_VERSION = "2025-03-01-preview"
class AzureCompletion(BaseLLM):
"""Azure AI Inference native completion implementation.
This class provides direct integration with the Azure AI Inference Python SDK,
offering native function calling, streaming support, and proper Azure authentication.
Supports both the Chat Completions API (default) and the Responses API.
When api="responses" is specified, the class delegates to an internal
OpenAICompletion instance configured with AzureOpenAI clients from the
openai Python SDK, which natively supports the Responses API on Azure.
Args:
api: Which API to use - "completions" (default) or "responses".
When "responses" is selected, Azure OpenAI Responses API is used
via the openai Python SDK's AzureOpenAI client.
instructions: System-level instructions (Responses API only).
store: Whether to store responses for multi-turn (Responses API only).
previous_response_id: ID of previous response for multi-turn (Responses API only).
include: Additional data to include in response (Responses API only).
builtin_tools: List of OpenAI built-in tools to enable (Responses API only).
Supported: "web_search", "file_search", "code_interpreter", "computer_use".
parse_tool_outputs: Whether to return structured ResponsesAPIResult with
parsed built-in tool outputs instead of just text (Responses API only).
auto_chain: Automatically track and use response IDs for multi-turn
conversations (Responses API only).
auto_chain_reasoning: Automatically track and pass encrypted reasoning items
for ZDR (Zero Data Retention) compliance (Responses API only).
"""
def __init__(
@@ -89,10 +115,22 @@ class AzureCompletion(BaseLLM):
frequency_penalty: float | None = None,
presence_penalty: float | None = None,
max_tokens: int | None = None,
max_completion_tokens: int | None = None,
stop: list[str] | None = None,
stream: bool = False,
interceptor: BaseInterceptor[Any, Any] | None = None,
response_format: type[BaseModel] | None = None,
api: Literal["completions", "responses"] = "completions",
instructions: str | None = None,
store: bool | None = None,
previous_response_id: str | None = None,
include: list[str] | None = None,
builtin_tools: list[str] | None = None,
parse_tool_outputs: bool = False,
auto_chain: bool = False,
auto_chain_reasoning: bool = False,
seed: int | None = None,
reasoning_effort: str | None = None,
**kwargs: Any,
):
"""Initialize Azure AI Inference chat completion client.
@@ -109,15 +147,27 @@ class AzureCompletion(BaseLLM):
frequency_penalty: Frequency penalty (-2 to 2)
presence_penalty: Presence penalty (-2 to 2)
max_tokens: Maximum tokens in response
max_completion_tokens: Maximum completion tokens in response
stop: Stop sequences
stream: Enable streaming responses
interceptor: HTTP interceptor (not yet supported for Azure).
response_format: Pydantic model for structured output. Used as default when
response_model is not passed to call()/acall() methods.
Only works with OpenAI models deployed on Azure.
api: Which API to use - "completions" (default) or "responses".
instructions: System-level instructions (Responses API only).
store: Whether to store responses for multi-turn (Responses API only).
previous_response_id: ID of previous response for multi-turn (Responses API only).
include: Additional data to include in response (Responses API only).
builtin_tools: List of OpenAI built-in tools to enable (Responses API only).
parse_tool_outputs: Whether to return structured ResponsesAPIResult (Responses API only).
auto_chain: Auto-track response IDs for multi-turn (Responses API only).
auto_chain_reasoning: Auto-track encrypted reasoning items for ZDR (Responses API only).
seed: Random seed for deterministic outputs.
reasoning_effort: Reasoning effort level for reasoning models.
**kwargs: Additional parameters
"""
if interceptor is not None:
if interceptor is not None and api != "responses":
raise NotImplementedError(
"HTTP interceptors are not yet supported for Azure AI Inference provider. "
"Interceptors are currently supported for OpenAI and Anthropic providers only."
@@ -128,12 +178,13 @@ class AzureCompletion(BaseLLM):
)
self.api_key = api_key or os.getenv("AZURE_API_KEY")
self.endpoint = (
self.base_endpoint = (
endpoint
or os.getenv("AZURE_ENDPOINT")
or os.getenv("AZURE_OPENAI_ENDPOINT")
or os.getenv("AZURE_API_BASE")
)
self.api = api
self.api_version = api_version or os.getenv("AZURE_API_VERSION") or "2024-06-01"
self.timeout = timeout
self.max_retries = max_retries
@@ -142,34 +193,68 @@ class AzureCompletion(BaseLLM):
raise ValueError(
"Azure API key is required. Set AZURE_API_KEY environment variable or pass api_key parameter."
)
if not self.endpoint:
if not self.base_endpoint:
raise ValueError(
"Azure endpoint is required. Set AZURE_ENDPOINT environment variable or pass endpoint parameter."
)
# Validate and potentially fix Azure OpenAI endpoint URL
self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model)
# Store the base endpoint before validation modifies it
self.endpoint = self.base_endpoint
# Build client kwargs
client_kwargs = {
"endpoint": self.endpoint,
"credential": AzureKeyCredential(self.api_key),
}
# Responses API mode: delegate to OpenAICompletion with AzureOpenAI clients
self._responses_delegate: Any | None = None
if self.api == "responses":
self._init_responses_delegate(
model=model,
temperature=temperature,
top_p=top_p,
frequency_penalty=frequency_penalty,
presence_penalty=presence_penalty,
max_tokens=max_tokens,
max_completion_tokens=max_completion_tokens,
stop=stop,
stream=stream,
response_format=response_format,
instructions=instructions,
store=store,
previous_response_id=previous_response_id,
include=include,
builtin_tools=builtin_tools,
parse_tool_outputs=parse_tool_outputs,
auto_chain=auto_chain,
auto_chain_reasoning=auto_chain_reasoning,
seed=seed,
reasoning_effort=reasoning_effort,
interceptor=interceptor,
api_version=api_version,
)
else:
# Validate and potentially fix Azure OpenAI endpoint URL (completions mode)
self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model)
# Add api_version if specified (primarily for Azure OpenAI endpoints)
if self.api_version:
client_kwargs["api_version"] = self.api_version
# Build client kwargs
client_kwargs = {
"endpoint": self.endpoint,
"credential": AzureKeyCredential(self.api_key),
}
self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
# Add api_version if specified (primarily for Azure OpenAI endpoints)
if self.api_version:
client_kwargs["api_version"] = self.api_version
self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
self.top_p = top_p
self.frequency_penalty = frequency_penalty
self.presence_penalty = presence_penalty
self.max_tokens = max_tokens
self.max_completion_tokens = max_completion_tokens
self.stream = stream
self.response_format = response_format
self.seed = seed
self.reasoning_effort = reasoning_effort
self.is_openai_model = any(
prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"]
@@ -180,6 +265,100 @@ class AzureCompletion(BaseLLM):
and "/openai/deployments/" in self.endpoint
)
def _init_responses_delegate(
    self,
    model: str,
    temperature: float | None = None,
    top_p: float | None = None,
    frequency_penalty: float | None = None,
    presence_penalty: float | None = None,
    max_tokens: int | None = None,
    max_completion_tokens: int | None = None,
    stop: list[str] | None = None,
    stream: bool = False,
    response_format: type[BaseModel] | None = None,
    instructions: str | None = None,
    store: bool | None = None,
    previous_response_id: str | None = None,
    include: list[str] | None = None,
    builtin_tools: list[str] | None = None,
    parse_tool_outputs: bool = False,
    auto_chain: bool = False,
    auto_chain_reasoning: bool = False,
    seed: int | None = None,
    reasoning_effort: str | None = None,
    interceptor: BaseInterceptor[Any, Any] | None = None,
    api_version: str | None = None,
) -> None:
    """Initialize the Responses API delegate using OpenAICompletion with AzureOpenAI clients.

    Creates an OpenAICompletion instance configured for the Responses API and
    replaces its OpenAI clients with AzureOpenAI/AsyncAzureOpenAI clients built
    from this instance's Azure credentials (`base_endpoint`, `api_key`,
    `timeout`, `max_retries`).

    Args:
        model: Deployment/model name forwarded to the delegate.
        api_version: Explicit Azure API version; falls back to the
            AZURE_API_VERSION env var, then AZURE_RESPONSES_API_VERSION.
        interceptor: HTTP interceptor forwarded to the OpenAI SDK delegate.
        (Remaining parameters are forwarded verbatim to OpenAICompletion.)

    Raises:
        ImportError: If the `openai` package is not installed.
    """
    try:
        from openai import AsyncAzureOpenAI, AzureOpenAI
    except ImportError:
        raise ImportError(
            "OpenAI package is required for Azure Responses API support. "
            'Install it with: uv add "crewai[openai]" or pip install openai'
        ) from None

    from crewai.llms.providers.openai.completion import OpenAICompletion

    # The Responses API needs a newer api-version than the chat default.
    responses_api_version = (
        api_version or os.getenv("AZURE_API_VERSION") or AZURE_RESPONSES_API_VERSION
    )

    # AzureOpenAI expects the resource root; strip any deployment path suffix.
    azure_endpoint = (self.base_endpoint or "").rstrip("/")
    if "/openai/deployments/" in azure_endpoint:
        azure_endpoint = azure_endpoint.split("/openai/deployments/")[0]

    azure_kwargs: dict[str, Any] = {
        "azure_endpoint": azure_endpoint,
        "api_key": self.api_key,
        "api_version": responses_api_version,
    }
    if self.timeout is not None:
        azure_kwargs["timeout"] = self.timeout
    # Explicit None check: `if self.max_retries:` would silently drop
    # max_retries=0 (i.e. "disable retries") and leave the SDK default active.
    if self.max_retries is not None:
        azure_kwargs["max_retries"] = self.max_retries

    # Create the OpenAICompletion delegate with the responses API config.
    delegate = OpenAICompletion(
        model=model,
        api_key=self.api_key,
        api="responses",
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        max_tokens=max_tokens,
        max_completion_tokens=max_completion_tokens,
        stop=stop,
        stream=stream,
        response_format=response_format,
        instructions=instructions,
        store=store,
        previous_response_id=previous_response_id,
        include=include,
        builtin_tools=builtin_tools,
        parse_tool_outputs=parse_tool_outputs,
        auto_chain=auto_chain,
        auto_chain_reasoning=auto_chain_reasoning,
        seed=seed,
        reasoning_effort=reasoning_effort,
        interceptor=interceptor,
    )

    # Swap in Azure-configured clients so Responses calls hit the Azure endpoint.
    delegate.client = AzureOpenAI(**azure_kwargs)  # type: ignore[assignment]
    delegate.async_client = AsyncAzureOpenAI(**azure_kwargs)  # type: ignore[assignment]
    self._responses_delegate = delegate
@staticmethod
def _validate_and_fix_endpoint(endpoint: str, model: str) -> str:
"""Validate and fix Azure endpoint URL format.
@@ -269,6 +448,30 @@ class AzureCompletion(BaseLLM):
)
raise error
@property
def last_response_id(self) -> str | None:
"""Get the last response ID from auto-chaining (Responses API only)."""
if self._responses_delegate is not None:
return self._responses_delegate.last_response_id
return None
def reset_chain(self) -> None:
    """Clear the auto-chain state; no-op without a Responses delegate."""
    delegate = self._responses_delegate
    if delegate is not None:
        delegate.reset_chain()
@property
def last_reasoning_items(self) -> list[Any] | None:
"""Get the last reasoning items from auto-chain reasoning (Responses API only)."""
if self._responses_delegate is not None:
return self._responses_delegate.last_reasoning_items
return None
def reset_reasoning_chain(self) -> None:
    """Clear the reasoning-chain state; no-op without a Responses delegate."""
    delegate = self._responses_delegate
    if delegate is not None:
        delegate.reset_reasoning_chain()
def call(
self,
messages: str | list[LLMMessage],
@@ -279,7 +482,7 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API.
"""Call Azure AI Inference API (Chat Completions or Responses based on api setting).
Args:
messages: Input messages for the chat completion
@@ -293,6 +496,18 @@ class AzureCompletion(BaseLLM):
Returns:
Chat completion response or tool call result
"""
# Delegate to Responses API if configured
if self.api == "responses" and self._responses_delegate is not None:
return self._responses_delegate.call(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
# Emit call started event
@@ -351,7 +566,7 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API asynchronously.
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).
Args:
messages: Input messages for the chat completion
@@ -365,6 +580,18 @@ class AzureCompletion(BaseLLM):
Returns:
Chat completion response or tool call result
"""
# Delegate to Responses API if configured
if self.api == "responses" and self._responses_delegate is not None:
return await self._responses_delegate.acall(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
self._emit_call_started_event(

View File

@@ -1403,3 +1403,594 @@ def test_azure_stop_words_still_applied_to_regular_responses():
assert "Observation:" not in result
assert "Found results" not in result
assert "I need to search for more information" in result
# =============================================================================
# Azure Responses API Tests
# =============================================================================
def test_azure_responses_api_initialization():
    """AzureCompletion with api='responses' builds an internal delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        instructions="You are a helpful assistant.",
        store=True,
    )

    delegate = llm._responses_delegate
    assert llm.api == "responses"
    assert llm.model == "gpt-4o"
    assert delegate is not None
    assert delegate.api == "responses"
    assert delegate.instructions == "You are a helpful assistant."
    assert delegate.store is True
def test_azure_responses_api_default_is_completions():
    """Omitting `api` keeps the legacy Chat Completions behavior (no delegate)."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.api == "completions"
    assert llm._responses_delegate is None
def test_azure_responses_api_delegate_uses_azure_openai_clients():
    """The delegate must carry AzureOpenAI/AsyncAzureOpenAI clients, not plain OpenAI ones."""
    from openai import AsyncAzureOpenAI, AzureOpenAI

    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
        api="responses",
    )

    delegate = llm._responses_delegate
    assert isinstance(delegate.client, AzureOpenAI)
    assert isinstance(delegate.async_client, AsyncAzureOpenAI)
def test_azure_responses_api_strips_deployment_suffix_for_azure_endpoint():
    """Test that /openai/deployments/... is stripped before building Azure clients."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    # Capture the AzureOpenAI constructor call so the endpoint can be inspected.
    with patch("openai.AzureOpenAI") as mock_azure_openai:
        llm = AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com/openai/deployments/gpt-4o",
            api="responses",
        )

    # The delegate should have been created
    assert llm._responses_delegate is not None
    # The client must be pointed at the base resource URL with the deployment
    # path removed (previously only an isinstance check was made here, which
    # did not actually verify the stripping behavior the test is named for).
    assert (
        mock_azure_openai.call_args.kwargs["azure_endpoint"]
        == "https://test.openai.azure.com"
    )
def test_azure_responses_api_uses_correct_api_version():
    """Test that the Responses API version actually reaches the AzureOpenAI client."""
    from crewai.llms.providers.azure.completion import (
        AzureCompletion,
        AZURE_RESPONSES_API_VERSION,
    )

    assert AZURE_RESPONSES_API_VERSION == "2025-03-01-preview"

    # Default version: the AzureOpenAI client must be constructed with it.
    # patch.dict snapshots os.environ; pop guards against AZURE_API_VERSION
    # leaking in from the test environment and masking the default.
    with patch.dict(os.environ):
        os.environ.pop("AZURE_API_VERSION", None)
        with patch("openai.AzureOpenAI") as mock_azure_openai:
            llm = AzureCompletion(
                model="gpt-4o",
                api_key="test-key",
                endpoint="https://test.openai.azure.com",
                api="responses",
            )
    assert llm._responses_delegate is not None
    # Previously only the module constant was asserted; check the constructor
    # kwargs so the version is verified to reach the client.
    assert (
        mock_azure_openai.call_args.kwargs["api_version"]
        == AZURE_RESPONSES_API_VERSION
    )

    # An explicit api_version overrides the default.
    with patch("openai.AzureOpenAI") as mock_azure_openai:
        llm_custom = AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="responses",
            api_version="2025-06-01",
        )
    assert llm_custom._responses_delegate is not None
    assert mock_azure_openai.call_args.kwargs["api_version"] == "2025-06-01"
def test_azure_responses_api_passes_all_params_to_delegate():
    """Every Responses-API option must be mirrored verbatim onto the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    expected = {
        "instructions": "Be concise.",
        "store": True,
        "previous_response_id": "resp_abc123",
        "include": ["reasoning.encrypted_content"],
        "builtin_tools": ["web_search"],
        "parse_tool_outputs": True,
        "auto_chain": True,
        "auto_chain_reasoning": True,
        "temperature": 0.5,
        "top_p": 0.9,
        "seed": 42,
        "reasoning_effort": "high",
    }
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        **expected,
    )

    delegate = llm._responses_delegate
    assert delegate is not None
    for attribute, value in expected.items():
        assert getattr(delegate, attribute) == value
def test_azure_responses_api_call_delegates_to_openai_completion():
    """call() must forward straight to the delegate when api='responses'."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )

    with patch.object(
        llm._responses_delegate, "call", return_value="Hello from Responses API!"
    ) as delegate_call:
        assert llm.call("Hello!") == "Hello from Responses API!"

    delegate_call.assert_called_once_with(
        messages="Hello!",
        tools=None,
        callbacks=None,
        available_functions=None,
        from_task=None,
        from_agent=None,
        response_model=None,
    )
@pytest.mark.asyncio
async def test_azure_responses_api_acall_delegates_to_openai_completion():
    """acall() must forward straight to the delegate when api='responses'."""
    from unittest.mock import AsyncMock

    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )
    async_call = AsyncMock(return_value="Async hello from Responses API!")
    llm._responses_delegate.acall = async_call

    result = await llm.acall("Hello async!")

    assert result == "Async hello from Responses API!"
    async_call.assert_called_once_with(
        messages="Hello async!",
        tools=None,
        callbacks=None,
        available_functions=None,
        from_task=None,
        from_agent=None,
        response_model=None,
    )
def test_azure_responses_api_call_with_tools():
    """Tool definitions must reach the delegate unchanged in responses mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )
    weather_tool = {
        "name": "get_weather",
        "description": "Get the weather for a location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    }

    with patch.object(
        llm._responses_delegate, "call", return_value="It's sunny."
    ) as delegate_call:
        result = llm.call(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[weather_tool],
            available_functions={"get_weather": lambda loc: "Sunny"},
        )

    assert result == "It's sunny."
    delegate_call.assert_called_once()
    assert delegate_call.call_args.kwargs["tools"] == [weather_tool]
def test_azure_responses_api_call_with_response_model():
    """call() passes response_model to the delegate and returns the delegate's result."""
    from pydantic import BaseModel

    from crewai.llms.providers.azure.completion import AzureCompletion

    class WeatherResult(BaseModel):
        temperature: float
        condition: str

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )

    with patch.object(
        llm._responses_delegate,
        "call",
        return_value='{"temperature": 72.0, "condition": "sunny"}',
    ) as mock_call:
        result = llm.call(
            messages="What's the weather?",
            response_model=WeatherResult,
        )

    mock_call.assert_called_once()
    assert mock_call.call_args.kwargs["response_model"] == WeatherResult
    # Previously `result` was assigned but never asserted; pin the pass-through.
    assert result == '{"temperature": 72.0, "condition": "sunny"}'
def test_azure_responses_api_last_response_id_property():
    """last_response_id mirrors the delegate's tracked response ID."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain=True,
    )

    assert llm.last_response_id is None  # nothing tracked yet

    llm._responses_delegate._last_response_id = "resp_test123"
    assert llm.last_response_id == "resp_test123"
def test_azure_responses_api_last_response_id_returns_none_for_completions():
    """Without a Responses delegate (completions mode) last_response_id is None."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.last_response_id is None
def test_azure_responses_api_reset_chain():
    """reset_chain() clears the delegate's tracked response ID."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain=True,
    )
    llm._responses_delegate._last_response_id = "resp_test123"
    assert llm.last_response_id == "resp_test123"

    llm.reset_chain()

    assert llm.last_response_id is None
def test_azure_responses_api_reset_chain_no_op_for_completions():
    """reset_chain() is harmless when no Responses delegate exists."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    llm.reset_chain()  # must not raise
def test_azure_responses_api_last_reasoning_items_property():
    """last_reasoning_items mirrors the delegate's tracked reasoning items."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain_reasoning=True,
    )

    assert llm.last_reasoning_items is None  # nothing tracked yet

    tracked = [{"type": "reasoning", "id": "rs_test"}]
    llm._responses_delegate._last_reasoning_items = tracked
    assert llm.last_reasoning_items == tracked
def test_azure_responses_api_last_reasoning_items_returns_none_for_completions():
    """Without a Responses delegate (completions mode) last_reasoning_items is None."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.last_reasoning_items is None
def test_azure_responses_api_reset_reasoning_chain():
    """reset_reasoning_chain() clears the delegate's tracked reasoning items."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain_reasoning=True,
    )
    llm._responses_delegate._last_reasoning_items = [{"type": "reasoning"}]
    assert llm.last_reasoning_items is not None

    llm.reset_reasoning_chain()

    assert llm.last_reasoning_items is None
def test_azure_responses_api_reset_reasoning_chain_no_op_for_completions():
    """reset_reasoning_chain() is harmless when no Responses delegate exists."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    llm.reset_reasoning_chain()  # must not raise
def test_azure_responses_api_completions_mode_unaffected():
    """Responses-API additions must not disturb default completions mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.api == "completions"
    assert llm._responses_delegate is None
    # The Azure AI Inference clients are still created in completions mode.
    assert hasattr(llm, "client")
    assert hasattr(llm, "async_client")
def test_azure_responses_api_interceptor_allowed():
    """Interceptors are accepted in responses mode (they go through the OpenAI SDK)."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    # Constructing with an interceptor must not raise when api='responses'.
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        interceptor=MagicMock(),
    )

    assert llm._responses_delegate is not None
def test_azure_responses_api_interceptor_blocked_for_completions():
    """Completions mode still rejects interceptors."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with pytest.raises(
        NotImplementedError, match="HTTP interceptors are not yet supported"
    ):
        AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="completions",
            interceptor=MagicMock(),
        )
def test_azure_responses_api_builtin_tools():
    """builtin_tools is forwarded verbatim to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    requested_tools = ["web_search", "code_interpreter"]
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        builtin_tools=requested_tools,
    )

    assert llm._responses_delegate.builtin_tools == requested_tools
def test_azure_responses_api_with_previous_response_id():
    """previous_response_id and store are forwarded to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        store=True,
        previous_response_id="resp_abc123",
    )

    delegate = llm._responses_delegate
    assert delegate.store is True
    assert delegate.previous_response_id == "resp_abc123"
def test_azure_responses_api_env_var_api_version():
    """Test that AZURE_API_VERSION env var actually sets the responses API version."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch.dict(os.environ, {"AZURE_API_VERSION": "2025-10-01"}):
        with patch("openai.AzureOpenAI") as mock_azure_openai:
            llm = AzureCompletion(
                model="gpt-4o",
                api_key="test-key",
                endpoint="https://test.openai.azure.com",
                api="responses",
            )

    assert llm._responses_delegate is not None
    # Previously only delegate existence was asserted; verify the env-provided
    # version actually reaches the AzureOpenAI client constructor.
    assert mock_azure_openai.call_args.kwargs["api_version"] == "2025-10-01"
def test_azure_responses_api_timeout_and_retries():
    """Test that timeout and max_retries are actually passed to the Azure clients."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch("openai.AzureOpenAI") as mock_azure_openai:
        llm = AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="responses",
            timeout=30.0,
            max_retries=5,
        )

    assert llm._responses_delegate is not None
    assert llm.timeout == 30.0
    assert llm.max_retries == 5
    # Previously only the stored attributes were asserted; the docstring claims
    # the values reach the clients, so verify the constructor kwargs too.
    client_kwargs = mock_azure_openai.call_args.kwargs
    assert client_kwargs["timeout"] == 30.0
    assert client_kwargs["max_retries"] == 5
def test_azure_responses_api_streaming_param():
    """stream=True is forwarded to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
        stream=True,
        api="responses",
    )

    assert llm._responses_delegate.stream is True
def test_azure_responses_api_with_non_azure_openai_endpoint():
    """Responses mode also accepts non *.openai.azure.com endpoints (e.g. AI Foundry)."""
    from openai import AzureOpenAI

    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://models.inference.ai.azure.com",
        api="responses",
    )

    delegate = llm._responses_delegate
    assert delegate is not None
    assert isinstance(delegate.client, AzureOpenAI)
def test_azure_responses_api_base_endpoint_preserved():
    """base_endpoint keeps the caller's URL untouched in responses mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    raw_endpoint = "https://test.openai.azure.com"
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint=raw_endpoint,
        api="responses",
    )

    # Responses mode skips the validation that rewrites the URL, so both the
    # preserved base endpoint and the active endpoint equal the original.
    assert llm.base_endpoint == raw_endpoint
    assert llm.endpoint == raw_endpoint
def test_azure_responses_api_endpoint_not_validated_for_responses():
    """Deployment-path rewriting of the endpoint happens only in completions mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    base = "https://test.openai.azure.com"

    # Completions mode appends /openai/deployments/<model> during validation.
    llm_completions = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint=base,
        api="completions",
    )
    assert "/openai/deployments/" in llm_completions.endpoint

    # Responses mode leaves the endpoint exactly as given.
    llm_responses = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint=base,
        api="responses",
    )
    assert llm_responses.endpoint == base