From f4fa90dc0493fd71e4410fceb06e245d6fd35f10 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 20 Mar 2026 07:22:40 +0000 Subject: [PATCH] feat: add Responses API support for Azure OpenAI provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When api='responses' is specified for Azure, creates an internal OpenAICompletion instance configured with AzureOpenAI/AsyncAzureOpenAI clients from the openai Python SDK, which natively supports the Responses API on Azure. Key changes: - Extended AzureCompletion.__init__() with Responses API parameters (api, instructions, store, previous_response_id, include, builtin_tools, parse_tool_outputs, auto_chain, auto_chain_reasoning, seed, reasoning_effort, max_completion_tokens) - Added _init_responses_delegate() method using composition/delegation - Added delegation logic in call() and acall() methods - Added pass-through properties: last_response_id, last_reasoning_items - Added pass-through methods: reset_chain(), reset_reasoning_chain() - Preserved base endpoint before validation for Azure client config - Interceptors now allowed in responses mode (via OpenAI SDK) - Added AZURE_RESPONSES_API_VERSION constant (2025-03-01-preview) - Added 30+ comprehensive tests covering all new functionality Closes #4974 Co-Authored-By: João --- .../crewai/llms/providers/azure/completion.py | 263 +++++++- lib/crewai/tests/llms/azure/test_azure.py | 591 ++++++++++++++++++ 2 files changed, 836 insertions(+), 18 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 00c10112d..2daee2ed2 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -3,7 +3,7 @@ from __future__ import annotations import json import logging import os -from typing import TYPE_CHECKING, Any, TypedDict +from typing import TYPE_CHECKING, Any, Literal, TypedDict from pydantic import BaseModel from typing_extensions import Self @@ -69,11 +69,37 @@ class AzureCompletionParams(TypedDict, total=False): tool_choice: str +# Default API version for Azure Responses API support +AZURE_RESPONSES_API_VERSION = "2025-03-01-preview" + + class AzureCompletion(BaseLLM): """Azure AI Inference native completion implementation. This class provides direct integration with the Azure AI Inference Python SDK, offering native function calling, streaming support, and proper Azure authentication. + + Supports both the Chat Completions API (default) and the Responses API. + When api="responses" is specified, the class delegates to an internal + OpenAICompletion instance configured with AzureOpenAI clients from the + openai Python SDK, which natively supports the Responses API on Azure. + + Args: + api: Which API to use - "completions" (default) or "responses". + When "responses" is selected, Azure OpenAI Responses API is used + via the openai Python SDK's AzureOpenAI client. + instructions: System-level instructions (Responses API only). + store: Whether to store responses for multi-turn (Responses API only). + previous_response_id: ID of previous response for multi-turn (Responses API only). + include: Additional data to include in response (Responses API only). + builtin_tools: List of OpenAI built-in tools to enable (Responses API only). + Supported: "web_search", "file_search", "code_interpreter", "computer_use". + parse_tool_outputs: Whether to return structured ResponsesAPIResult with + parsed built-in tool outputs instead of just text (Responses API only). + auto_chain: Automatically track and use response IDs for multi-turn + conversations (Responses API only). + auto_chain_reasoning: Automatically track and pass encrypted reasoning items + for ZDR (Zero Data Retention) compliance (Responses API only). """ def __init__( @@ -89,10 +115,22 @@ class AzureCompletion(BaseLLM): frequency_penalty: float | None = None, presence_penalty: float | None = None, max_tokens: int | None = None, + max_completion_tokens: int | None = None, stop: list[str] | None = None, stream: bool = False, interceptor: BaseInterceptor[Any, Any] | None = None, response_format: type[BaseModel] | None = None, + api: Literal["completions", "responses"] = "completions", + instructions: str | None = None, + store: bool | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + builtin_tools: list[str] | None = None, + parse_tool_outputs: bool = False, + auto_chain: bool = False, + auto_chain_reasoning: bool = False, + seed: int | None = None, + reasoning_effort: str | None = None, **kwargs: Any, ): """Initialize Azure AI Inference chat completion client. @@ -109,15 +147,27 @@ class AzureCompletion(BaseLLM): frequency_penalty: Frequency penalty (-2 to 2) presence_penalty: Presence penalty (-2 to 2) max_tokens: Maximum tokens in response + max_completion_tokens: Maximum completion tokens in response stop: Stop sequences stream: Enable streaming responses interceptor: HTTP interceptor (not yet supported for Azure). response_format: Pydantic model for structured output. Used as default when response_model is not passed to call()/acall() methods. Only works with OpenAI models deployed on Azure. + api: Which API to use - "completions" (default) or "responses". + instructions: System-level instructions (Responses API only). + store: Whether to store responses for multi-turn (Responses API only). + previous_response_id: ID of previous response for multi-turn (Responses API only). + include: Additional data to include in response (Responses API only). + builtin_tools: List of OpenAI built-in tools to enable (Responses API only). + parse_tool_outputs: Whether to return structured ResponsesAPIResult (Responses API only). + auto_chain: Auto-track response IDs for multi-turn (Responses API only). + auto_chain_reasoning: Auto-track encrypted reasoning items for ZDR (Responses API only). + seed: Random seed for deterministic outputs. + reasoning_effort: Reasoning effort level for reasoning models. **kwargs: Additional parameters """ - if interceptor is not None: + if interceptor is not None and api != "responses": raise NotImplementedError( "HTTP interceptors are not yet supported for Azure AI Inference provider. " "Interceptors are currently supported for OpenAI and Anthropic providers only." @@ -128,12 +178,13 @@ class AzureCompletion(BaseLLM): ) self.api_key = api_key or os.getenv("AZURE_API_KEY") - self.endpoint = ( + self.base_endpoint = ( endpoint or os.getenv("AZURE_ENDPOINT") or os.getenv("AZURE_OPENAI_ENDPOINT") or os.getenv("AZURE_API_BASE") ) + self.api = api self.api_version = api_version or os.getenv("AZURE_API_VERSION") or "2024-06-01" self.timeout = timeout self.max_retries = max_retries @@ -142,34 +193,68 @@ class AzureCompletion(BaseLLM): raise ValueError( "Azure API key is required. Set AZURE_API_KEY environment variable or pass api_key parameter." ) - if not self.endpoint: + if not self.base_endpoint: raise ValueError( "Azure endpoint is required. Set AZURE_ENDPOINT environment variable or pass endpoint parameter." ) - # Validate and potentially fix Azure OpenAI endpoint URL - self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model) + # Store the base endpoint before validation modifies it + self.endpoint = self.base_endpoint - # Build client kwargs - client_kwargs = { - "endpoint": self.endpoint, - "credential": AzureKeyCredential(self.api_key), - } + # Responses API mode: delegate to OpenAICompletion with AzureOpenAI clients + self._responses_delegate: Any | None = None + if self.api == "responses": + self._init_responses_delegate( + model=model, + temperature=temperature, + top_p=top_p, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + max_tokens=max_tokens, + max_completion_tokens=max_completion_tokens, + stop=stop, + stream=stream, + response_format=response_format, + instructions=instructions, + store=store, + previous_response_id=previous_response_id, + include=include, + builtin_tools=builtin_tools, + parse_tool_outputs=parse_tool_outputs, + auto_chain=auto_chain, + auto_chain_reasoning=auto_chain_reasoning, + seed=seed, + reasoning_effort=reasoning_effort, + interceptor=interceptor, + api_version=api_version, + ) + else: + # Validate and potentially fix Azure OpenAI endpoint URL (completions mode) + self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model) - # Add api_version if specified (primarily for Azure OpenAI endpoints) - if self.api_version: - client_kwargs["api_version"] = self.api_version + # Build client kwargs + client_kwargs = { + "endpoint": self.endpoint, + "credential": AzureKeyCredential(self.api_key), + } - self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type] + # Add api_version if specified (primarily for Azure OpenAI endpoints) + if self.api_version: + client_kwargs["api_version"] = self.api_version - self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type] + self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type] + + self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type] self.top_p = top_p self.frequency_penalty = frequency_penalty self.presence_penalty = presence_penalty self.max_tokens = max_tokens + self.max_completion_tokens = max_completion_tokens self.stream = stream self.response_format = response_format + self.seed = seed + self.reasoning_effort = reasoning_effort self.is_openai_model = any( prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"] @@ -180,6 +265,100 @@ class AzureCompletion(BaseLLM): and "/openai/deployments/" in self.endpoint ) + def _init_responses_delegate( + self, + model: str, + temperature: float | None = None, + top_p: float | None = None, + frequency_penalty: float | None = None, + presence_penalty: float | None = None, + max_tokens: int | None = None, + max_completion_tokens: int | None = None, + stop: list[str] | None = None, + stream: bool = False, + response_format: type[BaseModel] | None = None, + instructions: str | None = None, + store: bool | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + builtin_tools: list[str] | None = None, + parse_tool_outputs: bool = False, + auto_chain: bool = False, + auto_chain_reasoning: bool = False, + seed: int | None = None, + reasoning_effort: str | None = None, + interceptor: BaseInterceptor[Any, Any] | None = None, + api_version: str | None = None, + ) -> None: + """Initialize the Responses API delegate using OpenAICompletion with AzureOpenAI clients. + + Creates an OpenAICompletion instance and replaces its OpenAI clients with + AzureOpenAI/AsyncAzureOpenAI clients configured with Azure credentials. + """ + try: + from openai import AzureOpenAI, AsyncAzureOpenAI + except ImportError: + raise ImportError( + "OpenAI package is required for Azure Responses API support. " + 'Install it with: uv add "crewai[openai]" or pip install openai' + ) from None + + from crewai.llms.providers.openai.completion import OpenAICompletion + + # Determine the correct API version for Responses API + responses_api_version = api_version or os.getenv("AZURE_API_VERSION") or AZURE_RESPONSES_API_VERSION + + # Extract the base Azure endpoint (without /openai/deployments/...) + azure_endpoint = self.base_endpoint or "" + azure_endpoint = azure_endpoint.rstrip("/") + # Strip /openai/deployments/... suffix if present + if "/openai/deployments/" in azure_endpoint: + azure_endpoint = azure_endpoint.split("/openai/deployments/")[0] + + # Build AzureOpenAI client kwargs + azure_kwargs: dict[str, Any] = { + "azure_endpoint": azure_endpoint, + "api_key": self.api_key, + "api_version": responses_api_version, + } + if self.timeout is not None: + azure_kwargs["timeout"] = self.timeout + if self.max_retries: + azure_kwargs["max_retries"] = self.max_retries + + # Create the OpenAICompletion delegate with responses API config + delegate = OpenAICompletion( + model=model, + api_key=self.api_key, + api="responses", + temperature=temperature, + top_p=top_p, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + max_tokens=max_tokens, + max_completion_tokens=max_completion_tokens, + stop=stop, + stream=stream, + response_format=response_format, + instructions=instructions, + store=store, + previous_response_id=previous_response_id, + include=include, + builtin_tools=builtin_tools, + parse_tool_outputs=parse_tool_outputs, + auto_chain=auto_chain, + auto_chain_reasoning=auto_chain_reasoning, + seed=seed, + reasoning_effort=reasoning_effort, + interceptor=interceptor, + ) + + # Replace the OpenAI clients with AzureOpenAI clients + delegate.client = AzureOpenAI(**azure_kwargs) # type: ignore[assignment] + delegate.async_client = AsyncAzureOpenAI(**azure_kwargs) # type: ignore[assignment] + + self._responses_delegate = delegate + @staticmethod def _validate_and_fix_endpoint(endpoint: str, model: str) -> str: """Validate and fix Azure endpoint URL format. @@ -269,6 +448,30 @@ class AzureCompletion(BaseLLM): ) raise error + @property + def last_response_id(self) -> str | None: + """Get the last response ID from auto-chaining (Responses API only).""" + if self._responses_delegate is not None: + return self._responses_delegate.last_response_id + return None + + def reset_chain(self) -> None: + """Reset the auto-chain state (Responses API only).""" + if self._responses_delegate is not None: + self._responses_delegate.reset_chain() + + @property + def last_reasoning_items(self) -> list[Any] | None: + """Get the last reasoning items from auto-chain reasoning (Responses API only).""" + if self._responses_delegate is not None: + return self._responses_delegate.last_reasoning_items + return None + + def reset_reasoning_chain(self) -> None: + """Reset the reasoning chain state (Responses API only).""" + if self._responses_delegate is not None: + self._responses_delegate.reset_reasoning_chain() + def call( self, messages: str | list[LLMMessage], @@ -279,7 +482,7 @@ class AzureCompletion(BaseLLM): from_agent: Any | None = None, response_model: type[BaseModel] | None = None, ) -> str | Any: - """Call Azure AI Inference chat completions API. + """Call Azure AI Inference API (Chat Completions or Responses based on api setting). Args: messages: Input messages for the chat completion @@ -293,6 +496,18 @@ class AzureCompletion(BaseLLM): Returns: Chat completion response or tool call result """ + # Delegate to Responses API if configured + if self.api == "responses" and self._responses_delegate is not None: + return self._responses_delegate.call( + messages=messages, + tools=tools, + callbacks=callbacks, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + with llm_call_context(): try: # Emit call started event @@ -351,7 +566,7 @@ class AzureCompletion(BaseLLM): from_agent: Any | None = None, response_model: type[BaseModel] | None = None, ) -> str | Any: - """Call Azure AI Inference chat completions API asynchronously. + """Call Azure AI Inference API asynchronously (Chat Completions or Responses). Args: messages: Input messages for the chat completion @@ -365,6 +580,18 @@ class AzureCompletion(BaseLLM): Returns: Chat completion response or tool call result """ + # Delegate to Responses API if configured + if self.api == "responses" and self._responses_delegate is not None: + return await self._responses_delegate.acall( + messages=messages, + tools=tools, + callbacks=callbacks, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + with llm_call_context(): try: self._emit_call_started_event( diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index d25b607a8..a803a091a 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -1403,3 +1403,594 @@ def test_azure_stop_words_still_applied_to_regular_responses(): assert "Observation:" not in result assert "Found results" not in result assert "I need to search for more information" in result + + +# ============================================================================= +# Azure Responses API Tests +# ============================================================================= + + +def test_azure_responses_api_initialization(): + """Test that AzureCompletion can be initialized with api='responses'.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + instructions="You are a helpful assistant.", + store=True, + ) + + assert llm.api == "responses" + assert llm._responses_delegate is not None + assert llm._responses_delegate.api == "responses" + assert llm._responses_delegate.instructions == "You are a helpful assistant." + assert llm._responses_delegate.store is True + assert llm.model == "gpt-4o" + + +def test_azure_responses_api_default_is_completions(): + """Test that the default API is 'completions' for backward compatibility.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + ) + + assert llm.api == "completions" + assert llm._responses_delegate is None + + +def test_azure_responses_api_delegate_uses_azure_openai_clients(): + """Test that the delegate's clients are AzureOpenAI instances, not plain OpenAI.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + + from openai import AzureOpenAI, AsyncAzureOpenAI + + assert isinstance(llm._responses_delegate.client, AzureOpenAI) + assert isinstance(llm._responses_delegate.async_client, AsyncAzureOpenAI) + + +def test_azure_responses_api_strips_deployment_suffix_for_azure_endpoint(): + """Test that /openai/deployments/... suffix is stripped when building Azure clients.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com/openai/deployments/gpt-4o", + api="responses", + ) + + # The delegate should have been created + assert llm._responses_delegate is not None + # The delegate's client should point to the base Azure endpoint + from openai import AzureOpenAI + assert isinstance(llm._responses_delegate.client, AzureOpenAI) + + +def test_azure_responses_api_uses_correct_api_version(): + """Test that the Responses API uses the correct API version.""" + from crewai.llms.providers.azure.completion import ( + AzureCompletion, + AZURE_RESPONSES_API_VERSION, + ) + + # Default version + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + + assert llm._responses_delegate is not None + # Check that the AzureOpenAI client was created with the right version + assert AZURE_RESPONSES_API_VERSION == "2025-03-01-preview" + + # Custom version + llm_custom = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + api_version="2025-06-01", + ) + assert llm_custom._responses_delegate is not None + + +def test_azure_responses_api_passes_all_params_to_delegate(): + """Test that all Responses API params are forwarded to the delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + instructions="Be concise.", + store=True, + previous_response_id="resp_abc123", + include=["reasoning.encrypted_content"], + builtin_tools=["web_search"], + parse_tool_outputs=True, + auto_chain=True, + auto_chain_reasoning=True, + temperature=0.5, + top_p=0.9, + seed=42, + reasoning_effort="high", + ) + + delegate = llm._responses_delegate + assert delegate is not None + assert delegate.instructions == "Be concise." + assert delegate.store is True + assert delegate.previous_response_id == "resp_abc123" + assert delegate.include == ["reasoning.encrypted_content"] + assert delegate.builtin_tools == ["web_search"] + assert delegate.parse_tool_outputs is True + assert delegate.auto_chain is True + assert delegate.auto_chain_reasoning is True + assert delegate.temperature == 0.5 + assert delegate.top_p == 0.9 + assert delegate.seed == 42 + assert delegate.reasoning_effort == "high" + + +def test_azure_responses_api_call_delegates_to_openai_completion(): + """Test that call() delegates to the internal OpenAICompletion when api='responses'.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + + with patch.object( + llm._responses_delegate, "call", return_value="Hello from Responses API!" + ) as mock_call: + result = llm.call("Hello!") + + mock_call.assert_called_once_with( + messages="Hello!", + tools=None, + callbacks=None, + available_functions=None, + from_task=None, + from_agent=None, + response_model=None, + ) + assert result == "Hello from Responses API!" + + +@pytest.mark.asyncio +async def test_azure_responses_api_acall_delegates_to_openai_completion(): + """Test that acall() delegates to the internal OpenAICompletion when api='responses'.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + + from unittest.mock import AsyncMock + + llm._responses_delegate.acall = AsyncMock(return_value="Async hello from Responses API!") + + result = await llm.acall("Hello async!") + + llm._responses_delegate.acall.assert_called_once_with( + messages="Hello async!", + tools=None, + callbacks=None, + available_functions=None, + from_task=None, + from_agent=None, + response_model=None, + ) + assert result == "Async hello from Responses API!" + + +def test_azure_responses_api_call_with_tools(): + """Test that call() passes tools to the delegate for Responses API.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + + tools = [ + { + "name": "get_weather", + "description": "Get the weather for a location", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + } + ] + + with patch.object( + llm._responses_delegate, "call", return_value="It's sunny." + ) as mock_call: + result = llm.call( + messages=[{"role": "user", "content": "What's the weather?"}], + tools=tools, + available_functions={"get_weather": lambda loc: "Sunny"}, + ) + + mock_call.assert_called_once() + call_kwargs = mock_call.call_args + assert call_kwargs.kwargs["tools"] == tools + assert result == "It's sunny." + + +def test_azure_responses_api_call_with_response_model(): + """Test that call() passes response_model to the delegate for structured output.""" + from crewai.llms.providers.azure.completion import AzureCompletion + from pydantic import BaseModel + + class WeatherResult(BaseModel): + temperature: float + condition: str + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + + with patch.object( + llm._responses_delegate, "call", return_value='{"temperature": 72.0, "condition": "sunny"}' + ) as mock_call: + result = llm.call( + messages="What's the weather?", + response_model=WeatherResult, + ) + + mock_call.assert_called_once() + assert mock_call.call_args.kwargs["response_model"] == WeatherResult + + +def test_azure_responses_api_last_response_id_property(): + """Test that last_response_id property delegates to the internal delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + auto_chain=True, + ) + + # Initially None + assert llm.last_response_id is None + + # Set the delegate's internal state + llm._responses_delegate._last_response_id = "resp_test123" + assert llm.last_response_id == "resp_test123" + + +def test_azure_responses_api_last_response_id_returns_none_for_completions(): + """Test that last_response_id returns None when api='completions'.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + ) + + assert llm.last_response_id is None + + +def test_azure_responses_api_reset_chain(): + """Test that reset_chain delegates to the internal delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + auto_chain=True, + ) + + # Set and then reset + llm._responses_delegate._last_response_id = "resp_test123" + assert llm.last_response_id == "resp_test123" + + llm.reset_chain() + assert llm.last_response_id is None + + +def test_azure_responses_api_reset_chain_no_op_for_completions(): + """Test that reset_chain is a no-op when api='completions'.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + ) + + # Should not raise + llm.reset_chain() + + +def test_azure_responses_api_last_reasoning_items_property(): + """Test that last_reasoning_items property delegates to the internal delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + auto_chain_reasoning=True, + ) + + # Initially None + assert llm.last_reasoning_items is None + + # Set the delegate's internal state + mock_items = [{"type": "reasoning", "id": "rs_test"}] + llm._responses_delegate._last_reasoning_items = mock_items + assert llm.last_reasoning_items == mock_items + + +def test_azure_responses_api_last_reasoning_items_returns_none_for_completions(): + """Test that last_reasoning_items returns None when api='completions'.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + ) + + assert llm.last_reasoning_items is None + + +def test_azure_responses_api_reset_reasoning_chain(): + """Test that reset_reasoning_chain delegates to the internal delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + auto_chain_reasoning=True, + ) + + # Set and then reset + llm._responses_delegate._last_reasoning_items = [{"type": "reasoning"}] + assert llm.last_reasoning_items is not None + + llm.reset_reasoning_chain() + assert llm.last_reasoning_items is None + + +def test_azure_responses_api_reset_reasoning_chain_no_op_for_completions(): + """Test that reset_reasoning_chain is a no-op when api='completions'.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + ) + + # Should not raise + llm.reset_reasoning_chain() + + +def test_azure_responses_api_completions_mode_unaffected(): + """Test that existing completions mode behavior is not affected by responses changes.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4", + api_key="test-key", + endpoint="https://test.openai.azure.com", + ) + + assert llm.api == "completions" + assert llm._responses_delegate is None + # Should have the Azure AI Inference client + assert hasattr(llm, "client") + assert hasattr(llm, "async_client") + + +def test_azure_responses_api_interceptor_allowed(): + """Test that interceptors are allowed when api='responses' (since they go through OpenAI SDK).""" + from crewai.llms.providers.azure.completion import AzureCompletion + + mock_interceptor = MagicMock() + + # This should NOT raise + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + interceptor=mock_interceptor, + ) + assert llm._responses_delegate is not None + + +def test_azure_responses_api_interceptor_blocked_for_completions(): + """Test that interceptors are still blocked for completions mode.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + mock_interceptor = MagicMock() + + with pytest.raises(NotImplementedError, match="HTTP interceptors are not yet supported"): + AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="completions", + interceptor=mock_interceptor, + ) + + +def test_azure_responses_api_builtin_tools(): + """Test that builtin_tools param is forwarded to the delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + builtin_tools=["web_search", "code_interpreter"], + ) + + assert llm._responses_delegate.builtin_tools == ["web_search", "code_interpreter"] + + +def test_azure_responses_api_with_previous_response_id(): + """Test that previous_response_id is forwarded to the delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + previous_response_id="resp_abc123", + store=True, + ) + + delegate = llm._responses_delegate + assert delegate.previous_response_id == "resp_abc123" + assert delegate.store is True + + +def test_azure_responses_api_env_var_api_version(): + """Test that AZURE_API_VERSION env var is used for responses API version.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + with patch.dict(os.environ, {"AZURE_API_VERSION": "2025-10-01"}): + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + assert llm._responses_delegate is not None + + +def test_azure_responses_api_timeout_and_retries(): + """Test that timeout and max_retries are passed to the Azure clients.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + timeout=30.0, + max_retries=5, + ) + + assert llm._responses_delegate is not None + assert llm.timeout == 30.0 + assert llm.max_retries == 5 + + +def test_azure_responses_api_streaming_param(): + """Test that stream parameter is forwarded to the delegate.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + stream=True, + ) + + assert llm._responses_delegate.stream is True + + +def test_azure_responses_api_with_non_azure_openai_endpoint(): + """Test Responses API with a non-azure-openai endpoint (e.g., Azure AI Foundry).""" + from crewai.llms.providers.azure.completion import AzureCompletion + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://models.inference.ai.azure.com", + api="responses", + ) + + assert llm._responses_delegate is not None + from openai import AzureOpenAI + assert isinstance(llm._responses_delegate.client, AzureOpenAI) + + +def test_azure_responses_api_base_endpoint_preserved(): + """Test that base_endpoint is preserved and not modified by endpoint validation.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + endpoint = "https://test.openai.azure.com" + + llm = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint=endpoint, + api="responses", + ) + + # base_endpoint should be the original, unmodified endpoint + assert llm.base_endpoint == endpoint + # endpoint should also be the original since responses mode skips validation + assert llm.endpoint == endpoint + + +def test_azure_responses_api_endpoint_not_validated_for_responses(): + """Test that endpoint URL validation (adding /openai/deployments/) is skipped for responses mode.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + # In completions mode, this endpoint would get /openai/deployments/gpt-4o appended + llm_completions = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="completions", + ) + assert "/openai/deployments/" in llm_completions.endpoint + + # In responses mode, the endpoint should NOT be modified + llm_responses = AzureCompletion( + model="gpt-4o", + api_key="test-key", + endpoint="https://test.openai.azure.com", + api="responses", + ) + assert llm_responses.endpoint == "https://test.openai.azure.com"