Compare commits

...

1 Commits

Author SHA1 Message Date
Devin AI
f4fa90dc04 feat: add Responses API support for Azure OpenAI provider
When api='responses' is specified for Azure, the provider creates an
internal OpenAICompletion instance configured with
AzureOpenAI/AsyncAzureOpenAI clients from the openai Python SDK, which
natively supports the Responses API on Azure.

Key changes:
- Extended AzureCompletion.__init__() with Responses API parameters
  (api, instructions, store, previous_response_id, include,
  builtin_tools, parse_tool_outputs, auto_chain, auto_chain_reasoning,
  seed, reasoning_effort, max_completion_tokens)
- Added _init_responses_delegate() method using composition/delegation
- Added delegation logic in call() and acall() methods
- Added pass-through properties: last_response_id, last_reasoning_items
- Added pass-through methods: reset_chain(), reset_reasoning_chain()
- Preserved base endpoint before validation for Azure client config
- Interceptors now allowed in responses mode (via OpenAI SDK)
- Added AZURE_RESPONSES_API_VERSION constant (2025-03-01-preview)
- Added 30+ comprehensive tests covering all new functionality

Closes #4974

Co-Authored-By: João <joao@crewai.com>
2026-03-20 07:22:40 +00:00
2 changed files with 836 additions and 18 deletions

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import json
import logging
import os
from typing import TYPE_CHECKING, Any, TypedDict
from typing import TYPE_CHECKING, Any, Literal, TypedDict
from pydantic import BaseModel
from typing_extensions import Self
@@ -69,11 +69,37 @@ class AzureCompletionParams(TypedDict, total=False):
tool_choice: str
# Default API version for Azure Responses API support
AZURE_RESPONSES_API_VERSION = "2025-03-01-preview"
class AzureCompletion(BaseLLM):
"""Azure AI Inference native completion implementation.
This class provides direct integration with the Azure AI Inference Python SDK,
offering native function calling, streaming support, and proper Azure authentication.
Supports both the Chat Completions API (default) and the Responses API.
When api="responses" is specified, the class delegates to an internal
OpenAICompletion instance configured with AzureOpenAI clients from the
openai Python SDK, which natively supports the Responses API on Azure.
Args:
api: Which API to use - "completions" (default) or "responses".
When "responses" is selected, Azure OpenAI Responses API is used
via the openai Python SDK's AzureOpenAI client.
instructions: System-level instructions (Responses API only).
store: Whether to store responses for multi-turn (Responses API only).
previous_response_id: ID of previous response for multi-turn (Responses API only).
include: Additional data to include in response (Responses API only).
builtin_tools: List of OpenAI built-in tools to enable (Responses API only).
Supported: "web_search", "file_search", "code_interpreter", "computer_use".
parse_tool_outputs: Whether to return structured ResponsesAPIResult with
parsed built-in tool outputs instead of just text (Responses API only).
auto_chain: Automatically track and use response IDs for multi-turn
conversations (Responses API only).
auto_chain_reasoning: Automatically track and pass encrypted reasoning items
for ZDR (Zero Data Retention) compliance (Responses API only).
"""
def __init__(
@@ -89,10 +115,22 @@ class AzureCompletion(BaseLLM):
frequency_penalty: float | None = None,
presence_penalty: float | None = None,
max_tokens: int | None = None,
max_completion_tokens: int | None = None,
stop: list[str] | None = None,
stream: bool = False,
interceptor: BaseInterceptor[Any, Any] | None = None,
response_format: type[BaseModel] | None = None,
api: Literal["completions", "responses"] = "completions",
instructions: str | None = None,
store: bool | None = None,
previous_response_id: str | None = None,
include: list[str] | None = None,
builtin_tools: list[str] | None = None,
parse_tool_outputs: bool = False,
auto_chain: bool = False,
auto_chain_reasoning: bool = False,
seed: int | None = None,
reasoning_effort: str | None = None,
**kwargs: Any,
):
"""Initialize Azure AI Inference chat completion client.
@@ -109,15 +147,27 @@ class AzureCompletion(BaseLLM):
frequency_penalty: Frequency penalty (-2 to 2)
presence_penalty: Presence penalty (-2 to 2)
max_tokens: Maximum tokens in response
max_completion_tokens: Maximum completion tokens in response
stop: Stop sequences
stream: Enable streaming responses
interceptor: HTTP interceptor (not yet supported for Azure).
response_format: Pydantic model for structured output. Used as default when
response_model is not passed to call()/acall() methods.
Only works with OpenAI models deployed on Azure.
api: Which API to use - "completions" (default) or "responses".
instructions: System-level instructions (Responses API only).
store: Whether to store responses for multi-turn (Responses API only).
previous_response_id: ID of previous response for multi-turn (Responses API only).
include: Additional data to include in response (Responses API only).
builtin_tools: List of OpenAI built-in tools to enable (Responses API only).
parse_tool_outputs: Whether to return structured ResponsesAPIResult (Responses API only).
auto_chain: Auto-track response IDs for multi-turn (Responses API only).
auto_chain_reasoning: Auto-track encrypted reasoning items for ZDR (Responses API only).
seed: Random seed for deterministic outputs.
reasoning_effort: Reasoning effort level for reasoning models.
**kwargs: Additional parameters
"""
if interceptor is not None:
if interceptor is not None and api != "responses":
raise NotImplementedError(
"HTTP interceptors are not yet supported for Azure AI Inference provider. "
"Interceptors are currently supported for OpenAI and Anthropic providers only."
@@ -128,12 +178,13 @@ class AzureCompletion(BaseLLM):
)
self.api_key = api_key or os.getenv("AZURE_API_KEY")
self.endpoint = (
self.base_endpoint = (
endpoint
or os.getenv("AZURE_ENDPOINT")
or os.getenv("AZURE_OPENAI_ENDPOINT")
or os.getenv("AZURE_API_BASE")
)
self.api = api
self.api_version = api_version or os.getenv("AZURE_API_VERSION") or "2024-06-01"
self.timeout = timeout
self.max_retries = max_retries
@@ -142,34 +193,68 @@ class AzureCompletion(BaseLLM):
raise ValueError(
"Azure API key is required. Set AZURE_API_KEY environment variable or pass api_key parameter."
)
if not self.endpoint:
if not self.base_endpoint:
raise ValueError(
"Azure endpoint is required. Set AZURE_ENDPOINT environment variable or pass endpoint parameter."
)
# Validate and potentially fix Azure OpenAI endpoint URL
self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model)
# Store the base endpoint before validation modifies it
self.endpoint = self.base_endpoint
# Build client kwargs
client_kwargs = {
"endpoint": self.endpoint,
"credential": AzureKeyCredential(self.api_key),
}
# Responses API mode: delegate to OpenAICompletion with AzureOpenAI clients
self._responses_delegate: Any | None = None
if self.api == "responses":
self._init_responses_delegate(
model=model,
temperature=temperature,
top_p=top_p,
frequency_penalty=frequency_penalty,
presence_penalty=presence_penalty,
max_tokens=max_tokens,
max_completion_tokens=max_completion_tokens,
stop=stop,
stream=stream,
response_format=response_format,
instructions=instructions,
store=store,
previous_response_id=previous_response_id,
include=include,
builtin_tools=builtin_tools,
parse_tool_outputs=parse_tool_outputs,
auto_chain=auto_chain,
auto_chain_reasoning=auto_chain_reasoning,
seed=seed,
reasoning_effort=reasoning_effort,
interceptor=interceptor,
api_version=api_version,
)
else:
# Validate and potentially fix Azure OpenAI endpoint URL (completions mode)
self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model)
# Add api_version if specified (primarily for Azure OpenAI endpoints)
if self.api_version:
client_kwargs["api_version"] = self.api_version
# Build client kwargs
client_kwargs = {
"endpoint": self.endpoint,
"credential": AzureKeyCredential(self.api_key),
}
self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
# Add api_version if specified (primarily for Azure OpenAI endpoints)
if self.api_version:
client_kwargs["api_version"] = self.api_version
self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
self.top_p = top_p
self.frequency_penalty = frequency_penalty
self.presence_penalty = presence_penalty
self.max_tokens = max_tokens
self.max_completion_tokens = max_completion_tokens
self.stream = stream
self.response_format = response_format
self.seed = seed
self.reasoning_effort = reasoning_effort
self.is_openai_model = any(
prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"]
@@ -180,6 +265,100 @@ class AzureCompletion(BaseLLM):
and "/openai/deployments/" in self.endpoint
)
def _init_responses_delegate(
    self,
    model: str,
    temperature: float | None = None,
    top_p: float | None = None,
    frequency_penalty: float | None = None,
    presence_penalty: float | None = None,
    max_tokens: int | None = None,
    max_completion_tokens: int | None = None,
    stop: list[str] | None = None,
    stream: bool = False,
    response_format: type[BaseModel] | None = None,
    instructions: str | None = None,
    store: bool | None = None,
    previous_response_id: str | None = None,
    include: list[str] | None = None,
    builtin_tools: list[str] | None = None,
    parse_tool_outputs: bool = False,
    auto_chain: bool = False,
    auto_chain_reasoning: bool = False,
    seed: int | None = None,
    reasoning_effort: str | None = None,
    interceptor: BaseInterceptor[Any, Any] | None = None,
    api_version: str | None = None,
) -> None:
    """Initialize the Responses API delegate using OpenAICompletion with AzureOpenAI clients.

    Creates an OpenAICompletion instance configured for the Responses API and
    replaces its OpenAI clients with AzureOpenAI/AsyncAzureOpenAI clients built
    from this instance's Azure credentials (`base_endpoint`, `api_key`,
    `timeout`, `max_retries`).

    Args:
        model: Deployment/model name forwarded to the delegate.
        api_version: Explicit Azure API version; falls back to the
            AZURE_API_VERSION env var, then AZURE_RESPONSES_API_VERSION.
        interceptor: HTTP interceptor forwarded to the OpenAI SDK delegate.
        (Remaining parameters are forwarded verbatim to OpenAICompletion.)

    Raises:
        ImportError: If the `openai` package is not installed.
    """
    try:
        from openai import AsyncAzureOpenAI, AzureOpenAI
    except ImportError:
        raise ImportError(
            "OpenAI package is required for Azure Responses API support. "
            'Install it with: uv add "crewai[openai]" or pip install openai'
        ) from None

    from crewai.llms.providers.openai.completion import OpenAICompletion

    # The Responses API needs a newer api-version than the chat default.
    responses_api_version = (
        api_version or os.getenv("AZURE_API_VERSION") or AZURE_RESPONSES_API_VERSION
    )

    # AzureOpenAI expects the resource root; strip any deployment path suffix.
    azure_endpoint = (self.base_endpoint or "").rstrip("/")
    if "/openai/deployments/" in azure_endpoint:
        azure_endpoint = azure_endpoint.split("/openai/deployments/")[0]

    azure_kwargs: dict[str, Any] = {
        "azure_endpoint": azure_endpoint,
        "api_key": self.api_key,
        "api_version": responses_api_version,
    }
    if self.timeout is not None:
        azure_kwargs["timeout"] = self.timeout
    # Explicit None check: `if self.max_retries:` would silently drop
    # max_retries=0 (i.e. "disable retries") and leave the SDK default active.
    if self.max_retries is not None:
        azure_kwargs["max_retries"] = self.max_retries

    # Create the OpenAICompletion delegate with the responses API config.
    delegate = OpenAICompletion(
        model=model,
        api_key=self.api_key,
        api="responses",
        temperature=temperature,
        top_p=top_p,
        frequency_penalty=frequency_penalty,
        presence_penalty=presence_penalty,
        max_tokens=max_tokens,
        max_completion_tokens=max_completion_tokens,
        stop=stop,
        stream=stream,
        response_format=response_format,
        instructions=instructions,
        store=store,
        previous_response_id=previous_response_id,
        include=include,
        builtin_tools=builtin_tools,
        parse_tool_outputs=parse_tool_outputs,
        auto_chain=auto_chain,
        auto_chain_reasoning=auto_chain_reasoning,
        seed=seed,
        reasoning_effort=reasoning_effort,
        interceptor=interceptor,
    )

    # Swap in Azure-configured clients so Responses calls hit the Azure endpoint.
    delegate.client = AzureOpenAI(**azure_kwargs)  # type: ignore[assignment]
    delegate.async_client = AsyncAzureOpenAI(**azure_kwargs)  # type: ignore[assignment]
    self._responses_delegate = delegate
@staticmethod
def _validate_and_fix_endpoint(endpoint: str, model: str) -> str:
"""Validate and fix Azure endpoint URL format.
@@ -269,6 +448,30 @@ class AzureCompletion(BaseLLM):
)
raise error
@property
def last_response_id(self) -> str | None:
"""Get the last response ID from auto-chaining (Responses API only)."""
if self._responses_delegate is not None:
return self._responses_delegate.last_response_id
return None
def reset_chain(self) -> None:
    """Clear the auto-chain state; no-op without a Responses delegate."""
    delegate = self._responses_delegate
    if delegate is not None:
        delegate.reset_chain()
@property
def last_reasoning_items(self) -> list[Any] | None:
"""Get the last reasoning items from auto-chain reasoning (Responses API only)."""
if self._responses_delegate is not None:
return self._responses_delegate.last_reasoning_items
return None
def reset_reasoning_chain(self) -> None:
    """Clear the reasoning-chain state; no-op without a Responses delegate."""
    delegate = self._responses_delegate
    if delegate is not None:
        delegate.reset_reasoning_chain()
def call(
self,
messages: str | list[LLMMessage],
@@ -279,7 +482,7 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API.
"""Call Azure AI Inference API (Chat Completions or Responses based on api setting).
Args:
messages: Input messages for the chat completion
@@ -293,6 +496,18 @@ class AzureCompletion(BaseLLM):
Returns:
Chat completion response or tool call result
"""
# Delegate to Responses API if configured
if self.api == "responses" and self._responses_delegate is not None:
return self._responses_delegate.call(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
# Emit call started event
@@ -351,7 +566,7 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API asynchronously.
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).
Args:
messages: Input messages for the chat completion
@@ -365,6 +580,18 @@ class AzureCompletion(BaseLLM):
Returns:
Chat completion response or tool call result
"""
# Delegate to Responses API if configured
if self.api == "responses" and self._responses_delegate is not None:
return await self._responses_delegate.acall(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
self._emit_call_started_event(

View File

@@ -1403,3 +1403,594 @@ def test_azure_stop_words_still_applied_to_regular_responses():
assert "Observation:" not in result
assert "Found results" not in result
assert "I need to search for more information" in result
# =============================================================================
# Azure Responses API Tests
# =============================================================================
def test_azure_responses_api_initialization():
    """AzureCompletion with api='responses' builds an internal delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        instructions="You are a helpful assistant.",
        store=True,
    )

    delegate = llm._responses_delegate
    assert llm.api == "responses"
    assert llm.model == "gpt-4o"
    assert delegate is not None
    assert delegate.api == "responses"
    assert delegate.instructions == "You are a helpful assistant."
    assert delegate.store is True
def test_azure_responses_api_default_is_completions():
    """Omitting `api` keeps the legacy Chat Completions behavior (no delegate)."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.api == "completions"
    assert llm._responses_delegate is None
def test_azure_responses_api_delegate_uses_azure_openai_clients():
    """The delegate must carry AzureOpenAI/AsyncAzureOpenAI clients, not plain OpenAI ones."""
    from openai import AsyncAzureOpenAI, AzureOpenAI

    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
        api="responses",
    )

    delegate = llm._responses_delegate
    assert isinstance(delegate.client, AzureOpenAI)
    assert isinstance(delegate.async_client, AsyncAzureOpenAI)
def test_azure_responses_api_strips_deployment_suffix_for_azure_endpoint():
    """Test that /openai/deployments/... is stripped before building Azure clients."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    # Capture the AzureOpenAI constructor call so the endpoint can be inspected.
    with patch("openai.AzureOpenAI") as mock_azure_openai:
        llm = AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com/openai/deployments/gpt-4o",
            api="responses",
        )

    # The delegate should have been created
    assert llm._responses_delegate is not None
    # The client must be pointed at the base resource URL with the deployment
    # path removed (previously only an isinstance check was made here, which
    # did not actually verify the stripping behavior the test is named for).
    assert (
        mock_azure_openai.call_args.kwargs["azure_endpoint"]
        == "https://test.openai.azure.com"
    )
def test_azure_responses_api_uses_correct_api_version():
    """Test that the Responses API version actually reaches the AzureOpenAI client."""
    from crewai.llms.providers.azure.completion import (
        AzureCompletion,
        AZURE_RESPONSES_API_VERSION,
    )

    assert AZURE_RESPONSES_API_VERSION == "2025-03-01-preview"

    # Default version: the AzureOpenAI client must be constructed with it.
    # patch.dict snapshots os.environ; pop guards against AZURE_API_VERSION
    # leaking in from the test environment and masking the default.
    with patch.dict(os.environ):
        os.environ.pop("AZURE_API_VERSION", None)
        with patch("openai.AzureOpenAI") as mock_azure_openai:
            llm = AzureCompletion(
                model="gpt-4o",
                api_key="test-key",
                endpoint="https://test.openai.azure.com",
                api="responses",
            )
    assert llm._responses_delegate is not None
    # Previously only the module constant was asserted; check the constructor
    # kwargs so the version is verified to reach the client.
    assert (
        mock_azure_openai.call_args.kwargs["api_version"]
        == AZURE_RESPONSES_API_VERSION
    )

    # An explicit api_version overrides the default.
    with patch("openai.AzureOpenAI") as mock_azure_openai:
        llm_custom = AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="responses",
            api_version="2025-06-01",
        )
    assert llm_custom._responses_delegate is not None
    assert mock_azure_openai.call_args.kwargs["api_version"] == "2025-06-01"
def test_azure_responses_api_passes_all_params_to_delegate():
    """Every Responses-API option must be mirrored verbatim onto the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    expected = {
        "instructions": "Be concise.",
        "store": True,
        "previous_response_id": "resp_abc123",
        "include": ["reasoning.encrypted_content"],
        "builtin_tools": ["web_search"],
        "parse_tool_outputs": True,
        "auto_chain": True,
        "auto_chain_reasoning": True,
        "temperature": 0.5,
        "top_p": 0.9,
        "seed": 42,
        "reasoning_effort": "high",
    }
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        **expected,
    )

    delegate = llm._responses_delegate
    assert delegate is not None
    for attribute, value in expected.items():
        assert getattr(delegate, attribute) == value
def test_azure_responses_api_call_delegates_to_openai_completion():
    """call() must forward straight to the delegate when api='responses'."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )

    with patch.object(
        llm._responses_delegate, "call", return_value="Hello from Responses API!"
    ) as delegate_call:
        assert llm.call("Hello!") == "Hello from Responses API!"

    delegate_call.assert_called_once_with(
        messages="Hello!",
        tools=None,
        callbacks=None,
        available_functions=None,
        from_task=None,
        from_agent=None,
        response_model=None,
    )
@pytest.mark.asyncio
async def test_azure_responses_api_acall_delegates_to_openai_completion():
    """acall() must forward straight to the delegate when api='responses'."""
    from unittest.mock import AsyncMock

    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )
    async_call = AsyncMock(return_value="Async hello from Responses API!")
    llm._responses_delegate.acall = async_call

    result = await llm.acall("Hello async!")

    assert result == "Async hello from Responses API!"
    async_call.assert_called_once_with(
        messages="Hello async!",
        tools=None,
        callbacks=None,
        available_functions=None,
        from_task=None,
        from_agent=None,
        response_model=None,
    )
def test_azure_responses_api_call_with_tools():
    """Tool definitions must reach the delegate unchanged in responses mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )
    weather_tool = {
        "name": "get_weather",
        "description": "Get the weather for a location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    }

    with patch.object(
        llm._responses_delegate, "call", return_value="It's sunny."
    ) as delegate_call:
        result = llm.call(
            messages=[{"role": "user", "content": "What's the weather?"}],
            tools=[weather_tool],
            available_functions={"get_weather": lambda loc: "Sunny"},
        )

    assert result == "It's sunny."
    delegate_call.assert_called_once()
    assert delegate_call.call_args.kwargs["tools"] == [weather_tool]
def test_azure_responses_api_call_with_response_model():
    """call() passes response_model to the delegate and returns the delegate's result."""
    from pydantic import BaseModel

    from crewai.llms.providers.azure.completion import AzureCompletion

    class WeatherResult(BaseModel):
        temperature: float
        condition: str

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
    )

    with patch.object(
        llm._responses_delegate,
        "call",
        return_value='{"temperature": 72.0, "condition": "sunny"}',
    ) as mock_call:
        result = llm.call(
            messages="What's the weather?",
            response_model=WeatherResult,
        )

    mock_call.assert_called_once()
    assert mock_call.call_args.kwargs["response_model"] == WeatherResult
    # Previously `result` was assigned but never asserted; pin the pass-through.
    assert result == '{"temperature": 72.0, "condition": "sunny"}'
def test_azure_responses_api_last_response_id_property():
    """last_response_id mirrors the delegate's tracked response ID."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain=True,
    )

    assert llm.last_response_id is None  # nothing tracked yet

    llm._responses_delegate._last_response_id = "resp_test123"
    assert llm.last_response_id == "resp_test123"
def test_azure_responses_api_last_response_id_returns_none_for_completions():
    """Without a Responses delegate (completions mode) last_response_id is None."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.last_response_id is None
def test_azure_responses_api_reset_chain():
    """reset_chain() clears the delegate's tracked response ID."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain=True,
    )
    llm._responses_delegate._last_response_id = "resp_test123"
    assert llm.last_response_id == "resp_test123"

    llm.reset_chain()

    assert llm.last_response_id is None
def test_azure_responses_api_reset_chain_no_op_for_completions():
    """reset_chain() is harmless when no Responses delegate exists."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    llm.reset_chain()  # must not raise
def test_azure_responses_api_last_reasoning_items_property():
    """last_reasoning_items mirrors the delegate's tracked reasoning items."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain_reasoning=True,
    )

    assert llm.last_reasoning_items is None  # nothing tracked yet

    tracked = [{"type": "reasoning", "id": "rs_test"}]
    llm._responses_delegate._last_reasoning_items = tracked
    assert llm.last_reasoning_items == tracked
def test_azure_responses_api_last_reasoning_items_returns_none_for_completions():
    """Without a Responses delegate (completions mode) last_reasoning_items is None."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.last_reasoning_items is None
def test_azure_responses_api_reset_reasoning_chain():
    """reset_reasoning_chain() clears the delegate's tracked reasoning items."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        auto_chain_reasoning=True,
    )
    llm._responses_delegate._last_reasoning_items = [{"type": "reasoning"}]
    assert llm.last_reasoning_items is not None

    llm.reset_reasoning_chain()

    assert llm.last_reasoning_items is None
def test_azure_responses_api_reset_reasoning_chain_no_op_for_completions():
    """reset_reasoning_chain() is harmless when no Responses delegate exists."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    llm.reset_reasoning_chain()  # must not raise
def test_azure_responses_api_completions_mode_unaffected():
    """Responses-API additions must not disturb default completions mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
    )

    assert llm.api == "completions"
    assert llm._responses_delegate is None
    # The Azure AI Inference clients are still created in completions mode.
    assert hasattr(llm, "client")
    assert hasattr(llm, "async_client")
def test_azure_responses_api_interceptor_allowed():
    """Interceptors are accepted in responses mode (they go through the OpenAI SDK)."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    # Constructing with an interceptor must not raise when api='responses'.
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        interceptor=MagicMock(),
    )

    assert llm._responses_delegate is not None
def test_azure_responses_api_interceptor_blocked_for_completions():
    """Completions mode still rejects interceptors."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with pytest.raises(
        NotImplementedError, match="HTTP interceptors are not yet supported"
    ):
        AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="completions",
            interceptor=MagicMock(),
        )
def test_azure_responses_api_builtin_tools():
    """builtin_tools is forwarded verbatim to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    requested_tools = ["web_search", "code_interpreter"]
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        builtin_tools=requested_tools,
    )

    assert llm._responses_delegate.builtin_tools == requested_tools
def test_azure_responses_api_with_previous_response_id():
    """previous_response_id and store are forwarded to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        store=True,
        previous_response_id="resp_abc123",
    )

    delegate = llm._responses_delegate
    assert delegate.store is True
    assert delegate.previous_response_id == "resp_abc123"
def test_azure_responses_api_env_var_api_version():
    """Test that AZURE_API_VERSION env var actually sets the responses API version."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch.dict(os.environ, {"AZURE_API_VERSION": "2025-10-01"}):
        with patch("openai.AzureOpenAI") as mock_azure_openai:
            llm = AzureCompletion(
                model="gpt-4o",
                api_key="test-key",
                endpoint="https://test.openai.azure.com",
                api="responses",
            )

    assert llm._responses_delegate is not None
    # Previously only delegate existence was asserted; verify the env-provided
    # version actually reaches the AzureOpenAI client constructor.
    assert mock_azure_openai.call_args.kwargs["api_version"] == "2025-10-01"
def test_azure_responses_api_timeout_and_retries():
    """Test that timeout and max_retries are actually passed to the Azure clients."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch("openai.AzureOpenAI") as mock_azure_openai:
        llm = AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="responses",
            timeout=30.0,
            max_retries=5,
        )

    assert llm._responses_delegate is not None
    assert llm.timeout == 30.0
    assert llm.max_retries == 5
    # Previously only the stored attributes were asserted; the docstring claims
    # the values reach the clients, so verify the constructor kwargs too.
    client_kwargs = mock_azure_openai.call_args.kwargs
    assert client_kwargs["timeout"] == 30.0
    assert client_kwargs["max_retries"] == 5
def test_azure_responses_api_streaming_param():
    """stream=True is forwarded to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        endpoint="https://test.openai.azure.com",
        api_key="test-key",
        stream=True,
        api="responses",
    )

    assert llm._responses_delegate.stream is True
def test_azure_responses_api_with_non_azure_openai_endpoint():
    """Responses mode also accepts non *.openai.azure.com endpoints (e.g. AI Foundry)."""
    from openai import AzureOpenAI

    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://models.inference.ai.azure.com",
        api="responses",
    )

    delegate = llm._responses_delegate
    assert delegate is not None
    assert isinstance(delegate.client, AzureOpenAI)
def test_azure_responses_api_base_endpoint_preserved():
    """base_endpoint keeps the caller's URL untouched in responses mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    raw_endpoint = "https://test.openai.azure.com"
    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint=raw_endpoint,
        api="responses",
    )

    # Responses mode skips the validation that rewrites the URL, so both the
    # preserved base endpoint and the active endpoint equal the original.
    assert llm.base_endpoint == raw_endpoint
    assert llm.endpoint == raw_endpoint
def test_azure_responses_api_endpoint_not_validated_for_responses():
    """Deployment-path rewriting of the endpoint happens only in completions mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    base = "https://test.openai.azure.com"

    # Completions mode appends /openai/deployments/<model> during validation.
    llm_completions = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint=base,
        api="completions",
    )
    assert "/openai/deployments/" in llm_completions.endpoint

    # Responses mode leaves the endpoint exactly as given.
    llm_responses = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint=base,
        api="responses",
    )
    assert llm_responses.endpoint == base