Compare commits

...

3 Commits

Author SHA1 Message Date
Devin AI
2315422fc4 fix: forward frequency_penalty and presence_penalty to Responses API delegate
Addresses Bugbot review feedback: these parameters were silently
ignored when using api='responses' mode.

Co-Authored-By: João <joao@crewai.com>
2026-04-01 09:20:26 +00:00
Devin AI
1a7d2ad05c fix: resolve mypy no-any-return error in last_response_id property
Co-Authored-By: João <joao@crewai.com>
2026-04-01 09:13:51 +00:00
Devin AI
f69171cd76 feat: add Responses API support for Azure OpenAI provider
When api='responses' is set on an Azure LLM instance, calls are
delegated to the OpenAI Responses API implementation with the Azure
resource's /openai/v1/ base URL, reusing the fully-tested OpenAI
Responses API code path.

New fields on AzureCompletion:
- api: Literal['completions', 'responses'] (default: 'completions')
- instructions, store, previous_response_id, include, builtin_tools
- parse_tool_outputs, auto_chain, auto_chain_reasoning
- reasoning_effort, seed, max_completion_tokens

Usage:
  llm = LLM(model='azure/gpt-4o', api='responses', api_key=KEY, endpoint=ENDPOINT)

Closes #5202

Co-Authored-By: João <joao@crewai.com>
2026-04-01 09:09:12 +00:00
2 changed files with 623 additions and 12 deletions

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import json
import logging
import os
from typing import Any, TypedDict
from typing import Any, Literal, TypedDict
from urllib.parse import urlparse
from pydantic import BaseModel, PrivateAttr, model_validator
@@ -72,6 +72,19 @@ class AzureCompletion(BaseLLM):
This class provides direct integration with the Azure AI Inference Python SDK,
offering native function calling, streaming support, and proper Azure authentication.
Supports both Chat Completions API (default) and Responses API.
When ``api="responses"`` is set, calls are delegated to the OpenAI Responses API
implementation with the Azure resource's ``/openai/v1/`` base URL, reusing the
fully-tested OpenAI Responses API code path.
Example::
# Chat Completions (default)
llm = LLM(model="azure/gpt-4o", api_key=KEY, endpoint=ENDPOINT)
# Responses API
llm = LLM(model="azure/gpt-4o", api="responses", api_key=KEY, endpoint=ENDPOINT)
"""
endpoint: str | None = None
@@ -82,14 +95,27 @@ class AzureCompletion(BaseLLM):
frequency_penalty: float | None = None
presence_penalty: float | None = None
max_tokens: int | None = None
max_completion_tokens: int | None = None
stream: bool = False
interceptor: BaseInterceptor[Any, Any] | None = None
response_format: type[BaseModel] | None = None
is_openai_model: bool = False
is_azure_openai_endpoint: bool = False
api: Literal["completions", "responses"] = "completions"
instructions: str | None = None
store: bool | None = None
previous_response_id: str | None = None
include: list[str] | None = None
builtin_tools: list[str] | None = None
parse_tool_outputs: bool = False
auto_chain: bool = False
auto_chain_reasoning: bool = False
reasoning_effort: str | None = None
seed: int | None = None
_client: Any = PrivateAttr(default=None)
_async_client: Any = PrivateAttr(default=None)
_responses_delegate: Any = PrivateAttr(default=None)
@model_validator(mode="before")
@classmethod
@@ -142,17 +168,95 @@ class AzureCompletion(BaseLLM):
def _init_clients(self) -> AzureCompletion:
if not self.api_key:
raise ValueError("Azure API key is required.")
client_kwargs: dict[str, Any] = {
"endpoint": self.endpoint,
"credential": AzureKeyCredential(self.api_key),
}
if self.api_version:
client_kwargs["api_version"] = self.api_version
self._client = ChatCompletionsClient(**client_kwargs)
self._async_client = AsyncChatCompletionsClient(**client_kwargs)
if self.api == "responses":
self._init_responses_delegate()
else:
client_kwargs: dict[str, Any] = {
"endpoint": self.endpoint,
"credential": AzureKeyCredential(self.api_key),
}
if self.api_version:
client_kwargs["api_version"] = self.api_version
self._client = ChatCompletionsClient(**client_kwargs)
self._async_client = AsyncChatCompletionsClient(**client_kwargs)
return self
def _init_responses_delegate(self) -> None:
"""Initialise the OpenAICompletion delegate for Responses API calls.
Constructs the Azure-compatible ``/openai/v1/`` base URL from the
configured endpoint and creates an :class:`OpenAICompletion` instance
that handles all Responses API logic.
"""
from crewai.llms.providers.openai.completion import OpenAICompletion
# Build the Azure base_url: <resource>/openai/v1/
raw_endpoint = self.endpoint or ""
# Strip the /openai/deployments/<deployment> suffix if present
deployment_idx = raw_endpoint.find("/openai/deployments/")
if deployment_idx != -1:
resource_url = raw_endpoint[:deployment_idx]
else:
resource_url = raw_endpoint.rstrip("/")
api_version = self.api_version or "2024-06-01"
base_url = f"{resource_url}/openai/v1/?api-version={api_version}"
delegate_kwargs: dict[str, Any] = {
"model": self.model,
"provider": "openai",
"api_key": self.api_key,
"base_url": base_url,
"api": "responses",
"stream": self.stream,
}
# Forward Responses API parameters
if self.instructions is not None:
delegate_kwargs["instructions"] = self.instructions
if self.store is not None:
delegate_kwargs["store"] = self.store
if self.previous_response_id is not None:
delegate_kwargs["previous_response_id"] = self.previous_response_id
if self.include is not None:
delegate_kwargs["include"] = self.include
if self.builtin_tools is not None:
delegate_kwargs["builtin_tools"] = self.builtin_tools
if self.parse_tool_outputs:
delegate_kwargs["parse_tool_outputs"] = self.parse_tool_outputs
if self.auto_chain:
delegate_kwargs["auto_chain"] = self.auto_chain
if self.auto_chain_reasoning:
delegate_kwargs["auto_chain_reasoning"] = self.auto_chain_reasoning
if self.reasoning_effort is not None:
delegate_kwargs["reasoning_effort"] = self.reasoning_effort
if self.temperature is not None:
delegate_kwargs["temperature"] = self.temperature
if self.top_p is not None:
delegate_kwargs["top_p"] = self.top_p
if self.max_tokens is not None:
delegate_kwargs["max_tokens"] = self.max_tokens
if self.max_completion_tokens is not None:
delegate_kwargs["max_completion_tokens"] = self.max_completion_tokens
if self.seed is not None:
delegate_kwargs["seed"] = self.seed
if self.timeout is not None:
delegate_kwargs["timeout"] = self.timeout
if self.max_retries != 2:
delegate_kwargs["max_retries"] = self.max_retries
if self.response_format is not None:
delegate_kwargs["response_format"] = self.response_format
if self.stop:
delegate_kwargs["stop"] = self.stop
if self.frequency_penalty is not None:
delegate_kwargs["frequency_penalty"] = self.frequency_penalty
if self.presence_penalty is not None:
delegate_kwargs["presence_penalty"] = self.presence_penalty
self._responses_delegate = OpenAICompletion(**delegate_kwargs)
def to_config_dict(self) -> dict[str, Any]:
"""Extend base config with Azure-specific fields."""
config = super().to_config_dict()
@@ -172,6 +276,10 @@ class AzureCompletion(BaseLLM):
config["presence_penalty"] = self.presence_penalty
if self.max_tokens is not None:
config["max_tokens"] = self.max_tokens
if self.api != "completions":
config["api"] = self.api
if self.reasoning_effort is not None:
config["reasoning_effort"] = self.reasoning_effort
return config
@staticmethod
@@ -277,7 +385,7 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API.
"""Call Azure AI Inference API (Chat Completions or Responses).
Args:
messages: Input messages for the chat completion
@@ -291,6 +399,17 @@ class AzureCompletion(BaseLLM):
Returns:
Chat completion response or tool call result
"""
if self.api == "responses" and self._responses_delegate is not None:
return self._responses_delegate.call(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
# Emit call started event
@@ -349,7 +468,7 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API asynchronously.
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).
Args:
messages: Input messages for the chat completion
@@ -363,6 +482,17 @@ class AzureCompletion(BaseLLM):
Returns:
Chat completion response or tool call result
"""
if self.api == "responses" and self._responses_delegate is not None:
return await self._responses_delegate.acall(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
self._emit_call_started_event(
@@ -1090,6 +1220,19 @@ class AzureCompletion(BaseLLM):
}
return {"total_tokens": 0}
@property
def last_response_id(self) -> str | None:
"""Get the last response ID from auto-chaining (Responses API only)."""
if self._responses_delegate is not None:
rid: str | None = self._responses_delegate.last_response_id
return rid
return None
def reset_chain(self) -> None:
"""Reset the auto-chain state (Responses API only)."""
if self._responses_delegate is not None:
self._responses_delegate.reset_chain()
async def aclose(self) -> None:
"""Close the async client and clean up resources.

View File

@@ -1,13 +1,14 @@
import os
import sys
import types
from unittest.mock import patch, MagicMock, Mock
from unittest.mock import patch, MagicMock, Mock, AsyncMock
import pytest
from crewai.llm import LLM
from crewai.crew import Crew
from crewai.agent import Agent
from crewai.task import Task
from crewai.llms.providers.azure.completion import AzureCompletion
@pytest.fixture
@@ -1403,3 +1404,470 @@ def test_azure_stop_words_still_applied_to_regular_responses():
assert "Observation:" not in result
assert "Found results" not in result
assert "I need to search for more information" in result
# =============================================================================
# Azure Responses API Tests
# =============================================================================
def test_azure_responses_api_initialization():
"""Test that AzureCompletion can be initialized with api='responses'."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
instructions="You are a helpful assistant.",
store=True,
)
assert llm.api == "responses"
assert llm.instructions == "You are a helpful assistant."
assert llm.store is True
assert llm.model == "gpt-4o"
assert llm._responses_delegate is not None
def test_azure_responses_api_default_is_completions():
"""Test that the default API is 'completions' for backward compatibility."""
llm = AzureCompletion(
model="gpt-4o",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
assert llm.api == "completions"
assert llm._responses_delegate is None
def test_azure_responses_api_delegate_is_openai_completion():
"""Test that the Responses API delegate is an OpenAICompletion instance."""
from crewai.llms.providers.openai.completion import OpenAICompletion
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
assert isinstance(llm._responses_delegate, OpenAICompletion)
assert llm._responses_delegate.api == "responses"
assert llm._responses_delegate.model == "gpt-4o"
def test_azure_responses_api_base_url_construction():
"""Test that the Azure base URL is correctly constructed for Responses API."""
from crewai.llms.providers.openai.completion import OpenAICompletion
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
api_version="2025-03-01-preview",
)
delegate = llm._responses_delegate
assert isinstance(delegate, OpenAICompletion)
assert delegate.base_url == "https://my-resource.openai.azure.com/openai/v1/?api-version=2025-03-01-preview"
def test_azure_responses_api_base_url_strips_deployment_suffix():
"""Test that deployment suffix is stripped from endpoint for Responses API base URL."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com/openai/deployments/gpt-4o",
)
delegate = llm._responses_delegate
assert "my-resource.openai.azure.com/openai/v1/" in delegate.base_url
assert "/openai/deployments/" not in delegate.base_url
def test_azure_responses_api_base_url_with_trailing_slash():
"""Test that endpoint with trailing slash is handled correctly."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com/",
api_version="2025-03-01-preview",
)
delegate = llm._responses_delegate
assert delegate.base_url == "https://my-resource.openai.azure.com/openai/v1/?api-version=2025-03-01-preview"
def test_azure_responses_api_forwards_parameters():
"""Test that Responses API parameters are forwarded to the delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
instructions="Be helpful",
store=True,
previous_response_id="resp_abc123",
include=["reasoning.encrypted_content"],
builtin_tools=["web_search_preview"],
parse_tool_outputs=True,
auto_chain=True,
auto_chain_reasoning=True,
temperature=0.5,
top_p=0.9,
max_tokens=1000,
reasoning_effort="high",
seed=42,
frequency_penalty=0.3,
presence_penalty=0.6,
)
delegate = llm._responses_delegate
assert delegate.instructions == "Be helpful"
assert delegate.store is True
assert delegate.previous_response_id == "resp_abc123"
assert delegate.include == ["reasoning.encrypted_content"]
assert delegate.builtin_tools == ["web_search_preview"]
assert delegate.parse_tool_outputs is True
assert delegate.auto_chain is True
assert delegate.auto_chain_reasoning is True
assert delegate.temperature == 0.5
assert delegate.top_p == 0.9
assert delegate.max_tokens == 1000
assert delegate.reasoning_effort == "high"
assert delegate.seed == 42
assert delegate.frequency_penalty == 0.3
assert delegate.presence_penalty == 0.6
def test_azure_responses_api_call_delegates_to_openai():
"""Test that call() with api='responses' delegates to the OpenAI delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
with patch.object(llm._responses_delegate, "call", return_value="responses result") as mock_call:
result = llm.call("Hello, world!")
mock_call.assert_called_once_with(
messages="Hello, world!",
tools=None,
callbacks=None,
available_functions=None,
from_task=None,
from_agent=None,
response_model=None,
)
assert result == "responses result"
@pytest.mark.asyncio
async def test_azure_responses_api_acall_delegates_to_openai():
"""Test that acall() with api='responses' delegates to the OpenAI delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
with patch.object(
llm._responses_delegate, "acall", new_callable=AsyncMock, return_value="async responses result"
) as mock_acall:
result = await llm.acall("Hello async!")
mock_acall.assert_called_once_with(
messages="Hello async!",
tools=None,
callbacks=None,
available_functions=None,
from_task=None,
from_agent=None,
response_model=None,
)
assert result == "async responses result"
def test_azure_responses_api_call_with_tools():
"""Test that call() with api='responses' forwards tools to the delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
tools = [
{
"name": "get_weather",
"description": "Get weather",
"parameters": {
"type": "object",
"properties": {"location": {"type": "string"}},
"required": ["location"],
},
}
]
available_functions = {"get_weather": lambda location: f"Sunny in {location}"}
with patch.object(llm._responses_delegate, "call", return_value="Weather result") as mock_call:
result = llm.call(
messages=[{"role": "user", "content": "What's the weather?"}],
tools=tools,
available_functions=available_functions,
)
mock_call.assert_called_once()
call_kwargs = mock_call.call_args
assert call_kwargs.kwargs["tools"] == tools
assert call_kwargs.kwargs["available_functions"] == available_functions
assert result == "Weather result"
def test_azure_responses_api_completions_not_affected():
"""Test that completions API path is unaffected when api='completions'."""
llm = AzureCompletion(
model="gpt-4o",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
assert llm.api == "completions"
assert llm._responses_delegate is None
assert llm._client is not None
assert llm._async_client is not None
def test_azure_responses_api_via_llm_factory():
"""Test that api='responses' works when creating via LLM factory."""
llm = LLM(
model="azure/gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
assert isinstance(llm, AzureCompletion)
assert llm.api == "responses"
assert llm._responses_delegate is not None
def test_azure_responses_api_to_config_dict():
"""Test that to_config_dict() includes api field when set to 'responses'."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
reasoning_effort="high",
)
config = llm.to_config_dict()
assert config["api"] == "responses"
assert config["reasoning_effort"] == "high"
def test_azure_completions_api_to_config_dict_no_api_field():
"""Test that to_config_dict() does not include api when default 'completions'."""
llm = AzureCompletion(
model="gpt-4o",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
config = llm.to_config_dict()
assert "api" not in config
def test_azure_responses_api_last_response_id():
"""Test that last_response_id property delegates to the OpenAI delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
auto_chain=True,
)
# Initially None
assert llm.last_response_id is None
# Mock the delegate's last_response_id
llm._responses_delegate._last_response_id = "resp_xyz789"
assert llm.last_response_id == "resp_xyz789"
def test_azure_responses_api_reset_chain():
"""Test that reset_chain() delegates to the OpenAI delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
auto_chain=True,
)
# Set a response ID on the delegate
llm._responses_delegate._last_response_id = "resp_xyz789"
assert llm.last_response_id == "resp_xyz789"
# Reset the chain
llm.reset_chain()
assert llm.last_response_id is None
def test_azure_responses_api_last_response_id_without_delegate():
"""Test that last_response_id returns None when no delegate (completions mode)."""
llm = AzureCompletion(
model="gpt-4o",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
assert llm.last_response_id is None
def test_azure_responses_api_reset_chain_without_delegate():
"""Test that reset_chain() is a no-op when no delegate (completions mode)."""
llm = AzureCompletion(
model="gpt-4o",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
# Should not raise
llm.reset_chain()
def test_azure_responses_api_with_structured_output():
"""Test that structured output (response_model) is forwarded to the delegate."""
from pydantic import BaseModel, Field
class MathAnswer(BaseModel):
result: int = Field(description="The numerical result")
explanation: str = Field(description="Brief explanation")
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
mock_answer = MathAnswer(result=42, explanation="The answer to everything")
with patch.object(llm._responses_delegate, "call", return_value=mock_answer) as mock_call:
result = llm.call("What is the answer?", response_model=MathAnswer)
mock_call.assert_called_once()
call_kwargs = mock_call.call_args
assert call_kwargs is not None
assert call_kwargs.kwargs["response_model"] == MathAnswer
assert isinstance(result, MathAnswer)
assert result.result == 42
def test_azure_responses_api_streaming_forwarded():
"""Test that stream=True is forwarded to the Responses API delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
stream=True,
)
assert llm._responses_delegate.stream is True
def test_azure_responses_api_max_completion_tokens_forwarded():
"""Test that max_completion_tokens is forwarded to the delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
max_completion_tokens=500,
)
assert llm._responses_delegate.max_completion_tokens == 500
def test_azure_responses_api_default_api_version_in_url():
"""Test that the api_version is included in the Responses API base URL."""
with patch.dict(os.environ, {}, clear=False):
# Remove AZURE_API_VERSION if set to ensure we get the code default
env = os.environ.copy()
env.pop("AZURE_API_VERSION", None)
with patch.dict(os.environ, env, clear=True):
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
assert "api-version=" in llm._responses_delegate.base_url
assert "api-version=2024-06-01" in llm._responses_delegate.base_url
def test_azure_responses_api_custom_api_version_in_url():
"""Test that custom api_version is used in URL when specified."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
api_version="2025-03-01-preview",
)
assert "api-version=2025-03-01-preview" in llm._responses_delegate.base_url
def test_azure_responses_api_no_chat_clients_created():
"""Test that Chat Completions clients are NOT created when api='responses'."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
)
# In responses mode, the native Azure clients should not be initialized
assert llm._client is None
assert llm._async_client is None
assert llm._responses_delegate is not None
def test_azure_responses_api_stop_words_forwarded():
"""Test that stop words are forwarded to the delegate."""
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
stop=["STOP"],
)
assert llm._responses_delegate.stop == ["STOP"]
def test_azure_responses_api_response_format_forwarded():
"""Test that response_format is forwarded to the delegate."""
from pydantic import BaseModel
class MyFormat(BaseModel):
answer: str
llm = AzureCompletion(
model="gpt-4o",
api="responses",
api_key="test-key",
endpoint="https://my-resource.openai.azure.com",
response_format=MyFormat,
)
assert llm._responses_delegate.response_format == MyFormat