feat(azure): add Responses API support for Azure OpenAI provider (#5201)

* Support azure openai responses

* Revert function supported condition

* Revert comment deletion

* Update support stop words

* Add cassette based tests

* Fix linting
This commit is contained in:
Kunal Karmakar
2026-04-29 23:42:11 +05:30
committed by GitHub
parent 2a40316521
commit e0b86750c2
7 changed files with 1049 additions and 8 deletions

View File

@@ -89,8 +89,22 @@ class AzureCompletion(BaseLLM):
is_openai_model: bool = False
is_azure_openai_endpoint: bool = False
# Responses API settings
api: Literal["completions", "responses"] = "completions"
reasoning_effort: str | None = None
instructions: str | None = None
store: bool | None = None
previous_response_id: str | None = None
include: list[str] | None = None
builtin_tools: list[str] | None = None
parse_tool_outputs: bool = False
auto_chain: bool = False
auto_chain_reasoning: bool = False
max_completion_tokens: int | None = None
_client: Any = PrivateAttr(default=None)
_async_client: Any = PrivateAttr(default=None)
_responses_delegate: Any = PrivateAttr(default=None)
@model_validator(mode="before")
@classmethod
@@ -147,12 +161,89 @@ class AzureCompletion(BaseLLM):
import time even before deployment env vars are set.
"""
try:
self._client = self._build_sync_client()
self._async_client = self._build_async_client()
if self.api == "responses":
self._init_responses_delegate()
else:
self._client = self._build_sync_client()
self._async_client = self._build_async_client()
except ValueError:
pass
return self
def _init_responses_delegate(self) -> None:
    """Build the ``OpenAICompletion`` delegate used for Responses API calls.

    The delegate is always given the model, API key, streaming flag,
    ``api="responses"``, ``provider="openai"`` and a ``base_url`` obtained
    from ``_get_responses_base_url``. Every other setting is forwarded only
    when it has been set on this instance, so the delegate's own defaults
    apply otherwise. The result is stored on ``self._responses_delegate``.

    Raises:
        ValueError: Propagated from ``_get_responses_base_url`` when it
            cannot derive a base URL.
    """
    from crewai.llms.providers.openai.completion import OpenAICompletion

    responses_url = self._get_responses_base_url()

    def _is_set(value):
        # Forward anything that is not None (0, False and "" still count).
        return value is not None

    def _non_default_retries(value):
        # Retries are forwarded only when they differ from 2.
        return value != 2

    # (attribute name, predicate deciding whether the value is forwarded).
    # ``bool`` means "forward only when truthy".
    forwarding_rules = (
        ("temperature", _is_set),
        ("top_p", _is_set),
        ("max_tokens", _is_set),
        ("max_completion_tokens", _is_set),
        ("stop", bool),
        ("timeout", _is_set),
        ("max_retries", _non_default_retries),
        ("reasoning_effort", _is_set),
        ("instructions", _is_set),
        ("store", _is_set),
        ("previous_response_id", _is_set),
        ("include", _is_set),
        ("builtin_tools", _is_set),
        ("parse_tool_outputs", bool),
        ("auto_chain", bool),
        ("auto_chain_reasoning", bool),
        ("response_format", _is_set),
        ("additional_params", bool),
    )

    delegate_kwargs: dict[str, Any] = {
        "model": self.model,
        "api_key": self.api_key,
        "base_url": responses_url,
        "api": "responses",
        "provider": "openai",
        "stream": self.stream,
    }
    for attr_name, should_forward in forwarding_rules:
        attr_value = getattr(self, attr_name)
        if should_forward(attr_value):
            delegate_kwargs[attr_name] = attr_value

    self._responses_delegate = OpenAICompletion(**delegate_kwargs)
def _get_responses_base_url(self) -> str:
    """Construct the base URL for the Azure OpenAI Responses API.

    Keeps only the scheme and host (including any port) of the configured
    endpoint and appends the ``/openai/v1/`` path. Any path, query string
    or fragment present on the endpoint is discarded.

    Returns:
        The Responses API base URL, e.g.
        ``https://myresource.openai.azure.com/openai/v1/``

    Raises:
        ValueError: If no endpoint is configured, or if the endpoint is not
            an absolute URL with both a scheme and a host.
    """
    if not self.endpoint:
        raise ValueError("Azure endpoint is required for Responses API")

    parsed = urlparse(self.endpoint.strip())
    # A scheme-less endpoint such as "myresource.openai.azure.com" parses to
    # an empty scheme and netloc; without this check the method would
    # silently return the unusable URL ":///openai/v1/".
    if not parsed.scheme or not parsed.netloc:
        raise ValueError(
            "Azure endpoint must be an absolute URL including scheme and host "
            f"(e.g. https://myresource.openai.azure.com), got: {self.endpoint!r}"
        )
    return f"{parsed.scheme}://{parsed.netloc}/openai/v1/"
def _build_sync_client(self) -> Any:
    """Return a new ``ChatCompletionsClient`` built from ``_make_client_kwargs()``."""
    client_kwargs = self._make_client_kwargs()
    return ChatCompletionsClient(**client_kwargs)
@@ -252,6 +343,16 @@ class AzureCompletion(BaseLLM):
config["presence_penalty"] = self.presence_penalty
if self.max_tokens is not None:
config["max_tokens"] = self.max_tokens
if self.api != "completions":
config["api"] = self.api
if self.reasoning_effort is not None:
config["reasoning_effort"] = self.reasoning_effort
if self.instructions is not None:
config["instructions"] = self.instructions
if self.store is not None:
config["store"] = self.store
if self.max_completion_tokens is not None:
config["max_completion_tokens"] = self.max_completion_tokens
return config
@staticmethod
@@ -357,10 +458,10 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API.
"""Call Azure AI Inference API.
Args:
messages: Input messages for the chat completion
messages: Input messages
tools: List of tool/function definitions
callbacks: Callback functions (not used in native implementation)
available_functions: Available functions for tool calling
@@ -369,8 +470,19 @@ class AzureCompletion(BaseLLM):
response_model: Response model
Returns:
Chat completion response or tool call result
Completion response or tool call result
"""
if self.api == "responses":
return self._responses_delegate.call(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
# Emit call started event
@@ -429,10 +541,10 @@ class AzureCompletion(BaseLLM):
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
) -> str | Any:
"""Call Azure AI Inference chat completions API asynchronously.
"""Call Azure AI Inference API asynchronously.
Args:
messages: Input messages for the chat completion
messages: Input messages
tools: List of tool/function definitions
callbacks: Callback functions (not used in native implementation)
available_functions: Available functions for tool calling
@@ -441,8 +553,19 @@ class AzureCompletion(BaseLLM):
response_model: Pydantic model for structured output
Returns:
Chat completion response or tool call result
Completion response or tool call result
"""
if self.api == "responses":
return await self._responses_delegate.acall(
messages=messages,
tools=tools,
callbacks=callbacks,
available_functions=available_functions,
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
)
with llm_call_context():
try:
self._emit_call_started_event(
@@ -1178,6 +1301,32 @@ class AzureCompletion(BaseLLM):
return result
return {"total_tokens": 0}
@property
def last_response_id(self) -> str | None:
"""Get the last response ID from Responses API auto-chaining."""
if self._responses_delegate is not None:
result: str | None = self._responses_delegate.last_response_id
return result
return None
@property
def last_reasoning_items(self) -> list[Any] | None:
"""Get the last reasoning items from Responses API auto-chain reasoning."""
if self._responses_delegate is not None:
result: list[Any] | None = self._responses_delegate.last_reasoning_items
return result
return None
def reset_chain(self) -> None:
    """Ask the Responses delegate to reset its chain; no-op without a delegate."""
    delegate = self._responses_delegate
    if delegate is None:
        return
    delegate.reset_chain()
def reset_reasoning_chain(self) -> None:
    """Ask the Responses delegate to reset its reasoning chain; no-op without a delegate."""
    delegate = self._responses_delegate
    if delegate is None:
        return
    delegate.reset_reasoning_chain()
async def aclose(self) -> None:
"""Close the async client and clean up resources.

View File

@@ -0,0 +1,133 @@
interactions:
- request:
body: '{"input":[{"role":"user","content":"Say hello in one sentence."}],"model":"gpt-5.2-chat"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '89'
content-type:
- application/json
host:
- fake-azure-endpoint.openai.azure.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 2.32.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.12
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/v1/responses
response:
body:
string: "{\n \"id\": \"resp_0473c8c2b1c49f8c0069f23d0910e081958ebce72a734935c7\",\n
\ \"object\": \"response\",\n \"created_at\": 1777483017,\n \"status\":
\"completed\",\n \"background\": false,\n \"completed_at\": 1777483018,\n
\ \"content_filters\": [\n {\n \"blocked\": false,\n \"source_type\":
\"prompt\",\n \"content_filter_raw\": [],\n \"content_filter_results\":
{\n \"jailbreak\": {\n \"detected\": false,\n \"filtered\":
false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\":
\"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n
\ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\":
false,\n \"severity\": \"safe\"\n },\n \"self_harm\":
{\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n
\ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n
\ \"end_offset\": 368,\n \"check_offset\": 0\n }\n },\n
\ {\n \"blocked\": false,\n \"source_type\": \"completion\",\n
\ \"content_filter_raw\": [],\n \"content_filter_results\": {\n \"protected_material_code\":
{\n \"detected\": false,\n \"filtered\": false\n },\n
\ \"protected_material_text\": {\n \"detected\": false,\n \"filtered\":
false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\":
\"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n
\ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\":
false,\n \"severity\": \"safe\"\n },\n \"self_harm\":
{\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n
\ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n
\ \"end_offset\": 53,\n \"check_offset\": 0\n }\n }\n
\ ],\n \"error\": null,\n \"frequency_penalty\": 0.0,\n \"incomplete_details\":
null,\n \"instructions\": null,\n \"max_output_tokens\": null,\n \"max_tool_calls\":
null,\n \"model\": \"gpt-5.2-chat\",\n \"output\": [\n {\n \"id\":
\"rs_0473c8c2b1c49f8c0069f23d09f24481959bcf9fd847a9a475\",\n \"type\":
\"reasoning\",\n \"summary\": []\n },\n {\n \"id\": \"msg_0473c8c2b1c49f8c0069f23d0a8ccc81958f776ad6016d7edd\",\n
\ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\":
[\n {\n \"type\": \"output_text\",\n \"annotations\":
[],\n \"logprobs\": [],\n \"text\": \"Hello! \\ud83d\\ude0a\"\n
\ }\n ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\":
true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\":
null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\":
\"medium\",\n \"summary\": null\n },\n \"safety_identifier\": null,\n
\ \"service_tier\": \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n
\ \"text\": {\n \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\":
\"medium\"\n },\n \"tool_choice\": \"auto\",\n \"tools\": [],\n \"top_logprobs\":
0,\n \"top_p\": 0.85,\n \"truncation\": \"disabled\",\n \"usage\": {\n
\ \"input_tokens\": 12,\n \"input_tokens_details\": {\n \"cached_tokens\":
0\n },\n \"output_tokens\": 22,\n \"output_tokens_details\": {\n
\ \"reasoning_tokens\": 0\n },\n \"total_tokens\": 34\n },\n \"user\":
null,\n \"metadata\": {}\n}"
headers:
Content-Length:
- '3203'
Content-Type:
- application/json
Date:
- Wed, 29 Apr 2026 17:16:59 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
skip-error-remapping:
- 'true'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-is-spilled-over:
- 'false'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-abusepenalty-active:
- 'False'
x-ratelimit-key:
- gpt-5.2-chat
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-renewalperiod-requests:
- '60'
x-ratelimit-renewalperiod-tokens:
- '60'
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,137 @@
interactions:
- request:
body: '{"input":[{"role":"user","content":"What is 2 + 2? Be brief."}],"model":"gpt-5.2-chat","tools":[{"type":"web_search_preview"}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '127'
content-type:
- application/json
host:
- fake-azure-endpoint.openai.azure.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 2.32.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.12
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/v1/responses
response:
body:
string: "{\n \"id\": \"resp_0d80ad9adad65fca0069f23d0c904c8194862acae4bd866cf5\",\n
\ \"object\": \"response\",\n \"created_at\": 1777483020,\n \"status\":
\"completed\",\n \"background\": false,\n \"completed_at\": 1777483022,\n
\ \"content_filters\": [\n {\n \"blocked\": false,\n \"source_type\":
\"prompt\",\n \"content_filter_raw\": [],\n \"content_filter_results\":
{\n \"jailbreak\": {\n \"detected\": false,\n \"filtered\":
false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\":
\"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n
\ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\":
false,\n \"severity\": \"safe\"\n },\n \"self_harm\":
{\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n
\ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n
\ \"end_offset\": 19017,\n \"check_offset\": 0\n }\n },\n
\ {\n \"blocked\": false,\n \"source_type\": \"completion\",\n
\ \"content_filter_raw\": [],\n \"content_filter_results\": {\n \"hate\":
{\n \"filtered\": false,\n \"severity\": \"safe\"\n },\n
\ \"sexual\": {\n \"filtered\": false,\n \"severity\":
\"safe\"\n },\n \"violence\": {\n \"filtered\": false,\n
\ \"severity\": \"safe\"\n },\n \"self_harm\": {\n \"filtered\":
false,\n \"severity\": \"safe\"\n },\n \"protected_material_code\":
{\n \"detected\": false,\n \"filtered\": false\n },\n
\ \"protected_material_text\": {\n \"detected\": false,\n \"filtered\":
false\n }\n },\n \"content_filter_offsets\": {\n \"start_offset\":
0,\n \"end_offset\": 889,\n \"check_offset\": 0\n }\n }\n
\ ],\n \"error\": null,\n \"frequency_penalty\": 0.0,\n \"incomplete_details\":
null,\n \"instructions\": null,\n \"max_output_tokens\": null,\n \"max_tool_calls\":
null,\n \"model\": \"gpt-5.2-chat\",\n \"output\": [\n {\n \"id\":
\"rs_0d80ad9adad65fca0069f23d0d8b8c8194b1a9ab61ddc3420d\",\n \"type\":
\"reasoning\",\n \"summary\": []\n },\n {\n \"id\": \"msg_0d80ad9adad65fca0069f23d0e262081949c36d6cc1958eeed\",\n
\ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\":
[\n {\n \"type\": \"output_text\",\n \"annotations\":
[],\n \"logprobs\": [],\n \"text\": \"2 + 2 = 4.\"\n }\n
\ ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\":
true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\":
null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\":
\"medium\",\n \"summary\": null\n },\n \"safety_identifier\": null,\n
\ \"service_tier\": \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n
\ \"text\": {\n \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\":
\"medium\"\n },\n \"tool_choice\": \"auto\",\n \"tools\": [\n {\n \"type\":
\"web_search_preview\",\n \"search_content_types\": [\n \"text\"\n
\ ],\n \"search_context_size\": \"medium\",\n \"user_location\":
{\n \"type\": \"approximate\",\n \"city\": null,\n \"country\":
\"US\",\n \"region\": null,\n \"timezone\": null\n }\n
\ }\n ],\n \"top_logprobs\": 0,\n \"top_p\": 0.85,\n \"truncation\":
\"disabled\",\n \"usage\": {\n \"input_tokens\": 4312,\n \"input_tokens_details\":
{\n \"cached_tokens\": 0\n },\n \"output_tokens\": 28,\n \"output_tokens_details\":
{\n \"reasoning_tokens\": 0\n },\n \"total_tokens\": 4340\n },\n
\ \"user\": null,\n \"metadata\": {}\n}"
headers:
Content-Length:
- '3507'
Content-Type:
- application/json
Date:
- Wed, 29 Apr 2026 17:17:03 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
skip-error-remapping:
- 'true'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-is-spilled-over:
- 'false'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-abusepenalty-active:
- 'False'
x-ratelimit-key:
- gpt-5.2-chat
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-renewalperiod-requests:
- '60'
x-ratelimit-renewalperiod-tokens:
- '60'
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,84 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}],
"stream": false}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '90'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
accept-encoding:
- ACCEPT-ENCODING-XXX
api-key:
- X-API-KEY-XXX
authorization:
- AUTHORIZATION-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5.2-chat/chat/completions?api-version=2024-02-15-preview
response:
body:
string: "{\"choices\":[{\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"protected_material_code\":{\"detected\":false,\"filtered\":false},\"protected_material_text\":{\"detected\":false,\"filtered\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"annotations\":[],\"content\":\"Hello!
\U0001F60A\",\"refusal\":null,\"role\":\"assistant\"}}],\"created\":1777483024,\"id\":\"chatcmpl-Da2oyIDHFopG5fmCKbhDiEYG5ciBN\",\"model\":\"gpt-5.2-chat-latest\",\"object\":\"chat.completion\",\"prompt_filter_results\":[{\"prompt_index\":0,\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"jailbreak\":{\"detected\":false,\"filtered\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}}}],\"service_tier\":\"default\",\"system_fingerprint\":null,\"usage\":{\"completion_tokens\":13,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens\":12,\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0},\"total_tokens\":25}}\n"
headers:
Content-Length:
- '1233'
Content-Type:
- application/json
Date:
- Wed, 29 Apr 2026 17:17:05 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
azureml-model-session:
- AZUREML-MODEL-SESSION-XXX
skip-error-remapping:
- 'true'
x-accel-buffering:
- 'no'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-5.2-chat
x-ms-is-spilled-over:
- 'false'
x-ms-rai-invoked:
- 'true'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-abusepenalty-active:
- 'False'
x-ratelimit-key:
- gpt-5.2-chat
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-renewalperiod-requests:
- '60'
x-ratelimit-renewalperiod-tokens:
- '60'
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,128 @@
interactions:
- request:
body: '{"input":[{"role":"user","content":"Say hello in one sentence."}],"model":"gpt-5.2-chat"}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '89'
content-type:
- application/json
host:
- fake-azure-endpoint.openai.azure.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- async:asyncio
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 2.32.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.12
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/v1/responses
response:
body:
string: "{\n \"id\": \"resp_02861ec017218a520069f23d21dbf88193aa91a73d63d91302\",\n
\ \"object\": \"response\",\n \"created_at\": 1777483041,\n \"status\":
\"completed\",\n \"background\": false,\n \"completed_at\": 1777483043,\n
\ \"content_filters\": [\n {\n \"blocked\": false,\n \"source_type\":
\"prompt\",\n \"content_filter_raw\": [],\n \"content_filter_results\":
{\n \"jailbreak\": {\n \"detected\": false,\n \"filtered\":
false\n }\n },\n \"content_filter_offsets\": {\n \"start_offset\":
0,\n \"end_offset\": 368,\n \"check_offset\": 0\n }\n },\n
\ {\n \"blocked\": false,\n \"source_type\": \"completion\",\n
\ \"content_filter_raw\": [],\n \"content_filter_results\": {\n \"protected_material_text\":
{\n \"detected\": false,\n \"filtered\": false\n },\n
\ \"protected_material_code\": {\n \"detected\": false,\n \"filtered\":
false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\":
\"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n
\ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\":
false,\n \"severity\": \"safe\"\n },\n \"self_harm\":
{\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n
\ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n
\ \"end_offset\": 44,\n \"check_offset\": 0\n }\n }\n
\ ],\n \"error\": null,\n \"frequency_penalty\": 0.0,\n \"incomplete_details\":
null,\n \"instructions\": null,\n \"max_output_tokens\": null,\n \"max_tool_calls\":
null,\n \"model\": \"gpt-5.2-chat\",\n \"output\": [\n {\n \"id\":
\"rs_02861ec017218a520069f23d2287ac819399dd23b8dd56028e\",\n \"type\":
\"reasoning\",\n \"summary\": []\n },\n {\n \"id\": \"msg_02861ec017218a520069f23d23082c81939838ab2eebf4e89c\",\n
\ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\":
[\n {\n \"type\": \"output_text\",\n \"annotations\":
[],\n \"logprobs\": [],\n \"text\": \"Hello! \\ud83d\\udc4b\"\n
\ }\n ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\":
true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\":
null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\":
\"medium\",\n \"summary\": null\n },\n \"safety_identifier\": null,\n
\ \"service_tier\": \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n
\ \"text\": {\n \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\":
\"medium\"\n },\n \"tool_choice\": \"auto\",\n \"tools\": [],\n \"top_logprobs\":
0,\n \"top_p\": 0.85,\n \"truncation\": \"disabled\",\n \"usage\": {\n
\ \"input_tokens\": 12,\n \"input_tokens_details\": {\n \"cached_tokens\":
0\n },\n \"output_tokens\": 21,\n \"output_tokens_details\": {\n
\ \"reasoning_tokens\": 0\n },\n \"total_tokens\": 33\n },\n \"user\":
null,\n \"metadata\": {}\n}"
headers:
Content-Length:
- '2844'
Content-Type:
- application/json
Date:
- Wed, 29 Apr 2026 17:17:25 GMT
Strict-Transport-Security:
- STS-XXX
apim-request-id:
- APIM-REQUEST-ID-XXX
skip-error-remapping:
- 'true'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-is-spilled-over:
- 'false'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-abusepenalty-active:
- 'False'
x-ratelimit-key:
- gpt-5.2-chat
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-renewalperiod-requests:
- '60'
x-ratelimit-renewalperiod-tokens:
- '60'
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,395 @@
"""Tests for Azure OpenAI Responses API support.
Verifies that AzureCompletion with api='responses' correctly delegates
to OpenAICompletion configured with the Azure OpenAI /openai/v1/ base URL.
"""
import os
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def azure_env():
    """Patch fake Azure credentials into ``os.environ`` for the duration of a test."""
    fake_credentials = {
        "AZURE_API_KEY": "test-azure-key",
        "AZURE_ENDPOINT": "https://myresource.openai.azure.com",
    }
    with patch.dict(os.environ, fake_credentials):
        yield
@pytest.fixture
def mock_openai_completion():
    """Replace ``OpenAICompletion`` with a mock so no real client is built.

    The patch targets the module that defines the class, so the import
    performed inside ``_init_responses_delegate`` resolves to the mock.

    Yields:
        A ``(mock_class, mock_instance)`` tuple; calling ``mock_class``
        returns ``mock_instance``.
    """
    delegate = MagicMock()
    canned_attributes = {
        "call": MagicMock(return_value="responses-result"),
        "acall": AsyncMock(return_value="async-responses-result"),
        "last_response_id": "resp_abc123",
        "last_reasoning_items": [{"type": "reasoning"}],
        "reset_chain": MagicMock(),
        "reset_reasoning_chain": MagicMock(),
    }
    for attr_name, attr_value in canned_attributes.items():
        setattr(delegate, attr_name, attr_value)

    delegate_cls = MagicMock(return_value=delegate)
    target = "crewai.llms.providers.openai.completion.OpenAICompletion"
    with patch(target, delegate_cls):
        yield delegate_cls, delegate
# ---------------------------------------------------------------------------
# Helper to build AzureCompletion with api="responses" while mocking imports
# ---------------------------------------------------------------------------
def _create_azure_responses(**overrides):
    """Build an ``AzureCompletion`` configured with ``api='responses'``.

    Keyword arguments override the default model, API key, endpoint and api
    values. Call this only while ``OpenAICompletion`` is mocked, i.e. from a
    test that uses the ``mock_openai_completion`` fixture.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    init_kwargs = {
        "model": "gpt-4o",
        "api_key": "test-azure-key",
        "endpoint": "https://myresource.openai.azure.com",
        "api": "responses",
        **overrides,
    }
    return AzureCompletion(**init_kwargs)
# ---------------------------------------------------------------------------
# Initialization tests
# ---------------------------------------------------------------------------
class TestAzureResponsesInit:
    """Initialization behaviour of AzureCompletion with api='responses'."""

    def test_default_api_is_completions(self):
        """Without an explicit api, 'completions' is used and no delegate exists."""
        from crewai.llms.providers.azure.completion import AzureCompletion

        completion = AzureCompletion(
            model="gpt-4o",
            api_key="key",
            endpoint="https://res.openai.azure.com",
        )

        assert completion.api == "completions"
        assert completion._responses_delegate is None

    def test_responses_api_creates_delegate(self, mock_openai_completion):
        """api='responses' instantiates the delegate class exactly once."""
        delegate_cls, delegate = mock_openai_completion

        completion = _create_azure_responses()

        assert completion.api == "responses"
        assert completion._responses_delegate is delegate
        delegate_cls.assert_called_once()

    def test_completions_clients_not_created_in_responses_mode(
        self, mock_openai_completion
    ):
        """When api='responses', azure-ai-inference clients should not be created."""
        completion = _create_azure_responses()

        assert completion._client is None
        assert completion._async_client is None

    def test_responses_base_url_from_base_endpoint(self, mock_openai_completion):
        """A bare resource endpoint gets /openai/v1/ appended."""
        delegate_cls, _ = mock_openai_completion

        _create_azure_responses(endpoint="https://myresource.openai.azure.com")

        forwarded = delegate_cls.call_args.kwargs
        expected_url = "https://myresource.openai.azure.com/openai/v1/"
        assert forwarded["base_url"] == expected_url

    def test_responses_base_url_strips_deployment_path(self, mock_openai_completion):
        """Endpoint with /openai/deployments/... should still produce correct base_url."""
        delegate_cls, _ = mock_openai_completion

        _create_azure_responses(
            endpoint="https://myresource.openai.azure.com/openai/deployments/gpt-4o",
        )

        forwarded = delegate_cls.call_args.kwargs
        expected_url = "https://myresource.openai.azure.com/openai/v1/"
        assert forwarded["base_url"] == expected_url

    def test_responses_base_url_preserves_port(self, mock_openai_completion):
        """An explicit port on the endpoint is kept in the base URL."""
        delegate_cls, _ = mock_openai_completion

        _create_azure_responses(
            endpoint="https://myresource.openai.azure.com:8443/openai/deployments/gpt-4o",
        )

        forwarded = delegate_cls.call_args.kwargs
        expected_url = "https://myresource.openai.azure.com:8443/openai/v1/"
        assert forwarded["base_url"] == expected_url

    def test_delegate_receives_model_and_api_key(self, mock_openai_completion):
        """Model, key, api and provider are always passed to the delegate."""
        delegate_cls, _ = mock_openai_completion

        _create_azure_responses(model="gpt-4o", api_key="my-key")

        forwarded = delegate_cls.call_args.kwargs
        expected_core = {
            "model": "gpt-4o",
            "api_key": "my-key",
            "api": "responses",
            "provider": "openai",
        }
        for name, expected in expected_core.items():
            assert forwarded[name] == expected

    def test_delegate_receives_optional_params(self, mock_openai_completion):
        """Every optional setting supplied by the caller reaches the delegate."""
        delegate_cls, _ = mock_openai_completion
        optional_params = {
            "temperature": 0.5,
            "top_p": 0.9,
            "max_tokens": 1000,
            "max_completion_tokens": 800,
            "reasoning_effort": "medium",
            "instructions": "Be helpful",
            "store": True,
            "previous_response_id": "resp_prev",
            "include": ["reasoning.encrypted_content"],
            "builtin_tools": ["web_search"],
            "parse_tool_outputs": True,
            "auto_chain": True,
            "auto_chain_reasoning": True,
            "stream": True,
        }

        _create_azure_responses(**optional_params)

        forwarded = delegate_cls.call_args.kwargs
        for name, expected in optional_params.items():
            if isinstance(expected, bool):
                # Flags must arrive as the real boolean singleton.
                assert forwarded[name] is expected
            else:
                assert forwarded[name] == expected

    def test_delegate_omits_unset_optional_params(self, mock_openai_completion):
        """Params left at defaults should not be passed to the delegate."""
        delegate_cls, _ = mock_openai_completion

        _create_azure_responses()

        forwarded = delegate_cls.call_args.kwargs
        # None of these were set, so none should be forwarded.
        unset_names = (
            "temperature",
            "reasoning_effort",
            "instructions",
            "store",
            "max_completion_tokens",
        )
        for name in unset_names:
            assert name not in forwarded
# ---------------------------------------------------------------------------
# Call delegation tests (VCR cassette-based)
# ---------------------------------------------------------------------------
class TestAzureResponsesCall:
    """Test synchronous ``call`` behaviour using VCR cassettes.

    The asynchronous ``acall`` counterpart is exercised in the separate
    async test module.
    """

    @pytest.mark.vcr()
    def test_call_delegates_to_responses(self):
        """A Responses-mode LLM returns a non-empty string from ``call``."""
        from crewai.llm import LLM

        llm = LLM(model="azure/gpt-5.2-chat", api="responses")
        result = llm.call("Say hello in one sentence.")

        assert isinstance(result, str)
        assert len(result) > 0

    @pytest.mark.vcr()
    def test_call_with_tools_delegates(self):
        """``call`` still returns text when ``builtin_tools`` is configured."""
        from crewai.llm import LLM

        llm = LLM(
            model="azure/gpt-5.2-chat",
            api="responses",
            builtin_tools=["web_search"],
        )
        result = llm.call("What is 2 + 2? Be brief.")

        assert isinstance(result, str)
        assert len(result) > 0

    @pytest.mark.vcr()
    def test_completions_call_unchanged(self):
        """Default api='completions' should not use the responses delegate."""
        from crewai.llm import LLM

        llm = LLM(model="azure/gpt-5.2-chat")
        result = llm.call("Say hello in one sentence.")

        assert isinstance(result, str)
        assert len(result) > 0
        # Verify the claim in the docstring: no delegate may exist after the
        # call. getattr with a default keeps the check meaningful without
        # raising if the returned object does not declare the attribute.
        assert getattr(llm, "_responses_delegate", None) is None
# ---------------------------------------------------------------------------
# Delegated property & method tests
# ---------------------------------------------------------------------------
class TestAzureResponsesProperties:
    """Test properties and methods delegated to the responses delegate."""

    @staticmethod
    def _completions_mode_llm():
        """Build an AzureCompletion without passing ``api`` (default mode)."""
        from crewai.llms.providers.azure.completion import AzureCompletion

        return AzureCompletion(
            model="gpt-4o",
            api_key="key",
            endpoint="https://res.openai.azure.com",
        )

    def test_last_response_id(self, mock_openai_completion):
        """``last_response_id`` is readable in Responses mode.

        NOTE(review): the expected id is presumably configured on the mock
        by the fixture, which is defined outside this view.
        """
        _delegate_cls, _ = mock_openai_completion
        azure_llm = _create_azure_responses()
        assert azure_llm.last_response_id == "resp_abc123"

    def test_last_response_id_none_for_completions(self):
        """Without ``api='responses'`` the property yields ``None``."""
        azure_llm = self._completions_mode_llm()
        assert azure_llm.last_response_id is None

    def test_last_reasoning_items(self, mock_openai_completion):
        """``last_reasoning_items`` is readable in Responses mode."""
        _delegate_cls, _ = mock_openai_completion
        azure_llm = _create_azure_responses()
        assert azure_llm.last_reasoning_items == [{"type": "reasoning"}]

    def test_reset_chain(self, mock_openai_completion):
        """``reset_chain`` is forwarded to the delegate exactly once."""
        _delegate_cls, delegate = mock_openai_completion
        azure_llm = _create_azure_responses()
        azure_llm.reset_chain()
        delegate.reset_chain.assert_called_once()

    def test_reset_reasoning_chain(self, mock_openai_completion):
        """``reset_reasoning_chain`` is forwarded to the delegate exactly once."""
        _delegate_cls, delegate = mock_openai_completion
        azure_llm = _create_azure_responses()
        azure_llm.reset_reasoning_chain()
        delegate.reset_reasoning_chain.assert_called_once()

    def test_reset_chain_noop_for_completions(self):
        """reset_chain should not raise when delegate is None."""
        azure_llm = self._completions_mode_llm()
        # Passing means the call below completed without an exception.
        azure_llm.reset_chain()
# ---------------------------------------------------------------------------
# Feature-support method tests
# ---------------------------------------------------------------------------
class TestAzureResponsesFeatures:
    """Test supports_* and config methods."""

    @staticmethod
    def _completions_mode_llm():
        """Build an AzureCompletion without passing ``api`` (default mode)."""
        from crewai.llms.providers.azure.completion import AzureCompletion

        return AzureCompletion(
            model="gpt-4o",
            api_key="key",
            endpoint="https://res.openai.azure.com",
        )

    def test_supports_function_calling_responses(self, mock_openai_completion):
        """Function calling is reported as supported in Responses mode."""
        _delegate_cls, _ = mock_openai_completion
        azure_llm = _create_azure_responses()
        assert azure_llm.supports_function_calling() is True

    def test_supports_function_calling_completions_openai_model(self):
        """Function calling is reported as supported for gpt-4o by default."""
        azure_llm = self._completions_mode_llm()
        assert azure_llm.supports_function_calling() is True

    def test_supports_stop_words_false_for_responses(self, mock_openai_completion):
        """Stop words are reported as unsupported in Responses mode."""
        _delegate_cls, _ = mock_openai_completion
        azure_llm = _create_azure_responses(model="o4-mini")
        assert azure_llm.supports_stop_words() is False

    def test_supports_stop_words_true_for_completions_gpt4(self):
        """Stop words are reported as supported for gpt-4o by default."""
        azure_llm = self._completions_mode_llm()
        assert azure_llm.supports_stop_words() is True

    def test_to_config_dict_includes_responses_fields(self, mock_openai_completion):
        """Responses-specific settings appear in ``to_config_dict`` output."""
        _delegate_cls, _ = mock_openai_completion
        azure_llm = _create_azure_responses(
            reasoning_effort="high",
            instructions="Be concise",
            store=True,
            max_completion_tokens=500,
        )
        exported = azure_llm.to_config_dict()

        expected_values = {
            "api": "responses",
            "reasoning_effort": "high",
            "instructions": "Be concise",
            "max_completion_tokens": 500,
        }
        for key, wanted in expected_values.items():
            assert exported[key] == wanted
        # The flag must be the literal True, not merely truthy.
        assert exported["store"] is True

    def test_to_config_dict_omits_api_for_completions(self):
        """The ``api`` key is absent from the config in default mode."""
        exported = self._completions_mode_llm().to_config_dict()
        assert "api" not in exported
# ---------------------------------------------------------------------------
# LLM factory integration test
# ---------------------------------------------------------------------------
class TestAzureResponsesViaLLMFactory:
    """Test that the LLM factory passes api='responses' through to AzureCompletion."""

    @pytest.mark.usefixtures("azure_env")
    def test_llm_factory_passes_api_kwarg(self):
        """LLM(model='azure/gpt-4o', api='responses') should create AzureCompletion
        with api='responses' and a delegate."""
        sync_client_target = "crewai.llms.providers.openai.completion.OpenAI"
        async_client_target = "crewai.llms.providers.openai.completion.AsyncOpenAI"

        # Both client classes are replaced with mocks while the LLM is built.
        with patch(sync_client_target), patch(async_client_target):
            from crewai.llm import LLM
            from crewai.llms.providers.azure.completion import AzureCompletion

            built = LLM(model="azure/gpt-4o", api="responses")

            assert isinstance(built, AzureCompletion)
            assert built.api == "responses"
            assert built._responses_delegate is not None

View File

@@ -0,0 +1,15 @@
"""Async tests for Azure OpenAI Responses API support."""
import pytest
@pytest.mark.vcr()
@pytest.mark.asyncio
async def test_acall_delegates_to_responses():
    """A Responses-mode LLM returns a non-empty string from ``acall``."""
    from crewai.llm import LLM

    responses_llm = LLM(model="azure/gpt-5.2-chat", api="responses")
    reply = await responses_llm.acall("Say hello in one sentence.")

    assert isinstance(reply, str)
    assert len(reply) > 0