From e0b86750c27eedc41256957dc3735ecb885b78e0 Mon Sep 17 00:00:00 2001 From: Kunal Karmakar <5303824+kunalk16@users.noreply.github.com> Date: Wed, 29 Apr 2026 23:42:11 +0530 Subject: [PATCH] feat(azure): add Responses API support for Azure OpenAI provider (#5201) * Support azure openai responses * Revert function supported condition * Revert comment deletion * Update support stop words * Add cassette based tests * Fix linting --- .../crewai/llms/providers/azure/completion.py | 165 +++++++- ...Call.test_call_delegates_to_responses.yaml | 133 ++++++ ...esCall.test_call_with_tools_delegates.yaml | 137 ++++++ ...sCall.test_completions_call_unchanged.yaml | 84 ++++ .../test_acall_delegates_to_responses.yaml | 128 ++++++ .../tests/llms/azure/test_azure_responses.py | 395 ++++++++++++++++++ .../llms/azure/test_azure_responses_async.py | 15 + 7 files changed, 1049 insertions(+), 8 deletions(-) create mode 100644 lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_delegates_to_responses.yaml create mode 100644 lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_with_tools_delegates.yaml create mode 100644 lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_completions_call_unchanged.yaml create mode 100644 lib/crewai/tests/cassettes/llms/azure/test_acall_delegates_to_responses.yaml create mode 100644 lib/crewai/tests/llms/azure/test_azure_responses.py create mode 100644 lib/crewai/tests/llms/azure/test_azure_responses_async.py diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 714a7f0e9..b5cb30fb1 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -89,8 +89,22 @@ class AzureCompletion(BaseLLM): is_openai_model: bool = False is_azure_openai_endpoint: bool = False + # Responses API settings + api: Literal["completions", "responses"] = "completions" + reasoning_effort: str | None = None + instructions: str | None = None + store: bool | None = None + previous_response_id: str | None = None + include: list[str] | None = None + builtin_tools: list[str] | None = None + parse_tool_outputs: bool = False + auto_chain: bool = False + auto_chain_reasoning: bool = False + max_completion_tokens: int | None = None + _client: Any = PrivateAttr(default=None) _async_client: Any = PrivateAttr(default=None) + _responses_delegate: Any = PrivateAttr(default=None) @model_validator(mode="before") @classmethod @@ -147,12 +161,89 @@ class AzureCompletion(BaseLLM): import time even before deployment env vars are set. """ try: - self._client = self._build_sync_client() - self._async_client = self._build_async_client() + if self.api == "responses": + self._init_responses_delegate() + else: + self._client = self._build_sync_client() + self._async_client = self._build_async_client() except ValueError: pass return self + def _init_responses_delegate(self) -> None: + """Create an OpenAICompletion delegate for the Azure OpenAI Responses API. + + The Azure OpenAI Responses API uses the standard OpenAI Python SDK + with a base_url pointing to the Azure resource's /openai/v1/ endpoint. + """ + from crewai.llms.providers.openai.completion import OpenAICompletion + + base_url = self._get_responses_base_url() + + delegate_kwargs: dict[str, Any] = { + "model": self.model, + "api_key": self.api_key, + "base_url": base_url, + "api": "responses", + "provider": "openai", + "stream": self.stream, + } + + if self.temperature is not None: + delegate_kwargs["temperature"] = self.temperature + if self.top_p is not None: + delegate_kwargs["top_p"] = self.top_p + if self.max_tokens is not None: + delegate_kwargs["max_tokens"] = self.max_tokens + if self.max_completion_tokens is not None: + delegate_kwargs["max_completion_tokens"] = self.max_completion_tokens + if self.stop: + delegate_kwargs["stop"] = self.stop + if self.timeout is not None: + delegate_kwargs["timeout"] = self.timeout + if self.max_retries != 2: + delegate_kwargs["max_retries"] = self.max_retries + if self.reasoning_effort is not None: + delegate_kwargs["reasoning_effort"] = self.reasoning_effort + if self.instructions is not None: + delegate_kwargs["instructions"] = self.instructions + if self.store is not None: + delegate_kwargs["store"] = self.store + if self.previous_response_id is not None: + delegate_kwargs["previous_response_id"] = self.previous_response_id + if self.include is not None: + delegate_kwargs["include"] = self.include + if self.builtin_tools is not None: + delegate_kwargs["builtin_tools"] = self.builtin_tools + if self.parse_tool_outputs: + delegate_kwargs["parse_tool_outputs"] = self.parse_tool_outputs + if self.auto_chain: + delegate_kwargs["auto_chain"] = self.auto_chain + if self.auto_chain_reasoning: + delegate_kwargs["auto_chain_reasoning"] = self.auto_chain_reasoning + if self.response_format is not None: + delegate_kwargs["response_format"] = self.response_format + if self.additional_params: + delegate_kwargs["additional_params"] = self.additional_params + + self._responses_delegate = OpenAICompletion(**delegate_kwargs) + + def _get_responses_base_url(self) -> str: + """Construct the base URL for the Azure OpenAI Responses API. + + Extracts the scheme and host from the configured endpoint and appends + the ``/openai/v1/`` path required by the Azure OpenAI Responses API. + + Returns: + The Responses API base URL, e.g. + ``https://myresource.openai.azure.com/openai/v1/`` + """ + if not self.endpoint: + raise ValueError("Azure endpoint is required for Responses API") + parsed = urlparse(self.endpoint) + base = f"{parsed.scheme}://{parsed.netloc}" + return f"{base}/openai/v1/" + def _build_sync_client(self) -> Any: return ChatCompletionsClient(**self._make_client_kwargs()) @@ -252,6 +343,16 @@ class AzureCompletion(BaseLLM): config["presence_penalty"] = self.presence_penalty if self.max_tokens is not None: config["max_tokens"] = self.max_tokens + if self.api != "completions": + config["api"] = self.api + if self.reasoning_effort is not None: + config["reasoning_effort"] = self.reasoning_effort + if self.instructions is not None: + config["instructions"] = self.instructions + if self.store is not None: + config["store"] = self.store + if self.max_completion_tokens is not None: + config["max_completion_tokens"] = self.max_completion_tokens return config @staticmethod @@ -357,10 +458,10 @@ class AzureCompletion(BaseLLM): from_agent: Any | None = None, response_model: type[BaseModel] | None = None, ) -> str | Any: - """Call Azure AI Inference chat completions API. + """Call Azure AI Inference API. Args: - messages: Input messages for the chat completion + messages: Input messages tools: List of tool/function definitions callbacks: Callback functions (not used in native implementation) available_functions: Available functions for tool calling @@ -369,8 +470,19 @@ class AzureCompletion(BaseLLM): response_model: Response model Returns: - Chat completion response or tool call result + Completion response or tool call result """ + if self.api == "responses": + return self._responses_delegate.call( + messages=messages, + tools=tools, + callbacks=callbacks, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + with llm_call_context(): try: # Emit call started event @@ -429,10 +541,10 @@ class AzureCompletion(BaseLLM): from_agent: Any | None = None, response_model: type[BaseModel] | None = None, ) -> str | Any: - """Call Azure AI Inference chat completions API asynchronously. + """Call Azure AI Inference API asynchronously. Args: - messages: Input messages for the chat completion + messages: Input messages tools: List of tool/function definitions callbacks: Callback functions (not used in native implementation) available_functions: Available functions for tool calling @@ -441,8 +553,19 @@ class AzureCompletion(BaseLLM): response_model: Pydantic model for structured output Returns: - Chat completion response or tool call result + Completion response or tool call result """ + if self.api == "responses": + return await self._responses_delegate.acall( + messages=messages, + tools=tools, + callbacks=callbacks, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + with llm_call_context(): try: self._emit_call_started_event( @@ -1178,6 +1301,32 @@ class AzureCompletion(BaseLLM): return result return {"total_tokens": 0} + @property + def last_response_id(self) -> str | None: + """Get the last response ID from Responses API auto-chaining.""" + if self._responses_delegate is not None: + result: str | None = self._responses_delegate.last_response_id + return result + return None + + @property + def last_reasoning_items(self) -> list[Any] | None: + """Get the last reasoning items from Responses API auto-chain reasoning.""" + if self._responses_delegate is not None: + result: list[Any] | None = self._responses_delegate.last_reasoning_items + return result + return None + + def reset_chain(self) -> None: + """Reset the Responses API auto-chain state.""" + if self._responses_delegate is not None: + self._responses_delegate.reset_chain() + + def reset_reasoning_chain(self) -> None: + """Reset the Responses API reasoning chain state.""" + if self._responses_delegate is not None: + self._responses_delegate.reset_reasoning_chain() + async def aclose(self) -> None: """Close the async client and clean up resources. diff --git a/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_delegates_to_responses.yaml b/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_delegates_to_responses.yaml new file mode 100644 index 000000000..7d3bd8437 --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_delegates_to_responses.yaml @@ -0,0 +1,133 @@ +interactions: +- request: + body: '{"input":[{"role":"user","content":"Say hello in one sentence."}],"model":"gpt-5.2-chat"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - kkarmakar-ai-eus2.openai.azure.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 2.32.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.12 + method: POST + uri: https://fake-azure-endpoint.openai.azure.com/openai/v1/responses + response: + body: + string: "{\n \"id\": \"resp_0473c8c2b1c49f8c0069f23d0910e081958ebce72a734935c7\",\n + \ \"object\": \"response\",\n \"created_at\": 1777483017,\n \"status\": + \"completed\",\n \"background\": false,\n \"completed_at\": 1777483018,\n + \ \"content_filters\": [\n {\n \"blocked\": false,\n \"source_type\": + \"prompt\",\n \"content_filter_raw\": [],\n \"content_filter_results\": + {\n \"jailbreak\": {\n \"detected\": false,\n \"filtered\": + false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\": + \"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n + \ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\": + false,\n \"severity\": \"safe\"\n },\n \"self_harm\": + {\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n + \ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n + \ \"end_offset\": 368,\n \"check_offset\": 0\n }\n },\n + \ {\n \"blocked\": false,\n \"source_type\": \"completion\",\n + \ \"content_filter_raw\": [],\n \"content_filter_results\": {\n \"protected_material_code\": + {\n \"detected\": false,\n \"filtered\": false\n },\n + \ \"protected_material_text\": {\n \"detected\": false,\n \"filtered\": + false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\": + \"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n + \ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\": + false,\n \"severity\": \"safe\"\n },\n \"self_harm\": + {\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n + \ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n + \ \"end_offset\": 53,\n \"check_offset\": 0\n }\n }\n + \ ],\n \"error\": null,\n \"frequency_penalty\": 0.0,\n \"incomplete_details\": + null,\n \"instructions\": null,\n \"max_output_tokens\": null,\n \"max_tool_calls\": + null,\n \"model\": \"gpt-5.2-chat\",\n \"output\": [\n {\n \"id\": + \"rs_0473c8c2b1c49f8c0069f23d09f24481959bcf9fd847a9a475\",\n \"type\": + \"reasoning\",\n \"summary\": []\n },\n {\n \"id\": \"msg_0473c8c2b1c49f8c0069f23d0a8ccc81958f776ad6016d7edd\",\n + \ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": + [\n {\n \"type\": \"output_text\",\n \"annotations\": + [],\n \"logprobs\": [],\n \"text\": \"Hello! \\ud83d\\ude0a\"\n + \ }\n ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\": + true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\": + null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\": + \"medium\",\n \"summary\": null\n },\n \"safety_identifier\": null,\n + \ \"service_tier\": \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n + \ \"text\": {\n \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\": + \"medium\"\n },\n \"tool_choice\": \"auto\",\n \"tools\": [],\n \"top_logprobs\": + 0,\n \"top_p\": 0.85,\n \"truncation\": \"disabled\",\n \"usage\": {\n + \ \"input_tokens\": 12,\n \"input_tokens_details\": {\n \"cached_tokens\": + 0\n },\n \"output_tokens\": 22,\n \"output_tokens_details\": {\n + \ \"reasoning_tokens\": 0\n },\n \"total_tokens\": 34\n },\n \"user\": + null,\n \"metadata\": {}\n}" + headers: + Content-Length: + - '3203' + Content-Type: + - application/json + Date: + - Wed, 29 Apr 2026 17:16:59 GMT + Strict-Transport-Security: + - STS-XXX + apim-request-id: + - APIM-REQUEST-ID-XXX + skip-error-remapping: + - 'true' + x-content-type-options: + - X-CONTENT-TYPE-XXX + x-ms-client-request-id: + - X-MS-CLIENT-REQUEST-ID-XXX + x-ms-is-spilled-over: + - 'false' + x-ms-region: + - X-MS-REGION-XXX + x-ratelimit-abusepenalty-active: + - 'False' + x-ratelimit-key: + - gpt-5.2-chat + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-renewalperiod-requests: + - '60' + x-ratelimit-renewalperiod-tokens: + - '60' + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_with_tools_delegates.yaml b/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_with_tools_delegates.yaml new file mode 100644 index 000000000..5a886ea55 --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_call_with_tools_delegates.yaml @@ -0,0 +1,137 @@ +interactions: +- request: + body: '{"input":[{"role":"user","content":"What is 2 + 2? Be brief."}],"model":"gpt-5.2-chat","tools":[{"type":"web_search_preview"}]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '127' + content-type: + - application/json + host: + - kkarmakar-ai-eus2.openai.azure.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 2.32.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.12 + method: POST + uri: https://fake-azure-endpoint.openai.azure.com/openai/v1/responses + response: + body: + string: "{\n \"id\": \"resp_0d80ad9adad65fca0069f23d0c904c8194862acae4bd866cf5\",\n + \ \"object\": \"response\",\n \"created_at\": 1777483020,\n \"status\": + \"completed\",\n \"background\": false,\n \"completed_at\": 1777483022,\n + \ \"content_filters\": [\n {\n \"blocked\": false,\n \"source_type\": + \"prompt\",\n \"content_filter_raw\": [],\n \"content_filter_results\": + {\n \"jailbreak\": {\n \"detected\": false,\n \"filtered\": + false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\": + \"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n + \ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\": + false,\n \"severity\": \"safe\"\n },\n \"self_harm\": + {\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n + \ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n + \ \"end_offset\": 19017,\n \"check_offset\": 0\n }\n },\n + \ {\n \"blocked\": false,\n \"source_type\": \"completion\",\n + \ \"content_filter_raw\": [],\n \"content_filter_results\": {\n \"hate\": + {\n \"filtered\": false,\n \"severity\": \"safe\"\n },\n + \ \"sexual\": {\n \"filtered\": false,\n \"severity\": + \"safe\"\n },\n \"violence\": {\n \"filtered\": false,\n + \ \"severity\": \"safe\"\n },\n \"self_harm\": {\n \"filtered\": + false,\n \"severity\": \"safe\"\n },\n \"protected_material_code\": + {\n \"detected\": false,\n \"filtered\": false\n },\n + \ \"protected_material_text\": {\n \"detected\": false,\n \"filtered\": + false\n }\n },\n \"content_filter_offsets\": {\n \"start_offset\": + 0,\n \"end_offset\": 889,\n \"check_offset\": 0\n }\n }\n + \ ],\n \"error\": null,\n \"frequency_penalty\": 0.0,\n \"incomplete_details\": + null,\n \"instructions\": null,\n \"max_output_tokens\": null,\n \"max_tool_calls\": + null,\n \"model\": \"gpt-5.2-chat\",\n \"output\": [\n {\n \"id\": + \"rs_0d80ad9adad65fca0069f23d0d8b8c8194b1a9ab61ddc3420d\",\n \"type\": + \"reasoning\",\n \"summary\": []\n },\n {\n \"id\": \"msg_0d80ad9adad65fca0069f23d0e262081949c36d6cc1958eeed\",\n + \ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": + [\n {\n \"type\": \"output_text\",\n \"annotations\": + [],\n \"logprobs\": [],\n \"text\": \"2 + 2 = 4.\"\n }\n + \ ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\": + true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\": + null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\": + \"medium\",\n \"summary\": null\n },\n \"safety_identifier\": null,\n + \ \"service_tier\": \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n + \ \"text\": {\n \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\": + \"medium\"\n },\n \"tool_choice\": \"auto\",\n \"tools\": [\n {\n \"type\": + \"web_search_preview\",\n \"search_content_types\": [\n \"text\"\n + \ ],\n \"search_context_size\": \"medium\",\n \"user_location\": + {\n \"type\": \"approximate\",\n \"city\": null,\n \"country\": + \"US\",\n \"region\": null,\n \"timezone\": null\n }\n + \ }\n ],\n \"top_logprobs\": 0,\n \"top_p\": 0.85,\n \"truncation\": + \"disabled\",\n \"usage\": {\n \"input_tokens\": 4312,\n \"input_tokens_details\": + {\n \"cached_tokens\": 0\n },\n \"output_tokens\": 28,\n \"output_tokens_details\": + {\n \"reasoning_tokens\": 0\n },\n \"total_tokens\": 4340\n },\n + \ \"user\": null,\n \"metadata\": {}\n}" + headers: + Content-Length: + - '3507' + Content-Type: + - application/json + Date: + - Wed, 29 Apr 2026 17:17:03 GMT + Strict-Transport-Security: + - STS-XXX + apim-request-id: + - APIM-REQUEST-ID-XXX + skip-error-remapping: + - 'true' + x-content-type-options: + - X-CONTENT-TYPE-XXX + x-ms-client-request-id: + - X-MS-CLIENT-REQUEST-ID-XXX + x-ms-is-spilled-over: + - 'false' + x-ms-region: + - X-MS-REGION-XXX + x-ratelimit-abusepenalty-active: + - 'False' + x-ratelimit-key: + - gpt-5.2-chat + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-renewalperiod-requests: + - '60' + x-ratelimit-renewalperiod-tokens: + - '60' + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_completions_call_unchanged.yaml b/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_completions_call_unchanged.yaml new file mode 100644 index 000000000..3c4fc8675 --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/azure/TestAzureResponsesCall.test_completions_call_unchanged.yaml @@ -0,0 +1,84 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], + "stream": false}' + headers: + Accept: + - application/json + Connection: + - keep-alive + Content-Length: + - '90' + Content-Type: + - application/json + User-Agent: + - X-USER-AGENT-XXX + accept-encoding: + - ACCEPT-ENCODING-XXX + api-key: + - X-API-KEY-XXX + authorization: + - AUTHORIZATION-XXX + x-ms-client-request-id: + - X-MS-CLIENT-REQUEST-ID-XXX + method: POST + uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-5.2-chat/chat/completions?api-version=2024-02-15-preview + response: + body: + string: "{\"choices\":[{\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"protected_material_code\":{\"detected\":false,\"filtered\":false},\"protected_material_text\":{\"detected\":false,\"filtered\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"annotations\":[],\"content\":\"Hello! + \U0001F60A\",\"refusal\":null,\"role\":\"assistant\"}}],\"created\":1777483024,\"id\":\"chatcmpl-Da2oyIDHFopG5fmCKbhDiEYG5ciBN\",\"model\":\"gpt-5.2-chat-latest\",\"object\":\"chat.completion\",\"prompt_filter_results\":[{\"prompt_index\":0,\"content_filter_results\":{\"hate\":{\"filtered\":false,\"severity\":\"safe\"},\"jailbreak\":{\"detected\":false,\"filtered\":false},\"self_harm\":{\"filtered\":false,\"severity\":\"safe\"},\"sexual\":{\"filtered\":false,\"severity\":\"safe\"},\"violence\":{\"filtered\":false,\"severity\":\"safe\"}}}],\"service_tier\":\"default\",\"system_fingerprint\":null,\"usage\":{\"completion_tokens\":13,\"completion_tokens_details\":{\"accepted_prediction_tokens\":0,\"audio_tokens\":0,\"reasoning_tokens\":0,\"rejected_prediction_tokens\":0},\"prompt_tokens\":12,\"prompt_tokens_details\":{\"audio_tokens\":0,\"cached_tokens\":0},\"total_tokens\":25}}\n" + headers: + Content-Length: + - '1233' + Content-Type: + - application/json + Date: + - Wed, 29 Apr 2026 17:17:05 GMT + Strict-Transport-Security: + - STS-XXX + apim-request-id: + - APIM-REQUEST-ID-XXX + azureml-model-session: + - AZUREML-MODEL-SESSION-XXX + skip-error-remapping: + - 'true' + x-accel-buffering: + - 'no' + x-content-type-options: + - X-CONTENT-TYPE-XXX + x-ms-client-request-id: + - X-MS-CLIENT-REQUEST-ID-XXX + x-ms-deployment-name: + - gpt-5.2-chat + x-ms-is-spilled-over: + - 'false' + x-ms-rai-invoked: + - 'true' + x-ms-region: + - X-MS-REGION-XXX + x-ratelimit-abusepenalty-active: + - 'False' + x-ratelimit-key: + - gpt-5.2-chat + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-renewalperiod-requests: + - '60' + x-ratelimit-renewalperiod-tokens: + - '60' + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/azure/test_acall_delegates_to_responses.yaml b/lib/crewai/tests/cassettes/llms/azure/test_acall_delegates_to_responses.yaml new file mode 100644 index 000000000..205bb3dab --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/azure/test_acall_delegates_to_responses.yaml @@ -0,0 +1,128 @@ +interactions: +- request: + body: '{"input":[{"role":"user","content":"Say hello in one sentence."}],"model":"gpt-5.2-chat"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '89' + content-type: + - application/json + host: + - kkarmakar-ai-eus2.openai.azure.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 2.32.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.12 + method: POST + uri: https://fake-azure-endpoint.openai.azure.com/openai/v1/responses + response: + body: + string: "{\n \"id\": \"resp_02861ec017218a520069f23d21dbf88193aa91a73d63d91302\",\n + \ \"object\": \"response\",\n \"created_at\": 1777483041,\n \"status\": + \"completed\",\n \"background\": false,\n \"completed_at\": 1777483043,\n + \ \"content_filters\": [\n {\n \"blocked\": false,\n \"source_type\": + \"prompt\",\n \"content_filter_raw\": [],\n \"content_filter_results\": + {\n \"jailbreak\": {\n \"detected\": false,\n \"filtered\": + false\n }\n },\n \"content_filter_offsets\": {\n \"start_offset\": + 0,\n \"end_offset\": 368,\n \"check_offset\": 0\n }\n },\n + \ {\n \"blocked\": false,\n \"source_type\": \"completion\",\n + \ \"content_filter_raw\": [],\n \"content_filter_results\": {\n \"protected_material_text\": + {\n \"detected\": false,\n \"filtered\": false\n },\n + \ \"protected_material_code\": {\n \"detected\": false,\n \"filtered\": + false\n },\n \"hate\": {\n \"filtered\": false,\n \"severity\": + \"safe\"\n },\n \"sexual\": {\n \"filtered\": false,\n + \ \"severity\": \"safe\"\n },\n \"violence\": {\n \"filtered\": + false,\n \"severity\": \"safe\"\n },\n \"self_harm\": + {\n \"filtered\": false,\n \"severity\": \"safe\"\n }\n + \ },\n \"content_filter_offsets\": {\n \"start_offset\": 0,\n + \ \"end_offset\": 44,\n \"check_offset\": 0\n }\n }\n + \ ],\n \"error\": null,\n \"frequency_penalty\": 0.0,\n \"incomplete_details\": + null,\n \"instructions\": null,\n \"max_output_tokens\": null,\n \"max_tool_calls\": + null,\n \"model\": \"gpt-5.2-chat\",\n \"output\": [\n {\n \"id\": + \"rs_02861ec017218a520069f23d2287ac819399dd23b8dd56028e\",\n \"type\": + \"reasoning\",\n \"summary\": []\n },\n {\n \"id\": \"msg_02861ec017218a520069f23d23082c81939838ab2eebf4e89c\",\n + \ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": + [\n {\n \"type\": \"output_text\",\n \"annotations\": + [],\n \"logprobs\": [],\n \"text\": \"Hello! \\ud83d\\udc4b\"\n + \ }\n ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\": + true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\": + null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\": + \"medium\",\n \"summary\": null\n },\n \"safety_identifier\": null,\n + \ \"service_tier\": \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n + \ \"text\": {\n \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\": + \"medium\"\n },\n \"tool_choice\": \"auto\",\n \"tools\": [],\n \"top_logprobs\": + 0,\n \"top_p\": 0.85,\n \"truncation\": \"disabled\",\n \"usage\": {\n + \ \"input_tokens\": 12,\n \"input_tokens_details\": {\n \"cached_tokens\": + 0\n },\n \"output_tokens\": 21,\n \"output_tokens_details\": {\n + \ \"reasoning_tokens\": 0\n },\n \"total_tokens\": 33\n },\n \"user\": + null,\n \"metadata\": {}\n}" + headers: + Content-Length: + - '2844' + Content-Type: + - application/json + Date: + - Wed, 29 Apr 2026 17:17:25 GMT + Strict-Transport-Security: + - STS-XXX + apim-request-id: + - APIM-REQUEST-ID-XXX + skip-error-remapping: + - 'true' + x-content-type-options: + - X-CONTENT-TYPE-XXX + x-ms-client-request-id: + - X-MS-CLIENT-REQUEST-ID-XXX + x-ms-is-spilled-over: + - 'false' + x-ms-region: + - X-MS-REGION-XXX + x-ratelimit-abusepenalty-active: + - 'False' + x-ratelimit-key: + - gpt-5.2-chat + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-renewalperiod-requests: + - '60' + x-ratelimit-renewalperiod-tokens: + - '60' + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/llms/azure/test_azure_responses.py b/lib/crewai/tests/llms/azure/test_azure_responses.py new file mode 100644 index 000000000..765dbb40b --- /dev/null +++ b/lib/crewai/tests/llms/azure/test_azure_responses.py @@ -0,0 +1,395 @@ +"""Tests for Azure OpenAI Responses API support. + +Verifies that AzureCompletion with api='responses' correctly delegates +to OpenAICompletion configured with the Azure OpenAI /openai/v1/ base URL. +""" + +import os +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +@pytest.fixture +def azure_env(): + """Set Azure environment variables for tests.""" + with patch.dict( + os.environ, + { + "AZURE_API_KEY": "test-azure-key", + "AZURE_ENDPOINT": "https://myresource.openai.azure.com", + }, + ): + yield + + +@pytest.fixture +def mock_openai_completion(): + """Mock OpenAICompletion to avoid real client creation. + + Patches at the source module so that the dynamic import inside + _init_responses_delegate picks up the mock. + """ + instance = MagicMock() + instance.call = MagicMock(return_value="responses-result") + instance.acall = AsyncMock(return_value="async-responses-result") + instance.last_response_id = "resp_abc123" + instance.last_reasoning_items = [{"type": "reasoning"}] + instance.reset_chain = MagicMock() + instance.reset_reasoning_chain = MagicMock() + mock_cls = MagicMock(return_value=instance) + + with patch( + "crewai.llms.providers.openai.completion.OpenAICompletion", + mock_cls, + ): + yield mock_cls, instance + + +# --------------------------------------------------------------------------- +# Helper to build AzureCompletion with api="responses" while mocking imports +# --------------------------------------------------------------------------- + + +def _create_azure_responses(**overrides): + """Create an AzureCompletion(api='responses'). + + Must be called inside a context where OpenAICompletion is already mocked + (i.e. via the ``mock_openai_completion`` fixture). + """ + from crewai.llms.providers.azure.completion import AzureCompletion + + defaults = { + "model": "gpt-4o", + "api_key": "test-azure-key", + "endpoint": "https://myresource.openai.azure.com", + "api": "responses", + } + defaults.update(overrides) + return AzureCompletion(**defaults) + + +# --------------------------------------------------------------------------- +# Initialization tests +# --------------------------------------------------------------------------- + + +class TestAzureResponsesInit: + """Test initialization with api='responses'.""" + + def test_default_api_is_completions(self): + """Default api should be 'completions' (existing behaviour).""" + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.api == "completions" + assert comp._responses_delegate is None + + def test_responses_api_creates_delegate(self, mock_openai_completion): + mock_cls, instance = mock_openai_completion + comp = _create_azure_responses() + + assert comp.api == "responses" + assert comp._responses_delegate is instance + mock_cls.assert_called_once() + + def test_completions_clients_not_created_in_responses_mode( + self, mock_openai_completion + ): + """When api='responses', azure-ai-inference clients should not be created.""" + _mock_cls, _ = mock_openai_completion + comp = _create_azure_responses() + + assert comp._client is None + assert comp._async_client is None + + def test_responses_base_url_from_base_endpoint(self, mock_openai_completion): + mock_cls, _ = mock_openai_completion + _create_azure_responses( + endpoint="https://myresource.openai.azure.com", + ) + call_kwargs = mock_cls.call_args[1] + assert ( + call_kwargs["base_url"] == "https://myresource.openai.azure.com/openai/v1/" + ) + + def test_responses_base_url_strips_deployment_path(self, mock_openai_completion): + """Endpoint with /openai/deployments/... should still produce correct base_url.""" + mock_cls, _ = mock_openai_completion + _create_azure_responses( + endpoint="https://myresource.openai.azure.com/openai/deployments/gpt-4o", + ) + call_kwargs = mock_cls.call_args[1] + assert ( + call_kwargs["base_url"] == "https://myresource.openai.azure.com/openai/v1/" + ) + + def test_responses_base_url_preserves_port(self, mock_openai_completion): + mock_cls, _ = mock_openai_completion + _create_azure_responses( + endpoint="https://myresource.openai.azure.com:8443/openai/deployments/gpt-4o", + ) + call_kwargs = mock_cls.call_args[1] + assert ( + call_kwargs["base_url"] + == "https://myresource.openai.azure.com:8443/openai/v1/" + ) + + def test_delegate_receives_model_and_api_key(self, mock_openai_completion): + mock_cls, _ = mock_openai_completion + _create_azure_responses( + model="gpt-4o", + api_key="my-key", + ) + call_kwargs = mock_cls.call_args[1] + assert call_kwargs["model"] == "gpt-4o" + assert call_kwargs["api_key"] == "my-key" + assert call_kwargs["api"] == "responses" + assert call_kwargs["provider"] == "openai" + + def test_delegate_receives_optional_params(self, mock_openai_completion): + mock_cls, _ = mock_openai_completion + _create_azure_responses( + temperature=0.5, + top_p=0.9, + max_tokens=1000, + max_completion_tokens=800, + reasoning_effort="medium", + instructions="Be helpful", + store=True, + previous_response_id="resp_prev", + include=["reasoning.encrypted_content"], + builtin_tools=["web_search"], + parse_tool_outputs=True, + auto_chain=True, + auto_chain_reasoning=True, + stream=True, + ) + call_kwargs = mock_cls.call_args[1] + assert call_kwargs["temperature"] == 0.5 + assert call_kwargs["top_p"] == 0.9 + assert call_kwargs["max_tokens"] == 1000 + assert call_kwargs["max_completion_tokens"] == 800 + assert call_kwargs["reasoning_effort"] == "medium" + assert call_kwargs["instructions"] == "Be helpful" + assert call_kwargs["store"] is True + assert call_kwargs["previous_response_id"] == "resp_prev" + assert call_kwargs["include"] == ["reasoning.encrypted_content"] + assert call_kwargs["builtin_tools"] == ["web_search"] + assert call_kwargs["parse_tool_outputs"] is True + assert call_kwargs["auto_chain"] is True + assert call_kwargs["auto_chain_reasoning"] is True + assert call_kwargs["stream"] is True + + def test_delegate_omits_unset_optional_params(self, mock_openai_completion): + """Params left at defaults should not be passed to the delegate.""" + mock_cls, _ = mock_openai_completion + _create_azure_responses() + call_kwargs = mock_cls.call_args[1] + # These should NOT be in kwargs because they were not set + assert "temperature" not in call_kwargs + assert "reasoning_effort" not in call_kwargs + assert "instructions" not in call_kwargs + assert "store" not in call_kwargs + assert "max_completion_tokens" not in call_kwargs + + +# --------------------------------------------------------------------------- +# Call delegation tests (VCR cassette-based) +# --------------------------------------------------------------------------- + + +class TestAzureResponsesCall: + """Test call / acall delegation to the Responses API using VCR cassettes.""" + + @pytest.mark.vcr() + def test_call_delegates_to_responses(self): + from crewai.llm import LLM + + llm = LLM(model="azure/gpt-5.2-chat", api="responses") + result = llm.call("Say hello in one sentence.") + + assert isinstance(result, str) + assert len(result) > 0 + + @pytest.mark.vcr() + def test_call_with_tools_delegates(self): + from crewai.llm import LLM + + llm = LLM( + model="azure/gpt-5.2-chat", + api="responses", + builtin_tools=["web_search"], + ) + result = llm.call("What is 2 + 2? Be brief.") + + assert isinstance(result, str) + assert len(result) > 0 + + @pytest.mark.vcr() + def test_completions_call_unchanged(self): + """Default api='completions' should not use the responses delegate.""" + from crewai.llm import LLM + + llm = LLM(model="azure/gpt-5.2-chat") + result = llm.call("Say hello in one sentence.") + + assert isinstance(result, str) + assert len(result) > 0 + + +# --------------------------------------------------------------------------- +# Delegated property & method tests +# --------------------------------------------------------------------------- + + +class TestAzureResponsesProperties: + """Test properties and methods delegated to the responses delegate.""" + + def test_last_response_id(self, mock_openai_completion): + _mock_cls, _ = mock_openai_completion + comp = _create_azure_responses() + assert comp.last_response_id == "resp_abc123" + + def test_last_response_id_none_for_completions(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.last_response_id is None + + def test_last_reasoning_items(self, mock_openai_completion): + _mock_cls, _ = mock_openai_completion + comp = _create_azure_responses() + assert comp.last_reasoning_items == [{"type": "reasoning"}] + + def test_reset_chain(self, mock_openai_completion): + _mock_cls, instance = mock_openai_completion + comp = _create_azure_responses() + comp.reset_chain() + instance.reset_chain.assert_called_once() + + def test_reset_reasoning_chain(self, mock_openai_completion): + _mock_cls, instance = mock_openai_completion + comp = _create_azure_responses() + comp.reset_reasoning_chain() + instance.reset_reasoning_chain.assert_called_once() + + def test_reset_chain_noop_for_completions(self): + """reset_chain should not raise when delegate is None.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + comp.reset_chain() # should not raise + + +# --------------------------------------------------------------------------- +# Feature-support method tests +# --------------------------------------------------------------------------- + + +class TestAzureResponsesFeatures: + """Test supports_* and config methods.""" + + def test_supports_function_calling_responses(self, mock_openai_completion): + _mock_cls, _ = mock_openai_completion + comp = _create_azure_responses() + assert comp.supports_function_calling() is True + + def test_supports_function_calling_completions_openai_model(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.supports_function_calling() is True + + def test_supports_stop_words_false_for_responses(self, mock_openai_completion): + _mock_cls, _ = mock_openai_completion + comp = _create_azure_responses(model="o4-mini") + assert comp.supports_stop_words() is False + + def test_supports_stop_words_true_for_completions_gpt4(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + assert comp.supports_stop_words() is True + + def test_to_config_dict_includes_responses_fields(self, mock_openai_completion): + _mock_cls, _ = mock_openai_completion + comp = _create_azure_responses( + reasoning_effort="high", + instructions="Be concise", + store=True, + max_completion_tokens=500, + ) + config = comp.to_config_dict() + assert config["api"] == "responses" + assert config["reasoning_effort"] == "high" + assert config["instructions"] == "Be concise" + assert config["store"] is True + assert config["max_completion_tokens"] == 500 + + def test_to_config_dict_omits_api_for_completions(self): + from crewai.llms.providers.azure.completion import AzureCompletion + + comp = AzureCompletion( + model="gpt-4o", + api_key="key", + endpoint="https://res.openai.azure.com", + ) + config = comp.to_config_dict() + assert "api" not in config + + +# --------------------------------------------------------------------------- +# LLM factory integration test +# --------------------------------------------------------------------------- + + +class TestAzureResponsesViaLLMFactory: + """Test that the LLM factory passes api='responses' through to AzureCompletion.""" + + @pytest.mark.usefixtures("azure_env") + def test_llm_factory_passes_api_kwarg(self): + """LLM(model='azure/gpt-4o', api='responses') should create AzureCompletion + with api='responses' and a delegate.""" + with ( + patch( + "crewai.llms.providers.openai.completion.OpenAI", + ), + patch( + "crewai.llms.providers.openai.completion.AsyncOpenAI", + ), + ): + from crewai.llm import LLM + + llm = LLM(model="azure/gpt-4o", api="responses") + + from crewai.llms.providers.azure.completion import AzureCompletion + + assert isinstance(llm, AzureCompletion) + assert llm.api == "responses" + assert llm._responses_delegate is not None diff --git a/lib/crewai/tests/llms/azure/test_azure_responses_async.py b/lib/crewai/tests/llms/azure/test_azure_responses_async.py new file mode 100644 index 000000000..934b5fe0a --- /dev/null +++ b/lib/crewai/tests/llms/azure/test_azure_responses_async.py @@ -0,0 +1,15 @@ +"""Async tests for Azure OpenAI Responses API support.""" + +import pytest + + +@pytest.mark.vcr() +@pytest.mark.asyncio +async def test_acall_delegates_to_responses(): + from crewai.llm import LLM + + llm = LLM(model="azure/gpt-5.2-chat", api="responses") + result = await llm.acall("Say hello in one sentence.") + + assert isinstance(result, str) + assert len(result) > 0