mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-11 05:22:41 +00:00
Compare commits
1 Commits
feature/re
...
devin/1773
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f4fa90dc04 |
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Any, TypedDict
|
||||
from typing import TYPE_CHECKING, Any, Literal, TypedDict
|
||||
|
||||
from pydantic import BaseModel
|
||||
from typing_extensions import Self
|
||||
@@ -69,11 +69,37 @@ class AzureCompletionParams(TypedDict, total=False):
|
||||
tool_choice: str
|
||||
|
||||
|
||||
# Default API version for Azure Responses API support
|
||||
AZURE_RESPONSES_API_VERSION = "2025-03-01-preview"
|
||||
|
||||
|
||||
class AzureCompletion(BaseLLM):
|
||||
"""Azure AI Inference native completion implementation.
|
||||
|
||||
This class provides direct integration with the Azure AI Inference Python SDK,
|
||||
offering native function calling, streaming support, and proper Azure authentication.
|
||||
|
||||
Supports both the Chat Completions API (default) and the Responses API.
|
||||
When api="responses" is specified, the class delegates to an internal
|
||||
OpenAICompletion instance configured with AzureOpenAI clients from the
|
||||
openai Python SDK, which natively supports the Responses API on Azure.
|
||||
|
||||
Args:
|
||||
api: Which API to use - "completions" (default) or "responses".
|
||||
When "responses" is selected, Azure OpenAI Responses API is used
|
||||
via the openai Python SDK's AzureOpenAI client.
|
||||
instructions: System-level instructions (Responses API only).
|
||||
store: Whether to store responses for multi-turn (Responses API only).
|
||||
previous_response_id: ID of previous response for multi-turn (Responses API only).
|
||||
include: Additional data to include in response (Responses API only).
|
||||
builtin_tools: List of OpenAI built-in tools to enable (Responses API only).
|
||||
Supported: "web_search", "file_search", "code_interpreter", "computer_use".
|
||||
parse_tool_outputs: Whether to return structured ResponsesAPIResult with
|
||||
parsed built-in tool outputs instead of just text (Responses API only).
|
||||
auto_chain: Automatically track and use response IDs for multi-turn
|
||||
conversations (Responses API only).
|
||||
auto_chain_reasoning: Automatically track and pass encrypted reasoning items
|
||||
for ZDR (Zero Data Retention) compliance (Responses API only).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -89,10 +115,22 @@ class AzureCompletion(BaseLLM):
|
||||
frequency_penalty: float | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
max_completion_tokens: int | None = None,
|
||||
stop: list[str] | None = None,
|
||||
stream: bool = False,
|
||||
interceptor: BaseInterceptor[Any, Any] | None = None,
|
||||
response_format: type[BaseModel] | None = None,
|
||||
api: Literal["completions", "responses"] = "completions",
|
||||
instructions: str | None = None,
|
||||
store: bool | None = None,
|
||||
previous_response_id: str | None = None,
|
||||
include: list[str] | None = None,
|
||||
builtin_tools: list[str] | None = None,
|
||||
parse_tool_outputs: bool = False,
|
||||
auto_chain: bool = False,
|
||||
auto_chain_reasoning: bool = False,
|
||||
seed: int | None = None,
|
||||
reasoning_effort: str | None = None,
|
||||
**kwargs: Any,
|
||||
):
|
||||
"""Initialize Azure AI Inference chat completion client.
|
||||
@@ -109,15 +147,27 @@ class AzureCompletion(BaseLLM):
|
||||
frequency_penalty: Frequency penalty (-2 to 2)
|
||||
presence_penalty: Presence penalty (-2 to 2)
|
||||
max_tokens: Maximum tokens in response
|
||||
max_completion_tokens: Maximum completion tokens in response
|
||||
stop: Stop sequences
|
||||
stream: Enable streaming responses
|
||||
interceptor: HTTP interceptor (not yet supported for Azure).
|
||||
response_format: Pydantic model for structured output. Used as default when
|
||||
response_model is not passed to call()/acall() methods.
|
||||
Only works with OpenAI models deployed on Azure.
|
||||
api: Which API to use - "completions" (default) or "responses".
|
||||
instructions: System-level instructions (Responses API only).
|
||||
store: Whether to store responses for multi-turn (Responses API only).
|
||||
previous_response_id: ID of previous response for multi-turn (Responses API only).
|
||||
include: Additional data to include in response (Responses API only).
|
||||
builtin_tools: List of OpenAI built-in tools to enable (Responses API only).
|
||||
parse_tool_outputs: Whether to return structured ResponsesAPIResult (Responses API only).
|
||||
auto_chain: Auto-track response IDs for multi-turn (Responses API only).
|
||||
auto_chain_reasoning: Auto-track encrypted reasoning items for ZDR (Responses API only).
|
||||
seed: Random seed for deterministic outputs.
|
||||
reasoning_effort: Reasoning effort level for reasoning models.
|
||||
**kwargs: Additional parameters
|
||||
"""
|
||||
if interceptor is not None:
|
||||
if interceptor is not None and api != "responses":
|
||||
raise NotImplementedError(
|
||||
"HTTP interceptors are not yet supported for Azure AI Inference provider. "
|
||||
"Interceptors are currently supported for OpenAI and Anthropic providers only."
|
||||
@@ -128,12 +178,13 @@ class AzureCompletion(BaseLLM):
|
||||
)
|
||||
|
||||
self.api_key = api_key or os.getenv("AZURE_API_KEY")
|
||||
self.endpoint = (
|
||||
self.base_endpoint = (
|
||||
endpoint
|
||||
or os.getenv("AZURE_ENDPOINT")
|
||||
or os.getenv("AZURE_OPENAI_ENDPOINT")
|
||||
or os.getenv("AZURE_API_BASE")
|
||||
)
|
||||
self.api = api
|
||||
self.api_version = api_version or os.getenv("AZURE_API_VERSION") or "2024-06-01"
|
||||
self.timeout = timeout
|
||||
self.max_retries = max_retries
|
||||
@@ -142,34 +193,68 @@ class AzureCompletion(BaseLLM):
|
||||
raise ValueError(
|
||||
"Azure API key is required. Set AZURE_API_KEY environment variable or pass api_key parameter."
|
||||
)
|
||||
if not self.endpoint:
|
||||
if not self.base_endpoint:
|
||||
raise ValueError(
|
||||
"Azure endpoint is required. Set AZURE_ENDPOINT environment variable or pass endpoint parameter."
|
||||
)
|
||||
|
||||
# Validate and potentially fix Azure OpenAI endpoint URL
|
||||
self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model)
|
||||
# Store the base endpoint before validation modifies it
|
||||
self.endpoint = self.base_endpoint
|
||||
|
||||
# Build client kwargs
|
||||
client_kwargs = {
|
||||
"endpoint": self.endpoint,
|
||||
"credential": AzureKeyCredential(self.api_key),
|
||||
}
|
||||
# Responses API mode: delegate to OpenAICompletion with AzureOpenAI clients
|
||||
self._responses_delegate: Any | None = None
|
||||
if self.api == "responses":
|
||||
self._init_responses_delegate(
|
||||
model=model,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
frequency_penalty=frequency_penalty,
|
||||
presence_penalty=presence_penalty,
|
||||
max_tokens=max_tokens,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
response_format=response_format,
|
||||
instructions=instructions,
|
||||
store=store,
|
||||
previous_response_id=previous_response_id,
|
||||
include=include,
|
||||
builtin_tools=builtin_tools,
|
||||
parse_tool_outputs=parse_tool_outputs,
|
||||
auto_chain=auto_chain,
|
||||
auto_chain_reasoning=auto_chain_reasoning,
|
||||
seed=seed,
|
||||
reasoning_effort=reasoning_effort,
|
||||
interceptor=interceptor,
|
||||
api_version=api_version,
|
||||
)
|
||||
else:
|
||||
# Validate and potentially fix Azure OpenAI endpoint URL (completions mode)
|
||||
self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model)
|
||||
|
||||
# Add api_version if specified (primarily for Azure OpenAI endpoints)
|
||||
if self.api_version:
|
||||
client_kwargs["api_version"] = self.api_version
|
||||
# Build client kwargs
|
||||
client_kwargs = {
|
||||
"endpoint": self.endpoint,
|
||||
"credential": AzureKeyCredential(self.api_key),
|
||||
}
|
||||
|
||||
self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
|
||||
# Add api_version if specified (primarily for Azure OpenAI endpoints)
|
||||
if self.api_version:
|
||||
client_kwargs["api_version"] = self.api_version
|
||||
|
||||
self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
|
||||
self.client = ChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
|
||||
|
||||
self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type]
|
||||
|
||||
self.top_p = top_p
|
||||
self.frequency_penalty = frequency_penalty
|
||||
self.presence_penalty = presence_penalty
|
||||
self.max_tokens = max_tokens
|
||||
self.max_completion_tokens = max_completion_tokens
|
||||
self.stream = stream
|
||||
self.response_format = response_format
|
||||
self.seed = seed
|
||||
self.reasoning_effort = reasoning_effort
|
||||
|
||||
self.is_openai_model = any(
|
||||
prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"]
|
||||
@@ -180,6 +265,100 @@ class AzureCompletion(BaseLLM):
|
||||
and "/openai/deployments/" in self.endpoint
|
||||
)
|
||||
|
||||
def _init_responses_delegate(
|
||||
self,
|
||||
model: str,
|
||||
temperature: float | None = None,
|
||||
top_p: float | None = None,
|
||||
frequency_penalty: float | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
max_tokens: int | None = None,
|
||||
max_completion_tokens: int | None = None,
|
||||
stop: list[str] | None = None,
|
||||
stream: bool = False,
|
||||
response_format: type[BaseModel] | None = None,
|
||||
instructions: str | None = None,
|
||||
store: bool | None = None,
|
||||
previous_response_id: str | None = None,
|
||||
include: list[str] | None = None,
|
||||
builtin_tools: list[str] | None = None,
|
||||
parse_tool_outputs: bool = False,
|
||||
auto_chain: bool = False,
|
||||
auto_chain_reasoning: bool = False,
|
||||
seed: int | None = None,
|
||||
reasoning_effort: str | None = None,
|
||||
interceptor: BaseInterceptor[Any, Any] | None = None,
|
||||
api_version: str | None = None,
|
||||
) -> None:
|
||||
"""Initialize the Responses API delegate using OpenAICompletion with AzureOpenAI clients.
|
||||
|
||||
Creates an OpenAICompletion instance and replaces its OpenAI clients with
|
||||
AzureOpenAI/AsyncAzureOpenAI clients configured with Azure credentials.
|
||||
"""
|
||||
try:
|
||||
from openai import AzureOpenAI, AsyncAzureOpenAI
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"OpenAI package is required for Azure Responses API support. "
|
||||
'Install it with: uv add "crewai[openai]" or pip install openai'
|
||||
) from None
|
||||
|
||||
from crewai.llms.providers.openai.completion import OpenAICompletion
|
||||
|
||||
# Determine the correct API version for Responses API
|
||||
responses_api_version = api_version or os.getenv("AZURE_API_VERSION") or AZURE_RESPONSES_API_VERSION
|
||||
|
||||
# Extract the base Azure endpoint (without /openai/deployments/...)
|
||||
azure_endpoint = self.base_endpoint or ""
|
||||
azure_endpoint = azure_endpoint.rstrip("/")
|
||||
# Strip /openai/deployments/... suffix if present
|
||||
if "/openai/deployments/" in azure_endpoint:
|
||||
azure_endpoint = azure_endpoint.split("/openai/deployments/")[0]
|
||||
|
||||
# Build AzureOpenAI client kwargs
|
||||
azure_kwargs: dict[str, Any] = {
|
||||
"azure_endpoint": azure_endpoint,
|
||||
"api_key": self.api_key,
|
||||
"api_version": responses_api_version,
|
||||
}
|
||||
if self.timeout is not None:
|
||||
azure_kwargs["timeout"] = self.timeout
|
||||
if self.max_retries:
|
||||
azure_kwargs["max_retries"] = self.max_retries
|
||||
|
||||
# Create the OpenAICompletion delegate with responses API config
|
||||
delegate = OpenAICompletion(
|
||||
model=model,
|
||||
api_key=self.api_key,
|
||||
api="responses",
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
frequency_penalty=frequency_penalty,
|
||||
presence_penalty=presence_penalty,
|
||||
max_tokens=max_tokens,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
response_format=response_format,
|
||||
instructions=instructions,
|
||||
store=store,
|
||||
previous_response_id=previous_response_id,
|
||||
include=include,
|
||||
builtin_tools=builtin_tools,
|
||||
parse_tool_outputs=parse_tool_outputs,
|
||||
auto_chain=auto_chain,
|
||||
auto_chain_reasoning=auto_chain_reasoning,
|
||||
seed=seed,
|
||||
reasoning_effort=reasoning_effort,
|
||||
interceptor=interceptor,
|
||||
)
|
||||
|
||||
# Replace the OpenAI clients with AzureOpenAI clients
|
||||
delegate.client = AzureOpenAI(**azure_kwargs) # type: ignore[assignment]
|
||||
delegate.async_client = AsyncAzureOpenAI(**azure_kwargs) # type: ignore[assignment]
|
||||
|
||||
self._responses_delegate = delegate
|
||||
|
||||
@staticmethod
|
||||
def _validate_and_fix_endpoint(endpoint: str, model: str) -> str:
|
||||
"""Validate and fix Azure endpoint URL format.
|
||||
@@ -269,6 +448,30 @@ class AzureCompletion(BaseLLM):
|
||||
)
|
||||
raise error
|
||||
|
||||
@property
|
||||
def last_response_id(self) -> str | None:
|
||||
"""Get the last response ID from auto-chaining (Responses API only)."""
|
||||
if self._responses_delegate is not None:
|
||||
return self._responses_delegate.last_response_id
|
||||
return None
|
||||
|
||||
def reset_chain(self) -> None:
|
||||
"""Reset the auto-chain state (Responses API only)."""
|
||||
if self._responses_delegate is not None:
|
||||
self._responses_delegate.reset_chain()
|
||||
|
||||
@property
|
||||
def last_reasoning_items(self) -> list[Any] | None:
|
||||
"""Get the last reasoning items from auto-chain reasoning (Responses API only)."""
|
||||
if self._responses_delegate is not None:
|
||||
return self._responses_delegate.last_reasoning_items
|
||||
return None
|
||||
|
||||
def reset_reasoning_chain(self) -> None:
|
||||
"""Reset the reasoning chain state (Responses API only)."""
|
||||
if self._responses_delegate is not None:
|
||||
self._responses_delegate.reset_reasoning_chain()
|
||||
|
||||
def call(
|
||||
self,
|
||||
messages: str | list[LLMMessage],
|
||||
@@ -279,7 +482,7 @@ class AzureCompletion(BaseLLM):
|
||||
from_agent: Any | None = None,
|
||||
response_model: type[BaseModel] | None = None,
|
||||
) -> str | Any:
|
||||
"""Call Azure AI Inference chat completions API.
|
||||
"""Call Azure AI Inference API (Chat Completions or Responses based on api setting).
|
||||
|
||||
Args:
|
||||
messages: Input messages for the chat completion
|
||||
@@ -293,6 +496,18 @@ class AzureCompletion(BaseLLM):
|
||||
Returns:
|
||||
Chat completion response or tool call result
|
||||
"""
|
||||
# Delegate to Responses API if configured
|
||||
if self.api == "responses" and self._responses_delegate is not None:
|
||||
return self._responses_delegate.call(
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
callbacks=callbacks,
|
||||
available_functions=available_functions,
|
||||
from_task=from_task,
|
||||
from_agent=from_agent,
|
||||
response_model=response_model,
|
||||
)
|
||||
|
||||
with llm_call_context():
|
||||
try:
|
||||
# Emit call started event
|
||||
@@ -351,7 +566,7 @@ class AzureCompletion(BaseLLM):
|
||||
from_agent: Any | None = None,
|
||||
response_model: type[BaseModel] | None = None,
|
||||
) -> str | Any:
|
||||
"""Call Azure AI Inference chat completions API asynchronously.
|
||||
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).
|
||||
|
||||
Args:
|
||||
messages: Input messages for the chat completion
|
||||
@@ -365,6 +580,18 @@ class AzureCompletion(BaseLLM):
|
||||
Returns:
|
||||
Chat completion response or tool call result
|
||||
"""
|
||||
# Delegate to Responses API if configured
|
||||
if self.api == "responses" and self._responses_delegate is not None:
|
||||
return await self._responses_delegate.acall(
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
callbacks=callbacks,
|
||||
available_functions=available_functions,
|
||||
from_task=from_task,
|
||||
from_agent=from_agent,
|
||||
response_model=response_model,
|
||||
)
|
||||
|
||||
with llm_call_context():
|
||||
try:
|
||||
self._emit_call_started_event(
|
||||
|
||||
@@ -1403,3 +1403,594 @@ def test_azure_stop_words_still_applied_to_regular_responses():
|
||||
assert "Observation:" not in result
|
||||
assert "Found results" not in result
|
||||
assert "I need to search for more information" in result
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Azure Responses API Tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_azure_responses_api_initialization():
|
||||
"""Test that AzureCompletion can be initialized with api='responses'."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
instructions="You are a helpful assistant.",
|
||||
store=True,
|
||||
)
|
||||
|
||||
assert llm.api == "responses"
|
||||
assert llm._responses_delegate is not None
|
||||
assert llm._responses_delegate.api == "responses"
|
||||
assert llm._responses_delegate.instructions == "You are a helpful assistant."
|
||||
assert llm._responses_delegate.store is True
|
||||
assert llm.model == "gpt-4o"
|
||||
|
||||
|
||||
def test_azure_responses_api_default_is_completions():
|
||||
"""Test that the default API is 'completions' for backward compatibility."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
)
|
||||
|
||||
assert llm.api == "completions"
|
||||
assert llm._responses_delegate is None
|
||||
|
||||
|
||||
def test_azure_responses_api_delegate_uses_azure_openai_clients():
|
||||
"""Test that the delegate's clients are AzureOpenAI instances, not plain OpenAI."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
)
|
||||
|
||||
from openai import AzureOpenAI, AsyncAzureOpenAI
|
||||
|
||||
assert isinstance(llm._responses_delegate.client, AzureOpenAI)
|
||||
assert isinstance(llm._responses_delegate.async_client, AsyncAzureOpenAI)
|
||||
|
||||
|
||||
def test_azure_responses_api_strips_deployment_suffix_for_azure_endpoint():
|
||||
"""Test that /openai/deployments/... suffix is stripped when building Azure clients."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com/openai/deployments/gpt-4o",
|
||||
api="responses",
|
||||
)
|
||||
|
||||
# The delegate should have been created
|
||||
assert llm._responses_delegate is not None
|
||||
# The delegate's client should point to the base Azure endpoint
|
||||
from openai import AzureOpenAI
|
||||
assert isinstance(llm._responses_delegate.client, AzureOpenAI)
|
||||
|
||||
|
||||
def test_azure_responses_api_uses_correct_api_version():
|
||||
"""Test that the Responses API uses the correct API version."""
|
||||
from crewai.llms.providers.azure.completion import (
|
||||
AzureCompletion,
|
||||
AZURE_RESPONSES_API_VERSION,
|
||||
)
|
||||
|
||||
# Default version
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
)
|
||||
|
||||
assert llm._responses_delegate is not None
|
||||
# Check that the AzureOpenAI client was created with the right version
|
||||
assert AZURE_RESPONSES_API_VERSION == "2025-03-01-preview"
|
||||
|
||||
# Custom version
|
||||
llm_custom = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
api_version="2025-06-01",
|
||||
)
|
||||
assert llm_custom._responses_delegate is not None
|
||||
|
||||
|
||||
def test_azure_responses_api_passes_all_params_to_delegate():
|
||||
"""Test that all Responses API params are forwarded to the delegate."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
instructions="Be concise.",
|
||||
store=True,
|
||||
previous_response_id="resp_abc123",
|
||||
include=["reasoning.encrypted_content"],
|
||||
builtin_tools=["web_search"],
|
||||
parse_tool_outputs=True,
|
||||
auto_chain=True,
|
||||
auto_chain_reasoning=True,
|
||||
temperature=0.5,
|
||||
top_p=0.9,
|
||||
seed=42,
|
||||
reasoning_effort="high",
|
||||
)
|
||||
|
||||
delegate = llm._responses_delegate
|
||||
assert delegate is not None
|
||||
assert delegate.instructions == "Be concise."
|
||||
assert delegate.store is True
|
||||
assert delegate.previous_response_id == "resp_abc123"
|
||||
assert delegate.include == ["reasoning.encrypted_content"]
|
||||
assert delegate.builtin_tools == ["web_search"]
|
||||
assert delegate.parse_tool_outputs is True
|
||||
assert delegate.auto_chain is True
|
||||
assert delegate.auto_chain_reasoning is True
|
||||
assert delegate.temperature == 0.5
|
||||
assert delegate.top_p == 0.9
|
||||
assert delegate.seed == 42
|
||||
assert delegate.reasoning_effort == "high"
|
||||
|
||||
|
||||
def test_azure_responses_api_call_delegates_to_openai_completion():
|
||||
"""Test that call() delegates to the internal OpenAICompletion when api='responses'."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
llm._responses_delegate, "call", return_value="Hello from Responses API!"
|
||||
) as mock_call:
|
||||
result = llm.call("Hello!")
|
||||
|
||||
mock_call.assert_called_once_with(
|
||||
messages="Hello!",
|
||||
tools=None,
|
||||
callbacks=None,
|
||||
available_functions=None,
|
||||
from_task=None,
|
||||
from_agent=None,
|
||||
response_model=None,
|
||||
)
|
||||
assert result == "Hello from Responses API!"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_azure_responses_api_acall_delegates_to_openai_completion():
|
||||
"""Test that acall() delegates to the internal OpenAICompletion when api='responses'."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
)
|
||||
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
llm._responses_delegate.acall = AsyncMock(return_value="Async hello from Responses API!")
|
||||
|
||||
result = await llm.acall("Hello async!")
|
||||
|
||||
llm._responses_delegate.acall.assert_called_once_with(
|
||||
messages="Hello async!",
|
||||
tools=None,
|
||||
callbacks=None,
|
||||
available_functions=None,
|
||||
from_task=None,
|
||||
from_agent=None,
|
||||
response_model=None,
|
||||
)
|
||||
assert result == "Async hello from Responses API!"
|
||||
|
||||
|
||||
def test_azure_responses_api_call_with_tools():
|
||||
"""Test that call() passes tools to the delegate for Responses API."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
)
|
||||
|
||||
tools = [
|
||||
{
|
||||
"name": "get_weather",
|
||||
"description": "Get the weather for a location",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {"location": {"type": "string"}},
|
||||
"required": ["location"],
|
||||
},
|
||||
}
|
||||
]
|
||||
|
||||
with patch.object(
|
||||
llm._responses_delegate, "call", return_value="It's sunny."
|
||||
) as mock_call:
|
||||
result = llm.call(
|
||||
messages=[{"role": "user", "content": "What's the weather?"}],
|
||||
tools=tools,
|
||||
available_functions={"get_weather": lambda loc: "Sunny"},
|
||||
)
|
||||
|
||||
mock_call.assert_called_once()
|
||||
call_kwargs = mock_call.call_args
|
||||
assert call_kwargs.kwargs["tools"] == tools
|
||||
assert result == "It's sunny."
|
||||
|
||||
|
||||
def test_azure_responses_api_call_with_response_model():
|
||||
"""Test that call() passes response_model to the delegate for structured output."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
from pydantic import BaseModel
|
||||
|
||||
class WeatherResult(BaseModel):
|
||||
temperature: float
|
||||
condition: str
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
)
|
||||
|
||||
with patch.object(
|
||||
llm._responses_delegate, "call", return_value='{"temperature": 72.0, "condition": "sunny"}'
|
||||
) as mock_call:
|
||||
result = llm.call(
|
||||
messages="What's the weather?",
|
||||
response_model=WeatherResult,
|
||||
)
|
||||
|
||||
mock_call.assert_called_once()
|
||||
assert mock_call.call_args.kwargs["response_model"] == WeatherResult
|
||||
|
||||
|
||||
def test_azure_responses_api_last_response_id_property():
|
||||
"""Test that last_response_id property delegates to the internal delegate."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
auto_chain=True,
|
||||
)
|
||||
|
||||
# Initially None
|
||||
assert llm.last_response_id is None
|
||||
|
||||
# Set the delegate's internal state
|
||||
llm._responses_delegate._last_response_id = "resp_test123"
|
||||
assert llm.last_response_id == "resp_test123"
|
||||
|
||||
|
||||
def test_azure_responses_api_last_response_id_returns_none_for_completions():
|
||||
"""Test that last_response_id returns None when api='completions'."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
)
|
||||
|
||||
assert llm.last_response_id is None
|
||||
|
||||
|
||||
def test_azure_responses_api_reset_chain():
|
||||
"""Test that reset_chain delegates to the internal delegate."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
auto_chain=True,
|
||||
)
|
||||
|
||||
# Set and then reset
|
||||
llm._responses_delegate._last_response_id = "resp_test123"
|
||||
assert llm.last_response_id == "resp_test123"
|
||||
|
||||
llm.reset_chain()
|
||||
assert llm.last_response_id is None
|
||||
|
||||
|
||||
def test_azure_responses_api_reset_chain_no_op_for_completions():
|
||||
"""Test that reset_chain is a no-op when api='completions'."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
llm.reset_chain()
|
||||
|
||||
|
||||
def test_azure_responses_api_last_reasoning_items_property():
|
||||
"""Test that last_reasoning_items property delegates to the internal delegate."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
auto_chain_reasoning=True,
|
||||
)
|
||||
|
||||
# Initially None
|
||||
assert llm.last_reasoning_items is None
|
||||
|
||||
# Set the delegate's internal state
|
||||
mock_items = [{"type": "reasoning", "id": "rs_test"}]
|
||||
llm._responses_delegate._last_reasoning_items = mock_items
|
||||
assert llm.last_reasoning_items == mock_items
|
||||
|
||||
|
||||
def test_azure_responses_api_last_reasoning_items_returns_none_for_completions():
|
||||
"""Test that last_reasoning_items returns None when api='completions'."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
)
|
||||
|
||||
assert llm.last_reasoning_items is None
|
||||
|
||||
|
||||
def test_azure_responses_api_reset_reasoning_chain():
|
||||
"""Test that reset_reasoning_chain delegates to the internal delegate."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
auto_chain_reasoning=True,
|
||||
)
|
||||
|
||||
# Set and then reset
|
||||
llm._responses_delegate._last_reasoning_items = [{"type": "reasoning"}]
|
||||
assert llm.last_reasoning_items is not None
|
||||
|
||||
llm.reset_reasoning_chain()
|
||||
assert llm.last_reasoning_items is None
|
||||
|
||||
|
||||
def test_azure_responses_api_reset_reasoning_chain_no_op_for_completions():
|
||||
"""Test that reset_reasoning_chain is a no-op when api='completions'."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
)
|
||||
|
||||
# Should not raise
|
||||
llm.reset_reasoning_chain()
|
||||
|
||||
|
||||
def test_azure_responses_api_completions_mode_unaffected():
|
||||
"""Test that existing completions mode behavior is not affected by responses changes."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
)
|
||||
|
||||
assert llm.api == "completions"
|
||||
assert llm._responses_delegate is None
|
||||
# Should have the Azure AI Inference client
|
||||
assert hasattr(llm, "client")
|
||||
assert hasattr(llm, "async_client")
|
||||
|
||||
|
||||
def test_azure_responses_api_interceptor_allowed():
|
||||
"""Test that interceptors are allowed when api='responses' (since they go through OpenAI SDK)."""
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
mock_interceptor = MagicMock()
|
||||
|
||||
# This should NOT raise
|
||||
llm = AzureCompletion(
|
||||
model="gpt-4o",
|
||||
api_key="test-key",
|
||||
endpoint="https://test.openai.azure.com",
|
||||
api="responses",
|
||||
interceptor=mock_interceptor,
|
||||
)
|
||||
assert llm._responses_delegate is not None
|
||||
|
||||
|
||||
def test_azure_responses_api_interceptor_blocked_for_completions():
    """Test that interceptors are still blocked for completions mode."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    fake_interceptor = MagicMock()

    # Completions mode rides on the Azure AI Inference SDK, which has no
    # interceptor support — construction must fail loudly.
    with pytest.raises(NotImplementedError, match="HTTP interceptors are not yet supported"):
        AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="completions",
            interceptor=fake_interceptor,
        )
def test_azure_responses_api_builtin_tools():
    """Test that builtin_tools param is forwarded to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    requested_tools = ["web_search", "code_interpreter"]

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        builtin_tools=requested_tools,
    )

    # The delegate must receive exactly the tool list we passed in.
    assert llm._responses_delegate.builtin_tools == requested_tools
def test_azure_responses_api_with_previous_response_id():
    """Test that previous_response_id is forwarded to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        previous_response_id="resp_abc123",
        store=True,
    )

    # Both multi-turn parameters must land on the responses delegate.
    responses_delegate = llm._responses_delegate
    assert responses_delegate.previous_response_id == "resp_abc123"
    assert responses_delegate.store is True
def test_azure_responses_api_env_var_api_version():
    """Test that AZURE_API_VERSION env var is used for responses API version."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    # NOTE(review): this only verifies that construction succeeds under the
    # env var; it does not assert the delegate actually adopted "2025-10-01"
    # as its api_version — consider strengthening once the delegate exposes it.
    with patch.dict(os.environ, {"AZURE_API_VERSION": "2025-10-01"}):
        llm = AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api="responses",
        )
        assert llm._responses_delegate is not None
def test_azure_responses_api_timeout_and_retries():
    """Test that timeout and max_retries are passed to the Azure clients."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        timeout=30.0,
        max_retries=5,
    )

    # NOTE(review): asserts only that the wrapper retained the values and a
    # delegate exists; the underlying client configuration is not inspected.
    assert llm._responses_delegate is not None
    assert llm.timeout == 30.0
    assert llm.max_retries == 5
def test_azure_responses_api_streaming_param():
    """Test that stream parameter is forwarded to the delegate."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    streaming_llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://test.openai.azure.com",
        api="responses",
        stream=True,
    )

    # The delegate must inherit the streaming flag.
    assert streaming_llm._responses_delegate.stream is True
def test_azure_responses_api_with_non_azure_openai_endpoint():
    """Test Responses API with a non-azure-openai endpoint (e.g., Azure AI Foundry)."""
    from crewai.llms.providers.azure.completion import AzureCompletion
    from openai import AzureOpenAI

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://models.inference.ai.azure.com",
        api="responses",
    )

    # Even for a Foundry-style endpoint, the delegate is created and is
    # backed by the openai SDK's AzureOpenAI client.
    assert llm._responses_delegate is not None
    assert isinstance(llm._responses_delegate.client, AzureOpenAI)
def test_azure_responses_api_base_endpoint_preserved():
    """Test that base_endpoint is preserved and not modified by endpoint validation."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    original_endpoint = "https://test.openai.azure.com"

    llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint=original_endpoint,
        api="responses",
    )

    # base_endpoint keeps the caller's URL verbatim, and responses mode
    # skips the completions-style endpoint rewriting entirely.
    assert llm.base_endpoint == original_endpoint
    assert llm.endpoint == original_endpoint
def test_azure_responses_api_endpoint_not_validated_for_responses():
    """Test that endpoint URL validation (adding /openai/deployments/) is skipped for responses mode."""
    from crewai.llms.provider.azure.completion import AzureCompletion  # noqa: F401  (see below)
    from crewai.llms.providers.azure.completion import AzureCompletion

    def build(api_mode):
        # Identical construction except for the api selector.
        return AzureCompletion(
            model="gpt-4o",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            api=api_mode,
        )

    # Completions mode rewrites the endpoint to a deployment URL...
    assert "/openai/deployments/" in build("completions").endpoint
    # ...while responses mode leaves the caller's endpoint untouched.
    assert build("responses").endpoint == "https://test.openai.azure.com"
Reference in New Issue
Block a user