mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-14 06:53:25 +00:00
Compare commits
3 Commits
1.14.0a3
...
devin/1775
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2315422fc4 | ||
|
|
1a7d2ad05c | ||
|
|
f69171cd76 |
@@ -3,7 +3,7 @@ from __future__ import annotations
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, TypedDict
|
||||
from typing import Any, Literal, TypedDict
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from pydantic import BaseModel, PrivateAttr, model_validator
|
||||
@@ -72,6 +72,19 @@ class AzureCompletion(BaseLLM):
|
||||
|
||||
This class provides direct integration with the Azure AI Inference Python SDK,
|
||||
offering native function calling, streaming support, and proper Azure authentication.
|
||||
|
||||
Supports both Chat Completions API (default) and Responses API.
|
||||
When ``api="responses"`` is set, calls are delegated to the OpenAI Responses API
|
||||
implementation with the Azure resource's ``/openai/v1/`` base URL, reusing the
|
||||
fully-tested OpenAI Responses API code path.
|
||||
|
||||
Example::
|
||||
|
||||
# Chat Completions (default)
|
||||
llm = LLM(model="azure/gpt-4o", api_key=KEY, endpoint=ENDPOINT)
|
||||
|
||||
# Responses API
|
||||
llm = LLM(model="azure/gpt-4o", api="responses", api_key=KEY, endpoint=ENDPOINT)
|
||||
"""
|
||||
|
||||
endpoint: str | None = None
|
||||
@@ -82,14 +95,27 @@ class AzureCompletion(BaseLLM):
|
||||
frequency_penalty: float | None = None
|
||||
presence_penalty: float | None = None
|
||||
max_tokens: int | None = None
|
||||
max_completion_tokens: int | None = None
|
||||
stream: bool = False
|
||||
interceptor: BaseInterceptor[Any, Any] | None = None
|
||||
response_format: type[BaseModel] | None = None
|
||||
is_openai_model: bool = False
|
||||
is_azure_openai_endpoint: bool = False
|
||||
api: Literal["completions", "responses"] = "completions"
|
||||
instructions: str | None = None
|
||||
store: bool | None = None
|
||||
previous_response_id: str | None = None
|
||||
include: list[str] | None = None
|
||||
builtin_tools: list[str] | None = None
|
||||
parse_tool_outputs: bool = False
|
||||
auto_chain: bool = False
|
||||
auto_chain_reasoning: bool = False
|
||||
reasoning_effort: str | None = None
|
||||
seed: int | None = None
|
||||
|
||||
_client: Any = PrivateAttr(default=None)
|
||||
_async_client: Any = PrivateAttr(default=None)
|
||||
_responses_delegate: Any = PrivateAttr(default=None)
|
||||
|
||||
@model_validator(mode="before")
|
||||
@classmethod
|
||||
@@ -142,17 +168,95 @@ class AzureCompletion(BaseLLM):
|
||||
def _init_clients(self) -> AzureCompletion:
    """Validate credentials and set up the backend for the selected API.

    When ``api == "responses"`` only the Responses API delegate is created and
    the native chat-completions clients are left untouched. Otherwise the
    synchronous and asynchronous chat-completions clients are built from the
    configured endpoint, API key and (if set) API version.

    Returns:
        This instance, so the method can be used as a validator hook.

    Raises:
        ValueError: If no API key is configured.
    """
    if not self.api_key:
        raise ValueError("Azure API key is required.")

    if self.api == "responses":
        # Responses mode is served entirely by the delegate; the chat
        # clients must not be constructed here.
        self._init_responses_delegate()
        return self

    client_kwargs: dict[str, Any] = {
        "endpoint": self.endpoint,
        "credential": AzureKeyCredential(self.api_key),
    }
    # Only pass api_version when one is configured, so the client library
    # otherwise applies its own default.
    if self.api_version:
        client_kwargs["api_version"] = self.api_version

    self._client = ChatCompletionsClient(**client_kwargs)
    self._async_client = AsyncChatCompletionsClient(**client_kwargs)
    return self
|
||||
|
||||
def _init_responses_delegate(self) -> None:
    """Initialise the OpenAICompletion delegate for Responses API calls.

    Derives the Azure ``<resource>/openai/v1/`` base URL from the configured
    endpoint, then creates an :class:`OpenAICompletion` instance configured
    with ``api="responses"`` and stores it in ``_responses_delegate``.
    Optional settings are forwarded to the delegate only when they are set
    on this instance.

    Raises:
        ValueError: If no endpoint is configured.
    """
    from crewai.llms.providers.openai.completion import OpenAICompletion

    endpoint = (self.endpoint or "").strip()
    if not endpoint:
        # Without a resource URL the derived base URL would be the relative
        # path "/openai/v1/...", which can never be reached.
        raise ValueError("Azure endpoint is required for the Responses API.")

    # NOTE(review): the api-version query string is embedded in base_url.
    # Confirm the OpenAI client preserves it when appending request paths,
    # and that "2024-06-01" is an accepted version for /openai/v1/.
    base_url = self._build_responses_base_url(
        endpoint, self.api_version or "2024-06-01"
    )

    delegate_kwargs: dict[str, Any] = {
        "model": self.model,
        "provider": "openai",
        "api_key": self.api_key,
        "base_url": base_url,
        "api": "responses",
        "stream": self.stream,
    }

    # Settings forwarded whenever they were explicitly set (i.e. not None).
    for field_name in (
        "instructions",
        "store",
        "previous_response_id",
        "include",
        "builtin_tools",
        "reasoning_effort",
        "temperature",
        "top_p",
        "max_tokens",
        "max_completion_tokens",
        "seed",
        "timeout",
        "response_format",
        "frequency_penalty",
        "presence_penalty",
    ):
        value = getattr(self, field_name)
        if value is not None:
            delegate_kwargs[field_name] = value

    # Flags and stop words are forwarded only when truthy.
    for field_name in (
        "parse_tool_outputs",
        "auto_chain",
        "auto_chain_reasoning",
        "stop",
    ):
        value = getattr(self, field_name)
        if value:
            delegate_kwargs[field_name] = value

    # presumably 2 is the default retry count — TODO confirm against the
    # field declaration; only a differing value is forwarded.
    if self.max_retries != 2:
        delegate_kwargs["max_retries"] = self.max_retries

    self._responses_delegate = OpenAICompletion(**delegate_kwargs)


@staticmethod
def _build_responses_base_url(endpoint: str, api_version: str) -> str:
    """Return the ``<resource>/openai/v1/?api-version=...`` URL for *endpoint*.

    Anything from ``/openai/deployments/`` onwards is dropped, trailing
    slashes are removed, and an endpoint that already ends in ``/openai`` or
    ``/openai/v1`` is reduced to the resource root so the path segment is
    not duplicated.
    """
    resource_url = endpoint.partition("/openai/deployments/")[0].rstrip("/")
    for suffix in ("/openai/v1", "/openai"):
        if resource_url.endswith(suffix):
            resource_url = resource_url[: -len(suffix)].rstrip("/")
            break
    return f"{resource_url}/openai/v1/?api-version={api_version}"
|
||||
|
||||
def to_config_dict(self) -> dict[str, Any]:
|
||||
"""Extend base config with Azure-specific fields."""
|
||||
config = super().to_config_dict()
|
||||
@@ -172,6 +276,10 @@ class AzureCompletion(BaseLLM):
|
||||
config["presence_penalty"] = self.presence_penalty
|
||||
if self.max_tokens is not None:
|
||||
config["max_tokens"] = self.max_tokens
|
||||
if self.api != "completions":
|
||||
config["api"] = self.api
|
||||
if self.reasoning_effort is not None:
|
||||
config["reasoning_effort"] = self.reasoning_effort
|
||||
return config
|
||||
|
||||
@staticmethod
|
||||
@@ -277,7 +385,7 @@ class AzureCompletion(BaseLLM):
|
||||
from_agent: Any | None = None,
|
||||
response_model: type[BaseModel] | None = None,
|
||||
) -> str | Any:
|
||||
"""Call Azure AI Inference chat completions API.
|
||||
"""Call Azure AI Inference API (Chat Completions or Responses).
|
||||
|
||||
Args:
|
||||
messages: Input messages for the chat completion
|
||||
@@ -291,6 +399,17 @@ class AzureCompletion(BaseLLM):
|
||||
Returns:
|
||||
Chat completion response or tool call result
|
||||
"""
|
||||
if self.api == "responses" and self._responses_delegate is not None:
|
||||
return self._responses_delegate.call(
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
callbacks=callbacks,
|
||||
available_functions=available_functions,
|
||||
from_task=from_task,
|
||||
from_agent=from_agent,
|
||||
response_model=response_model,
|
||||
)
|
||||
|
||||
with llm_call_context():
|
||||
try:
|
||||
# Emit call started event
|
||||
@@ -349,7 +468,7 @@ class AzureCompletion(BaseLLM):
|
||||
from_agent: Any | None = None,
|
||||
response_model: type[BaseModel] | None = None,
|
||||
) -> str | Any:
|
||||
"""Call Azure AI Inference chat completions API asynchronously.
|
||||
"""Call Azure AI Inference API asynchronously (Chat Completions or Responses).
|
||||
|
||||
Args:
|
||||
messages: Input messages for the chat completion
|
||||
@@ -363,6 +482,17 @@ class AzureCompletion(BaseLLM):
|
||||
Returns:
|
||||
Chat completion response or tool call result
|
||||
"""
|
||||
if self.api == "responses" and self._responses_delegate is not None:
|
||||
return await self._responses_delegate.acall(
|
||||
messages=messages,
|
||||
tools=tools,
|
||||
callbacks=callbacks,
|
||||
available_functions=available_functions,
|
||||
from_task=from_task,
|
||||
from_agent=from_agent,
|
||||
response_model=response_model,
|
||||
)
|
||||
|
||||
with llm_call_context():
|
||||
try:
|
||||
self._emit_call_started_event(
|
||||
@@ -1090,6 +1220,19 @@ class AzureCompletion(BaseLLM):
|
||||
}
|
||||
return {"total_tokens": 0}
|
||||
|
||||
@property
def last_response_id(self) -> str | None:
    """Return the delegate's last response ID (Responses API only).

    Returns ``None`` when no Responses API delegate has been created.
    """
    delegate = self._responses_delegate
    if delegate is None:
        return None
    response_id: str | None = delegate.last_response_id
    return response_id
|
||||
|
||||
def reset_chain(self) -> None:
    """Reset the auto-chain state on the delegate (Responses API only).

    Does nothing when no Responses API delegate has been created.
    """
    delegate = self._responses_delegate
    if delegate is None:
        return
    delegate.reset_chain()
|
||||
|
||||
async def aclose(self) -> None:
|
||||
"""Close the async client and clean up resources.
|
||||
|
||||
|
||||
@@ -1,13 +1,14 @@
|
||||
import os
|
||||
import sys
|
||||
import types
|
||||
from unittest.mock import patch, MagicMock, Mock
|
||||
from unittest.mock import patch, MagicMock, Mock, AsyncMock
|
||||
import pytest
|
||||
|
||||
from crewai.llm import LLM
|
||||
from crewai.crew import Crew
|
||||
from crewai.agent import Agent
|
||||
from crewai.task import Task
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -1403,3 +1404,470 @@ def test_azure_stop_words_still_applied_to_regular_responses():
|
||||
assert "Observation:" not in result
|
||||
assert "Found results" not in result
|
||||
assert "I need to search for more information" in result
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Azure Responses API Tests
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_azure_responses_api_initialization():
    """AzureCompletion built with api='responses' keeps its settings and gets a delegate."""
    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
        "instructions": "You are a helpful assistant.",
        "store": True,
    }
    azure_llm = AzureCompletion(**init_kwargs)

    assert azure_llm.api == "responses"
    assert azure_llm.instructions == "You are a helpful assistant."
    assert azure_llm.store is True
    assert azure_llm.model == "gpt-4o"
    assert azure_llm._responses_delegate is not None
|
||||
|
||||
|
||||
def test_azure_responses_api_default_is_completions():
    """Omitting ``api`` selects 'completions' and creates no Responses delegate."""
    init_kwargs = {
        "model": "gpt-4o",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
    }
    azure_llm = AzureCompletion(**init_kwargs)

    assert azure_llm.api == "completions"
    assert azure_llm._responses_delegate is None
|
||||
|
||||
|
||||
def test_azure_responses_api_delegate_is_openai_completion():
    """The Responses API delegate is an OpenAICompletion set to the 'responses' API."""
    from crewai.llms.providers.openai.completion import OpenAICompletion

    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
    )
    delegate = azure_llm._responses_delegate

    assert isinstance(delegate, OpenAICompletion)
    assert delegate.api == "responses"
    assert delegate.model == "gpt-4o"
|
||||
|
||||
|
||||
def test_azure_responses_api_base_url_construction():
    """The delegate base URL is the resource URL plus /openai/v1/ and the api-version."""
    from crewai.llms.providers.openai.completion import OpenAICompletion

    expected_url = (
        "https://my-resource.openai.azure.com/openai/v1/?api-version=2025-03-01-preview"
    )
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
        api_version="2025-03-01-preview",
    )

    responses_delegate = azure_llm._responses_delegate
    assert isinstance(responses_delegate, OpenAICompletion)
    assert responses_delegate.base_url == expected_url
|
||||
|
||||
|
||||
def test_azure_responses_api_base_url_strips_deployment_suffix():
    """A deployment-style endpoint is reduced to the resource root in the base URL."""
    deployment_endpoint = (
        "https://my-resource.openai.azure.com/openai/deployments/gpt-4o"
    )
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint=deployment_endpoint,
    )

    base_url = azure_llm._responses_delegate.base_url
    assert "my-resource.openai.azure.com/openai/v1/" in base_url
    assert "/openai/deployments/" not in base_url
|
||||
|
||||
|
||||
def test_azure_responses_api_base_url_with_trailing_slash():
    """A trailing slash on the endpoint does not produce a doubled slash in the base URL."""
    expected_url = (
        "https://my-resource.openai.azure.com/openai/v1/?api-version=2025-03-01-preview"
    )
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com/",
        api_version="2025-03-01-preview",
    )

    assert azure_llm._responses_delegate.base_url == expected_url
|
||||
|
||||
|
||||
def test_azure_responses_api_forwards_parameters():
    """Every Responses API and sampling parameter set on the LLM reaches the delegate."""
    forwarded = {
        "instructions": "Be helpful",
        "store": True,
        "previous_response_id": "resp_abc123",
        "include": ["reasoning.encrypted_content"],
        "builtin_tools": ["web_search_preview"],
        "parse_tool_outputs": True,
        "auto_chain": True,
        "auto_chain_reasoning": True,
        "temperature": 0.5,
        "top_p": 0.9,
        "max_tokens": 1000,
        "reasoning_effort": "high",
        "seed": 42,
        "frequency_penalty": 0.3,
        "presence_penalty": 0.6,
    }
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
        **forwarded,
    )

    delegate = azure_llm._responses_delegate
    for field_name, expected in forwarded.items():
        actual = getattr(delegate, field_name)
        if isinstance(expected, bool):
            # Boolean settings must arrive as the exact singleton.
            assert actual is expected, field_name
        else:
            assert actual == expected, field_name
|
||||
|
||||
|
||||
def test_azure_responses_api_call_delegates_to_openai():
    """call() in responses mode hands all arguments to the delegate and returns its result."""
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
    )
    expected_kwargs = {
        "messages": "Hello, world!",
        "tools": None,
        "callbacks": None,
        "available_functions": None,
        "from_task": None,
        "from_agent": None,
        "response_model": None,
    }

    with patch.object(
        azure_llm._responses_delegate, "call", return_value="responses result"
    ) as delegate_call:
        outcome = azure_llm.call("Hello, world!")
        delegate_call.assert_called_once_with(**expected_kwargs)
        assert outcome == "responses result"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_azure_responses_api_acall_delegates_to_openai():
    """acall() in responses mode awaits the delegate's acall() and returns its result."""
    llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
    )

    with patch.object(
        llm._responses_delegate,
        "acall",
        new_callable=AsyncMock,
        return_value="async responses result",
    ) as mock_acall:
        result = await llm.acall("Hello async!")
        # Check the await, not just the call: a coroutine that is created
        # but never awaited would still satisfy assert_called_once_with.
        mock_acall.assert_awaited_once_with(
            messages="Hello async!",
            tools=None,
            callbacks=None,
            available_functions=None,
            from_task=None,
            from_agent=None,
            response_model=None,
        )
        assert result == "async responses result"
|
||||
|
||||
|
||||
def test_azure_responses_api_call_with_tools():
    """call() in responses mode passes tools and available_functions to the delegate."""
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
    )

    weather_schema = {
        "type": "object",
        "properties": {"location": {"type": "string"}},
        "required": ["location"],
    }
    tools = [
        {
            "name": "get_weather",
            "description": "Get weather",
            "parameters": weather_schema,
        }
    ]
    available_functions = {"get_weather": lambda location: f"Sunny in {location}"}
    user_messages = [{"role": "user", "content": "What's the weather?"}]

    with patch.object(
        azure_llm._responses_delegate, "call", return_value="Weather result"
    ) as delegate_call:
        outcome = azure_llm.call(
            messages=user_messages,
            tools=tools,
            available_functions=available_functions,
        )
        delegate_call.assert_called_once()
        forwarded = delegate_call.call_args.kwargs
        assert forwarded["tools"] == tools
        assert forwarded["available_functions"] == available_functions
        assert outcome == "Weather result"
|
||||
|
||||
|
||||
def test_azure_responses_api_completions_not_affected():
    """In default completions mode both chat clients exist and no delegate is created."""
    init_kwargs = {
        "model": "gpt-4o",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
    }
    azure_llm = AzureCompletion(**init_kwargs)

    assert azure_llm.api == "completions"
    assert azure_llm._responses_delegate is None
    assert azure_llm._client is not None
    assert azure_llm._async_client is not None
|
||||
|
||||
|
||||
def test_azure_responses_api_via_llm_factory():
    """LLM(model='azure/...', api='responses') yields an AzureCompletion with a delegate."""
    factory_kwargs = {
        "model": "azure/gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
    }
    created = LLM(**factory_kwargs)

    assert isinstance(created, AzureCompletion)
    assert created.api == "responses"
    assert created._responses_delegate is not None
|
||||
|
||||
|
||||
def test_azure_responses_api_to_config_dict():
    """to_config_dict() reports 'api' and 'reasoning_effort' when they are set."""
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
        reasoning_effort="high",
    )

    exported = azure_llm.to_config_dict()

    assert exported["api"] == "responses"
    assert exported["reasoning_effort"] == "high"
|
||||
|
||||
|
||||
def test_azure_completions_api_to_config_dict_no_api_field():
    """to_config_dict() leaves out 'api' when the default 'completions' is in use."""
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
    )

    exported = azure_llm.to_config_dict()

    assert "api" not in exported
|
||||
|
||||
|
||||
def test_azure_responses_api_last_response_id():
    """last_response_id reflects the value held by the Responses API delegate."""
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
        auto_chain=True,
    )

    # Nothing has been recorded before any call is made.
    assert azure_llm.last_response_id is None

    # Plant an ID directly on the delegate and read it back through the property.
    delegate = azure_llm._responses_delegate
    delegate._last_response_id = "resp_xyz789"
    assert azure_llm.last_response_id == "resp_xyz789"
|
||||
|
||||
|
||||
def test_azure_responses_api_reset_chain():
    """reset_chain() clears the response ID held by the delegate."""
    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
        auto_chain=True,
    )

    # Seed the delegate with a response ID so there is something to clear.
    delegate = azure_llm._responses_delegate
    delegate._last_response_id = "resp_xyz789"
    assert azure_llm.last_response_id == "resp_xyz789"

    # After the reset the property reports no ID.
    azure_llm.reset_chain()
    assert azure_llm.last_response_id is None
|
||||
|
||||
|
||||
def test_azure_responses_api_last_response_id_without_delegate():
    """last_response_id is None in completions mode, where no delegate exists."""
    init_kwargs = {
        "model": "gpt-4o",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
    }
    azure_llm = AzureCompletion(**init_kwargs)

    assert azure_llm.last_response_id is None
|
||||
|
||||
|
||||
def test_azure_responses_api_reset_chain_without_delegate():
    """reset_chain() completes without error in completions mode (no delegate)."""
    init_kwargs = {
        "model": "gpt-4o",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
    }
    azure_llm = AzureCompletion(**init_kwargs)

    # The call itself is the assertion: it must not raise.
    azure_llm.reset_chain()
|
||||
|
||||
|
||||
def test_azure_responses_api_with_structured_output():
    """call() passes response_model to the delegate and returns the delegate's model instance."""
    from pydantic import BaseModel, Field

    class MathAnswer(BaseModel):
        result: int = Field(description="The numerical result")
        explanation: str = Field(description="Brief explanation")

    azure_llm = AzureCompletion(
        model="gpt-4o",
        api="responses",
        api_key="test-key",
        endpoint="https://my-resource.openai.azure.com",
    )
    canned_answer = MathAnswer(result=42, explanation="The answer to everything")

    with patch.object(
        azure_llm._responses_delegate, "call", return_value=canned_answer
    ) as delegate_call:
        outcome = azure_llm.call("What is the answer?", response_model=MathAnswer)
        delegate_call.assert_called_once()
        recorded_call = delegate_call.call_args
        assert recorded_call is not None
        assert recorded_call.kwargs["response_model"] == MathAnswer
        assert isinstance(outcome, MathAnswer)
        assert outcome.result == 42
|
||||
|
||||
|
||||
def test_azure_responses_api_streaming_forwarded():
    """stream=True on the Azure LLM is passed on to the Responses API delegate."""
    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
        "stream": True,
    }
    azure_llm = AzureCompletion(**init_kwargs)

    delegate = azure_llm._responses_delegate
    assert delegate.stream is True
|
||||
|
||||
|
||||
def test_azure_responses_api_max_completion_tokens_forwarded():
    """max_completion_tokens on the Azure LLM is passed on to the delegate."""
    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
        "max_completion_tokens": 500,
    }
    azure_llm = AzureCompletion(**init_kwargs)

    delegate = azure_llm._responses_delegate
    assert delegate.max_completion_tokens == 500
|
||||
|
||||
|
||||
def test_azure_responses_api_default_api_version_in_url():
    """With no api_version argument and no AZURE_API_VERSION, the URL uses 2024-06-01."""
    # Build the instance with AZURE_API_VERSION removed from the environment
    # so the value in the URL comes from the code default, not the machine.
    env_without_version = {
        key: value
        for key, value in os.environ.items()
        if key != "AZURE_API_VERSION"
    }
    with patch.dict(os.environ, env_without_version, clear=True):
        llm = AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://my-resource.openai.azure.com",
        )

    assert "api-version=2024-06-01" in llm._responses_delegate.base_url
|
||||
|
||||
|
||||
def test_azure_responses_api_custom_api_version_in_url():
    """An explicit api_version appears in the delegate's base URL."""
    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
        "api_version": "2025-03-01-preview",
    }
    azure_llm = AzureCompletion(**init_kwargs)

    base_url = azure_llm._responses_delegate.base_url
    assert "api-version=2025-03-01-preview" in base_url
|
||||
|
||||
|
||||
def test_azure_responses_api_no_chat_clients_created():
    """In responses mode the chat clients stay unset and only the delegate exists."""
    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
    }
    azure_llm = AzureCompletion(**init_kwargs)

    # The native Azure clients must be left at their None default.
    assert azure_llm._client is None
    assert azure_llm._async_client is None
    assert azure_llm._responses_delegate is not None
|
||||
|
||||
|
||||
def test_azure_responses_api_stop_words_forwarded():
    """Stop words set on the Azure LLM are passed on to the delegate."""
    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
        "stop": ["STOP"],
    }
    azure_llm = AzureCompletion(**init_kwargs)

    delegate = azure_llm._responses_delegate
    assert delegate.stop == ["STOP"]
|
||||
|
||||
|
||||
def test_azure_responses_api_response_format_forwarded():
    """A response_format model class set on the Azure LLM is passed on to the delegate."""
    from pydantic import BaseModel

    class MyFormat(BaseModel):
        answer: str

    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://my-resource.openai.azure.com",
        "response_format": MyFormat,
    }
    azure_llm = AzureCompletion(**init_kwargs)

    delegate = azure_llm._responses_delegate
    assert delegate.response_format == MyFormat
|
||||
|
||||
Reference in New Issue
Block a user