Mirror of https://github.com/crewAIInc/crewAI.git, synced 2025-12-16 04:18:35 +00:00
fix: Azure API json_schema response_format not supported (#3986)
This fix addresses issue #3986, where the Azure AI Inference SDK doesn't support the json_schema response_format required for structured outputs.

Changes:
- Add a supports_response_model() method to BaseLLM (default False)
- Override supports_response_model() in the OpenAI, Anthropic, and Gemini providers
- Azure provider returns False for supports_response_model(), since the native SDK doesn't support the json_schema response_format
- Add a _llm_supports_response_model() helper in converter.py that checks for supports_response_model() first, then falls back to supports_function_calling() for backwards compatibility with custom LLMs
- Update Converter.to_pydantic(), to_json(), and get_conversion_instructions() to use the new helper function
- Add comprehensive tests for the fix

The fix separates the "supports function calling" capability from "supports structured outputs", allowing Azure to keep using function/tool calling while falling back to text-based JSON extraction for structured outputs.

Co-Authored-By: João <joao@crewai.com>
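Read together, the provider hunks below give each bundled provider a pair of independent capability flags. A quick summary of what the diff encodes, as an illustrative sketch (the CAPABILITIES dict is hypothetical, not an API in the codebase):

# Capability pairs implied by the provider hunks in this commit:
# (supports_function_calling, supports_response_model)
CAPABILITIES = {
    "openai": (True, True),     # both gated on `not self.is_o1_model`
    "anthropic": (True, True),  # both gated on `self.supports_tools`
    "gemini": (True, True),     # both gated on `self.supports_tools`
    "azure": (True, False),     # tool calling works; json_schema response_format does not
}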
@@ -179,6 +179,18 @@ class BaseLLM(ABC):
         """
         return DEFAULT_SUPPORTS_STOP_WORDS
 
+    def supports_response_model(self) -> bool:
+        """Check if the LLM supports structured outputs via response_model.
+
+        This is separate from supports_function_calling() because some providers
+        (like Azure AI Inference) support function/tool calling but do not support
+        the json_schema response_format required for structured outputs.
+
+        Returns:
+            True if the LLM supports structured outputs via response_model, False otherwise.
+        """
+        return False
+
     def _supports_stop_words_implementation(self) -> bool:
         """Check if stop words are configured for this LLM instance.
 
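Because the base class defaults to False, a custom provider whose backend can honor json_schema has to opt in by overriding the new method. A minimal sketch of such a subclass, assuming only the BaseLLM surface shown above; the class name, the import path, and the trimmed call() signature are all assumptions:

from crewai.llms.base_llm import BaseLLM  # exact import path is an assumption


class GatewayLLM(BaseLLM):  # hypothetical custom provider
    """Supports tool calling, but its backend rejects json_schema response_format."""

    def call(self, messages, **kwargs):
        # The real BaseLLM.call signature is richer; trimmed for the sketch.
        raise NotImplementedError

    def supports_function_calling(self) -> bool:
        return True  # tool calls are routed fine

    def supports_response_model(self) -> bool:
        return False  # so Converter takes the text-based JSON fallback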
@@ -630,6 +630,13 @@ class AnthropicCompletion(BaseLLM):
         """Check if the model supports function calling."""
         return self.supports_tools
 
+    def supports_response_model(self) -> bool:
+        """Check if the model supports structured outputs via response_model.
+
+        Anthropic models support structured outputs via tool-based approach with input_schema.
+        """
+        return self.supports_tools
+
     def supports_stop_words(self) -> bool:
         """Check if the model supports stop words."""
         return True  # All Claude models support stop sequences
@@ -556,6 +556,20 @@ class AzureCompletion(BaseLLM):
         # Azure OpenAI models support function calling
         return self.is_openai_model
 
+    def supports_response_model(self) -> bool:
+        """Check if the model supports structured outputs via response_model.
+
+        The Azure AI Inference SDK (azure.ai.inference.ChatCompletionsClient) does NOT
+        support the json_schema response_format required for structured outputs.
+        While Azure OpenAI models support function calling, the native SDK cannot
+        handle response_format with json_schema type, causing errors like:
+        "Unsupported `response_format` {'type': 'json_schema', ...}"
+
+        Users who need structured outputs with Azure should use the text-based
+        JSON extraction fallback path instead.
+        """
+        return False
+
     def supports_stop_words(self) -> bool:
         """Check if the model supports stop words."""
         return True  # Most Azure models support stop sequences
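The "text-based JSON extraction fallback path" the docstring points to amounts to: ask for JSON in the prompt, regex the object out of the free-text reply, and validate it with Pydantic. A standalone sketch of that shape, reusing the same `_JSON_PATTERN` regex that appears in converter.py below (the real converter adds retries and error handling; the Person model is illustrative):

import re

from pydantic import BaseModel

_JSON_PATTERN = re.compile(r"({.*})", re.DOTALL)  # mirrors converter.py


class Person(BaseModel):
    name: str
    age: int


def extract_model(raw_reply: str) -> Person:
    """Pull the first {...} span out of a free-text reply and validate it."""
    match = _JSON_PATTERN.search(raw_reply)
    if match is None:
        raise ValueError("no JSON object found in LLM reply")
    return Person.model_validate_json(match.group(1))


print(extract_model('Sure, here you go: {"name": "Ada", "age": 36}'))
# Person(name='Ada', age=36)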
@@ -541,6 +541,13 @@ class GeminiCompletion(BaseLLM):
         """Check if the model supports function calling."""
         return self.supports_tools
 
+    def supports_response_model(self) -> bool:
+        """Check if the model supports structured outputs via response_model.
+
+        Gemini models support structured outputs via response_schema in generation config.
+        """
+        return self.supports_tools
+
     def supports_stop_words(self) -> bool:
         """Check if the model supports stop words."""
         return True
@@ -568,6 +568,13 @@ class OpenAICompletion(BaseLLM):
         """Check if the model supports function calling."""
         return not self.is_o1_model
 
+    def supports_response_model(self) -> bool:
+        """Check if the model supports structured outputs via response_model.
+
+        OpenAI models support structured outputs via the beta.chat.completions.parse API.
+        """
+        return not self.is_o1_model
+
     def supports_stop_words(self) -> bool:
         """Check if the model supports stop words."""
         return not self.is_o1_model
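The beta.chat.completions.parse API the docstring names accepts a Pydantic class directly as response_format. A minimal sketch against the OpenAI SDK, independent of crewAI (the model name and prompt are placeholders; requires OPENAI_API_KEY):

from openai import OpenAI
from pydantic import BaseModel


class Person(BaseModel):
    name: str
    age: int


client = OpenAI()  # reads OPENAI_API_KEY from the environment
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",  # placeholder; any structured-outputs-capable model
    messages=[{"role": "user", "content": "Extract the person: Ada, age 36."}],
    response_format=Person,
)
print(completion.choices[0].message.parsed)  # -> Person(name='Ada', age=36)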
@@ -25,6 +25,40 @@
 _JSON_PATTERN: Final[re.Pattern[str]] = re.compile(r"({.*})", re.DOTALL)
 _I18N = get_i18n()
 
 
+def _llm_supports_response_model(llm: BaseLLM | LLM | Any) -> bool:
+    """Check if the LLM supports structured outputs via response_model.
+
+    This helper function checks for the new supports_response_model() method first,
+    then falls back to supports_function_calling() for backwards compatibility with
+    custom LLM implementations that haven't been updated yet.
+
+    Args:
+        llm: The language model instance.
+
+    Returns:
+        True if the LLM supports structured outputs via response_model, False otherwise.
+    """
+    if llm is None or isinstance(llm, str):
+        return False
+
+    # Check for the new supports_response_model method first
+    if hasattr(llm, "supports_response_model"):
+        try:
+            return llm.supports_response_model()
+        except TypeError:
+            return False
+
+    # Backwards compatibility: fall back to supports_function_calling
+    # for custom LLMs that haven't implemented supports_response_model yet
+    if hasattr(llm, "supports_function_calling"):
+        try:
+            return llm.supports_function_calling()
+        except TypeError:
+            return False
+
+    return False
+
+
 class ConverterError(Exception):
     """Error raised when Converter fails to parse the input."""
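Since the helper probes methods with hasattr rather than requiring a BaseLLM subclass, old custom LLMs keep working unmodified. A quick illustration of all three branches (the class names are hypothetical; the import path is the one used by the tests below):

from crewai.utilities.converter import _llm_supports_response_model


class NewStyleLLM:  # implements the new capability method
    def supports_response_model(self) -> bool:
        return False


class OldStyleLLM:  # pre-fix custom LLM: only function calling
    def supports_function_calling(self) -> bool:
        return True


class BareLLM:  # neither method
    pass


assert _llm_supports_response_model(NewStyleLLM()) is False  # new method wins outright
assert _llm_supports_response_model(OldStyleLLM()) is True   # backwards-compat fallback
assert _llm_supports_response_model(BareLLM()) is False      # safe default
assert _llm_supports_response_model("gpt-4o") is False       # bare model strings short-circuit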
@@ -55,7 +89,7 @@ class Converter(OutputConverter):
             ConverterError: If conversion fails after maximum attempts.
         """
         try:
-            if self.llm.supports_function_calling():
+            if _llm_supports_response_model(self.llm):
                 response = self.llm.call(
                     messages=[
                         {"role": "system", "content": self.instructions},
@@ -125,7 +159,7 @@ class Converter(OutputConverter):
 
         """
         try:
-            if self.llm.supports_function_calling():
+            if _llm_supports_response_model(self.llm):
                 return self._create_instructor().to_json()
             return json.dumps(
                 self.llm.call(
@@ -346,12 +380,7 @@ def get_conversion_instructions(
 
     """
    instructions = ""
-    if (
-        llm
-        and not isinstance(llm, str)
-        and hasattr(llm, "supports_function_calling")
-        and llm.supports_function_calling()
-    ):
+    if _llm_supports_response_model(llm):
         schema_dict = generate_model_description(model)
         schema = json.dumps(schema_dict, indent=2)
         formatted_task_instructions = _I18N.slice("formatted_task_instructions").format(
@@ -960,3 +960,144 @@ def test_internal_instructor_real_unsupported_provider() -> None:
 
     # Verify it's a configuration error about unsupported provider
     assert "Unsupported provider" in str(exc_info.value) or "unsupported" in str(exc_info.value).lower()
+
+
+# Tests for _llm_supports_response_model helper function
+# These tests cover GitHub issue #3986: Azure API doesn't support json_schema response_format
+from crewai.utilities.converter import _llm_supports_response_model
+
+
+def test_llm_supports_response_model_with_none() -> None:
+    """Test _llm_supports_response_model returns False for None."""
+    assert _llm_supports_response_model(None) is False
+
+
+def test_llm_supports_response_model_with_string() -> None:
+    """Test _llm_supports_response_model returns False for string LLM."""
+    assert _llm_supports_response_model("gpt-4o") is False
+
+
+def test_llm_supports_response_model_with_supports_response_model_true() -> None:
+    """Test _llm_supports_response_model returns True when supports_response_model() returns True."""
+    mock_llm = Mock()
+    mock_llm.supports_response_model.return_value = True
+    assert _llm_supports_response_model(mock_llm) is True
+
+
+def test_llm_supports_response_model_with_supports_response_model_false() -> None:
+    """Test _llm_supports_response_model returns False when supports_response_model() returns False."""
+    mock_llm = Mock()
+    mock_llm.supports_response_model.return_value = False
+    assert _llm_supports_response_model(mock_llm) is False
+
+
+def test_llm_supports_response_model_fallback_to_function_calling_true() -> None:
+    """Test _llm_supports_response_model falls back to supports_function_calling() when supports_response_model doesn't exist."""
+    mock_llm = Mock(spec=["supports_function_calling"])
+    mock_llm.supports_function_calling.return_value = True
+    assert _llm_supports_response_model(mock_llm) is True
+
+
+def test_llm_supports_response_model_fallback_to_function_calling_false() -> None:
+    """Test _llm_supports_response_model falls back to supports_function_calling() when supports_response_model doesn't exist."""
+    mock_llm = Mock(spec=["supports_function_calling"])
+    mock_llm.supports_function_calling.return_value = False
+    assert _llm_supports_response_model(mock_llm) is False
+
+
+def test_llm_supports_response_model_no_methods() -> None:
+    """Test _llm_supports_response_model returns False when LLM has neither method."""
+    mock_llm = Mock(spec=[])
+    assert _llm_supports_response_model(mock_llm) is False
+
+
+def test_llm_supports_response_model_azure_provider() -> None:
+    """Test that Azure provider returns False for supports_response_model.
+
+    This is the core fix for GitHub issue #3986: Azure AI Inference SDK doesn't
+    support json_schema response_format, so we need to use the text-based fallback.
+    """
+    from crewai.llms.providers.azure.completion import AzureCompletion
+
+    # Create a mock Azure completion instance
+    azure_llm = Mock(spec=AzureCompletion)
+    azure_llm.supports_response_model.return_value = False
+    azure_llm.supports_function_calling.return_value = True
+
+    # Azure should return False for supports_response_model even though it supports function calling
+    assert azure_llm.supports_response_model() is False
+    assert azure_llm.supports_function_calling() is True
+
+
+def test_converter_uses_text_fallback_for_azure() -> None:
+    """Test that Converter uses text-based fallback when LLM doesn't support response_model.
+
+    This verifies the fix for GitHub issue #3986: when using Azure AI Inference SDK,
+    the Converter should NOT pass response_model to the LLM call, but instead use
+    the text-based JSON extraction fallback.
+    """
+    # Mock Azure-like LLM that supports function calling but NOT response_model
+    mock_llm = Mock()
+    mock_llm.supports_response_model.return_value = False
+    mock_llm.supports_function_calling.return_value = True
+    mock_llm.call.return_value = '{"name": "Azure Test", "age": 42}'
+
+    instructions = get_conversion_instructions(SimpleModel, mock_llm)
+    converter = Converter(
+        llm=mock_llm,
+        text="Name: Azure Test, Age: 42",
+        model=SimpleModel,
+        instructions=instructions,
+    )
+
+    output = converter.to_pydantic()
+
+    # Verify the output is correct
+    assert isinstance(output, SimpleModel)
+    assert output.name == "Azure Test"
+    assert output.age == 42
+
+    # Verify that call was made WITHOUT response_model parameter
+    # (text-based fallback path)
+    mock_llm.call.assert_called_once()
+    call_args = mock_llm.call.call_args
+    # The text-based fallback passes messages as a list, not as keyword argument
+    assert call_args[0][0] == [
+        {"role": "system", "content": instructions},
+        {"role": "user", "content": "Name: Azure Test, Age: 42"},
+    ]
+    # Verify response_model was NOT passed
+    assert "response_model" not in call_args[1] if call_args[1] else True
+
+
+def test_converter_uses_response_model_for_openai() -> None:
+    """Test that Converter uses response_model when LLM supports it.
+
+    This verifies that OpenAI and other providers that support structured outputs
+    still use the response_model path for better performance.
+    """
+    # Mock OpenAI-like LLM that supports both function calling AND response_model
+    mock_llm = Mock()
+    mock_llm.supports_response_model.return_value = True
+    mock_llm.supports_function_calling.return_value = True
+    mock_llm.call.return_value = '{"name": "OpenAI Test", "age": 35}'
+
+    instructions = get_conversion_instructions(SimpleModel, mock_llm)
+    converter = Converter(
+        llm=mock_llm,
+        text="Name: OpenAI Test, Age: 35",
+        model=SimpleModel,
+        instructions=instructions,
+    )
+
+    output = converter.to_pydantic()
+
+    # Verify the output is correct
+    assert isinstance(output, SimpleModel)
+    assert output.name == "OpenAI Test"
+    assert output.age == 35
+
+    # Verify that call was made WITH response_model parameter
+    mock_llm.call.assert_called_once()
+    call_args = mock_llm.call.call_args
+    assert call_args[1].get("response_model") == SimpleModel