diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index c4e4dd549..40850193d 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -167,6 +167,7 @@ class AnthropicCompletion(BaseLLM): thinking: AnthropicThinkingConfig | None = None, response_format: type[BaseModel] | None = None, tool_search: AnthropicToolSearchConfig | bool | None = None, + api: Literal["completions", "responses"] = "completions", **kwargs: Any, ): """Initialize Anthropic chat completion client. @@ -192,6 +193,16 @@ class AnthropicCompletion(BaseLLM): and a tool search tool is injected into the tools list. **kwargs: Additional parameters """ + if api == "responses": + raise NotImplementedError( + "The Responses API is not supported by Anthropic provider. " + "Anthropic uses the Messages API natively. " + "The Responses API is available for OpenAI and Azure OpenAI providers. " + "Use api='completions' (default) with Anthropic." 
+ ) + + self.api = api + super().__init__( model=model, temperature=temperature, stop=stop_sequences or [], **kwargs ) diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 00c10112d..3913d844a 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -3,7 +3,7 @@ from __future__ import annotations import json import logging import os -from typing import TYPE_CHECKING, Any, TypedDict +from typing import TYPE_CHECKING, Any, Literal, TypedDict from pydantic import BaseModel from typing_extensions import Self @@ -89,10 +89,22 @@ class AzureCompletion(BaseLLM): frequency_penalty: float | None = None, presence_penalty: float | None = None, max_tokens: int | None = None, + max_completion_tokens: int | None = None, stop: list[str] | None = None, stream: bool = False, interceptor: BaseInterceptor[Any, Any] | None = None, response_format: type[BaseModel] | None = None, + api: Literal["completions", "responses"] = "completions", + instructions: str | None = None, + store: bool | None = None, + previous_response_id: str | None = None, + include: list[str] | None = None, + builtin_tools: list[str] | None = None, + parse_tool_outputs: bool = False, + auto_chain: bool = False, + auto_chain_reasoning: bool = False, + seed: int | None = None, + reasoning_effort: str | None = None, **kwargs: Any, ): """Initialize Azure AI Inference chat completion client. @@ -109,12 +121,25 @@ class AzureCompletion(BaseLLM): frequency_penalty: Frequency penalty (-2 to 2) presence_penalty: Presence penalty (-2 to 2) max_tokens: Maximum tokens in response + max_completion_tokens: Maximum completion tokens (used by Responses API) stop: Stop sequences stream: Enable streaming responses interceptor: HTTP interceptor (not yet supported for Azure). response_format: Pydantic model for structured output. 
Used as default when response_model is not passed to call()/acall() methods. Only works with OpenAI models deployed on Azure. + api: Which API to use - 'completions' for Chat Completions (default), + 'responses' for OpenAI Responses API (requires Azure OpenAI endpoint). + instructions: System instructions for Responses API. + store: Whether to store the response for multi-turn (Responses API). + previous_response_id: Response ID for multi-turn conversations (Responses API). + include: Additional output types to include (Responses API). + builtin_tools: Built-in tools like 'web_search', 'file_search' (Responses API). + parse_tool_outputs: Return structured ResponsesAPIResult (Responses API). + auto_chain: Automatically chain responses using response IDs (Responses API). + auto_chain_reasoning: Auto-chain with reasoning items for ZDR (Responses API). + seed: Random seed for deterministic outputs. + reasoning_effort: Reasoning effort level for reasoning models. **kwargs: Additional parameters """ if interceptor is not None: @@ -147,10 +172,36 @@ class AzureCompletion(BaseLLM): "Azure endpoint is required. Set AZURE_ENDPOINT environment variable or pass endpoint parameter." 
) + # Responses API parameters + self.api = api + self.instructions = instructions + self.store = store + self.previous_response_id = previous_response_id + self.include = include + self.builtin_tools = builtin_tools + self.parse_tool_outputs = parse_tool_outputs + self.auto_chain = auto_chain + self.auto_chain_reasoning = auto_chain_reasoning + self.max_completion_tokens = max_completion_tokens + self.seed = seed + self.reasoning_effort = reasoning_effort + + # Auto-chain state tracking + self._last_response_id: str | None = None + self._last_reasoning_items: list[Any] = [] + + # Built-in tool type mapping (same as OpenAI) + self.BUILTIN_TOOL_TYPES: dict[str, str] = { + "web_search": "web_search_preview", + "file_search": "file_search", + "code_interpreter": "code_interpreter", + "computer_use": "computer_use_preview", + } + # Validate and potentially fix Azure OpenAI endpoint URL self.endpoint = self._validate_and_fix_endpoint(self.endpoint, model) - # Build client kwargs + # Build client kwargs for Azure AI Inference (Chat Completions) client_kwargs = { "endpoint": self.endpoint, "credential": AzureKeyCredential(self.api_key), @@ -164,6 +215,10 @@ class AzureCompletion(BaseLLM): self.async_client = AsyncChatCompletionsClient(**client_kwargs) # type: ignore[arg-type] + # If using Responses API, also create OpenAI AzureOpenAI clients + if self.api == "responses": + self._init_responses_clients() + self.top_p = top_p self.frequency_penalty = frequency_penalty self.presence_penalty = presence_penalty @@ -180,6 +235,39 @@ class AzureCompletion(BaseLLM): and "/openai/deployments/" in self.endpoint ) + def _init_responses_clients(self) -> None: + """Initialize OpenAI AzureOpenAI clients for Responses API. + + The Responses API requires the OpenAI SDK's AzureOpenAI client, + which supports the same responses.create() interface as the + regular OpenAI client. 
+ """ + try: + from openai import AsyncAzureOpenAI, AzureOpenAI + except ImportError: + raise ImportError( + "The 'openai' package is required for Azure Responses API support. " + "Install it with: uv add 'crewai[openai]' or pip install openai" + ) from None + + # Extract the base Azure endpoint (without /openai/deployments/...) + azure_endpoint = self.endpoint + if "/openai/deployments/" in azure_endpoint: + azure_endpoint = azure_endpoint.split("/openai/deployments/")[0] + + responses_kwargs: dict[str, Any] = { + "api_key": self.api_key, + "azure_endpoint": azure_endpoint, + "api_version": self.api_version or "2025-03-01-preview", + } + + if self.timeout is not None: + responses_kwargs["timeout"] = self.timeout + responses_kwargs["max_retries"] = self.max_retries + + self.responses_client = AzureOpenAI(**responses_kwargs) + self.async_responses_client = AsyncAzureOpenAI(**responses_kwargs) + @staticmethod def _validate_and_fix_endpoint(endpoint: str, model: str) -> str: """Validate and fix Azure endpoint URL format. 
@@ -269,6 +357,24 @@ class AzureCompletion(BaseLLM): ) raise error + @property + def last_response_id(self) -> str | None: + """Get the last response ID for auto-chaining.""" + return self._last_response_id + + def reset_chain(self) -> None: + """Reset the auto-chain state.""" + self._last_response_id = None + + @property + def last_reasoning_items(self) -> list[Any]: + """Get the last reasoning items for ZDR auto-chaining.""" + return self._last_reasoning_items + + def reset_reasoning_chain(self) -> None: + """Reset the reasoning auto-chain state.""" + self._last_reasoning_items = [] + def call( self, messages: str | list[LLMMessage], @@ -315,6 +421,17 @@ class AzureCompletion(BaseLLM): ): raise ValueError("LLM call blocked by before_llm_call hook") + # Route to Responses API if configured + if self.api == "responses": + return self._call_responses( + messages=formatted_messages, + tools=tools, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=effective_response_model, + ) + # Prepare completion parameters completion_params = self._prepare_completion_params( formatted_messages, tools, effective_response_model @@ -380,6 +497,17 @@ class AzureCompletion(BaseLLM): formatted_messages = self._format_messages_for_azure(messages) + # Route to Responses API if configured + if self.api == "responses": + return await self._acall_responses( + messages=formatted_messages, + tools=tools, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=effective_response_model, + ) + completion_params = self._prepare_completion_params( formatted_messages, tools, effective_response_model ) @@ -1003,6 +1131,888 @@ class AzureCompletion(BaseLLM): response_model=response_model, ) + # ========================================================================= + # Responses API methods + # ========================================================================= + + def _call_responses( 
+ self, + messages: list[LLMMessage], + tools: list[dict[str, Any]] | None = None, + available_functions: dict[str, Any] | None = None, + from_task: Any | None = None, + from_agent: Any | None = None, + response_model: type[BaseModel] | None = None, + ) -> str | Any: + """Call Azure OpenAI Responses API.""" + params = self._prepare_responses_params( + messages=messages, tools=tools, response_model=response_model + ) + + if self.stream: + return self._handle_streaming_responses( + params=params, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + + return self._handle_responses( + params=params, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + + async def _acall_responses( + self, + messages: list[LLMMessage], + tools: list[dict[str, Any]] | None = None, + available_functions: dict[str, Any] | None = None, + from_task: Any | None = None, + from_agent: Any | None = None, + response_model: type[BaseModel] | None = None, + ) -> str | Any: + """Async call to Azure OpenAI Responses API.""" + params = self._prepare_responses_params( + messages=messages, tools=tools, response_model=response_model + ) + + if self.stream: + return await self._ahandle_streaming_responses( + params=params, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + + return await self._ahandle_responses( + params=params, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + response_model=response_model, + ) + + def _prepare_responses_params( + self, + messages: list[LLMMessage], + tools: list[dict[str, Any]] | None = None, + response_model: type[BaseModel] | None = None, + ) -> dict[str, Any]: + """Prepare parameters for Azure OpenAI Responses API. 
+ + The Responses API uses a different structure than Chat Completions: + - `input` instead of `messages` + - `instructions` for system-level guidance (extracted from system messages) + - `text.format` instead of `response_format` for structured outputs + - Internally-tagged tool format (flat structure) + """ + instructions: str | None = self.instructions + input_messages: list[LLMMessage] = [] + + for message in messages: + if message.get("role") == "system": + content = message.get("content", "") + content_str = content if isinstance(content, str) else str(content) + if instructions: + instructions = f"{instructions}\n\n{content_str}" + else: + instructions = content_str + else: + input_messages.append(message) + + # Prepare input with optional reasoning items for ZDR chaining + final_input: list[Any] = [] + if self.auto_chain_reasoning and self._last_reasoning_items: + final_input.extend(self._last_reasoning_items) + final_input.extend(input_messages if input_messages else messages) + + params: dict[str, Any] = { + "model": self.model, + "input": final_input, + } + + if instructions: + params["instructions"] = instructions + + if self.stream: + params["stream"] = True + + if self.store is not None: + params["store"] = self.store + + # Handle response chaining: explicit previous_response_id takes precedence + if self.previous_response_id: + params["previous_response_id"] = self.previous_response_id + elif self.auto_chain and self._last_response_id: + params["previous_response_id"] = self._last_response_id + + # Handle include parameter with auto_chain_reasoning support + include_items: list[str] = list(self.include) if self.include else [] + if self.auto_chain_reasoning: + if "reasoning.encrypted_content" not in include_items: + include_items.append("reasoning.encrypted_content") + if include_items: + params["include"] = include_items + + params.update(self.additional_params) + + if self.temperature is not None: + params["temperature"] = self.temperature + if 
self.top_p is not None: + params["top_p"] = self.top_p + if self.max_completion_tokens is not None: + params["max_output_tokens"] = self.max_completion_tokens + elif self.max_tokens is not None: + params["max_output_tokens"] = self.max_tokens + if self.seed is not None: + params["seed"] = self.seed + + if self.reasoning_effort: + params["reasoning"] = {"effort": self.reasoning_effort} + + if response_model or self.response_format: + format_model = response_model or self.response_format + if isinstance(format_model, type) and issubclass(format_model, BaseModel): + schema_output = generate_model_description(format_model) + json_schema = schema_output.get("json_schema", {}) + params["text"] = { + "format": { + "type": "json_schema", + "name": json_schema.get("name", format_model.__name__), + "strict": json_schema.get("strict", True), + "schema": json_schema.get("schema", {}), + } + } + elif isinstance(format_model, dict): + params["text"] = {"format": format_model} + + all_tools: list[dict[str, Any]] = [] + + if self.builtin_tools: + for tool_name in self.builtin_tools: + tool_type = self.BUILTIN_TOOL_TYPES.get(tool_name, tool_name) + all_tools.append({"type": tool_type}) + + if tools: + all_tools.extend(self._convert_tools_for_responses(tools)) + + if all_tools: + params["tools"] = all_tools + + crewai_specific_params = { + "callbacks", + "available_functions", + "from_task", + "from_agent", + "provider", + "api_key", + "base_url", + "api_base", + "timeout", + } + + return {k: v for k, v in params.items() if k not in crewai_specific_params} + + def _convert_tools_for_responses( + self, tools: list[dict[str, Any]] + ) -> list[dict[str, Any]]: + """Convert CrewAI tools to Responses API format (internally-tagged). 
+ + Responses API uses flat structure: + {"type": "function", "name": "...", "description": "...", "parameters": {...}} + """ + from crewai.llms.providers.utils.common import safe_tool_conversion + + responses_tools = [] + + for tool in tools: + name, description, parameters = safe_tool_conversion(tool, "Azure") + + responses_tool: dict[str, Any] = { + "type": "function", + "name": name, + "description": description, + } + + if parameters: + if isinstance(parameters, dict): + responses_tool["parameters"] = parameters + else: + responses_tool["parameters"] = dict(parameters) + + responses_tools.append(responses_tool) + + return responses_tools + + def _handle_responses( + self, + params: dict[str, Any], + available_functions: dict[str, Any] | None = None, + from_task: Any | None = None, + from_agent: Any | None = None, + response_model: type[BaseModel] | None = None, + ) -> str | Any: + """Handle non-streaming Responses API call.""" + from openai.types.responses import Response + + try: + response: Response = self.responses_client.responses.create(**params) + + # Track response ID for auto-chaining + if self.auto_chain and response.id: + self._last_response_id = response.id + + # Track reasoning items for ZDR auto-chaining + if self.auto_chain_reasoning: + reasoning_items = self._extract_reasoning_items(response) + if reasoning_items: + self._last_reasoning_items = reasoning_items + + usage = self._extract_responses_token_usage(response) + self._track_token_usage_internal(usage) + + # If parse_tool_outputs is enabled, return structured result + if self.parse_tool_outputs: + parsed_result = self._extract_builtin_tool_outputs(response) + parsed_result.text = self._apply_stop_words(parsed_result.text) + + self._emit_call_completed_event( + response=parsed_result.text, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + + return parsed_result + + function_calls = 
self._extract_function_calls_from_response(response) + if function_calls and not available_functions: + self._emit_call_completed_event( + response=function_calls, + call_type=LLMCallType.TOOL_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + return function_calls + + if function_calls and available_functions: + for call in function_calls: + function_name = call.get("name", "") + function_args = call.get("arguments", {}) + if isinstance(function_args, str): + try: + function_args = json.loads(function_args) + except json.JSONDecodeError: + function_args = {} + + result = self._handle_tool_execution( + function_name=function_name, + function_args=function_args, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + ) + + if result is not None: + return result + + content = response.output_text or "" + + if response_model: + try: + structured_result = self._validate_structured_output( + content, response_model + ) + self._emit_call_completed_event( + response=structured_result, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + return structured_result + except ValueError as e: + logging.warning(f"Structured output validation failed: {e}") + + content = self._apply_stop_words(content) + + self._emit_call_completed_event( + response=content, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + + content = self._invoke_after_llm_call_hooks( + params.get("input", []), content, from_agent + ) + + except Exception as e: + if is_context_length_exceeded(e): + logging.error(f"Context window exceeded: {e}") + raise LLMContextLengthExceededError(str(e)) from e + + error_msg = f"Azure Responses API call failed: {e!s}" + logging.error(error_msg) + self._emit_call_failed_event( + error=error_msg, from_task=from_task, from_agent=from_agent + ) + raise + + 
return content + + async def _ahandle_responses( + self, + params: dict[str, Any], + available_functions: dict[str, Any] | None = None, + from_task: Any | None = None, + from_agent: Any | None = None, + response_model: type[BaseModel] | None = None, + ) -> str | Any: + """Handle async non-streaming Responses API call.""" + from openai.types.responses import Response + + try: + response: Response = await self.async_responses_client.responses.create(**params) + + # Track response ID for auto-chaining + if self.auto_chain and response.id: + self._last_response_id = response.id + + # Track reasoning items for ZDR auto-chaining + if self.auto_chain_reasoning: + reasoning_items = self._extract_reasoning_items(response) + if reasoning_items: + self._last_reasoning_items = reasoning_items + + usage = self._extract_responses_token_usage(response) + self._track_token_usage_internal(usage) + + # If parse_tool_outputs is enabled, return structured result + if self.parse_tool_outputs: + parsed_result = self._extract_builtin_tool_outputs(response) + parsed_result.text = self._apply_stop_words(parsed_result.text) + + self._emit_call_completed_event( + response=parsed_result.text, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + + return parsed_result + + function_calls = self._extract_function_calls_from_response(response) + if function_calls and not available_functions: + self._emit_call_completed_event( + response=function_calls, + call_type=LLMCallType.TOOL_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + return function_calls + + if function_calls and available_functions: + for call in function_calls: + function_name = call.get("name", "") + function_args = call.get("arguments", {}) + if isinstance(function_args, str): + try: + function_args = json.loads(function_args) + except json.JSONDecodeError: + function_args = {} + + result = 
self._handle_tool_execution( + function_name=function_name, + function_args=function_args, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + ) + + if result is not None: + return result + + content = response.output_text or "" + + if response_model: + try: + structured_result = self._validate_structured_output( + content, response_model + ) + self._emit_call_completed_event( + response=structured_result, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + return structured_result + except ValueError as e: + logging.warning(f"Structured output validation failed: {e}") + + content = self._apply_stop_words(content) + + self._emit_call_completed_event( + response=content, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + + except Exception as e: + if is_context_length_exceeded(e): + logging.error(f"Context window exceeded: {e}") + raise LLMContextLengthExceededError(str(e)) from e + + error_msg = f"Azure Responses API call failed: {e!s}" + logging.error(error_msg) + self._emit_call_failed_event( + error=error_msg, from_task=from_task, from_agent=from_agent + ) + raise + + return content + + def _handle_streaming_responses( + self, + params: dict[str, Any], + available_functions: dict[str, Any] | None = None, + from_task: Any | None = None, + from_agent: Any | None = None, + response_model: type[BaseModel] | None = None, + ) -> str | Any: + """Handle streaming Responses API call.""" + from openai.types.responses import Response + + full_response = "" + function_calls: list[dict[str, Any]] = [] + final_response: Response | None = None + + stream = self.responses_client.responses.create(**params) + response_id_stream = None + + for event in stream: + if event.type == "response.created": + response_id_stream = event.response.id + + if event.type == "response.output_text.delta": + 
delta_text = event.delta or "" + full_response += delta_text + self._emit_stream_chunk_event( + chunk=delta_text, + from_task=from_task, + from_agent=from_agent, + response_id=response_id_stream, + ) + + elif event.type == "response.function_call_arguments.delta": + pass + + elif event.type == "response.output_item.done": + item = event.item + if item.type == "function_call": + function_calls.append( + { + "id": item.call_id, + "name": item.name, + "arguments": item.arguments, + } + ) + + elif event.type == "response.completed": + final_response = event.response + if self.auto_chain and event.response and event.response.id: + self._last_response_id = event.response.id + if self.auto_chain_reasoning and event.response: + reasoning_items = self._extract_reasoning_items(event.response) + if reasoning_items: + self._last_reasoning_items = reasoning_items + if event.response and event.response.usage: + usage = self._extract_responses_token_usage(event.response) + self._track_token_usage_internal(usage) + + # If parse_tool_outputs is enabled, return structured result + if self.parse_tool_outputs and final_response: + parsed_result = self._extract_builtin_tool_outputs(final_response) + parsed_result.text = self._apply_stop_words(parsed_result.text) + + self._emit_call_completed_event( + response=parsed_result.text, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + + return parsed_result + + if function_calls and available_functions: + for call in function_calls: + function_name = call.get("name", "") + function_args = call.get("arguments", {}) + if isinstance(function_args, str): + try: + function_args = json.loads(function_args) + except json.JSONDecodeError: + function_args = {} + + result = self._handle_tool_execution( + function_name=function_name, + function_args=function_args, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + ) + + if result is not 
None: + return result + + if response_model: + try: + structured_result = self._validate_structured_output( + full_response, response_model + ) + self._emit_call_completed_event( + response=structured_result, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + return structured_result + except ValueError as e: + logging.warning(f"Structured output validation failed: {e}") + + full_response = self._apply_stop_words(full_response) + + self._emit_call_completed_event( + response=full_response, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + + return self._invoke_after_llm_call_hooks( + params.get("input", []), full_response, from_agent + ) + + async def _ahandle_streaming_responses( + self, + params: dict[str, Any], + available_functions: dict[str, Any] | None = None, + from_task: Any | None = None, + from_agent: Any | None = None, + response_model: type[BaseModel] | None = None, + ) -> str | Any: + """Handle async streaming Responses API call.""" + from openai.types.responses import Response + + full_response = "" + function_calls: list[dict[str, Any]] = [] + final_response: Response | None = None + + stream = await self.async_responses_client.responses.create(**params) + response_id_stream = None + + async for event in stream: + if event.type == "response.created": + response_id_stream = event.response.id + + if event.type == "response.output_text.delta": + delta_text = event.delta or "" + full_response += delta_text + self._emit_stream_chunk_event( + chunk=delta_text, + from_task=from_task, + from_agent=from_agent, + response_id=response_id_stream, + ) + + elif event.type == "response.function_call_arguments.delta": + pass + + elif event.type == "response.output_item.done": + item = event.item + if item.type == "function_call": + function_calls.append( + { + "id": item.call_id, + "name": item.name, + "arguments": 
item.arguments, + } + ) + + elif event.type == "response.completed": + final_response = event.response + if self.auto_chain and event.response and event.response.id: + self._last_response_id = event.response.id + if self.auto_chain_reasoning and event.response: + reasoning_items = self._extract_reasoning_items(event.response) + if reasoning_items: + self._last_reasoning_items = reasoning_items + if event.response and event.response.usage: + usage = self._extract_responses_token_usage(event.response) + self._track_token_usage_internal(usage) + + # If parse_tool_outputs is enabled, return structured result + if self.parse_tool_outputs and final_response: + parsed_result = self._extract_builtin_tool_outputs(final_response) + parsed_result.text = self._apply_stop_words(parsed_result.text) + + self._emit_call_completed_event( + response=parsed_result.text, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + + return parsed_result + + if function_calls and available_functions: + for call in function_calls: + function_name = call.get("name", "") + function_args = call.get("arguments", {}) + if isinstance(function_args, str): + try: + function_args = json.loads(function_args) + except json.JSONDecodeError: + function_args = {} + + result = self._handle_tool_execution( + function_name=function_name, + function_args=function_args, + available_functions=available_functions, + from_task=from_task, + from_agent=from_agent, + ) + + if result is not None: + return result + + if response_model: + try: + structured_result = self._validate_structured_output( + full_response, response_model + ) + self._emit_call_completed_event( + response=structured_result, + call_type=LLMCallType.LLM_CALL, + from_task=from_task, + from_agent=from_agent, + messages=params.get("input", []), + ) + return structured_result + except ValueError as e: + logging.warning(f"Structured output validation failed: {e}") + + full_response = 
def _extract_builtin_tool_outputs(self, response: Any) -> Any:
    """Extract and parse all built-in tool outputs from a Responses API response.

    Walks ``response.output`` and converts every recognised item into the
    matching result container from the OpenAI provider module, appending it
    to the corresponding list on a ``ResponsesAPIResult``. Items whose
    ``type`` is not one of the branches below are ignored.

    Optional list attributes (``results``, ``queries``,
    ``pending_safety_checks``, ``summary``) are treated as empty when they
    are ``None``, so a tool call without output does not raise.

    Args:
        response: Responses API response object. This method reads its
            ``output_text``, ``id`` and ``output`` attributes.

    Returns:
        A ``ResponsesAPIResult`` whose ``text`` is ``response.output_text``
        (or ``""`` when that is falsy) and whose ``response_id`` is
        ``response.id``, populated with the parsed tool outputs.
    """
    # The result containers are defined in the OpenAI provider module and
    # are imported at call time.
    from crewai.llms.providers.openai.completion import (
        CodeInterpreterFileResult,
        CodeInterpreterLogResult,
        CodeInterpreterResult,
        ComputerUseResult,
        FileSearchResult,
        FileSearchResultItem,
        ReasoningSummary,
        ResponsesAPIResult,
        WebSearchResult,
    )

    result = ResponsesAPIResult(
        text=response.output_text or "",
        response_id=response.id,
    )

    for item in response.output:
        item_type = item.type

        if item_type == "web_search_call":
            result.web_search_results.append(
                WebSearchResult(
                    id=item.id,
                    status=item.status,  # type: ignore[union-attr]
                    type=item_type,
                )
            )

        elif item_type == "file_search_call":
            file_results: list[FileSearchResultItem] = [
                FileSearchResultItem(
                    file_id=r.file_id,  # type: ignore[union-attr]
                    filename=r.filename,  # type: ignore[union-attr]
                    text=r.text,  # type: ignore[union-attr]
                    score=r.score,  # type: ignore[union-attr]
                    attributes=r.attributes,  # type: ignore[union-attr]
                )
                for r in item.results or []  # type: ignore[union-attr]
            ]
            result.file_search_results.append(
                FileSearchResult(
                    id=item.id,
                    status=item.status,  # type: ignore[union-attr]
                    type=item_type,
                    queries=list(item.queries or []),  # type: ignore[union-attr]
                    results=file_results,
                )
            )

        elif item_type == "code_interpreter_call":
            code_results: list[
                CodeInterpreterLogResult | CodeInterpreterFileResult
            ] = []
            # Guard against ``results`` being None, mirroring the
            # file-search branch above.
            for r in item.results or []:  # type: ignore[union-attr]
                if r.type == "logs":  # type: ignore[union-attr]
                    code_results.append(
                        CodeInterpreterLogResult(type="logs", logs=r.logs)  # type: ignore[union-attr]
                    )
                elif r.type == "files":  # type: ignore[union-attr]
                    files_data = [
                        {"file_id": f.file_id, "mime_type": f.mime_type}
                        for f in r.files or []  # type: ignore[union-attr]
                    ]
                    code_results.append(
                        CodeInterpreterFileResult(type="files", files=files_data)
                    )
            result.code_interpreter_results.append(
                CodeInterpreterResult(
                    id=item.id,
                    status=item.status,  # type: ignore[union-attr]
                    type=item_type,
                    code=item.code,  # type: ignore[union-attr]
                    container_id=item.container_id,  # type: ignore[union-attr]
                    results=code_results,
                )
            )

        elif item_type == "computer_call":
            action_dict = item.action.model_dump() if item.action else {}  # type: ignore[union-attr]
            safety_checks = [
                {"id": c.id, "code": c.code, "message": c.message}
                for c in item.pending_safety_checks or []  # type: ignore[union-attr]
            ]
            result.computer_use_results.append(
                ComputerUseResult(
                    id=item.id,
                    status=item.status,  # type: ignore[union-attr]
                    type=item_type,
                    call_id=item.call_id,  # type: ignore[union-attr]
                    action=action_dict,
                    pending_safety_checks=safety_checks,
                )
            )

        elif item_type == "reasoning":
            summaries = [
                {"type": s.type, "text": s.text}
                for s in item.summary or []  # type: ignore[union-attr]
            ]
            result.reasoning_summaries.append(
                ReasoningSummary(
                    id=item.id,
                    status=item.status,  # type: ignore[union-attr]
                    type=item_type,
                    summary=summaries,
                    encrypted_content=item.encrypted_content,  # type: ignore[union-attr]
                )
            )

        elif item_type == "function_call":
            # Same dict shape as _extract_function_calls_from_response.
            result.function_calls.append(
                {
                    "id": item.call_id,  # type: ignore[union-attr]
                    "name": item.name,  # type: ignore[union-attr]
                    "arguments": item.arguments,  # type: ignore[union-attr]
                }
            )

    return result
def _validate_structured_output(
    self, content: str, response_model: type[BaseModel]
) -> BaseModel:
    """Parse a JSON payload into an instance of ``response_model``.

    Args:
        content: JSON string taken from the model response.
        response_model: Pydantic model class used to parse and validate
            ``content``.

    Returns:
        The instance produced by ``response_model.model_validate_json``.

    Raises:
        ValueError: If parsing or validation fails for any reason; the
            original exception is attached as ``__cause__``.
    """
    try:
        parsed = response_model.model_validate_json(content)
    except Exception as err:
        # Any failure is surfaced as ValueError, with the model name included
        # in the message and the original error chained.
        raise ValueError(
            f"Failed to validate structured output with model "
            f"{response_model.__name__}: {err}"
        ) from err
    return parsed
def test_anthropic_responses_api_error_suggests_completions():
    """The rejection message names api='completions' and the Messages API."""
    from crewai.llms.providers.anthropic.completion import AnthropicCompletion

    init_kwargs = {"model": "claude-sonnet-4-20250514", "api": "responses"}

    with pytest.raises(NotImplementedError) as caught:
        AnthropicCompletion(**init_kwargs)

    message = str(caught.value)
    # Both the suggested alternative and the native API name must be present.
    for fragment in ("api='completions'", "Messages API"):
        assert fragment in message
def test_azure_responses_api_initialization():
    """Constructing with api='responses' stores the Responses settings on the instance."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://test.openai.azure.com",
        "instructions": "You are a helpful assistant.",
        "store": True,
    }

    # The client-initialisation hook is patched out for the constructor call.
    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(**init_kwargs)

    assert llm.api == "responses"
    assert llm.instructions == "You are a helpful assistant."
    assert llm.store is True
    assert llm.model == "gpt-4o"
def test_azure_responses_api_tool_format():
    """Function tools are converted to the flat Responses API shape.

    The converted tool carries ``type``, ``name``, ``description`` and
    ``parameters`` at the top level and has no nested ``function`` key.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
        )

    weather_tool = {
        "name": "get_weather",
        "description": "Get the weather for a location",
        "parameters": {
            "type": "object",
            "properties": {"location": {"type": "string"}},
            "required": ["location"],
        },
    }

    converted = llm._convert_tools_for_responses([weather_tool])

    assert len(converted) == 1
    (only_tool,) = converted
    assert only_tool["type"] == "function"
    assert only_tool["name"] == "get_weather"
    assert only_tool["description"] == "Get the weather for a location"
    assert "parameters" in only_tool
    assert "function" not in only_tool
def test_azure_responses_api_call_routing():
    """call() dispatches to the handler that matches the configured api.

    With api='completions' the call goes through ``_handle_completion``;
    with api='responses' it goes through ``_call_responses``.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    base_kwargs = {
        "model": "gpt-4o",
        "api_key": "test-key",
        "endpoint": "https://test.openai.azure.com",
    }

    completion_completions = AzureCompletion(api="completions", **base_kwargs)

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        completion_responses = AzureCompletion(api="responses", **base_kwargs)

    routing_cases = (
        (completion_completions, "_handle_completion", "completions result"),
        (completion_responses, "_call_responses", "responses result"),
    )

    for llm, handler_name, expected in routing_cases:
        # Stub the handler and the message formatter, then check call()
        # returns the stubbed handler value and invoked it exactly once.
        with patch.object(
            llm, handler_name, return_value=expected
        ) as handler_mock, patch.object(
            llm,
            "_format_messages_for_azure",
            return_value=[{"role": "user", "content": "Hello"}],
        ):
            result = llm.call("Hello")
            handler_mock.assert_called_once()
            assert result == expected
def test_azure_responses_api_auto_chain_param():
    """auto_chain=True is stored and no response ID is recorded yet."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    init_kwargs = {
        "model": "gpt-4o",
        "api": "responses",
        "api_key": "test-key",
        "endpoint": "https://test.openai.azure.com",
        "auto_chain": True,
    }

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(**init_kwargs)

    assert llm.auto_chain is True
    assert llm._last_response_id is None
def test_azure_responses_api_auto_chain_prepare_params():
    """With auto_chain on, the stored response ID is sent as previous_response_id."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            auto_chain=True,
        )

    # Nothing stored yet: the key must be absent.
    first_params = llm._prepare_responses_params(
        messages=[{"role": "user", "content": "test"}]
    )
    assert "previous_response_id" not in first_params

    # Once an ID is stored it is picked up on the next call.
    llm._last_response_id = "resp_previous_123"
    second_params = llm._prepare_responses_params(
        messages=[{"role": "user", "content": "test"}]
    )
    assert second_params.get("previous_response_id") == "resp_previous_123"
def test_azure_responses_api_last_reasoning_items_property():
    """last_reasoning_items starts empty and reflects _last_reasoning_items."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            auto_chain_reasoning=True,
        )

    # Empty before anything is stored.
    assert llm.last_reasoning_items == []

    # Writing the private attribute shows through the public property.
    stored_items = [{"id": "rs_test_123", "type": "reasoning"}]
    llm._last_reasoning_items = stored_items
    assert llm.last_reasoning_items == stored_items
def test_azure_responses_api_auto_chain_reasoning_preserves_existing_include():
    """User-supplied include entries survive when auto_chain_reasoning adds its own."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            auto_chain_reasoning=True,
            include=["file_search_call.results"],
        )

    params = llm._prepare_responses_params(
        messages=[{"role": "user", "content": "test"}]
    )

    assert "include" in params
    included = params["include"]
    assert "reasoning.encrypted_content" in included
    assert "file_search_call.results" in included
def test_azure_responses_api_auto_chain_reasoning_disabled_no_prepend():
    """Stored reasoning items are left out of input when auto_chain_reasoning is False."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            auto_chain_reasoning=False,
        )

    # Store a reasoning item anyway; the flag being off must keep it out.
    reasoning_item = MagicMock()
    reasoning_item.type = "reasoning"
    llm._last_reasoning_items = [reasoning_item]

    params = llm._prepare_responses_params(
        messages=[{"role": "user", "content": "test"}]
    )

    # Only the user message is expected in the input.
    prepared_input = params["input"]
    assert len(prepared_input) == 1
    assert prepared_input[0]["role"] == "user"
def test_azure_responses_api_max_completion_tokens():
    """max_completion_tokens is sent to the Responses API as max_output_tokens."""
    from crewai.llms.providers.azure.completion import AzureCompletion

    token_limit = 4096

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ):
        llm = AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
            max_completion_tokens=token_limit,
        )

    params = llm._prepare_responses_params(
        messages=[{"role": "user", "content": "test"}]
    )
    assert params["max_output_tokens"] == token_limit
def test_azure_responses_api_init_responses_clients():
    """Test that __init__ invokes _init_responses_clients once for api='responses'.

    The hook itself is replaced by a mock, so this test checks only that the
    constructor calls it exactly once. It does not check what the real hook
    creates.
    """
    from crewai.llms.providers.azure.completion import AzureCompletion

    with patch(
        "crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"
    ) as mock_init:
        AzureCompletion(
            model="gpt-4o",
            api="responses",
            api_key="test-key",
            endpoint="https://test.openai.azure.com",
        )

    # _init_responses_clients should be called during __init__
    mock_init.assert_called_once()
+ # Non-system messages should be in input + assert len(params["input"]) == 3 + assert params["input"][0]["role"] == "user" + assert params["input"][1]["role"] == "assistant" + assert params["input"][2]["role"] == "user" + + +def test_azure_responses_api_multiple_system_messages_merged(): + """Test that multiple system messages are merged into instructions.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + with patch("crewai.llms.providers.azure.completion.AzureCompletion._init_responses_clients"): + completion = AzureCompletion( + model="gpt-4o", + api="responses", + api_key="test-key", + endpoint="https://test.openai.azure.com", + instructions="Base instructions.", + ) + + messages = [ + {"role": "system", "content": "System context."}, + {"role": "user", "content": "Hello!"}, + ] + + params = completion._prepare_responses_params(messages) + + # Both base instructions and system message should be merged + assert "Base instructions." in params["instructions"] + assert "System context." 
in params["instructions"] diff --git a/lib/crewai/tests/llms/bedrock/test_bedrock.py b/lib/crewai/tests/llms/bedrock/test_bedrock.py index fe18a8349..b13b433da 100644 --- a/lib/crewai/tests/llms/bedrock/test_bedrock.py +++ b/lib/crewai/tests/llms/bedrock/test_bedrock.py @@ -1175,3 +1175,56 @@ def test_bedrock_tool_results_not_merged_across_assistant_messages(): ) assert tool_result_messages[0]["content"][0]["toolResult"]["toolUseId"] == "call_a" assert tool_result_messages[1]["content"][0]["toolResult"]["toolUseId"] == "call_b" + + +# ============================================================================= +# Responses API Error Handling Tests +# ============================================================================= + + +def test_bedrock_responses_api_raises_not_implemented(bedrock_mocks): + """Test that Bedrock raises NotImplementedError when api='responses' is used.""" + from crewai.llms.providers.bedrock.completion import BedrockCompletion + + with pytest.raises(NotImplementedError, match="Responses API is not supported by AWS Bedrock"): + BedrockCompletion( + model="anthropic.claude-3-5-sonnet-20241022-v2:0", + api="responses", + ) + + +def test_bedrock_responses_api_error_suggests_completions(bedrock_mocks): + """Test that the error message suggests using api='completions' instead.""" + from crewai.llms.providers.bedrock.completion import BedrockCompletion + + with pytest.raises(NotImplementedError) as exc_info: + BedrockCompletion( + model="anthropic.claude-3-5-sonnet-20241022-v2:0", + api="responses", + ) + + error_msg = str(exc_info.value) + assert "api='completions'" in error_msg + assert "Converse API" in error_msg + + +def test_bedrock_completions_api_still_works(bedrock_mocks): + """Test that api='completions' (default) still works normally.""" + from crewai.llms.providers.bedrock.completion import BedrockCompletion + + # Should not raise any error + completion = BedrockCompletion( + model="anthropic.claude-3-5-sonnet-20241022-v2:0", + 
api="completions", + ) + assert completion.api == "completions" + + +def test_bedrock_default_api_is_completions(bedrock_mocks): + """Test that the default API is 'completions'.""" + from crewai.llms.providers.bedrock.completion import BedrockCompletion + + completion = BedrockCompletion( + model="anthropic.claude-3-5-sonnet-20241022-v2:0", + ) + assert completion.api == "completions" diff --git a/lib/crewai/tests/llms/google/test_google.py b/lib/crewai/tests/llms/google/test_google.py index bd62e3343..140a1dc81 100644 --- a/lib/crewai/tests/llms/google/test_google.py +++ b/lib/crewai/tests/llms/google/test_google.py @@ -1190,3 +1190,55 @@ def test_gemini_cached_prompt_tokens_with_tools(): # cached_prompt_tokens should be populated (may be 0 if Gemini # doesn't cache for this particular request, but the field should exist) assert usage.cached_prompt_tokens >= 0 + +# ============================================================================= +# Responses API Error Handling Tests +# ============================================================================= + + +def test_gemini_responses_api_raises_not_implemented(): + """Test that Gemini raises NotImplementedError when api='responses' is used.""" + from crewai.llms.providers.gemini.completion import GeminiCompletion + + with pytest.raises(NotImplementedError, match="Responses API is not supported by Google Gemini"): + GeminiCompletion( + model="gemini-2.0-flash-001", + api="responses", + ) + + +def test_gemini_responses_api_error_suggests_completions(): + """Test that the error message suggests using api='completions' instead.""" + from crewai.llms.providers.gemini.completion import GeminiCompletion + + with pytest.raises(NotImplementedError) as exc_info: + GeminiCompletion( + model="gemini-2.0-flash-001", + api="responses", + ) + + error_msg = str(exc_info.value) + assert "api='completions'" in error_msg + assert "generate_content" in error_msg + + +def test_gemini_completions_api_still_works(): + """Test that 
api='completions' (default) still works normally.""" + from crewai.llms.providers.gemini.completion import GeminiCompletion + + # Should not raise any error + completion = GeminiCompletion( + model="gemini-2.0-flash-001", + api="completions", + ) + assert completion.api == "completions" + + +def test_gemini_default_api_is_completions(): + """Test that the default API is 'completions'.""" + from crewai.llms.providers.gemini.completion import GeminiCompletion + + completion = GeminiCompletion( + model="gemini-2.0-flash-001", + ) + assert completion.api == "completions"