feat: add streaming tool call events; fix provider id tracking; add tests and cassettes

Adds support for streaming tool call events with test coverage, fixes tool-stream ID tracking (including OpenAI-style tracking for Azure), improves Gemini tool calling + streaming tests, adds Anthropic tests, generates Azure cassettes, and fixes Azure cassette URIs.
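Consumers see these chunks on the event bus: tool-call chunks carry call_type=LLMCallType.TOOL_CALL plus a tool_call payload of the form {"id", "function": {"name", "arguments"}, "type", "index"}, while plain text chunks are unchanged. A rough consumer-side sketch (not part of this commit; the LLMStreamChunkEvent class name, the import paths, and the on() subscription helper are assumptions, adjust to the actual event API):

from crewai.events import crewai_event_bus, LLMStreamChunkEvent  # assumed import path
from crewai.llm.constants import LLMCallType  # assumed import path

tool_args: dict[int, str] = {}  # accumulated argument JSON, keyed by tool-call index

@crewai_event_bus.on(LLMStreamChunkEvent)
def handle_chunk(source, event):
    if getattr(event, "call_type", None) == LLMCallType.TOOL_CALL and event.tool_call:
        idx = event.tool_call.get("index", 0)
        # Argument fragments arrive piece by piece; accumulate them per index.
        tool_args[idx] = tool_args.get(idx, "") + (event.chunk or "")
    else:
        # Ordinary text chunks keep streaming exactly as before.
        print(event.chunk, end="", flush=True)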
Greyson LaLonde
2026-01-05 14:33:36 -05:00
committed by GitHub
parent f3c17a249b
commit f8deb0fd18
15 changed files with 1798 additions and 60 deletions

View File

@@ -354,8 +354,17 @@ class BaseLLM(ABC):
         from_task: Task | None = None,
         from_agent: Agent | None = None,
         tool_call: dict[str, Any] | None = None,
+        call_type: LLMCallType | None = None,
     ) -> None:
-        """Emit stream chunk event."""
+        """Emit stream chunk event.
+
+        Args:
+            chunk: The text content of the chunk.
+            from_task: The task that initiated the call.
+            from_agent: The agent that initiated the call.
+            tool_call: Tool call information if this is a tool call chunk.
+            call_type: The type of LLM call (LLM_CALL or TOOL_CALL).
+        """
         if not hasattr(crewai_event_bus, "emit"):
             raise ValueError("crewai_event_bus does not have an emit method") from None

@@ -366,6 +375,7 @@ class BaseLLM(ABC):
                 tool_call=tool_call,
                 from_task=from_task,
                 from_agent=from_agent,
+                call_type=call_type,
             ),
         )

View File

@@ -598,6 +598,8 @@ class AnthropicCompletion(BaseLLM):
         # (the SDK sets it internally)
         stream_params = {k: v for k, v in params.items() if k != "stream"}
 
+        current_tool_calls: dict[int, dict[str, Any]] = {}
+
         # Make streaming API call
         with self.client.messages.stream(**stream_params) as stream:
             for event in stream:
@@ -610,6 +612,55 @@
                         from_agent=from_agent,
                     )
 
+                if event.type == "content_block_start":
+                    block = event.content_block
+                    if block.type == "tool_use":
+                        block_index = event.index
+                        current_tool_calls[block_index] = {
+                            "id": block.id,
+                            "name": block.name,
+                            "arguments": "",
+                            "index": block_index,
+                        }
+                        self._emit_stream_chunk_event(
+                            chunk="",
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            tool_call={
+                                "id": block.id,
+                                "function": {
+                                    "name": block.name,
+                                    "arguments": "",
+                                },
+                                "type": "function",
+                                "index": block_index,
+                            },
+                            call_type=LLMCallType.TOOL_CALL,
+                        )
+                elif event.type == "content_block_delta":
+                    if event.delta.type == "input_json_delta":
+                        block_index = event.index
+                        partial_json = event.delta.partial_json
+                        if block_index in current_tool_calls and partial_json:
+                            current_tool_calls[block_index]["arguments"] += partial_json
+                            self._emit_stream_chunk_event(
+                                chunk=partial_json,
+                                from_task=from_task,
+                                from_agent=from_agent,
+                                tool_call={
+                                    "id": current_tool_calls[block_index]["id"],
+                                    "function": {
+                                        "name": current_tool_calls[block_index]["name"],
+                                        "arguments": current_tool_calls[block_index][
+                                            "arguments"
+                                        ],
+                                    },
+                                    "type": "function",
+                                    "index": block_index,
+                                },
+                                call_type=LLMCallType.TOOL_CALL,
+                            )
+
             final_message: Message = stream.get_final_message()
 
             thinking_blocks: list[ThinkingBlock] = []
@@ -941,6 +992,8 @@ class AnthropicCompletion(BaseLLM):
         stream_params = {k: v for k, v in params.items() if k != "stream"}
 
+        current_tool_calls: dict[int, dict[str, Any]] = {}
+
         async with self.async_client.messages.stream(**stream_params) as stream:
             async for event in stream:
                 if hasattr(event, "delta") and hasattr(event.delta, "text"):
@@ -952,6 +1005,55 @@
                         from_agent=from_agent,
                     )
 
+                if event.type == "content_block_start":
+                    block = event.content_block
+                    if block.type == "tool_use":
+                        block_index = event.index
+                        current_tool_calls[block_index] = {
+                            "id": block.id,
+                            "name": block.name,
+                            "arguments": "",
+                            "index": block_index,
+                        }
+                        self._emit_stream_chunk_event(
+                            chunk="",
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            tool_call={
+                                "id": block.id,
+                                "function": {
+                                    "name": block.name,
+                                    "arguments": "",
+                                },
+                                "type": "function",
+                                "index": block_index,
+                            },
+                            call_type=LLMCallType.TOOL_CALL,
+                        )
+                elif event.type == "content_block_delta":
+                    if event.delta.type == "input_json_delta":
+                        block_index = event.index
+                        partial_json = event.delta.partial_json
+                        if block_index in current_tool_calls and partial_json:
+                            current_tool_calls[block_index]["arguments"] += partial_json
+                            self._emit_stream_chunk_event(
+                                chunk=partial_json,
+                                from_task=from_task,
+                                from_agent=from_agent,
+                                tool_call={
+                                    "id": current_tool_calls[block_index]["id"],
+                                    "function": {
+                                        "name": current_tool_calls[block_index]["name"],
+                                        "arguments": current_tool_calls[block_index][
+                                            "arguments"
+                                        ],
+                                    },
+                                    "type": "function",
+                                    "index": block_index,
+                                },
+                                call_type=LLMCallType.TOOL_CALL,
+                            )
+
             final_message: Message = await stream.get_final_message()
 
             usage = self._extract_anthropic_token_usage(final_message)
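For reference, Anthropic splits a streamed tool call across a content_block_start event (carrying the id and name) and a series of input_json_delta fragments; the cassette added further below records exactly this shape. A standalone sketch of the reassembly the new handler performs (fragments copied from that cassette, nothing else taken from this commit):

import json

# partial_json fragments as they appear in the recorded Anthropic stream.
fragments = ["", "{\"c", "ity\":", " \"San Franci", "sco\"}"]

arguments = ""
for partial_json in fragments:
    arguments += partial_json  # same per-block accumulation as current_tool_calls[...]["arguments"]

assert json.loads(arguments) == {"city": "San Francisco"}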

View File

@@ -674,7 +674,7 @@ class AzureCompletion(BaseLLM):
         self,
         update: StreamingChatCompletionsUpdate,
         full_response: str,
-        tool_calls: dict[str, dict[str, str]],
+        tool_calls: dict[int, dict[str, Any]],
         from_task: Any | None = None,
         from_agent: Any | None = None,
     ) -> str:
@@ -702,25 +702,45 @@
                 )
 
             if choice.delta and choice.delta.tool_calls:
-                for tool_call in choice.delta.tool_calls:
-                    call_id = tool_call.id or "default"
-                    if call_id not in tool_calls:
-                        tool_calls[call_id] = {
+                for idx, tool_call in enumerate(choice.delta.tool_calls):
+                    if idx not in tool_calls:
+                        tool_calls[idx] = {
+                            "id": tool_call.id,
                             "name": "",
                             "arguments": "",
                         }
+                    elif tool_call.id and not tool_calls[idx]["id"]:
+                        tool_calls[idx]["id"] = tool_call.id
 
                     if tool_call.function and tool_call.function.name:
-                        tool_calls[call_id]["name"] = tool_call.function.name
+                        tool_calls[idx]["name"] = tool_call.function.name
                     if tool_call.function and tool_call.function.arguments:
-                        tool_calls[call_id]["arguments"] += tool_call.function.arguments
+                        tool_calls[idx]["arguments"] += tool_call.function.arguments
+
+                    self._emit_stream_chunk_event(
+                        chunk=tool_call.function.arguments
+                        if tool_call.function and tool_call.function.arguments
+                        else "",
+                        from_task=from_task,
+                        from_agent=from_agent,
+                        tool_call={
+                            "id": tool_calls[idx]["id"],
+                            "function": {
+                                "name": tool_calls[idx]["name"],
+                                "arguments": tool_calls[idx]["arguments"],
+                            },
+                            "type": "function",
+                            "index": idx,
+                        },
+                        call_type=LLMCallType.TOOL_CALL,
+                    )
 
         return full_response
 
     def _finalize_streaming_response(
         self,
         full_response: str,
-        tool_calls: dict[str, dict[str, str]],
+        tool_calls: dict[int, dict[str, Any]],
         usage_data: dict[str, int],
         params: AzureCompletionParams,
         available_functions: dict[str, Any] | None = None,
@@ -804,7 +824,7 @@
     ) -> str | Any:
         """Handle streaming chat completion."""
         full_response = ""
-        tool_calls: dict[str, dict[str, Any]] = {}
+        tool_calls: dict[int, dict[str, Any]] = {}
         usage_data = {"total_tokens": 0}
 
         for update in self.client.complete(**params):  # type: ignore[arg-type]
@@ -870,7 +890,7 @@
     ) -> str | Any:
         """Handle streaming chat completion asynchronously."""
         full_response = ""
-        tool_calls: dict[str, dict[str, Any]] = {}
+        tool_calls: dict[int, dict[str, Any]] = {}
         usage_data = {"total_tokens": 0}
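The keying change (id to index) matters because, in OpenAI-style streams such as the Azure responses recorded below, only the first fragment of a tool call carries an id; later fragments carry just the index and another slice of the arguments, so accumulating under tool_call.id or "default" can misattribute fragments. A standalone illustration of the index-keyed accumulation (sample deltas modeled on the cassettes below; the id value is made up):

deltas = [
    {"index": 0, "id": "call_abc123", "function": {"name": "get_current_temperature", "arguments": ""}},
    {"index": 0, "id": None, "function": {"name": None, "arguments": "{\"city\":\""}},
    {"index": 0, "id": None, "function": {"name": None, "arguments": "San Francisco\"}"}},
]

tool_calls: dict[int, dict[str, str]] = {}
for delta in deltas:
    entry = tool_calls.setdefault(delta["index"], {"id": "", "name": "", "arguments": ""})
    if delta["id"]:
        entry["id"] = delta["id"]  # the id only arrives on the first fragment
    if delta["function"]["name"]:
        entry["name"] = delta["function"]["name"]
    entry["arguments"] += delta["function"]["arguments"] or ""

assert tool_calls[0]["arguments"] == '{"city":"San Francisco"}'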

View File

@@ -315,9 +315,7 @@ class BedrockCompletion(BaseLLM):
             messages
         )
 
-        if not self._invoke_before_llm_call_hooks(
-            cast(list[LLMMessage], formatted_messages), from_agent
-        ):
+        if not self._invoke_before_llm_call_hooks(formatted_messages, from_agent):
             raise ValueError("LLM call blocked by before_llm_call hook")
 
         # Prepare request body
@@ -361,7 +359,7 @@
         if self.stream:
             return self._handle_streaming_converse(
-                cast(list[LLMMessage], formatted_messages),
+                formatted_messages,
                 body,
                 available_functions,
                 from_task,
@@ -369,7 +367,7 @@
             )
 
         return self._handle_converse(
-            cast(list[LLMMessage], formatted_messages),
+            formatted_messages,
             body,
             available_functions,
             from_task,
@@ -433,7 +431,7 @@
         )
 
         formatted_messages, system_message = self._format_messages_for_converse(
-            messages  # type: ignore[arg-type]
+            messages
         )
 
         body: BedrockConverseRequestBody = {
@@ -687,8 +685,10 @@ class BedrockCompletion(BaseLLM):
     ) -> str:
         """Handle streaming converse API call with comprehensive event handling."""
         full_response = ""
-        current_tool_use = None
-        tool_use_id = None
+        current_tool_use: dict[str, Any] | None = None
+        tool_use_id: str | None = None
+        tool_use_index = 0
+        accumulated_tool_input = ""
 
         try:
             response = self.client.converse_stream(
@@ -709,9 +709,30 @@
                 elif "contentBlockStart" in event:
                     start = event["contentBlockStart"].get("start", {})
+                    content_block_index = event["contentBlockStart"].get(
+                        "contentBlockIndex", 0
+                    )
                     if "toolUse" in start:
-                        current_tool_use = start["toolUse"]
+                        tool_use_block = start["toolUse"]
+                        current_tool_use = cast(dict[str, Any], tool_use_block)
                         tool_use_id = current_tool_use.get("toolUseId")
+                        tool_use_index = content_block_index
+                        accumulated_tool_input = ""
+                        self._emit_stream_chunk_event(
+                            chunk="",
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            tool_call={
+                                "id": tool_use_id or "",
+                                "function": {
+                                    "name": current_tool_use.get("name", ""),
+                                    "arguments": "",
+                                },
+                                "type": "function",
+                                "index": tool_use_index,
+                            },
+                            call_type=LLMCallType.TOOL_CALL,
+                        )
                         logging.debug(
                             f"Tool use started in stream: {json.dumps(current_tool_use)} (ID: {tool_use_id})"
                         )
@@ -730,7 +751,23 @@
                     elif "toolUse" in delta and current_tool_use:
                         tool_input = delta["toolUse"].get("input", "")
                         if tool_input:
+                            accumulated_tool_input += tool_input
                             logging.debug(f"Tool input delta: {tool_input}")
+                            self._emit_stream_chunk_event(
+                                chunk=tool_input,
+                                from_task=from_task,
+                                from_agent=from_agent,
+                                tool_call={
+                                    "id": tool_use_id or "",
+                                    "function": {
+                                        "name": current_tool_use.get("name", ""),
+                                        "arguments": accumulated_tool_input,
+                                    },
+                                    "type": "function",
+                                    "index": tool_use_index,
+                                },
+                                call_type=LLMCallType.TOOL_CALL,
+                            )
                 elif "contentBlockStop" in event:
                     logging.debug("Content block stopped in stream")
                     if current_tool_use and available_functions:
@@ -848,7 +885,7 @@
     async def _ahandle_converse(
         self,
-        messages: list[dict[str, Any]],
+        messages: list[LLMMessage],
         body: BedrockConverseRequestBody,
         available_functions: Mapping[str, Any] | None = None,
         from_task: Any | None = None,
@@ -1013,7 +1050,7 @@
     async def _ahandle_streaming_converse(
         self,
-        messages: list[dict[str, Any]],
+        messages: list[LLMMessage],
         body: BedrockConverseRequestBody,
         available_functions: dict[str, Any] | None = None,
         from_task: Any | None = None,
@@ -1021,8 +1058,10 @@
     ) -> str:
         """Handle async streaming converse API call."""
         full_response = ""
-        current_tool_use = None
-        tool_use_id = None
+        current_tool_use: dict[str, Any] | None = None
+        tool_use_id: str | None = None
+        tool_use_index = 0
+        accumulated_tool_input = ""
 
         try:
             async_client = await self._ensure_async_client()
@@ -1044,9 +1083,30 @@
                 elif "contentBlockStart" in event:
                     start = event["contentBlockStart"].get("start", {})
+                    content_block_index = event["contentBlockStart"].get(
+                        "contentBlockIndex", 0
+                    )
                     if "toolUse" in start:
-                        current_tool_use = start["toolUse"]
+                        tool_use_block = start["toolUse"]
+                        current_tool_use = cast(dict[str, Any], tool_use_block)
                         tool_use_id = current_tool_use.get("toolUseId")
+                        tool_use_index = content_block_index
+                        accumulated_tool_input = ""
+                        self._emit_stream_chunk_event(
+                            chunk="",
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            tool_call={
+                                "id": tool_use_id or "",
+                                "function": {
+                                    "name": current_tool_use.get("name", ""),
+                                    "arguments": "",
+                                },
+                                "type": "function",
+                                "index": tool_use_index,
+                            },
+                            call_type=LLMCallType.TOOL_CALL,
+                        )
                         logging.debug(
                             f"Tool use started in stream: {current_tool_use.get('name')} (ID: {tool_use_id})"
                         )
@@ -1065,7 +1125,23 @@
                     elif "toolUse" in delta and current_tool_use:
                         tool_input = delta["toolUse"].get("input", "")
                         if tool_input:
+                            accumulated_tool_input += tool_input
                             logging.debug(f"Tool input delta: {tool_input}")
+                            self._emit_stream_chunk_event(
+                                chunk=tool_input,
+                                from_task=from_task,
+                                from_agent=from_agent,
+                                tool_call={
+                                    "id": tool_use_id or "",
+                                    "function": {
+                                        "name": current_tool_use.get("name", ""),
+                                        "arguments": accumulated_tool_input,
+                                    },
+                                    "type": "function",
+                                    "index": tool_use_index,
+                                },
+                                call_type=LLMCallType.TOOL_CALL,
+                            )
                 elif "contentBlockStop" in event:
                     logging.debug("Content block stopped in stream")
@@ -1174,7 +1250,7 @@
     def _format_messages_for_converse(
         self, messages: str | list[LLMMessage]
-    ) -> tuple[list[dict[str, Any]], str | None]:
+    ) -> tuple[list[LLMMessage], str | None]:
         """Format messages for Converse API following AWS documentation.
 
         Note: Returns dict[str, Any] instead of LLMMessage because Bedrock uses
@@ -1184,7 +1260,7 @@
         # Use base class formatting first
         formatted_messages = self._format_messages(messages)
 
-        converse_messages: list[dict[str, Any]] = []
+        converse_messages: list[LLMMessage] = []
         system_message: str | None = None
 
         for message in formatted_messages:

View File

@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import json
 import logging
 import os
 import re
@@ -24,7 +25,7 @@ try:
     from google import genai
     from google.genai import types
     from google.genai.errors import APIError
-    from google.genai.types import GenerateContentResponse, Schema
+    from google.genai.types import GenerateContentResponse
 except ImportError:
     raise ImportError(
         'Google Gen AI native provider not available, to install: uv add "crewai[google-genai]"'
@@ -434,12 +435,9 @@ class GeminiCompletion(BaseLLM):
             function_declaration = types.FunctionDeclaration(
                 name=name,
                 description=description,
+                parameters=parameters if parameters else None,
             )
 
-            # Add parameters if present - ensure parameters is a dict
-            if parameters and isinstance(parameters, Schema):
-                function_declaration.parameters = parameters
-
             gemini_tool = types.Tool(function_declarations=[function_declaration])
             gemini_tools.append(gemini_tool)
@@ -609,7 +607,7 @@ class GeminiCompletion(BaseLLM):
             candidate = response.candidates[0]
             if candidate.content and candidate.content.parts:
                 for part in candidate.content.parts:
-                    if hasattr(part, "function_call") and part.function_call:
+                    if part.function_call:
                         function_name = part.function_call.name
                         if function_name is None:
                             continue
@@ -645,17 +643,17 @@
         self,
         chunk: GenerateContentResponse,
         full_response: str,
-        function_calls: dict[str, dict[str, Any]],
+        function_calls: dict[int, dict[str, Any]],
         usage_data: dict[str, int],
         from_task: Any | None = None,
         from_agent: Any | None = None,
-    ) -> tuple[str, dict[str, dict[str, Any]], dict[str, int]]:
+    ) -> tuple[str, dict[int, dict[str, Any]], dict[str, int]]:
         """Process a single streaming chunk.
 
         Args:
             chunk: The streaming chunk response
             full_response: Accumulated response text
-            function_calls: Accumulated function calls
+            function_calls: Accumulated function calls keyed by sequential index
             usage_data: Accumulated usage data
             from_task: Task that initiated the call
             from_agent: Agent that initiated the call
@@ -678,22 +676,44 @@
             candidate = chunk.candidates[0]
             if candidate.content and candidate.content.parts:
                 for part in candidate.content.parts:
-                    if hasattr(part, "function_call") and part.function_call:
-                        call_id = part.function_call.name or "default"
-                        if call_id not in function_calls:
-                            function_calls[call_id] = {
-                                "name": part.function_call.name,
-                                "args": dict(part.function_call.args)
-                                if part.function_call.args
-                                else {},
-                            }
+                    if part.function_call:
+                        call_index = len(function_calls)
+                        call_id = f"call_{call_index}"
+                        args_dict = (
+                            dict(part.function_call.args)
+                            if part.function_call.args
+                            else {}
+                        )
+                        args_json = json.dumps(args_dict)
+                        function_calls[call_index] = {
+                            "id": call_id,
+                            "name": part.function_call.name,
+                            "args": args_dict,
+                        }
+                        self._emit_stream_chunk_event(
+                            chunk=args_json,
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            tool_call={
+                                "id": call_id,
+                                "function": {
+                                    "name": part.function_call.name or "",
+                                    "arguments": args_json,
+                                },
+                                "type": "function",
+                                "index": call_index,
+                            },
+                            call_type=LLMCallType.TOOL_CALL,
+                        )
 
         return full_response, function_calls, usage_data
 
     def _finalize_streaming_response(
         self,
         full_response: str,
-        function_calls: dict[str, dict[str, Any]],
+        function_calls: dict[int, dict[str, Any]],
         usage_data: dict[str, int],
         contents: list[types.Content],
         available_functions: dict[str, Any] | None = None,
@@ -800,7 +820,7 @@
     ) -> str:
         """Handle streaming content generation."""
         full_response = ""
-        function_calls: dict[str, dict[str, Any]] = {}
+        function_calls: dict[int, dict[str, Any]] = {}
         usage_data = {"total_tokens": 0}
 
         # The API accepts list[Content] but mypy is overly strict about variance
@@ -878,7 +898,7 @@
     ) -> str:
         """Handle async streaming content generation."""
         full_response = ""
-        function_calls: dict[str, dict[str, Any]] = {}
+        function_calls: dict[int, dict[str, Any]] = {}
         usage_data = {"total_tokens": 0}
 
         # The API accepts list[Content] but mypy is overly strict about variance

View File

@@ -521,7 +521,7 @@ class OpenAICompletion(BaseLLM):
     ) -> str:
         """Handle streaming chat completion."""
         full_response = ""
-        tool_calls = {}
+        tool_calls: dict[int, dict[str, Any]] = {}
 
         if response_model:
             parse_params = {
@@ -591,17 +591,41 @@
                 if chunk_delta.tool_calls:
                     for tool_call in chunk_delta.tool_calls:
-                        call_id = tool_call.id or "default"
-                        if call_id not in tool_calls:
-                            tool_calls[call_id] = {
+                        tool_index = tool_call.index if tool_call.index is not None else 0
+                        if tool_index not in tool_calls:
+                            tool_calls[tool_index] = {
+                                "id": tool_call.id,
                                 "name": "",
                                 "arguments": "",
+                                "index": tool_index,
                             }
+                        elif tool_call.id and not tool_calls[tool_index]["id"]:
+                            tool_calls[tool_index]["id"] = tool_call.id
 
                         if tool_call.function and tool_call.function.name:
-                            tool_calls[call_id]["name"] = tool_call.function.name
+                            tool_calls[tool_index]["name"] = tool_call.function.name
                         if tool_call.function and tool_call.function.arguments:
-                            tool_calls[call_id]["arguments"] += tool_call.function.arguments
+                            tool_calls[tool_index]["arguments"] += (
+                                tool_call.function.arguments
+                            )
+
+                        self._emit_stream_chunk_event(
+                            chunk=tool_call.function.arguments
+                            if tool_call.function and tool_call.function.arguments
+                            else "",
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            tool_call={
+                                "id": tool_calls[tool_index]["id"],
+                                "function": {
+                                    "name": tool_calls[tool_index]["name"],
+                                    "arguments": tool_calls[tool_index]["arguments"],
+                                },
+                                "type": "function",
+                                "index": tool_calls[tool_index]["index"],
+                            },
+                            call_type=LLMCallType.TOOL_CALL,
+                        )
 
         self._track_token_usage_internal(usage_data)
@@ -789,7 +813,7 @@
     ) -> str:
         """Handle async streaming chat completion."""
         full_response = ""
-        tool_calls = {}
+        tool_calls: dict[int, dict[str, Any]] = {}
 
         if response_model:
             completion_stream: AsyncIterator[
@@ -870,17 +894,41 @@
                 if chunk_delta.tool_calls:
                     for tool_call in chunk_delta.tool_calls:
-                        call_id = tool_call.id or "default"
-                        if call_id not in tool_calls:
-                            tool_calls[call_id] = {
+                        tool_index = tool_call.index if tool_call.index is not None else 0
+                        if tool_index not in tool_calls:
+                            tool_calls[tool_index] = {
+                                "id": tool_call.id,
                                 "name": "",
                                 "arguments": "",
+                                "index": tool_index,
                             }
+                        elif tool_call.id and not tool_calls[tool_index]["id"]:
+                            tool_calls[tool_index]["id"] = tool_call.id
 
                         if tool_call.function and tool_call.function.name:
-                            tool_calls[call_id]["name"] = tool_call.function.name
+                            tool_calls[tool_index]["name"] = tool_call.function.name
                         if tool_call.function and tool_call.function.arguments:
-                            tool_calls[call_id]["arguments"] += tool_call.function.arguments
+                            tool_calls[tool_index]["arguments"] += (
+                                tool_call.function.arguments
+                            )
+
+                        self._emit_stream_chunk_event(
+                            chunk=tool_call.function.arguments
+                            if tool_call.function and tool_call.function.arguments
+                            else "",
+                            from_task=from_task,
+                            from_agent=from_agent,
+                            tool_call={
+                                "id": tool_calls[tool_index]["id"],
+                                "function": {
+                                    "name": tool_calls[tool_index]["name"],
+                                    "arguments": tool_calls[tool_index]["arguments"],
+                                },
+                                "type": "function",
+                                "index": tool_calls[tool_index]["index"],
+                            },
+                            call_type=LLMCallType.TOOL_CALL,
+                        )
 
         self._track_token_usage_internal(usage_data)

View File

@@ -0,0 +1,371 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":"What is the temperature
in San Francisco?"}],"model":"claude-3-5-haiku-latest","tools":[{"name":"get_current_temperature","description":"Get
the current temperature in a city.","input_schema":{"type":"object","properties":{"city":{"type":"string","description":"The
name of the city to get the temperature for."}},"required":["city"]}}],"stream":true}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '408'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.71.1
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
x-stainless-stream-helper:
- messages
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: 'event: message_start
data: {"type":"message_start","message":{"model":"claude-3-5-haiku-20241022","id":"msg_01JCJXSfyzkcecJUydp157cS","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":351,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":1,"service_tier":"standard"}} }
event: content_block_start
data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"I"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"''ll"} }
event: ping
data: {"type": "ping"}
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
help"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
you find out"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
the current temperature in San"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
Francisco. I"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"''ll"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
use the get"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"_current_temperature
function"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
to"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
retrieve"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
this"} }
event: content_block_delta
data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"
information."} }
event: content_block_stop
data: {"type":"content_block_stop","index":0}
event: content_block_start
data: {"type":"content_block_start","index":1,"content_block":{"type":"tool_use","id":"toolu_01Lfr3kUnHMZApePPRWMv1uS","name":"get_current_temperature","input":{}} }
event: content_block_delta
data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":""} }
event: content_block_delta
data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"{\"c"} }
event: content_block_delta
data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"ity\":"} }
event: content_block_delta
data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"
\"San Franci"} }
event: content_block_delta
data: {"type":"content_block_delta","index":1,"delta":{"type":"input_json_delta","partial_json":"sco\"}"} }
event: content_block_stop
data: {"type":"content_block_stop","index":1 }
event: message_delta
data: {"type":"message_delta","delta":{"stop_reason":"tool_use","stop_sequence":null},"usage":{"input_tokens":351,"cache_creation_input_tokens":0,"cache_read_input_tokens":0,"output_tokens":85}}
event: message_stop
data: {"type":"message_stop" }
'
headers:
CF-RAY:
- CF-RAY-XXX
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 05 Jan 2026 16:04:31 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-01-05T16:04:30Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '690'
status:
code: 200
message: OK
- request:
body: "{\"max_tokens\":4096,\"messages\":[{\"role\":\"user\",\"content\":\"What
is the temperature in San Francisco?\"},{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"I'll
help you find out the current temperature in San Francisco. I'll use the get_current_temperature
function to retrieve this information.\"},{\"type\":\"tool_use\",\"id\":\"toolu_01Lfr3kUnHMZApePPRWMv1uS\",\"name\":\"get_current_temperature\",\"input\":{\"city\":\"San
Francisco\"}}]},{\"role\":\"user\",\"content\":[{\"type\":\"tool_result\",\"tool_use_id\":\"toolu_01Lfr3kUnHMZApePPRWMv1uS\",\"content\":\"The
temperature in San Francisco is 72\xB0F\"}]}],\"model\":\"claude-3-5-haiku-latest\",\"stream\":true,\"tools\":[{\"name\":\"get_current_temperature\",\"description\":\"Get
the current temperature in a city.\",\"input_schema\":{\"type\":\"object\",\"properties\":{\"city\":{\"type\":\"string\",\"description\":\"The
name of the city to get the temperature for.\"}},\"required\":[\"city\"]}}]}"
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '883'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.71.1
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: "event: message_start\ndata: {\"type\":\"message_start\",\"message\":{\"model\":\"claude-3-5-haiku-20241022\",\"id\":\"msg_01XbRN6xwSPSLv6pWtB15EZs\",\"type\":\"message\",\"role\":\"assistant\",\"content\":[],\"stop_reason\":null,\"stop_sequence\":null,\"usage\":{\"input_tokens\":457,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"cache_creation\":{\"ephemeral_5m_input_tokens\":0,\"ephemeral_1h_input_tokens\":0},\"output_tokens\":2,\"service_tier\":\"standard\"}}
\ }\n\nevent: content_block_start\ndata: {\"type\":\"content_block_start\",\"index\":0,\"content_block\":{\"type\":\"text\",\"text\":\"\"}
}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"The\"}
\ }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
current\"} }\n\nevent: ping\ndata: {\"type\": \"ping\"}\n\nevent:
content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
temperature in San Francisco is\"} }\n\nevent: content_block_delta\ndata:
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
72\xB0F.\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
It\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
sounds\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
like a\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
pleasant\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
day!\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
Is\"}}\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
there anything else I can\"} }\n\nevent: content_block_delta\ndata:
{\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
help\"} }\n\nevent: content_block_delta\ndata: {\"type\":\"content_block_delta\",\"index\":0,\"delta\":{\"type\":\"text_delta\",\"text\":\"
you with?\"} }\n\nevent: content_block_stop\ndata: {\"type\":\"content_block_stop\",\"index\":0
\ }\n\nevent: message_delta\ndata: {\"type\":\"message_delta\",\"delta\":{\"stop_reason\":\"end_turn\",\"stop_sequence\":null},\"usage\":{\"input_tokens\":457,\"cache_creation_input_tokens\":0,\"cache_read_input_tokens\":0,\"output_tokens\":33}
\ }\n\nevent: message_stop\ndata: {\"type\":\"message_stop\" }\n\n"
headers:
CF-RAY:
- CF-RAY-XXX
Cache-Control:
- no-cache
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 05 Jan 2026 16:04:33 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-01-05T16:04:32Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '532'
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,108 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the temperature in San
Francisco?"}], "stream": true, "tool_choice": "auto", "tools": [{"function":
{"name": "get_current_temperature", "description": "Get the current temperature
in a city.", "parameters": {"type": "object", "properties": {"city": {"type":
"string", "description": "The name of the city to get the temperature for."}},
"required": ["city"]}}, "type": "function"}], "stream_options": {"include_usage":
true}}'
headers:
Accept:
- application/json
Connection:
- keep-alive
Content-Length:
- '476'
Content-Type:
- application/json
User-Agent:
- X-USER-AGENT-XXX
accept-encoding:
- ACCEPT-ENCODING-XXX
api-key:
- X-API-KEY-XXX
authorization:
- AUTHORIZATION-XXX
extra-parameters:
- pass-through
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
method: POST
uri: https://fake-azure-endpoint.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2024-02-15-preview
response:
body:
string: 'data: {"choices":[],"created":0,"id":"","model":"","object":"","prompt_filter_results":[{"prompt_index":0,"content_filter_results":{"hate":{"filtered":false,"severity":"safe"},"jailbreak":{"filtered":false,"detected":false},"self_harm":{"filtered":false,"severity":"safe"},"sexual":{"filtered":false,"severity":"safe"},"violence":{"filtered":false,"severity":"safe"}}}]}
data: {"choices":[{"content_filter_results":{},"delta":{"content":null,"refusal":null,"role":"assistant","tool_calls":[{"function":{"arguments":"","name":"get_current_temperature"},"id":"call_e6RnREl4LBGp0PdkIf6bBioH","index":0,"type":"function"}]},"finish_reason":null,"index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"a","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[{"content_filter_results":{},"delta":{"tool_calls":[{"function":{"arguments":"{\""},"index":0}]},"finish_reason":null,"index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"scYzCqI","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[{"content_filter_results":{},"delta":{"tool_calls":[{"function":{"arguments":"city"},"index":0}]},"finish_reason":null,"index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"gtrknf","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[{"content_filter_results":{},"delta":{"tool_calls":[{"function":{"arguments":"\":\""},"index":0}]},"finish_reason":null,"index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"Fgf3u","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[{"content_filter_results":{},"delta":{"tool_calls":[{"function":{"arguments":"San"},"index":0}]},"finish_reason":null,"index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"Y11NWOp","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[{"content_filter_results":{},"delta":{"tool_calls":[{"function":{"arguments":"
Francisco"},"index":0}]},"finish_reason":null,"index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[{"content_filter_results":{},"delta":{"tool_calls":[{"function":{"arguments":"\"}"},"index":0}]},"finish_reason":null,"index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"21nwlWJ","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[{"content_filter_results":{},"delta":{},"finish_reason":"tool_calls","index":0,"logprobs":null}],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"lX7hrh76","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":null}
data: {"choices":[],"created":1767630292,"id":"chatcmpl-Cuhfwc9oYO2rZ1Y2xInKelrARv7iC","model":"gpt-4o-mini-2024-07-18","obfuscation":"hA2","object":"chat.completion.chunk","system_fingerprint":"fp_f97eff32c5","usage":{"completion_tokens":17,"completion_tokens_details":{"accepted_prediction_tokens":0,"audio_tokens":0,"reasoning_tokens":0,"rejected_prediction_tokens":0},"prompt_tokens":66,"prompt_tokens_details":{"audio_tokens":0,"cached_tokens":0},"total_tokens":83}}
data: [DONE]
'
headers:
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 05 Jan 2026 16:24:52 GMT
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
apim-request-id:
- APIM-REQUEST-ID-XXX
azureml-model-session:
- AZUREML-MODEL-SESSION-XXX
x-accel-buffering:
- 'no'
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ms-client-request-id:
- X-MS-CLIENT-REQUEST-ID-XXX
x-ms-deployment-name:
- gpt-4o-mini
x-ms-rai-invoked:
- 'true'
x-ms-region:
- X-MS-REGION-XXX
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,67 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "What is the temperature in San Francisco?"}],
"role": "user"}], "tools": [{"functionDeclarations": [{"description": "Get the
current temperature in a city.", "name": "get_current_temperature", "parameters":
{"properties": {"city": {"description": "The name of the city to get the temperature
for.", "type": "STRING"}}, "required": ["city"], "type": "OBJECT"}}]}], "generationConfig":
{}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '422'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.12.10
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse
response:
body:
string: "data: {\"candidates\": [{\"content\": {\"parts\": [{\"functionCall\":
{\"name\": \"get_current_temperature\",\"args\": {\"city\": \"San Francisco\"}}}],\"role\":
\"model\"},\"finishReason\": \"STOP\"}],\"usageMetadata\": {\"promptTokenCount\":
36,\"candidatesTokenCount\": 8,\"totalTokenCount\": 44,\"promptTokensDetails\":
[{\"modality\": \"TEXT\",\"tokenCount\": 36}],\"candidatesTokensDetails\":
[{\"modality\": \"TEXT\",\"tokenCount\": 8}]},\"modelVersion\": \"gemini-2.0-flash\",\"responseId\":
\"h99badGPDrP-x_APraXUmAM\"}\r\n\r\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Disposition:
- attachment
Content-Type:
- text/event-stream
Date:
- Mon, 05 Jan 2026 15:57:59 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=583
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,68 @@
interactions:
- request:
body: '{"contents": [{"parts": [{"text": "What is the temperature in Paris and
London?"}], "role": "user"}], "tools": [{"functionDeclarations": [{"description":
"Get the current temperature in a city.", "name": "get_current_temperature",
"parameters": {"properties": {"city": {"description": "The name of the city
to get the temperature for.", "type": "STRING"}}, "required": ["city"], "type":
"OBJECT"}}]}], "generationConfig": {}}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- '*/*'
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '425'
content-type:
- application/json
host:
- generativelanguage.googleapis.com
x-goog-api-client:
- google-genai-sdk/1.49.0 gl-python/3.12.10
x-goog-api-key:
- X-GOOG-API-KEY-XXX
method: POST
uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:streamGenerateContent?alt=sse
response:
body:
string: "data: {\"candidates\": [{\"content\": {\"parts\": [{\"functionCall\":
{\"name\": \"get_current_temperature\",\"args\": {\"city\": \"Paris\"}}},{\"functionCall\":
{\"name\": \"get_current_temperature\",\"args\": {\"city\": \"London\"}}}],\"role\":
\"model\"},\"finishReason\": \"STOP\"}],\"usageMetadata\": {\"promptTokenCount\":
37,\"candidatesTokenCount\": 14,\"totalTokenCount\": 51,\"promptTokensDetails\":
[{\"modality\": \"TEXT\",\"tokenCount\": 37}],\"candidatesTokensDetails\":
[{\"modality\": \"TEXT\",\"tokenCount\": 14}]},\"modelVersion\": \"gemini-2.0-flash\",\"responseId\":
\"h99baZTLOoSShMIPgYaAgQw\"}\r\n\r\n"
headers:
Alt-Svc:
- h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
Content-Disposition:
- attachment
Content-Type:
- text/event-stream
Date:
- Mon, 05 Jan 2026 15:58:00 GMT
Server:
- scaffolding on HTTPServer2
Server-Timing:
- gfet4t7; dur=960
Transfer-Encoding:
- chunked
Vary:
- Origin
- X-Origin
- Referer
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
X-Frame-Options:
- X-FRAME-OPTIONS-XXX
X-XSS-Protection:
- '0'
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,131 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"What is the temperature in San Francisco?"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_temperature","description":"Get
the current temperature in a city.","parameters":{"type":"object","properties":{"city":{"type":"string","description":"The
name of the city to get the temperature for."}},"required":["city"]}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '468'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_3kB8meBh6OQYxf3Ch6K6aS7X","type":"function","function":{"name":"get_current_temperature","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"E1uB1Z7e"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"WfA8lJUdnDG3wX"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"q8i16eqGFPM92"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"BTem15zkzsoy"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"cTpMhY3sI6NiHw"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"
Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RHpsStT"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SXQ7dOpJWPNo41"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null,"obfuscation":"r8asNT7VjB8B67A"}
data: {"id":"chatcmpl-CugUbFnMkpXISLZPDmla5Pi4j8yng","object":"chat.completion.chunk","created":1767625745,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":66,"completion_tokens":16,"total_tokens":82,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"D6wMV9IHqp"}
data: [DONE]
'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 05 Jan 2026 15:09:05 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '474'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '488'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,131 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"What is the temperature in San Francisco?"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_temperature","description":"Get
the current temperature in a city.","parameters":{"type":"object","properties":{"city":{"type":"string","description":"The
name of the city to get the temperature for."}},"required":["city"]}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '468'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_QXZLQbxriC1eBnOMXLPMopfe","type":"function","function":{"name":"get_current_temperature","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ncD7jNXK"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AmzMaEKhB232Mr"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xfJ8TboQmMJCA"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"iS6dOaTHSzht"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"F7li6njWQE87IY"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"
Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"EaofAx0"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"YNoAewLIjPGgbm"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null,"obfuscation":"DyMKmTz1cyhwt3H"}
data: {"id":"chatcmpl-CugUcrVnIGFI01Ty76IqBP4iwcdk1","object":"chat.completion.chunk","created":1767625746,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":66,"completion_tokens":16,"total_tokens":82,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"svCxdxouSj"}
data: [DONE]
'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 05 Jan 2026 15:09:07 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '650'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '692'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
View File
@@ -0,0 +1,131 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"What is the temperature in San Francisco?"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_temperature","description":"Get
the current temperature in a city.","parameters":{"type":"object","properties":{"city":{"type":"string","description":"The
name of the city to get the temperature for."}},"required":["city"]}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '468'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ZxE8mQ4FdO733hdMe8iW7mBH","type":"function","function":{"name":"get_current_temperature","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"2yD9IR8j"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"HT2u4m0HdAcZFq"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"O5f277ricHatr"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mLTrMr1JtCBJ"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"siz0LLU1Gv7jC1"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"
Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OGOJJYA"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wZT1SejqluCrAY"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null,"obfuscation":"YNlwGCa5JWewnZy"}
data: {"id":"chatcmpl-CugUfGwROKOfstuAzKnqcsX3yWA90","object":"chat.completion.chunk","created":1767625749,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":66,"completion_tokens":16,"total_tokens":82,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"4Fk4xNw3lV"}
data: [DONE]
'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 05 Jan 2026 15:09:10 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '683'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '698'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
View File
@@ -0,0 +1,131 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"What is the temperature in San Francisco?"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_current_temperature","description":"Get
the current temperature in a city.","parameters":{"type":"object","properties":{"city":{"type":"string","description":"The
name of the city to get the temperature for."}},"required":["city"]}}}]}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
authorization:
- AUTHORIZATION-XXX
connection:
- keep-alive
content-length:
- '468'
content-type:
- application/json
host:
- api.openai.com
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.83.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_ACVuyKtLn299YJUkoH9RWxks","type":"function","function":{"name":"get_current_temperature","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"p96OKjJc"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RoT4saRoTqVqK9"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"city"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"UnjRIiaNmkXxG"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OUJpwmX8Y5xm"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"San"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DBXFz5gGQyitfE"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"
Francisco"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"LSJ3CF3"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KUrpUnjMA8Rwhi"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null,"obfuscation":"Kycqgm00aFnjf9a"}
data: {"id":"chatcmpl-CugUdqZiFd9Y6Kq1E9zniCoa0uwHM","object":"chat.completion.chunk","created":1767625747,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":66,"completion_tokens":16,"total_tokens":82,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"UoTa3DaYLG"}
data: [DONE]
'
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- text/event-stream; charset=utf-8
Date:
- Mon, 05 Jan 2026 15:09:08 GMT
Server:
- cloudflare
Set-Cookie:
- SET-COOKIE-XXX
Strict-Transport-Security:
- STS-XXX
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- X-CONTENT-TYPE-XXX
access-control-expose-headers:
- ACCESS-CONTROL-XXX
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- OPENAI-ORG-XXX
openai-processing-ms:
- '509'
openai-project:
- OPENAI-PROJECT-XXX
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '524'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- X-RATELIMIT-LIMIT-REQUESTS-XXX
x-ratelimit-limit-tokens:
- X-RATELIMIT-LIMIT-TOKENS-XXX
x-ratelimit-remaining-requests:
- X-RATELIMIT-REMAINING-REQUESTS-XXX
x-ratelimit-remaining-tokens:
- X-RATELIMIT-REMAINING-TOKENS-XXX
x-ratelimit-reset-requests:
- X-RATELIMIT-RESET-REQUESTS-XXX
x-ratelimit-reset-tokens:
- X-RATELIMIT-RESET-TOKENS-XXX
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1
View File
@@ -0,0 +1,324 @@
"""Tests for tool call streaming events across LLM providers.
These tests verify that when streaming is enabled and the LLM makes a tool call,
the emitted stream chunk events carry the tool call payload (id, function name,
and argument fragments) and are tagged with call_type=LLMCallType.TOOL_CALL.
"""
from typing import Any
from unittest.mock import MagicMock, patch
import pytest
from crewai.events.types.llm_events import LLMCallType, LLMStreamChunkEvent, ToolCall
from crewai.llm import LLM
@pytest.fixture
def get_temperature_tool_schema() -> dict[str, Any]:
"""Create a temperature tool schema for native function calling."""
return {
"type": "function",
"function": {
"name": "get_current_temperature",
"description": "Get the current temperature in a city.",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The name of the city to get the temperature for.",
}
},
"required": ["city"],
},
},
}
@pytest.fixture
def mock_emit() -> MagicMock:
"""Mock the event bus emit function."""
from crewai.events.event_bus import CrewAIEventsBus
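    # Patch emit on the class so events emitted through whichever bus instance the
    # LLM uses are captured by the same mock.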
with patch.object(CrewAIEventsBus, "emit") as mock:
yield mock
def get_tool_call_events(mock_emit: MagicMock) -> list[LLMStreamChunkEvent]:
"""Extract tool call streaming events from mock emit calls."""
tool_call_events = []
for call in mock_emit.call_args_list:
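        # emit() is expected to receive the event as a keyword argument, so the
        # payload lives in the recorded call's kwargs (call[1]).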
event = call[1].get("event") if len(call) > 1 else None
if isinstance(event, LLMStreamChunkEvent) and event.call_type == LLMCallType.TOOL_CALL:
tool_call_events.append(event)
return tool_call_events
def get_all_stream_events(mock_emit: MagicMock) -> list[LLMStreamChunkEvent]:
"""Extract all streaming events from mock emit calls."""
stream_events = []
for call in mock_emit.call_args_list:
event = call[1].get("event") if len(call) > 1 else None
if isinstance(event, LLMStreamChunkEvent):
stream_events.append(event)
return stream_events
class TestOpenAIToolCallStreaming:
"""Tests for OpenAI provider tool call streaming events."""
@pytest.mark.vcr()
def test_openai_streaming_emits_tool_call_events(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that OpenAI streaming emits tool call events with correct call_type."""
llm = LLM(model="openai/gpt-4o-mini", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in San Francisco?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
tool_call_events = get_tool_call_events(mock_emit)
assert len(tool_call_events) > 0, "Should receive tool call streaming events"
first_tool_call_event = tool_call_events[0]
assert first_tool_call_event.call_type == LLMCallType.TOOL_CALL
assert first_tool_call_event.tool_call is not None
assert isinstance(first_tool_call_event.tool_call, ToolCall)
assert first_tool_call_event.tool_call.function is not None
assert first_tool_call_event.tool_call.function.name == "get_current_temperature"
assert first_tool_call_event.tool_call.type == "function"
assert first_tool_call_event.tool_call.index >= 0
class TestToolCallStreamingEventStructure:
"""Tests for the structure and content of tool call streaming events."""
@pytest.mark.vcr()
def test_tool_call_event_accumulates_arguments(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that tool call events accumulate arguments progressively."""
llm = LLM(model="openai/gpt-4o-mini", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in San Francisco?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
tool_call_events = get_tool_call_events(mock_emit)
assert len(tool_call_events) >= 2, "Should receive multiple tool call streaming events"
for evt in tool_call_events:
assert evt.tool_call is not None
assert evt.tool_call.function is not None
@pytest.mark.vcr()
def test_tool_call_events_have_consistent_tool_id(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that all events for the same tool call have the same tool ID."""
llm = LLM(model="openai/gpt-4o-mini", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in San Francisco?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
tool_call_events = get_tool_call_events(mock_emit)
assert len(tool_call_events) >= 1, "Should receive tool call streaming events"
if len(tool_call_events) > 1:
events_by_index: dict[int, list[LLMStreamChunkEvent]] = {}
for evt in tool_call_events:
if evt.tool_call is not None:
idx = evt.tool_call.index
if idx not in events_by_index:
events_by_index[idx] = []
events_by_index[idx].append(evt)
for idx, evts in events_by_index.items():
ids = [
e.tool_call.id
for e in evts
if e.tool_call is not None and e.tool_call.id
]
if ids:
assert len(set(ids)) == 1, f"Tool call ID should be consistent for index {idx}"
class TestMixedStreamingEvents:
"""Tests for scenarios with both text and tool call streaming events."""
@pytest.mark.vcr()
def test_streaming_distinguishes_text_and_tool_calls(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that streaming correctly distinguishes between text chunks and tool calls."""
llm = LLM(model="openai/gpt-4o-mini", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in San Francisco?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
all_events = get_all_stream_events(mock_emit)
tool_call_events = get_tool_call_events(mock_emit)
assert len(all_events) >= 1, "Should receive streaming events"
for event in tool_call_events:
assert event.call_type == LLMCallType.TOOL_CALL
assert event.tool_call is not None
class TestGeminiToolCallStreaming:
"""Tests for Gemini provider tool call streaming events."""
@pytest.mark.vcr()
def test_gemini_streaming_emits_tool_call_events(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that Gemini streaming emits tool call events with correct call_type."""
llm = LLM(model="gemini/gemini-2.0-flash", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in San Francisco?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
tool_call_events = get_tool_call_events(mock_emit)
assert len(tool_call_events) > 0, "Should receive tool call streaming events"
first_tool_call_event = tool_call_events[0]
assert first_tool_call_event.call_type == LLMCallType.TOOL_CALL
assert first_tool_call_event.tool_call is not None
assert isinstance(first_tool_call_event.tool_call, ToolCall)
assert first_tool_call_event.tool_call.function is not None
assert first_tool_call_event.tool_call.function.name == "get_current_temperature"
assert first_tool_call_event.tool_call.type == "function"
@pytest.mark.vcr()
def test_gemini_streaming_multiple_tool_calls_unique_ids(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that Gemini streaming assigns unique IDs to multiple tool calls."""
llm = LLM(model="gemini/gemini-2.0-flash", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in Paris and London?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
tool_call_events = get_tool_call_events(mock_emit)
assert len(tool_call_events) >= 2, "Should receive at least 2 tool call events"
tool_ids = [
evt.tool_call.id
for evt in tool_call_events
if evt.tool_call is not None and evt.tool_call.id
]
assert len(set(tool_ids)) >= 2, "Each tool call should have a unique ID"
class TestAzureToolCallStreaming:
"""Tests for Azure provider tool call streaming events."""
@pytest.mark.vcr()
def test_azure_streaming_emits_tool_call_events(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that Azure streaming emits tool call events with correct call_type."""
llm = LLM(model="azure/gpt-4o-mini", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in San Francisco?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
tool_call_events = get_tool_call_events(mock_emit)
assert len(tool_call_events) > 0, "Should receive tool call streaming events"
first_tool_call_event = tool_call_events[0]
assert first_tool_call_event.call_type == LLMCallType.TOOL_CALL
assert first_tool_call_event.tool_call is not None
assert isinstance(first_tool_call_event.tool_call, ToolCall)
assert first_tool_call_event.tool_call.function is not None
assert first_tool_call_event.tool_call.function.name == "get_current_temperature"
assert first_tool_call_event.tool_call.type == "function"
class TestAnthropicToolCallStreaming:
"""Tests for Anthropic provider tool call streaming events."""
@pytest.mark.vcr()
def test_anthropic_streaming_emits_tool_call_events(
self, get_temperature_tool_schema: dict[str, Any], mock_emit: MagicMock
) -> None:
"""Test that Anthropic streaming emits tool call events with correct call_type."""
llm = LLM(model="anthropic/claude-3-5-haiku-latest", stream=True)
llm.call(
messages=[
{"role": "user", "content": "What is the temperature in San Francisco?"},
],
tools=[get_temperature_tool_schema],
available_functions={
"get_current_temperature": lambda city: f"The temperature in {city} is 72°F"
},
)
tool_call_events = get_tool_call_events(mock_emit)
assert len(tool_call_events) > 0, "Should receive tool call streaming events"
first_tool_call_event = tool_call_events[0]
assert first_tool_call_event.call_type == LLMCallType.TOOL_CALL
assert first_tool_call_event.tool_call is not None
assert isinstance(first_tool_call_event.tool_call, ToolCall)
assert first_tool_call_event.tool_call.function is not None
assert first_tool_call_event.tool_call.function.name == "get_current_temperature"
assert first_tool_call_event.tool_call.type == "function"