diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 657488098..f7cb76471 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -1580,10 +1580,12 @@ class AnthropicCompletion(BaseLLM): usage = response.usage input_tokens = getattr(usage, "input_tokens", 0) output_tokens = getattr(usage, "output_tokens", 0) + cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0 return { "input_tokens": input_tokens, "output_tokens": output_tokens, "total_tokens": input_tokens + output_tokens, + "cached_prompt_tokens": cache_read_tokens, } return {"total_tokens": 0} diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index e7fd80844..00c10112d 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -425,8 +425,9 @@ class AzureCompletion(BaseLLM): "stream": self.stream, } + model_extras: dict[str, Any] = {} if self.stream: - params["model_extras"] = {"stream_options": {"include_usage": True}} + model_extras["stream_options"] = {"include_usage": True} if response_model and self.is_openai_model: model_description = generate_model_description(response_model) @@ -464,6 +465,13 @@ class AzureCompletion(BaseLLM): params["tools"] = self._convert_tools_for_interference(tools) params["tool_choice"] = "auto" + prompt_cache_key = self.additional_params.get("prompt_cache_key") + if prompt_cache_key: + model_extras["prompt_cache_key"] = prompt_cache_key + + if model_extras: + params["model_extras"] = model_extras + additional_params = self.additional_params additional_drop_params = additional_params.get("additional_drop_params") drop_params = additional_params.get("drop_params") @@ -1063,10 +1071,15 @@ class AzureCompletion(BaseLLM): """Extract token 
usage from Azure response.""" if hasattr(response, "usage") and response.usage: usage = response.usage + cached_tokens = 0 + prompt_details = getattr(usage, "prompt_tokens_details", None) + if prompt_details: + cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0 return { "prompt_tokens": getattr(usage, "prompt_tokens", 0), "completion_tokens": getattr(usage, "completion_tokens", 0), "total_tokens": getattr(usage, "total_tokens", 0), + "cached_prompt_tokens": cached_tokens, } return {"total_tokens": 0} diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index 0c00de96d..14603b7d2 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -1295,11 +1295,13 @@ class GeminiCompletion(BaseLLM): """Extract token usage from Gemini response.""" if response.usage_metadata: usage = response.usage_metadata + cached_tokens = getattr(usage, "cached_content_token_count", 0) or 0 return { "prompt_token_count": getattr(usage, "prompt_token_count", 0), "candidates_token_count": getattr(usage, "candidates_token_count", 0), "total_token_count": getattr(usage, "total_token_count", 0), "total_tokens": getattr(usage, "total_token_count", 0), + "cached_prompt_tokens": cached_tokens, } return {"total_tokens": 0} diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py index 22b9cda3b..871621ddb 100644 --- a/lib/crewai/src/crewai/llms/providers/openai/completion.py +++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py @@ -1094,11 +1094,7 @@ class OpenAICompletion(BaseLLM): if reasoning_items: self._last_reasoning_items = reasoning_items if event.response and event.response.usage: - usage = { - "prompt_tokens": event.response.usage.input_tokens, - "completion_tokens": event.response.usage.output_tokens, - "total_tokens": 
event.response.usage.total_tokens, - } + usage = self._extract_responses_token_usage(event.response) self._track_token_usage_internal(usage) # If parse_tool_outputs is enabled, return structured result @@ -1222,11 +1218,7 @@ class OpenAICompletion(BaseLLM): if reasoning_items: self._last_reasoning_items = reasoning_items if event.response and event.response.usage: - usage = { - "prompt_tokens": event.response.usage.input_tokens, - "completion_tokens": event.response.usage.output_tokens, - "total_tokens": event.response.usage.total_tokens, - } + usage = self._extract_responses_token_usage(event.response) self._track_token_usage_internal(usage) # If parse_tool_outputs is enabled, return structured result @@ -1310,11 +1302,18 @@ class OpenAICompletion(BaseLLM): def _extract_responses_token_usage(self, response: Response) -> dict[str, Any]: """Extract token usage from Responses API response.""" if response.usage: - return { + result = { "prompt_tokens": response.usage.input_tokens, "completion_tokens": response.usage.output_tokens, "total_tokens": response.usage.total_tokens, } + # Extract cached prompt tokens from input_tokens_details + input_details = getattr(response.usage, "input_tokens_details", None) + if input_details: + result["cached_prompt_tokens"] = ( + getattr(input_details, "cached_tokens", 0) or 0 + ) + return result return {"total_tokens": 0} def _extract_builtin_tool_outputs(self, response: Response) -> ResponsesAPIResult: @@ -2264,11 +2263,18 @@ class OpenAICompletion(BaseLLM): """Extract token usage from OpenAI ChatCompletion or ChatCompletionChunk response.""" if hasattr(response, "usage") and response.usage: usage = response.usage - return { + result = { "prompt_tokens": getattr(usage, "prompt_tokens", 0), "completion_tokens": getattr(usage, "completion_tokens", 0), "total_tokens": getattr(usage, "total_tokens", 0), } + # Extract cached prompt tokens from prompt_tokens_details + prompt_details = getattr(usage, "prompt_tokens_details", None) + if 
prompt_details: + result["cached_prompt_tokens"] = ( + getattr(prompt_details, "cached_tokens", 0) or 0 + ) + return result return {"total_tokens": 0} def _format_messages(self, messages: str | list[LLMMessage]) -> list[LLMMessage]: diff --git a/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_cached_prompt_tokens.yaml b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_cached_prompt_tokens.yaml new file mode 100644 index 000000000..51997fbed --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_cached_prompt_tokens.yaml @@ -0,0 +1,332 @@ +interactions: +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"Say + hello in one word.","cache_control":{"type":"ephemeral"}}]}],"model":"claude-sonnet-4-5-20250929","stream":false,"system":"You + are a helpful assistant. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. "}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '5918' + content-type: + - application/json + host: + - api.anthropic.com + x-api-key: + - X-API-KEY-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_013xTaKq41TFn6drdxt1mFdx","type":"message","role":"assistant","content":[{"type":"text","text":"Hello!"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":0,"cache_read_input_tokens":1217,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":5,"service_tier":"standard","inference_geo":"not_available"}}' + 
headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:27:40 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '726' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"Say + goodbye in one word.","cache_control":{"type":"ephemeral"}}]}],"model":"claude-sonnet-4-5-20250929","stream":false,"system":"You + are a helpful assistant. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '5920' + content-type: + - application/json + host: + - api.anthropic.com + x-api-key: + - X-API-KEY-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_01LdueHX7nvf19wD8Uxn4EZD","type":"message","role":"assistant","content":[{"type":"text","text":"Goodbye"}],"stop_reason":"end_turn","stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":0,"cache_read_input_tokens":1217,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":5,"service_tier":"standard","inference_geo":"not_available"}}' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:27:41 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + 
anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '759' + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_cached_prompt_tokens_with_tools.yaml b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_cached_prompt_tokens_with_tools.yaml new file mode 100644 index 000000000..84e6549cf --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_cached_prompt_tokens_with_tools.yaml @@ -0,0 +1,336 @@ +interactions: +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What + is the weather in Tokyo?","cache_control":{"type":"ephemeral"}}]}],"model":"claude-sonnet-4-5-20250929","stream":false,"system":"You + are a helpful assistant that uses tools. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
","tool_choice":{"type":"tool","name":"get_weather"},"tools":[{"name":"get_weather","description":"Get + the current weather for a location","input_schema":{"type":"object","properties":{"location":{"type":"string","description":"The + city name"}},"required":["location"]}}]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '6211' + content-type: + - application/json + host: + - api.anthropic.com + x-api-key: + - X-API-KEY-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_01WhFk2ppoz43nbh4uNhXBfL","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01CX1yZuJ5MQaJbXNSrnCiqf","name":"get_weather","input":{"location":"Tokyo"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":24,"cache_creation_input_tokens":0,"cache_read_input_tokens":1857,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":33,"service_tier":"standard","inference_geo":"not_available"}}' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:27:38 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + 
anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '1390' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What + is the weather in Paris?","cache_control":{"type":"ephemeral"}}]}],"model":"claude-sonnet-4-5-20250929","stream":false,"system":"You + are a helpful assistant that uses tools. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
","tool_choice":{"type":"tool","name":"get_weather"},"tools":[{"name":"get_weather","description":"Get + the current weather for a location","input_schema":{"type":"object","properties":{"location":{"type":"string","description":"The + city name"}},"required":["location"]}}]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '6211' + content-type: + - application/json + host: + - api.anthropic.com + x-api-key: + - X-API-KEY-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: '{"model":"claude-sonnet-4-5-20250929","id":"msg_01Nmw5NyAEwCLGjpVnf15rh4","type":"message","role":"assistant","content":[{"type":"tool_use","id":"toolu_01DEe9K7N4EfhPFqxHhqEHCE","name":"get_weather","input":{"location":"Paris"}}],"stop_reason":"tool_use","stop_sequence":null,"usage":{"input_tokens":24,"cache_creation_input_tokens":0,"cache_read_input_tokens":1857,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":33,"service_tier":"standard","inference_geo":"not_available"}}' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:27:40 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + 
anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '1259' + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_streaming_cached_prompt_tokens.yaml b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_streaming_cached_prompt_tokens.yaml new file mode 100644 index 000000000..b1623d81c --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/anthropic/test_anthropic_streaming_cached_prompt_tokens.yaml @@ -0,0 +1,411 @@ +interactions: +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"Say + hello in one word.","cache_control":{"type":"ephemeral"}}]}],"model":"claude-sonnet-4-5-20250929","system":"You + are a helpful assistant. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
","stream":true}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '5917' + content-type: + - application/json + host: + - api.anthropic.com + x-api-key: + - X-API-KEY-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-stream-helper: + - messages + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 'event: message_start + + data: {"type":"message_start","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01LshZroyEGgd3HfDrKdQMLm","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":0,"cache_read_input_tokens":1217,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":4,"service_tier":"standard","inference_geo":"not_available"}} } + + + event: content_block_start + + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""} } + + + event: ping + + data: {"type": "ping"} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Hello"} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":0 } + + + event: message_delta + + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":3,"cache_creation_input_tokens":0,"cache_read_input_tokens":1217,"output_tokens":4} + } + + + event: message_stop + + data: {"type":"message_stop" } + + + ' + headers: + CF-RAY: + - 
CF-RAY-XXX + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 10 Feb 2026 18:27:43 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '837' + status: + code: 200 + message: OK +- request: + body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"Say + goodbye in one word.","cache_control":{"type":"ephemeral"}}]}],"model":"claude-sonnet-4-5-20250929","system":"You + are a helpful assistant. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
","stream":true}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + anthropic-version: + - '2023-06-01' + connection: + - keep-alive + content-length: + - '5919' + content-type: + - application/json + host: + - api.anthropic.com + x-api-key: + - X-API-KEY-XXX + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 0.73.0 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + x-stainless-stream-helper: + - messages + x-stainless-timeout: + - NOT_GIVEN + method: POST + uri: https://api.anthropic.com/v1/messages + response: + body: + string: 'event: message_start + + data: {"type":"message_start","message":{"model":"claude-sonnet-4-5-20250929","id":"msg_01MZSWarEUbFXmek8aEpwKDu","type":"message","role":"assistant","content":[],"stop_reason":null,"stop_sequence":null,"usage":{"input_tokens":3,"cache_creation_input_tokens":0,"cache_read_input_tokens":1217,"cache_creation":{"ephemeral_5m_input_tokens":0,"ephemeral_1h_input_tokens":0},"output_tokens":6,"service_tier":"standard","inference_geo":"not_available"}} } + + + event: content_block_start + + data: {"type":"content_block_start","index":0,"content_block":{"type":"text","text":""}} + + + event: ping + + data: {"type": "ping"} + + + event: content_block_delta + + data: {"type":"content_block_delta","index":0,"delta":{"type":"text_delta","text":"Goodbye."} } + + + event: content_block_stop + + data: {"type":"content_block_stop","index":0 } + + + event: message_delta + + data: {"type":"message_delta","delta":{"stop_reason":"end_turn","stop_sequence":null},"usage":{"input_tokens":3,"cache_creation_input_tokens":0,"cache_read_input_tokens":1217,"output_tokens":6} } + + + event: message_stop + + data: {"type":"message_stop" } + + + ' + headers: + CF-RAY: + - 
CF-RAY-XXX + Cache-Control: + - no-cache + Connection: + - keep-alive + Content-Security-Policy: + - CSP-FILTERED + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 10 Feb 2026 18:27:44 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Robots-Tag: + - none + anthropic-organization-id: + - ANTHROPIC-ORGANIZATION-ID-XXX + anthropic-ratelimit-input-tokens-limit: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-input-tokens-remaining: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-input-tokens-reset: + - ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX + anthropic-ratelimit-output-tokens-limit: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX + anthropic-ratelimit-output-tokens-remaining: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX + anthropic-ratelimit-output-tokens-reset: + - ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX + anthropic-ratelimit-tokens-limit: + - ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX + anthropic-ratelimit-tokens-remaining: + - ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX + anthropic-ratelimit-tokens-reset: + - ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX + cf-cache-status: + - DYNAMIC + request-id: + - REQUEST-ID-XXX + strict-transport-security: + - STS-XXX + x-envoy-upstream-service-time: + - '870' + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/google/test_gemini_cached_prompt_tokens.yaml b/lib/crewai/tests/cassettes/llms/google/test_gemini_cached_prompt_tokens.yaml new file mode 100644 index 000000000..44dd7934c --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/google/test_gemini_cached_prompt_tokens.yaml @@ -0,0 +1,266 @@ +interactions: +- request: + body: '{"contents": [{"parts": [{"text": "Say hello in one word."}], "role": "user"}], + "systemInstruction": {"parts": [{"text": "You are a helpful assistant. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ "}], "role": "user"}, "generationConfig": {}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - '*/*' + accept-encoding: + - ACCEPT-ENCODING-XXX + connection: + - keep-alive + content-length: + - '5876' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + x-goog-api-client: + - google-genai-sdk/1.49.0 gl-python/3.13.3 + x-goog-api-key: + - X-GOOG-API-KEY-XXX + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"text\": \"Hello\"\n }\n ],\n + \ \"role\": \"model\"\n },\n \"finishReason\": \"STOP\",\n + \ \"index\": 0\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": + 1135,\n \"candidatesTokenCount\": 1,\n \"totalTokenCount\": 1158,\n + \ \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n + \ \"tokenCount\": 1135\n }\n ],\n \"thoughtsTokenCount\": + 22\n },\n \"modelVersion\": \"gemini-2.5-flash\",\n \"responseId\": \"46GLaf60NYmY-8YP--PB6QE\"\n}\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Type: + - application/json; charset=UTF-8 + Date: + - Tue, 10 Feb 2026 21:23:47 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=773 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + X-Frame-Options: + - X-FRAME-OPTIONS-XXX + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents": [{"parts": [{"text": "Say goodbye in one word."}], "role": + "user"}], "systemInstruction": {"parts": [{"text": "You are a helpful assistant. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. 
"}], "role": "user"}, "generationConfig": {}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - '*/*' + accept-encoding: + - ACCEPT-ENCODING-XXX + connection: + - keep-alive + content-length: + - '5878' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + x-goog-api-client: + - google-genai-sdk/1.49.0 gl-python/3.13.3 + x-goog-api-key: + - X-GOOG-API-KEY-XXX + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"text\": \"Farewell.\"\n }\n ],\n + \ \"role\": \"model\"\n },\n \"finishReason\": \"STOP\",\n + \ \"index\": 0\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": + 1135,\n \"candidatesTokenCount\": 3,\n \"totalTokenCount\": 1164,\n + \ \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n + \ \"tokenCount\": 1135\n }\n ],\n \"thoughtsTokenCount\": + 26\n },\n \"modelVersion\": \"gemini-2.5-flash\",\n \"responseId\": \"5KGLafeeIv-G-8YP_MfPgAI\"\n}\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Type: + - application/json; charset=UTF-8 + Date: + - Tue, 10 Feb 2026 21:23:48 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=662 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + X-Frame-Options: + - X-FRAME-OPTIONS-XXX + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/google/test_gemini_cached_prompt_tokens_with_tools.yaml b/lib/crewai/tests/cassettes/llms/google/test_gemini_cached_prompt_tokens_with_tools.yaml new file mode 100644 index 000000000..728329fb7 --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/google/test_gemini_cached_prompt_tokens_with_tools.yaml @@ -0,0 +1,280 @@ +interactions: +- request: + body: '{"contents": [{"parts": 
[{"text": "What is the weather in Tokyo?"}], "role": + "user"}], "systemInstruction": {"parts": [{"text": "You are a helpful assistant + that uses tools. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
This is padding text to ensure the prompt + is large enough for caching. "}], "role": "user"}, "tools": [{"functionDeclarations": + [{"description": "Get the current weather for a location", "name": "get_weather", + "parameters_json_schema": {"type": "object", "properties": {"location": {"type": + "string", "description": "The city name"}}, "required": ["location"]}}]}], "generationConfig": + {}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - '*/*' + accept-encoding: + - ACCEPT-ENCODING-XXX + connection: + - keep-alive + content-length: + - '6172' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + x-goog-api-client: + - google-genai-sdk/1.49.0 gl-python/3.13.3 + x-goog-api-key: + - X-GOOG-API-KEY-XXX + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"functionCall\": {\n \"name\": \"get_weather\",\n + \ \"args\": {\n \"location\": \"Tokyo\"\n }\n + \ },\n \"thoughtSignature\": \"CpECAb4+9vvTFzaczX2PeZjKEs1f6+MRyTMz+xxqs37q0INQ6e0WLt1soet6CL/uzRML9LsycSeQTraXtXR8qcGj6dnrhKLpovpy8EkrtfK6P57PGpostE/UJ6TIKPlWi0pY1h2u9vyy5yGLzpp0PZM6d6f8rzV9uPFNM+onGvcFOdzghRZlHmYkQdbdpZaFQBAK6QFuh8oGbC0Ygrsk1guJo1YZaKtU5Rp/k2rJO61Obgq7aYEb7ACVx7DM9ZlVCun/PbXR4UolFeNPxNdwzC5AVvP7UKa2Cxi8dzQ8RNebtd39/gNO546XzADGZkpSqG6QF0S4IEsmB9FFCctN1evgKicgT2Qo+AR6BY8uzZyWkGQx\"\n + \ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\": + \"STOP\",\n \"index\": 0,\n \"finishMessage\": \"Model generated + function call(s).\"\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": + 1180,\n \"candidatesTokenCount\": 15,\n \"totalTokenCount\": 1253,\n + \ \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n + \ \"tokenCount\": 1180\n }\n ],\n \"thoughtsTokenCount\": + 58\n },\n \"modelVersion\": \"gemini-2.5-flash\",\n \"responseId\": \"wHmLacb_GL-J-sAPn6azgAo\"\n}\n" + headers: + Alt-Svc: + - 
h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Type: + - application/json; charset=UTF-8 + Date: + - Tue, 10 Feb 2026 18:32:32 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=755 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + X-Frame-Options: + - X-FRAME-OPTIONS-XXX + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents": [{"parts": [{"text": "What is the weather in Paris?"}], "role": + "user"}], "systemInstruction": {"parts": [{"text": "You are a helpful assistant + that uses tools. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. "}], "role": "user"}, "tools": [{"functionDeclarations": + [{"description": "Get the current weather for a location", "name": "get_weather", + "parameters_json_schema": {"type": "object", "properties": {"location": {"type": + "string", "description": "The city name"}}, "required": ["location"]}}]}], "generationConfig": + {}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - '*/*' + accept-encoding: + - ACCEPT-ENCODING-XXX + connection: + - keep-alive + content-length: + - '6172' + content-type: + - application/json + host: + - generativelanguage.googleapis.com + x-goog-api-client: + - google-genai-sdk/1.49.0 gl-python/3.13.3 + x-goog-api-key: + - X-GOOG-API-KEY-XXX + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"functionCall\": {\n \"name\": \"get_weather\",\n + \ \"args\": {\n \"location\": \"Paris\"\n }\n + \ },\n \"thoughtSignature\": \"CuMBAb4+9vurHOlMBPzqCtd/J0Q5jBhUq8dsk7xntqcTgwBcZ1KeX4F4UJ0rdfg1OLhDkOlOlELA/jBYxATT19QUvw0szvDBDml0PsTBXlt64o7oGVmOCjdiGPu71I9+sCYhlD3QXzwLdQdrvUIfVrB+kaGszmZi1KTIli+qD9ihueDYGY510ouKdfl31UipQEG990+qFJyXe3avVEh3Jo72iXr3Q4UczFdbKSTV4V4fjrokFaB7UqcYy1iuAB5vHRsxYFJeTCi+ddKzn700gbWbiJZUniKiE3QfdOK4A5S0woBDzV0=\"\n + \ }\n ],\n \"role\": \"model\"\n },\n \"finishReason\": + \"STOP\",\n 
\"index\": 0,\n \"finishMessage\": \"Model generated + function call(s).\"\n }\n ],\n \"usageMetadata\": {\n \"promptTokenCount\": + 1180,\n \"candidatesTokenCount\": 15,\n \"totalTokenCount\": 1242,\n + \ \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n + \ \"tokenCount\": 1180\n }\n ],\n \"thoughtsTokenCount\": + 47\n },\n \"modelVersion\": \"gemini-2.5-flash\",\n \"responseId\": \"wXmLadTiEri5jMcPk_6ZgAc\"\n}\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Type: + - application/json; charset=UTF-8 + Date: + - Tue, 10 Feb 2026 18:32:33 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=881 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + X-Frame-Options: + - X-FRAME-OPTIONS-XXX + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_completions_cached_prompt_tokens.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_completions_cached_prompt_tokens.yaml new file mode 100644 index 000000000..5ec31bcea --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_completions_cached_prompt_tokens.yaml @@ -0,0 +1,356 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are a helpful assistant. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ "},{"role":"user","content":"Say hello in one word."}],"model":"gpt-4.1"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '5823' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D7mVhCCkdWfellaSmcNLOuu87BsqI\",\n \"object\": + \"chat.completion\",\n \"created\": 1770747141,\n \"model\": \"gpt-4.1-2025-04-14\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Hello!\",\n \"refusal\": null,\n + \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\": + \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 1144,\n \"completion_tokens\": + 2,\n \"total_tokens\": 1146,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 1024,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_8b22347a3e\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:12:22 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + 
alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '469' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are a helpful assistant. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + "},{"role":"user","content":"Say goodbye in one word."}],"model":"gpt-4.1"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '5825' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D7mViSYwB6eFFbBcp045uvPAO8m2e\",\n \"object\": + \"chat.completion\",\n \"created\": 1770747142,\n \"model\": \"gpt-4.1-2025-04-14\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Farewell.\",\n \"refusal\": + null,\n \"annotations\": []\n },\n \"logprobs\": null,\n + \ 
\"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 1144,\n \"completion_tokens\": 3,\n \"total_tokens\": 1147,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_8b22347a3e\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:12:22 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '468' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_completions_cached_prompt_tokens_with_tools.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_completions_cached_prompt_tokens_with_tools.yaml new file mode 100644 index 000000000..25137d35f --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_completions_cached_prompt_tokens_with_tools.yaml @@ -0,0 +1,368 @@ 
+interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are a helpful assistant that + uses tools. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. "},{"role":"user","content":"What is the weather in Tokyo?"}],"model":"gpt-4.1","tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"Get + the current weather for a location","strict":true,"parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city name"}},"required":["location"],"additionalProperties":false}}}]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '6158' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D7mVx3s1dI2SICWePwHVeWCDct2QG\",\n \"object\": + \"chat.completion\",\n \"created\": 1770747157,\n \"model\": \"gpt-4.1-2025-04-14\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_x9KzZUT3UYazEUJiRmE0PvaU\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"get_weather\",\n + \ \"arguments\": \"{\\\"location\\\":\\\"Tokyo\\\"}\"\n }\n + \ }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 1187,\n \"completion_tokens\": + 14,\n \"total_tokens\": 1201,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 1152,\n 
\"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_8b22347a3e\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:12:37 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '645' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are a helpful assistant that + uses tools. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
"},{"role":"user","content":"What is the weather in Paris?"}],"model":"gpt-4.1","tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"Get + the current weather for a location","strict":true,"parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city name"}},"required":["location"],"additionalProperties":false}}}]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '6158' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D7mVynM0Soyt3osUFrlF7tEyrj7jP\",\n \"object\": + \"chat.completion\",\n \"created\": 1770747158,\n \"model\": \"gpt-4.1-2025-04-14\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_k8rYmsdMcCWSRKqVDFItmJ8v\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"get_weather\",\n + \ \"arguments\": \"{\\\"location\\\":\\\"Paris\\\"}\"\n }\n + \ }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 1187,\n \"completion_tokens\": + 14,\n \"total_tokens\": 1201,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 1152,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n 
\"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_8b22347a3e\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:12:38 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '749' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_cached_prompt_tokens.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_cached_prompt_tokens.yaml new file mode 100644 index 000000000..32167dab9 --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_cached_prompt_tokens.yaml @@ -0,0 +1,520 @@ +interactions: +- request: + body: '{"input":[{"role":"user","content":"Say hello in one word."}],"model":"gpt-4.1","instructions":"You + are a helpful assistant. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. 
This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. 
"}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '5807' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: "{\n \"id\": \"resp_0b352452095088f800698b751350fc8196bd5d8b1a179d27e8\",\n + \ \"object\": \"response\",\n \"created_at\": 1770747155,\n \"status\": + \"completed\",\n \"background\": false,\n \"billing\": {\n \"payer\": + \"developer\"\n },\n \"completed_at\": 1770747155,\n \"error\": null,\n + \ \"frequency_penalty\": 0.0,\n \"incomplete_details\": null,\n \"instructions\": + \"You are a helpful assistant. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. 
This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. 
This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. 
This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. 
\",\n \"max_output_tokens\": + null,\n \"max_tool_calls\": null,\n \"model\": \"gpt-4.1-2025-04-14\",\n + \ \"output\": [\n {\n \"id\": \"msg_0b352452095088f800698b7513b97c8196b35014840754d999\",\n + \ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": + [\n {\n \"type\": \"output_text\",\n \"annotations\": + [],\n \"logprobs\": [],\n \"text\": \"Hello!\"\n }\n + \ ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\": + true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\": + null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\": + null,\n \"summary\": null\n },\n \"safety_identifier\": null,\n \"service_tier\": + \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n \"text\": {\n + \ \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\": \"medium\"\n + \ },\n \"tool_choice\": \"auto\",\n \"tools\": [],\n \"top_logprobs\": + 0,\n \"top_p\": 1.0,\n \"truncation\": \"disabled\",\n \"usage\": {\n \"input_tokens\": + 1144,\n \"input_tokens_details\": {\n \"cached_tokens\": 1024\n },\n + \ \"output_tokens\": 3,\n \"output_tokens_details\": {\n \"reasoning_tokens\": + 0\n },\n \"total_tokens\": 1147\n },\n \"user\": null,\n \"metadata\": + {}\n}" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:12:35 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '637' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + 
x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: '{"input":[{"role":"user","content":"Say goodbye in one word."}],"model":"gpt-4.1","instructions":"You + are a helpful assistant. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. "}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '5809' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/responses + response: + body: + string: "{\n \"id\": \"resp_003a6f71f9ee620400698b75140a088196989e8d5641ffa74d\",\n + \ \"object\": \"response\",\n \"created_at\": 1770747156,\n \"status\": + \"completed\",\n \"background\": false,\n \"billing\": {\n \"payer\": + \"developer\"\n },\n \"completed_at\": 1770747156,\n \"error\": null,\n + \ \"frequency_penalty\": 0.0,\n \"incomplete_details\": null,\n \"instructions\": + \"You are a helpful assistant. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. 
This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. 
This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to + ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the + prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is + large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for + caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is + padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. 
\",\n \"max_output_tokens\": + null,\n \"max_tool_calls\": null,\n \"model\": \"gpt-4.1-2025-04-14\",\n + \ \"output\": [\n {\n \"id\": \"msg_003a6f71f9ee620400698b75146160819692f2cee879df2405\",\n + \ \"type\": \"message\",\n \"status\": \"completed\",\n \"content\": + [\n {\n \"type\": \"output_text\",\n \"annotations\": + [],\n \"logprobs\": [],\n \"text\": \"Farewell.\"\n }\n + \ ],\n \"role\": \"assistant\"\n }\n ],\n \"parallel_tool_calls\": + true,\n \"presence_penalty\": 0.0,\n \"previous_response_id\": null,\n \"prompt_cache_key\": + null,\n \"prompt_cache_retention\": null,\n \"reasoning\": {\n \"effort\": + null,\n \"summary\": null\n },\n \"safety_identifier\": null,\n \"service_tier\": + \"default\",\n \"store\": true,\n \"temperature\": 1.0,\n \"text\": {\n + \ \"format\": {\n \"type\": \"text\"\n },\n \"verbosity\": \"medium\"\n + \ },\n \"tool_choice\": \"auto\",\n \"tools\": [],\n \"top_logprobs\": + 0,\n \"top_p\": 1.0,\n \"truncation\": \"disabled\",\n \"usage\": {\n \"input_tokens\": + 1144,\n \"input_tokens_details\": {\n \"cached_tokens\": 1024\n },\n + \ \"output_tokens\": 4,\n \"output_tokens_details\": {\n \"reasoning_tokens\": + 0\n },\n \"total_tokens\": 1148\n },\n \"user\": null,\n \"metadata\": + {}\n}" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:12:36 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '543' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + 
x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_cached_prompt_tokens_with_tools.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_cached_prompt_tokens_with_tools.yaml new file mode 100644 index 000000000..c0db4ef9c --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_responses_api_cached_prompt_tokens_with_tools.yaml @@ -0,0 +1,368 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are a helpful assistant that + uses tools. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. "},{"role":"user","content":"What is the weather in Tokyo?"}],"model":"gpt-4.1","tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"Get + the current weather for a location","strict":true,"parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city name"}},"required":["location"],"additionalProperties":false}}}]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '6158' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D7mXQCgT3p3ViImkiqDiZGqLREQtp\",\n \"object\": + \"chat.completion\",\n \"created\": 1770747248,\n 
\"model\": \"gpt-4.1-2025-04-14\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_9ZqMavn3J1fBnQEaqpYol0Bd\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"get_weather\",\n + \ \"arguments\": \"{\\\"location\\\":\\\"Tokyo\\\"}\"\n }\n + \ }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 1187,\n \"completion_tokens\": + 14,\n \"total_tokens\": 1201,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 1152,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_8b22347a3e\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:14:08 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '484' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + 
status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"system","content":"You are a helpful assistant that + uses tools. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. 
This is padding text to ensure the prompt is large + enough for caching. "},{"role":"user","content":"What is the weather in Paris?"}],"model":"gpt-4.1","tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"Get + the current weather for a location","strict":true,"parameters":{"type":"object","properties":{"location":{"type":"string","description":"The + city name"}},"required":["location"],"additionalProperties":false}}}]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '6158' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-D7mXR8k9vk8TlGvGXlrQSI7iNeAN1\",\n \"object\": + \"chat.completion\",\n \"created\": 1770747249,\n \"model\": \"gpt-4.1-2025-04-14\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_6PeUBlRPG8JcV2lspmLjJbnn\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"get_weather\",\n + \ \"arguments\": \"{\\\"location\\\":\\\"Paris\\\"}\"\n }\n + \ }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 1187,\n \"completion_tokens\": + 14,\n \"total_tokens\": 1201,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 
1152,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_8b22347a3e\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 10 Feb 2026 18:14:09 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '528' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/llms/openai/test_openai_streaming_cached_prompt_tokens.yaml b/lib/crewai/tests/cassettes/llms/openai/test_openai_streaming_cached_prompt_tokens.yaml new file mode 100644 index 000000000..86ce69eb5 --- /dev/null +++ b/lib/crewai/tests/cassettes/llms/openai/test_openai_streaming_cached_prompt_tokens.yaml @@ -0,0 +1,375 @@ +interactions: +- request: + body: '{"messages":[{"role":"system","content":"You are a helpful assistant. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. 
This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ "},{"role":"user","content":"Say hello in one word."}],"model":"gpt-4.1","stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '5877' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-D7mVuXauQqcmOCb3XP6IL6yHwJaAL","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"lFWRn007xqlce"} + + + data: {"id":"chatcmpl-D7mVuXauQqcmOCb3XP6IL6yHwJaAL","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OXJHANtgvy"} + + + data: {"id":"chatcmpl-D7mVuXauQqcmOCb3XP6IL6yHwJaAL","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AZtd6jtoChevtm"} + + + data: 
{"id":"chatcmpl-D7mVuXauQqcmOCb3XP6IL6yHwJaAL","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"irwn2mqyB"} + + + data: {"id":"chatcmpl-D7mVuXauQqcmOCb3XP6IL6yHwJaAL","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[],"usage":{"prompt_tokens":1144,"completion_tokens":2,"total_tokens":1146,"prompt_tokens_details":{"cached_tokens":1024,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"W0rkiiZe"} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 10 Feb 2026 18:12:34 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '236' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +- request: + body: 
'{"messages":[{"role":"system","content":"You are a helpful assistant. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. + This is padding text to ensure the prompt is large enough for caching. This + is padding text to ensure the prompt is large enough for caching. This is padding + text to ensure the prompt is large enough for caching. This is padding text + to ensure the prompt is large enough for caching. This is padding text to ensure + the prompt is large enough for caching. This is padding text to ensure the prompt + is large enough for caching. This is padding text to ensure the prompt is large + enough for caching. This is padding text to ensure the prompt is large enough + for caching. This is padding text to ensure the prompt is large enough for caching. 
+ "},{"role":"user","content":"Say goodbye in one word."}],"model":"gpt-4.1","stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '5879' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 1.83.0 + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.13.3 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-D7mVuqaadwp22jFsp2qAKiE1utU3K","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"pCjdYd4kX4W2q"} + + + data: {"id":"chatcmpl-D7mVuqaadwp22jFsp2qAKiE1utU3K","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{"content":"Fare"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DJ94I8XQj86"} + + + data: {"id":"chatcmpl-D7mVuqaadwp22jFsp2qAKiE1utU3K","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{"content":"well"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"qgSSFwDBmaW"} + + + data: 
{"id":"chatcmpl-D7mVuqaadwp22jFsp2qAKiE1utU3K","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"4xVBYer6Uy1atr"} + + + data: {"id":"chatcmpl-D7mVuqaadwp22jFsp2qAKiE1utU3K","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"XxMhsMje0"} + + + data: {"id":"chatcmpl-D7mVuqaadwp22jFsp2qAKiE1utU3K","object":"chat.completion.chunk","created":1770747154,"model":"gpt-4.1-2025-04-14","service_tier":"default","system_fingerprint":"fp_8b22347a3e","choices":[],"usage":{"prompt_tokens":1144,"completion_tokens":3,"total_tokens":1147,"prompt_tokens_details":{"cached_tokens":1024,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"J3eKDOHW"} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Tue, 10 Feb 2026 18:12:34 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '296' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + set-cookie: + - SET-COOKIE-XXX + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - 
X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/llms/anthropic/test_anthropic.py b/lib/crewai/tests/llms/anthropic/test_anthropic.py index c5ad5f273..129662ef3 100644 --- a/lib/crewai/tests/llms/anthropic/test_anthropic.py +++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py @@ -990,3 +990,134 @@ def test_anthropic_agent_kickoff_structured_output_with_tools(): assert result.pydantic.result == 42, f"Expected result 42 but got {result.pydantic.result}" assert result.pydantic.operation, "Operation should not be empty" assert result.pydantic.explanation, "Explanation should not be empty" + + +@pytest.mark.vcr() +def test_anthropic_cached_prompt_tokens(): + """ + Test that Anthropic correctly extracts and tracks cached_prompt_tokens + from cache_read_input_tokens. Uses cache_control to enable prompt caching + and sends the same large prompt twice so the second call hits the cache. + """ + # Anthropic requires cache_control blocks and >=1024 tokens for caching + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant. 
{padding}" + + llm = LLM(model="anthropic/claude-sonnet-4-5-20250929") + + def _ephemeral_user(text: str): + return [{"type": "text", "text": text, "cache_control": {"type": "ephemeral"}}] + + # First call: creates the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": _ephemeral_user("Say hello in one word.")}, + ]) + + # Second call: same system prompt should hit the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": _ephemeral_user("Say goodbye in one word.")}, + ]) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.prompt_tokens > 0 + assert usage.completion_tokens > 0 + assert usage.successful_requests == 2 + # The second call should have cached prompt tokens + assert usage.cached_prompt_tokens > 0 + + +@pytest.mark.vcr() +def test_anthropic_streaming_cached_prompt_tokens(): + """ + Test that Anthropic streaming correctly extracts and tracks cached_prompt_tokens. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant. 
{padding}" + + llm = LLM(model="anthropic/claude-sonnet-4-5-20250929", stream=True) + + def _ephemeral_user(text: str): + return [{"type": "text", "text": text, "cache_control": {"type": "ephemeral"}}] + + # First call: creates the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": _ephemeral_user("Say hello in one word.")}, + ]) + + # Second call: same system prompt should hit the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": _ephemeral_user("Say goodbye in one word.")}, + ]) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.successful_requests == 2 + # The second call should have cached prompt tokens + assert usage.cached_prompt_tokens > 0 + + +@pytest.mark.vcr() +def test_anthropic_cached_prompt_tokens_with_tools(): + """ + Test that Anthropic correctly tracks cached_prompt_tokens when tools are used. + The large system prompt should be cached across tool-calling requests. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant that uses tools. 
{padding}" + + def get_weather(location: str) -> str: + return f"The weather in {location} is sunny and 72°F" + + tools = [ + { + "name": "get_weather", + "description": "Get the current weather for a location", + "input_schema": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name" + } + }, + "required": ["location"], + }, + } + ] + + llm = LLM(model="anthropic/claude-sonnet-4-5-20250929") + + def _ephemeral_user(text: str): + return [{"type": "text", "text": text, "cache_control": {"type": "ephemeral"}}] + + # First call with tool: creates the cache + llm.call( + [ + {"role": "system", "content": system_msg}, + {"role": "user", "content": _ephemeral_user("What is the weather in Tokyo?")}, + ], + tools=tools, + available_functions={"get_weather": get_weather}, + ) + + # Second call with same system prompt + tools: should hit the cache + llm.call( + [ + {"role": "system", "content": system_msg}, + {"role": "user", "content": _ephemeral_user("What is the weather in Paris?")}, + ], + tools=tools, + available_functions={"get_weather": get_weather}, + ) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.prompt_tokens > 0 + assert usage.successful_requests == 2 + # The second call should have cached prompt tokens + assert usage.cached_prompt_tokens > 0 diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index 17a01bb56..d25b607a8 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -102,7 +102,6 @@ def test_azure_tool_use_conversation_flow(): # Verify that the API was called assert mock_complete.called - @pytest.mark.usefixtures("mock_azure_credentials") def test_azure_completion_module_is_imported(): """ diff --git a/lib/crewai/tests/llms/google/test_google.py b/lib/crewai/tests/llms/google/test_google.py index 1c3ed5ce6..3f86388d5 100644 --- 
a/lib/crewai/tests/llms/google/test_google.py +++ b/lib/crewai/tests/llms/google/test_google.py @@ -42,65 +42,6 @@ def test_gemini_completion_is_used_when_gemini_provider(): assert llm.provider == "gemini" assert llm.model == "gemini-2.0-flash-001" - - - -def test_gemini_tool_use_conversation_flow(): - """ - Test that the Gemini completion properly handles tool use conversation flow - """ - from unittest.mock import Mock, patch - from crewai.llms.providers.gemini.completion import GeminiCompletion - - # Create GeminiCompletion instance - completion = GeminiCompletion(model="gemini-2.0-flash-001") - - # Mock tool function - def mock_weather_tool(location: str) -> str: - return f"The weather in {location} is sunny and 75°F" - - available_functions = {"get_weather": mock_weather_tool} - - # Mock the Google Gemini client responses - with patch.object(completion.client.models, 'generate_content') as mock_generate: - # Mock function call in response - mock_function_call = Mock() - mock_function_call.name = "get_weather" - mock_function_call.args = {"location": "San Francisco"} - - mock_part = Mock() - mock_part.function_call = mock_function_call - - mock_content = Mock() - mock_content.parts = [mock_part] - - mock_candidate = Mock() - mock_candidate.content = mock_content - - mock_response = Mock() - mock_response.candidates = [mock_candidate] - mock_response.text = "Based on the weather data, it's a beautiful day in San Francisco with sunny skies and 75°F temperature." 
- mock_response.usage_metadata = Mock() - mock_response.usage_metadata.prompt_token_count = 100 - mock_response.usage_metadata.candidates_token_count = 50 - mock_response.usage_metadata.total_token_count = 150 - - mock_generate.return_value = mock_response - - # Test the call - messages = [{"role": "user", "content": "What's the weather like in San Francisco?"}] - result = completion.call( - messages=messages, - available_functions=available_functions - ) - - # Verify the tool was executed and returned the result - assert result == "The weather in San Francisco is sunny and 75°F" - - # Verify that the API was called - assert mock_generate.called - - def test_gemini_completion_module_is_imported(): """ Test that the completion module is properly imported when using Google provider @@ -1114,3 +1055,97 @@ def test_gemini_structured_output_preserves_json_with_stop_word_patterns(): assert "Action:" in result.action_taken assert "Observation:" in result.observation_result assert "Final Answer:" in result.final_answer + + +@pytest.mark.vcr() +def test_gemini_cached_prompt_tokens(): + """ + Test that Gemini correctly extracts and tracks cached_prompt_tokens + from cached_content_token_count in the usage metadata. + Sends two calls with the same large prompt to trigger caching. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant. 
{padding}" + + llm = LLM(model="google/gemini-2.5-flash") + + # First call + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say hello in one word."}, + ]) + + # Second call: same system prompt + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say goodbye in one word."}, + ]) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.prompt_tokens > 0 + assert usage.completion_tokens > 0 + assert usage.successful_requests == 2 + # cached_prompt_tokens should be populated (may be 0 if Gemini + # doesn't cache for this particular request, but the field should exist) + assert usage.cached_prompt_tokens >= 0 + + +@pytest.mark.vcr() +def test_gemini_cached_prompt_tokens_with_tools(): + """ + Test that Gemini correctly tracks cached_prompt_tokens when tools are used. + The large system prompt should be cached across tool-calling requests. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant that uses tools. 
{padding}" + + def get_weather(location: str) -> str: + return f"The weather in {location} is sunny and 72°F" + + tools = [ + { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name" + } + }, + "required": ["location"], + }, + } + ] + + llm = LLM(model="google/gemini-2.5-flash") + + # First call with tool + llm.call( + [ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "What is the weather in Tokyo?"}, + ], + tools=tools, + available_functions={"get_weather": get_weather}, + ) + + # Second call with same system prompt + tools + llm.call( + [ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "What is the weather in Paris?"}, + ], + tools=tools, + available_functions={"get_weather": get_weather}, + ) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.prompt_tokens > 0 + assert usage.successful_requests == 2 + # cached_prompt_tokens should be populated (may be 0 if Gemini + # doesn't cache for this particular request, but the field should exist) + assert usage.cached_prompt_tokens >= 0 diff --git a/lib/crewai/tests/llms/openai/test_openai.py b/lib/crewai/tests/llms/openai/test_openai.py index a75a37681..069823a7a 100644 --- a/lib/crewai/tests/llms/openai/test_openai.py +++ b/lib/crewai/tests/llms/openai/test_openai.py @@ -1581,6 +1581,218 @@ def test_openai_structured_output_preserves_json_with_stop_word_patterns(): assert "Final Answer:" in result.final_answer + +@pytest.mark.vcr() +def test_openai_completions_cached_prompt_tokens(): + """ + Test that the Chat Completions API correctly extracts and tracks + cached_prompt_tokens from prompt_tokens_details.cached_tokens. + Sends the same large prompt twice so the second call hits the cache. 
+ """ + # Build a large system prompt to trigger prompt caching (>1024 tokens) + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant. {padding}" + + llm = OpenAICompletion(model="gpt-4.1") + + # First call: creates the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say hello in one word."}, + ]) + + # Second call: same system prompt should hit the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say goodbye in one word."}, + ]) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.prompt_tokens > 0 + assert usage.completion_tokens > 0 + assert usage.successful_requests == 2 + # The second call should have cached prompt tokens + assert usage.cached_prompt_tokens > 0 + + +@pytest.mark.vcr() +def test_openai_responses_api_cached_prompt_tokens(): + """ + Test that the Responses API correctly extracts and tracks + cached_prompt_tokens from input_tokens_details.cached_tokens. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant. 
{padding}" + + llm = OpenAICompletion(model="gpt-4.1", api="responses") + + # First call: creates the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say hello in one word."}, + ]) + + # Second call: same system prompt should hit the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say goodbye in one word."}, + ]) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.prompt_tokens > 0 + assert usage.completion_tokens > 0 + assert usage.successful_requests == 2 + # The second call should have cached prompt tokens + assert usage.cached_prompt_tokens > 0 + + +@pytest.mark.vcr() +def test_openai_streaming_cached_prompt_tokens(): + """ + Test that streaming Chat Completions API correctly extracts and tracks + cached_prompt_tokens. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant. {padding}" + + llm = OpenAICompletion(model="gpt-4.1", stream=True) + + # First call: creates the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say hello in one word."}, + ]) + + # Second call: same system prompt should hit the cache + llm.call([ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "Say goodbye in one word."}, + ]) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.successful_requests == 2 + # The second call should have cached prompt tokens + assert usage.cached_prompt_tokens > 0 + + +@pytest.mark.vcr() +def test_openai_completions_cached_prompt_tokens_with_tools(): + """ + Test that the Chat Completions API correctly tracks cached_prompt_tokens + when tools are used. The large system prompt should be cached across calls. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. 
" * 80 + system_msg = f"You are a helpful assistant that uses tools. {padding}" + + def get_weather(location: str) -> str: + return f"The weather in {location} is sunny and 72°F" + + tools = [ + { + "name": "get_weather", + "description": "Get the current weather for a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city name" + } + }, + "required": ["location"], + "additionalProperties": False, + }, + } + ] + + llm = OpenAICompletion(model="gpt-4.1") + + # First call with tool: creates the cache + llm.call( + [ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "What is the weather in Tokyo?"}, + ], + tools=tools, + available_functions={"get_weather": get_weather}, + ) + + # Second call with same system prompt + tools: should hit the cache + llm.call( + [ + {"role": "system", "content": system_msg}, + {"role": "user", "content": "What is the weather in Paris?"}, + ], + tools=tools, + available_functions={"get_weather": get_weather}, + ) + + usage = llm.get_token_usage_summary() + assert usage.total_tokens > 0 + assert usage.prompt_tokens > 0 + assert usage.successful_requests == 2 + # The second call should have cached prompt tokens + assert usage.cached_prompt_tokens > 0 + + +@pytest.mark.vcr() +def test_openai_responses_api_cached_prompt_tokens_with_tools(): + """ + Test that the Responses API correctly tracks cached_prompt_tokens + when function tools are used. + """ + padding = "This is padding text to ensure the prompt is large enough for caching. " * 80 + system_msg = f"You are a helpful assistant that uses tools. 
{padding}"
+
+    def get_weather(location: str) -> str:
+        return f"The weather in {location} is sunny and 72°F"
+
+    tools = [
+        {
+            "name": "get_weather",
+            "description": "Get the current weather for a location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city name"
+                    }
+                },
+                "required": ["location"],
+            },
+        }
+    ]
+
+    llm = OpenAICompletion(model="gpt-4.1", api="responses")
+
+    # First call with tool
+    llm.call(
+        [
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": "What is the weather in Tokyo?"},
+        ],
+        tools=tools,
+        available_functions={"get_weather": get_weather},
+    )
+
+    # Second call: same system prompt + tools should hit cache
+    llm.call(
+        [
+            {"role": "system", "content": system_msg},
+            {"role": "user", "content": "What is the weather in Paris?"},
+        ],
+        tools=tools,
+        available_functions={"get_weather": get_weather},
+    )
+
+    usage = llm.get_token_usage_summary()
+    assert usage.total_tokens > 0
+    assert usage.successful_requests == 2
+    assert usage.cached_prompt_tokens > 0
 def test_openai_streaming_returns_tool_calls_without_available_functions():
     """Test that streaming returns tool calls list when available_functions is None.