diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py
index 41ce1d2cd..4f45572ee 100644
--- a/lib/crewai/src/crewai/llms/base_llm.py
+++ b/lib/crewai/src/crewai/llms/base_llm.py
@@ -172,6 +172,8 @@ class BaseLLM(BaseModel, ABC):
             "completion_tokens": 0,
             "successful_requests": 0,
             "cached_prompt_tokens": 0,
+            "reasoning_tokens": 0,
+            "cache_creation_tokens": 0,
         }
     )
 
@@ -808,14 +810,24 @@ class BaseLLM(BaseModel, ABC):
         cached_tokens = (
             usage_data.get("cached_tokens")
             or usage_data.get("cached_prompt_tokens")
+            or usage_data.get("cache_read_input_tokens")
             or 0
         )
+        if not cached_tokens:
+            prompt_details = usage_data.get("prompt_tokens_details")
+            if isinstance(prompt_details, dict):
+                cached_tokens = prompt_details.get("cached_tokens", 0) or 0
+
+        reasoning_tokens = usage_data.get("reasoning_tokens", 0) or 0
+        cache_creation_tokens = usage_data.get("cache_creation_tokens", 0) or 0
 
         self._token_usage["prompt_tokens"] += prompt_tokens
         self._token_usage["completion_tokens"] += completion_tokens
         self._token_usage["total_tokens"] += prompt_tokens + completion_tokens
         self._token_usage["successful_requests"] += 1
         self._token_usage["cached_prompt_tokens"] += cached_tokens
+        self._token_usage["reasoning_tokens"] += reasoning_tokens
+        self._token_usage["cache_creation_tokens"] += cache_creation_tokens
 
     def get_token_usage_summary(self) -> UsageMetrics:
         """Get summary of token usage for this LLM instance.
diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
index b6df34b94..ad572afa8 100644
--- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py
@@ -1704,18 +1704,23 @@ class AnthropicCompletion(BaseLLM):
     def _extract_anthropic_token_usage(
         response: Message | BetaMessage,
     ) -> dict[str, Any]:
-        """Extract token usage from Anthropic response."""
+        """Extract token usage and response metadata from Anthropic response."""
         if hasattr(response, "usage") and response.usage:
             usage = response.usage
            input_tokens = getattr(usage, "input_tokens", 0)
             output_tokens = getattr(usage, "output_tokens", 0)
             cache_read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0
-            return {
+            cache_creation_tokens = (
+                getattr(usage, "cache_creation_input_tokens", 0) or 0
+            )
+            result: dict[str, Any] = {
                 "input_tokens": input_tokens,
                 "output_tokens": output_tokens,
                 "total_tokens": input_tokens + output_tokens,
                 "cached_prompt_tokens": cache_read_tokens,
+                "cache_creation_tokens": cache_creation_tokens,
             }
+            return result
         return {"total_tokens": 0}
 
     def supports_multimodal(self) -> bool:
diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py
index db7ab7e73..cac811bc7 100644
--- a/lib/crewai/src/crewai/llms/providers/azure/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py
@@ -1076,19 +1076,27 @@ class AzureCompletion(BaseLLM):
 
     @staticmethod
     def _extract_azure_token_usage(response: ChatCompletions) -> dict[str, Any]:
-        """Extract token usage from Azure response."""
+        """Extract token usage and response metadata from Azure response."""
         if hasattr(response, "usage") and response.usage:
             usage = response.usage
             cached_tokens = 0
             prompt_details = getattr(usage, "prompt_tokens_details", None)
             if prompt_details:
                 cached_tokens = getattr(prompt_details, "cached_tokens", 0) or 0
-            return {
+            reasoning_tokens = 0
+            completion_details = getattr(usage, "completion_tokens_details", None)
+            if completion_details:
+                reasoning_tokens = (
+                    getattr(completion_details, "reasoning_tokens", 0) or 0
+                )
+            result: dict[str, Any] = {
                 "prompt_tokens": getattr(usage, "prompt_tokens", 0),
                 "completion_tokens": getattr(usage, "completion_tokens", 0),
                 "total_tokens": getattr(usage, "total_tokens", 0),
                 "cached_prompt_tokens": cached_tokens,
+                "reasoning_tokens": reasoning_tokens,
             }
+            return result
         return {"total_tokens": 0}
 
     async def aclose(self) -> None:
diff --git a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
index c25c9bfec..c3a27d6e2 100644
--- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py
@@ -2025,11 +2025,18 @@ class BedrockCompletion(BaseLLM):
         input_tokens = usage.get("inputTokens", 0)
         output_tokens = usage.get("outputTokens", 0)
         total_tokens = usage.get("totalTokens", input_tokens + output_tokens)
+        raw_cached = (
+            usage.get("cacheReadInputTokenCount")
+            or usage.get("cacheReadInputTokens")
+            or 0
+        )
+        cached_tokens = raw_cached if isinstance(raw_cached, int) else 0
 
         self._token_usage["prompt_tokens"] += input_tokens
         self._token_usage["completion_tokens"] += output_tokens
         self._token_usage["total_tokens"] += total_tokens
         self._token_usage["successful_requests"] += 1
+        self._token_usage["cached_prompt_tokens"] += cached_tokens
 
     def supports_function_calling(self) -> bool:
         """Check if the model supports function calling."""
diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
index c84f7f5fd..3959bc659 100644
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -1306,17 +1306,20 @@ class GeminiCompletion(BaseLLM):
 
     @staticmethod
     def _extract_token_usage(response: GenerateContentResponse) -> dict[str, Any]:
-        """Extract token usage from Gemini response."""
+        """Extract token usage and response metadata from Gemini response."""
         if response.usage_metadata:
             usage = response.usage_metadata
             cached_tokens = getattr(usage, "cached_content_token_count", 0) or 0
-            return {
+            thinking_tokens = getattr(usage, "thoughts_token_count", 0) or 0
+            result: dict[str, Any] = {
                 "prompt_token_count": getattr(usage, "prompt_token_count", 0),
                 "candidates_token_count": getattr(usage, "candidates_token_count", 0),
                 "total_token_count": getattr(usage, "total_token_count", 0),
                 "total_tokens": getattr(usage, "total_token_count", 0),
                 "cached_prompt_tokens": cached_tokens,
+                "reasoning_tokens": thinking_tokens,
             }
+            return result
         return {"total_tokens": 0}
 
     @staticmethod
diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py
index b76f552df..668794e83 100644
--- a/lib/crewai/src/crewai/llms/providers/openai/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py
@@ -1324,19 +1324,23 @@ class OpenAICompletion(BaseLLM):
         ]
 
     def _extract_responses_token_usage(self, response: Response) -> dict[str, Any]:
-        """Extract token usage from Responses API response."""
+        """Extract token usage and response metadata from Responses API response."""
         if response.usage:
-            result = {
+            result: dict[str, Any] = {
                 "prompt_tokens": response.usage.input_tokens,
                 "completion_tokens": response.usage.output_tokens,
                 "total_tokens": response.usage.total_tokens,
             }
-            # Extract cached prompt tokens from input_tokens_details
             input_details = getattr(response.usage, "input_tokens_details", None)
             if input_details:
                 result["cached_prompt_tokens"] = (
                     getattr(input_details, "cached_tokens", 0) or 0
                 )
+            output_details = getattr(response.usage, "output_tokens_details", None)
+            if output_details:
+                result["reasoning_tokens"] = (
+                    getattr(output_details, "reasoning_tokens", 0) or 0
+                )
             return result
         return {"total_tokens": 0}
 
@@ -2307,20 +2311,24 @@ class OpenAICompletion(BaseLLM):
     def _extract_openai_token_usage(
         self, response: ChatCompletion | ChatCompletionChunk
     ) -> dict[str, Any]:
-        """Extract token usage from OpenAI ChatCompletion or ChatCompletionChunk response."""
+        """Extract token usage and response metadata from OpenAI ChatCompletion."""
         if hasattr(response, "usage") and response.usage:
             usage = response.usage
-            result = {
+            result: dict[str, Any] = {
                 "prompt_tokens": getattr(usage, "prompt_tokens", 0),
                 "completion_tokens": getattr(usage, "completion_tokens", 0),
                 "total_tokens": getattr(usage, "total_tokens", 0),
             }
-            # Extract cached prompt tokens from prompt_tokens_details
             prompt_details = getattr(usage, "prompt_tokens_details", None)
             if prompt_details:
                 result["cached_prompt_tokens"] = (
                     getattr(prompt_details, "cached_tokens", 0) or 0
                 )
+            completion_details = getattr(usage, "completion_tokens_details", None)
+            if completion_details:
+                result["reasoning_tokens"] = (
+                    getattr(completion_details, "reasoning_tokens", 0) or 0
+                )
             return result
         return {"total_tokens": 0}
 
diff --git a/lib/crewai/src/crewai/types/usage_metrics.py b/lib/crewai/src/crewai/types/usage_metrics.py
index 77e9ef598..76fa7dca0 100644
--- a/lib/crewai/src/crewai/types/usage_metrics.py
+++ b/lib/crewai/src/crewai/types/usage_metrics.py
@@ -29,6 +29,14 @@ class UsageMetrics(BaseModel):
     completion_tokens: int = Field(
         default=0, description="Number of tokens used in completions."
     )
+    reasoning_tokens: int = Field(
+        default=0,
+        description="Number of reasoning/thinking tokens (e.g. OpenAI o-series, Gemini thinking).",
+    )
+    cache_creation_tokens: int = Field(
+        default=0,
+        description="Number of cache creation tokens (e.g. Anthropic cache writes).",
+    )
     successful_requests: int = Field(
         default=0, description="Number of successful requests made."
     )
@@ -43,4 +51,6 @@ class UsageMetrics(BaseModel):
         self.prompt_tokens += usage_metrics.prompt_tokens
         self.cached_prompt_tokens += usage_metrics.cached_prompt_tokens
         self.completion_tokens += usage_metrics.completion_tokens
+        self.reasoning_tokens += usage_metrics.reasoning_tokens
+        self.cache_creation_tokens += usage_metrics.cache_creation_tokens
         self.successful_requests += usage_metrics.successful_requests
diff --git a/lib/crewai/tests/events/test_llm_usage_event.py b/lib/crewai/tests/events/test_llm_usage_event.py
index f19f07b47..9be8c639f 100644
--- a/lib/crewai/tests/events/test_llm_usage_event.py
+++ b/lib/crewai/tests/events/test_llm_usage_event.py
@@ -174,3 +174,51 @@ class TestEmitCallCompletedEventPassesUsage:
         event = mock_emit.call_args[1]["event"]
         assert isinstance(event, LLMCallCompletedEvent)
         assert event.usage is None
+
+class TestUsageMetricsNewFields:
+    def test_add_usage_metrics_aggregates_reasoning_and_cache_creation(self):
+        from crewai.types.usage_metrics import UsageMetrics
+
+        metrics1 = UsageMetrics(
+            total_tokens=100,
+            prompt_tokens=60,
+            completion_tokens=40,
+            cached_prompt_tokens=10,
+            reasoning_tokens=15,
+            cache_creation_tokens=5,
+            successful_requests=1,
+        )
+        metrics2 = UsageMetrics(
+            total_tokens=200,
+            prompt_tokens=120,
+            completion_tokens=80,
+            cached_prompt_tokens=20,
+            reasoning_tokens=25,
+            cache_creation_tokens=10,
+            successful_requests=1,
+        )
+
+        metrics1.add_usage_metrics(metrics2)
+
+        assert metrics1.total_tokens == 300
+        assert metrics1.prompt_tokens == 180
+        assert metrics1.completion_tokens == 120
+        assert metrics1.cached_prompt_tokens == 30
+        assert metrics1.reasoning_tokens == 40
+        assert metrics1.cache_creation_tokens == 15
+        assert metrics1.successful_requests == 2
+
+    def test_new_fields_default_to_zero(self):
+        from crewai.types.usage_metrics import UsageMetrics
+
+        metrics = UsageMetrics()
+        assert metrics.reasoning_tokens == 0
+        assert metrics.cache_creation_tokens == 0
+
+    def test_model_dump_includes_new_fields(self):
+        from crewai.types.usage_metrics import UsageMetrics
+
+        metrics = UsageMetrics(reasoning_tokens=10, cache_creation_tokens=5)
+        dumped = metrics.model_dump()
+        assert dumped["reasoning_tokens"] == 10
+        assert dumped["cache_creation_tokens"] == 5
diff --git a/lib/crewai/tests/llms/anthropic/test_anthropic.py b/lib/crewai/tests/llms/anthropic/test_anthropic.py
index e8f16af5a..81a51c8d6 100644
--- a/lib/crewai/tests/llms/anthropic/test_anthropic.py
+++ b/lib/crewai/tests/llms/anthropic/test_anthropic.py
@@ -1463,3 +1463,45 @@ def test_tool_search_saves_input_tokens():
         f"Expected tool_search ({usage_search.prompt_tokens}) to use fewer input tokens "
         f"than no search ({usage_no_search.prompt_tokens})"
     )
+
+
+def test_anthropic_cache_creation_tokens_extraction():
+    """Test that cache_creation_input_tokens are extracted from Anthropic responses."""
+    llm = LLM(model="anthropic/claude-3-5-sonnet-20241022")
+
+    mock_response = MagicMock()
+    mock_response.content = [MagicMock(text="test response")]
+    mock_response.usage = MagicMock(
+        input_tokens=100,
+        output_tokens=50,
+        cache_read_input_tokens=30,
+        cache_creation_input_tokens=20,
+    )
+    mock_response.stop_reason = None
+    mock_response.model = None
+
+    usage = llm._extract_anthropic_token_usage(mock_response)
+    assert usage["input_tokens"] == 100
+    assert usage["output_tokens"] == 50
+    assert usage["total_tokens"] == 150
+    assert usage["cached_prompt_tokens"] == 30
+    assert usage["cache_creation_tokens"] == 20
+
+
+def test_anthropic_missing_cache_fields_default_to_zero():
+ """Test that missing cache fields default to zero.""" + llm = LLM(model="anthropic/claude-3-5-sonnet-20241022") + + mock_response = MagicMock() + mock_response.content = [MagicMock(text="test response")] + mock_response.usage = MagicMock( + input_tokens=40, + output_tokens=20, + spec=["input_tokens", "output_tokens"], + ) + mock_response.usage.cache_read_input_tokens = None + mock_response.usage.cache_creation_input_tokens = None + + usage = llm._extract_anthropic_token_usage(mock_response) + assert usage["cached_prompt_tokens"] == 0 + assert usage["cache_creation_tokens"] == 0 diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index a0da30998..38b31a2e2 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -1403,3 +1403,44 @@ def test_azure_stop_words_still_applied_to_regular_responses(): assert "Observation:" not in result assert "Found results" not in result assert "I need to search for more information" in result + + +def test_azure_reasoning_tokens_and_cached_tokens(): + """Test that reasoning_tokens and cached_tokens are extracted from Azure responses.""" + llm = LLM(model="azure/gpt-4") + + mock_response = MagicMock() + mock_response.usage = MagicMock( + prompt_tokens=100, + completion_tokens=200, + total_tokens=300, + ) + mock_response.usage.prompt_tokens_details = MagicMock(cached_tokens=40) + mock_response.usage.completion_tokens_details = MagicMock(reasoning_tokens=60) + + usage = llm._extract_azure_token_usage(mock_response) + assert usage["prompt_tokens"] == 100 + assert usage["completion_tokens"] == 200 + assert usage["total_tokens"] == 300 + assert usage["cached_prompt_tokens"] == 40 + assert usage["reasoning_tokens"] == 60 + + +def test_azure_no_detail_fields(): + """Test Azure extraction without detail fields.""" + llm = LLM(model="azure/gpt-4") + + mock_response = MagicMock() + mock_response.usage = MagicMock( + prompt_tokens=50, + completion_tokens=30, + total_tokens=80, + ) + mock_response.usage.prompt_tokens_details = None + mock_response.usage.completion_tokens_details = None + + usage = llm._extract_azure_token_usage(mock_response) + assert usage["prompt_tokens"] == 50 + assert usage["completion_tokens"] == 30 + assert usage["cached_prompt_tokens"] == 0 + assert usage["reasoning_tokens"] == 0 diff --git a/lib/crewai/tests/llms/bedrock/test_bedrock.py b/lib/crewai/tests/llms/bedrock/test_bedrock.py index 76958bf86..959b1dbc2 100644 --- a/lib/crewai/tests/llms/bedrock/test_bedrock.py +++ b/lib/crewai/tests/llms/bedrock/test_bedrock.py @@ -1175,3 +1175,81 @@ def test_bedrock_tool_results_not_merged_across_assistant_messages(): ) assert tool_result_messages[0]["content"][0]["toolResult"]["toolUseId"] == "call_a" assert tool_result_messages[1]["content"][0]["toolResult"]["toolUseId"] == "call_b" + + +def test_bedrock_cached_token_tracking(): + """Test that cached tokens (cacheReadInputTokenCount) are tracked for Bedrock.""" + llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") + + with patch.object(llm._client, 'converse') as mock_converse: + mock_response = { + 'output': { + 'message': { + 'role': 'assistant', + 'content': [{'text': 'test response'}] + } + }, + 'usage': { + 'inputTokens': 100, + 'outputTokens': 50, + 'totalTokens': 150, + 'cacheReadInputTokenCount': 30, + } + } + mock_converse.return_value = mock_response + + result = llm.call("Hello") + assert result == "test response" + assert llm._token_usage['prompt_tokens'] == 100 + assert 
llm._token_usage['completion_tokens'] == 50 + assert llm._token_usage['total_tokens'] == 150 + assert llm._token_usage['cached_prompt_tokens'] == 30 + + +def test_bedrock_cached_token_alternate_key(): + """Test that the alternate key cacheReadInputTokens also works.""" + llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") + + with patch.object(llm._client, 'converse') as mock_converse: + mock_response = { + 'output': { + 'message': { + 'role': 'assistant', + 'content': [{'text': 'test response'}] + } + }, + 'usage': { + 'inputTokens': 80, + 'outputTokens': 40, + 'totalTokens': 120, + 'cacheReadInputTokens': 25, + } + } + mock_converse.return_value = mock_response + + llm.call("Hello") + assert llm._token_usage['cached_prompt_tokens'] == 25 + + +def test_bedrock_no_cache_tokens_defaults_to_zero(): + """Test that missing cache token keys default to zero.""" + llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0") + + with patch.object(llm._client, 'converse') as mock_converse: + mock_response = { + 'output': { + 'message': { + 'role': 'assistant', + 'content': [{'text': 'test response'}] + } + }, + 'usage': { + 'inputTokens': 60, + 'outputTokens': 30, + 'totalTokens': 90, + } + } + mock_converse.return_value = mock_response + + llm.call("Hello") + assert llm._token_usage['cached_prompt_tokens'] == 0 diff --git a/lib/crewai/tests/llms/google/test_google.py b/lib/crewai/tests/llms/google/test_google.py index d0553c7db..25b755dae 100644 --- a/lib/crewai/tests/llms/google/test_google.py +++ b/lib/crewai/tests/llms/google/test_google.py @@ -1190,3 +1190,42 @@ def test_gemini_cached_prompt_tokens_with_tools(): # cached_prompt_tokens should be populated (may be 0 if Gemini # doesn't cache for this particular request, but the field should exist) assert usage.cached_prompt_tokens >= 0 + + +def test_gemini_reasoning_tokens_extraction(): + """Test that thoughts_token_count is extracted as reasoning_tokens from Gemini.""" + llm = LLM(model="google/gemini-2.0-flash-001") + + mock_response = MagicMock() + mock_response.usage_metadata = MagicMock( + prompt_token_count=100, + candidates_token_count=50, + total_token_count=150, + cached_content_token_count=10, + thoughts_token_count=30, + ) + usage = llm._extract_token_usage(mock_response) + assert usage["prompt_token_count"] == 100 + assert usage["candidates_token_count"] == 50 + assert usage["total_tokens"] == 150 + assert usage["cached_prompt_tokens"] == 10 + assert usage["reasoning_tokens"] == 30 + + +def test_gemini_no_thinking_tokens_defaults_to_zero(): + """Test that missing thoughts_token_count defaults to zero.""" + llm = LLM(model="google/gemini-2.0-flash-001") + + mock_response = MagicMock() + mock_response.usage_metadata = MagicMock( + prompt_token_count=80, + candidates_token_count=40, + total_token_count=120, + cached_content_token_count=0, + thoughts_token_count=None, + ) + mock_response.candidates = [] + + usage = llm._extract_token_usage(mock_response) + assert usage["reasoning_tokens"] == 0 + assert usage["cached_prompt_tokens"] == 0 diff --git a/lib/crewai/tests/llms/openai/test_openai.py b/lib/crewai/tests/llms/openai/test_openai.py index 3dada2d85..5a2a6a299 100644 --- a/lib/crewai/tests/llms/openai/test_openai.py +++ b/lib/crewai/tests/llms/openai/test_openai.py @@ -1929,6 +1929,47 @@ def test_openai_streaming_returns_tool_calls_without_available_functions(): assert result[0]["type"] == "function" +def test_openai_responses_api_reasoning_tokens_extraction(): + """Test that reasoning_tokens are extracted 
from Responses API responses.""" + llm = LLM(model="openai/gpt-4o") + + mock_response = MagicMock() + mock_response.usage = MagicMock( + input_tokens=100, + output_tokens=200, + total_tokens=300, + ) + mock_response.usage.input_tokens_details = MagicMock(cached_tokens=25) + mock_response.usage.output_tokens_details = MagicMock(reasoning_tokens=80) + + usage = llm._extract_responses_token_usage(mock_response) + assert usage["prompt_tokens"] == 100 + assert usage["completion_tokens"] == 200 + assert usage["total_tokens"] == 300 + assert usage["cached_prompt_tokens"] == 25 + assert usage["reasoning_tokens"] == 80 + + +def test_openai_responses_api_no_detail_fields_omitted(): + """Test that reasoning/cached fields are omitted when Responses API details are absent.""" + llm = LLM(model="openai/gpt-4o") + + mock_response = MagicMock() + mock_response.usage = MagicMock( + input_tokens=50, + output_tokens=30, + total_tokens=80, + ) + mock_response.usage.input_tokens_details = None + mock_response.usage.output_tokens_details = None + + usage = llm._extract_responses_token_usage(mock_response) + assert usage["prompt_tokens"] == 50 + assert usage["completion_tokens"] == 30 + assert "cached_prompt_tokens" not in usage + assert "reasoning_tokens" not in usage + + @pytest.mark.asyncio async def test_openai_async_streaming_returns_tool_calls_without_available_functions(): """Test that async streaming returns tool calls list when available_functions is None. @@ -2018,3 +2059,44 @@ async def test_openai_async_streaming_returns_tool_calls_without_available_funct assert result[0]["function"]["arguments"] == '{"expression": "1+1"}' assert result[0]["id"] == "call_abc123" assert result[0]["type"] == "function" + + +def test_openai_reasoning_tokens_extraction(): + """Test that reasoning_tokens are extracted from OpenAI o-series responses.""" + llm = LLM(model="openai/gpt-4o") + + mock_response = MagicMock() + mock_response.usage = MagicMock( + prompt_tokens=100, + completion_tokens=200, + total_tokens=300, + ) + mock_response.usage.prompt_tokens_details = MagicMock(cached_tokens=25) + mock_response.usage.completion_tokens_details = MagicMock(reasoning_tokens=80) + + usage = llm._extract_openai_token_usage(mock_response) + assert usage["prompt_tokens"] == 100 + assert usage["completion_tokens"] == 200 + assert usage["total_tokens"] == 300 + assert usage["cached_prompt_tokens"] == 25 + assert usage["reasoning_tokens"] == 80 + + +def test_openai_no_detail_fields_omitted(): + """Test that reasoning/cached fields are omitted when details are absent.""" + llm = LLM(model="openai/gpt-4o") + + mock_response = MagicMock() + mock_response.usage = MagicMock( + prompt_tokens=50, + completion_tokens=30, + total_tokens=80, + ) + mock_response.usage.prompt_tokens_details = None + mock_response.usage.completion_tokens_details = None + + usage = llm._extract_openai_token_usage(mock_response) + assert usage["prompt_tokens"] == 50 + assert usage["completion_tokens"] == 30 + assert "cached_prompt_tokens" not in usage + assert "reasoning_tokens" not in usage diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py index 413504f31..60ecca7f0 100644 --- a/lib/crewai/tests/test_llm.py +++ b/lib/crewai/tests/test_llm.py @@ -1001,6 +1001,8 @@ def test_usage_info_non_streaming_with_call(): "completion_tokens": 0, "successful_requests": 0, "cached_prompt_tokens": 0, + "reasoning_tokens": 0, + "cache_creation_tokens": 0, } assert llm.stream is False @@ -1025,6 +1027,8 @@ def test_usage_info_streaming_with_call(): 
"completion_tokens": 0, "successful_requests": 0, "cached_prompt_tokens": 0, + "reasoning_tokens": 0, + "cache_creation_tokens": 0, } assert llm.stream is True @@ -1056,6 +1060,8 @@ async def test_usage_info_non_streaming_with_acall(): "completion_tokens": 0, "successful_requests": 0, "cached_prompt_tokens": 0, + "reasoning_tokens": 0, + "cache_creation_tokens": 0, } with patch.object( @@ -1089,6 +1095,8 @@ async def test_usage_info_non_streaming_with_acall_and_stop(): "completion_tokens": 0, "successful_requests": 0, "cached_prompt_tokens": 0, + "reasoning_tokens": 0, + "cache_creation_tokens": 0, } with patch.object( @@ -1121,6 +1129,8 @@ async def test_usage_info_streaming_with_acall(): "completion_tokens": 0, "successful_requests": 0, "cached_prompt_tokens": 0, + "reasoning_tokens": 0, + "cache_creation_tokens": 0, } with patch.object(