From c08f218bf8929579b1eb69ef99246f66426ef5f7 Mon Sep 17 00:00:00 2001
From: Thiago Moretto
Date: Wed, 7 Jan 2026 09:03:44 -0300
Subject: [PATCH] fix: account for thought tokens on gemini models

Gemini thinking models report reasoning tokens separately as
thoughts_token_count in usage_metadata. Extract the field in the Gemini
provider and add it to the prompt token count in BaseLLM, so token usage
is no longer undercounted for these models.
---
 lib/crewai/src/crewai/llms/base_llm.py                    | 4 ++++
 lib/crewai/src/crewai/llms/providers/gemini/completion.py | 1 +
 lib/crewai/tests/llms/google/test_google.py               | 3 +++
 3 files changed, 8 insertions(+)

diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py
index c09c26453..8b9c9e241 100644
--- a/lib/crewai/src/crewai/llms/base_llm.py
+++ b/lib/crewai/src/crewai/llms/base_llm.py
@@ -569,5 +569,9 @@ class BaseLLM(ABC):
             or usage_data.get("input_tokens")
             or 0
         )
+        prompt_tokens += (
+            usage_data.get("thoughts_token_count")
+            or 0
+        )
         completion_tokens = (
             usage_data.get("completion_tokens")
diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
index b268f07de..b6c893aa9 100644
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -982,6 +982,7 @@ class GeminiCompletion(BaseLLM):
         usage = response.usage_metadata
         return {
             "prompt_token_count": getattr(usage, "prompt_token_count", 0),
+            "thoughts_token_count": getattr(usage, "thoughts_token_count", 0),
             "candidates_token_count": getattr(usage, "candidates_token_count", 0),
             "total_token_count": getattr(usage, "total_token_count", 0),
             "total_tokens": getattr(usage, "total_token_count", 0),
diff --git a/lib/crewai/tests/llms/google/test_google.py b/lib/crewai/tests/llms/google/test_google.py
index 37f591de6..d24f07a93 100644
--- a/lib/crewai/tests/llms/google/test_google.py
+++ b/lib/crewai/tests/llms/google/test_google.py
@@ -79,5 +79,6 @@ def test_gemini_tool_use_conversation_flow():
     mock_response.text = "Based on the weather data, it's a beautiful day in San Francisco with sunny skies and 75°F temperature."
     mock_response.usage_metadata = Mock()
     mock_response.usage_metadata.prompt_token_count = 100
+    mock_response.usage_metadata.thoughts_token_count = 50
     mock_response.usage_metadata.candidates_token_count = 50
     mock_response.usage_metadata.total_token_count = 150
@@ -630,6 +631,7 @@ def test_gemini_token_usage_tracking():
     mock_response.candidates = []
     mock_response.usage_metadata = MagicMock(
         prompt_token_count=50,
+        thoughts_token_count=25,
         candidates_token_count=25,
         total_token_count=75
     )
@@ -643,6 +645,7 @@
     # Verify token usage was extracted
     usage = llm._extract_token_usage(mock_response)
     assert usage["prompt_token_count"] == 50
+    assert usage["thoughts_token_count"] == 25
     assert usage["candidates_token_count"] == 25
     assert usage["total_token_count"] == 75
     assert usage["total_tokens"] == 75
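
Note on the base_llm.py hunk: the thought-token term is added as a separate
prompt_tokens += (...) statement rather than as a continuation line beginning
with "+", because a parenthesized group whose statement starts with "+" is
parsed by Python as a standalone unary-plus expression and its value is
silently discarded. A minimal sketch of the accounting, assuming an
illustrative usage_data shape (not crewai's actual runtime data):

    usage_data = {"prompt_tokens": 50, "thoughts_token_count": 25}

    # Broken layout: the second group is its own statement, so unary "+"
    # evaluates it and throws the result away.
    prompt_tokens = (
        usage_data.get("prompt_tokens")
        or usage_data.get("input_tokens")
        or 0
    )
    + (
        usage_data.get("thoughts_token_count")
        or 0
    )
    assert prompt_tokens == 50  # thought tokens silently dropped

    # Layout used in the hunk: the addition is its own augmented-assignment
    # statement, so the thought tokens are actually accumulated.
    prompt_tokens += (
        usage_data.get("thoughts_token_count")
        or 0
    )
    assert prompt_tokens == 75  # 50 prompt + 25 thought tokens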