diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index c61639e04..08c1a1bf8 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -1925,16 +1925,62 @@ class LLM(BaseLLM): @staticmethod def _usage_to_dict(usage: Any) -> dict[str, Any] | None: + """Convert a provider usage object to a plain dict and flatten the + cache/reasoning sub-counts that LiteLLM nests under provider-specific + shapes into the top-level keys the rest of the pipeline expects. + + LiteLLM hands back provider usage as-is, so cache-read, cache-creation + and reasoning tokens may live in nested objects (e.g. + ``prompt_tokens_details.cached_tokens``) or under Anthropic-style keys + (``cache_read_input_tokens``). Downstream span mapping only reads the + flat ``cached_prompt_tokens`` / ``reasoning_tokens`` / + ``cache_creation_tokens`` keys, so we surface them here. + + Only those derived buckets are populated; ``prompt_tokens`` / + ``completion_tokens`` / ``total_tokens`` are left untouched. Extraction + precedence mirrors ``BaseLLM._track_token_usage_internal``. 
+ """ if usage is None: return None if isinstance(usage, dict): - return usage - if isinstance(usage, BaseModel): - result: dict[str, Any] = usage.model_dump() - return result - if hasattr(usage, "__dict__"): - return {k: v for k, v in vars(usage).items() if not k.startswith("_")} - return None + data: dict[str, Any] = dict(usage) + elif isinstance(usage, BaseModel): + data = usage.model_dump() + elif hasattr(usage, "__dict__"): + data = {k: v for k, v in vars(usage).items() if not k.startswith("_")} + else: + return None + + def _nested(container: Any, key: str) -> Any: + if isinstance(container, dict): + return container.get(key) + return getattr(container, key, None) + + prompt_details = data.get("prompt_tokens_details") + completion_details = data.get("completion_tokens_details") + + cached_prompt_tokens = ( + data.get("cached_tokens") + or data.get("cached_prompt_tokens") + or data.get("cache_read_input_tokens") + or _nested(prompt_details, "cached_tokens") + ) + if cached_prompt_tokens is not None: + data["cached_prompt_tokens"] = cached_prompt_tokens + + reasoning_tokens = data.get("reasoning_tokens") or _nested( + completion_details, "reasoning_tokens" + ) + if reasoning_tokens is not None: + data["reasoning_tokens"] = reasoning_tokens + + cache_creation_tokens = data.get("cache_creation_tokens") or data.get( + "cache_creation_input_tokens" + ) + if cache_creation_tokens is not None: + data["cache_creation_tokens"] = cache_creation_tokens + + return data def _handle_emit_call_events( self, diff --git a/lib/crewai/tests/events/test_llm_usage_event.py b/lib/crewai/tests/events/test_llm_usage_event.py index 9be8c639f..d0b29c863 100644 --- a/lib/crewai/tests/events/test_llm_usage_event.py +++ b/lib/crewai/tests/events/test_llm_usage_event.py @@ -61,9 +61,84 @@ class TestUsageToDict: def test_none_returns_none(self): assert LLM._usage_to_dict(None) is None - def test_dict_passes_through(self): + def test_dict_without_nested_shapes_is_returned_unchanged(self): 
usage = {"prompt_tokens": 10, "total_tokens": 30} - assert LLM._usage_to_dict(usage) is usage + result = LLM._usage_to_dict(usage) + assert result == usage + # No cache/reasoning source keys are present, so no derived keys are added. + assert "cached_prompt_tokens" not in result + + @pytest.mark.parametrize( + ("usage", "expected"), + [ + pytest.param( + {"prompt_tokens": 100, "prompt_tokens_details": {"cached_tokens": 40}}, + {"cached_prompt_tokens": 40}, + id="openai-nested-cached-tokens", + ), + pytest.param( + {"prompt_tokens": 100, "cached_tokens": 30}, + {"cached_prompt_tokens": 30}, + id="flat-cached-tokens", + ), + pytest.param( + {"input_tokens": 100, "cache_read_input_tokens": 25}, + {"cached_prompt_tokens": 25}, + id="anthropic-cache-read-input-tokens", + ), + pytest.param( + { + "completion_tokens": 200, + "completion_tokens_details": {"reasoning_tokens": 60}, + }, + {"reasoning_tokens": 60}, + id="openai-nested-reasoning-tokens", + ), + pytest.param( + {"input_tokens": 100, "cache_creation_input_tokens": 70}, + {"cache_creation_tokens": 70}, + id="anthropic-cache-creation-input-tokens", + ), + pytest.param( + { + "prompt_tokens": 100, + "completion_tokens": 200, + "prompt_tokens_details": {"cached_tokens": 40}, + "completion_tokens_details": {"reasoning_tokens": 60}, + "cache_creation_input_tokens": 10, + }, + { + "cached_prompt_tokens": 40, + "reasoning_tokens": 60, + "cache_creation_tokens": 10, + }, + id="all-buckets-from-nested-shapes", + ), + ], + ) + def test_normalizes_nested_litellm_buckets(self, usage, expected): + result = LLM._usage_to_dict(usage) + for key, value in expected.items(): + assert result[key] == value + + def test_does_not_alter_core_token_counts(self): + usage = { + "prompt_tokens": 100, + "completion_tokens": 200, + "total_tokens": 300, + "prompt_tokens_details": {"cached_tokens": 40}, + } + result = LLM._usage_to_dict(usage) + assert result["prompt_tokens"] == 100 + assert result["completion_tokens"] == 200 + assert 
result["total_tokens"] == 300 + + def test_absent_buckets_are_not_added(self): + usage = {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300} + result = LLM._usage_to_dict(usage) + assert "cached_prompt_tokens" not in result + assert "reasoning_tokens" not in result + assert "cache_creation_tokens" not in result def test_pydantic_model_uses_model_dump(self): class Usage(BaseModel):