feat: flatten LiteLLM cache/reasoning usage sub-counts in _usage_to_dict (#6033)

LiteLLM returns provider usage as-is, so cache-read / cache-creation / reasoning counts may be nested in provider-specific objects (e.g. prompt_tokens_details.cached_tokens) or exposed under Anthropic-style flat keys (e.g. cache_read_input_tokens). Surface them as flat cached_prompt_tokens / reasoning_tokens / cache_creation_tokens keys so the span pipeline can read them; prompt / completion / total token counts are left untouched. Dict inputs are now shallow-copied instead of being returned by identity, so the caller's dict is never mutated.
2026-07-02 05:38:12 +00:00 · 2026-06-03 16:13:30 -03:00
parent 1357491f0d
commit d09e3f4544
2 changed files with 130 additions and 9 deletions
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -1925,16 +1925,62 @@ class LLM(BaseLLM):

    @staticmethod
    def _usage_to_dict(usage: Any) -> dict[str, Any] | None:
+        """Convert a provider usage object to a plain dict and flatten the
+        cache/reasoning sub-counts that LiteLLM nests under provider-specific
+        shapes into the top-level keys the rest of the pipeline expects.
+
+        LiteLLM hands back provider usage as-is, so cache-read, cache-creation
+        and reasoning tokens may live in nested objects (e.g.
+        ``prompt_tokens_details.cached_tokens``) or under Anthropic-style keys
+        (``cache_read_input_tokens``). Downstream span mapping only reads the
+        flat ``cached_prompt_tokens`` / ``reasoning_tokens`` /
+        ``cache_creation_tokens`` keys, so we surface them here.
+
+        Only those derived buckets are populated; ``prompt_tokens`` /
+        ``completion_tokens`` / ``total_tokens`` are left untouched. Extraction
+        precedence mirrors ``BaseLLM._track_token_usage_internal``.
+        """
        if usage is None:
            return None
        if isinstance(usage, dict):
-            return usage
-        if isinstance(usage, BaseModel):
-            result: dict[str, Any] = usage.model_dump()
-            return result
-        if hasattr(usage, "__dict__"):
-            return {k: v for k, v in vars(usage).items() if not k.startswith("_")}
-        return None
+            data: dict[str, Any] = dict(usage)
+        elif isinstance(usage, BaseModel):
+            data = usage.model_dump()
+        elif hasattr(usage, "__dict__"):
+            data = {k: v for k, v in vars(usage).items() if not k.startswith("_")}
+        else:
+            return None
+
+        def _nested(container: Any, key: str) -> Any:
+            if isinstance(container, dict):
+                return container.get(key)
+            return getattr(container, key, None)
+
+        prompt_details = data.get("prompt_tokens_details")
+        completion_details = data.get("completion_tokens_details")
+
+        cached_prompt_tokens = (
+            data.get("cached_tokens")
+            or data.get("cached_prompt_tokens")
+            or data.get("cache_read_input_tokens")
+            or _nested(prompt_details, "cached_tokens")
+        )
+        if cached_prompt_tokens is not None:
+            data["cached_prompt_tokens"] = cached_prompt_tokens
+
+        reasoning_tokens = data.get("reasoning_tokens") or _nested(
+            completion_details, "reasoning_tokens"
+        )
+        if reasoning_tokens is not None:
+            data["reasoning_tokens"] = reasoning_tokens
+
+        cache_creation_tokens = data.get("cache_creation_tokens") or data.get(
+            "cache_creation_input_tokens"
+        )
+        if cache_creation_tokens is not None:
+            data["cache_creation_tokens"] = cache_creation_tokens
+
+        return data

    def _handle_emit_call_events(
        self,
--- a/lib/crewai/tests/events/test_llm_usage_event.py
+++ b/lib/crewai/tests/events/test_llm_usage_event.py
@@ -61,9 +61,84 @@ class TestUsageToDict:
    def test_none_returns_none(self):
        assert LLM._usage_to_dict(None) is None

-    def test_dict_passes_through(self):
+    def test_dict_without_nested_shapes_is_returned_unchanged(self):
        usage = {"prompt_tokens": 10, "total_tokens": 30}
-        assert LLM._usage_to_dict(usage) is usage
+        result = LLM._usage_to_dict(usage)
+        assert result == usage
+        # No cache/reasoning source fields are present, so no derived keys are added.
+        assert "cached_prompt_tokens" not in result
+
+    @pytest.mark.parametrize(
+        ("usage", "expected"),
+        [
+            pytest.param(
+                {"prompt_tokens": 100, "prompt_tokens_details": {"cached_tokens": 40}},
+                {"cached_prompt_tokens": 40},
+                id="openai-nested-cached-tokens",
+            ),
+            pytest.param(
+                {"prompt_tokens": 100, "cached_tokens": 30},
+                {"cached_prompt_tokens": 30},
+                id="flat-cached-tokens",
+            ),
+            pytest.param(
+                {"input_tokens": 100, "cache_read_input_tokens": 25},
+                {"cached_prompt_tokens": 25},
+                id="anthropic-cache-read-input-tokens",
+            ),
+            pytest.param(
+                {
+                    "completion_tokens": 200,
+                    "completion_tokens_details": {"reasoning_tokens": 60},
+                },
+                {"reasoning_tokens": 60},
+                id="openai-nested-reasoning-tokens",
+            ),
+            pytest.param(
+                {"input_tokens": 100, "cache_creation_input_tokens": 70},
+                {"cache_creation_tokens": 70},
+                id="anthropic-cache-creation-input-tokens",
+            ),
+            pytest.param(
+                {
+                    "prompt_tokens": 100,
+                    "completion_tokens": 200,
+                    "prompt_tokens_details": {"cached_tokens": 40},
+                    "completion_tokens_details": {"reasoning_tokens": 60},
+                    "cache_creation_input_tokens": 10,
+                },
+                {
+                    "cached_prompt_tokens": 40,
+                    "reasoning_tokens": 60,
+                    "cache_creation_tokens": 10,
+                },
+                id="all-buckets-from-nested-shapes",
+            ),
+        ],
+    )
+    def test_normalizes_nested_litellm_buckets(self, usage, expected):
+        result = LLM._usage_to_dict(usage)
+        for key, value in expected.items():
+            assert result[key] == value
+
+    def test_does_not_alter_core_token_counts(self):
+        usage = {
+            "prompt_tokens": 100,
+            "completion_tokens": 200,
+            "total_tokens": 300,
+            "prompt_tokens_details": {"cached_tokens": 40},
+        }
+        result = LLM._usage_to_dict(usage)
+        assert result["prompt_tokens"] == 100
+        assert result["completion_tokens"] == 200
+        assert result["total_tokens"] == 300
+
+    def test_absent_buckets_are_not_added(self):
+        usage = {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}
+        result = LLM._usage_to_dict(usage)
+        assert "cached_prompt_tokens" not in result
+        assert "reasoning_tokens" not in result
+        assert "cache_creation_tokens" not in result

    def test_pydantic_model_uses_model_dump(self):
        class Usage(BaseModel):