feat: flatten LiteLLM cache/reasoning usage sub-counts in _usage_to_dict (#6033)

LiteLLM returns provider usage as-is, nesting cache-read / cache-creation /
reasoning counts under provider-specific shapes (e.g.
prompt_tokens_details.cached_tokens, Anthropic-style cache_read_input_tokens).
Surface them as flat cached_prompt_tokens / reasoning_tokens /
cache_creation_tokens keys so the span pipeline can read them; prompt /
completion / total token counts are left untouched.
This commit is contained in:
Lucas Gomide
2026-06-03 16:13:30 -03:00
committed by GitHub
parent 1357491f0d
commit d09e3f4544
2 changed files with 130 additions and 9 deletions

View File

@@ -1925,16 +1925,62 @@ class LLM(BaseLLM):
@staticmethod
def _usage_to_dict(usage: Any) -> dict[str, Any] | None:
"""Convert a provider usage object to a plain dict and flatten the
cache/reasoning sub-counts that LiteLLM nests under provider-specific
shapes into the top-level keys the rest of the pipeline expects.
LiteLLM hands back provider usage as-is, so cache-read, cache-creation
and reasoning tokens may live in nested objects (e.g.
``prompt_tokens_details.cached_tokens``) or under Anthropic-style keys
(``cache_read_input_tokens``). Downstream span mapping only reads the
flat ``cached_prompt_tokens`` / ``reasoning_tokens`` /
``cache_creation_tokens`` keys, so we surface them here.
Only those derived buckets are populated; ``prompt_tokens`` /
``completion_tokens`` / ``total_tokens`` are left untouched. Extraction
precedence mirrors ``BaseLLM._track_token_usage_internal``.
"""
if usage is None:
return None
if isinstance(usage, dict):
return usage
if isinstance(usage, BaseModel):
result: dict[str, Any] = usage.model_dump()
return result
if hasattr(usage, "__dict__"):
return {k: v for k, v in vars(usage).items() if not k.startswith("_")}
return None
data: dict[str, Any] = dict(usage)
elif isinstance(usage, BaseModel):
data = usage.model_dump()
elif hasattr(usage, "__dict__"):
data = {k: v for k, v in vars(usage).items() if not k.startswith("_")}
else:
return None
def _nested(container: Any, key: str) -> Any:
if isinstance(container, dict):
return container.get(key)
return getattr(container, key, None)
prompt_details = data.get("prompt_tokens_details")
completion_details = data.get("completion_tokens_details")
cached_prompt_tokens = (
data.get("cached_tokens")
or data.get("cached_prompt_tokens")
or data.get("cache_read_input_tokens")
or _nested(prompt_details, "cached_tokens")
)
if cached_prompt_tokens is not None:
data["cached_prompt_tokens"] = cached_prompt_tokens
reasoning_tokens = data.get("reasoning_tokens") or _nested(
completion_details, "reasoning_tokens"
)
if reasoning_tokens is not None:
data["reasoning_tokens"] = reasoning_tokens
cache_creation_tokens = data.get("cache_creation_tokens") or data.get(
"cache_creation_input_tokens"
)
if cache_creation_tokens is not None:
data["cache_creation_tokens"] = cache_creation_tokens
return data
def _handle_emit_call_events(
self,

View File

@@ -61,9 +61,84 @@ class TestUsageToDict:
def test_none_returns_none(self):
    """A missing usage payload is passed through as ``None``."""
    converted = LLM._usage_to_dict(None)
    assert converted is None
def test_dict_without_nested_shapes_is_returned_unchanged(self):
    """A dict with no cache/reasoning buckets comes back equal, as a copy."""
    usage = {"prompt_tokens": 10, "total_tokens": 30}
    snapshot = dict(usage)
    result = LLM._usage_to_dict(usage)
    assert result == usage
    # The converter works on a copy: pin both halves of that contract.
    assert result is not usage
    assert usage == snapshot
    # No source bucket is present in the input, so no derived key appears.
    assert "cached_prompt_tokens" not in result
@pytest.mark.parametrize(
    ("usage", "expected"),
    [
        pytest.param(
            {
                "prompt_tokens": 100,
                "prompt_tokens_details": {"cached_tokens": 40},
            },
            {"cached_prompt_tokens": 40},
            id="openai-nested-cached-tokens",
        ),
        pytest.param(
            {"prompt_tokens": 100, "cached_tokens": 30},
            {"cached_prompt_tokens": 30},
            id="flat-cached-tokens",
        ),
        pytest.param(
            {"input_tokens": 100, "cache_read_input_tokens": 25},
            {"cached_prompt_tokens": 25},
            id="anthropic-cache-read-input-tokens",
        ),
        pytest.param(
            {
                "completion_tokens": 200,
                "completion_tokens_details": {"reasoning_tokens": 60},
            },
            {"reasoning_tokens": 60},
            id="openai-nested-reasoning-tokens",
        ),
        pytest.param(
            {"input_tokens": 100, "cache_creation_input_tokens": 70},
            {"cache_creation_tokens": 70},
            id="anthropic-cache-creation-input-tokens",
        ),
        pytest.param(
            {
                "prompt_tokens": 100,
                "completion_tokens": 200,
                "prompt_tokens_details": {"cached_tokens": 40},
                "completion_tokens_details": {"reasoning_tokens": 60},
                "cache_creation_input_tokens": 10,
            },
            {
                "cached_prompt_tokens": 40,
                "reasoning_tokens": 60,
                "cache_creation_tokens": 10,
            },
            id="all-buckets-from-nested-shapes",
        ),
    ],
)
def test_normalizes_nested_litellm_buckets(self, usage, expected):
    """Each provider-specific shape is flattened into its top-level bucket."""
    flattened = LLM._usage_to_dict(usage)
    # Compare only the buckets this case cares about, all at once.
    observed = {bucket: flattened[bucket] for bucket in expected}
    assert observed == expected
def test_does_not_alter_core_token_counts(self):
    """Flattening the cache bucket leaves prompt/completion/total as given."""
    core_counts = {
        "prompt_tokens": 100,
        "completion_tokens": 200,
        "total_tokens": 300,
    }
    payload = {**core_counts, "prompt_tokens_details": {"cached_tokens": 40}}
    converted = LLM._usage_to_dict(payload)
    for field, count in core_counts.items():
        assert converted[field] == count
def test_absent_buckets_are_not_added(self):
    """No derived key is invented when the provider reports none of them."""
    payload = {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}
    converted = LLM._usage_to_dict(payload)
    derived_keys = ("cached_prompt_tokens", "reasoning_tokens", "cache_creation_tokens")
    for derived_key in derived_keys:
        assert derived_key not in converted
def test_pydantic_model_uses_model_dump(self):
class Usage(BaseModel):