mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-02 05:38:12 +00:00
feat: flatten LiteLLM cache/reasoning usage sub-counts in _usage_to_dict (#6033)
LiteLLM returns provider usage as-is, nesting cache-read / cache-creation / reasoning counts under provider-specific shapes (e.g. prompt_tokens_details.cached_tokens, Anthropic-style cache_read_input_tokens). Surface them as flat cached_prompt_tokens / reasoning_tokens / cache_creation_tokens keys so the span pipeline can read them; prompt / completion / total token counts are left untouched.
This commit is contained in:
@@ -1925,16 +1925,62 @@ class LLM(BaseLLM):
|
||||
|
||||
@staticmethod
|
||||
def _usage_to_dict(usage: Any) -> dict[str, Any] | None:
|
||||
"""Convert a provider usage object to a plain dict and flatten the
|
||||
cache/reasoning sub-counts that LiteLLM nests under provider-specific
|
||||
shapes into the top-level keys the rest of the pipeline expects.
|
||||
|
||||
LiteLLM hands back provider usage as-is, so cache-read, cache-creation
|
||||
and reasoning tokens may live in nested objects (e.g.
|
||||
``prompt_tokens_details.cached_tokens``) or under Anthropic-style keys
|
||||
(``cache_read_input_tokens``). Downstream span mapping only reads the
|
||||
flat ``cached_prompt_tokens`` / ``reasoning_tokens`` /
|
||||
``cache_creation_tokens`` keys, so we surface them here.
|
||||
|
||||
Only those derived buckets are populated; ``prompt_tokens`` /
|
||||
``completion_tokens`` / ``total_tokens`` are left untouched. Extraction
|
||||
precedence mirrors ``BaseLLM._track_token_usage_internal``.
|
||||
"""
|
||||
if usage is None:
|
||||
return None
|
||||
if isinstance(usage, dict):
|
||||
return usage
|
||||
if isinstance(usage, BaseModel):
|
||||
result: dict[str, Any] = usage.model_dump()
|
||||
return result
|
||||
if hasattr(usage, "__dict__"):
|
||||
return {k: v for k, v in vars(usage).items() if not k.startswith("_")}
|
||||
return None
|
||||
data: dict[str, Any] = dict(usage)
|
||||
elif isinstance(usage, BaseModel):
|
||||
data = usage.model_dump()
|
||||
elif hasattr(usage, "__dict__"):
|
||||
data = {k: v for k, v in vars(usage).items() if not k.startswith("_")}
|
||||
else:
|
||||
return None
|
||||
|
||||
def _nested(container: Any, key: str) -> Any:
|
||||
if isinstance(container, dict):
|
||||
return container.get(key)
|
||||
return getattr(container, key, None)
|
||||
|
||||
prompt_details = data.get("prompt_tokens_details")
|
||||
completion_details = data.get("completion_tokens_details")
|
||||
|
||||
cached_prompt_tokens = (
|
||||
data.get("cached_tokens")
|
||||
or data.get("cached_prompt_tokens")
|
||||
or data.get("cache_read_input_tokens")
|
||||
or _nested(prompt_details, "cached_tokens")
|
||||
)
|
||||
if cached_prompt_tokens is not None:
|
||||
data["cached_prompt_tokens"] = cached_prompt_tokens
|
||||
|
||||
reasoning_tokens = data.get("reasoning_tokens") or _nested(
|
||||
completion_details, "reasoning_tokens"
|
||||
)
|
||||
if reasoning_tokens is not None:
|
||||
data["reasoning_tokens"] = reasoning_tokens
|
||||
|
||||
cache_creation_tokens = data.get("cache_creation_tokens") or data.get(
|
||||
"cache_creation_input_tokens"
|
||||
)
|
||||
if cache_creation_tokens is not None:
|
||||
data["cache_creation_tokens"] = cache_creation_tokens
|
||||
|
||||
return data
|
||||
|
||||
def _handle_emit_call_events(
|
||||
self,
|
||||
|
||||
@@ -61,9 +61,84 @@ class TestUsageToDict:
|
||||
def test_none_returns_none(self):
    """A missing usage object converts to ``None`` rather than a dict."""
    converted = LLM._usage_to_dict(None)
    assert converted is None
|
||||
|
||||
def test_dict_without_nested_shapes_is_returned_unchanged(self):
    """A dict with no cache/reasoning shapes comes back equal, as a copy."""
    usage = {"prompt_tokens": 10, "total_tokens": 30}
    result = LLM._usage_to_dict(usage)
    assert result == usage
    # The converter copies dict input, so the result must be a distinct
    # object even when nothing was added to it.
    assert result is not usage
    # No source count is present, so no derived key is introduced.
    assert "cached_prompt_tokens" not in result
|
||||
|
||||
@pytest.mark.parametrize(
    ("usage", "expected"),
    [
        pytest.param(
            {"prompt_tokens": 100, "prompt_tokens_details": {"cached_tokens": 40}},
            {"cached_prompt_tokens": 40},
            id="openai-nested-cached-tokens",
        ),
        pytest.param(
            {"prompt_tokens": 100, "cached_tokens": 30},
            {"cached_prompt_tokens": 30},
            id="flat-cached-tokens",
        ),
        pytest.param(
            {"input_tokens": 100, "cache_read_input_tokens": 25},
            {"cached_prompt_tokens": 25},
            id="anthropic-cache-read-input-tokens",
        ),
        pytest.param(
            {
                "completion_tokens": 200,
                "completion_tokens_details": {"reasoning_tokens": 60},
            },
            {"reasoning_tokens": 60},
            id="openai-nested-reasoning-tokens",
        ),
        pytest.param(
            {"input_tokens": 100, "cache_creation_input_tokens": 70},
            {"cache_creation_tokens": 70},
            id="anthropic-cache-creation-input-tokens",
        ),
        pytest.param(
            {
                "prompt_tokens": 100,
                "completion_tokens": 200,
                "prompt_tokens_details": {"cached_tokens": 40},
                "completion_tokens_details": {"reasoning_tokens": 60},
                "cache_creation_input_tokens": 10,
            },
            {
                "cached_prompt_tokens": 40,
                "reasoning_tokens": 60,
                "cache_creation_tokens": 10,
            },
            id="all-buckets-from-nested-shapes",
        ),
    ],
)
def test_normalizes_nested_litellm_buckets(self, usage, expected):
    """Each provider-specific shape is surfaced under its flat key."""
    flattened = LLM._usage_to_dict(usage)
    # Compare only the derived keys this case cares about; the remaining
    # fields of the result are covered by other tests.
    observed = {name: flattened[name] for name in expected}
    assert observed == expected
|
||||
|
||||
def test_does_not_alter_core_token_counts(self):
    """Flattening the nested buckets leaves the three core counts as given."""
    core_counts = {
        "prompt_tokens": 100,
        "completion_tokens": 200,
        "total_tokens": 300,
    }
    usage = {**core_counts, "prompt_tokens_details": {"cached_tokens": 40}}
    result = LLM._usage_to_dict(usage)
    for name, count in core_counts.items():
        assert result[name] == count
|
||||
|
||||
def test_absent_buckets_are_not_added(self):
    """Derived keys stay absent when the usage carries no source for them."""
    result = LLM._usage_to_dict(
        {"prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300}
    )
    for derived_key in (
        "cached_prompt_tokens",
        "reasoning_tokens",
        "cache_creation_tokens",
    ):
        assert derived_key not in result
|
||||
|
||||
def test_pydantic_model_uses_model_dump(self):
|
||||
class Usage(BaseModel):
|
||||
|
||||
Reference in New Issue
Block a user