feat: emit token usage data in LLMCallCompletedEvent

Lucas Gomide
2026-03-31 13:18:36 -03:00
committed by GitHub
parent 3283a00e31
commit 68e943be68
12 changed files with 468 additions and 33 deletions
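For consumers of the event bus, the new `usage` field rides along on every completed call. A minimal sketch of reading it, assuming `crewai_event_bus` is the module-level bus instance used by the tests in this commit (the import path is inferred from those tests, and the handler name is illustrative):

from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.llm_events import LLMCallCompletedEvent

@crewai_event_bus.on(LLMCallCompletedEvent)
def log_token_usage(source, event):
    # usage is a plain dict, or None when the provider reports nothing
    if event.usage is not None:
        print(
            f"prompt={event.usage.get('prompt_tokens')} "
            f"completion={event.usage.get('completion_tokens')} "
            f"total={event.usage.get('total_tokens')}"
        )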

View File

@@ -0,0 +1,108 @@
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini"}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '74'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.2
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-DPS8YQSwQ3pZKZztIoIe1eYodMqh2\",\n \"object\":
        \"chat.completion\",\n \"created\": 1774958730,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"Hello! How can I assist you today?\",\n
        \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
        null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        9,\n \"completion_tokens\": 9,\n \"total_tokens\": 18,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_709f182cb4\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-Ray:
      - 9e4f38fc5d9d82e8-GIG
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Tue, 31 Mar 2026 12:05:30 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      content-length:
      - '839'
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '680'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
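The XXX placeholders above (AUTHORIZATION-XXX, SET-COOKIE-XXX, and friends) come from scrubbing at record time. A hypothetical sketch of how such a cassette is produced with pytest-recording's vcr_config fixture and vcrpy's filtering hooks; the exact filter list and hook body are assumptions, not part of this commit:

import pytest

def _scrub_response_headers(response):
    # Replace sensitive response headers with fixed placeholders
    # before the interaction is written to the cassette.
    response["headers"]["Set-Cookie"] = ["SET-COOKIE-XXX"]
    return response

@pytest.fixture(scope="module")
def vcr_config():
    return {
        # vcrpy's filter_headers rewrites request headers only;
        # response headers go through before_record_response.
        "filter_headers": [("authorization", "AUTHORIZATION-XXX")],
        "before_record_response": _scrub_response_headers,
    }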

View File

@@ -0,0 +1,176 @@
from typing import Any
from unittest.mock import patch

import pytest
from pydantic import BaseModel

from crewai.events.event_bus import CrewAIEventsBus
from crewai.events.types.llm_events import LLMCallCompletedEvent, LLMCallType
from crewai.llm import LLM
from crewai.llms.base_llm import BaseLLM


class TestLLMCallCompletedEventUsageField:
    def test_accepts_usage_dict(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
        )
        assert event.usage == {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        }

    def test_usage_defaults_to_none(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
        )
        assert event.usage is None

    def test_accepts_none_usage(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage=None,
        )
        assert event.usage is None

    def test_accepts_nested_usage_dict(self):
        usage = {
            "prompt_tokens": 100,
            "completion_tokens": 200,
            "total_tokens": 300,
            "prompt_tokens_details": {"cached_tokens": 50},
        }
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage=usage,
        )
        assert event.usage["prompt_tokens_details"]["cached_tokens"] == 50


class TestUsageToDict:
    def test_none_returns_none(self):
        assert LLM._usage_to_dict(None) is None

    def test_dict_passes_through(self):
        usage = {"prompt_tokens": 10, "total_tokens": 30}
        assert LLM._usage_to_dict(usage) is usage

    def test_pydantic_model_uses_model_dump(self):
        class Usage(BaseModel):
            prompt_tokens: int = 10
            completion_tokens: int = 20
            total_tokens: int = 30

        result = LLM._usage_to_dict(Usage())
        assert result == {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        }

    def test_object_with_dict_attr(self):
        class UsageObj:
            def __init__(self):
                self.prompt_tokens = 5
                self.completion_tokens = 15
                self.total_tokens = 20

        result = LLM._usage_to_dict(UsageObj())
        assert result == {
            "prompt_tokens": 5,
            "completion_tokens": 15,
            "total_tokens": 20,
        }

    def test_object_with_dict_excludes_private_attrs(self):
        class UsageObj:
            def __init__(self):
                self.total_tokens = 42
                self._internal = "hidden"

        result = LLM._usage_to_dict(UsageObj())
        assert result == {"total_tokens": 42}
        assert "_internal" not in result

    def test_unsupported_type_returns_none(self):
        assert LLM._usage_to_dict(42) is None
        assert LLM._usage_to_dict("string") is None


class _StubLLM(BaseLLM):
    """Minimal concrete BaseLLM for testing event emission."""

    model: str = "test-model"

    def call(self, *args: Any, **kwargs: Any) -> str:
        return ""

    async def acall(self, *args: Any, **kwargs: Any) -> str:
        return ""

    def supports_function_calling(self) -> bool:
        return False

    def supports_stop_words(self) -> bool:
        return True


class TestEmitCallCompletedEventPassesUsage:
    @pytest.fixture
    def mock_emit(self):
        with patch.object(CrewAIEventsBus, "emit") as mock:
            yield mock

    @pytest.fixture
    def llm(self):
        return _StubLLM(model="test-model")

    def test_usage_is_passed_to_event(self, mock_emit, llm):
        usage_data = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}
        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
            usage=usage_data,
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage == usage_data

    def test_none_usage_is_passed_to_event(self, mock_emit, llm):
        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
            usage=None,
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage is None

    def test_usage_omitted_defaults_to_none(self, mock_emit, llm):
        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage is None
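Taken together, the TestUsageToDict cases pin down the helper's whole contract: None and unsupported types map to None, dicts pass through by identity, pydantic models go through model_dump(), and plain objects are read via __dict__ minus private attributes. A sketch consistent with those assertions, written as a module-level function for brevity (in the codebase it is a static method on LLM, and the real body may differ in detail):

from typing import Any
from pydantic import BaseModel

def _usage_to_dict(usage: Any) -> dict[str, Any] | None:
    """Normalize a provider usage object into a plain dict, or None."""
    if usage is None:
        return None
    if isinstance(usage, dict):
        # Returned as-is: test_dict_passes_through asserts identity.
        return usage
    if isinstance(usage, BaseModel):
        return usage.model_dump()
    if hasattr(usage, "__dict__"):
        # Plain objects: keep public attributes, drop _private ones.
        return {k: v for k, v in vars(usage).items() if not k.startswith("_")}
    # ints, strings, and anything else unsupported
    return None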

View File

@@ -752,11 +752,7 @@ def test_litellm_retry_catches_litellm_unsupported_params_error(caplog):
             raise litellm_error
         return MagicMock(
             choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
-            usage=MagicMock(
-                prompt_tokens=10,
-                completion_tokens=5,
-                total_tokens=15,
-            ),
+            usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
         )
 
     with patch("litellm.completion", side_effect=mock_completion):
@@ -787,11 +783,7 @@ def test_litellm_retry_catches_openai_api_stop_error(caplog):
             raise api_error
         return MagicMock(
             choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
-            usage=MagicMock(
-                prompt_tokens=10,
-                completion_tokens=5,
-                total_tokens=15,
-            ),
+            usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
         )
 
     with patch("litellm.completion", side_effect=mock_completion):

View File

@@ -879,6 +879,35 @@ def test_llm_emits_call_started_event():
     assert started_events[0].task_id is None
 
 
+@pytest.mark.vcr()
+def test_llm_completed_event_includes_usage():
+    completed_events: list[LLMCallCompletedEvent] = []
+    condition = threading.Condition()
+
+    @crewai_event_bus.on(LLMCallCompletedEvent)
+    def handle_llm_call_completed(source, event):
+        with condition:
+            completed_events.append(event)
+            condition.notify()
+
+    llm = LLM(model="gpt-4o-mini")
+    llm.call("Say hello")
+
+    with condition:
+        success = condition.wait_for(
+            lambda: len(completed_events) >= 1,
+            timeout=10,
+        )
+    assert success, "Timeout waiting for LLMCallCompletedEvent"
+
+    event = completed_events[0]
+    assert event.usage is not None
+    assert isinstance(event.usage, dict)
+    assert event.usage.get("prompt_tokens", 0) > 0
+    assert event.usage.get("completion_tokens", 0) > 0
+    assert event.usage.get("total_tokens", 0) > 0
+
+
 @pytest.mark.vcr()
 def test_llm_emits_call_failed_event():
     received_events = []