feat: emit token usage data in LLMCallCompletedEvent
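In practice, a listener reads the new field straight off the event. A minimal sketch of a hypothetical consumer follows; the bus import path is assumed, while the event type and the shape of .usage come from the tests in this commit:

# Hypothetical consumer sketch; the crewai_event_bus import path is assumed.
from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.llm_events import LLMCallCompletedEvent


@crewai_event_bus.on(LLMCallCompletedEvent)
def log_token_usage(source, event):
    # event.usage is a plain dict, or None when the provider reports nothing
    if event.usage is not None:
        print(
            f"prompt={event.usage.get('prompt_tokens')} "
            f"completion={event.usage.get('completion_tokens')} "
            f"total={event.usage.get('total_tokens')}"
        )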
@@ -0,0 +1,108 @@
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini"}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '74'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.2
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-DPS8YQSwQ3pZKZztIoIe1eYodMqh2\",\n \"object\":
        \"chat.completion\",\n \"created\": 1774958730,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"Hello! How can I assist you today?\",\n
        \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
        null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        9,\n \"completion_tokens\": 9,\n \"total_tokens\": 18,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_709f182cb4\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-Ray:
      - 9e4f38fc5d9d82e8-GIG
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Tue, 31 Mar 2026 12:05:30 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      content-length:
      - '839'
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '680'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
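The XXX placeholders in the cassette above show that volatile headers were scrubbed before recording. A sketch of a pytest-recording setup that could produce this kind of cassette follows; it is hypothetical, as the repository's actual VCR configuration is not shown in this diff:

# conftest.py (hypothetical): pytest-recording reads the vcr_config fixture
# and passes it to vcrpy for tests marked with @pytest.mark.vcr().
import pytest


@pytest.fixture(scope="module")
def vcr_config():
    return {
        # replace sensitive request headers before the cassette is written
        "filter_headers": [("authorization", "AUTHORIZATION-XXX")],
    }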
176  lib/crewai/tests/events/test_llm_usage_event.py  Normal file
@@ -0,0 +1,176 @@
from typing import Any
from unittest.mock import patch

import pytest
from pydantic import BaseModel

from crewai.events.event_bus import CrewAIEventsBus
from crewai.events.types.llm_events import LLMCallCompletedEvent, LLMCallType
from crewai.llm import LLM
from crewai.llms.base_llm import BaseLLM


class TestLLMCallCompletedEventUsageField:
    def test_accepts_usage_dict(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
        )
        assert event.usage == {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        }

    def test_usage_defaults_to_none(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
        )
        assert event.usage is None

    def test_accepts_none_usage(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage=None,
        )
        assert event.usage is None

    def test_accepts_nested_usage_dict(self):
        usage = {
            "prompt_tokens": 100,
            "completion_tokens": 200,
            "total_tokens": 300,
            "prompt_tokens_details": {"cached_tokens": 50},
        }
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage=usage,
        )
        assert event.usage["prompt_tokens_details"]["cached_tokens"] == 50


class TestUsageToDict:
    def test_none_returns_none(self):
        assert LLM._usage_to_dict(None) is None

    def test_dict_passes_through(self):
        usage = {"prompt_tokens": 10, "total_tokens": 30}
        assert LLM._usage_to_dict(usage) is usage

    def test_pydantic_model_uses_model_dump(self):
        class Usage(BaseModel):
            prompt_tokens: int = 10
            completion_tokens: int = 20
            total_tokens: int = 30

        result = LLM._usage_to_dict(Usage())
        assert result == {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        }

    def test_object_with_dict_attr(self):
        class UsageObj:
            def __init__(self):
                self.prompt_tokens = 5
                self.completion_tokens = 15
                self.total_tokens = 20

        result = LLM._usage_to_dict(UsageObj())
        assert result == {
            "prompt_tokens": 5,
            "completion_tokens": 15,
            "total_tokens": 20,
        }

    def test_object_with_dict_excludes_private_attrs(self):
        class UsageObj:
            def __init__(self):
                self.total_tokens = 42
                self._internal = "hidden"

        result = LLM._usage_to_dict(UsageObj())
        assert result == {"total_tokens": 42}
        assert "_internal" not in result

    def test_unsupported_type_returns_none(self):
        assert LLM._usage_to_dict(42) is None
        assert LLM._usage_to_dict("string") is None


class _StubLLM(BaseLLM):
    """Minimal concrete BaseLLM for testing event emission."""

    model: str = "test-model"

    def call(self, *args: Any, **kwargs: Any) -> str:
        return ""

    async def acall(self, *args: Any, **kwargs: Any) -> str:
        return ""

    def supports_function_calling(self) -> bool:
        return False

    def supports_stop_words(self) -> bool:
        return True


class TestEmitCallCompletedEventPassesUsage:
    @pytest.fixture
    def mock_emit(self):
        with patch.object(CrewAIEventsBus, "emit") as mock:
            yield mock

    @pytest.fixture
    def llm(self):
        return _StubLLM(model="test-model")

    def test_usage_is_passed_to_event(self, mock_emit, llm):
        usage_data = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}

        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
            usage=usage_data,
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage == usage_data

    def test_none_usage_is_passed_to_event(self, mock_emit, llm):
        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
            usage=None,
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage is None

    def test_usage_omitted_defaults_to_none(self, mock_emit, llm):
        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage is None
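The TestUsageToDict cases above pin down the conversion contract: dicts pass through by identity, pydantic models are dumped, plain objects contribute their public attributes, and anything else maps to None. A sketch of an implementation consistent with those tests follows; it is hypothetical, since the actual LLM._usage_to_dict body is not part of this diff:

# Hypothetical free-function sketch of the staticmethod exercised above.
from typing import Any

from pydantic import BaseModel


def _usage_to_dict(usage: Any) -> dict[str, Any] | None:
    if usage is None:
        return None
    if isinstance(usage, dict):
        return usage  # pass through unchanged, preserving identity
    if isinstance(usage, BaseModel):
        return usage.model_dump()  # pydantic usage models
    if hasattr(usage, "__dict__"):
        # arbitrary objects: keep public attributes, drop _private ones
        return {k: v for k, v in vars(usage).items() if not k.startswith("_")}
    return None  # ints, strings, and other unsupported shapes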
@@ -752,11 +752,7 @@ def test_litellm_retry_catches_litellm_unsupported_params_error(caplog):
             raise litellm_error
         return MagicMock(
             choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
-            usage=MagicMock(
-                prompt_tokens=10,
-                completion_tokens=5,
-                total_tokens=15,
-            ),
+            usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
         )

     with patch("litellm.completion", side_effect=mock_completion):
@@ -787,11 +783,7 @@ def test_litellm_retry_catches_openai_api_stop_error(caplog):
             raise api_error
         return MagicMock(
             choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
-            usage=MagicMock(
-                prompt_tokens=10,
-                completion_tokens=5,
-                total_tokens=15,
-            ),
+            usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
         )

     with patch("litellm.completion", side_effect=mock_completion):
@@ -879,6 +879,35 @@ def test_llm_emits_call_started_event():
     assert started_events[0].task_id is None


+@pytest.mark.vcr()
+def test_llm_completed_event_includes_usage():
+    completed_events: list[LLMCallCompletedEvent] = []
+    condition = threading.Condition()
+
+    @crewai_event_bus.on(LLMCallCompletedEvent)
+    def handle_llm_call_completed(source, event):
+        with condition:
+            completed_events.append(event)
+            condition.notify()
+
+    llm = LLM(model="gpt-4o-mini")
+    llm.call("Say hello")
+
+    with condition:
+        success = condition.wait_for(
+            lambda: len(completed_events) >= 1,
+            timeout=10,
+        )
+        assert success, "Timeout waiting for LLMCallCompletedEvent"
+
+    event = completed_events[0]
+    assert event.usage is not None
+    assert isinstance(event.usage, dict)
+    assert event.usage.get("prompt_tokens", 0) > 0
+    assert event.usage.get("completion_tokens", 0) > 0
+    assert event.usage.get("total_tokens", 0) > 0
+
+
 @pytest.mark.vcr()
 def test_llm_emits_call_failed_event():
     received_events = []
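Taken together, TestEmitCallCompletedEventPassesUsage and the integration test above imply an emitter shaped roughly like the sketch below. It is hypothetical: the real BaseLLM._emit_call_completed_event is not shown in this diff, the bus import path is assumed, and how messages and call_id land on the event is a guess.

# Hypothetical emitter sketch, inferred from how the tests unpack
# mock_emit.call_args[1]["event"]; signature details are assumptions.
from typing import Any

from crewai.events.event_bus import crewai_event_bus  # import path assumed
from crewai.events.types.llm_events import LLMCallCompletedEvent, LLMCallType


class _EmitterSketch:
    def _emit_call_completed_event(
        self,
        response: Any,
        call_type: LLMCallType,
        messages: Any = None,  # accepted per the tests; its handling is not shown
        usage: dict[str, Any] | None = None,
    ) -> None:
        # emit(source, event=...) matches the tests, which patch
        # CrewAIEventsBus.emit and read the "event" keyword argument.
        crewai_event_bus.emit(
            self,
            event=LLMCallCompletedEvent(
                response=response,
                call_type=call_type,
                usage=usage,  # the new field; None stays None
                call_id="hypothetical-id",  # real code presumably correlates calls
            ),
        )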