mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-04-30 23:02:50 +00:00
feat: emit token usage data in LLMCallCompletedEvent
Add a usage field to LLMCallCompletedEvent and wire it through BaseLLM._emit_call_completed_event and all provider implementations (OpenAI, Anthropic, Gemini, Bedrock, Azure, LiteLLM).
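
For event consumers, a minimal sketch of reading the new field (assuming crewai_event_bus is exported from crewai.events.event_bus, alongside the CrewAIEventsBus class the test module below imports; the keys inside usage follow each provider's response format, e.g. prompt_tokens / completion_tokens / total_tokens for OpenAI):

from crewai.events.event_bus import crewai_event_bus
from crewai.events.types.llm_events import LLMCallCompletedEvent
from crewai.llm import LLM

@crewai_event_bus.on(LLMCallCompletedEvent)
def log_token_usage(source, event):
    # usage is None when the provider returned no usage data
    if event.usage is not None:
        print(
            f"{event.model}: {event.usage.get('prompt_tokens', 0)} prompt + "
            f"{event.usage.get('completion_tokens', 0)} completion = "
            f"{event.usage.get('total_tokens', 0)} total tokens"
        )

llm = LLM(model="gpt-4o-mini")
llm.call("Say hello")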
@@ -57,6 +57,7 @@ class LLMCallCompletedEvent(LLMEventBase):
messages: str | list[dict[str, Any]] | None = None
response: Any
call_type: LLMCallType
usage: dict[str, Any] | None = None


class LLMCallFailedEvent(LLMEventBase):

@@ -970,21 +970,25 @@ class LLM(BaseLLM):
)
result = instructor_instance.to_pydantic()
structured_response = result.model_dump_json()
usage_dict = self._usage_to_dict(usage_info)
self._handle_emit_call_events(
response=structured_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_dict,
)
return structured_response

usage_dict = self._usage_to_dict(usage_info)
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_dict,
)
return full_response

@@ -994,12 +998,14 @@ class LLM(BaseLLM):
return tool_result

# --- 10) Emit completion event and return response
usage_dict = self._usage_to_dict(usage_info)
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_dict,
)
return full_response

@@ -1021,6 +1027,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=self._usage_to_dict(usage_info),
)
return full_response

@@ -1172,6 +1179,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=None,
)
return structured_response

@@ -1202,6 +1210,8 @@ class LLM(BaseLLM):
raise LLMContextLengthExceededError(error_msg) from e
raise

response_usage = self._usage_to_dict(getattr(response, "usage", None))

# --- 2) Handle structured output response (when response_model is provided)
if response_model is not None:
# When using instructor/response_model, litellm returns a Pydantic model instance

@@ -1213,6 +1223,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=response_usage,
)
return structured_response

@@ -1244,6 +1255,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=response_usage,
)
return text_response

@@ -1267,6 +1279,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=response_usage,
)
return text_response

@@ -1316,6 +1329,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=None,
)
return structured_response

@@ -1342,6 +1356,8 @@ class LLM(BaseLLM):
raise LLMContextLengthExceededError(error_msg) from e
raise

response_usage = self._usage_to_dict(getattr(response, "usage", None))

if response_model is not None:
if isinstance(response, BaseModel):
structured_response = response.model_dump_json()

@@ -1351,6 +1367,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=response_usage,
)
return structured_response

@@ -1380,6 +1397,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=response_usage,
)
return text_response

@@ -1402,6 +1420,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=response_usage,
)
return text_response

@@ -1548,12 +1567,14 @@ class LLM(BaseLLM):
if result is not None:
return result

usage_dict = self._usage_to_dict(usage_info)
self._handle_emit_call_events(
response=full_response,
call_type=LLMCallType.LLM_CALL,
from_task=from_task,
from_agent=from_agent,
messages=params.get("messages"),
usage=usage_dict,
)
return full_response

@@ -1575,6 +1596,7 @@ class LLM(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("messages"),
usage=self._usage_to_dict(usage_info),
)
return full_response
raise

@@ -1961,6 +1983,18 @@ class LLM(BaseLLM):
)
raise

@staticmethod
def _usage_to_dict(usage: Any) -> dict[str, Any] | None:
if usage is None:
return None
if isinstance(usage, dict):
return usage
if hasattr(usage, "model_dump"):
return usage.model_dump()
if hasattr(usage, "__dict__"):
return {k: v for k, v in vars(usage).items() if not k.startswith("_")}
return None

def _handle_emit_call_events(
self,
response: Any,

@@ -1968,6 +2002,7 @@ class LLM(BaseLLM):
from_task: Task | None = None,
from_agent: Agent | None = None,
messages: str | list[LLMMessage] | None = None,
usage: dict[str, Any] | None = None,
) -> None:
"""Handle the events for the LLM call.

@@ -1977,6 +2012,7 @@ class LLM(BaseLLM):
from_task: Optional task object
from_agent: Optional agent object
messages: Optional messages object
usage: Optional token usage data
"""
crewai_event_bus.emit(
self,

@@ -1988,6 +2024,7 @@ class LLM(BaseLLM):
from_agent=from_agent,
model=self.model,
call_id=get_current_call_id(),
usage=usage,
),
)
@@ -460,6 +460,7 @@ class BaseLLM(BaseModel, ABC):
from_task: Task | None = None,
from_agent: Agent | None = None,
messages: str | list[LLMMessage] | None = None,
usage: dict[str, Any] | None = None,
) -> None:
"""Emit LLM call completed event."""
from crewai.utilities.serialization import to_serializable

@@ -474,6 +475,7 @@ class BaseLLM(BaseModel, ABC):
from_agent=from_agent,
model=self.model,
call_id=get_current_call_id(),
usage=usage,
),
)
@@ -811,6 +811,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data
else:

@@ -826,6 +827,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data

@@ -848,6 +850,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return list(tool_uses)

@@ -879,6 +882,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

if usage.get("total_tokens", 0) > 0:

@@ -1028,6 +1032,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data
for block in final_message.content:

@@ -1042,6 +1047,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data

@@ -1071,6 +1077,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

return self._invoke_after_llm_call_hooks(

@@ -1241,6 +1248,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=follow_up_params["messages"],
usage=follow_up_usage,
)

# Log combined token usage

@@ -1332,6 +1340,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data
else:

@@ -1347,6 +1356,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data

@@ -1367,6 +1377,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return list(tool_uses)

@@ -1390,6 +1401,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

if usage.get("total_tokens", 0) > 0:

@@ -1527,6 +1539,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data
for block in final_message.content:

@@ -1541,6 +1554,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_data

@@ -1569,6 +1583,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

return full_response

@@ -1627,6 +1642,7 @@ class AnthropicCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=follow_up_params["messages"],
usage=follow_up_usage,
)

total_usage = {
@@ -569,6 +569,7 @@ class AzureCompletion(BaseLLM):
params: AzureCompletionParams,
from_task: Any | None = None,
from_agent: Any | None = None,
usage: dict[str, Any] | None = None,
) -> BaseModel:
"""Validate content against response model and emit completion event.

@@ -594,6 +595,7 @@ class AzureCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

return structured_data

@@ -643,6 +645,7 @@ class AzureCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return list(message.tool_calls)

@@ -680,6 +683,7 @@ class AzureCompletion(BaseLLM):
params=params,
from_task=from_task,
from_agent=from_agent,
usage=usage,
)

content = self._apply_stop_words(content)

@@ -691,6 +695,7 @@ class AzureCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

return self._invoke_after_llm_call_hooks(

@@ -826,6 +831,7 @@ class AzureCompletion(BaseLLM):
params=params,
from_task=from_task,
from_agent=from_agent,
usage=usage_data,
)

# If there are tool_calls but no available_functions, return them

@@ -848,6 +854,7 @@ class AzureCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_data,
)
return formatted_tool_calls

@@ -884,6 +891,7 @@ class AzureCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_data,
)

return self._invoke_after_llm_call_hooks(
@@ -664,8 +664,9 @@ class BedrockCompletion(BaseLLM):
)

# Track token usage according to AWS response format
if "usage" in response:
self._track_token_usage_internal(response["usage"])
usage = response.get("usage")
if usage:
self._track_token_usage_internal(usage)

stop_reason = response.get("stopReason")
if stop_reason:

@@ -705,6 +706,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage,
)
return result
except Exception as e:

@@ -727,6 +729,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage,
)
return non_structured_output_tool_uses

@@ -806,6 +809,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage,
)

return self._invoke_after_llm_call_hooks(

@@ -936,6 +940,7 @@ class BedrockCompletion(BaseLLM):
tool_use_id: str | None = None
tool_use_index = 0
accumulated_tool_input = ""
usage_data: dict[str, Any] | None = None

try:
response = self._client.converse_stream(

@@ -1045,6 +1050,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage_data,
)
return result  # type: ignore[return-value]
except Exception as e:

@@ -1112,6 +1118,7 @@ class BedrockCompletion(BaseLLM):
metadata = event["metadata"]
if "usage" in metadata:
usage_metrics = metadata["usage"]
usage_data = usage_metrics
self._track_token_usage_internal(usage_metrics)
logging.debug(f"Token usage: {usage_metrics}")
if "trace" in metadata:

@@ -1141,6 +1148,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage_data,
)

return full_response

@@ -1252,8 +1260,9 @@ class BedrockCompletion(BaseLLM):
**body,
)

if "usage" in response:
self._track_token_usage_internal(response["usage"])
usage = response.get("usage")
if usage:
self._track_token_usage_internal(usage)

stop_reason = response.get("stopReason")
if stop_reason:

@@ -1292,6 +1301,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage,
)
return result
except Exception as e:

@@ -1314,6 +1324,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage,
)
return non_structured_output_tool_uses

@@ -1388,6 +1399,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage,
)

return text_content

@@ -1508,6 +1520,7 @@ class BedrockCompletion(BaseLLM):
tool_use_id: str | None = None
tool_use_index = 0
accumulated_tool_input = ""
usage_data: dict[str, Any] | None = None

try:
async_client = await self._ensure_async_client()

@@ -1619,6 +1632,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage_data,
)
return result  # type: ignore[return-value]
except Exception as e:

@@ -1691,6 +1705,7 @@ class BedrockCompletion(BaseLLM):
metadata = event["metadata"]
if "usage" in metadata:
usage_metrics = metadata["usage"]
usage_data = usage_metrics
self._track_token_usage_internal(usage_metrics)
logging.debug(f"Token usage: {usage_metrics}")
if "trace" in metadata:

@@ -1720,6 +1735,7 @@ class BedrockCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages,
usage=usage_data,
)

return self._invoke_after_llm_call_hooks(
@@ -665,6 +665,7 @@ class GeminiCompletion(BaseLLM):
messages_for_event: list[LLMMessage],
from_task: Any | None = None,
from_agent: Any | None = None,
usage: dict[str, Any] | None = None,
) -> BaseModel:
"""Validate content against response model and emit completion event.

@@ -690,6 +691,7 @@ class GeminiCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages_for_event,
usage=usage,
)

return structured_data

@@ -705,6 +707,7 @@ class GeminiCompletion(BaseLLM):
response_model: type[BaseModel] | None = None,
from_task: Any | None = None,
from_agent: Any | None = None,
usage: dict[str, Any] | None = None,
) -> str | BaseModel:
"""Finalize completion response with validation and event emission.

@@ -728,6 +731,7 @@ class GeminiCompletion(BaseLLM):
messages_for_event=messages_for_event,
from_task=from_task,
from_agent=from_agent,
usage=usage,
)

self._emit_call_completed_event(

@@ -736,6 +740,7 @@ class GeminiCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=messages_for_event,
usage=usage,
)

return self._invoke_after_llm_call_hooks(

@@ -749,6 +754,7 @@ class GeminiCompletion(BaseLLM):
contents: list[types.Content],
from_task: Any | None = None,
from_agent: Any | None = None,
usage: dict[str, Any] | None = None,
) -> BaseModel:
"""Validate and emit event for structured_output tool call.

@@ -773,6 +779,7 @@ class GeminiCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=self._convert_contents_to_dict(contents),
usage=usage,
)
return validated_data
except Exception as e:

@@ -791,6 +798,7 @@ class GeminiCompletion(BaseLLM):
from_task: Any | None = None,
from_agent: Any | None = None,
response_model: type[BaseModel] | None = None,
usage: dict[str, Any] | None = None,
) -> str | Any:
"""Process response, execute function calls, and finalize completion.

@@ -831,6 +839,7 @@ class GeminiCompletion(BaseLLM):
contents=contents,
from_task=from_task,
from_agent=from_agent,
usage=usage,
)

# Filter out structured_output from function calls returned to executor

@@ -852,6 +861,7 @@ class GeminiCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=self._convert_contents_to_dict(contents),
usage=usage,
)
return non_structured_output_parts

@@ -893,6 +903,7 @@ class GeminiCompletion(BaseLLM):
response_model=effective_response_model,
from_task=from_task,
from_agent=from_agent,
usage=usage,
)

def _process_stream_chunk(

@@ -1013,6 +1024,7 @@ class GeminiCompletion(BaseLLM):
contents=contents,
from_task=from_task,
from_agent=from_agent,
usage=usage_data,
)

non_structured_output_calls = {

@@ -1041,6 +1053,7 @@ class GeminiCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=self._convert_contents_to_dict(contents),
usage=usage_data,
)
return formatted_function_calls

@@ -1081,6 +1094,7 @@ class GeminiCompletion(BaseLLM):
response_model=effective_response_model,
from_task=from_task,
from_agent=from_agent,
usage=usage_data,
)

def _handle_completion(

@@ -1118,6 +1132,7 @@ class GeminiCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
usage=usage,
)

def _handle_streaming_completion(

@@ -1196,6 +1211,7 @@ class GeminiCompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
response_model=response_model,
usage=usage,
)

async def _ahandle_streaming_completion(
@@ -809,6 +809,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

return parsed_result

@@ -821,6 +822,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)
return function_calls

@@ -858,6 +860,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)
return structured_result
except ValueError as e:

@@ -871,6 +874,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

content = self._invoke_after_llm_call_hooks(

@@ -941,6 +945,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

return parsed_result

@@ -953,6 +958,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)
return function_calls

@@ -990,6 +996,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)
return structured_result
except ValueError as e:

@@ -1003,6 +1010,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

except NotFoundError as e:

@@ -1045,6 +1053,7 @@ class OpenAICompletion(BaseLLM):
full_response = ""
function_calls: list[dict[str, Any]] = []
final_response: Response | None = None
usage: dict[str, Any] = {"total_tokens": 0}

stream = self._client.responses.create(**params)
response_id_stream = None

@@ -1102,6 +1111,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

return parsed_result

@@ -1138,6 +1148,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)
return structured_result
except ValueError as e:

@@ -1151,6 +1162,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

return self._invoke_after_llm_call_hooks(

@@ -1169,6 +1181,7 @@ class OpenAICompletion(BaseLLM):
full_response = ""
function_calls: list[dict[str, Any]] = []
final_response: Response | None = None
usage: dict[str, Any] = {"total_tokens": 0}

stream = await self._async_client.responses.create(**params)
response_id_stream = None

@@ -1226,6 +1239,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

return parsed_result

@@ -1262,6 +1276,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)
return structured_result
except ValueError as e:

@@ -1275,6 +1290,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params.get("input", []),
usage=usage,
)

return full_response

@@ -1580,6 +1596,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return parsed_object

@@ -1601,6 +1618,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return list(message.tool_calls)

@@ -1639,6 +1657,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_result
except ValueError as e:

@@ -1652,6 +1671,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

if usage.get("total_tokens", 0) > 0:

@@ -1736,6 +1756,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_data,
)
return tool_calls_list

@@ -1778,6 +1799,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_data,
)

return full_response

@@ -1831,6 +1853,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return parsed_result

@@ -1955,6 +1978,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return parsed_object

@@ -1978,6 +2002,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return list(message.tool_calls)

@@ -2016,6 +2041,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)
return structured_result
except ValueError as e:

@@ -2029,6 +2055,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage,
)

if usage.get("total_tokens", 0) > 0:

@@ -2113,6 +2140,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_data,
)

return parsed_object

@@ -2124,6 +2152,7 @@ class OpenAICompletion(BaseLLM):
from_task=from_task,
from_agent=from_agent,
messages=params["messages"],
usage=usage_data,
)
return accumulated_content
@@ -0,0 +1,108 @@
interactions:
- request:
    body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini"}'
    headers:
      User-Agent:
      - X-USER-AGENT-XXX
      accept:
      - application/json
      accept-encoding:
      - ACCEPT-ENCODING-XXX
      authorization:
      - AUTHORIZATION-XXX
      connection:
      - keep-alive
      content-length:
      - '74'
      content-type:
      - application/json
      host:
      - api.openai.com
      x-stainless-arch:
      - X-STAINLESS-ARCH-XXX
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - X-STAINLESS-OS-XXX
      x-stainless-package-version:
      - 1.83.0
      x-stainless-read-timeout:
      - X-STAINLESS-READ-TIMEOUT-XXX
      x-stainless-retry-count:
      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.13.2
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: "{\n \"id\": \"chatcmpl-DPS8YQSwQ3pZKZztIoIe1eYodMqh2\",\n \"object\":
        \"chat.completion\",\n \"created\": 1774958730,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
        \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
        \"assistant\",\n \"content\": \"Hello! How can I assist you today?\",\n
        \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\":
        null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
        9,\n \"completion_tokens\": 9,\n \"total_tokens\": 18,\n \"prompt_tokens_details\":
        {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
        {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
        0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
        \"default\",\n \"system_fingerprint\": \"fp_709f182cb4\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-Ray:
      - 9e4f38fc5d9d82e8-GIG
      Connection:
      - keep-alive
      Content-Type:
      - application/json
      Date:
      - Tue, 31 Mar 2026 12:05:30 GMT
      Server:
      - cloudflare
      Strict-Transport-Security:
      - STS-XXX
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - X-CONTENT-TYPE-XXX
      access-control-expose-headers:
      - ACCESS-CONTROL-XXX
      alt-svc:
      - h3=":443"; ma=86400
      content-length:
      - '839'
      openai-organization:
      - OPENAI-ORG-XXX
      openai-processing-ms:
      - '680'
      openai-project:
      - OPENAI-PROJECT-XXX
      openai-version:
      - '2020-10-01'
      set-cookie:
      - SET-COOKIE-XXX
      x-openai-proxy-wasm:
      - v0.1
      x-ratelimit-limit-requests:
      - X-RATELIMIT-LIMIT-REQUESTS-XXX
      x-ratelimit-limit-tokens:
      - X-RATELIMIT-LIMIT-TOKENS-XXX
      x-ratelimit-remaining-requests:
      - X-RATELIMIT-REMAINING-REQUESTS-XXX
      x-ratelimit-remaining-tokens:
      - X-RATELIMIT-REMAINING-TOKENS-XXX
      x-ratelimit-reset-requests:
      - X-RATELIMIT-RESET-REQUESTS-XXX
      x-ratelimit-reset-tokens:
      - X-RATELIMIT-RESET-TOKENS-XXX
      x-request-id:
      - X-REQUEST-ID-XXX
    status:
      code: 200
      message: OK
version: 1
lib/crewai/tests/events/test_llm_usage_event.py (new file, 176 lines)
@@ -0,0 +1,176 @@
from typing import Any
from unittest.mock import patch

import pytest
from pydantic import BaseModel

from crewai.events.event_bus import CrewAIEventsBus
from crewai.events.types.llm_events import LLMCallCompletedEvent, LLMCallType
from crewai.llm import LLM
from crewai.llms.base_llm import BaseLLM


class TestLLMCallCompletedEventUsageField:
    def test_accepts_usage_dict(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage={"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
        )
        assert event.usage == {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        }

    def test_usage_defaults_to_none(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
        )
        assert event.usage is None

    def test_accepts_none_usage(self):
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage=None,
        )
        assert event.usage is None

    def test_accepts_nested_usage_dict(self):
        usage = {
            "prompt_tokens": 100,
            "completion_tokens": 200,
            "total_tokens": 300,
            "prompt_tokens_details": {"cached_tokens": 50},
        }
        event = LLMCallCompletedEvent(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            call_id="test-id",
            usage=usage,
        )
        assert event.usage["prompt_tokens_details"]["cached_tokens"] == 50


class TestUsageToDict:
    def test_none_returns_none(self):
        assert LLM._usage_to_dict(None) is None

    def test_dict_passes_through(self):
        usage = {"prompt_tokens": 10, "total_tokens": 30}
        assert LLM._usage_to_dict(usage) is usage

    def test_pydantic_model_uses_model_dump(self):
        class Usage(BaseModel):
            prompt_tokens: int = 10
            completion_tokens: int = 20
            total_tokens: int = 30

        result = LLM._usage_to_dict(Usage())
        assert result == {
            "prompt_tokens": 10,
            "completion_tokens": 20,
            "total_tokens": 30,
        }

    def test_object_with_dict_attr(self):
        class UsageObj:
            def __init__(self):
                self.prompt_tokens = 5
                self.completion_tokens = 15
                self.total_tokens = 20

        result = LLM._usage_to_dict(UsageObj())
        assert result == {
            "prompt_tokens": 5,
            "completion_tokens": 15,
            "total_tokens": 20,
        }

    def test_object_with_dict_excludes_private_attrs(self):
        class UsageObj:
            def __init__(self):
                self.total_tokens = 42
                self._internal = "hidden"

        result = LLM._usage_to_dict(UsageObj())
        assert result == {"total_tokens": 42}
        assert "_internal" not in result

    def test_unsupported_type_returns_none(self):
        assert LLM._usage_to_dict(42) is None
        assert LLM._usage_to_dict("string") is None


class _StubLLM(BaseLLM):
    """Minimal concrete BaseLLM for testing event emission."""

    model: str = "test-model"

    def call(self, *args: Any, **kwargs: Any) -> str:
        return ""

    async def acall(self, *args: Any, **kwargs: Any) -> str:
        return ""

    def supports_function_calling(self) -> bool:
        return False

    def supports_stop_words(self) -> bool:
        return True


class TestEmitCallCompletedEventPassesUsage:
    @pytest.fixture
    def mock_emit(self):
        with patch.object(CrewAIEventsBus, "emit") as mock:
            yield mock

    @pytest.fixture
    def llm(self):
        return _StubLLM(model="test-model")

    def test_usage_is_passed_to_event(self, mock_emit, llm):
        usage_data = {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30}

        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
            usage=usage_data,
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage == usage_data

    def test_none_usage_is_passed_to_event(self, mock_emit, llm):
        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
            usage=None,
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage is None

    def test_usage_omitted_defaults_to_none(self, mock_emit, llm):
        llm._emit_call_completed_event(
            response="hello",
            call_type=LLMCallType.LLM_CALL,
            messages="test prompt",
        )

        mock_emit.assert_called_once()
        event = mock_emit.call_args[1]["event"]
        assert isinstance(event, LLMCallCompletedEvent)
        assert event.usage is None
@@ -752,11 +752,7 @@ def test_litellm_retry_catches_litellm_unsupported_params_error(caplog):
raise litellm_error
return MagicMock(
choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
usage=MagicMock(
prompt_tokens=10,
completion_tokens=5,
total_tokens=15,
),
usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
)

with patch("litellm.completion", side_effect=mock_completion):

@@ -787,11 +783,7 @@ def test_litellm_retry_catches_openai_api_stop_error(caplog):
raise api_error
return MagicMock(
choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
usage=MagicMock(
prompt_tokens=10,
completion_tokens=5,
total_tokens=15,
),
usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
)

with patch("litellm.completion", side_effect=mock_completion):

@@ -879,6 +879,35 @@ def test_llm_emits_call_started_event():
assert started_events[0].task_id is None


@pytest.mark.vcr()
def test_llm_completed_event_includes_usage():
completed_events: list[LLMCallCompletedEvent] = []
condition = threading.Condition()

@crewai_event_bus.on(LLMCallCompletedEvent)
def handle_llm_call_completed(source, event):
with condition:
completed_events.append(event)
condition.notify()

llm = LLM(model="gpt-4o-mini")
llm.call("Say hello")

with condition:
success = condition.wait_for(
lambda: len(completed_events) >= 1,
timeout=10,
)
assert success, "Timeout waiting for LLMCallCompletedEvent"

event = completed_events[0]
assert event.usage is not None
assert isinstance(event.usage, dict)
assert event.usage.get("prompt_tokens", 0) > 0
assert event.usage.get("completion_tokens", 0) > 0
assert event.usage.get("total_tokens", 0) > 0


@pytest.mark.vcr()
def test_llm_emits_call_failed_event():
received_events = []