mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-01 21:28:10 +00:00
Two correctness fixes uncovered while landing the OTel finish_reason +
response_id plumbing:
- LiteLLM streaming (sync + async): `stream_options={"include_usage": True}`
causes LiteLLM to emit a final usage-only chunk with `choices=[]`. The
post-loop `_extract_finish_reason_and_response_id(last_chunk)` silently
returned `(None, None)` because the last chunk has no choices, even though
earlier chunks carried `finish_reason="stop"`. Track both fields
incrementally inside the loop (mirroring how OpenAI/Gemini/Azure already
handle their native streams) and use the tracked values for the
LLMCallCompletedEvent emission and the partial-response error path.
- Bedrock Converse: `ResponseMetadata.RequestId` is an AWS infra trace id,
not a model-level response id (semantically different from OpenAI's
`chatcmpl-XXX`). Return None for `response_id` rather than mislead
downstream telemetry consumers. The audit-fix's async propagation chain
still works — None propagates through unchanged.
Adds `test_llm_streaming_finish_reason.py` pinning both the sync and async
LiteLLM streaming paths against the include_usage chunk shape.
97 lines
2.9 KiB
Python
97 lines
2.9 KiB
Python
"""Regression: LiteLLM emits a final usage-only chunk (choices=[]) when
|
|
``stream_options.include_usage`` is set. The old post-loop
|
|
``_extract_finish_reason_and_response_id(last_chunk)`` then silently returned
|
|
(None, None). These tests pin that we capture finish_reason/response_id
|
|
incrementally during the stream loop instead.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from typing import Any
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
from crewai.events.event_bus import CrewAIEventsBus
|
|
from crewai.events.types.llm_events import LLMCallCompletedEvent
|
|
from crewai.llm import LLM
|
|
|
|
|
|
@pytest.fixture
|
|
def mock_emit():
|
|
with patch.object(CrewAIEventsBus, "emit") as mock:
|
|
yield mock
|
|
|
|
|
|
def _completed_event(mock_emit) -> LLMCallCompletedEvent:
|
|
matches = [
|
|
call.kwargs["event"]
|
|
for call in mock_emit.call_args_list
|
|
if isinstance(call.kwargs.get("event"), LLMCallCompletedEvent)
|
|
]
|
|
assert matches, "expected an LLMCallCompletedEvent to be emitted"
|
|
assert len(matches) == 1, f"expected one completed event, got {len(matches)}"
|
|
return matches[0]
|
|
|
|
|
|
def _chunks_with_usage_tail() -> list[dict[str, Any]]:
|
|
"""Three-chunk stream mirroring LiteLLM's include_usage behavior:
|
|
two content chunks where the second carries finish_reason="stop",
|
|
then a final usage-only chunk with choices=[]."""
|
|
return [
|
|
{
|
|
"id": "chatcmpl-stream-1",
|
|
"choices": [
|
|
{"delta": {"content": "hi"}, "finish_reason": None}
|
|
],
|
|
},
|
|
{
|
|
"id": "chatcmpl-stream-1",
|
|
"choices": [
|
|
{"delta": {"content": " there"}, "finish_reason": "stop"}
|
|
],
|
|
},
|
|
{
|
|
"id": "chatcmpl-stream-1",
|
|
"choices": [],
|
|
"usage": {
|
|
"prompt_tokens": 1,
|
|
"completion_tokens": 2,
|
|
"total_tokens": 3,
|
|
},
|
|
},
|
|
]
|
|
|
|
|
|
def test_sync_stream_emits_finish_reason_and_response_id_from_loop(mock_emit):
|
|
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
|
|
|
|
with patch("crewai.llm.litellm.completion", return_value=iter(_chunks_with_usage_tail())):
|
|
result = llm.call("anything")
|
|
|
|
assert result == "hi there"
|
|
|
|
event = _completed_event(mock_emit)
|
|
assert event.finish_reason == "stop"
|
|
assert event.response_id == "chatcmpl-stream-1"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_async_stream_emits_finish_reason_and_response_id_from_loop(mock_emit):
|
|
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
|
|
|
|
async def _aiter():
|
|
for chunk in _chunks_with_usage_tail():
|
|
yield chunk
|
|
|
|
async def _acompletion(*_args, **_kwargs):
|
|
return _aiter()
|
|
|
|
with patch("crewai.llm.litellm.acompletion", side_effect=_acompletion):
|
|
result = await llm.acall("anything")
|
|
|
|
assert result == "hi there"
|
|
|
|
event = _completed_event(mock_emit)
|
|
assert event.finish_reason == "stop"
|
|
assert event.response_id == "chatcmpl-stream-1"
|