# crewAI/lib/crewai/tests/test_llm_streaming_finish_reason.py

"""Regression: LiteLLM emits a final usage-only chunk (choices=[]) when
``stream_options.include_usage`` is set. The old post-loop
``_extract_finish_reason_and_response_id(last_chunk)`` then silently returned
(None, None). These tests pin that we capture finish_reason/response_id
incrementally during the stream loop instead.
"""
from __future__ import annotations

from typing import Any
from unittest.mock import patch

import pytest

from crewai.events.event_bus import CrewAIEventsBus
from crewai.events.types.llm_events import LLMCallCompletedEvent
from crewai.llm import LLM


@pytest.fixture
def mock_emit():
    """Replace ``CrewAIEventsBus.emit`` with a mock while a test runs.

    Yields the mock so a test can inspect the recorded ``emit`` calls; the
    patch is undone when the fixture is torn down.
    """
    emit_patch = patch.object(CrewAIEventsBus, "emit")
    with emit_patch as patched_emit:
        yield patched_emit


def _completed_event(mock_emit) -> LLMCallCompletedEvent:
    """Return the one ``LLMCallCompletedEvent`` recorded by ``mock_emit``.

    Only calls that passed the event through the ``event`` keyword argument
    are considered. Fails the test when no such event was emitted, or when
    more than one was.
    """
    # Calls without an ``event`` keyword yield None here and are filtered out.
    keyword_events = (
        recorded.kwargs.get("event") for recorded in mock_emit.call_args_list
    )
    matches = [
        candidate
        for candidate in keyword_events
        if isinstance(candidate, LLMCallCompletedEvent)
    ]
    assert matches, "expected an LLMCallCompletedEvent to be emitted"
    assert len(matches) == 1, f"expected one completed event, got {len(matches)}"
    (sole_event,) = matches
    return sole_event


def _chunks_with_usage_tail() -> list[dict[str, Any]]:
    """Three-chunk stream mirroring LiteLLM's include_usage behavior:
    two content chunks where the second carries finish_reason="stop",
    then a final usage-only chunk with choices=[]."""
    return [
        {
            "id": "chatcmpl-stream-1",
            "choices": [
                {"delta": {"content": "hi"}, "finish_reason": None}
            ],
        },
        {
            "id": "chatcmpl-stream-1",
            "choices": [
                {"delta": {"content": " there"}, "finish_reason": "stop"}
            ],
        },
        {
            "id": "chatcmpl-stream-1",
            "choices": [],
            "usage": {
                "prompt_tokens": 1,
                "completion_tokens": 2,
                "total_tokens": 3,
            },
        },
    ]


def test_sync_stream_emits_finish_reason_and_response_id_from_loop(mock_emit):
    """Sync streaming call reports finish_reason and response_id.

    ``litellm.completion`` is patched to return a stream whose final chunk
    has an empty ``choices`` list; the completed event must still carry the
    finish_reason and id seen in the earlier chunks.
    """
    streaming_llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
    fake_stream = iter(_chunks_with_usage_tail())

    with patch("crewai.llm.litellm.completion", return_value=fake_stream):
        answer = streaming_llm.call("anything")

    # The text is the concatenation of the two content deltas.
    assert answer == "hi there"

    completed = _completed_event(mock_emit)
    assert completed.finish_reason == "stop"
    assert completed.response_id == "chatcmpl-stream-1"


@pytest.mark.asyncio
async def test_async_stream_emits_finish_reason_and_response_id_from_loop(mock_emit):
    """Async streaming call reports finish_reason and response_id.

    ``litellm.acompletion`` is patched with a coroutine that returns an async
    generator over a stream whose final chunk has an empty ``choices`` list;
    the completed event must still carry the finish_reason and id seen in the
    earlier chunks.
    """
    streaming_llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)

    async def _stream_chunks():
        # Async counterpart of iterating the chunk list directly.
        for piece in _chunks_with_usage_tail():
            yield piece

    async def _fake_acompletion(*_args, **_kwargs):
        # Arguments are ignored; awaiting this hands back the async stream.
        return _stream_chunks()

    with patch("crewai.llm.litellm.acompletion", side_effect=_fake_acompletion):
        answer = await streaming_llm.acall("anything")

    # The text is the concatenation of the two content deltas.
    assert answer == "hi there"

    completed = _completed_event(mock_emit)
    assert completed.finish_reason == "stop"
    assert completed.response_id == "chatcmpl-stream-1"