diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index add04cb1e..105bc8988 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -745,6 +745,8 @@ class LLM(BaseLLM): if isinstance(chunk, ModelResponseBase): response_id = chunk.id + elif isinstance(chunk, dict): + response_id = chunk.get("id") chunk_finish, chunk_id = self._extract_finish_reason_and_response_id( chunk @@ -1461,7 +1463,12 @@ class LLM(BaseLLM): async for chunk in await litellm.acompletion(**params): chunk_count += 1 chunk_content = None - response_id = chunk.id if isinstance(chunk, ModelResponseBase) else None + if isinstance(chunk, ModelResponseBase): + response_id = chunk.id + elif isinstance(chunk, dict): + response_id = chunk.get("id") + else: + response_id = None chunk_finish, chunk_id = self._extract_finish_reason_and_response_id( chunk diff --git a/lib/crewai/src/crewai/llms/base_llm.py b/lib/crewai/src/crewai/llms/base_llm.py index 066f57c38..aaaca1cd0 100644 --- a/lib/crewai/src/crewai/llms/base_llm.py +++ b/lib/crewai/src/crewai/llms/base_llm.py @@ -150,6 +150,13 @@ class BaseLLM(BaseModel, ABC): llm_type: str = "base" model: str temperature: float | None = None + top_p: float | None = None + max_tokens: int | None = None + stream: bool | None = None + seed: int | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + n: int | None = None api_key: str | None = None base_url: str | None = None provider: str = Field(default="openai") @@ -484,34 +491,27 @@ class BaseLLM(BaseModel, ABC): ) -> None: """Emit LLM call started event. - Sampling params default to introspecting ``self`` (``self.temperature``, - ``self.top_p``, ``self.stop`` -> ``stop_sequences``, ...) so providers - don't need to thread them through every emission site. Explicit - kwargs override the introspection. """ from crewai.utilities.serialization import to_serializable if temperature is None: - temperature = getattr(self, "temperature", None) + temperature = self.temperature if top_p is None: - top_p = getattr(self, "top_p", None) + top_p = self.top_p if max_tokens is None: - max_tokens = getattr(self, "max_tokens", None) + max_tokens = self.max_tokens if stream is None: - stream = getattr(self, "stream", None) + stream = self.stream if seed is None: - seed = getattr(self, "seed", None) + seed = self.seed if stop_sequences is None: - stop_attr = getattr(self, "stop", None) or getattr( - self, "stop_sequences", None - ) - stop_sequences = stop_attr or None + stop_sequences = self.stop_sequences or None if frequency_penalty is None: - frequency_penalty = getattr(self, "frequency_penalty", None) + frequency_penalty = self.frequency_penalty if presence_penalty is None: - presence_penalty = getattr(self, "presence_penalty", None) + presence_penalty = self.presence_penalty if n is None: - n = getattr(self, "n", None) + n = self.n crewai_event_bus.emit( self, diff --git a/lib/crewai/tests/events/test_llm_finish_reason_response_id.py b/lib/crewai/tests/events/test_llm_finish_reason_response_id.py index 436b5ed24..d0a778fcf 100644 --- a/lib/crewai/tests/events/test_llm_finish_reason_response_id.py +++ b/lib/crewai/tests/events/test_llm_finish_reason_response_id.py @@ -9,6 +9,7 @@ from crewai.events.types.llm_events import ( LLMCallCompletedEvent, LLMCallStartedEvent, LLMCallType, + LLMStreamChunkEvent, ) from crewai.llm import LLM from crewai.llms._finish_reason_utils import extract_choices_finish_reason_and_id @@ -222,6 +223,128 @@ class TestEmitCallStartedEventIntrospectsSamplingParams: assert event.temperature == 0.9 +class TestBaseLLMSamplingParamFields: + # Regression: PR #5945 review feedback. Sampling params are declared as + # typed fields on BaseLLM so ``_emit_call_started_event`` reads them via + # plain attribute access instead of getattr/hasattr fallbacks. Kwargs + # like ``n=1`` bind directly to the typed field via Pydantic; there is + # no promotion from ``additional_params``. + def test_sampling_kwargs_bind_to_typed_fields(self, mock_emit): + from crewai.llms.providers.openai.completion import OpenAICompletion + + llm = LLM(model="gpt-4", n=1, temperature=0.5, seed=42) + + assert isinstance(llm, OpenAICompletion) + assert llm.n == 1 + assert llm.temperature == 0.5 + assert llm.seed == 42 + assert "n" not in llm.additional_params + assert "temperature" not in llm.additional_params + assert "seed" not in llm.additional_params + + llm._emit_call_started_event(messages="hi") + + event = mock_emit.call_args[1]["event"] + assert isinstance(event, LLMCallStartedEvent) + assert event.n == 1 + assert event.temperature == 0.5 + assert event.seed == 42 + + def test_additional_params_are_not_promoted_to_typed_fields(self, mock_emit): + # Callers who pass sampling params through ``additional_params`` + # opt out of typed-field semantics. We intentionally do NOT promote + # those values back into ``self.n`` / ``self.temperature``, so the + # emitter sees ``None`` for those attributes. If a caller wants the + # value surfaced in telemetry, they pass it as a kwarg. + llm = LLM( + model="gpt-4", + additional_params={"n": 1, "temperature": 0.5, "seed": 42}, + ) + + assert llm.n is None + assert llm.temperature is None + assert llm.seed is None + assert llm.additional_params == {"n": 1, "temperature": 0.5, "seed": 42} + + llm._emit_call_started_event(messages="hi") + + event = mock_emit.call_args[1]["event"] + assert isinstance(event, LLMCallStartedEvent) + assert event.n is None + assert event.temperature is None + assert event.seed is None + + def test_emit_uses_call_scoped_stop_override(self, mock_emit): + from crewai.llms.base_llm import call_stop_override + + llm = _StubLLM(model="test-model", stop=["A"]) + + with call_stop_override(llm, ["X"]): + llm._emit_call_started_event(messages="hi") + + event = mock_emit.call_args[1]["event"] + assert isinstance(event, LLMCallStartedEvent) + assert event.stop_sequences == ["X"] + # Instance-level stop is never mutated by the override. + assert llm.stop == ["A"] + + +class TestStreamingDictChunkResponseIdPropagation: + # Regression: PR #5945 coderabbitai feedback. The streaming loop only + # extracted ``chunk.id`` for ``ModelResponseBase`` instances; dict-shaped + # chunks (LiteLLM emits these in some configs) silently dropped the id + # and ``LLMStreamChunkEvent.response_id`` came through as ``None``. + def _dict_chunks(self) -> list[dict[str, Any]]: + return [ + { + "id": "test-chunk-id", + "choices": [{"delta": {"content": "hi"}, "finish_reason": None}], + }, + { + "id": "test-chunk-id", + "choices": [{"delta": {"content": " there"}, "finish_reason": "stop"}], + }, + ] + + def _stream_event_response_ids(self, mock_emit) -> list[str | None]: + return [ + call.kwargs["event"].response_id + for call in mock_emit.call_args_list + if isinstance(call.kwargs.get("event"), LLMStreamChunkEvent) + ] + + def test_sync_dict_chunk_id_propagates_to_stream_event(self, mock_emit): + llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True) + + with patch( + "crewai.llm.litellm.completion", + return_value=iter(self._dict_chunks()), + ): + llm.call("anything") + + ids = self._stream_event_response_ids(mock_emit) + assert ids, "expected at least one LLMStreamChunkEvent" + assert all(rid == "test-chunk-id" for rid in ids), ids + + @pytest.mark.asyncio + async def test_async_dict_chunk_id_propagates_to_stream_event(self, mock_emit): + llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True) + + async def _aiter(): + for chunk in self._dict_chunks(): + yield chunk + + async def _acompletion(*_args, **_kwargs): + return _aiter() + + with patch("crewai.llm.litellm.acompletion", side_effect=_acompletion): + await llm.acall("anything") + + ids = self._stream_event_response_ids(mock_emit) + assert ids, "expected at least one LLMStreamChunkEvent" + assert all(rid == "test-chunk-id" for rid in ids), ids + + class TestEmitCallCompletedEventPassesFinishReasonAndResponseId: def test_passes_through_to_event(self, mock_emit): llm = _StubLLM(model="test-model")