refactor(otel): declare sampling params on BaseLLM + honor stop overrides + dict chunk id

2026-07-04 06:29:22 +00:00 · 2026-05-27 18:07:19 -03:00
parent 0797b38999
commit c25543f3fb
3 changed files with 147 additions and 17 deletions
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -745,6 +745,8 @@ class LLM(BaseLLM):

                if isinstance(chunk, ModelResponseBase):
                    response_id = chunk.id
+                elif isinstance(chunk, dict):
+                    response_id = chunk.get("id")

                chunk_finish, chunk_id = self._extract_finish_reason_and_response_id(
                    chunk
@@ -1461,7 +1463,12 @@ class LLM(BaseLLM):
            async for chunk in await litellm.acompletion(**params):
                chunk_count += 1
                chunk_content = None
-                response_id = chunk.id if isinstance(chunk, ModelResponseBase) else None
+                if isinstance(chunk, ModelResponseBase):
+                    response_id = chunk.id
+                elif isinstance(chunk, dict):
+                    response_id = chunk.get("id")
+                else:
+                    response_id = None

                chunk_finish, chunk_id = self._extract_finish_reason_and_response_id(
                    chunk
--- a/lib/crewai/src/crewai/llms/base_llm.py
+++ b/lib/crewai/src/crewai/llms/base_llm.py
@@ -150,6 +150,13 @@ class BaseLLM(BaseModel, ABC):
    llm_type: str = "base"
    model: str
    temperature: float | None = None
+    top_p: float | None = None
+    max_tokens: int | None = None
+    stream: bool | None = None
+    seed: int | None = None
+    frequency_penalty: float | None = None
+    presence_penalty: float | None = None
+    n: int | None = None
    api_key: str | None = None
    base_url: str | None = None
    provider: str = Field(default="openai")
@@ -484,34 +491,27 @@ class BaseLLM(BaseModel, ABC):
    ) -> None:
        """Emit LLM call started event.

-        Sampling params default to introspecting ``self`` (``self.temperature``,
-        ``self.top_p``, ``self.stop`` -> ``stop_sequences``, ...) so providers
-        don't need to thread them through every emission site. Explicit
-        kwargs override the introspection.
        """
        from crewai.utilities.serialization import to_serializable

        if temperature is None:
-            temperature = getattr(self, "temperature", None)
+            temperature = self.temperature
        if top_p is None:
-            top_p = getattr(self, "top_p", None)
+            top_p = self.top_p
        if max_tokens is None:
-            max_tokens = getattr(self, "max_tokens", None)
+            max_tokens = self.max_tokens
        if stream is None:
-            stream = getattr(self, "stream", None)
+            stream = self.stream
        if seed is None:
-            seed = getattr(self, "seed", None)
+            seed = self.seed
        if stop_sequences is None:
-            stop_attr = getattr(self, "stop", None) or getattr(
-                self, "stop_sequences", None
-            )
-            stop_sequences = stop_attr or None
+            stop_sequences = self.stop_sequences or None
        if frequency_penalty is None:
-            frequency_penalty = getattr(self, "frequency_penalty", None)
+            frequency_penalty = self.frequency_penalty
        if presence_penalty is None:
-            presence_penalty = getattr(self, "presence_penalty", None)
+            presence_penalty = self.presence_penalty
        if n is None:
-            n = getattr(self, "n", None)
+            n = self.n

        crewai_event_bus.emit(
            self,
--- a/lib/crewai/tests/events/test_llm_finish_reason_response_id.py
+++ b/lib/crewai/tests/events/test_llm_finish_reason_response_id.py
@@ -9,6 +9,7 @@ from crewai.events.types.llm_events import (
    LLMCallCompletedEvent,
    LLMCallStartedEvent,
    LLMCallType,
+    LLMStreamChunkEvent,
 )
 from crewai.llm import LLM
 from crewai.llms._finish_reason_utils import extract_choices_finish_reason_and_id
@@ -222,6 +223,128 @@ class TestEmitCallStartedEventIntrospectsSamplingParams:
        assert event.temperature == 0.9


+class TestBaseLLMSamplingParamFields:
+    # Regression: PR #5945 review feedback. Sampling params are declared as
+    # typed fields on BaseLLM so ``_emit_call_started_event`` reads them via
+    # plain attribute access instead of getattr/hasattr fallbacks. Kwargs
+    # like ``n=1`` bind directly to the typed field via Pydantic; there is
+    # no promotion from ``additional_params``.
+    def test_sampling_kwargs_bind_to_typed_fields(self, mock_emit):
+        from crewai.llms.providers.openai.completion import OpenAICompletion
+
+        llm = LLM(model="gpt-4", n=1, temperature=0.5, seed=42)
+
+        assert isinstance(llm, OpenAICompletion)
+        assert llm.n == 1
+        assert llm.temperature == 0.5
+        assert llm.seed == 42
+        assert "n" not in llm.additional_params
+        assert "temperature" not in llm.additional_params
+        assert "seed" not in llm.additional_params
+
+        llm._emit_call_started_event(messages="hi")
+
+        event = mock_emit.call_args[1]["event"]
+        assert isinstance(event, LLMCallStartedEvent)
+        assert event.n == 1
+        assert event.temperature == 0.5
+        assert event.seed == 42
+
+    def test_additional_params_are_not_promoted_to_typed_fields(self, mock_emit):
+        # Callers who pass sampling params through ``additional_params``
+        # opt out of typed-field semantics. We intentionally do NOT promote
+        # those values back into ``self.n`` / ``self.temperature``, so the
+        # emitter sees ``None`` for those attributes. If a caller wants the
+        # value surfaced in telemetry, they pass it as a kwarg.
+        llm = LLM(
+            model="gpt-4",
+            additional_params={"n": 1, "temperature": 0.5, "seed": 42},
+        )
+
+        assert llm.n is None
+        assert llm.temperature is None
+        assert llm.seed is None
+        assert llm.additional_params == {"n": 1, "temperature": 0.5, "seed": 42}
+
+        llm._emit_call_started_event(messages="hi")
+
+        event = mock_emit.call_args[1]["event"]
+        assert isinstance(event, LLMCallStartedEvent)
+        assert event.n is None
+        assert event.temperature is None
+        assert event.seed is None
+
+    def test_emit_uses_call_scoped_stop_override(self, mock_emit):
+        from crewai.llms.base_llm import call_stop_override
+
+        llm = _StubLLM(model="test-model", stop=["A"])
+
+        with call_stop_override(llm, ["X"]):
+            llm._emit_call_started_event(messages="hi")
+
+        event = mock_emit.call_args[1]["event"]
+        assert isinstance(event, LLMCallStartedEvent)
+        assert event.stop_sequences == ["X"]
+        # Instance-level stop is never mutated by the override.
+        assert llm.stop == ["A"]
+
+
+class TestStreamingDictChunkResponseIdPropagation:
+    # Regression: PR #5945 coderabbitai feedback. The streaming loop only
+    # extracted ``chunk.id`` for ``ModelResponseBase`` instances; dict-shaped
+    # chunks (LiteLLM emits these in some configs) silently dropped the id
+    # and ``LLMStreamChunkEvent.response_id`` came through as ``None``.
+    def _dict_chunks(self) -> list[dict[str, Any]]:
+        return [
+            {
+                "id": "test-chunk-id",
+                "choices": [{"delta": {"content": "hi"}, "finish_reason": None}],
+            },
+            {
+                "id": "test-chunk-id",
+                "choices": [{"delta": {"content": " there"}, "finish_reason": "stop"}],
+            },
+        ]
+
+    def _stream_event_response_ids(self, mock_emit) -> list[str | None]:
+        return [
+            call.kwargs["event"].response_id
+            for call in mock_emit.call_args_list
+            if isinstance(call.kwargs.get("event"), LLMStreamChunkEvent)
+        ]
+
+    def test_sync_dict_chunk_id_propagates_to_stream_event(self, mock_emit):
+        llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
+
+        with patch(
+            "crewai.llm.litellm.completion",
+            return_value=iter(self._dict_chunks()),
+        ):
+            llm.call("anything")
+
+        ids = self._stream_event_response_ids(mock_emit)
+        assert ids, "expected at least one LLMStreamChunkEvent"
+        assert all(rid == "test-chunk-id" for rid in ids), ids
+
+    @pytest.mark.asyncio
+    async def test_async_dict_chunk_id_propagates_to_stream_event(self, mock_emit):
+        llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
+
+        async def _aiter():
+            for chunk in self._dict_chunks():
+                yield chunk
+
+        async def _acompletion(*_args, **_kwargs):
+            return _aiter()
+
+        with patch("crewai.llm.litellm.acompletion", side_effect=_acompletion):
+            await llm.acall("anything")
+
+        ids = self._stream_event_response_ids(mock_emit)
+        assert ids, "expected at least one LLMStreamChunkEvent"
+        assert all(rid == "test-chunk-id" for rid in ids), ids
+
+
 class TestEmitCallCompletedEventPassesFinishReasonAndResponseId:
    def test_passes_through_to_event(self, mock_emit):
        llm = _StubLLM(model="test-model")