From 63e7319e69fd2ac694109a068c8467429ca9edc4 Mon Sep 17 00:00:00 2001 From: Lucas Gomide Date: Wed, 27 May 2026 13:54:06 -0300 Subject: [PATCH] fix(otel): coerce non-list stop_sequences to list[str] on LLMCallStartedEvent Observed in Datadog: gen_ai.request.stop_sequences on a Gemini/Vertex span surfaced the textproto repr of a google.protobuf.struct_pb2.ListValue (values { string_value: "\nObservation:" }) instead of a real Sequence[str]. Root cause is upstream - a Vertex AI / Gemini code path stores the stop list in a protobuf container (RepeatedScalarContainer or ListValue) rather than a plain Python list. When that container reaches LLMCallStartedEvent and then BaseLLM._emit_call_started_event hands it to the OTel SDK as a span attribute, the SDK falls back to str(value) because the type isn't a recognised Sequence[str] - producing the protobuf textproto string instead of an array attribute. Fix: add a mode="before" field validator on LLMCallStartedEvent.stop_sequences that wraps a bare str in a one-element list, converts any other iterable to a list (calling str() on non-str items), and falls back to None for non-iterable values. Elements of a ListValue are still stringified Value messages, so callers should ultimately pass a plain list[str]. --- .../src/crewai/events/types/llm_events.py | 26 ++++++ .../test_llm_finish_reason_response_id.py | 79 +++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/lib/crewai/src/crewai/events/types/llm_events.py b/lib/crewai/src/crewai/events/types/llm_events.py index c0f66ada4..13585d495 100644 --- a/lib/crewai/src/crewai/events/types/llm_events.py +++ b/lib/crewai/src/crewai/events/types/llm_events.py @@ -60,6 +60,32 @@ class LLMCallStartedEvent(LLMEventBase): presence_penalty: float | None = None n: int | None = None + @field_validator("stop_sequences", mode="before") + @classmethod + def _coerce_stop_sequences_to_str_list(cls, value: Any) -> list[str] | None: + """Normalize stop_sequences to ``list[str] | None``. + + Some providers store stop sequences in non-Python-list containers — + e.g. a Vertex AI / Gemini code path can hand back a + ``google.protobuf.struct_pb2.ListValue`` or a ``RepeatedScalarContainer``. 
+ Without coercion the OTel SDK falls back to ``str(value)`` when + ``gen_ai.request.stop_sequences`` is set, producing the protobuf + textproto repr (``values { string_value: \"...\" }``) instead of a + proper ``Sequence[str]``. + + A bare string is treated as a single stop sequence. Anything that + can't be iterated cleanly falls back to ``None`` rather than crashing + event construction. + """ + if value is None: + return None + if isinstance(value, str): + return [value] + try: + return [item if isinstance(item, str) else str(item) for item in value] + except TypeError: + return None + class LLMCallCompletedEvent(LLMEventBase): """Event emitted when a LLM call completes""" diff --git a/lib/crewai/tests/events/test_llm_finish_reason_response_id.py b/lib/crewai/tests/events/test_llm_finish_reason_response_id.py index 70c2349b8..436b5ed24 100644 --- a/lib/crewai/tests/events/test_llm_finish_reason_response_id.py +++ b/lib/crewai/tests/events/test_llm_finish_reason_response_id.py @@ -108,6 +108,85 @@ class TestLLMCallStartedEventSamplingParams: assert event.n is None +class TestStopSequencesCoercion: + # The OTel SDK falls back to str(value) when a span attribute isn't a + # recognised Sequence[str], producing the protobuf textproto repr + # ("values { string_value: ... }") in downstream telemetry. The + # field_validator coerces exotic iterables (Vertex/Gemini protobuf + # containers, tuples, generators) to a clean list[str] up front so the + # OTel attribute is always shaped correctly. 
+ def test_bare_string_is_wrapped_in_list(self): + event = LLMCallStartedEvent(call_id="call-1", stop_sequences="\nObservation:") + assert event.stop_sequences == ["\nObservation:"] + + @pytest.mark.parametrize( + "raw, expected", + [ + (["\nObservation:", "Final Answer:"], ["\nObservation:", "Final Answer:"]), + (("\nObservation:",), ["\nObservation:"]), + ((s for s in ["a", "b"]), ["a", "b"]), + ([], []), + ], + ) + def test_python_iterables_pass_through( + self, raw: Any, expected: list[str] + ) -> None: + event = LLMCallStartedEvent(call_id="call-1", stop_sequences=raw) + assert event.stop_sequences == expected + + def test_protobuf_like_repeated_container_is_coerced(self): + # Mirrors google.protobuf RepeatedScalarContainer: iterable yielding + # actual Python str objects. Should pass through cleanly. + class _RepeatedScalar: + def __init__(self, items: list[str]) -> None: + self._items = items + + def __iter__(self): + return iter(self._items) + + event = LLMCallStartedEvent( + call_id="call-1", + stop_sequences=_RepeatedScalar(["\nObservation:"]), + ) + assert event.stop_sequences == ["\nObservation:"] + + def test_protobuf_listvalue_with_nested_values_coerces_to_textproto_strings(self): + # Mirrors google.protobuf.struct_pb2.ListValue: iterable yielding + # `Value` messages whose str() is "string_value: \"...\"". The + # coercion will str() each element, which is still wrong-shaped but + # at least lands as a real list[str] for the OTel attribute instead + # of a single textproto-blob string. Documents observed behaviour; + # the upstream fix is to pass list[str] to LLM.stop, not ListValue. 
+ class _PbValue: + def __init__(self, string_value: str) -> None: + self.string_value = string_value + + def __str__(self) -> str: + return f'string_value: "{self.string_value}"' + + class _PbListValue: + def __init__(self, values: list[_PbValue]) -> None: + self.values = values + + def __iter__(self): + return iter(self.values) + + event = LLMCallStartedEvent( + call_id="call-1", + stop_sequences=_PbListValue([_PbValue("\\nObservation:")]), + ) + assert event.stop_sequences == ['string_value: "\\nObservation:"'] + + @pytest.mark.parametrize("bad_input", [123, 12.5, object()]) + def test_non_iterable_falls_back_to_none(self, bad_input: Any) -> None: + event = LLMCallStartedEvent(call_id="call-1", stop_sequences=bad_input) + assert event.stop_sequences is None + + def test_none_stays_none(self): + event = LLMCallStartedEvent(call_id="call-1", stop_sequences=None) + assert event.stop_sequences is None + + class TestEmitCallStartedEventIntrospectsSamplingParams: def test_reads_sampling_params_off_self(self, mock_emit): llm = _StubLLM(model="test-model", temperature=0.4)