mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-07-04 06:29:22 +00:00
refactor(otel): declare sampling params on BaseLLM + honor stop overrides + dict chunk id
This commit is contained in:
@@ -745,6 +745,8 @@ class LLM(BaseLLM):
|
||||
|
||||
if isinstance(chunk, ModelResponseBase):
|
||||
response_id = chunk.id
|
||||
elif isinstance(chunk, dict):
|
||||
response_id = chunk.get("id")
|
||||
|
||||
chunk_finish, chunk_id = self._extract_finish_reason_and_response_id(
|
||||
chunk
|
||||
@@ -1461,7 +1463,12 @@ class LLM(BaseLLM):
|
||||
async for chunk in await litellm.acompletion(**params):
|
||||
chunk_count += 1
|
||||
chunk_content = None
|
||||
response_id = chunk.id if isinstance(chunk, ModelResponseBase) else None
|
||||
if isinstance(chunk, ModelResponseBase):
|
||||
response_id = chunk.id
|
||||
elif isinstance(chunk, dict):
|
||||
response_id = chunk.get("id")
|
||||
else:
|
||||
response_id = None
|
||||
|
||||
chunk_finish, chunk_id = self._extract_finish_reason_and_response_id(
|
||||
chunk
|
||||
|
||||
@@ -150,6 +150,13 @@ class BaseLLM(BaseModel, ABC):
|
||||
llm_type: str = "base"
|
||||
model: str
|
||||
temperature: float | None = None
|
||||
top_p: float | None = None
|
||||
max_tokens: int | None = None
|
||||
stream: bool | None = None
|
||||
seed: int | None = None
|
||||
frequency_penalty: float | None = None
|
||||
presence_penalty: float | None = None
|
||||
n: int | None = None
|
||||
api_key: str | None = None
|
||||
base_url: str | None = None
|
||||
provider: str = Field(default="openai")
|
||||
@@ -484,34 +491,27 @@ class BaseLLM(BaseModel, ABC):
|
||||
) -> None:
|
||||
"""Emit LLM call started event.
|
||||
|
||||
Sampling params default to introspecting ``self`` (``self.temperature``,
|
||||
``self.top_p``, ``self.stop`` -> ``stop_sequences``, ...) so providers
|
||||
don't need to thread them through every emission site. Explicit
|
||||
kwargs override the introspection.
|
||||
"""
|
||||
from crewai.utilities.serialization import to_serializable
|
||||
|
||||
if temperature is None:
|
||||
temperature = getattr(self, "temperature", None)
|
||||
temperature = self.temperature
|
||||
if top_p is None:
|
||||
top_p = getattr(self, "top_p", None)
|
||||
top_p = self.top_p
|
||||
if max_tokens is None:
|
||||
max_tokens = getattr(self, "max_tokens", None)
|
||||
max_tokens = self.max_tokens
|
||||
if stream is None:
|
||||
stream = getattr(self, "stream", None)
|
||||
stream = self.stream
|
||||
if seed is None:
|
||||
seed = getattr(self, "seed", None)
|
||||
seed = self.seed
|
||||
if stop_sequences is None:
|
||||
stop_attr = getattr(self, "stop", None) or getattr(
|
||||
self, "stop_sequences", None
|
||||
)
|
||||
stop_sequences = stop_attr or None
|
||||
stop_sequences = self.stop_sequences or None
|
||||
if frequency_penalty is None:
|
||||
frequency_penalty = getattr(self, "frequency_penalty", None)
|
||||
frequency_penalty = self.frequency_penalty
|
||||
if presence_penalty is None:
|
||||
presence_penalty = getattr(self, "presence_penalty", None)
|
||||
presence_penalty = self.presence_penalty
|
||||
if n is None:
|
||||
n = getattr(self, "n", None)
|
||||
n = self.n
|
||||
|
||||
crewai_event_bus.emit(
|
||||
self,
|
||||
|
||||
@@ -9,6 +9,7 @@ from crewai.events.types.llm_events import (
|
||||
LLMCallCompletedEvent,
|
||||
LLMCallStartedEvent,
|
||||
LLMCallType,
|
||||
LLMStreamChunkEvent,
|
||||
)
|
||||
from crewai.llm import LLM
|
||||
from crewai.llms._finish_reason_utils import extract_choices_finish_reason_and_id
|
||||
@@ -222,6 +223,128 @@ class TestEmitCallStartedEventIntrospectsSamplingParams:
|
||||
assert event.temperature == 0.9
|
||||
|
||||
|
||||
class TestBaseLLMSamplingParamFields:
|
||||
# Regression: PR #5945 review feedback. Sampling params are declared as
|
||||
# typed fields on BaseLLM so ``_emit_call_started_event`` reads them via
|
||||
# plain attribute access instead of getattr/hasattr fallbacks. Kwargs
|
||||
# like ``n=1`` bind directly to the typed field via Pydantic; there is
|
||||
# no promotion from ``additional_params``.
|
||||
def test_sampling_kwargs_bind_to_typed_fields(self, mock_emit):
|
||||
from crewai.llms.providers.openai.completion import OpenAICompletion
|
||||
|
||||
llm = LLM(model="gpt-4", n=1, temperature=0.5, seed=42)
|
||||
|
||||
assert isinstance(llm, OpenAICompletion)
|
||||
assert llm.n == 1
|
||||
assert llm.temperature == 0.5
|
||||
assert llm.seed == 42
|
||||
assert "n" not in llm.additional_params
|
||||
assert "temperature" not in llm.additional_params
|
||||
assert "seed" not in llm.additional_params
|
||||
|
||||
llm._emit_call_started_event(messages="hi")
|
||||
|
||||
event = mock_emit.call_args[1]["event"]
|
||||
assert isinstance(event, LLMCallStartedEvent)
|
||||
assert event.n == 1
|
||||
assert event.temperature == 0.5
|
||||
assert event.seed == 42
|
||||
|
||||
def test_additional_params_are_not_promoted_to_typed_fields(self, mock_emit):
|
||||
# Callers who pass sampling params through ``additional_params``
|
||||
# opt out of typed-field semantics. We intentionally do NOT promote
|
||||
# those values back into ``self.n`` / ``self.temperature``, so the
|
||||
# emitter sees ``None`` for those attributes. If a caller wants the
|
||||
# value surfaced in telemetry, they pass it as a kwarg.
|
||||
llm = LLM(
|
||||
model="gpt-4",
|
||||
additional_params={"n": 1, "temperature": 0.5, "seed": 42},
|
||||
)
|
||||
|
||||
assert llm.n is None
|
||||
assert llm.temperature is None
|
||||
assert llm.seed is None
|
||||
assert llm.additional_params == {"n": 1, "temperature": 0.5, "seed": 42}
|
||||
|
||||
llm._emit_call_started_event(messages="hi")
|
||||
|
||||
event = mock_emit.call_args[1]["event"]
|
||||
assert isinstance(event, LLMCallStartedEvent)
|
||||
assert event.n is None
|
||||
assert event.temperature is None
|
||||
assert event.seed is None
|
||||
|
||||
def test_emit_uses_call_scoped_stop_override(self, mock_emit):
|
||||
from crewai.llms.base_llm import call_stop_override
|
||||
|
||||
llm = _StubLLM(model="test-model", stop=["A"])
|
||||
|
||||
with call_stop_override(llm, ["X"]):
|
||||
llm._emit_call_started_event(messages="hi")
|
||||
|
||||
event = mock_emit.call_args[1]["event"]
|
||||
assert isinstance(event, LLMCallStartedEvent)
|
||||
assert event.stop_sequences == ["X"]
|
||||
# Instance-level stop is never mutated by the override.
|
||||
assert llm.stop == ["A"]
|
||||
|
||||
|
||||
class TestStreamingDictChunkResponseIdPropagation:
|
||||
# Regression: PR #5945 coderabbitai feedback. The streaming loop only
|
||||
# extracted ``chunk.id`` for ``ModelResponseBase`` instances; dict-shaped
|
||||
# chunks (LiteLLM emits these in some configs) silently dropped the id
|
||||
# and ``LLMStreamChunkEvent.response_id`` came through as ``None``.
|
||||
def _dict_chunks(self) -> list[dict[str, Any]]:
|
||||
return [
|
||||
{
|
||||
"id": "test-chunk-id",
|
||||
"choices": [{"delta": {"content": "hi"}, "finish_reason": None}],
|
||||
},
|
||||
{
|
||||
"id": "test-chunk-id",
|
||||
"choices": [{"delta": {"content": " there"}, "finish_reason": "stop"}],
|
||||
},
|
||||
]
|
||||
|
||||
def _stream_event_response_ids(self, mock_emit) -> list[str | None]:
|
||||
return [
|
||||
call.kwargs["event"].response_id
|
||||
for call in mock_emit.call_args_list
|
||||
if isinstance(call.kwargs.get("event"), LLMStreamChunkEvent)
|
||||
]
|
||||
|
||||
def test_sync_dict_chunk_id_propagates_to_stream_event(self, mock_emit):
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
|
||||
|
||||
with patch(
|
||||
"crewai.llm.litellm.completion",
|
||||
return_value=iter(self._dict_chunks()),
|
||||
):
|
||||
llm.call("anything")
|
||||
|
||||
ids = self._stream_event_response_ids(mock_emit)
|
||||
assert ids, "expected at least one LLMStreamChunkEvent"
|
||||
assert all(rid == "test-chunk-id" for rid in ids), ids
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_async_dict_chunk_id_propagates_to_stream_event(self, mock_emit):
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
|
||||
|
||||
async def _aiter():
|
||||
for chunk in self._dict_chunks():
|
||||
yield chunk
|
||||
|
||||
async def _acompletion(*_args, **_kwargs):
|
||||
return _aiter()
|
||||
|
||||
with patch("crewai.llm.litellm.acompletion", side_effect=_acompletion):
|
||||
await llm.acall("anything")
|
||||
|
||||
ids = self._stream_event_response_ids(mock_emit)
|
||||
assert ids, "expected at least one LLMStreamChunkEvent"
|
||||
assert all(rid == "test-chunk-id" for rid in ids), ids
|
||||
|
||||
|
||||
class TestEmitCallCompletedEventPassesFinishReasonAndResponseId:
|
||||
def test_passes_through_to_event(self, mock_emit):
|
||||
llm = _StubLLM(model="test-model")
|
||||
|
||||
Reference in New Issue
Block a user