fix: handle GPT-5.x models not supporting the stop API parameter

GPT-5.x models reject the `stop` parameter at the API level with "Unsupported parameter: 'stop' is not supported with this model". This breaks CrewAI executions that are routed through LiteLLM (e.g. via OpenAI-compatible gateways like Asimov), because the LiteLLM fallback path always includes `stop` in the API request params.

The native OpenAI provider was unaffected because it never sends `stop` to the API — it applies stop words client-side via `_apply_stop_words()`. However, when the request goes through LiteLLM (custom endpoints, proxy gateways), `stop` is sent as an API parameter and GPT-5.x rejects it.

Additionally, the existing retry logic that catches this error only matched the OpenAI API error format ("Unsupported parameter") and missed LiteLLM's own pre-validation error format ("does not support parameters"), so the self-healing retry never triggered for LiteLLM-routed calls.

This commit makes `supports_stop_words()` return `False` for any model whose name contains "gpt-5" (in both `LLM` and `OpenAICompletion`), sets `stop` to `None` in the LiteLLM completion params when stop words are not supported, and extends the retry check to recognize both error formats.
2026-04-30 14:52:36 +00:00 · 2026-03-27 11:55:39 -03:00
parent 9fe0c15549
commit 5ed863385e
5 changed files with 311 additions and 7 deletions
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -753,7 +753,7 @@ class LLM(BaseLLM):
            "temperature": self.temperature,
            "top_p": self.top_p,
            "n": self.n,
-            "stop": self.stop or None,
+            "stop": (self.stop or None) if self.supports_stop_words() else None,
            "max_tokens": self.max_tokens or self.max_completion_tokens,
            "presence_penalty": self.presence_penalty,
            "frequency_penalty": self.frequency_penalty,
@@ -1825,9 +1825,11 @@ class LLM(BaseLLM):
                    # whether to summarize the content or abort based on the respect_context_window flag
                    raise
                except Exception as e:
-                    unsupported_stop = "Unsupported parameter" in str(
-                        e
-                    ) and "'stop'" in str(e)
+                    error_str = str(e)
+                    unsupported_stop = "'stop'" in error_str and (
+                        "Unsupported parameter" in error_str
+                        or "does not support parameters" in error_str
+                    )

                    if unsupported_stop:
                        if (
@@ -1961,9 +1963,11 @@ class LLM(BaseLLM):
                except LLMContextLengthExceededError:
                    raise
                except Exception as e:
-                    unsupported_stop = "Unsupported parameter" in str(
-                        e
-                    ) and "'stop'" in str(e)
+                    error_str = str(e)
+                    unsupported_stop = "'stop'" in error_str and (
+                        "Unsupported parameter" in error_str
+                        or "does not support parameters" in error_str
+                    )

                    if unsupported_stop:
                        if (
@@ -2263,6 +2267,10 @@ class LLM(BaseLLM):
        Note: This method is only used by the litellm fallback path.
        Native providers override this method with their own implementation.
        """
+        model_lower = self.model.lower() if self.model else ""
+        if "gpt-5" in model_lower:
+            return False
+
        if not LITELLM_AVAILABLE or get_supported_openai_params is None:
            # When litellm is not available, assume stop words are supported
            return True
--- a/lib/crewai/src/crewai/llms/providers/openai/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py
@@ -2245,6 +2245,9 @@ class OpenAICompletion(BaseLLM):

    def supports_stop_words(self) -> bool:
        """Check if the model supports stop words."""
+        model_lower = self.model.lower() if self.model else ""
+        if "gpt-5" in model_lower:
+            return False
        return not self.is_o1_model

    def get_context_window_size(self) -> int:
--- a/lib/crewai/tests/cassettes/test_litellm_gpt5_call_succeeds_without_stop_error.yaml
+++ b/lib/crewai/tests/cassettes/test_litellm_gpt5_call_succeeds_without_stop_error.yaml
@@ -0,0 +1,110 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"gpt-5"}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '89'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.13.2
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DO4LcSpy72yIXCYSIVOQEXWNXydgn\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1774628956,\n  \"model\": \"gpt-5-2025-08-07\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Paris.\",\n        \"refusal\": null,\n
+        \       \"annotations\": []\n      },\n      \"finish_reason\": \"stop\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 13,\n    \"completion_tokens\":
+        11,\n    \"total_tokens\": 24,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": null\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e2fc5dce85582fb-GIG
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 27 Mar 2026 16:29:17 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      access-control-expose-headers:
+      - ACCESS-CONTROL-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '772'
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '1343'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - SET-COOKIE-XXX
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/llms/openai/test_openai.py
+++ b/lib/crewai/tests/llms/openai/test_openai.py
@@ -1523,6 +1523,69 @@ def test_openai_stop_words_not_applied_to_structured_output():
    assert "Observation:" in result.observation


+def test_openai_gpt5_models_do_not_support_stop_words():
+    """
+    Test that GPT-5 family models do not support stop words via the API.
+    GPT-5 models reject the 'stop' parameter, so stop words must be
+    applied client-side only.
+    """
+    gpt5_models = [
+        "gpt-5",
+        "gpt-5-mini",
+        "gpt-5-nano",
+        "gpt-5-pro",
+        "gpt-5.1",
+        "gpt-5.1-chat",
+        "gpt-5.2",
+        "gpt-5.2-chat",
+    ]
+
+    for model_name in gpt5_models:
+        llm = OpenAICompletion(model=model_name)
+        assert llm.supports_stop_words() == False, (
+            f"Expected {model_name} to NOT support stop words"
+        )
+
+
+def test_openai_non_gpt5_models_support_stop_words():
+    """
+    Test that non-GPT-5 models still support stop words normally.
+    """
+    supported_models = [
+        "gpt-4o",
+        "gpt-4o-mini",
+        "gpt-4.1",
+        "gpt-4.1-mini",
+        "gpt-4-turbo",
+    ]
+
+    for model_name in supported_models:
+        llm = OpenAICompletion(model=model_name)
+        assert llm.supports_stop_words() == True, (
+            f"Expected {model_name} to support stop words"
+        )
+
+
+def test_openai_gpt5_still_applies_stop_words_client_side():
+    """
+    Test that GPT-5 models still truncate responses at stop words client-side
+    via _apply_stop_words(), even though they don't send 'stop' to the API.
+    """
+    llm = OpenAICompletion(
+        model="gpt-5.2",
+        stop=["Observation:", "Final Answer:"],
+    )
+
+    assert llm.supports_stop_words() == False
+
+    response = "I need to search.\n\nAction: search\nObservation: Found results"
+    result = llm._apply_stop_words(response)
+
+    assert "Observation:" not in result
+    assert "Found results" not in result
+    assert "I need to search" in result
+
+
 def test_openai_stop_words_still_applied_to_regular_responses():
    """
    Test that stop words ARE still applied for regular (non-structured) responses.
--- a/lib/crewai/tests/test_llm.py
+++ b/lib/crewai/tests/test_llm.py
@@ -682,6 +682,126 @@ def test_llm_call_when_stop_is_unsupported_when_additional_drop_params_is_provid
    assert "Paris" in result


+@pytest.mark.vcr()
+def test_litellm_gpt5_call_succeeds_without_stop_error():
+    """
+    Integration test: GPT-5 call succeeds when stop words are configured,
+    because stop is omitted from API params and applied client-side.
+    """
+    llm = LLM(model="gpt-5", stop=["Observation:"], is_litellm=True)
+    result = llm.call("What is the capital of France?")
+    assert isinstance(result, str)
+    assert len(result) > 0
+
+
+def test_litellm_gpt5_does_not_send_stop_in_params():
+    """
+    Test that the LiteLLM fallback path does not include 'stop' in API params
+    for GPT-5.x models, since they reject it at the API level.
+    """
+    llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)
+
+    params = llm._prepare_completion_params(
+        messages=[{"role": "user", "content": "Hello"}]
+    )
+
+    assert params.get("stop") is None, (
+        "GPT-5.x models should not have 'stop' in API params"
+    )
+
+
+def test_litellm_non_gpt5_sends_stop_in_params():
+    """
+    Test that the LiteLLM fallback path still includes 'stop' in API params
+    for models that support it.
+    """
+    llm = LLM(model="gpt-4o", stop=["Observation:"], is_litellm=True)
+
+    params = llm._prepare_completion_params(
+        messages=[{"role": "user", "content": "Hello"}]
+    )
+
+    assert params.get("stop") == ["Observation:"], (
+        "Non-GPT-5 models should have 'stop' in API params"
+    )
+
+
+def test_litellm_retry_catches_litellm_unsupported_params_error(caplog):
+    """
+    Test that the retry logic catches LiteLLM's UnsupportedParamsError format
+    ("does not support parameters") in addition to the OpenAI API format.
+    """
+    llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)
+
+    litellm_error = Exception(
+        "litellm.UnsupportedParamsError: openai does not support parameters: "
+        "['stop'], for model=openai/gpt-5.2."
+    )
+
+    call_count = 0
+
+    try:
+        import litellm
+    except ImportError:
+        pytest.skip("litellm is not installed; skipping LiteLLM retry test")
+
+    def mock_completion(*args, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:
+            raise litellm_error
+        return MagicMock(
+            choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
+            usage=MagicMock(
+                prompt_tokens=10,
+                completion_tokens=5,
+                total_tokens=15,
+            ),
+        )
+
+    with patch("litellm.completion", side_effect=mock_completion):
+        with caplog.at_level(logging.INFO):
+            result = llm.call("What is the capital of France?")
+
+    assert "Retrying LLM call without the unsupported 'stop'" in caplog.text
+    assert "stop" in llm.additional_params.get("additional_drop_params", [])
+
+
+def test_litellm_retry_catches_openai_api_stop_error(caplog):
+    """
+    Test that the retry logic still catches the OpenAI API error format
+    ("Unsupported parameter: 'stop'").
+    """
+    llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)
+
+    api_error = Exception(
+        "Unsupported parameter: 'stop' is not supported with this model."
+    )
+
+    call_count = 0
+
+    def mock_completion(*args, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:
+            raise api_error
+        return MagicMock(
+            choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
+            usage=MagicMock(
+                prompt_tokens=10,
+                completion_tokens=5,
+                total_tokens=15,
+            ),
+        )
+
+    with patch("litellm.completion", side_effect=mock_completion):
+        with caplog.at_level(logging.INFO):
+            llm.call("What is the capital of France?")
+
+    assert "Retrying LLM call without the unsupported 'stop'" in caplog.text
+    assert "stop" in llm.additional_params.get("additional_drop_params", [])
+
+
@pytest.fixture
 def ollama_llm():
    return LLM(model="ollama/llama3.2:3b", is_litellm=True)