mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-30 14:52:36 +00:00
fix: handle GPT-5.x models not supporting the stop API parameter
GPT-5.x models reject the `stop` parameter at the API level with "Unsupported parameter: 'stop' is not supported with this model". This breaks CrewAI executions when routing through LiteLLM (e.g. via
OpenAI-compatible gateways like Asimov), because the LiteLLM fallback path always includes `stop` in the API request params.
The native OpenAI provider was unaffected because it never sends `stop` to the API — it applies stop words client-side via `_apply_stop_words()`. However, when the request goes through LiteLLM (custom endpoints, proxy gateways),
`stop` is sent as an API parameter and GPT-5.x rejects it.
Additionally, the existing retry logic that catches this error only matched the OpenAI API error format ("Unsupported parameter") but missed
LiteLLM's own pre-validation error format ("does not support parameters"), so the self-healing retry never triggered for LiteLLM-routed calls.
This commit is contained in:
@@ -753,7 +753,7 @@ class LLM(BaseLLM):
|
||||
"temperature": self.temperature,
|
||||
"top_p": self.top_p,
|
||||
"n": self.n,
|
||||
"stop": self.stop or None,
|
||||
"stop": (self.stop or None) if self.supports_stop_words() else None,
|
||||
"max_tokens": self.max_tokens or self.max_completion_tokens,
|
||||
"presence_penalty": self.presence_penalty,
|
||||
"frequency_penalty": self.frequency_penalty,
|
||||
@@ -1825,9 +1825,11 @@ class LLM(BaseLLM):
|
||||
# whether to summarize the content or abort based on the respect_context_window flag
|
||||
raise
|
||||
except Exception as e:
|
||||
unsupported_stop = "Unsupported parameter" in str(
|
||||
e
|
||||
) and "'stop'" in str(e)
|
||||
error_str = str(e)
|
||||
unsupported_stop = "'stop'" in error_str and (
|
||||
"Unsupported parameter" in error_str
|
||||
or "does not support parameters" in error_str
|
||||
)
|
||||
|
||||
if unsupported_stop:
|
||||
if (
|
||||
@@ -1961,9 +1963,11 @@ class LLM(BaseLLM):
|
||||
except LLMContextLengthExceededError:
|
||||
raise
|
||||
except Exception as e:
|
||||
unsupported_stop = "Unsupported parameter" in str(
|
||||
e
|
||||
) and "'stop'" in str(e)
|
||||
error_str = str(e)
|
||||
unsupported_stop = "'stop'" in error_str and (
|
||||
"Unsupported parameter" in error_str
|
||||
or "does not support parameters" in error_str
|
||||
)
|
||||
|
||||
if unsupported_stop:
|
||||
if (
|
||||
@@ -2263,6 +2267,10 @@ class LLM(BaseLLM):
|
||||
Note: This method is only used by the litellm fallback path.
|
||||
Native providers override this method with their own implementation.
|
||||
"""
|
||||
model_lower = self.model.lower() if self.model else ""
|
||||
if "gpt-5" in model_lower:
|
||||
return False
|
||||
|
||||
if not LITELLM_AVAILABLE or get_supported_openai_params is None:
|
||||
# When litellm is not available, assume stop words are supported
|
||||
return True
|
||||
|
||||
@@ -2245,6 +2245,9 @@ class OpenAICompletion(BaseLLM):
|
||||
|
||||
def supports_stop_words(self) -> bool:
|
||||
"""Check if the model supports stop words."""
|
||||
model_lower = self.model.lower() if self.model else ""
|
||||
if "gpt-5" in model_lower:
|
||||
return False
|
||||
return not self.is_o1_model
|
||||
|
||||
def get_context_window_size(self) -> int:
|
||||
|
||||
@@ -0,0 +1,110 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"What is the capital of France?"}],"model":"gpt-5"}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '89'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 1.83.0
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.13.2
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"id\": \"chatcmpl-DO4LcSpy72yIXCYSIVOQEXWNXydgn\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1774628956,\n \"model\": \"gpt-5-2025-08-07\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"Paris.\",\n \"refusal\": null,\n
|
||||
\ \"annotations\": []\n },\n \"finish_reason\": \"stop\"\n
|
||||
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 13,\n \"completion_tokens\":
|
||||
11,\n \"total_tokens\": 24,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
|
||||
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
|
||||
\"default\",\n \"system_fingerprint\": null\n}\n"
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-Ray:
|
||||
- 9e2fc5dce85582fb-GIG
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Fri, 27 Mar 2026 16:29:17 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
access-control-expose-headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
content-length:
|
||||
- '772'
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '1343'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
set-cookie:
|
||||
- SET-COOKIE-XXX
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -1523,6 +1523,69 @@ def test_openai_stop_words_not_applied_to_structured_output():
|
||||
assert "Observation:" in result.observation
|
||||
|
||||
|
||||
def test_openai_gpt5_models_do_not_support_stop_words():
|
||||
"""
|
||||
Test that GPT-5 family models do not support stop words via the API.
|
||||
GPT-5 models reject the 'stop' parameter, so stop words must be
|
||||
applied client-side only.
|
||||
"""
|
||||
gpt5_models = [
|
||||
"gpt-5",
|
||||
"gpt-5-mini",
|
||||
"gpt-5-nano",
|
||||
"gpt-5-pro",
|
||||
"gpt-5.1",
|
||||
"gpt-5.1-chat",
|
||||
"gpt-5.2",
|
||||
"gpt-5.2-chat",
|
||||
]
|
||||
|
||||
for model_name in gpt5_models:
|
||||
llm = OpenAICompletion(model=model_name)
|
||||
assert llm.supports_stop_words() == False, (
|
||||
f"Expected {model_name} to NOT support stop words"
|
||||
)
|
||||
|
||||
|
||||
def test_openai_non_gpt5_models_support_stop_words():
|
||||
"""
|
||||
Test that non-GPT-5 models still support stop words normally.
|
||||
"""
|
||||
supported_models = [
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"gpt-4.1",
|
||||
"gpt-4.1-mini",
|
||||
"gpt-4-turbo",
|
||||
]
|
||||
|
||||
for model_name in supported_models:
|
||||
llm = OpenAICompletion(model=model_name)
|
||||
assert llm.supports_stop_words() == True, (
|
||||
f"Expected {model_name} to support stop words"
|
||||
)
|
||||
|
||||
|
||||
def test_openai_gpt5_still_applies_stop_words_client_side():
|
||||
"""
|
||||
Test that GPT-5 models still truncate responses at stop words client-side
|
||||
via _apply_stop_words(), even though they don't send 'stop' to the API.
|
||||
"""
|
||||
llm = OpenAICompletion(
|
||||
model="gpt-5.2",
|
||||
stop=["Observation:", "Final Answer:"],
|
||||
)
|
||||
|
||||
assert llm.supports_stop_words() == False
|
||||
|
||||
response = "I need to search.\n\nAction: search\nObservation: Found results"
|
||||
result = llm._apply_stop_words(response)
|
||||
|
||||
assert "Observation:" not in result
|
||||
assert "Found results" not in result
|
||||
assert "I need to search" in result
|
||||
|
||||
|
||||
def test_openai_stop_words_still_applied_to_regular_responses():
|
||||
"""
|
||||
Test that stop words ARE still applied for regular (non-structured) responses.
|
||||
|
||||
@@ -682,6 +682,126 @@ def test_llm_call_when_stop_is_unsupported_when_additional_drop_params_is_provid
|
||||
assert "Paris" in result
|
||||
|
||||
|
||||
@pytest.mark.vcr()
|
||||
def test_litellm_gpt5_call_succeeds_without_stop_error():
|
||||
"""
|
||||
Integration test: GPT-5 call succeeds when stop words are configured,
|
||||
because stop is omitted from API params and applied client-side.
|
||||
"""
|
||||
llm = LLM(model="gpt-5", stop=["Observation:"], is_litellm=True)
|
||||
result = llm.call("What is the capital of France?")
|
||||
assert isinstance(result, str)
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
def test_litellm_gpt5_does_not_send_stop_in_params():
|
||||
"""
|
||||
Test that the LiteLLM fallback path does not include 'stop' in API params
|
||||
for GPT-5.x models, since they reject it at the API level.
|
||||
"""
|
||||
llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)
|
||||
|
||||
params = llm._prepare_completion_params(
|
||||
messages=[{"role": "user", "content": "Hello"}]
|
||||
)
|
||||
|
||||
assert params.get("stop") is None, (
|
||||
"GPT-5.x models should not have 'stop' in API params"
|
||||
)
|
||||
|
||||
|
||||
def test_litellm_non_gpt5_sends_stop_in_params():
|
||||
"""
|
||||
Test that the LiteLLM fallback path still includes 'stop' in API params
|
||||
for models that support it.
|
||||
"""
|
||||
llm = LLM(model="gpt-4o", stop=["Observation:"], is_litellm=True)
|
||||
|
||||
params = llm._prepare_completion_params(
|
||||
messages=[{"role": "user", "content": "Hello"}]
|
||||
)
|
||||
|
||||
assert params.get("stop") == ["Observation:"], (
|
||||
"Non-GPT-5 models should have 'stop' in API params"
|
||||
)
|
||||
|
||||
|
||||
def test_litellm_retry_catches_litellm_unsupported_params_error(caplog):
|
||||
"""
|
||||
Test that the retry logic catches LiteLLM's UnsupportedParamsError format
|
||||
("does not support parameters") in addition to the OpenAI API format.
|
||||
"""
|
||||
llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)
|
||||
|
||||
litellm_error = Exception(
|
||||
"litellm.UnsupportedParamsError: openai does not support parameters: "
|
||||
"['stop'], for model=openai/gpt-5.2."
|
||||
)
|
||||
|
||||
call_count = 0
|
||||
|
||||
try:
|
||||
import litellm
|
||||
except ImportError:
|
||||
pytest.skip("litellm is not installed; skipping LiteLLM retry test")
|
||||
|
||||
def mock_completion(*args, **kwargs):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
raise litellm_error
|
||||
return MagicMock(
|
||||
choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
|
||||
usage=MagicMock(
|
||||
prompt_tokens=10,
|
||||
completion_tokens=5,
|
||||
total_tokens=15,
|
||||
),
|
||||
)
|
||||
|
||||
with patch("litellm.completion", side_effect=mock_completion):
|
||||
with caplog.at_level(logging.INFO):
|
||||
result = llm.call("What is the capital of France?")
|
||||
|
||||
assert "Retrying LLM call without the unsupported 'stop'" in caplog.text
|
||||
assert "stop" in llm.additional_params.get("additional_drop_params", [])
|
||||
|
||||
|
||||
def test_litellm_retry_catches_openai_api_stop_error(caplog):
|
||||
"""
|
||||
Test that the retry logic still catches the OpenAI API error format
|
||||
("Unsupported parameter: 'stop'").
|
||||
"""
|
||||
llm = LLM(model="openai/gpt-5.2", stop=["Observation:"], is_litellm=True)
|
||||
|
||||
api_error = Exception(
|
||||
"Unsupported parameter: 'stop' is not supported with this model."
|
||||
)
|
||||
|
||||
call_count = 0
|
||||
|
||||
def mock_completion(*args, **kwargs):
|
||||
nonlocal call_count
|
||||
call_count += 1
|
||||
if call_count == 1:
|
||||
raise api_error
|
||||
return MagicMock(
|
||||
choices=[MagicMock(message=MagicMock(content="Paris", tool_calls=None))],
|
||||
usage=MagicMock(
|
||||
prompt_tokens=10,
|
||||
completion_tokens=5,
|
||||
total_tokens=15,
|
||||
),
|
||||
)
|
||||
|
||||
with patch("litellm.completion", side_effect=mock_completion):
|
||||
with caplog.at_level(logging.INFO):
|
||||
llm.call("What is the capital of France?")
|
||||
|
||||
assert "Retrying LLM call without the unsupported 'stop'" in caplog.text
|
||||
assert "stop" in llm.additional_params.get("additional_drop_params", [])
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def ollama_llm():
|
||||
return LLM(model="ollama/llama3.2:3b", is_litellm=True)
|
||||
|
||||
Reference in New Issue
Block a user