feat(cerebras): integrate Cerebras cloud SDK for native completion support

- Added support for the Cerebras cloud SDK, enabling native chat completions.
- Introduced a `CerebrasCompletion` class for handling requests to the Cerebras API.
- Updated `pyproject.toml` to include `cerebras-cloud-sdk` as an optional `cerebras` extra dependency.
- Added tests for the new Cerebras provider, including unit tests and VCR tests for API interactions.
- Updated existing files to accommodate the new provider and its configurations.
This commit is contained in:
lorenzejay
2026-05-06 15:40:14 -07:00
parent d165bcb65f
commit b27ab32546
11 changed files with 2195 additions and 2 deletions

View File

@@ -98,6 +98,9 @@ azure-ai-inference = [
anthropic = [ anthropic = [
"anthropic~=0.73.0", "anthropic~=0.73.0",
] ]
cerebras = [
"cerebras-cloud-sdk~=1.67.0",
]
a2a = [ a2a = [
"a2a-sdk~=0.3.10", "a2a-sdk~=0.3.10",
"httpx-auth~=0.23.1", "httpx-auth~=0.23.1",

View File

@@ -606,6 +606,13 @@ class LLM(BaseLLM):
return BedrockCompletion return BedrockCompletion
if provider == "cerebras":
from crewai.llms.providers.cerebras.completion import (
CerebrasCompletion,
)
return CerebrasCompletion
# OpenAI-compatible providers # OpenAI-compatible providers
openai_compatible_providers = { openai_compatible_providers = {
"openrouter", "openrouter",

View File

@@ -523,6 +523,20 @@ BedrockModels: TypeAlias = Literal[
"qwen.qwen3-coder-30b-a3b-v1:0", "qwen.qwen3-coder-30b-a3b-v1:0",
"twelvelabs.pegasus-1-2-v1:0", "twelvelabs.pegasus-1-2-v1:0",
] ]
CerebrasModels: TypeAlias = Literal[
"llama3.1-8b",
"gpt-oss-120b",
"qwen-3-235b-a22b-instruct-2507",
"zai-glm-4.7",
]
CEREBRAS_MODELS: list[CerebrasModels] = [
"llama3.1-8b",
"gpt-oss-120b",
"qwen-3-235b-a22b-instruct-2507",
"zai-glm-4.7",
]
BEDROCK_MODELS: list[BedrockModels] = [ BEDROCK_MODELS: list[BedrockModels] = [
# Inference profiles (regional) - Claude 4 # Inference profiles (regional) - Claude 4
"us.anthropic.claude-sonnet-4-5-20250929-v1:0", "us.anthropic.claude-sonnet-4-5-20250929-v1:0",

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,233 @@
interactions:
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:47 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:47 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: '{"model":"llama3.1-8b","max_completion_tokens":32,"messages":[{"role":"user","content":"Reply
with exactly the word: OK"}]}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '123'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-63f7fec0-7ac5-4dec-adca-427a39ca8d77","choices":[{"finish_reason":"stop","index":0,"message":{"content":"OK","role":"assistant"}}],"created":1778106827,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion","usage":{"total_tokens":44,"completion_tokens":2,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":42,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1778106827.7362924,"queue_time":2.178798466,"prompt_time":0.003727911,"completion_time":0.00089246,"total_time":2.1848058700561523}}'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- application/json
date:
- Wed, 06 May 2026 22:33:49 GMT
inference-id:
- chatcmpl-63f7fec0-7ac5-4dec-adca-427a39ca8d77
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ratelimit-limit-requests-day:
- '14400'
x-ratelimit-limit-requests-hour:
- '900'
x-ratelimit-limit-requests-minute:
- '30'
x-ratelimit-limit-tokens-day:
- '1000000'
x-ratelimit-limit-tokens-hour:
- '1000000'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '14399'
x-ratelimit-remaining-requests-hour:
- '899'
x-ratelimit-remaining-requests-minute:
- '29'
x-ratelimit-remaining-tokens-day:
- '999961'
x-ratelimit-remaining-tokens-hour:
- '999961'
x-ratelimit-remaining-tokens-minute:
- '59961'
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,249 @@
interactions:
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:50 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:50 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: '{"model":"llama3.1-8b","max_completion_tokens":32,"messages":[{"role":"user","content":"Count:
one, two, three."}],"stream":true,"stream_options":{"include_usage":true}}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '169'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{"role":"assistant"},"index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk"}
data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{"content":"One"},"index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk"}
data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{"content":",
two, three. What''s next?"},"index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk"}
data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{},"finish_reason":"stop","index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk","usage":{"total_tokens":54,"completion_tokens":11,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":43,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1778106830.7255669,"queue_time":0.00023963,"prompt_time":0.002608115,"completion_time":0.00474267,"total_time":0.009609460830688477}}
data: [DONE]
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/event-stream; charset=utf-8
date:
- Wed, 06 May 2026 22:33:50 GMT
inference-id:
- chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ratelimit-limit-requests-day:
- '14400'
x-ratelimit-limit-requests-hour:
- '900'
x-ratelimit-limit-requests-minute:
- '30'
x-ratelimit-limit-tokens-day:
- '1000000'
x-ratelimit-limit-tokens-hour:
- '1000000'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '14398'
x-ratelimit-remaining-requests-hour:
- '898'
x-ratelimit-remaining-requests-minute:
- '29'
x-ratelimit-remaining-tokens-day:
- '999953'
x-ratelimit-remaining-tokens-hour:
- '999963'
x-ratelimit-remaining-tokens-minute:
- '59963'
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,234 @@
interactions:
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:51 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:51 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: '{"model":"llama3.1-8b","max_completion_tokens":64,"messages":[{"role":"user","content":"Say
hi."}],"seed":7,"temperature":0.0}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '126'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-5bf61f2f-c4a5-4122-b921-d8cba7d8ebc0","choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello.
How can I assist you today?","role":"assistant"}}],"created":1778106831,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion","usage":{"total_tokens":48,"completion_tokens":10,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":38,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1778106831.5465255,"queue_time":7.401e-05,"prompt_time":0.001719314,"completion_time":0.003982367,"total_time":0.0068645477294921875}}'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- application/json
date:
- Wed, 06 May 2026 22:33:51 GMT
inference-id:
- chatcmpl-5bf61f2f-c4a5-4122-b921-d8cba7d8ebc0
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ratelimit-limit-requests-day:
- '14400'
x-ratelimit-limit-requests-hour:
- '900'
x-ratelimit-limit-requests-minute:
- '30'
x-ratelimit-limit-tokens-day:
- '1000000'
x-ratelimit-limit-tokens-hour:
- '1000000'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '14397'
x-ratelimit-remaining-requests-hour:
- '898'
x-ratelimit-remaining-requests-minute:
- '28'
x-ratelimit-remaining-tokens-day:
- '999871'
x-ratelimit-remaining-tokens-hour:
- '999881'
x-ratelimit-remaining-tokens-minute:
- '59881'
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,284 @@
"""Tests for the native Cerebras provider.
Two flavors:
- Unit tests under the ``Test*`` classes — exercise factory routing, field
normalization, env-var resolution, client construction, and the fallback
to OpenAI-compatible when the SDK extra is not installed. These run with
``CEREBRAS_API_KEY`` / ``CEREBRAS_BASE_URL`` cleared so each test states
the env it depends on.
- Module-level VCR tests — replay real Cerebras API responses from cassettes.
To re-record, set ``CEREBRAS_API_KEY`` and run with
``PYTEST_VCR_RECORD_MODE=new_episodes`` (or delete the target cassette
and use the default ``once`` mode).
"""
from __future__ import annotations
import builtins
import sys
from unittest.mock import patch
import pytest
from crewai.llm import LLM
from crewai.llms.providers.cerebras.completion import CerebrasCompletion
from crewai.llms.providers.openai_compatible.completion import (
OpenAICompatibleCompletion,
)
@pytest.fixture
def clear_cerebras_env(monkeypatch):
    """Strip Cerebras env vars so each test states the environment it needs."""
    for var_name in ("CEREBRAS_API_KEY", "CEREBRAS_BASE_URL"):
        monkeypatch.delenv(var_name, raising=False)
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasFactoryRouting:
    """Factory routing: LLM(...) should dispatch to the native Cerebras class."""

    def test_provider_prefix_routes_to_native(self):
        # A "cerebras/<model>" prefix alone is enough to select the native provider.
        llm = LLM(model="cerebras/gpt-oss-120b", api_key="sk-test")
        assert isinstance(llm, CerebrasCompletion)
        assert llm.llm_type == "cerebras"
        assert llm.provider == "cerebras"
        assert llm.is_litellm is False

    def test_explicit_provider_kwarg_routes_to_native(self):
        # provider="cerebras" routes natively even without the model-name prefix.
        llm = LLM(model="gpt-oss-120b", provider="cerebras", api_key="sk-test")
        assert isinstance(llm, CerebrasCompletion)

    def test_falls_back_to_openai_compat_when_sdk_missing(self, monkeypatch):
        # Drop any cached imports so the factory re-imports the cerebras module.
        for mod_name in list(sys.modules):
            if mod_name.startswith(
                "crewai.llms.providers.cerebras"
            ) or mod_name.startswith("cerebras"):
                monkeypatch.delitem(sys.modules, mod_name, raising=False)
        real_import = builtins.__import__

        def _import_blocker(name, *args, **kwargs):
            # Simulate the SDK extra being absent: only "cerebras*" imports
            # fail; everything else goes through the real __import__.
            if name.startswith("cerebras"):
                raise ImportError(f"simulated missing dep: {name}")
            return real_import(name, *args, **kwargs)

        monkeypatch.setattr(builtins, "__import__", _import_blocker)
        llm = LLM(model="cerebras/gpt-oss-120b", api_key="sk-test")
        # Without the SDK, factory falls through to the OpenAI-compatible path.
        assert isinstance(llm, OpenAICompatibleCompletion)
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasFieldNormalization:
    """Field defaults and normalization when constructing CerebrasCompletion."""

    def test_default_base_url_left_unset(self):
        # We deliberately don't pin a default base URL — the SDK has its own.
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="sk-test")
        assert completion.base_url is None
        assert completion.api == "completions"
        assert completion.provider == "cerebras"

    def test_env_var_base_url_override(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_BASE_URL", "https://custom.cerebras.example/v1")
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="sk-test")
        assert completion.base_url == "https://custom.cerebras.example/v1"

    def test_explicit_base_url_takes_precedence(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_BASE_URL", "https://from-env.example/v1")
        completion = CerebrasCompletion(
            model="gpt-oss-120b",
            api_key="sk-test",
            base_url="https://explicit.example/v1",
        )
        assert completion.base_url == "https://explicit.example/v1"

    def test_api_forced_to_completions(self):
        # Even if a caller tries to set api="responses", the validator clamps it.
        completion = CerebrasCompletion(
            model="gpt-oss-120b", api_key="sk-test", api="responses"
        )
        assert completion.api == "completions"
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasApiKeyResolution:
    """Resolution of api_key from explicit kwargs versus the environment."""

    def test_env_var_picked_up(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "env-key")
        completion = CerebrasCompletion(model="gpt-oss-120b")
        assert completion.api_key == "env-key"

    def test_explicit_api_key_takes_precedence(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "env-key")
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="explicit-key")
        assert completion.api_key == "explicit-key"

    def test_construction_succeeds_without_key(self):
        # Lazy client init: missing key should not crash construction.
        completion = CerebrasCompletion(model="gpt-oss-120b")
        assert completion.api_key is None

    def test_get_client_params_raises_without_key(self):
        # The key is only demanded when client params are actually built.
        completion = CerebrasCompletion(model="gpt-oss-120b")
        with pytest.raises(ValueError, match="CEREBRAS_API_KEY"):
            completion._get_client_params()
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasClientBuild:
    """Construction of the underlying Cerebras SDK clients and their params."""

    def test_sync_client_uses_cerebras_sdk(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(model="gpt-oss-120b")
        sync_client = completion._get_sync_client()
        from cerebras.cloud.sdk import Cerebras
        assert isinstance(sync_client, Cerebras)

    def test_async_client_uses_cerebras_sdk(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(model="gpt-oss-120b")
        async_client = completion._get_async_client()
        from cerebras.cloud.sdk import AsyncCerebras
        assert isinstance(async_client, AsyncCerebras)

    def test_client_params_threaded_through(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(
            model="gpt-oss-120b",
            timeout=30.0,
            max_retries=5,
            default_headers={"X-Custom": "yes"},
        )
        client_params = completion._get_client_params()
        assert client_params["timeout"] == 30.0
        assert client_params["max_retries"] == 5
        assert client_params["default_headers"] == {"X-Custom": "yes"}
        assert client_params["api_key"] == "sk-test"
        # base_url omitted so the SDK uses its own default.
        assert "base_url" not in client_params

    def test_client_params_includes_base_url_when_set(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(
            model="gpt-oss-120b", base_url="https://override.example/api"
        )
        client_params = completion._get_client_params()
        assert client_params["base_url"] == "https://override.example/api"
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasConfigDict:
    """Cerebras-specific fields in the to_config_dict() serialization."""

    def test_specific_fields_included_when_set(self):
        completion = CerebrasCompletion(
            model="gpt-oss-120b",
            api_key="sk-test",
            service_tier="priority",
            prompt_cache_key="cache-1",
            clear_thinking=True,
            reasoning_effort="high",
        )
        config_dict = completion.to_config_dict()
        assert config_dict["service_tier"] == "priority"
        assert config_dict["prompt_cache_key"] == "cache-1"
        assert config_dict["clear_thinking"] is True
        assert config_dict["reasoning_effort"] == "high"

    def test_specific_fields_omitted_when_unset(self):
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="sk-test")
        config_dict = completion.to_config_dict()
        # Unset optional fields must not leak into the serialized config.
        for field_name in ("service_tier", "prompt_cache_key", "clear_thinking"):
            assert field_name not in config_dict
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasCompletionParams:
    """Verify Cerebras-specific kwargs reach the chat.completions.create call."""

    def test_reasoning_effort_threaded_for_non_o1_models(self):
        completion = CerebrasCompletion(
            model="gpt-oss-120b", api_key="sk-test", reasoning_effort="high"
        )
        call_params = completion._prepare_completion_params(messages=[])
        # Parent gates this on is_o1_model — Cerebras must thread it regardless.
        assert call_params["reasoning_effort"] == "high"

    def test_service_tier_threaded(self):
        completion = CerebrasCompletion(
            model="llama3.1-8b", api_key="sk-test", service_tier="priority"
        )
        call_params = completion._prepare_completion_params(messages=[])
        assert call_params["service_tier"] == "priority"

    def test_prompt_cache_key_threaded(self):
        completion = CerebrasCompletion(
            model="llama3.1-8b", api_key="sk-test", prompt_cache_key="run-42"
        )
        call_params = completion._prepare_completion_params(messages=[])
        assert call_params["prompt_cache_key"] == "run-42"

    def test_clear_thinking_threaded(self):
        completion = CerebrasCompletion(
            model="zai-glm-4.7", api_key="sk-test", clear_thinking=True
        )
        call_params = completion._prepare_completion_params(messages=[])
        assert call_params["clear_thinking"] is True

    def test_cerebras_specific_fields_omitted_when_unset(self):
        completion = CerebrasCompletion(model="llama3.1-8b", api_key="sk-test")
        call_params = completion._prepare_completion_params(messages=[])
        # None of the optional Cerebras knobs should appear when unset.
        for field_name in (
            "service_tier",
            "prompt_cache_key",
            "clear_thinking",
            "reasoning_effort",
        ):
            assert field_name not in call_params
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_cerebras_basic_completion():
    """End-to-end completion against Cerebras (replays from cassette)."""
    llm = LLM(model="cerebras/llama3.1-8b", max_completion_tokens=32)
    # The factory must have routed to the native provider for the cassette to match.
    assert isinstance(llm, CerebrasCompletion)
    response_text = llm.call("Reply with exactly the word: OK")
    assert isinstance(response_text, str)
    assert response_text != ""
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_cerebras_streaming_completion():
    """Streaming completion against Cerebras (replays from cassette)."""
    llm = LLM(model="cerebras/llama3.1-8b", stream=True, max_completion_tokens=32)
    # The factory must have routed to the native provider for the cassette to match.
    assert isinstance(llm, CerebrasCompletion)
    response_text = llm.call("Count: one, two, three.")
    assert isinstance(response_text, str)
    assert response_text != ""
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_cerebras_temperature_and_seed_passed_to_sdk():
    """Deterministic-sampling params reach the Cerebras SDK call.

    Wraps the client's ``chat.completions.create`` so the kwargs built by the
    provider can be inspected after the (cassette-replayed) call completes.
    """
    llm = LLM(
        model="cerebras/llama3.1-8b",
        temperature=0.0,
        seed=7,
        max_completion_tokens=64,
    )
    assert isinstance(llm, CerebrasCompletion)
    # Grab the real bound method before patching so the call still goes
    # through (served from the VCR cassette) while we record the kwargs.
    original_create = llm._client.chat.completions.create
    captured: dict = {}

    def capture_and_call(**kwargs):
        captured.update(kwargs)
        return original_create(**kwargs)

    with patch.object(
        llm._client.chat.completions, "create", side_effect=capture_and_call
    ):
        llm.call("Say hi.")
    # The provider strips the "cerebras/" prefix and forwards sampling params.
    assert captured["model"] == "llama3.1-8b"
    assert captured["temperature"] == 0.0
    assert captured["seed"] == 7

25
uv.lock generated
View File

@@ -13,7 +13,7 @@ resolution-markers = [
] ]
[options] [options]
exclude-newer = "2026-04-27T16:00:00Z" exclude-newer = "2026-04-28T07:00:00Z"
[manifest] [manifest]
members = [ members = [
@@ -789,6 +789,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114", size = 13918, upload-time = "2026-03-09T20:51:27.33Z" }, { url = "https://files.pythonhosted.org/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114", size = 13918, upload-time = "2026-03-09T20:51:27.33Z" },
] ]
[[package]]
name = "cerebras-cloud-sdk"
version = "1.67.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "distro" },
{ name = "httpx", extra = ["http2"] },
{ name = "pydantic" },
{ name = "sniffio" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/92/12/c201f07582068141e88f9a523ab02fdc97de58f2f7c0df775c6c52b9d8dd/cerebras_cloud_sdk-1.67.0.tar.gz", hash = "sha256:3aed6f86c6c7a83ee9d4cfb08a2acea089cebf2af5b8aed116ef79995a4f4813", size = 131536, upload-time = "2026-01-29T23:31:27.306Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7a/5e/36a364f3d1bab4073454b75e7c91dc7ec6879b960063d1a9c929f1c7ea71/cerebras_cloud_sdk-1.67.0-py3-none-any.whl", hash = "sha256:658b79ca2e9c16f75cc6b4e5d523ee014c9e54a88bd39f88905c28ecb33daae1", size = 97807, upload-time = "2026-01-29T23:31:25.77Z" },
]
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2026.2.25" version = "2026.2.25"
@@ -1330,6 +1347,9 @@ azure-ai-inference = [
bedrock = [ bedrock = [
{ name = "boto3" }, { name = "boto3" },
] ]
cerebras = [
{ name = "cerebras-cloud-sdk" },
]
docling = [ docling = [
{ name = "docling" }, { name = "docling" },
] ]
@@ -1383,6 +1403,7 @@ requires-dist = [
{ name = "azure-identity", marker = "extra == 'azure-ai-inference'", specifier = ">=1.17.0,<2" }, { name = "azure-identity", marker = "extra == 'azure-ai-inference'", specifier = ">=1.17.0,<2" },
{ name = "boto3", marker = "extra == 'aws'", specifier = "~=1.42.79" }, { name = "boto3", marker = "extra == 'aws'", specifier = "~=1.42.79" },
{ name = "boto3", marker = "extra == 'bedrock'", specifier = "~=1.42.79" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = "~=1.42.79" },
{ name = "cerebras-cloud-sdk", marker = "extra == 'cerebras'", specifier = "~=1.67.0" },
{ name = "chromadb", specifier = "~=1.1.0" }, { name = "chromadb", specifier = "~=1.1.0" },
{ name = "click", specifier = "~=8.1.7" }, { name = "click", specifier = "~=8.1.7" },
{ name = "crewai-cli", editable = "lib/cli" }, { name = "crewai-cli", editable = "lib/cli" },
@@ -1426,7 +1447,7 @@ requires-dist = [
{ name = "tomli-w", specifier = "~=1.1.0" }, { name = "tomli-w", specifier = "~=1.1.0" },
{ name = "voyageai", marker = "extra == 'voyageai'", specifier = "~=0.3.5" }, { name = "voyageai", marker = "extra == 'voyageai'", specifier = "~=0.3.5" },
] ]
provides-extras = ["a2a", "anthropic", "aws", "azure-ai-inference", "bedrock", "docling", "embeddings", "file-processing", "google-genai", "litellm", "mem0", "openpyxl", "pandas", "qdrant", "qdrant-edge", "tools", "voyageai", "watson"] provides-extras = ["a2a", "anthropic", "aws", "azure-ai-inference", "bedrock", "cerebras", "docling", "embeddings", "file-processing", "google-genai", "litellm", "mem0", "openpyxl", "pandas", "qdrant", "qdrant-edge", "tools", "voyageai", "watson"]
[[package]] [[package]]
name = "crewai-cli" name = "crewai-cli"