feat(cerebras): integrate Cerebras cloud SDK for native completion support

- Added support for the Cerebras cloud SDK, enabling native chat completions.
- Introduced a `CerebrasCompletion` class for handling requests to the Cerebras API.
- Updated `pyproject.toml` to include `cerebras-cloud-sdk` as an optional `cerebras` extra dependency.
- Added tests for the new Cerebras provider, including unit tests and VCR tests for API interactions.
- Updated existing files to accommodate the new provider and its configurations.
This commit is contained in:
lorenzejay
2026-05-06 15:40:14 -07:00
parent d165bcb65f
commit b27ab32546
11 changed files with 2195 additions and 2 deletions

View File

@@ -98,6 +98,9 @@ azure-ai-inference = [
anthropic = [ anthropic = [
"anthropic~=0.73.0", "anthropic~=0.73.0",
] ]
cerebras = [
"cerebras-cloud-sdk~=1.67.0",
]
a2a = [ a2a = [
"a2a-sdk~=0.3.10", "a2a-sdk~=0.3.10",
"httpx-auth~=0.23.1", "httpx-auth~=0.23.1",

View File

@@ -606,6 +606,13 @@ class LLM(BaseLLM):
return BedrockCompletion return BedrockCompletion
if provider == "cerebras":
from crewai.llms.providers.cerebras.completion import (
CerebrasCompletion,
)
return CerebrasCompletion
# OpenAI-compatible providers # OpenAI-compatible providers
openai_compatible_providers = { openai_compatible_providers = {
"openrouter", "openrouter",

View File

@@ -523,6 +523,20 @@ BedrockModels: TypeAlias = Literal[
"qwen.qwen3-coder-30b-a3b-v1:0", "qwen.qwen3-coder-30b-a3b-v1:0",
"twelvelabs.pegasus-1-2-v1:0", "twelvelabs.pegasus-1-2-v1:0",
] ]
CerebrasModels: TypeAlias = Literal[
"llama3.1-8b",
"gpt-oss-120b",
"qwen-3-235b-a22b-instruct-2507",
"zai-glm-4.7",
]
CEREBRAS_MODELS: list[CerebrasModels] = [
"llama3.1-8b",
"gpt-oss-120b",
"qwen-3-235b-a22b-instruct-2507",
"zai-glm-4.7",
]
BEDROCK_MODELS: list[BedrockModels] = [ BEDROCK_MODELS: list[BedrockModels] = [
# Inference profiles (regional) - Claude 4 # Inference profiles (regional) - Claude 4
"us.anthropic.claude-sonnet-4-5-20250929-v1:0", "us.anthropic.claude-sonnet-4-5-20250929-v1:0",

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,233 @@
interactions:
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:47 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:47 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: '{"model":"llama3.1-8b","max_completion_tokens":32,"messages":[{"role":"user","content":"Reply
with exactly the word: OK"}]}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '123'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-63f7fec0-7ac5-4dec-adca-427a39ca8d77","choices":[{"finish_reason":"stop","index":0,"message":{"content":"OK","role":"assistant"}}],"created":1778106827,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion","usage":{"total_tokens":44,"completion_tokens":2,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":42,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1778106827.7362924,"queue_time":2.178798466,"prompt_time":0.003727911,"completion_time":0.00089246,"total_time":2.1848058700561523}}'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- application/json
date:
- Wed, 06 May 2026 22:33:49 GMT
inference-id:
- chatcmpl-63f7fec0-7ac5-4dec-adca-427a39ca8d77
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ratelimit-limit-requests-day:
- '14400'
x-ratelimit-limit-requests-hour:
- '900'
x-ratelimit-limit-requests-minute:
- '30'
x-ratelimit-limit-tokens-day:
- '1000000'
x-ratelimit-limit-tokens-hour:
- '1000000'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '14399'
x-ratelimit-remaining-requests-hour:
- '899'
x-ratelimit-remaining-requests-minute:
- '29'
x-ratelimit-remaining-tokens-day:
- '999961'
x-ratelimit-remaining-tokens-hour:
- '999961'
x-ratelimit-remaining-tokens-minute:
- '59961'
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,249 @@
interactions:
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:50 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:50 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: '{"model":"llama3.1-8b","max_completion_tokens":32,"messages":[{"role":"user","content":"Count:
one, two, three."}],"stream":true,"stream_options":{"include_usage":true}}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '169'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: 'data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{"role":"assistant"},"index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk"}
data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{"content":"One"},"index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk"}
data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{"content":",
two, three. What''s next?"},"index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk"}
data: {"id":"chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd","choices":[{"delta":{},"finish_reason":"stop","index":0}],"created":1778106830,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion.chunk","usage":{"total_tokens":54,"completion_tokens":11,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":43,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1778106830.7255669,"queue_time":0.00023963,"prompt_time":0.002608115,"completion_time":0.00474267,"total_time":0.009609460830688477}}
data: [DONE]
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/event-stream; charset=utf-8
date:
- Wed, 06 May 2026 22:33:50 GMT
inference-id:
- chatcmpl-e4b66146-cac9-459a-9909-ddd5fb56b3fd
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ratelimit-limit-requests-day:
- '14400'
x-ratelimit-limit-requests-hour:
- '900'
x-ratelimit-limit-requests-minute:
- '30'
x-ratelimit-limit-tokens-day:
- '1000000'
x-ratelimit-limit-tokens-hour:
- '1000000'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '14398'
x-ratelimit-remaining-requests-hour:
- '898'
x-ratelimit-remaining-requests-minute:
- '29'
x-ratelimit-remaining-tokens-day:
- '999953'
x-ratelimit-remaining-tokens-hour:
- '999963'
x-ratelimit-remaining-tokens-minute:
- '59963'
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,234 @@
interactions:
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:51 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: ''
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: GET
uri: https://api.cerebras.ai/v1/tcp_warming
response:
body:
string: 'This request is being sent by the Cerebras Cloud SDK to warm up your
TCP connection so that your requests will have lower TTFT.
If you don''t want this, please set `"warmTCPConnection": false` (NodeJS)
or `warm_tcp_connection=False` (Python) in the SDK constructor.
For more assistance, contact us at support@cerebras.ai
'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- text/plain; charset=utf-8
date:
- Wed, 06 May 2026 22:33:51 GMT
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
set-cookie:
- SET-COOKIE-XXX
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
status:
code: 200
message: OK
- request:
body: '{"model":"llama3.1-8b","max_completion_tokens":64,"messages":[{"role":"user","content":"Say
hi."}],"seed":7,"temperature":0.0}'
headers:
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
connection:
- keep-alive
content-length:
- '126'
content-type:
- application/json
cookie:
- COOKIE-XXX
host:
- api.cerebras.ai
user-agent:
- X-USER-AGENT-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 1.67.0
x-stainless-read-timeout:
- X-STAINLESS-READ-TIMEOUT-XXX
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.13.3
method: POST
uri: https://api.cerebras.ai/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-5bf61f2f-c4a5-4122-b921-d8cba7d8ebc0","choices":[{"finish_reason":"stop","index":0,"message":{"content":"Hello.
How can I assist you today?","role":"assistant"}}],"created":1778106831,"model":"llama3.1-8b","system_fingerprint":"fp_96e7e4453bc38316a23a","object":"chat.completion","usage":{"total_tokens":48,"completion_tokens":10,"completion_tokens_details":{"accepted_prediction_tokens":0,"rejected_prediction_tokens":0,"reasoning_tokens":0},"prompt_tokens":38,"prompt_tokens_details":{"cached_tokens":0}},"time_info":{"created":1778106831.5465255,"queue_time":7.401e-05,"prompt_time":0.001719314,"completion_time":0.003982367,"total_time":0.0068645477294921875}}'
headers:
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
cf-ray:
- CF-RAY-XXX
content-type:
- application/json
date:
- Wed, 06 May 2026 22:33:51 GMT
inference-id:
- chatcmpl-5bf61f2f-c4a5-4122-b921-d8cba7d8ebc0
referrer-policy:
- REFERRER-POLICY-XXX
server:
- cloudflare
strict-transport-security:
- STS-XXX
x-content-type-options:
- X-CONTENT-TYPE-XXX
x-ratelimit-limit-requests-day:
- '14400'
x-ratelimit-limit-requests-hour:
- '900'
x-ratelimit-limit-requests-minute:
- '30'
x-ratelimit-limit-tokens-day:
- '1000000'
x-ratelimit-limit-tokens-hour:
- '1000000'
x-ratelimit-limit-tokens-minute:
- '60000'
x-ratelimit-remaining-requests-day:
- '14397'
x-ratelimit-remaining-requests-hour:
- '898'
x-ratelimit-remaining-requests-minute:
- '28'
x-ratelimit-remaining-tokens-day:
- '999871'
x-ratelimit-remaining-tokens-hour:
- '999881'
x-ratelimit-remaining-tokens-minute:
- '59881'
x-request-id:
- X-REQUEST-ID-XXX
status:
code: 200
message: OK
version: 1

View File

@@ -0,0 +1,284 @@
"""Tests for the native Cerebras provider.
Two flavors:
- Unit tests under the ``Test*`` classes — exercise factory routing, field
normalization, env-var resolution, client construction, and the fallback
to OpenAI-compatible when the SDK extra is not installed. These run with
``CEREBRAS_API_KEY`` / ``CEREBRAS_BASE_URL`` cleared so each test states
the env it depends on.
- Module-level VCR tests — replay real Cerebras API responses from cassettes.
To re-record, set ``CEREBRAS_API_KEY`` and run with
``PYTEST_VCR_RECORD_MODE=new_episodes`` (or delete the target cassette
and use the default ``once`` mode).
"""
from __future__ import annotations
import builtins
import sys
from unittest.mock import patch
import pytest
from crewai.llm import LLM
from crewai.llms.providers.cerebras.completion import CerebrasCompletion
from crewai.llms.providers.openai_compatible.completion import (
OpenAICompatibleCompletion,
)
@pytest.fixture
def clear_cerebras_env(monkeypatch):
    """Strip Cerebras env vars so each test states the environment it needs."""
    for var_name in ("CEREBRAS_API_KEY", "CEREBRAS_BASE_URL"):
        monkeypatch.delenv(var_name, raising=False)
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasFactoryRouting:
    """Factory routing: LLM(...) should dispatch to the native Cerebras class."""

    def test_provider_prefix_routes_to_native(self):
        # A "cerebras/<model>" prefix alone is enough to select the native provider.
        llm = LLM(model="cerebras/gpt-oss-120b", api_key="sk-test")
        assert isinstance(llm, CerebrasCompletion)
        assert llm.llm_type == "cerebras"
        assert llm.provider == "cerebras"
        assert llm.is_litellm is False

    def test_explicit_provider_kwarg_routes_to_native(self):
        # provider="cerebras" routes natively even without the model-name prefix.
        llm = LLM(model="gpt-oss-120b", provider="cerebras", api_key="sk-test")
        assert isinstance(llm, CerebrasCompletion)

    def test_falls_back_to_openai_compat_when_sdk_missing(self, monkeypatch):
        # Drop any cached imports so the factory re-imports the cerebras module.
        for mod_name in list(sys.modules):
            if mod_name.startswith(
                "crewai.llms.providers.cerebras"
            ) or mod_name.startswith("cerebras"):
                monkeypatch.delitem(sys.modules, mod_name, raising=False)
        real_import = builtins.__import__

        def _import_blocker(name, *args, **kwargs):
            # Simulate the SDK extra being absent: only "cerebras*" imports
            # fail; everything else goes through the real __import__.
            if name.startswith("cerebras"):
                raise ImportError(f"simulated missing dep: {name}")
            return real_import(name, *args, **kwargs)

        monkeypatch.setattr(builtins, "__import__", _import_blocker)
        llm = LLM(model="cerebras/gpt-oss-120b", api_key="sk-test")
        # Without the SDK, factory falls through to the OpenAI-compatible path.
        assert isinstance(llm, OpenAICompatibleCompletion)
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasFieldNormalization:
    """Field defaults and normalization when constructing CerebrasCompletion."""

    def test_default_base_url_left_unset(self):
        # We deliberately don't pin a default base URL — the SDK has its own.
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="sk-test")
        assert completion.base_url is None
        assert completion.api == "completions"
        assert completion.provider == "cerebras"

    def test_env_var_base_url_override(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_BASE_URL", "https://custom.cerebras.example/v1")
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="sk-test")
        assert completion.base_url == "https://custom.cerebras.example/v1"

    def test_explicit_base_url_takes_precedence(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_BASE_URL", "https://from-env.example/v1")
        completion = CerebrasCompletion(
            model="gpt-oss-120b",
            api_key="sk-test",
            base_url="https://explicit.example/v1",
        )
        assert completion.base_url == "https://explicit.example/v1"

    def test_api_forced_to_completions(self):
        # Even if a caller tries to set api="responses", the validator clamps it.
        completion = CerebrasCompletion(
            model="gpt-oss-120b", api_key="sk-test", api="responses"
        )
        assert completion.api == "completions"
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasApiKeyResolution:
    """Resolution of api_key from explicit kwargs versus the environment."""

    def test_env_var_picked_up(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "env-key")
        completion = CerebrasCompletion(model="gpt-oss-120b")
        assert completion.api_key == "env-key"

    def test_explicit_api_key_takes_precedence(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "env-key")
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="explicit-key")
        assert completion.api_key == "explicit-key"

    def test_construction_succeeds_without_key(self):
        # Lazy client init: missing key should not crash construction.
        completion = CerebrasCompletion(model="gpt-oss-120b")
        assert completion.api_key is None

    def test_get_client_params_raises_without_key(self):
        # The key is only demanded when client params are actually built.
        completion = CerebrasCompletion(model="gpt-oss-120b")
        with pytest.raises(ValueError, match="CEREBRAS_API_KEY"):
            completion._get_client_params()
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasClientBuild:
    """Construction of the underlying Cerebras SDK clients and their params."""

    def test_sync_client_uses_cerebras_sdk(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(model="gpt-oss-120b")
        sync_client = completion._get_sync_client()
        from cerebras.cloud.sdk import Cerebras
        assert isinstance(sync_client, Cerebras)

    def test_async_client_uses_cerebras_sdk(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(model="gpt-oss-120b")
        async_client = completion._get_async_client()
        from cerebras.cloud.sdk import AsyncCerebras
        assert isinstance(async_client, AsyncCerebras)

    def test_client_params_threaded_through(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(
            model="gpt-oss-120b",
            timeout=30.0,
            max_retries=5,
            default_headers={"X-Custom": "yes"},
        )
        client_params = completion._get_client_params()
        assert client_params["timeout"] == 30.0
        assert client_params["max_retries"] == 5
        assert client_params["default_headers"] == {"X-Custom": "yes"}
        assert client_params["api_key"] == "sk-test"
        # base_url omitted so the SDK uses its own default.
        assert "base_url" not in client_params

    def test_client_params_includes_base_url_when_set(self, monkeypatch):
        monkeypatch.setenv("CEREBRAS_API_KEY", "sk-test")
        completion = CerebrasCompletion(
            model="gpt-oss-120b", base_url="https://override.example/api"
        )
        client_params = completion._get_client_params()
        assert client_params["base_url"] == "https://override.example/api"
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasConfigDict:
    """Cerebras-specific fields in the to_config_dict() serialization."""

    def test_specific_fields_included_when_set(self):
        completion = CerebrasCompletion(
            model="gpt-oss-120b",
            api_key="sk-test",
            service_tier="priority",
            prompt_cache_key="cache-1",
            clear_thinking=True,
            reasoning_effort="high",
        )
        config_dict = completion.to_config_dict()
        assert config_dict["service_tier"] == "priority"
        assert config_dict["prompt_cache_key"] == "cache-1"
        assert config_dict["clear_thinking"] is True
        assert config_dict["reasoning_effort"] == "high"

    def test_specific_fields_omitted_when_unset(self):
        completion = CerebrasCompletion(model="gpt-oss-120b", api_key="sk-test")
        config_dict = completion.to_config_dict()
        # Unset optional fields must not leak into the serialized config.
        for field_name in ("service_tier", "prompt_cache_key", "clear_thinking"):
            assert field_name not in config_dict
@pytest.mark.usefixtures("clear_cerebras_env")
class TestCerebrasCompletionParams:
    """Verify Cerebras-specific kwargs reach the chat.completions.create call."""

    def test_reasoning_effort_threaded_for_non_o1_models(self):
        completion = CerebrasCompletion(
            model="gpt-oss-120b", api_key="sk-test", reasoning_effort="high"
        )
        call_params = completion._prepare_completion_params(messages=[])
        # Parent gates this on is_o1_model — Cerebras must thread it regardless.
        assert call_params["reasoning_effort"] == "high"

    def test_service_tier_threaded(self):
        completion = CerebrasCompletion(
            model="llama3.1-8b", api_key="sk-test", service_tier="priority"
        )
        call_params = completion._prepare_completion_params(messages=[])
        assert call_params["service_tier"] == "priority"

    def test_prompt_cache_key_threaded(self):
        completion = CerebrasCompletion(
            model="llama3.1-8b", api_key="sk-test", prompt_cache_key="run-42"
        )
        call_params = completion._prepare_completion_params(messages=[])
        assert call_params["prompt_cache_key"] == "run-42"

    def test_clear_thinking_threaded(self):
        completion = CerebrasCompletion(
            model="zai-glm-4.7", api_key="sk-test", clear_thinking=True
        )
        call_params = completion._prepare_completion_params(messages=[])
        assert call_params["clear_thinking"] is True

    def test_cerebras_specific_fields_omitted_when_unset(self):
        completion = CerebrasCompletion(model="llama3.1-8b", api_key="sk-test")
        call_params = completion._prepare_completion_params(messages=[])
        # None of the optional Cerebras knobs should appear when unset.
        for field_name in (
            "service_tier",
            "prompt_cache_key",
            "clear_thinking",
            "reasoning_effort",
        ):
            assert field_name not in call_params
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_cerebras_basic_completion():
    """End-to-end completion against Cerebras (replays from cassette)."""
    llm = LLM(model="cerebras/llama3.1-8b", max_completion_tokens=32)
    # The factory must have routed to the native provider for the cassette to match.
    assert isinstance(llm, CerebrasCompletion)
    response_text = llm.call("Reply with exactly the word: OK")
    assert isinstance(response_text, str)
    assert response_text != ""
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_cerebras_streaming_completion():
    """Streaming completion against Cerebras (replays from cassette)."""
    llm = LLM(model="cerebras/llama3.1-8b", stream=True, max_completion_tokens=32)
    # The factory must have routed to the native provider for the cassette to match.
    assert isinstance(llm, CerebrasCompletion)
    response_text = llm.call("Count: one, two, three.")
    assert isinstance(response_text, str)
    assert response_text != ""
@pytest.mark.vcr(filter_headers=["authorization", "x-api-key"])
def test_cerebras_temperature_and_seed_passed_to_sdk():
    """Deterministic-sampling params reach the Cerebras SDK call.

    Wraps the client's ``chat.completions.create`` so the kwargs built by the
    provider can be inspected after the (cassette-replayed) call completes.
    """
    llm = LLM(
        model="cerebras/llama3.1-8b",
        temperature=0.0,
        seed=7,
        max_completion_tokens=64,
    )
    assert isinstance(llm, CerebrasCompletion)
    # Grab the real bound method before patching so the call still goes
    # through (served from the VCR cassette) while we record the kwargs.
    original_create = llm._client.chat.completions.create
    captured: dict = {}

    def capture_and_call(**kwargs):
        captured.update(kwargs)
        return original_create(**kwargs)

    with patch.object(
        llm._client.chat.completions, "create", side_effect=capture_and_call
    ):
        llm.call("Say hi.")
    # The provider strips the "cerebras/" prefix and forwards sampling params.
    assert captured["model"] == "llama3.1-8b"
    assert captured["temperature"] == 0.0
    assert captured["seed"] == 7

25
uv.lock generated
View File

@@ -13,7 +13,7 @@ resolution-markers = [
] ]
[options] [options]
exclude-newer = "2026-04-27T16:00:00Z" exclude-newer = "2026-04-28T07:00:00Z"
[manifest] [manifest]
members = [ members = [
@@ -789,6 +789,23 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114", size = 13918, upload-time = "2026-03-09T20:51:27.33Z" }, { url = "https://files.pythonhosted.org/packages/06/f3/39cf3367b8107baa44f861dc802cbf16263c945b62d8265d36034fc07bea/cachetools-7.0.5-py3-none-any.whl", hash = "sha256:46bc8ebefbe485407621d0a4264b23c080cedd913921bad7ac3ed2f26c183114", size = 13918, upload-time = "2026-03-09T20:51:27.33Z" },
] ]
[[package]]
name = "cerebras-cloud-sdk"
version = "1.67.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "distro" },
{ name = "httpx", extra = ["http2"] },
{ name = "pydantic" },
{ name = "sniffio" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/92/12/c201f07582068141e88f9a523ab02fdc97de58f2f7c0df775c6c52b9d8dd/cerebras_cloud_sdk-1.67.0.tar.gz", hash = "sha256:3aed6f86c6c7a83ee9d4cfb08a2acea089cebf2af5b8aed116ef79995a4f4813", size = 131536, upload-time = "2026-01-29T23:31:27.306Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7a/5e/36a364f3d1bab4073454b75e7c91dc7ec6879b960063d1a9c929f1c7ea71/cerebras_cloud_sdk-1.67.0-py3-none-any.whl", hash = "sha256:658b79ca2e9c16f75cc6b4e5d523ee014c9e54a88bd39f88905c28ecb33daae1", size = 97807, upload-time = "2026-01-29T23:31:25.77Z" },
]
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2026.2.25" version = "2026.2.25"
@@ -1330,6 +1347,9 @@ azure-ai-inference = [
bedrock = [ bedrock = [
{ name = "boto3" }, { name = "boto3" },
] ]
cerebras = [
{ name = "cerebras-cloud-sdk" },
]
docling = [ docling = [
{ name = "docling" }, { name = "docling" },
] ]
@@ -1383,6 +1403,7 @@ requires-dist = [
{ name = "azure-identity", marker = "extra == 'azure-ai-inference'", specifier = ">=1.17.0,<2" }, { name = "azure-identity", marker = "extra == 'azure-ai-inference'", specifier = ">=1.17.0,<2" },
{ name = "boto3", marker = "extra == 'aws'", specifier = "~=1.42.79" }, { name = "boto3", marker = "extra == 'aws'", specifier = "~=1.42.79" },
{ name = "boto3", marker = "extra == 'bedrock'", specifier = "~=1.42.79" }, { name = "boto3", marker = "extra == 'bedrock'", specifier = "~=1.42.79" },
{ name = "cerebras-cloud-sdk", marker = "extra == 'cerebras'", specifier = "~=1.67.0" },
{ name = "chromadb", specifier = "~=1.1.0" }, { name = "chromadb", specifier = "~=1.1.0" },
{ name = "click", specifier = "~=8.1.7" }, { name = "click", specifier = "~=8.1.7" },
{ name = "crewai-cli", editable = "lib/cli" }, { name = "crewai-cli", editable = "lib/cli" },
@@ -1426,7 +1447,7 @@ requires-dist = [
{ name = "tomli-w", specifier = "~=1.1.0" }, { name = "tomli-w", specifier = "~=1.1.0" },
{ name = "voyageai", marker = "extra == 'voyageai'", specifier = "~=0.3.5" }, { name = "voyageai", marker = "extra == 'voyageai'", specifier = "~=0.3.5" },
] ]
provides-extras = ["a2a", "anthropic", "aws", "azure-ai-inference", "bedrock", "docling", "embeddings", "file-processing", "google-genai", "litellm", "mem0", "openpyxl", "pandas", "qdrant", "qdrant-edge", "tools", "voyageai", "watson"] provides-extras = ["a2a", "anthropic", "aws", "azure-ai-inference", "bedrock", "cerebras", "docling", "embeddings", "file-processing", "google-genai", "litellm", "mem0", "openpyxl", "pandas", "qdrant", "qdrant-edge", "tools", "voyageai", "watson"]
[[package]] [[package]]
name = "crewai-cli" name = "crewai-cli"