update litellm to support o3-mini and deepseek. Update docs.

This commit is contained in:
Brandon Hancock
2025-02-04 10:58:34 -05:00
parent 23b9e10323
commit 748383d74c
9 changed files with 500 additions and 10 deletions

View File

@@ -0,0 +1,100 @@
interactions:
- request:
body: '{"model": "deepseek/deepseek-r1", "messages": [{"role": "user", "content":
"What is the capital of France?"}], "stop": [], "stream": false}'
headers:
accept:
- '*/*'
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '139'
host:
- openrouter.ai
http-referer:
- https://litellm.ai
user-agent:
- litellm/1.60.2
x-title:
- liteLLM
method: POST
uri: https://openrouter.ai/api/v1/chat/completions
response:
content: "\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n\n
\ \n\n \n\n \n\n \n\n \n\n \n{\"id\":\"gen-1738684300-YnD5WOSczQWsW0vQG78a\",\"provider\":\"Nebius\",\"model\":\"deepseek/deepseek-r1\",\"object\":\"chat.completion\",\"created\":1738684300,\"choices\":[{\"logprobs\":null,\"index\":0,\"message\":{\"role\":\"assistant\",\"content\":\"The
capital of France is **Paris**. Known for its iconic landmarks such as the Eiffel
Tower, Notre-Dame Cathedral, and the Louvre Museum, Paris has served as the
political and cultural center of France for centuries. \U0001F1EB\U0001F1F7\",\"refusal\":null}}],\"usage\":{\"prompt_tokens\":10,\"completion_tokens\":261,\"total_tokens\":271}}"
headers:
Access-Control-Allow-Origin:
- '*'
CF-RAY:
- 90cbd2ceaf3ead5e-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 04 Feb 2025 15:51:40 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding
x-clerk-auth-message:
- Invalid JWT form. A JWT consists of three parts separated by dots. (reason=token-invalid,
token-carrier=header)
x-clerk-auth-reason:
- token-invalid
x-clerk-auth-status:
- signed-out
http_version: HTTP/1.1
status_code: 200
version: 1

View File

@@ -0,0 +1,107 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the capital of France?"}],
"model": "o3-mini", "reasoning_effort": "high", "stop": []}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '137'
content-type:
- application/json
cookie:
- _cfuvid=etTqqA9SBOnENmrFAUBIexdW0v2ZeO1x9_Ek_WChlfU-1737568920137-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.61.0
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.61.0
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AxFNUz7l4pwtY9xhFSPIGlwNfE4Sj\",\n \"object\":
\"chat.completion\",\n \"created\": 1738683828,\n \"model\": \"o3-mini-2025-01-31\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The capital of France is Paris.\",\n
\ \"refusal\": null\n },\n \"finish_reason\": \"stop\"\n }\n
\ ],\n \"usage\": {\n \"prompt_tokens\": 13,\n \"completion_tokens\":
81,\n \"total_tokens\": 94,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 64,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_8bcaa0ca21\"\n}\n"
headers:
CF-RAY:
- 90cbc745d91fb0ca-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 04 Feb 2025 15:43:50 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=.AP74BirsYr.lu61bSaimK2HRF6126qr5vCrr3HC6ak-1738683830-1.0.1.1-feh.bcMOv9wYnitoPpr.7UR7JrzCsbRLlzct09xCDm2SwmnRQQk5ZSSV41Ywer2S0rptbvufFwklV9wo9ATvWw;
path=/; expires=Tue, 04-Feb-25 16:13:50 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=JBfx8Sl7w82A0S_K1tQd5ZcwzWaZP5Gg5W1dqAdgwNU-1738683830528-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '2169'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999974'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_163e7bd79cb5a5e62d4688245b97d1d9
http_version: HTTP/1.1
status_code: 200
version: 1

View File

@@ -0,0 +1,102 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the capital of France?"}],
"model": "o3-mini", "reasoning_effort": "low", "stop": []}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '136'
content-type:
- application/json
cookie:
- _cfuvid=JBfx8Sl7w82A0S_K1tQd5ZcwzWaZP5Gg5W1dqAdgwNU-1738683830528-0.0.1.1-604800000;
__cf_bm=.AP74BirsYr.lu61bSaimK2HRF6126qr5vCrr3HC6ak-1738683830-1.0.1.1-feh.bcMOv9wYnitoPpr.7UR7JrzCsbRLlzct09xCDm2SwmnRQQk5ZSSV41Ywer2S0rptbvufFwklV9wo9ATvWw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.61.0
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.61.0
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AxFNWljEYFrf5qRwYj73OPQtAnPbF\",\n \"object\":
\"chat.completion\",\n \"created\": 1738683830,\n \"model\": \"o3-mini-2025-01-31\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The capital of France is Paris.\",\n
\ \"refusal\": null\n },\n \"finish_reason\": \"stop\"\n }\n
\ ],\n \"usage\": {\n \"prompt_tokens\": 13,\n \"completion_tokens\":
17,\n \"total_tokens\": 30,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_8bcaa0ca21\"\n}\n"
headers:
CF-RAY:
- 90cbc7551fe0b0ca-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 04 Feb 2025 15:43:51 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '1103'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999974'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_fd7178a0e5060216d04f3bd023e8bca1
http_version: HTTP/1.1
status_code: 200
version: 1

View File

@@ -0,0 +1,102 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "What is the capital of France?"}],
"model": "o3-mini", "reasoning_effort": "medium", "stop": []}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '139'
content-type:
- application/json
cookie:
- _cfuvid=JBfx8Sl7w82A0S_K1tQd5ZcwzWaZP5Gg5W1dqAdgwNU-1738683830528-0.0.1.1-604800000;
__cf_bm=.AP74BirsYr.lu61bSaimK2HRF6126qr5vCrr3HC6ak-1738683830-1.0.1.1-feh.bcMOv9wYnitoPpr.7UR7JrzCsbRLlzct09xCDm2SwmnRQQk5ZSSV41Ywer2S0rptbvufFwklV9wo9ATvWw
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.61.0
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.61.0
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AxFS8IuMeYs6Rky2UbG8wH8P5PR4k\",\n \"object\":
\"chat.completion\",\n \"created\": 1738684116,\n \"model\": \"o3-mini-2025-01-31\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"The capital of France is Paris.\",\n
\ \"refusal\": null\n },\n \"finish_reason\": \"stop\"\n }\n
\ ],\n \"usage\": {\n \"prompt_tokens\": 13,\n \"completion_tokens\":
145,\n \"total_tokens\": 158,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n
\ \"reasoning_tokens\": 128,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_8bcaa0ca21\"\n}\n"
headers:
CF-RAY:
- 90cbce51b946afb4-ATL
Connection:
- keep-alive
Content-Encoding:
- gzip
Content-Type:
- application/json
Date:
- Tue, 04 Feb 2025 15:48:39 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
openai-processing-ms:
- '2365'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '30000'
x-ratelimit-limit-tokens:
- '150000000'
x-ratelimit-remaining-requests:
- '29999'
x-ratelimit-remaining-tokens:
- '149999974'
x-ratelimit-reset-requests:
- 2ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_bfd83679e674c3894991477f1fb043b2
http_version: HTTP/1.1
status_code: 200
version: 1

View File

@@ -1,3 +1,4 @@
import os
from time import sleep
from unittest.mock import MagicMock, patch
@@ -202,3 +203,51 @@ def test_llm_passes_additional_params():
# Check the result from llm.call
assert result == "Test response"
@pytest.mark.vcr(filter_headers=["authorization"])
def test_o3_mini_reasoning_effort_high():
llm = LLM(
model="o3-mini",
reasoning_effort="high",
)
result = llm.call("What is the capital of France?")
assert isinstance(result, str)
assert "Paris" in result
@pytest.mark.vcr(filter_headers=["authorization"])
def test_o3_mini_reasoning_effort_low():
llm = LLM(
model="o3-mini",
reasoning_effort="low",
)
result = llm.call("What is the capital of France?")
assert isinstance(result, str)
assert "Paris" in result
@pytest.mark.vcr(filter_headers=["authorization"])
def test_o3_mini_reasoning_effort_medium():
llm = LLM(
model="o3-mini",
reasoning_effort="medium",
)
result = llm.call("What is the capital of France?")
assert isinstance(result, str)
assert "Paris" in result
@pytest.mark.vcr(filter_headers=["authorization"])
def test_deepseek_r1_with_open_router():
if not os.getenv("OPEN_ROUTER_API_KEY"):
pytest.skip("OPEN_ROUTER_API_KEY not set; skipping test.")
llm = LLM(
model="openrouter/deepseek/deepseek-r1",
base_url="https://openrouter.ai/api/v1",
api_key=os.getenv("OPEN_ROUTER_API_KEY"),
)
result = llm.call("What is the capital of France?")
assert isinstance(result, str)
assert "Paris" in result