mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
Adding usage info in llm.py (#4172)
* Adding usage info everywhere * Changing the check * Changing the logic * Adding tests * Adding casellets * Minor change * Fixing testcase * remove the duplicated test case, thanks to cursor * Adding async test cases * Updating test case --------- Co-authored-by: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com>
This commit is contained in:
@@ -925,11 +925,12 @@ class LLM(BaseLLM):
|
||||
except Exception as e:
|
||||
logging.debug(f"Error checking for tool calls: {e}")
|
||||
|
||||
# Track token usage and log callbacks if available in streaming mode
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||
|
||||
if not tool_calls or not available_functions:
|
||||
# Track token usage and log callbacks if available in streaming mode
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||
|
||||
if response_model and self.is_litellm:
|
||||
instructor_instance = InternalInstructor(
|
||||
@@ -962,12 +963,7 @@ class LLM(BaseLLM):
|
||||
if tool_result is not None:
|
||||
return tool_result
|
||||
|
||||
# --- 10) Track token usage and log callbacks if available in streaming mode
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
self._handle_streaming_callbacks(callbacks, usage_info, last_chunk)
|
||||
|
||||
# --- 11) Emit completion event and return response
|
||||
# --- 10) Emit completion event and return response
|
||||
self._handle_emit_call_events(
|
||||
response=full_response,
|
||||
call_type=LLMCallType.LLM_CALL,
|
||||
@@ -1148,6 +1144,10 @@ class LLM(BaseLLM):
|
||||
if response_model:
|
||||
params["response_model"] = response_model
|
||||
response = litellm.completion(**params)
|
||||
|
||||
if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage:
|
||||
usage_info = response.usage
|
||||
self._track_token_usage_internal(usage_info)
|
||||
|
||||
except ContextWindowExceededError as e:
|
||||
# Convert litellm's context window error to our own exception type
|
||||
@@ -1273,6 +1273,10 @@ class LLM(BaseLLM):
|
||||
params["response_model"] = response_model
|
||||
response = await litellm.acompletion(**params)
|
||||
|
||||
if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage:
|
||||
usage_info = response.usage
|
||||
self._track_token_usage_internal(usage_info)
|
||||
|
||||
except ContextWindowExceededError as e:
|
||||
raise LLMContextLengthExceededError(str(e)) from e
|
||||
|
||||
@@ -1359,6 +1363,7 @@ class LLM(BaseLLM):
|
||||
"""
|
||||
full_response = ""
|
||||
chunk_count = 0
|
||||
|
||||
usage_info = None
|
||||
|
||||
accumulated_tool_args: defaultdict[int, AccumulatedToolArgs] = defaultdict(
|
||||
@@ -1444,6 +1449,9 @@ class LLM(BaseLLM):
|
||||
end_time=0,
|
||||
)
|
||||
|
||||
if usage_info:
|
||||
self._track_token_usage_internal(usage_info)
|
||||
|
||||
if accumulated_tool_args and available_functions:
|
||||
# Convert accumulated tool args to ChatCompletionDeltaToolCall objects
|
||||
tool_calls_list: list[ChatCompletionDeltaToolCall] = [
|
||||
|
||||
@@ -0,0 +1,113 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[]}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '90'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- async:asyncio
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 2.14.0
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.14
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"id\": \"chatcmpl-CvErx9mbnUKFHKkhPChO93eUzKJqy\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1767757889,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"Why did the scarecrow win an award?
|
||||
\\n\\nBecause he was outstanding in his field!\",\n \"refusal\": null,\n
|
||||
\ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\":
|
||||
\"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 12,\n \"completion_tokens\":
|
||||
18,\n \"total_tokens\": 30,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
|
||||
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
|
||||
\"default\",\n \"system_fingerprint\": \"fp_29330a9688\"\n}\n"
|
||||
headers:
|
||||
Access-Control-Expose-Headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Wed, 07 Jan 2026 03:51:29 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- SET-COOKIE-XXX
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
content-length:
|
||||
- '887'
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '466'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
x-envoy-upstream-service-time:
|
||||
- '483'
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,113 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[]}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '90'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 2.14.0
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.14
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: "{\n \"id\": \"chatcmpl-CugAsv9iAHdiGddGDHcZWEp7ZV7cB\",\n \"object\":
|
||||
\"chat.completion\",\n \"created\": 1767624522,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n
|
||||
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
|
||||
\"assistant\",\n \"content\": \"Why don't skeletons fight each other?
|
||||
\\n\\nThey don't have the guts!\",\n \"refusal\": null,\n \"annotations\":
|
||||
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
|
||||
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 12,\n \"completion_tokens\":
|
||||
15,\n \"total_tokens\": 27,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
|
||||
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
|
||||
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
|
||||
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
|
||||
\"default\",\n \"system_fingerprint\": \"fp_29330a9688\"\n}\n"
|
||||
headers:
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Mon, 05 Jan 2026 14:48:43 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- SET-COOKIE-XXX
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
access-control-expose-headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
content-length:
|
||||
- '874'
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '424'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
x-envoy-upstream-service-time:
|
||||
- '1017'
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,179 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[],"stream":true,"stream_options":{"include_usage":true}}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '144'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- async:asyncio
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 2.14.0
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.14
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: 'data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"k9LESwMhk"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"tYMBX9z8"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
did"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"X5lpC48"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
the"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ns5pnmO"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
scare"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"cUTYl"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"crow"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ZvHPszH"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
win"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"pLKQ5rM"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
an"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Yl8vxgvM"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
award"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xfxd0"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SFxdiZP3Uh"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
\n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Sysruv"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OeZH"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
he"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"epBJpPYm"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
was"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5Bofkug"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
outstanding"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ylIDIBTCqSLy3tA"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
in"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"lLi2lQc4"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
his"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"fi47Jij"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
field"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Kkiyw"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RMcUfqa93e"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"rAtJI"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":18,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"aTyTRaiahL"}
|
||||
|
||||
|
||||
data: [DONE]
|
||||
|
||||
|
||||
'
|
||||
headers:
|
||||
Access-Control-Expose-Headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- text/event-stream; charset=utf-8
|
||||
Date:
|
||||
- Wed, 07 Jan 2026 04:09:13 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- SET-COOKIE-XXX
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '243'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
x-envoy-upstream-service-time:
|
||||
- '645'
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -0,0 +1,179 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[],"stream":true,"stream_options":{"include_usage":true}}'
|
||||
headers:
|
||||
User-Agent:
|
||||
- X-USER-AGENT-XXX
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- ACCEPT-ENCODING-XXX
|
||||
authorization:
|
||||
- AUTHORIZATION-XXX
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '144'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- COOKIE-XXX
|
||||
host:
|
||||
- api.openai.com
|
||||
x-stainless-arch:
|
||||
- X-STAINLESS-ARCH-XXX
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- X-STAINLESS-OS-XXX
|
||||
x-stainless-package-version:
|
||||
- 2.14.0
|
||||
x-stainless-raw-response:
|
||||
- 'true'
|
||||
x-stainless-read-timeout:
|
||||
- X-STAINLESS-READ-TIMEOUT-XXX
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.14
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: 'data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SVnFynat2"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"M0Y4Qurw"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
did"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"LknkzkM"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
the"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"45ePnqI"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
scare"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DsJ1r"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"crow"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"9sXjMg0"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
win"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"UlTRXCu"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
an"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"He218dPh"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
award"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"CO1Dc"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nHS3XxEjuW"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
\n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"IhBQDR"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"TJzX"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
he"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AjRyStfn"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
was"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"2AZtzyA"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
outstanding"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"XfziOItr8wziIap"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
in"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"7hXp54s6"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
his"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RPmgnK3"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"
|
||||
field"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"uqtNk"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Wziup4uj7N"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"q9paY"}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":18,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"TWmOWpZx0s"}
|
||||
|
||||
|
||||
data: [DONE]
|
||||
|
||||
|
||||
'
|
||||
headers:
|
||||
CF-RAY:
|
||||
- CF-RAY-XXX
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- text/event-stream; charset=utf-8
|
||||
Date:
|
||||
- Mon, 05 Jan 2026 14:48:44 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Strict-Transport-Security:
|
||||
- STS-XXX
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- X-CONTENT-TYPE-XXX
|
||||
access-control-expose-headers:
|
||||
- ACCESS-CONTROL-XXX
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
openai-organization:
|
||||
- OPENAI-ORG-XXX
|
||||
openai-processing-ms:
|
||||
- '227'
|
||||
openai-project:
|
||||
- OPENAI-PROJECT-XXX
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
x-envoy-upstream-service-time:
|
||||
- '645'
|
||||
x-openai-proxy-wasm:
|
||||
- v0.1
|
||||
x-ratelimit-limit-requests:
|
||||
- X-RATELIMIT-LIMIT-REQUESTS-XXX
|
||||
x-ratelimit-limit-tokens:
|
||||
- X-RATELIMIT-LIMIT-TOKENS-XXX
|
||||
x-ratelimit-remaining-requests:
|
||||
- X-RATELIMIT-REMAINING-REQUESTS-XXX
|
||||
x-ratelimit-remaining-tokens:
|
||||
- X-RATELIMIT-REMAINING-TOKENS-XXX
|
||||
x-ratelimit-reset-requests:
|
||||
- X-RATELIMIT-RESET-REQUESTS-XXX
|
||||
x-ratelimit-reset-tokens:
|
||||
- X-RATELIMIT-RESET-TOKENS-XXX
|
||||
x-request-id:
|
||||
- X-REQUEST-ID-XXX
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -877,3 +877,116 @@ def test_validate_model_in_constants():
|
||||
LLM._validate_model_in_constants("anthropic.claude-future-v1:0", "bedrock")
|
||||
is True
|
||||
)
|
||||
|
||||
@pytest.mark.vcr(record_mode="once",decode_compressed_response=True)
|
||||
def test_usage_info_non_streaming_with_call():
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True)
|
||||
assert llm._token_usage == {
|
||||
"total_tokens": 0,
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"successful_requests": 0,
|
||||
"cached_prompt_tokens": 0,
|
||||
}
|
||||
assert llm.stream is False
|
||||
|
||||
with patch.object(
|
||||
llm, "_handle_non_streaming_response", wraps=llm._handle_non_streaming_response
|
||||
) as mock_handle:
|
||||
llm.call("Tell me a joke.")
|
||||
mock_handle.assert_called_once()
|
||||
|
||||
assert llm._token_usage["total_tokens"] > 0
|
||||
assert llm._token_usage["prompt_tokens"] > 0
|
||||
assert llm._token_usage["completion_tokens"] > 0
|
||||
assert llm._token_usage["successful_requests"] == 1
|
||||
|
||||
|
||||
@pytest.mark.vcr(record_mode="once",decode_compressed_response=True)
|
||||
def test_usage_info_streaming_with_call():
|
||||
llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
|
||||
assert llm._token_usage == {
|
||||
"total_tokens": 0,
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"successful_requests": 0,
|
||||
"cached_prompt_tokens": 0,
|
||||
}
|
||||
assert llm.stream is True
|
||||
|
||||
with patch.object(
|
||||
llm, "_handle_streaming_response", wraps=llm._handle_streaming_response
|
||||
) as mock_handle:
|
||||
llm.call("Tell me a joke.")
|
||||
mock_handle.assert_called_once()
|
||||
|
||||
assert llm._token_usage["total_tokens"] > 0
|
||||
assert llm._token_usage["prompt_tokens"] > 0
|
||||
assert llm._token_usage["completion_tokens"] > 0
|
||||
assert llm._token_usage["successful_requests"] == 1
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.vcr(record_mode="once",decode_compressed_response=True,match_on=["method", "scheme", "host", "path", "body"])
|
||||
async def test_usage_info_non_streaming_with_acall():
|
||||
llm = LLM(
|
||||
model="openai/gpt-4o-mini",
|
||||
is_litellm=True,
|
||||
stream=False,
|
||||
)
|
||||
|
||||
# sanity check
|
||||
assert llm._token_usage == {
|
||||
"total_tokens": 0,
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"successful_requests": 0,
|
||||
"cached_prompt_tokens": 0,
|
||||
}
|
||||
|
||||
with patch.object(
|
||||
llm, "_ahandle_non_streaming_response", wraps=llm._ahandle_non_streaming_response
|
||||
) as mock_handle:
|
||||
result = await llm.acall("Tell me a joke.")
|
||||
mock_handle.assert_called_once()
|
||||
|
||||
# token usage assertions (robust)
|
||||
assert llm._token_usage["successful_requests"] == 1
|
||||
assert llm._token_usage["prompt_tokens"] > 0
|
||||
assert llm._token_usage["completion_tokens"] > 0
|
||||
assert llm._token_usage["total_tokens"] > 0
|
||||
|
||||
assert len(result) > 0
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.vcr(record_mode="none",decode_compressed_response=True,match_on=["method", "scheme", "host", "path", "body"])
|
||||
async def test_usage_info_streaming_with_acall():
|
||||
llm = LLM(
|
||||
model="gpt-4o-mini",
|
||||
is_litellm=True,
|
||||
stream=True,
|
||||
)
|
||||
|
||||
assert llm.stream is True
|
||||
assert llm._token_usage == {
|
||||
"total_tokens": 0,
|
||||
"prompt_tokens": 0,
|
||||
"completion_tokens": 0,
|
||||
"successful_requests": 0,
|
||||
"cached_prompt_tokens": 0,
|
||||
}
|
||||
|
||||
with patch.object(
|
||||
llm, "_ahandle_streaming_response", wraps=llm._ahandle_streaming_response
|
||||
) as mock_handle:
|
||||
result = await llm.acall("Tell me a joke.")
|
||||
mock_handle.assert_called_once()
|
||||
|
||||
|
||||
assert llm._token_usage["successful_requests"] == 1
|
||||
assert llm._token_usage["prompt_tokens"] > 0
|
||||
assert llm._token_usage["completion_tokens"] > 0
|
||||
assert llm._token_usage["total_tokens"] > 0
|
||||
|
||||
assert len(result) > 0
|
||||
Reference in New Issue
Block a user