From 1c4f44af808c32fff8fb01c3a23bc0594d365d3e Mon Sep 17 00:00:00 2001 From: Vidit Ostwal <110953813+Vidit-Ostwal@users.noreply.github.com> Date: Thu, 8 Jan 2026 00:12:27 +0530 Subject: [PATCH] Adding usage info in llm.py (#4172) * Adding usage info everywhere * Changing the check * Changing the logic * Adding tests * Adding casellets * Minor change * Fixing testcase * remove the duplicated test case, thanks to cursor * Adding async test cases * Updating test case --------- Co-authored-by: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com> --- lib/crewai/src/crewai/llm.py | 28 ++- ...t_usage_info_non_streaming_with_acall.yaml | 113 +++++++++++ ...st_usage_info_non_streaming_with_call.yaml | 113 +++++++++++ .../test_usage_info_streaming_with_acall.yaml | 179 ++++++++++++++++++ .../test_usage_info_streaming_with_call.yaml | 179 ++++++++++++++++++ lib/crewai/tests/test_llm.py | 113 +++++++++++ 6 files changed, 715 insertions(+), 10 deletions(-) create mode 100644 lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_acall.yaml create mode 100644 lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_call.yaml create mode 100644 lib/crewai/tests/cassettes/test_usage_info_streaming_with_acall.yaml create mode 100644 lib/crewai/tests/cassettes/test_usage_info_streaming_with_call.yaml diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 77053deeb..8bc1fe648 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -925,11 +925,12 @@ class LLM(BaseLLM): except Exception as e: logging.debug(f"Error checking for tool calls: {e}") + # Track token usage and log callbacks if available in streaming mode + if usage_info: + self._track_token_usage_internal(usage_info) + self._handle_streaming_callbacks(callbacks, usage_info, last_chunk) + if not tool_calls or not available_functions: - # Track token usage and log callbacks if available in streaming mode - if usage_info: - self._track_token_usage_internal(usage_info) - self._handle_streaming_callbacks(callbacks, usage_info, last_chunk) if response_model and self.is_litellm: instructor_instance = InternalInstructor( @@ -962,12 +963,7 @@ class LLM(BaseLLM): if tool_result is not None: return tool_result - # --- 10) Track token usage and log callbacks if available in streaming mode - if usage_info: - self._track_token_usage_internal(usage_info) - self._handle_streaming_callbacks(callbacks, usage_info, last_chunk) - - # --- 11) Emit completion event and return response + # --- 10) Emit completion event and return response self._handle_emit_call_events( response=full_response, call_type=LLMCallType.LLM_CALL, @@ -1148,6 +1144,10 @@ class LLM(BaseLLM): if response_model: params["response_model"] = response_model response = litellm.completion(**params) + + if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage: + usage_info = response.usage + self._track_token_usage_internal(usage_info) except ContextWindowExceededError as e: # Convert litellm's context window error to our own exception type @@ -1273,6 +1273,10 @@ class LLM(BaseLLM): params["response_model"] = response_model response = await litellm.acompletion(**params) + if hasattr(response,"usage") and not isinstance(response.usage, type) and response.usage: + usage_info = response.usage + self._track_token_usage_internal(usage_info) + except ContextWindowExceededError as e: raise LLMContextLengthExceededError(str(e)) from e @@ -1359,6 +1363,7 @@ class LLM(BaseLLM): """ full_response = "" chunk_count = 0 + usage_info = None accumulated_tool_args: defaultdict[int, AccumulatedToolArgs] = defaultdict( @@ -1444,6 +1449,9 @@ class LLM(BaseLLM): end_time=0, ) + if usage_info: + self._track_token_usage_internal(usage_info) + if accumulated_tool_args and available_functions: # Convert accumulated tool args to ChatCompletionDeltaToolCall objects tool_calls_list: list[ChatCompletionDeltaToolCall] = [ diff --git a/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_acall.yaml b/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_acall.yaml new file mode 100644 index 000000000..b123e9f99 --- /dev/null +++ b/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_acall.yaml @@ -0,0 +1,113 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '90' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 2.14.0 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.14 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CvErx9mbnUKFHKkhPChO93eUzKJqy\",\n \"object\": + \"chat.completion\",\n \"created\": 1767757889,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Why did the scarecrow win an award? + \\n\\nBecause he was outstanding in his field!\",\n \"refusal\": null,\n + \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\": + \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 12,\n \"completion_tokens\": + 18,\n \"total_tokens\": 30,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_29330a9688\"\n}\n" + headers: + Access-Control-Expose-Headers: + - ACCESS-CONTROL-XXX + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Wed, 07 Jan 2026 03:51:29 GMT + Server: + - cloudflare + Set-Cookie: + - SET-COOKIE-XXX + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + content-length: + - '887' + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '466' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '483' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_call.yaml b/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_call.yaml new file mode 100644 index 000000000..ec0f25047 --- /dev/null +++ b/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_call.yaml @@ -0,0 +1,113 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[]}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '90' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 2.14.0 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.14 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-CugAsv9iAHdiGddGDHcZWEp7ZV7cB\",\n \"object\": + \"chat.completion\",\n \"created\": 1767624522,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Why don't skeletons fight each other? + \\n\\nThey don't have the guts!\",\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 12,\n \"completion_tokens\": + 15,\n \"total_tokens\": 27,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_29330a9688\"\n}\n" + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 05 Jan 2026 14:48:43 GMT + Server: + - cloudflare + Set-Cookie: + - SET-COOKIE-XXX + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + content-length: + - '874' + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '424' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '1017' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/test_usage_info_streaming_with_acall.yaml b/lib/crewai/tests/cassettes/test_usage_info_streaming_with_acall.yaml new file mode 100644 index 000000000..c2257d581 --- /dev/null +++ b/lib/crewai/tests/cassettes/test_usage_info_streaming_with_acall.yaml @@ -0,0 +1,179 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[],"stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - async:asyncio + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 2.14.0 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.14 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"k9LESwMhk"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"tYMBX9z8"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + did"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"X5lpC48"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ns5pnmO"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + scare"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"cUTYl"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"crow"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ZvHPszH"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + win"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"pLKQ5rM"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + an"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Yl8vxgvM"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + award"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xfxd0"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SFxdiZP3Uh"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + \n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Sysruv"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OeZH"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + he"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"epBJpPYm"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + was"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5Bofkug"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + outstanding"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ylIDIBTCqSLy3tA"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + in"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"lLi2lQc4"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + his"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"fi47Jij"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + field"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Kkiyw"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RMcUfqa93e"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"rAtJI"} + + + data: {"id":"chatcmpl-CvF96exJN1ZmQQ0zfOWhGs2UqetwZ","object":"chat.completion.chunk","created":1767758952,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":18,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"aTyTRaiahL"} + + + data: [DONE] + + + ' + headers: + Access-Control-Expose-Headers: + - ACCESS-CONTROL-XXX + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Wed, 07 Jan 2026 04:09:13 GMT + Server: + - cloudflare + Set-Cookie: + - SET-COOKIE-XXX + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '243' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '645' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/cassettes/test_usage_info_streaming_with_call.yaml b/lib/crewai/tests/cassettes/test_usage_info_streaming_with_call.yaml new file mode 100644 index 000000000..dc10b77ec --- /dev/null +++ b/lib/crewai/tests/cassettes/test_usage_info_streaming_with_call.yaml @@ -0,0 +1,179 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":[],"stream":true,"stream_options":{"include_usage":true}}' + headers: + User-Agent: + - X-USER-AGENT-XXX + accept: + - application/json + accept-encoding: + - ACCEPT-ENCODING-XXX + authorization: + - AUTHORIZATION-XXX + connection: + - keep-alive + content-length: + - '144' + content-type: + - application/json + cookie: + - COOKIE-XXX + host: + - api.openai.com + x-stainless-arch: + - X-STAINLESS-ARCH-XXX + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - X-STAINLESS-OS-XXX + x-stainless-package-version: + - 2.14.0 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - X-STAINLESS-READ-TIMEOUT-XXX + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.14 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"SVnFynat2"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Why"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"M0Y4Qurw"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + did"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"LknkzkM"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + the"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"45ePnqI"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + scare"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DsJ1r"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"crow"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"9sXjMg0"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + win"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"UlTRXCu"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + an"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"He218dPh"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + award"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"CO1Dc"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nHS3XxEjuW"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + \n\n"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"IhBQDR"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"Because"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"TJzX"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + he"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"AjRyStfn"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + was"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"2AZtzyA"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + outstanding"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"XfziOItr8wziIap"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + in"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"7hXp54s6"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + his"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RPmgnK3"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":" + field"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"uqtNk"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Wziup4uj7N"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"q9paY"} + + + data: {"id":"chatcmpl-CugAuE9ctOkFjqIbmxWZpxeNX7gWt","object":"chat.completion.chunk","created":1767624524,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_29330a9688","choices":[],"usage":{"prompt_tokens":12,"completion_tokens":18,"total_tokens":30,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"TWmOWpZx0s"} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - CF-RAY-XXX + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 05 Jan 2026 14:48:44 GMT + Server: + - cloudflare + Strict-Transport-Security: + - STS-XXX + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - X-CONTENT-TYPE-XXX + access-control-expose-headers: + - ACCESS-CONTROL-XXX + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - OPENAI-ORG-XXX + openai-processing-ms: + - '227' + openai-project: + - OPENAI-PROJECT-XXX + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '645' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - X-RATELIMIT-LIMIT-REQUESTS-XXX + x-ratelimit-limit-tokens: + - X-RATELIMIT-LIMIT-TOKENS-XXX + x-ratelimit-remaining-requests: + - X-RATELIMIT-REMAINING-REQUESTS-XXX + x-ratelimit-remaining-tokens: + - X-RATELIMIT-REMAINING-TOKENS-XXX + x-ratelimit-reset-requests: + - X-RATELIMIT-RESET-REQUESTS-XXX + x-ratelimit-reset-tokens: + - X-RATELIMIT-RESET-TOKENS-XXX + x-request-id: + - X-REQUEST-ID-XXX + status: + code: 200 + message: OK +version: 1 diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py index 6f3bcd70a..a8b6a7a3f 100644 --- a/lib/crewai/tests/test_llm.py +++ b/lib/crewai/tests/test_llm.py @@ -877,3 +877,116 @@ def test_validate_model_in_constants(): LLM._validate_model_in_constants("anthropic.claude-future-v1:0", "bedrock") is True ) + +@pytest.mark.vcr(record_mode="once",decode_compressed_response=True) +def test_usage_info_non_streaming_with_call(): + llm = LLM(model="gpt-4o-mini", is_litellm=True) + assert llm._token_usage == { + "total_tokens": 0, + "prompt_tokens": 0, + "completion_tokens": 0, + "successful_requests": 0, + "cached_prompt_tokens": 0, + } + assert llm.stream is False + + with patch.object( + llm, "_handle_non_streaming_response", wraps=llm._handle_non_streaming_response + ) as mock_handle: + llm.call("Tell me a joke.") + mock_handle.assert_called_once() + + assert llm._token_usage["total_tokens"] > 0 + assert llm._token_usage["prompt_tokens"] > 0 + assert llm._token_usage["completion_tokens"] > 0 + assert llm._token_usage["successful_requests"] == 1 + + +@pytest.mark.vcr(record_mode="once",decode_compressed_response=True) +def test_usage_info_streaming_with_call(): + llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True) + assert llm._token_usage == { + "total_tokens": 0, + "prompt_tokens": 0, + "completion_tokens": 0, + "successful_requests": 0, + "cached_prompt_tokens": 0, + } + assert llm.stream is True + + with patch.object( + llm, "_handle_streaming_response", wraps=llm._handle_streaming_response + ) as mock_handle: + llm.call("Tell me a joke.") + mock_handle.assert_called_once() + + assert llm._token_usage["total_tokens"] > 0 + assert llm._token_usage["prompt_tokens"] > 0 + assert llm._token_usage["completion_tokens"] > 0 + assert llm._token_usage["successful_requests"] == 1 + + +@pytest.mark.asyncio +@pytest.mark.vcr(record_mode="once",decode_compressed_response=True,match_on=["method", "scheme", "host", "path", "body"]) +async def test_usage_info_non_streaming_with_acall(): + llm = LLM( + model="openai/gpt-4o-mini", + is_litellm=True, + stream=False, + ) + + # sanity check + assert llm._token_usage == { + "total_tokens": 0, + "prompt_tokens": 0, + "completion_tokens": 0, + "successful_requests": 0, + "cached_prompt_tokens": 0, + } + + with patch.object( + llm, "_ahandle_non_streaming_response", wraps=llm._ahandle_non_streaming_response + ) as mock_handle: + result = await llm.acall("Tell me a joke.") + mock_handle.assert_called_once() + + # token usage assertions (robust) + assert llm._token_usage["successful_requests"] == 1 + assert llm._token_usage["prompt_tokens"] > 0 + assert llm._token_usage["completion_tokens"] > 0 + assert llm._token_usage["total_tokens"] > 0 + + assert len(result) > 0 + + +@pytest.mark.asyncio +@pytest.mark.vcr(record_mode="none",decode_compressed_response=True,match_on=["method", "scheme", "host", "path", "body"]) +async def test_usage_info_streaming_with_acall(): + llm = LLM( + model="gpt-4o-mini", + is_litellm=True, + stream=True, + ) + + assert llm.stream is True + assert llm._token_usage == { + "total_tokens": 0, + "prompt_tokens": 0, + "completion_tokens": 0, + "successful_requests": 0, + "cached_prompt_tokens": 0, + } + + with patch.object( + llm, "_ahandle_streaming_response", wraps=llm._ahandle_streaming_response + ) as mock_handle: + result = await llm.acall("Tell me a joke.") + mock_handle.assert_called_once() + + + assert llm._token_usage["successful_requests"] == 1 + assert llm._token_usage["prompt_tokens"] > 0 + assert llm._token_usage["completion_tokens"] > 0 + assert llm._token_usage["total_tokens"] > 0 + + assert len(result) > 0 \ No newline at end of file