diff --git a/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_acall_and_stop.yaml b/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_acall_and_stop.yaml
new file mode 100644
index 000000000..32ee39bbd
--- /dev/null
+++ b/lib/crewai/tests/cassettes/test_usage_info_non_streaming_with_acall_and_stop.yaml
@@ -0,0 +1,213 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":["END"]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '95'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - async:asyncio
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-D12bQ6vMcLYBu6WGhMwQKenus7ejH\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1769140704,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Why did the scarecrow win an award?\\n\\nBecause
+        he was outstanding in his field!\",\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n
+        \    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 12,\n    \"completion_tokens\":
+        17,\n    \"total_tokens\": 29,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_29330a9688\"\n}\n"
+    headers:
+      Access-Control-Expose-Headers:
+      - ACCESS-CONTROL-XXX
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 03:58:25 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      content-length:
+      - '886'
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '519'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '652'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"user","content":"Tell me a joke."}],"model":"gpt-4o-mini","stop":["END"]}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - application/json
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      authorization:
+      - AUTHORIZATION-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '95'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      x-stainless-arch:
+      - X-STAINLESS-ARCH-XXX
+      x-stainless-async:
+      - async:asyncio
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - X-STAINLESS-OS-XXX
+      x-stainless-package-version:
+      - 1.83.0
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-read-timeout:
+      - X-STAINLESS-READ-TIMEOUT-XXX
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.10
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: ''
+    headers:
+      Access-Control-Expose-Headers:
+      - ACCESS-CONTROL-XXX
+      CF-RAY:
+      - CF-RAY-XXX
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Fri, 23 Jan 2026 03:58:25 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - SET-COOKIE-XXX
+      Strict-Transport-Security:
+      - STS-XXX
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      content-length:
+      - '0'
+      openai-organization:
+      - OPENAI-ORG-XXX
+      openai-processing-ms:
+      - '519'
+      openai-project:
+      - OPENAI-PROJECT-XXX
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '652'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - X-RATELIMIT-LIMIT-REQUESTS-XXX
+      x-ratelimit-limit-tokens:
+      - X-RATELIMIT-LIMIT-TOKENS-XXX
+      x-ratelimit-remaining-requests:
+      - X-RATELIMIT-REMAINING-REQUESTS-XXX
+      x-ratelimit-remaining-tokens:
+      - X-RATELIMIT-REMAINING-TOKENS-XXX
+      x-ratelimit-reset-requests:
+      - X-RATELIMIT-RESET-REQUESTS-XXX
+      x-ratelimit-reset-tokens:
+      - X-RATELIMIT-RESET-TOKENS-XXX
+      x-request-id:
+      - X-REQUEST-ID-XXX
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/test_llm.py b/lib/crewai/tests/test_llm.py
index a8b6a7a3f..1f7a1d01f 100644
--- a/lib/crewai/tests/test_llm.py
+++ b/lib/crewai/tests/test_llm.py
@@ -959,6 +959,38 @@ async def test_usage_info_non_streaming_with_acall():
     assert len(result) > 0
 
 
+@pytest.mark.asyncio
+@pytest.mark.vcr(record_mode="once",decode_compressed_response=True,match_on=["method", "scheme", "host", "path", "body"])
+async def test_usage_info_non_streaming_with_acall_and_stop():
+    llm = LLM(
+        model="openai/gpt-4o-mini",
+        is_litellm=True,
+        stream=False,
+        stop=["END"],
+    )
+
+    assert llm._token_usage == {
+        "total_tokens": 0,
+        "prompt_tokens": 0,
+        "completion_tokens": 0,
+        "successful_requests": 0,
+        "cached_prompt_tokens": 0,
+    }
+
+    with patch.object(
+        llm, "_ahandle_non_streaming_response", wraps=llm._ahandle_non_streaming_response
+    ) as mock_handle:
+        result = await llm.acall("Tell me a joke.")
+        mock_handle.assert_called_once()
+
+    assert llm._token_usage["successful_requests"] == 1
+    assert llm._token_usage["prompt_tokens"] > 0
+    assert llm._token_usage["completion_tokens"] > 0
+    assert llm._token_usage["total_tokens"] > 0
+
+    assert len(result) > 0
+
+
 @pytest.mark.asyncio
 @pytest.mark.vcr(record_mode="none",decode_compressed_response=True,match_on=["method", "scheme", "host", "path", "body"])
 async def test_usage_info_streaming_with_acall():