Adding usage info in llm.py (#4172)

* Adding usage info everywhere

* Changing the check

* Changing the logic

* Adding tests

* Adding cassettes

* Minor change

* Fixing testcase

* Remove the duplicated test case, thanks to Cursor

* Adding async test cases

* Updating test case

---------

Co-authored-by: Lorenze Jay <63378463+lorenzejay@users.noreply.github.com>
Vidit Ostwal
2026-01-08 00:12:27 +05:30
committed by GitHub
parent 09014215a9
commit 1c4f44af80
6 changed files with 715 additions and 10 deletions
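
In short, the change makes the LLM wrapper accumulate per-instance token usage across the sync/async and streaming/non-streaming paths. A minimal sketch of the resulting behavior, assuming LLM is importable from the crewai package as in the test module (the _token_usage keys are taken from the tests below; the printed numbers are illustrative):

from crewai import LLM

llm = LLM(model="gpt-4o-mini", is_litellm=True)

# Counters start at zero before any request.
assert llm._token_usage["successful_requests"] == 0

llm.call("Tell me a joke.")

# After a successful call, usage accumulates on the instance, e.g.:
# {"total_tokens": 42, "prompt_tokens": 13, "completion_tokens": 29,
#  "successful_requests": 1, "cached_prompt_tokens": 0}
print(llm._token_usage)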


@@ -877,3 +877,116 @@ def test_validate_model_in_constants():
        LLM._validate_model_in_constants("anthropic.claude-future-v1:0", "bedrock")
        is True
    )


@pytest.mark.vcr(record_mode="once", decode_compressed_response=True)
def test_usage_info_non_streaming_with_call():
    llm = LLM(model="gpt-4o-mini", is_litellm=True)
    assert llm._token_usage == {
        "total_tokens": 0,
        "prompt_tokens": 0,
        "completion_tokens": 0,
        "successful_requests": 0,
        "cached_prompt_tokens": 0,
    }
    assert llm.stream is False

    with patch.object(
        llm, "_handle_non_streaming_response", wraps=llm._handle_non_streaming_response
    ) as mock_handle:
        llm.call("Tell me a joke.")

    mock_handle.assert_called_once()
    assert llm._token_usage["total_tokens"] > 0
    assert llm._token_usage["prompt_tokens"] > 0
    assert llm._token_usage["completion_tokens"] > 0
    assert llm._token_usage["successful_requests"] == 1


@pytest.mark.vcr(record_mode="once", decode_compressed_response=True)
def test_usage_info_streaming_with_call():
    llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
    assert llm._token_usage == {
        "total_tokens": 0,
        "prompt_tokens": 0,
        "completion_tokens": 0,
        "successful_requests": 0,
        "cached_prompt_tokens": 0,
    }
    assert llm.stream is True

    with patch.object(
        llm, "_handle_streaming_response", wraps=llm._handle_streaming_response
    ) as mock_handle:
        llm.call("Tell me a joke.")

    mock_handle.assert_called_once()
    assert llm._token_usage["total_tokens"] > 0
    assert llm._token_usage["prompt_tokens"] > 0
    assert llm._token_usage["completion_tokens"] > 0
    assert llm._token_usage["successful_requests"] == 1


@pytest.mark.asyncio
@pytest.mark.vcr(record_mode="once", decode_compressed_response=True, match_on=["method", "scheme", "host", "path", "body"])
async def test_usage_info_non_streaming_with_acall():
    llm = LLM(
        model="openai/gpt-4o-mini",
        is_litellm=True,
        stream=False,
    )
    # sanity check
    assert llm._token_usage == {
        "total_tokens": 0,
        "prompt_tokens": 0,
        "completion_tokens": 0,
        "successful_requests": 0,
        "cached_prompt_tokens": 0,
    }

    with patch.object(
        llm, "_ahandle_non_streaming_response", wraps=llm._ahandle_non_streaming_response
    ) as mock_handle:
        result = await llm.acall("Tell me a joke.")

    mock_handle.assert_called_once()
    # token usage assertions (robust)
    assert llm._token_usage["successful_requests"] == 1
    assert llm._token_usage["prompt_tokens"] > 0
    assert llm._token_usage["completion_tokens"] > 0
    assert llm._token_usage["total_tokens"] > 0
    assert len(result) > 0


@pytest.mark.asyncio
@pytest.mark.vcr(record_mode="none", decode_compressed_response=True, match_on=["method", "scheme", "host", "path", "body"])
async def test_usage_info_streaming_with_acall():
    llm = LLM(
        model="gpt-4o-mini",
        is_litellm=True,
        stream=True,
    )
    assert llm.stream is True
    assert llm._token_usage == {
        "total_tokens": 0,
        "prompt_tokens": 0,
        "completion_tokens": 0,
        "successful_requests": 0,
        "cached_prompt_tokens": 0,
    }

    with patch.object(
        llm, "_ahandle_streaming_response", wraps=llm._ahandle_streaming_response
    ) as mock_handle:
        result = await llm.acall("Tell me a joke.")

    mock_handle.assert_called_once()
    assert llm._token_usage["successful_requests"] == 1
    assert llm._token_usage["prompt_tokens"] > 0
    assert llm._token_usage["completion_tokens"] > 0
    assert llm._token_usage["total_tokens"] > 0
    assert len(result) > 0
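
For the async streaming path exercised above, the counters are expected to be populated once the stream has been fully consumed. A hypothetical end-to-end sketch under the same assumptions as the earlier example:

import asyncio

from crewai import LLM


async def main() -> None:
    llm = LLM(model="gpt-4o-mini", is_litellm=True, stream=True)
    result = await llm.acall("Tell me a joke.")
    # Streaming responses should still report usage after the stream completes.
    assert llm._token_usage["successful_requests"] == 1
    print(result)
    print(llm._token_usage)


asyncio.run(main())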