From 3df5278ee98a4cff5ec8c8cd9c1399ecce14a75c Mon Sep 17 00:00:00 2001
From: Brandon Hancock
Date: Wed, 5 Mar 2025 10:01:11 -0500
Subject: [PATCH] WIP

---
 src/crewai/llm.py                             |  74 +++-
 .../test_crew_kickoff_usage_metrics.yaml      | 338 ------------------
 tests/crew_test.py                            |   6 +-
 tests/llm_test.py                             |   1 +
 4 files changed, 75 insertions(+), 344 deletions(-)
 delete mode 100644 tests/cassettes/test_crew_kickoff_usage_metrics.yaml

diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index ec4306bc4..e1d4aa95d 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -295,6 +295,13 @@ class LLM:
         last_chunk = None
         chunk_count = 0
         debug_info = []
+        aggregated_usage = {
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0,
+            "successful_requests": 0,
+            "cached_prompt_tokens": 0,
+        }

         # --- 2) Make sure stream is set to True
         params["stream"] = True
@@ -314,6 +321,13 @@
                 # Handle ModelResponse objects
                 if isinstance(chunk, ModelResponse):
                     debug_info.append("Chunk is ModelResponse")
+
+                    # Capture and aggregate usage information from the chunk if available
+                    chunk_usage = getattr(chunk, "usage", None)
+                    if isinstance(chunk_usage, dict):
+                        for key in aggregated_usage:
+                            aggregated_usage[key] += chunk_usage.get(key, 0)
+
                     choices = getattr(chunk, "choices", [])
                     if choices and len(choices) > 0:
                         choice = choices[0]
@@ -378,6 +392,12 @@
             if last_chunk is not None:
                 # Try to extract any content from the last chunk
                 if isinstance(last_chunk, ModelResponse):
+                    # Capture and aggregate usage information from the last chunk if available
+                    chunk_usage = getattr(last_chunk, "usage", None)
+                    if isinstance(chunk_usage, dict):
+                        for key in aggregated_usage:
+                            aggregated_usage[key] += chunk_usage.get(key, 0)
+
                     choices = getattr(last_chunk, "choices", [])
                     if choices and len(choices) > 0:
                         choice = choices[0]
@@ -406,6 +426,12 @@

         # --- 7) Check for tool calls in the final response
         if isinstance(last_chunk, ModelResponse):
+            # Capture and aggregate usage information from the last chunk if available
+            chunk_usage = getattr(last_chunk, "usage", None)
+            if isinstance(chunk_usage, dict):
+                for key in aggregated_usage:
+                    aggregated_usage[key] += chunk_usage.get(key, 0)
+
             choices = getattr(last_chunk, "choices", [])
             if choices and len(choices) > 0:
                 choice = choices[0]
@@ -418,7 +444,23 @@
             if tool_result is not None:
                 return tool_result

-        # --- 8) Emit completion event and return response
+        # --- 8) Log token usage if available
+        # Use aggregated usage if any tokens were counted
+        if any(value > 0 for value in aggregated_usage.values()):
+            logging.info(
+                f"Aggregated token usage from streaming response: {aggregated_usage}"
+            )
+            if self.callbacks and len(self.callbacks) > 0:
+                for callback in self.callbacks:
+                    if hasattr(callback, "log_success_event"):
+                        callback.log_success_event(
+                            kwargs=params,
+                            response_obj={"usage": aggregated_usage},
+                            start_time=0,
+                            end_time=0,
+                        )
+
+        # --- 9) Emit completion event and return response
         self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
         return full_response

@@ -465,6 +507,16 @@
         """
         # --- 1) Make the completion call
         response = litellm.completion(**params)
+        # Extract usage info – if none is provided, default to zero
+        usage_info = getattr(response, "usage", None)
+        if usage_info is None:
+            usage_info = {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0,
+                "successful_requests": 0,
+                "cached_prompt_tokens": 0,
+            }

         # --- 2) Extract response message and content
         response_message = cast(Choices, cast(ModelResponse, response).choices)[
@@ -472,15 +524,29 @@
         ].message
         text_response = response_message.content or ""

-        # --- 3) Check for tool calls
+        # --- 3) Handle callbacks with usage info
+        if self.callbacks and len(self.callbacks) > 0:
+            for callback in self.callbacks:
+                if hasattr(callback, "log_success_event"):
+                    logging.info(
+                        f"Token usage from non-streaming response: {usage_info}"
+                    )
+                    callback.log_success_event(
+                        kwargs=params,
+                        response_obj={"usage": usage_info},
+                        start_time=0,
+                        end_time=0,
+                    )
+
+        # --- 4) Check for tool calls
         tool_calls = getattr(response_message, "tool_calls", [])

-        # --- 4) Handle tool calls if present
+        # --- 5) Handle tool calls if present
         tool_result = self._handle_tool_call(tool_calls, available_functions)
         if tool_result is not None:
             return tool_result

-        # --- 5) Emit completion event and return response
+        # --- 6) Emit completion event and return response
         self._handle_emit_call_events(text_response, LLMCallType.LLM_CALL)
         return text_response
diff --git a/tests/cassettes/test_crew_kickoff_usage_metrics.yaml b/tests/cassettes/test_crew_kickoff_usage_metrics.yaml
deleted file mode 100644
index b51ff6964..000000000
--- a/tests/cassettes/test_crew_kickoff_usage_metrics.yaml
+++ /dev/null
@@ -1,338 +0,0 @@
-interactions:
-- request:
-    body: '{"messages": [{"role": "system", "content": "You are dog Researcher. You
-      have a lot of experience with dog.\nYour personal goal is: Express hot takes
-      on dog.\nTo give my best complete final answer to the task use the exact following
-      format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
-      answer must be the great and the most complete as possible, it must be outcome
-      described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
-      "content": "\nCurrent Task: Give me an analysis around dog.\n\nThis is the expect
-      criteria for your final answer: 1 bullet point about dog that''s under 15 words.\nyou
-      MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
-      This is VERY important to you, use the tools available and give your best Final
-      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
-      ["\nObservation:"], "stream": false}'
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '919'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.52.1
-      x-stainless-arch:
-      - arm64
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - MacOS
-      x-stainless-package-version:
-      - 1.52.1
-      x-stainless-raw-response:
-      - 'true'
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.12.7
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    content: "{\n  \"id\": \"chatcmpl-AcdAr57gPSeXoBUvpM7ihR5ocSg8K\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733770413,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-      \  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"I now can give a great answer \\nFinal
-      Answer: Dogs are loyal companions, enhancing human lives with love and joy.\",\n
-      \        \"refusal\": null\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
-      \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 175,\n    \"completion_tokens\":
-      24,\n    \"total_tokens\": 199,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
-      \      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_bba3c8e70b\"\n}\n"
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 8ef732d93c754554-ATL
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Mon, 09 Dec 2024 18:53:33 GMT
-      Server:
-      - cloudflare
-      Set-Cookie:
-      - __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
-        path=/; expires=Mon, 09-Dec-24 19:23:33 GMT; domain=.api.openai.com; HttpOnly;
-        Secure; SameSite=None
-      - _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000;
-        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - nosniff
-      access-control-expose-headers:
-      - X-Request-ID
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-organization:
-      - crewai-iuxna1
-      openai-processing-ms:
-      - '476'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      x-ratelimit-limit-requests:
-      - '30000'
-      x-ratelimit-limit-tokens:
-      - '150000000'
-      x-ratelimit-remaining-requests:
-      - '29999'
-      x-ratelimit-remaining-tokens:
-      - '149999793'
-      x-ratelimit-reset-requests:
-      - 2ms
-      x-ratelimit-reset-tokens:
-      - 0s
-      x-request-id:
-      - req_0d2fbea0013ad1ad0768fcdd457f5be3
-    http_version: HTTP/1.1
-    status_code: 200
-- request:
-    body: '{"messages": [{"role": "system", "content": "You are cat Researcher. You
-      have a lot of experience with cat.\nYour personal goal is: Express hot takes
-      on cat.\nTo give my best complete final answer to the task use the exact following
-      format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
-      answer must be the great and the most complete as possible, it must be outcome
-      described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
-      "content": "\nCurrent Task: Give me an analysis around cat.\n\nThis is the expect
-      criteria for your final answer: 1 bullet point about cat that''s under 15 words.\nyou
-      MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
-      This is VERY important to you, use the tools available and give your best Final
-      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
-      ["\nObservation:"], "stream": false}'
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '919'
-      content-type:
-      - application/json
-      cookie:
-      - __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
-        _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000
-      host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.52.1
-      x-stainless-arch:
-      - arm64
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - MacOS
-      x-stainless-package-version:
-      - 1.52.1
-      x-stainless-raw-response:
-      - 'true'
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.12.7
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    content: "{\n  \"id\": \"chatcmpl-AcdArzlm4vKRhN7P4mtNE7X3UrCb3\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733770413,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-      \  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"I now can give a great answer \\nFinal
-      Answer: Cats are independent yet affectionate, making them unique companions
-      for humans.\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
-      \      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-      175,\n    \"completion_tokens\": 24,\n    \"total_tokens\": 199,\n    \"prompt_tokens_details\":
-      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_818c284075\"\n}\n"
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 8ef732dcfadf4554-ATL
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Mon, 09 Dec 2024 18:53:34 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - nosniff
-      access-control-expose-headers:
-      - X-Request-ID
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-organization:
-      - crewai-iuxna1
-      openai-processing-ms:
-      - '418'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      x-ratelimit-limit-requests:
-      - '30000'
-      x-ratelimit-limit-tokens:
-      - '150000000'
-      x-ratelimit-remaining-requests:
-      - '29999'
-      x-ratelimit-remaining-tokens:
-      - '149999793'
-      x-ratelimit-reset-requests:
-      - 2ms
-      x-ratelimit-reset-tokens:
-      - 0s
-      x-request-id:
-      - req_483f4fc624bef1034c884b5d0e847aee
-    http_version: HTTP/1.1
-    status_code: 200
-- request:
-    body: '{"messages": [{"role": "system", "content": "You are apple Researcher.
-      You have a lot of experience with apple.\nYour personal goal is: Express hot
-      takes on apple.\nTo give my best complete final answer to the task use the exact
-      following format:\n\nThought: I now can give a great answer\nFinal Answer: Your
-      final answer must be the great and the most complete as possible, it must be
-      outcome described.\n\nI MUST use these formats, my job depends on it!"}, {"role":
-      "user", "content": "\nCurrent Task: Give me an analysis around apple.\n\nThis
-      is the expect criteria for your final answer: 1 bullet point about apple that''s
-      under 15 words.\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nBegin! This is VERY important to you, use the tools available
-      and give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
-      "gpt-4o-mini", "stop": ["\nObservation:"], "stream": false}'
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '929'
-      content-type:
-      - application/json
-      cookie:
-      - __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
-        _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000
-      host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.52.1
-      x-stainless-arch:
-      - arm64
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - MacOS
-      x-stainless-package-version:
-      - 1.52.1
-      x-stainless-raw-response:
-      - 'true'
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.12.7
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    content: "{\n  \"id\": \"chatcmpl-AcdAsHWkJ3K1sBl3O6XYMZZ3BhHx4\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733770414,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-      \  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"I now can give a great answer \\nFinal
-      Answer: Apple consistently innovates, leading the tech industry with flagship
-      products.\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
-      \      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-      175,\n    \"completion_tokens\": 24,\n    \"total_tokens\": 199,\n    \"prompt_tokens_details\":
-      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_818c284075\"\n}\n"
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 8ef732e028194554-ATL
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Mon, 09 Dec 2024 18:53:34 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - nosniff
-      access-control-expose-headers:
-      - X-Request-ID
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-organization:
-      - crewai-iuxna1
-      openai-processing-ms:
-      - '393'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      x-ratelimit-limit-requests:
-      - '30000'
-      x-ratelimit-limit-tokens:
-      - '150000000'
-      x-ratelimit-remaining-requests:
-      - '29999'
-      x-ratelimit-remaining-tokens:
-      - '149999791'
-      x-ratelimit-reset-requests:
-      - 2ms
-      x-ratelimit-reset-tokens:
-      - 0s
-      x-request-id:
-      - req_01df88b62376d0b63fb3fa2761bc9c2b
-    http_version: HTTP/1.1
-    status_code: 200
-version: 1
diff --git a/tests/crew_test.py b/tests/crew_test.py
index 8f9f69deb..0a00fe51d 100644
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -948,7 +948,7 @@ def test_api_calls_throttling(capsys):
     moveon.assert_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+# @pytest.mark.vcr(filter_headers=["authorization"])
 def test_crew_kickoff_usage_metrics():
     inputs = [
         {"topic": "dog"},
@@ -960,6 +960,7 @@ def test_crew_kickoff_usage_metrics():
         role="{topic} Researcher",
         goal="Express hot takes on {topic}.",
         backstory="You have a lot of experience with {topic}.",
+        llm=LLM(model="gpt-4o"),
     )

     task = Task(
@@ -968,12 +969,13 @@
         agent=agent,
     )

+    # Use real LLM calls instead of mocking
     crew = Crew(agents=[agent], tasks=[task])
     results = crew.kickoff_for_each(inputs=inputs)

     assert len(results) == len(inputs)
     for result in results:
-        # Assert that all required keys are in usage_metrics and their values are not None
+        # Assert that all required keys are in usage_metrics and their values are greater than 0
         assert result.token_usage.total_tokens > 0
         assert result.token_usage.prompt_tokens > 0
         assert result.token_usage.completion_tokens > 0
diff --git a/tests/llm_test.py b/tests/llm_test.py
index 61aa1aced..79ea56750 100644
--- a/tests/llm_test.py
+++ b/tests/llm_test.py
@@ -285,6 +285,7 @@ def test_o3_mini_reasoning_effort_medium():
     assert isinstance(result, str)
     assert "Paris" in result

+
 def test_context_window_validation():
     """Test that context window validation works correctly."""
     # Test valid window size
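
Note on the callback hook both new code paths rely on: the diff is duck-typed, checking only hasattr(callback, "log_success_event") before invoking it with kwargs, response_obj, start_time, and end_time, and it passes usage as response_obj={"usage": ...} in both the streaming (aggregated) and non-streaming cases. A minimal sketch of a compatible usage-tracking callback follows; the class name and the running totals it keeps are illustrative, not part of crewAI's or litellm's API:

import logging


class TokenUsageCallback:
    """Illustrative callback; any object exposing log_success_event
    with this signature satisfies the hasattr check in the diff."""

    def __init__(self) -> None:
        self.totals = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # Both patched paths pass usage as response_obj={"usage": {...}}.
        usage = response_obj.get("usage") or {}
        for key in self.totals:
            self.totals[key] += usage.get(key, 0)
        logging.info("Running token totals: %s", self.totals)

Assuming the LLM constructor accepts the callbacks list that the diff reads back from self.callbacks, an instance could be wired up as LLM(model="gpt-4o", callbacks=[TokenUsageCallback()]) and its totals inspected after a kickoff.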