From 3df5278ee98a4cff5ec8c8cd9c1399ecce14a75c Mon Sep 17 00:00:00 2001
From: Brandon Hancock
Date: Wed, 5 Mar 2025 10:01:11 -0500
Subject: [PATCH] WIP

---
 src/crewai/llm.py                             |  74 +++-
 .../test_crew_kickoff_usage_metrics.yaml      | 338 ------------------
 tests/crew_test.py                            |   6 +-
 tests/llm_test.py                             |   1 +
 4 files changed, 75 insertions(+), 344 deletions(-)
 delete mode 100644 tests/cassettes/test_crew_kickoff_usage_metrics.yaml

diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index ec4306bc4..e1d4aa95d 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -295,6 +295,13 @@ class LLM:
         last_chunk = None
         chunk_count = 0
         debug_info = []
+        aggregated_usage = {
+            "prompt_tokens": 0,
+            "completion_tokens": 0,
+            "total_tokens": 0,
+            "successful_requests": 0,
+            "cached_prompt_tokens": 0,
+        }

         # --- 2) Make sure stream is set to True
         params["stream"] = True
@@ -314,6 +321,13 @@
                 # Handle ModelResponse objects
                 if isinstance(chunk, ModelResponse):
                     debug_info.append("Chunk is ModelResponse")
+
+                    # Capture and aggregate usage information from the chunk if available
+                    chunk_usage = getattr(chunk, "usage", None)
+                    if isinstance(chunk_usage, dict):
+                        for key in aggregated_usage:
+                            aggregated_usage[key] += chunk_usage.get(key, 0)
+
                     choices = getattr(chunk, "choices", [])
                     if choices and len(choices) > 0:
                         choice = choices[0]
@@ -378,6 +392,12 @@
             if last_chunk is not None:
                 # Try to extract any content from the last chunk
                 if isinstance(last_chunk, ModelResponse):
+                    # Capture and aggregate usage information from the last chunk if available
+                    chunk_usage = getattr(last_chunk, "usage", None)
+                    if isinstance(chunk_usage, dict):
+                        for key in aggregated_usage:
+                            aggregated_usage[key] += chunk_usage.get(key, 0)
+
                     choices = getattr(last_chunk, "choices", [])
                     if choices and len(choices) > 0:
                         choice = choices[0]
@@ -406,6 +426,12 @@

         # --- 7) Check for tool calls in the final response
         if isinstance(last_chunk, ModelResponse):
+            # Capture and aggregate usage information from the last chunk if available
+            chunk_usage = getattr(last_chunk, "usage", None)
+            if isinstance(chunk_usage, dict):
+                for key in aggregated_usage:
+                    aggregated_usage[key] += chunk_usage.get(key, 0)
+
             choices = getattr(last_chunk, "choices", [])
             if choices and len(choices) > 0:
                 choice = choices[0]
@@ -418,7 +444,23 @@
             if tool_result is not None:
                 return tool_result

-        # --- 8) Emit completion event and return response
+        # --- 8) Log token usage if available
+        # Use aggregated usage if any tokens were counted
+        if any(value > 0 for value in aggregated_usage.values()):
+            logging.info(
+                f"Aggregated token usage from streaming response: {aggregated_usage}"
+            )
+            if self.callbacks and len(self.callbacks) > 0:
+                for callback in self.callbacks:
+                    if hasattr(callback, "log_success_event"):
+                        callback.log_success_event(
+                            kwargs=params,
+                            response_obj={"usage": aggregated_usage},
+                            start_time=0,
+                            end_time=0,
+                        )
+
+        # --- 9) Emit completion event and return response
         self._handle_emit_call_events(full_response, LLMCallType.LLM_CALL)
         return full_response

@@ -465,6 +507,16 @@
         """
         # --- 1) Make the completion call
         response = litellm.completion(**params)
+        # Extract usage info – if none is provided, default to zero
+        usage_info = getattr(response, "usage", None)
+        if usage_info is None:
+            usage_info = {
+                "prompt_tokens": 0,
+                "completion_tokens": 0,
+                "total_tokens": 0,
+                "successful_requests": 0,
+                "cached_prompt_tokens": 0,
+            }

         # --- 2) Extract response message and content
         response_message = cast(Choices, cast(ModelResponse, response).choices)[
@@ -472,15 +524,29 @@
         ].message
         text_response = response_message.content or ""

-        # --- 3) Check for tool calls
+        # --- 3) Handle callbacks with usage info
+        if self.callbacks and len(self.callbacks) > 0:
+            for callback in self.callbacks:
+                if hasattr(callback, "log_success_event"):
+                    logging.info(
+                        f"Token usage from non-streaming response: {usage_info}"
+                    )
+                    callback.log_success_event(
+                        kwargs=params,
+                        response_obj={"usage": usage_info},
+                        start_time=0,
+                        end_time=0,
+                    )
+
+        # --- 4) Check for tool calls
         tool_calls = getattr(response_message, "tool_calls", [])

-        # --- 4) Handle tool calls if present
+        # --- 5) Handle tool calls if present
         tool_result = self._handle_tool_call(tool_calls, available_functions)
         if tool_result is not None:
             return tool_result

-        # --- 5) Emit completion event and return response
+        # --- 6) Emit completion event and return response
         self._handle_emit_call_events(text_response, LLMCallType.LLM_CALL)
         return text_response
diff --git a/tests/cassettes/test_crew_kickoff_usage_metrics.yaml b/tests/cassettes/test_crew_kickoff_usage_metrics.yaml
deleted file mode 100644
index b51ff6964..000000000
--- a/tests/cassettes/test_crew_kickoff_usage_metrics.yaml
+++ /dev/null
@@ -1,338 +0,0 @@
-interactions:
-- request:
-    body: '{"messages": [{"role": "system", "content": "You are dog Researcher. You
-      have a lot of experience with dog.\nYour personal goal is: Express hot takes
-      on dog.\nTo give my best complete final answer to the task use the exact following
-      format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
-      answer must be the great and the most complete as possible, it must be outcome
-      described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
-      "content": "\nCurrent Task: Give me an analysis around dog.\n\nThis is the expect
-      criteria for your final answer: 1 bullet point about dog that''s under 15 words.\nyou
-      MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
-      This is VERY important to you, use the tools available and give your best Final
-      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
-      ["\nObservation:"], "stream": false}'
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '919'
-      content-type:
-      - application/json
-      host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.52.1
-      x-stainless-arch:
-      - arm64
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - MacOS
-      x-stainless-package-version:
-      - 1.52.1
-      x-stainless-raw-response:
-      - 'true'
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.12.7
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    content: "{\n  \"id\": \"chatcmpl-AcdAr57gPSeXoBUvpM7ihR5ocSg8K\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733770413,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-      \  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"I now can give a great answer \\nFinal
-      Answer: Dogs are loyal companions, enhancing human lives with love and joy.\",\n
-      \        \"refusal\": null\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
-      \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 175,\n    \"completion_tokens\":
-      24,\n    \"total_tokens\": 199,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
-      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
-      \      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_bba3c8e70b\"\n}\n"
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 8ef732d93c754554-ATL
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Mon, 09 Dec 2024 18:53:33 GMT
-      Server:
-      - cloudflare
-      Set-Cookie:
-      - __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
-        path=/; expires=Mon, 09-Dec-24 19:23:33 GMT; domain=.api.openai.com; HttpOnly;
-        Secure; SameSite=None
-      - _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000;
-        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - nosniff
-      access-control-expose-headers:
-      - X-Request-ID
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-organization:
-      - crewai-iuxna1
-      openai-processing-ms:
-      - '476'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      x-ratelimit-limit-requests:
-      - '30000'
-      x-ratelimit-limit-tokens:
-      - '150000000'
-      x-ratelimit-remaining-requests:
-      - '29999'
-      x-ratelimit-remaining-tokens:
-      - '149999793'
-      x-ratelimit-reset-requests:
-      - 2ms
-      x-ratelimit-reset-tokens:
-      - 0s
-      x-request-id:
-      - req_0d2fbea0013ad1ad0768fcdd457f5be3
-    http_version: HTTP/1.1
-    status_code: 200
-- request:
-    body: '{"messages": [{"role": "system", "content": "You are cat Researcher. You
-      have a lot of experience with cat.\nYour personal goal is: Express hot takes
-      on cat.\nTo give my best complete final answer to the task use the exact following
-      format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
-      answer must be the great and the most complete as possible, it must be outcome
-      described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
-      "content": "\nCurrent Task: Give me an analysis around cat.\n\nThis is the expect
-      criteria for your final answer: 1 bullet point about cat that''s under 15 words.\nyou
-      MUST return the actual complete content as the final answer, not a summary.\n\nBegin!
-      This is VERY important to you, use the tools available and give your best Final
-      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
-      ["\nObservation:"], "stream": false}'
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '919'
-      content-type:
-      - application/json
-      cookie:
-      - __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
-        _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000
-      host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.52.1
-      x-stainless-arch:
-      - arm64
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - MacOS
-      x-stainless-package-version:
-      - 1.52.1
-      x-stainless-raw-response:
-      - 'true'
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.12.7
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    content: "{\n  \"id\": \"chatcmpl-AcdArzlm4vKRhN7P4mtNE7X3UrCb3\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733770413,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-      \  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"I now can give a great answer \\nFinal
-      Answer: Cats are independent yet affectionate, making them unique companions
-      for humans.\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
-      \      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-      175,\n    \"completion_tokens\": 24,\n    \"total_tokens\": 199,\n    \"prompt_tokens_details\":
-      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_818c284075\"\n}\n"
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 8ef732dcfadf4554-ATL
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Mon, 09 Dec 2024 18:53:34 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - nosniff
-      access-control-expose-headers:
-      - X-Request-ID
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-organization:
-      - crewai-iuxna1
-      openai-processing-ms:
-      - '418'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      x-ratelimit-limit-requests:
-      - '30000'
-      x-ratelimit-limit-tokens:
-      - '150000000'
-      x-ratelimit-remaining-requests:
-      - '29999'
-      x-ratelimit-remaining-tokens:
-      - '149999793'
-      x-ratelimit-reset-requests:
-      - 2ms
-      x-ratelimit-reset-tokens:
-      - 0s
-      x-request-id:
-      - req_483f4fc624bef1034c884b5d0e847aee
-    http_version: HTTP/1.1
-    status_code: 200
-- request:
-    body: '{"messages": [{"role": "system", "content": "You are apple Researcher.
-      You have a lot of experience with apple.\nYour personal goal is: Express hot
-      takes on apple.\nTo give my best complete final answer to the task use the exact
-      following format:\n\nThought: I now can give a great answer\nFinal Answer: Your
-      final answer must be the great and the most complete as possible, it must be
-      outcome described.\n\nI MUST use these formats, my job depends on it!"}, {"role":
-      "user", "content": "\nCurrent Task: Give me an analysis around apple.\n\nThis
-      is the expect criteria for your final answer: 1 bullet point about apple that''s
-      under 15 words.\nyou MUST return the actual complete content as the final answer,
-      not a summary.\n\nBegin! This is VERY important to you, use the tools available
-      and give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
-      "gpt-4o-mini", "stop": ["\nObservation:"], "stream": false}'
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '929'
-      content-type:
-      - application/json
-      cookie:
-      - __cf_bm=zftDGzMKnU559gRET72hds3.GZV1di4sti_Q8aIdqPg-1733770413-1.0.1.1-AXqWYLVe2ClCqIFObmsZXfjYEbJ8Ahbl74TpjGzyxfP1UsSB3HisukLyoXLq52raWViSlB3tLosiLnNEWwWMdw;
-        _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000
-      host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.52.1
-      x-stainless-arch:
-      - arm64
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - MacOS
-      x-stainless-package-version:
-      - 1.52.1
-      x-stainless-raw-response:
-      - 'true'
-      x-stainless-retry-count:
-      - '0'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.12.7
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    content: "{\n  \"id\": \"chatcmpl-AcdAsHWkJ3K1sBl3O6XYMZZ3BhHx4\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1733770414,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
-      \  \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"I now can give a great answer \\nFinal
-      Answer: Apple consistently innovates, leading the tech industry with flagship
-      products.\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
-      \      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-      175,\n    \"completion_tokens\": 24,\n    \"total_tokens\": 199,\n    \"prompt_tokens_details\":
-      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
-      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
-      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
-      \"fp_818c284075\"\n}\n"
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 8ef732e028194554-ATL
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Mon, 09 Dec 2024 18:53:34 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - nosniff
-      access-control-expose-headers:
-      - X-Request-ID
-      alt-svc:
-      - h3=":443"; ma=86400
-      openai-organization:
-      - crewai-iuxna1
-      openai-processing-ms:
-      - '393'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      x-ratelimit-limit-requests:
-      - '30000'
-      x-ratelimit-limit-tokens:
-      - '150000000'
-      x-ratelimit-remaining-requests:
-      - '29999'
-      x-ratelimit-remaining-tokens:
-      - '149999791'
-      x-ratelimit-reset-requests:
-      - 2ms
-      x-ratelimit-reset-tokens:
-      - 0s
-      x-request-id:
-      - req_01df88b62376d0b63fb3fa2761bc9c2b
-    http_version: HTTP/1.1
-    status_code: 200
-version: 1
diff --git a/tests/crew_test.py b/tests/crew_test.py
index 8f9f69deb..0a00fe51d 100644
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -948,7 +948,7 @@ def test_api_calls_throttling(capsys):
     moveon.assert_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+# @pytest.mark.vcr(filter_headers=["authorization"])
 def test_crew_kickoff_usage_metrics():
     inputs = [
         {"topic": "dog"},
@@ -960,6 +960,7 @@ def test_crew_kickoff_usage_metrics():
         role="{topic} Researcher",
         goal="Express hot takes on {topic}.",
         backstory="You have a lot of experience with {topic}.",
+        llm=LLM(model="gpt-4o"),
     )

     task = Task(
@@ -968,12 +969,13 @@
         agent=agent,
     )

+    # Use real LLM calls instead of mocking
     crew = Crew(agents=[agent], tasks=[task])
     results = crew.kickoff_for_each(inputs=inputs)

     assert len(results) == len(inputs)
     for result in results:
-        # Assert that all required keys are in usage_metrics and their values are not None
+        # Assert that all required keys are in usage_metrics and their values are greater than 0
         assert result.token_usage.total_tokens > 0
         assert result.token_usage.prompt_tokens > 0
         assert result.token_usage.completion_tokens > 0
diff --git a/tests/llm_test.py b/tests/llm_test.py
index 61aa1aced..79ea56750 100644
--- a/tests/llm_test.py
+++ b/tests/llm_test.py
@@ -285,6 +285,7 @@ def test_o3_mini_reasoning_effort_medium():
     assert isinstance(result, str)
     assert "Paris" in result

+
 def test_context_window_validation():
     """Test that context window validation works correctly."""
     # Test valid window size
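
Note on the callback hook both new code paths rely on: the diff is duck-typed, checking only hasattr(callback, "log_success_event") before invoking it with kwargs, response_obj, start_time, and end_time, and it passes usage as response_obj={"usage": ...} in both the streaming (aggregated) and non-streaming cases. A minimal sketch of a compatible usage-tracking callback follows; the class name and the running totals it keeps are illustrative, not part of crewAI's or litellm's API:

import logging


class TokenUsageCallback:
    """Illustrative callback; any object exposing log_success_event
    with this signature satisfies the hasattr check in the diff."""

    def __init__(self) -> None:
        self.totals = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        # Both patched paths pass usage as response_obj={"usage": {...}}.
        usage = response_obj.get("usage") or {}
        for key in self.totals:
            self.totals[key] += usage.get(key, 0)
        logging.info("Running token totals: %s", self.totals)

Assuming the LLM constructor accepts the callbacks list that the diff reads back from self.callbacks, an instance could be wired up as LLM(model="gpt-4o", callbacks=[TokenUsageCallback()]) and its totals inspected after a kickoff.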