diff --git a/src/crewai/llm.py b/src/crewai/llm.py index 577cb6a43..7ea8155a6 100644 --- a/src/crewai/llm.py +++ b/src/crewai/llm.py @@ -118,12 +118,12 @@ class LLM: litellm.drop_params = True litellm.set_verbose = False - litellm.callbacks = callbacks + self.set_callbacks(callbacks) def call(self, messages: List[Dict[str, str]], callbacks: List[Any] = []) -> str: with suppress_warnings(): if callbacks and len(callbacks) > 0: - litellm.callbacks = callbacks + self.set_callbacks(callbacks) try: params = { @@ -181,3 +181,15 @@ class LLM: def get_context_window_size(self) -> int: # Only using 75% of the context window size to avoid cutting the message in the middle return int(LLM_CONTEXT_WINDOW_SIZES.get(self.model, 8192) * 0.75) + + def set_callbacks(self, callbacks: List[Any]): + callback_types = [type(callback) for callback in callbacks] + for callback in litellm.success_callback[:]: + if type(callback) in callback_types: + litellm.success_callback.remove(callback) + + for callback in litellm._async_success_callback[:]: + if type(callback) in callback_types: + litellm._async_success_callback.remove(callback) + + litellm.callbacks = callbacks diff --git a/tests/cassettes/test_llm_callback_replacement.yaml b/tests/cassettes/test_llm_callback_replacement.yaml new file mode 100644 index 000000000..7b8f7e707 --- /dev/null +++ b/tests/cassettes/test_llm_callback_replacement.yaml @@ -0,0 +1,205 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "Hello, world!"}], "model": "gpt-4o-mini", + "stream": false}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '101' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.52.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.52.1 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.9 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSwWrcMBS8+ytedY6LvWvYZi8lpZSkBJLSQiChGK307FUi66nSc9Ml7L8H2e56 + l7bQiw8zb8Yzg14yAGG0WINQW8mq8za/+Oqv5MUmXv+8+/Hl3uO3j59u1efreHO+/PAszpKCNo+o + +LfqraLOW2RDbqRVQMmYXMvVsqyWy1VVDERHGm2StZ7zivLOOJMvikWVF6u8fDept2QURrGGhwwA + 4GX4ppxO4y+xhsFrQDqMUbYo1ocjABHIJkTIGE1k6ViczaQix+iG6JdoLb2BS3oGJR1cwSiAHfXA + pOXu/bEwYNNHmcK73toJ3x+SWGp9oE2c+APeGGfitg4oI7n018jkxcDuM4DvQ+P+pITwgTrPNdMT + umRYlqOdmHeeyfOJY2JpZ3gxjXRqVmtkaWw8GkwoqbaoZ+W8ruy1oSMiO6r8Z5a/eY+1jWv/x34m + lELPqGsfUBt12nc+C5ge4b/ODhMPgUXcRcauboxrMfhgxifQ+LrYyEKXi6opRbbPXgEAAP//AwAM + DMWoEAMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e185b2c1b790303-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 12 Nov 2024 17:49:00 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=l.QrRLcNZkML_KSfxjir6YCV35B8GNTitBTNh7cPGc4-1731433740-1.0.1.1-j1ejlmykyoI8yk6i6pQjtPoovGzfxI2f5vG6u0EqodQMjCvhbHfNyN_wmYkeT._BMvFi.zDQ8m_PqEHr8tSdEQ; + path=/; expires=Tue, 12-Nov-24 18:19:00 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=jcCDyMK__Fd0V5DMeqt9yXdlKc7Hsw87a1K01pZu9l0-1731433740848-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - user-tqfegqsiobpvvjmn0giaipdq + openai-processing-ms: + - '322' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199978' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 6ms + x-request-id: + - req_037288753767e763a51a04eae757ca84 + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "user", "content": "Hello, world from another agent!"}], + "model": "gpt-4o-mini", "stream": false}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '120' + content-type: + - application/json + cookie: + - __cf_bm=l.QrRLcNZkML_KSfxjir6YCV35B8GNTitBTNh7cPGc4-1731433740-1.0.1.1-j1ejlmykyoI8yk6i6pQjtPoovGzfxI2f5vG6u0EqodQMjCvhbHfNyN_wmYkeT._BMvFi.zDQ8m_PqEHr8tSdEQ; + _cfuvid=jcCDyMK__Fd0V5DMeqt9yXdlKc7Hsw87a1K01pZu9l0-1731433740848-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.52.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.52.1 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.11.9 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAA4xSy27bMBC86yu2PFuBZAt14UvRU5MA7aVAEKAIBJpcSUwoLkuu6jiB/z3QI5aM + tkAvPMzsDGZ2+ZoACKPFDoRqJKvW2/TLD3+z//oiD8dfL7d339zvW125x9zX90/3mVj1Cto/ouJ3 + 1ZWi1ltkQ26kVUDJ2Lvm201ebDbbIh+IljTaXlZ7TgtKW+NMus7WRZpt0/zTpG7IKIxiBz8TAIDX + 4e1zOo3PYgfZ6h1pMUZZo9idhwBEINsjQsZoIkvHYjWTihyjG6Jfo7X0Ab4bhcAEipxDxXAw3IB0 + xA0GkDU6voJrOoCSDm5gNIUjdcCk5fHz0jxg1UXZF3SdtRN+Oqe1VPtA+zjxZ7wyzsSmDCgjuT5Z + ZPJiYE8JwMOwle6iqPCBWs8l0xO63jAvRjsx32JBfpxIJpZ2xjfTJi/dSo0sjY2LrQolVYN6Vs4n + kJ02tCCSRec/w/zNe+xtXP0/9jOhFHpGXfqA2qjLwvNYwP6n/mvsvOMhsIjHyNiWlXE1Bh/M+E8q + X2Z7mel8XVS5SE7JGwAAAP//AwA/cK4yNQMAAA== + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e185b31398a0303-GRU + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Tue, 12 Nov 2024 17:49:02 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - user-tqfegqsiobpvvjmn0giaipdq + openai-processing-ms: + - '889' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9998' + x-ratelimit-remaining-tokens: + - '199975' + x-ratelimit-reset-requests: + - 16.489s + x-ratelimit-reset-tokens: + - 7ms + x-request-id: + - req_bde3810b36a4859688e53d1df64bdd20 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/crew_test.py b/tests/crew_test.py index 24b892271..5f39557e0 100644 --- a/tests/crew_test.py +++ b/tests/crew_test.py @@ -1280,10 +1280,10 @@ def test_agent_usage_metrics_are_captured_for_hierarchical_process(): assert result.raw == "Howdy!" assert result.token_usage == UsageMetrics( - total_tokens=2626, - prompt_tokens=2482, - completion_tokens=144, - successful_requests=5, + total_tokens=1673, + prompt_tokens=1562, + completion_tokens=111, + successful_requests=3, ) diff --git a/tests/llm_test.py b/tests/llm_test.py new file mode 100644 index 000000000..e824d54c9 --- /dev/null +++ b/tests/llm_test.py @@ -0,0 +1,30 @@ +import pytest + +from crewai.agents.agent_builder.utilities.base_token_process import TokenProcess +from crewai.llm import LLM +from crewai.utilities.token_counter_callback import TokenCalcHandler + + +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_llm_callback_replacement(): + llm = LLM(model="gpt-4o-mini") + + calc_handler_1 = TokenCalcHandler(token_cost_process=TokenProcess()) + calc_handler_2 = TokenCalcHandler(token_cost_process=TokenProcess()) + + llm.call( + messages=[{"role": "user", "content": "Hello, world!"}], + callbacks=[calc_handler_1], + ) + usage_metrics_1 = calc_handler_1.token_cost_process.get_summary() + + llm.call( + messages=[{"role": "user", "content": "Hello, world from another agent!"}], + callbacks=[calc_handler_2], + ) + usage_metrics_2 = calc_handler_2.token_cost_process.get_summary() + + # The first handler should not have been updated + assert usage_metrics_1.successful_requests == 1 + assert usage_metrics_2.successful_requests == 1 + assert usage_metrics_1 == calc_handler_1.token_cost_process.get_summary()