diff --git a/src/crewai/llm.py b/src/crewai/llm.py index 89ddfe2a1..f30ed080f 100644 --- a/src/crewai/llm.py +++ b/src/crewai/llm.py @@ -5,7 +5,7 @@ import sys import threading import warnings from collections import defaultdict -from contextlib import contextmanager, redirect_stderr, redirect_stdout +from contextlib import contextmanager from typing import ( Any, DefaultDict, @@ -18,7 +18,7 @@ from typing import ( Union, cast, ) - +from datetime import datetime from dotenv import load_dotenv from litellm.types.utils import ChatCompletionDeltaToolCall from pydantic import BaseModel, Field @@ -30,6 +30,11 @@ from crewai.utilities.events.llm_events import ( LLMCallType, LLMStreamChunkEvent, ) +from crewai.utilities.events.tool_usage_events import ( + ToolUsageStartedEvent, + ToolUsageFinishedEvent, + ToolUsageErrorEvent, +) with warnings.catch_warnings(): warnings.simplefilter("ignore", UserWarning) @@ -833,7 +838,26 @@ class LLM(BaseLLM): fn = available_functions[function_name] # --- 3.2) Execute function + assert hasattr(crewai_event_bus, "emit") + started_at = datetime.now() + crewai_event_bus.emit( + self, + event=ToolUsageStartedEvent( + tool_name=function_name, + tool_args=function_args, + ), + ) result = fn(**function_args) + crewai_event_bus.emit( + self, + event=ToolUsageFinishedEvent( + output=result, + tool_name=function_name, + tool_args=function_args, + started_at=started_at, + finished_at=datetime.now(), + ), + ) # --- 3.3) Emit success event self._handle_emit_call_events(result, LLMCallType.TOOL_CALL) @@ -849,6 +873,14 @@ class LLM(BaseLLM): self, event=LLMCallFailedEvent(error=f"Tool execution error: {str(e)}"), ) + crewai_event_bus.emit( + self, + event=ToolUsageErrorEvent( + tool_name=function_name, + tool_args=function_args, + error=f"Tool execution error: {str(e)}" + ), + ) return None def call( diff --git a/src/crewai/utilities/events/tool_usage_events.py b/src/crewai/utilities/events/tool_usage_events.py index 8ab22f667..13de0d519 100644 --- a/src/crewai/utilities/events/tool_usage_events.py +++ b/src/crewai/utilities/events/tool_usage_events.py @@ -7,11 +7,11 @@ from .base_events import BaseEvent class ToolUsageEvent(BaseEvent): """Base event for tool usage tracking""" - agent_key: str - agent_role: str + agent_key: Optional[str] = None + agent_role: Optional[str] = None tool_name: str tool_args: Dict[str, Any] | str - tool_class: str + tool_class: Optional[str] = None run_attempts: int | None = None delegations: int | None = None agent: Optional[Any] = None diff --git a/tests/cassettes/test_handle_streaming_tool_calls_with_error.yaml b/tests/cassettes/test_handle_streaming_tool_calls_with_error.yaml new file mode 100644 index 000000000..82c93d658 --- /dev/null +++ b/tests/cassettes/test_handle_streaming_tool_calls_with_error.yaml @@ -0,0 +1,143 @@ +interactions: +- request: + body: '{"messages": [{"role": "user", "content": "What is the weather in New York?"}], + "model": "gpt-4o", "stop": [], "stream": true, "stream_options": {"include_usage": + true}, "tools": [{"type": "function", "function": {"name": "get_weather", "description": + "Get the current weather in a given location", "parameters": {"type": "object", + "properties": {"location": {"type": "string", "description": "The city and state, + e.g. San Francisco, CA"}}, "required": ["location"]}}}]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '470' + content-type: + - application/json + cookie: + - _cfuvid=3UeEmz_rnmsoZxrVUv32u35gJOi766GDWNe5_RTjiPk-1736537376739-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.68.2 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.68.2 + x-stainless-raw-response: + - 'true' + x-stainless-read-timeout: + - '600.0' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.9 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"role":"assistant","content":null,"tool_calls":[{"index":0,"id":"call_UkMsNK0RTJ1nlT19WqgLJYV9","type":"function","function":{"name":"get_weather","arguments":""}}],"refusal":null},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"{\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"location"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\":\""}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"New"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + York"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":","}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":" + NY"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{"tool_calls":[{"index":0,"function":{"arguments":"\"}"}}]},"logprobs":null,"finish_reason":null}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"tool_calls"}],"usage":null} + + + data: {"id":"chatcmpl-BcY6NFDeu4HFOAIarpwSNAUEMuPTg","object":"chat.completion.chunk","created":1748527251,"model":"gpt-4o-2024-08-06","service_tier":"default","system_fingerprint":"fp_07871e2ad8","choices":[],"usage":{"prompt_tokens":68,"completion_tokens":17,"total_tokens":85,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}} + + + data: [DONE] + + + ' + headers: + CF-RAY: + - 947685373af8a435-GRU + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Thu, 29 May 2025 14:00:51 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=fFoq7oCHLgmljA4hsHWxTGHMEWJ.0t1XTuDptZPPkOc-1748527251-1.0.1.1-PP3Hd7XzA4AQFn0JQWjuQdhFwey0Pj9maUWKfFG16Bkl69Uk65A8XKN73UbsvO327TruwxameKb_m_HDePCR.YN0TZlE8Pu45WsA9shDwKY; + path=/; expires=Thu, 29-May-25 14:30:51 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=ut1CVX5GOYnv03fiV2Dsv7cm5soJmwgSutkPAEuVXWg-1748527251565-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '332' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-envoy-upstream-service-time: + - '334' + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '30000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '29999989' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_1dc91fc964a8d23ee023693400e5c181 + status: + code: 200 + message: OK +version: 1 diff --git a/tests/cassettes/test_llm_callback_replacement.yaml b/tests/cassettes/test_llm_callback_replacement.yaml index 4c3187128..fb1c19642 100644 --- a/tests/cassettes/test_llm_callback_replacement.yaml +++ b/tests/cassettes/test_llm_callback_replacement.yaml @@ -202,4 +202,63 @@ interactions: - req_366bcd7dfe94e2a2b5640fd9bb1c5a6b http_version: HTTP/1.1 status_code: 200 +- request: + body: !!binary | + CtcMCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkSrgwKEgoQY3Jld2FpLnRl + bGVtZXRyeRKUCAoQu3w5ZNCcMWutYN9ACENEihIIIWUtKzKLQXoqDENyZXcgQ3JlYXRlZDABOcjc + jv4SBEQYQWg/lv4SBEQYShsKDmNyZXdhaV92ZXJzaW9uEgkKBzAuMTIwLjFKGgoOcHl0aG9uX3Zl + cnNpb24SCAoGMy4xMi45Si4KCGNyZXdfa2V5EiIKIDY5NDY1NGEzMThmNzE5ODgzYzA2ZjhlNmQ5 + YTc1NDlmSjEKB2NyZXdfaWQSJgokMjI4NzU3NTAtYjIwMC00MTI4LWJmYjUtYTFmNTFjNDhlNDk5 + ShwKDGNyZXdfcHJvY2VzcxIMCgpzZXF1ZW50aWFsShEKC2NyZXdfbWVtb3J5EgIQAEoaChRjcmV3 + X251bWJlcl9vZl90YXNrcxICGAFKGwoVY3Jld19udW1iZXJfb2ZfYWdlbnRzEgIYAUo6ChBjcmV3 + X2ZpbmdlcnByaW50EiYKJDBhZGQxM2U2LTBhYWQtNDUyNS1iYTE0LWZhMDUzZGM2ZjE0ZUo7Chtj + cmV3X2ZpbmdlcnByaW50X2NyZWF0ZWRfYXQSHAoaMjAyNS0wNS0yOVQxMDo1NzoxNC45NTE4MTlK + zAIKC2NyZXdfYWdlbnRzErwCCrkCW3sia2V5IjogIjU1ODY5YmNiMTYzMjNlNzEyOWQyNTIzNjJj + ODU1ZGE2IiwgImlkIjogIjJiY2UyZTE0LWIyN2UtNDM1MC1iZmIyLWE1YTNkMTRmYTJhMCIsICJy + b2xlIjogIlNheSBIaSIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAyNSwgIm1heF9y + cG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJ0ZXN0LW1vZGVs + IiwgImRlbGVnYXRpb25fZW5hYmxlZD8iOiBmYWxzZSwgImFsbG93X2NvZGVfZXhlY3V0aW9uPyI6 + IGZhbHNlLCAibWF4X3JldHJ5X2xpbWl0IjogMiwgInRvb2xzX25hbWVzIjogW119XUr7AQoKY3Jl + d190YXNrcxLsAQrpAVt7ImtleSI6ICJkZTI5NDBmMDZhZDhhNDE2YzI4Y2MwZjI2MTBmMTgwYiIs + ICJpZCI6ICJiM2MyMzNkZC1kNDk2LTQ1YjQtYWFkMy1kYzYyZGI3ZjJiZWEiLCAiYXN5bmNfZXhl + Y3V0aW9uPyI6IGZhbHNlLCAiaHVtYW5faW5wdXQ/IjogZmFsc2UsICJhZ2VudF9yb2xlIjogIlNh + eSBIaSIsICJhZ2VudF9rZXkiOiAiNTU4NjliY2IxNjMyM2U3MTI5ZDI1MjM2MmM4NTVkYTYiLCAi + dG9vbHNfbmFtZXMiOiBbXX1degIYAYUBAAEAABKABAoQaW1V2ASOUN5hjxpKH5WT+BIIe6lsRrYF + 84MqDFRhc2sgQ3JlYXRlZDABOfA/rv4SBEQYQSC1rv4SBEQYSi4KCGNyZXdfa2V5EiIKIDY5NDY1 + NGEzMThmNzE5ODgzYzA2ZjhlNmQ5YTc1NDlmSjEKB2NyZXdfaWQSJgokMjI4NzU3NTAtYjIwMC00 + MTI4LWJmYjUtYTFmNTFjNDhlNDk5Si4KCHRhc2tfa2V5EiIKIGRlMjk0MGYwNmFkOGE0MTZjMjhj + YzBmMjYxMGYxODBiSjEKB3Rhc2tfaWQSJgokYjNjMjMzZGQtZDQ5Ni00NWI0LWFhZDMtZGM2MmRi + N2YyYmVhSjoKEGNyZXdfZmluZ2VycHJpbnQSJgokMGFkZDEzZTYtMGFhZC00NTI1LWJhMTQtZmEw + NTNkYzZmMTRlSjoKEHRhc2tfZmluZ2VycHJpbnQSJgokZGVlNDA1YjgtMTkxNC00N2NkLTlkMTgt + ZTdmZDA0NjFkOGE4SjsKG3Rhc2tfZmluZ2VycHJpbnRfY3JlYXRlZF9hdBIcChoyMDI1LTA1LTI5 + VDEwOjU3OjE0Ljk1MTc4M0o7ChFhZ2VudF9maW5nZXJwcmludBImCiRiNWQ0NGNlMS00NGRjLTQ0 + YzYtYTU1YS0xODZhM2QxZmU2YjJ6AhgBhQEAAQAA + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '1626' + Content-Type: + - application/x-protobuf + User-Agent: + - OTel-OTLP-Exporter-Python/1.31.1 + method: POST + uri: https://telemetry.crewai.com:4319/v1/traces + response: + body: + string: "\n\0" + headers: + Content-Length: + - '2' + Content-Type: + - application/x-protobuf + Date: + - Thu, 29 May 2025 13:57:17 GMT + status: + code: 200 + message: OK version: 1 diff --git a/tests/llm_test.py b/tests/llm_test.py index f80637c60..36fc2dfcb 100644 --- a/tests/llm_test.py +++ b/tests/llm_test.py @@ -2,7 +2,6 @@ import os from time import sleep from unittest.mock import MagicMock, patch -import litellm import pytest from pydantic import BaseModel @@ -11,7 +10,11 @@ from crewai.llm import CONTEXT_WINDOW_USAGE_RATIO, LLM from crewai.utilities.events import ( LLMCallCompletedEvent, LLMStreamChunkEvent, + ToolUsageStartedEvent, + ToolUsageFinishedEvent, + ToolUsageErrorEvent, ) + from crewai.utilities.token_counter_callback import TokenCalcHandler @@ -222,7 +225,7 @@ def test_get_custom_llm_provider_gemini(): def test_get_custom_llm_provider_openai(): llm = LLM(model="gpt-4") - assert llm._get_custom_llm_provider() == None + assert llm._get_custom_llm_provider() is None def test_validate_call_params_supported(): @@ -511,12 +514,18 @@ def assert_event_count( expected_completed_tool_call: int = 0, expected_stream_chunk: int = 0, expected_completed_llm_call: int = 0, + expected_tool_usage_started: int = 0, + expected_tool_usage_finished: int = 0, + expected_tool_usage_error: int = 0, expected_final_chunk_result: str = "", ): event_count = { "completed_tool_call": 0, "stream_chunk": 0, "completed_llm_call": 0, + "tool_usage_started": 0, + "tool_usage_finished": 0, + "tool_usage_error": 0, } final_chunk_result = "" for _call in mock_emit.call_args_list: @@ -535,12 +544,21 @@ def assert_event_count( and event.call_type.value == "llm_call" ): event_count["completed_llm_call"] += 1 + elif isinstance(event, ToolUsageStartedEvent): + event_count["tool_usage_started"] += 1 + elif isinstance(event, ToolUsageFinishedEvent): + event_count["tool_usage_finished"] += 1 + elif isinstance(event, ToolUsageErrorEvent): + event_count["tool_usage_error"] += 1 else: continue assert event_count["completed_tool_call"] == expected_completed_tool_call assert event_count["stream_chunk"] == expected_stream_chunk assert event_count["completed_llm_call"] == expected_completed_llm_call + assert event_count["tool_usage_started"] == expected_tool_usage_started + assert event_count["tool_usage_finished"] == expected_tool_usage_finished + assert event_count["tool_usage_error"] == expected_tool_usage_error assert final_chunk_result == expected_final_chunk_result @@ -574,6 +592,34 @@ def test_handle_streaming_tool_calls(get_weather_tool_schema, mock_emit): expected_completed_tool_call=1, expected_stream_chunk=10, expected_completed_llm_call=1, + expected_tool_usage_started=1, + expected_tool_usage_finished=1, + expected_final_chunk_result=expected_final_chunk_result, + ) + +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_handle_streaming_tool_calls_with_error(get_weather_tool_schema, mock_emit): + def get_weather_error(location): + raise Exception("Error") + + llm = LLM(model="openai/gpt-4o", stream=True) + response = llm.call( + messages=[ + {"role": "user", "content": "What is the weather in New York?"}, + ], + tools=[get_weather_tool_schema], + available_functions={ + "get_weather": get_weather_error + }, + ) + assert response == "" + expected_final_chunk_result = '{"location":"New York, NY"}' + assert_event_count( + mock_emit=mock_emit, + expected_stream_chunk=9, + expected_completed_llm_call=1, + expected_tool_usage_started=1, + expected_tool_usage_error=1, expected_final_chunk_result=expected_final_chunk_result, )