Merge branch 'main' into devin/1740154466-add-o3-mini-context-window

2026-01-27 09:08:14 +00:00 · 2025-02-25 15:30:01 -05:00
parent ed1bfe7b6f b4e2db0306
commit 46dfa8577c
28 changed files with 2345 additions and 1624 deletions
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -915,8 +915,6 @@ def test_tool_result_as_answer_is_the_final_answer_for_the_agent():

@pytest.mark.vcr(filter_headers=["authorization"])
 def test_tool_usage_information_is_appended_to_agent():
-    from datetime import UTC, datetime
-
    from crewai.tools import BaseTool

    class MyCustomTool(BaseTool):
@@ -926,36 +924,30 @@ def test_tool_usage_information_is_appended_to_agent():
        def _run(self) -> str:
            return "Howdy!"

-    fixed_datetime = datetime(2025, 2, 10, 12, 0, 0, tzinfo=UTC)
-    with patch("datetime.datetime") as mock_datetime:
-        mock_datetime.now.return_value = fixed_datetime
-        mock_datetime.side_effect = lambda *args, **kw: datetime(*args, **kw)
+    agent1 = Agent(
+        role="Friendly Neighbor",
+        goal="Make everyone feel welcome",
+        backstory="You are the friendly neighbor",
+        tools=[MyCustomTool(result_as_answer=True)],
+    )

-        agent1 = Agent(
-            role="Friendly Neighbor",
-            goal="Make everyone feel welcome",
-            backstory="You are the friendly neighbor",
-            tools=[MyCustomTool(result_as_answer=True)],
-        )
+    greeting = Task(
+        description="Say an appropriate greeting.",
+        expected_output="The greeting.",
+        agent=agent1,
+    )
+    tasks = [greeting]
+    crew = Crew(agents=[agent1], tasks=tasks)

-        greeting = Task(
-            description="Say an appropriate greeting.",
-            expected_output="The greeting.",
-            agent=agent1,
-        )
-        tasks = [greeting]
-        crew = Crew(agents=[agent1], tasks=tasks)
-
-        crew.kickoff()
-        assert agent1.tools_results == [
-            {
-                "result": "Howdy!",
-                "tool_name": "Decide Greetings",
-                "tool_args": {},
-                "result_as_answer": True,
-                "start_time": fixed_datetime,
-            }
-        ]
+    crew.kickoff()
+    assert agent1.tools_results == [
+        {
+            "result": "Howdy!",
+            "tool_name": "Decide Greetings",
+            "tool_args": {},
+            "result_as_answer": True,
+        }
+    ]


 def test_agent_definition_based_on_dict():
--- a/tests/crew_test.py
+++ b/tests/crew_test.py
@@ -833,6 +833,12 @@ def test_crew_verbose_output(capsys):

    crew.kickoff()
    captured = capsys.readouterr()
+
+    # Filter out event listener logs (lines starting with '[')
+    filtered_output = "\n".join(
+        line for line in captured.out.split("\n") if not line.startswith("[")
+    )
+
    expected_strings = [
        "\x1b[1m\x1b[95m# Agent:\x1b[00m \x1b[1m\x1b[92mResearcher",
        "\x1b[00m\n\x1b[95m## Task:\x1b[00m \x1b[92mResearch AI advancements.",
@@ -845,27 +851,19 @@ def test_crew_verbose_output(capsys):
    ]

    for expected_string in expected_strings:
-        assert expected_string in captured.out
+        assert expected_string in filtered_output

    # Now test with verbose set to False
    crew.verbose = False
    crew._logger = Logger(verbose=False)
    crew.kickoff()
-    expected_listener_logs = [
-        "[🚀 CREW 'CREW' STARTED]",
-        "[📋 TASK STARTED: RESEARCH AI ADVANCEMENTS.]",
-        "[🤖 AGENT 'RESEARCHER' STARTED TASK]",
-        "[✅ AGENT 'RESEARCHER' COMPLETED TASK]",
-        "[✅ TASK COMPLETED: RESEARCH AI ADVANCEMENTS.]",
-        "[📋 TASK STARTED: WRITE ABOUT AI IN HEALTHCARE.]",
-        "[🤖 AGENT 'SENIOR WRITER' STARTED TASK]",
-        "[✅ AGENT 'SENIOR WRITER' COMPLETED TASK]",
-        "[✅ TASK COMPLETED: WRITE ABOUT AI IN HEALTHCARE.]",
-        "[✅ CREW 'CREW' COMPLETED]",
-    ]
    captured = capsys.readouterr()
-    for log in expected_listener_logs:
-        assert log in captured.out
+    filtered_output = "\n".join(
+        line
+        for line in captured.out.split("\n")
+        if not line.startswith("[") and line.strip() and not line.startswith("\x1b")
+    )
+    assert filtered_output == ""


@pytest.mark.vcr(filter_headers=["authorization"])
--- a/tests/traces/test_unified_trace_controller.py
+++ b/tests/traces/test_unified_trace_controller.py
@@ -1,360 +0,0 @@
-import os
-from datetime import UTC, datetime
-from unittest.mock import MagicMock, patch
-from uuid import UUID
-
-import pytest
-
-from crewai.traces.context import TraceContext
-from crewai.traces.enums import CrewType, RunType, TraceType
-from crewai.traces.models import (
-    CrewTrace,
-    FlowStepIO,
-    LLMRequest,
-    LLMResponse,
-)
-from crewai.traces.unified_trace_controller import (
-    UnifiedTraceController,
-    init_crew_main_trace,
-    init_flow_main_trace,
-    should_trace,
-    trace_flow_step,
-    trace_llm_call,
-)
-
-
-class TestUnifiedTraceController:
-    @pytest.fixture
-    def basic_trace_controller(self):
-        return UnifiedTraceController(
-            trace_type=TraceType.LLM_CALL,
-            run_type=RunType.KICKOFF,
-            crew_type=CrewType.CREW,
-            run_id="test-run-id",
-            agent_role="test-agent",
-            task_name="test-task",
-            task_description="test description",
-            task_id="test-task-id",
-        )
-
-    def test_initialization(self, basic_trace_controller):
-        """Test basic initialization of UnifiedTraceController"""
-        assert basic_trace_controller.trace_type == TraceType.LLM_CALL
-        assert basic_trace_controller.run_type == RunType.KICKOFF
-        assert basic_trace_controller.crew_type == CrewType.CREW
-        assert basic_trace_controller.run_id == "test-run-id"
-        assert basic_trace_controller.agent_role == "test-agent"
-        assert basic_trace_controller.task_name == "test-task"
-        assert basic_trace_controller.task_description == "test description"
-        assert basic_trace_controller.task_id == "test-task-id"
-        assert basic_trace_controller.status == "running"
-        assert isinstance(UUID(basic_trace_controller.trace_id), UUID)
-
-    def test_start_trace(self, basic_trace_controller):
-        """Test starting a trace"""
-        result = basic_trace_controller.start_trace()
-        assert result == basic_trace_controller
-        assert basic_trace_controller.start_time is not None
-        assert isinstance(basic_trace_controller.start_time, datetime)
-
-    def test_end_trace_success(self, basic_trace_controller):
-        """Test ending a trace successfully"""
-        basic_trace_controller.start_trace()
-        basic_trace_controller.end_trace(result={"test": "result"})
-
-        assert basic_trace_controller.end_time is not None
-        assert basic_trace_controller.status == "completed"
-        assert basic_trace_controller.error is None
-        assert basic_trace_controller.context.get("response") == {"test": "result"}
-
-    def test_end_trace_with_error(self, basic_trace_controller):
-        """Test ending a trace with an error"""
-        basic_trace_controller.start_trace()
-        basic_trace_controller.end_trace(error="Test error occurred")
-
-        assert basic_trace_controller.end_time is not None
-        assert basic_trace_controller.status == "error"
-        assert basic_trace_controller.error == "Test error occurred"
-
-    def test_add_child_trace(self, basic_trace_controller):
-        """Test adding a child trace"""
-        child_trace = {"id": "child-1", "type": "test"}
-        basic_trace_controller.add_child_trace(child_trace)
-        assert len(basic_trace_controller.children) == 1
-        assert basic_trace_controller.children[0] == child_trace
-
-    def test_to_crew_trace_llm_call(self):
-        """Test converting to CrewTrace for LLM call"""
-        test_messages = [{"role": "user", "content": "test"}]
-        test_response = {
-            "content": "test response",
-            "finish_reason": "stop",
-        }
-
-        controller = UnifiedTraceController(
-            trace_type=TraceType.LLM_CALL,
-            run_type=RunType.KICKOFF,
-            crew_type=CrewType.CREW,
-            run_id="test-run-id",
-            context={
-                "messages": test_messages,
-                "temperature": 0.7,
-                "max_tokens": 100,
-            },
-        )
-
-        # Set model and messages in the context
-        controller.context["model"] = "gpt-4"
-        controller.context["messages"] = test_messages
-
-        controller.start_trace()
-        controller.end_trace(result=test_response)
-
-        crew_trace = controller.to_crew_trace()
-        assert isinstance(crew_trace, CrewTrace)
-        assert isinstance(crew_trace.request, LLMRequest)
-        assert isinstance(crew_trace.response, LLMResponse)
-        assert crew_trace.request.model == "gpt-4"
-        assert crew_trace.request.messages == test_messages
-        assert crew_trace.response.content == test_response["content"]
-        assert crew_trace.response.finish_reason == test_response["finish_reason"]
-
-    def test_to_crew_trace_flow_step(self):
-        """Test converting to CrewTrace for flow step"""
-        flow_step_data = {
-            "function_name": "test_function",
-            "inputs": {"param1": "value1"},
-            "metadata": {"meta": "data"},
-        }
-
-        controller = UnifiedTraceController(
-            trace_type=TraceType.FLOW_STEP,
-            run_type=RunType.KICKOFF,
-            crew_type=CrewType.FLOW,
-            run_id="test-run-id",
-            flow_step=flow_step_data,
-        )
-
-        controller.start_trace()
-        controller.end_trace(result="test result")
-
-        crew_trace = controller.to_crew_trace()
-        assert isinstance(crew_trace, CrewTrace)
-        assert isinstance(crew_trace.flow_step, FlowStepIO)
-        assert crew_trace.flow_step.function_name == "test_function"
-        assert crew_trace.flow_step.inputs == {"param1": "value1"}
-        assert crew_trace.flow_step.outputs == {"result": "test result"}
-
-    def test_should_trace(self):
-        """Test should_trace function"""
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "true"}):
-            assert should_trace() is True
-
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "false"}):
-            assert should_trace() is False
-
-        with patch.dict(os.environ, clear=True):
-            assert should_trace() is False
-
-    @pytest.mark.asyncio
-    async def test_trace_flow_step_decorator(self):
-        """Test trace_flow_step decorator"""
-
-        class TestFlow:
-            flow_id = "test-flow-id"
-
-            @trace_flow_step
-            async def test_method(self, method_name, method, *args, **kwargs):
-                return "test result"
-
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "true"}):
-            flow = TestFlow()
-            result = await flow.test_method("test_method", lambda x: x, arg1="value1")
-            assert result == "test result"
-
-    def test_trace_llm_call_decorator(self):
-        """Test trace_llm_call decorator"""
-
-        class TestLLM:
-            model = "gpt-4"
-            temperature = 0.7
-            max_tokens = 100
-            stop = None
-
-            def _get_execution_context(self):
-                return MagicMock(), MagicMock()
-
-            def _get_new_messages(self, messages):
-                return messages
-
-            def _get_new_tool_results(self, agent):
-                return []
-
-            @trace_llm_call
-            def test_method(self, params):
-                return {
-                    "choices": [
-                        {
-                            "message": {"content": "test response"},
-                            "finish_reason": "stop",
-                        }
-                    ],
-                    "usage": {
-                        "total_tokens": 50,
-                        "prompt_tokens": 20,
-                        "completion_tokens": 30,
-                    },
-                }
-
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "true"}):
-            llm = TestLLM()
-            result = llm.test_method({"messages": []})
-            assert result["choices"][0]["message"]["content"] == "test response"
-
-    def test_init_crew_main_trace_kickoff(self):
-        """Test init_crew_main_trace in kickoff mode"""
-        trace_context = None
-
-        class TestCrew:
-            id = "test-crew-id"
-            _test = False
-            _train = False
-
-        @init_crew_main_trace
-        def test_method(self):
-            nonlocal trace_context
-            trace_context = TraceContext.get_current()
-            return "test result"
-
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "true"}):
-            crew = TestCrew()
-            result = test_method(crew)
-            assert result == "test result"
-            assert trace_context is not None
-            assert trace_context.trace_type == TraceType.LLM_CALL
-            assert trace_context.run_type == RunType.KICKOFF
-            assert trace_context.crew_type == CrewType.CREW
-            assert trace_context.run_id == str(crew.id)
-
-    def test_init_crew_main_trace_test_mode(self):
-        """Test init_crew_main_trace in test mode"""
-        trace_context = None
-
-        class TestCrew:
-            id = "test-crew-id"
-            _test = True
-            _train = False
-
-        @init_crew_main_trace
-        def test_method(self):
-            nonlocal trace_context
-            trace_context = TraceContext.get_current()
-            return "test result"
-
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "true"}):
-            crew = TestCrew()
-            result = test_method(crew)
-            assert result == "test result"
-            assert trace_context is not None
-            assert trace_context.run_type == RunType.TEST
-
-    def test_init_crew_main_trace_train_mode(self):
-        """Test init_crew_main_trace in train mode"""
-        trace_context = None
-
-        class TestCrew:
-            id = "test-crew-id"
-            _test = False
-            _train = True
-
-        @init_crew_main_trace
-        def test_method(self):
-            nonlocal trace_context
-            trace_context = TraceContext.get_current()
-            return "test result"
-
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "true"}):
-            crew = TestCrew()
-            result = test_method(crew)
-            assert result == "test result"
-            assert trace_context is not None
-            assert trace_context.run_type == RunType.TRAIN
-
-    @pytest.mark.asyncio
-    async def test_init_flow_main_trace(self):
-        """Test init_flow_main_trace decorator"""
-        trace_context = None
-        test_inputs = {"test": "input"}
-
-        class TestFlow:
-            flow_id = "test-flow-id"
-
-            @init_flow_main_trace
-            async def test_method(self, **kwargs):
-                nonlocal trace_context
-                trace_context = TraceContext.get_current()
-                # Verify the context is set during execution
-                assert trace_context.context["context"]["inputs"] == test_inputs
-                return "test result"
-
-        with patch.dict(os.environ, {"CREWAI_ENABLE_TRACING": "true"}):
-            flow = TestFlow()
-            result = await flow.test_method(inputs=test_inputs)
-            assert result == "test result"
-            assert trace_context is not None
-            assert trace_context.trace_type == TraceType.FLOW_STEP
-            assert trace_context.crew_type == CrewType.FLOW
-            assert trace_context.run_type == RunType.KICKOFF
-            assert trace_context.run_id == str(flow.flow_id)
-            assert trace_context.context["context"]["inputs"] == test_inputs
-
-    def test_trace_context_management(self):
-        """Test TraceContext management"""
-        trace1 = UnifiedTraceController(
-            trace_type=TraceType.LLM_CALL,
-            run_type=RunType.KICKOFF,
-            crew_type=CrewType.CREW,
-            run_id="test-run-1",
-        )
-
-        trace2 = UnifiedTraceController(
-            trace_type=TraceType.FLOW_STEP,
-            run_type=RunType.TEST,
-            crew_type=CrewType.FLOW,
-            run_id="test-run-2",
-        )
-
-        # Test that context is initially empty
-        assert TraceContext.get_current() is None
-
-        # Test setting and getting context
-        with TraceContext.set_current(trace1):
-            assert TraceContext.get_current() == trace1
-
-            # Test nested context
-            with TraceContext.set_current(trace2):
-                assert TraceContext.get_current() == trace2
-
-            # Test context restoration after nested block
-            assert TraceContext.get_current() == trace1
-
-        # Test context cleanup after with block
-        assert TraceContext.get_current() is None
-
-    def test_trace_context_error_handling(self):
-        """Test TraceContext error handling"""
-        trace = UnifiedTraceController(
-            trace_type=TraceType.LLM_CALL,
-            run_type=RunType.KICKOFF,
-            crew_type=CrewType.CREW,
-            run_id="test-run",
-        )
-
-        # Test that context is properly cleaned up even if an error occurs
-        try:
-            with TraceContext.set_current(trace):
-                raise ValueError("Test error")
-        except ValueError:
-            pass
-
-        assert TraceContext.get_current() is None
--- a/tests/utilities/cassettes/test_crew_emits_end_task_event.yaml
+++ b/tests/utilities/cassettes/test_crew_emits_end_task_event.yaml
--- a/tests/utilities/cassettes/test_crew_emits_test_kickoff_type_event.yaml
+++ b/tests/utilities/cassettes/test_crew_emits_test_kickoff_type_event.yaml
@@ -0,0 +1,236 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are base_agent. You are
+      a helpful assistant that just says hi\nYour personal goal is: Just say hi\nTo
+      give my best complete final answer to the task respond using the exact following
+      format:\n\nThought: I now can give a great answer\nFinal Answer: Your final
+      answer must be the great and the most complete as possible, it must be outcome
+      described.\n\nI MUST use these formats, my job depends on it!"}, {"role": "user",
+      "content": "\nCurrent Task: Just say hi\n\nThis is the expected criteria for
+      your final answer: hi\nyou MUST return the actual complete content as the final
+      answer, not a summary.\n\nBegin! This is VERY important to you, use the tools
+      available and give your best Final Answer, your job depends on it!\n\nThought:"}],
+      "model": "gpt-4o-mini", "stop": ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '838'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.61.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.61.0
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.8
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-B4VsaBZ4ec4b0ab4pkqWgyxTFVVfc\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1740415556,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"I now can give a great answer  \\nFinal
+      Answer: hi\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
+      \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+      161,\n    \"completion_tokens\": 12,\n    \"total_tokens\": 173,\n    \"prompt_tokens_details\":
+      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+      \"default\",\n  \"system_fingerprint\": \"fp_7fcd609668\"\n}\n"
+    headers:
+      CF-RAY:
+      - 9170edc5da6f230e-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 24 Feb 2025 16:45:57 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=lvRw4Nyef7N35to64fj2_kHDfbZp0KSFbwgF5chYMRI-1740415557-1.0.1.1-o5BaN1FpBwv5Wq6zIlv0rCB28lk5hVI9wZQWU3pig1jgyAKDkYzTwZ0MlSR6v6TPIX9RfepjrO3.Gk3FEmcVRw;
+        path=/; expires=Mon, 24-Feb-25 17:15:57 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=ySaVoTQvAcQyH5QoJQJDj75e5j8HwGFPOlFMAWEvXJk-1740415557302-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '721'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999808'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_fc3b3bcd4382cddaa3c04ce7003e4857
+    http_version: HTTP/1.1
+    status_code: 200
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Task Execution Evaluator.
+      Evaluator agent for crew evaluation with precise capabilities to evaluate the
+      performance of the agents in the crew based on the tasks they have performed\nYour
+      personal goal is: Your goal is to evaluate the performance of the agents in
+      the crew based on the tasks they have performed using score from 1 to 10 evaluating
+      on completion, quality, and overall performance.\nTo give my best complete final
+      answer to the task respond using the exact following format:\n\nThought: I now
+      can give a great answer\nFinal Answer: Your final answer must be the great and
+      the most complete as possible, it must be outcome described.\n\nI MUST use these
+      formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent Task:
+      Based on the task description and the expected output, compare and evaluate
+      the performance of the agents in the crew based on the Task Output they have
+      performed using score from 1 to 10 evaluating on completion, quality, and overall
+      performance.task_description: Just say hi task_expected_output: hi agent: base_agent
+      agent_goal: Just say hi Task Output: hi\n\nThis is the expected criteria for
+      your final answer: Evaluation Score from 1 to 10 based on the performance of
+      the agents on the tasks\nyou MUST return the actual complete content as the
+      final answer, not a summary.\nEnsure your final answer contains only the content
+      in the following format: {\n  \"quality\": float\n}\n\nEnsure the final output
+      does not include any code block markers like ```json or ```python.\n\nBegin!
+      This is VERY important to you, use the tools available and give your best Final
+      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop":
+      ["\nObservation:"]}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '1765'
+      content-type:
+      - application/json
+      cookie:
+      - __cf_bm=lvRw4Nyef7N35to64fj2_kHDfbZp0KSFbwgF5chYMRI-1740415557-1.0.1.1-o5BaN1FpBwv5Wq6zIlv0rCB28lk5hVI9wZQWU3pig1jgyAKDkYzTwZ0MlSR6v6TPIX9RfepjrO3.Gk3FEmcVRw;
+        _cfuvid=ySaVoTQvAcQyH5QoJQJDj75e5j8HwGFPOlFMAWEvXJk-1740415557302-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.61.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.61.0
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.8
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-B4Vsbd9AsRaJ2exDtWnHAwC8rIjfi\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1740415557,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"I now can give a great answer  \\nFinal
+      Answer: {  \\n  \\\"quality\\\": 10  \\n}  \",\n        \"refusal\": null\n
+      \     },\n      \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n
+      \ ],\n  \"usage\": {\n    \"prompt_tokens\": 338,\n    \"completion_tokens\":
+      22,\n    \"total_tokens\": 360,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
+      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+      \"default\",\n  \"system_fingerprint\": \"fp_7fcd609668\"\n}\n"
+    headers:
+      CF-RAY:
+      - 9170edd15bb5230e-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 24 Feb 2025 16:45:58 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '860'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999578'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_fad452c2d10b5fc95809130912b08837
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1
--- a/tests/utilities/cassettes/test_llm_emits_call_failed_event.yaml
+++ b/tests/utilities/cassettes/test_llm_emits_call_failed_event.yaml
@@ -0,0 +1,103 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": "Hello, how are you?"}], "model":
+      "gpt-4o-mini", "stop": []}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '102'
+      content-type:
+      - application/json
+      cookie:
+      - _cfuvid=IY8ppO70AMHr2skDSUsGh71zqHHdCQCZ3OvkPi26NBc-1740424913267-0.0.1.1-604800000;
+        __cf_bm=fU6K5KZoDmgcEuF8_yWAYKUO5fKHh6q5.wDPnna393g-1740424913-1.0.1.1-2iOaq3JVGWs439V0HxJee0IC9HdJm7dPkeJorD.AGw0YwkngRPM8rrTzn_7ht1BkbOauEezj.wPKcBz18gIYUg
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.61.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.61.0
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.8
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-B4YLA2SrC2rwdVQ3U87G5a0P5lsLw\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1740425016,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"Hello! I'm just a computer program, so
+      I don't have feelings, but I'm here and ready to help you. How can I assist
+      you today?\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
+      \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+      13,\n    \"completion_tokens\": 30,\n    \"total_tokens\": 43,\n    \"prompt_tokens_details\":
+      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+      \"default\",\n  \"system_fingerprint\": \"fp_709714d124\"\n}\n"
+    headers:
+      CF-RAY:
+      - 9171d4c0ed44236e-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 24 Feb 2025 19:23:38 GMT
+      Server:
+      - cloudflare
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '1954'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999978'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_ea2703502b8827e4297cd2a7bae9d9c8
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1
--- a/tests/utilities/cassettes/test_llm_emits_call_started_event.yaml
+++ b/tests/utilities/cassettes/test_llm_emits_call_started_event.yaml
@@ -0,0 +1,108 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "user", "content": "Hello, how are you?"}], "model":
+      "gpt-4o-mini", "stop": []}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '102'
+      content-type:
+      - application/json
+      cookie:
+      - _cfuvid=GefCcEtb_Gem93E4a9Hvt3Xyof1YQZVJAXBb9I6pEUs-1739398417375-0.0.1.1-604800000
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.61.0
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.61.0
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.8
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-B4YJU8IWKGyBQtAyPDRd3SFI2flYR\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1740424912,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"Hello! I'm just a computer program, so
+      I don't have feelings, but I'm here and ready to help you. How can I assist
+      you today?\",\n        \"refusal\": null\n      },\n      \"logprobs\": null,\n
+      \     \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+      13,\n    \"completion_tokens\": 30,\n    \"total_tokens\": 43,\n    \"prompt_tokens_details\":
+      {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+      {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+      \"default\",\n  \"system_fingerprint\": \"fp_7fcd609668\"\n}\n"
+    headers:
+      CF-RAY:
+      - 9171d230d8ed7ae0-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 24 Feb 2025 19:21:53 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=fU6K5KZoDmgcEuF8_yWAYKUO5fKHh6q5.wDPnna393g-1740424913-1.0.1.1-2iOaq3JVGWs439V0HxJee0IC9HdJm7dPkeJorD.AGw0YwkngRPM8rrTzn_7ht1BkbOauEezj.wPKcBz18gIYUg;
+        path=/; expires=Mon, 24-Feb-25 19:51:53 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=IY8ppO70AMHr2skDSUsGh71zqHHdCQCZ3OvkPi26NBc-1740424913267-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '993'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999978'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_d9c4d49185e97b1797061efc1e55d811
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1
--- a/tests/utilities/test_events.py
+++ b/tests/utilities/test_events.py
@@ -1,6 +1,5 @@
-import json
 from datetime import datetime
-from unittest.mock import MagicMock, patch
+from unittest.mock import Mock, patch

 import pytest
 from pydantic import Field
@@ -9,9 +8,9 @@ from crewai.agent import Agent
 from crewai.agents.crew_agent_executor import CrewAgentExecutor
 from crewai.crew import Crew
 from crewai.flow.flow import Flow, listen, start
+from crewai.llm import LLM
 from crewai.task import Task
 from crewai.tools.base_tool import BaseTool
-from crewai.tools.tool_usage import ToolUsage
 from crewai.utilities.events.agent_events import (
    AgentExecutionCompletedEvent,
    AgentExecutionErrorEvent,
@@ -21,8 +20,11 @@ from crewai.utilities.events.crew_events import (
    CrewKickoffCompletedEvent,
    CrewKickoffFailedEvent,
    CrewKickoffStartedEvent,
+    CrewTestCompletedEvent,
+    CrewTestStartedEvent,
 )
 from crewai.utilities.events.crewai_event_bus import crewai_event_bus
+from crewai.utilities.events.event_listener import EventListener
 from crewai.utilities.events.event_types import ToolUsageFinishedEvent
 from crewai.utilities.events.flow_events import (
    FlowCreatedEvent,
@@ -31,6 +33,12 @@ from crewai.utilities.events.flow_events import (
    MethodExecutionFailedEvent,
    MethodExecutionStartedEvent,
 )
+from crewai.utilities.events.llm_events import (
+    LLMCallCompletedEvent,
+    LLMCallFailedEvent,
+    LLMCallStartedEvent,
+    LLMCallType,
+)
 from crewai.utilities.events.task_events import (
    TaskCompletedEvent,
    TaskFailedEvent,
@@ -52,26 +60,35 @@ base_task = Task(
    expected_output="hi",
    agent=base_agent,
 )
+event_listener = EventListener()


@pytest.mark.vcr(filter_headers=["authorization"])
 def test_crew_emits_start_kickoff_event():
    received_events = []
+    mock_span = Mock()

-    with crewai_event_bus.scoped_handlers():
-
-        @crewai_event_bus.on(CrewKickoffStartedEvent)
-        def handle_crew_start(source, event):
-            received_events.append(event)
-
-        crew = Crew(agents=[base_agent], tasks=[base_task], name="TestCrew")
+    @crewai_event_bus.on(CrewKickoffStartedEvent)
+    def handle_crew_start(source, event):
+        received_events.append(event)

+    crew = Crew(agents=[base_agent], tasks=[base_task], name="TestCrew")
+    with (
+        patch.object(
+            event_listener._telemetry, "crew_execution_span", return_value=mock_span
+        ) as mock_crew_execution_span,
+        patch.object(
+            event_listener._telemetry, "end_crew", return_value=mock_span
+        ) as mock_crew_ended,
+    ):
        crew.kickoff()
+    mock_crew_execution_span.assert_called_once_with(crew, None)
+    mock_crew_ended.assert_called_once_with(crew, "hi")

-        assert len(received_events) == 1
-        assert received_events[0].crew_name == "TestCrew"
-        assert isinstance(received_events[0].timestamp, datetime)
-        assert received_events[0].type == "crew_kickoff_started"
+    assert len(received_events) == 1
+    assert received_events[0].crew_name == "TestCrew"
+    assert isinstance(received_events[0].timestamp, datetime)
+    assert received_events[0].type == "crew_kickoff_started"


@pytest.mark.vcr(filter_headers=["authorization"])
@@ -92,6 +109,45 @@ def test_crew_emits_end_kickoff_event():
    assert received_events[0].type == "crew_kickoff_completed"


+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_crew_emits_test_kickoff_type_event():
+    """Check that Crew.test() emits its start/completion events and telemetry span."""
+    captured = []
+    span_stub = Mock()
+
+    @crewai_event_bus.on(CrewTestStartedEvent)
+    def on_test_started(source, event):
+        captured.append(event)
+
+    @crewai_event_bus.on(CrewTestCompletedEvent)
+    def on_test_completed(source, event):
+        captured.append(event)
+
+    eval_llm = LLM(model="gpt-4o-mini")
+    with patch.object(
+        event_listener._telemetry, "test_execution_span", return_value=span_stub
+    ) as test_span:
+        crew = Crew(agents=[base_agent], tasks=[base_task], name="TestCrew")
+        crew.test(n_iterations=1, eval_llm=eval_llm)
+
+        # The patched telemetry hook must be hit exactly once, with these
+        # positional arguments: a Crew, the iteration count, None, the eval LLM.
+        assert test_span.call_count == 1
+        positional = test_span.call_args[0]
+        assert isinstance(positional[0], Crew)
+        assert positional[1] == 1
+        assert positional[2] is None
+        assert positional[3] == eval_llm
+
+    # Started must arrive before completed, both tagged with the crew's name.
+    assert len(captured) == 2
+    expected_types = ("crew_test_started", "crew_test_completed")
+    for event, expected_type in zip(captured, expected_types):
+        assert event.crew_name == "TestCrew"
+        assert isinstance(event.timestamp, datetime)
+        assert event.type == expected_type
+
+
@pytest.mark.vcr(filter_headers=["authorization"])
 def test_crew_emits_kickoff_failed_event():
    received_events = []
@@ -142,9 +198,20 @@ def test_crew_emits_end_task_event():
    def handle_task_end(source, event):
        received_events.append(event)

+    mock_span = Mock()
    crew = Crew(agents=[base_agent], tasks=[base_task], name="TestCrew")
+    with (
+        patch.object(
+            event_listener._telemetry, "task_started", return_value=mock_span
+        ) as mock_task_started,
+        patch.object(
+            event_listener._telemetry, "task_ended", return_value=mock_span
+        ) as mock_task_ended,
+    ):
+        crew.kickoff()

-    crew.kickoff()
+    mock_task_started.assert_called_once_with(crew=crew, task=base_task)
+    mock_task_ended.assert_called_once_with(mock_span, base_task, crew)

    assert len(received_events) == 1
    assert isinstance(received_events[0].timestamp, datetime)
@@ -334,24 +401,29 @@ def test_tools_emits_error_events():

 def test_flow_emits_start_event():
    received_events = []
+    mock_span = Mock()

-    with crewai_event_bus.scoped_handlers():
+    @crewai_event_bus.on(FlowStartedEvent)
+    def handle_flow_start(source, event):
+        received_events.append(event)

-        @crewai_event_bus.on(FlowStartedEvent)
-        def handle_flow_start(source, event):
-            received_events.append(event)
-
-        class TestFlow(Flow[dict]):
-            @start()
-            def begin(self):
-                return "started"
+    class TestFlow(Flow[dict]):
+        @start()
+        def begin(self):
+            return "started"

+    with (
+        patch.object(
+            event_listener._telemetry, "flow_execution_span", return_value=mock_span
+        ) as mock_flow_execution_span,
+    ):
        flow = TestFlow()
        flow.kickoff()

-        assert len(received_events) == 1
-        assert received_events[0].flow_name == "TestFlow"
-        assert received_events[0].type == "flow_started"
+    mock_flow_execution_span.assert_called_once_with("TestFlow", ["begin"])
+    assert len(received_events) == 1
+    assert received_events[0].flow_name == "TestFlow"
+    assert received_events[0].type == "flow_started"


 def test_flow_emits_finish_event():
@@ -455,6 +527,7 @@ def test_multiple_handlers_for_same_event():

 def test_flow_emits_created_event():
    received_events = []
+    mock_span = Mock()

    @crewai_event_bus.on(FlowCreatedEvent)
    def handle_flow_created(source, event):
@@ -465,8 +538,15 @@ def test_flow_emits_created_event():
        def begin(self):
            return "started"

-    flow = TestFlow()
-    flow.kickoff()
+    with (
+        patch.object(
+            event_listener._telemetry, "flow_creation_span", return_value=mock_span
+        ) as mock_flow_creation_span,
+    ):
+        flow = TestFlow()
+        flow.kickoff()
+
+    mock_flow_creation_span.assert_called_once_with("TestFlow")

    assert len(received_events) == 1
    assert received_events[0].flow_name == "TestFlow"
@@ -495,3 +575,43 @@ def test_flow_emits_method_execution_failed_event():
    assert received_events[0].flow_name == "TestFlow"
    assert received_events[0].type == "method_execution_failed"
    assert received_events[0].error == error
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_llm_emits_call_started_event():
+    """A successful LLM.call() emits a started event, then a completed event."""
+    captured = []
+
+    @crewai_event_bus.on(LLMCallStartedEvent)
+    def on_call_started(source, event):
+        captured.append(event)
+
+    @crewai_event_bus.on(LLMCallCompletedEvent)
+    def on_call_completed(source, event):
+        captured.append(event)
+
+    llm = LLM(model="gpt-4o-mini")
+    llm.call("Hello, how are you?")
+
+    emitted_types = [event.type for event in captured]
+    assert emitted_types == ["llm_call_started", "llm_call_completed"]
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_llm_emits_call_failed_event():
+    """A raising completion call propagates and emits one LLMCallFailedEvent."""
+    captured = []
+
+    @crewai_event_bus.on(LLMCallFailedEvent)
+    def on_call_failed(source, event):
+        captured.append(event)
+
+    message = "Simulated LLM call failure"
+    with patch("crewai.llm.litellm.completion", side_effect=Exception(message)):
+        llm = LLM(model="gpt-4o-mini")
+        with pytest.raises(Exception) as exc_info:
+            llm.call("Hello, how are you?")
+
+        assert str(exc_info.value) == message
+        assert len(captured) == 1
+        assert (captured[0].type, captured[0].error) == ("llm_call_failed", message)