chore: restructure test env, cassettes, and conftest; fix flaky tests

Consolidates pytest config, standardizes env handling, reorganizes cassette layout, removes outdated VCR configs, improves test synchronization by using threading.Condition, updates event-waiting logic, ensures cleanup, regenerates Gemini cassettes, and reverts unintended test changes.
2026-01-09 08:08:32 +00:00 · 2025-11-29 16:55:24 -05:00
parent bc4e6a3127
commit c925d2d519
200 changed files with 2070 additions and 1891 deletions
--- a/lib/crewai/tests/agents/test_agent.py
+++ b/lib/crewai/tests/agents/test_agent.py
@@ -147,7 +147,7 @@ def test_custom_llm():
    assert agent.llm.model == "gpt-4"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execution():
    agent = Agent(
        role="test role",
@@ -166,7 +166,7 @@ def test_agent_execution():
    assert output == "1 + 1 is 2"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execution_with_tools():
    @tool
    def multiplier(first_number: int, second_number: int) -> float:
@@ -211,7 +211,7 @@ def test_agent_execution_with_tools():
    assert received_events[0].tool_args == {"first_number": 3, "second_number": 4}


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_logging_tool_usage():
    @tool
    def multiplier(first_number: int, second_number: int) -> float:
@@ -245,7 +245,7 @@ def test_logging_tool_usage():
    assert agent.tools_handler.last_used_tool.arguments == tool_usage.arguments


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_cache_hitting():
    @tool
    def multiplier(first_number: int, second_number: int) -> float:
@@ -325,7 +325,7 @@ def test_cache_hitting():
    assert received_events[0].output == "12"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_disabling_cache_for_agent():
    @tool
    def multiplier(first_number: int, second_number: int) -> float:
@@ -389,7 +389,7 @@ def test_disabling_cache_for_agent():
        read.assert_not_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execution_with_specific_tools():
    @tool
    def multiplier(first_number: int, second_number: int) -> float:
@@ -412,7 +412,7 @@ def test_agent_execution_with_specific_tools():
    assert output == "The result of the multiplication is 12."


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
    @tool
    def multiplier(first_number: int, second_number: int) -> float:
@@ -438,7 +438,7 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
    assert output == "12"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_powered_by_new_o_model_family_that_uses_tool():
    @tool
    def comapny_customer_data() -> str:
@@ -464,7 +464,7 @@ def test_agent_powered_by_new_o_model_family_that_uses_tool():
    assert output == "42"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_custom_max_iterations():
    @tool
    def get_final_answer() -> float:
@@ -509,7 +509,7 @@ def test_agent_custom_max_iterations():
    assert call_count == 2


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
@pytest.mark.timeout(30)
 def test_agent_max_iterations_stops_loop():
    """Test that agent execution terminates when max_iter is reached."""
@@ -546,7 +546,7 @@ def test_agent_max_iterations_stops_loop():
    )


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_repeated_tool_usage(capsys):
    """Test that agents handle repeated tool usage appropriately.

@@ -595,7 +595,7 @@ def test_agent_repeated_tool_usage(capsys):
    )


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_repeated_tool_usage_check_even_with_disabled_cache(capsys):
    @tool
    def get_final_answer(anything: str) -> float:
@@ -638,7 +638,7 @@ def test_agent_repeated_tool_usage_check_even_with_disabled_cache(capsys):
    )


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_moved_on_after_max_iterations():
    @tool
    def get_final_answer() -> float:
@@ -665,7 +665,7 @@ def test_agent_moved_on_after_max_iterations():
    assert output == "42"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_respect_the_max_rpm_set(capsys):
    @tool
    def get_final_answer() -> float:
@@ -699,7 +699,7 @@ def test_agent_respect_the_max_rpm_set(capsys):
        moveon.assert_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_respect_the_max_rpm_set_over_crew_rpm(capsys):
    from unittest.mock import patch

@@ -737,7 +737,7 @@ def test_agent_respect_the_max_rpm_set_over_crew_rpm(capsys):
        moveon.assert_not_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_without_max_rpm_respects_crew_rpm(capsys):
    from unittest.mock import patch

@@ -797,7 +797,7 @@ def test_agent_without_max_rpm_respects_crew_rpm(capsys):
        moveon.assert_called_once()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_error_on_parsing_tool(capsys):
    from unittest.mock import patch

@@ -840,7 +840,7 @@ def test_agent_error_on_parsing_tool(capsys):
    assert "Error on parsing tool." in captured.out


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_remembers_output_format_after_using_tools_too_many_times():
    from unittest.mock import patch

@@ -875,7 +875,7 @@ def test_agent_remembers_output_format_after_using_tools_too_many_times():
        remember_format.assert_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_use_specific_tasks_output_as_context(capsys):
    agent1 = Agent(role="test role", goal="test goal", backstory="test backstory")
    agent2 = Agent(role="test role2", goal="test goal2", backstory="test backstory2")
@@ -902,7 +902,7 @@ def test_agent_use_specific_tasks_output_as_context(capsys):
    assert "hi" in result.raw.lower() or "hello" in result.raw.lower()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_step_callback():
    class StepCallback:
        def callback(self, step):
@@ -936,7 +936,7 @@ def test_agent_step_callback():
        callback.assert_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_function_calling_llm():
    from crewai.llm import LLM
    llm = LLM(model="gpt-4o", is_litellm=True)
@@ -983,7 +983,7 @@ def test_agent_function_calling_llm():
        mock_original_tool_calling.assert_called()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_tool_result_as_answer_is_the_final_answer_for_the_agent():
    from crewai.tools import BaseTool

@@ -1013,7 +1013,7 @@ def test_tool_result_as_answer_is_the_final_answer_for_the_agent():
    assert result.raw == "Howdy!"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_tool_usage_information_is_appended_to_agent():
    from crewai.tools import BaseTool

@@ -1068,7 +1068,7 @@ def test_agent_definition_based_on_dict():


 # test for human input
-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_human_input():
    # Agent configuration
    config = {
@@ -1216,7 +1216,7 @@ Thought:<|eot_id|>
        assert mock_format_prompt.return_value == expected_prompt


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_task_allow_crewai_trigger_context():
    from crewai import Crew

@@ -1237,7 +1237,7 @@ def test_task_allow_crewai_trigger_context():
    assert "Trigger Payload: Important context data" in prompt


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_task_without_allow_crewai_trigger_context():
    from crewai import Crew

@@ -1260,7 +1260,7 @@ def test_task_without_allow_crewai_trigger_context():
    assert "Important context data" not in prompt


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_task_allow_crewai_trigger_context_no_payload():
    from crewai import Crew

@@ -1282,7 +1282,7 @@ def test_task_allow_crewai_trigger_context_no_payload():
    assert "Trigger Payload:" not in prompt


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_do_not_allow_crewai_trigger_context_for_first_task_hierarchical():
    from crewai import Crew

@@ -1311,7 +1311,7 @@ def test_do_not_allow_crewai_trigger_context_for_first_task_hierarchical():
    assert "Trigger Payload: Initial context data" not in first_prompt


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_first_task_auto_inject_trigger():
    from crewai import Crew

@@ -1344,7 +1344,7 @@ def test_first_task_auto_inject_trigger():
    assert "Trigger Payload:" not in second_prompt


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_ensure_first_task_allow_crewai_trigger_context_is_false_does_not_inject():
    from crewai import Crew

@@ -1549,7 +1549,7 @@ def test_agent_with_additional_kwargs():
    assert agent.llm.frequency_penalty == 0.1


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_llm_call():
    llm = LLM(model="gpt-3.5-turbo")
    messages = [{"role": "user", "content": "Say 'Hello, World!'"}]
@@ -1558,7 +1558,7 @@ def test_llm_call():
    assert "Hello, World!" in response


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_llm_call_with_error():
    llm = LLM(model="non-existent-model")
    messages = [{"role": "user", "content": "This should fail"}]
@@ -1567,7 +1567,7 @@ def test_llm_call_with_error():
        llm.call(messages)


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_handle_context_length_exceeds_limit():
    # Import necessary modules
    from crewai.utilities.agent_utils import handle_context_length
@@ -1620,7 +1620,7 @@ def test_handle_context_length_exceeds_limit():
        mock_summarize.assert_called_once()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_handle_context_length_exceeds_limit_cli_no():
    agent = Agent(
        role="test role",
@@ -1695,7 +1695,7 @@ def test_agent_with_all_llm_attributes():
    assert agent.llm.api_key == "sk-your-api-key-here"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_llm_call_with_all_attributes():
    llm = LLM(
        model="gpt-3.5-turbo",
@@ -1712,7 +1712,7 @@ def test_llm_call_with_all_attributes():
    assert "STOP" not in response


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_ollama_llama3():
    agent = Agent(
        role="test role",
@@ -1733,7 +1733,7 @@ def test_agent_with_ollama_llama3():
    assert "Llama3" in response or "AI" in response or "language model" in response


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_llm_call_with_ollama_llama3():
    llm = LLM(
        model="ollama/llama3.2:3b",
@@ -1752,7 +1752,7 @@ def test_llm_call_with_ollama_llama3():
    assert "Llama3" in response or "AI" in response or "language model" in response


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execute_task_basic():
    agent = Agent(
        role="test role",
@@ -1771,7 +1771,7 @@ def test_agent_execute_task_basic():
    assert "4" in result


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execute_task_with_context():
    agent = Agent(
        role="test role",
@@ -1793,7 +1793,7 @@ def test_agent_execute_task_with_context():
    assert "fox" in result.lower() and "dog" in result.lower()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execute_task_with_tool():
    @tool
    def dummy_tool(query: str) -> str:
@@ -1818,7 +1818,7 @@ def test_agent_execute_task_with_tool():
    assert "Dummy result for: test query" in result


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execute_task_with_custom_llm():
    agent = Agent(
        role="test role",
@@ -1839,7 +1839,7 @@ def test_agent_execute_task_with_custom_llm():
    )


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_execute_task_with_ollama():
    agent = Agent(
        role="test role",
@@ -1859,7 +1859,7 @@ def test_agent_execute_task_with_ollama():
    assert "AI" in result or "artificial intelligence" in result.lower()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_knowledge_sources():
    content = "Brandon's favorite color is red and he likes Mexican food."
    string_source = StringKnowledgeSource(content=content)
@@ -1891,7 +1891,7 @@ def test_agent_with_knowledge_sources():
            assert "red" in result.raw.lower()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold():
    content = "Brandon's favorite color is red and he likes Mexican food."
    string_source = StringKnowledgeSource(content=content)
@@ -1939,7 +1939,7 @@ def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold():
            )


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold_default():
    content = "Brandon's favorite color is red and he likes Mexican food."
    string_source = StringKnowledgeSource(content=content)
@@ -1988,7 +1988,7 @@ def test_agent_with_knowledge_sources_with_query_limit_and_score_threshold_defau
            )


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_knowledge_sources_extensive_role():
    content = "Brandon's favorite color is red and he likes Mexican food."
    string_source = StringKnowledgeSource(content=content)
@@ -2024,7 +2024,7 @@ def test_agent_with_knowledge_sources_extensive_role():
        assert "red" in result.raw.lower()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_knowledge_sources_works_with_copy():
    content = "Brandon's favorite color is red and he likes Mexican food."
    string_source = StringKnowledgeSource(content=content)
@@ -2063,7 +2063,7 @@ def test_agent_with_knowledge_sources_works_with_copy():
            assert isinstance(agent_copy.llm, BaseLLM)


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_knowledge_sources_generate_search_query():
    content = "Brandon's favorite color is red and he likes Mexican food."
    string_source = StringKnowledgeSource(content=content)
@@ -2116,7 +2116,7 @@ def test_agent_with_knowledge_sources_generate_search_query():
        assert "red" in result.raw.lower()


-@pytest.mark.vcr(record_mode="none", filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_knowledge_with_no_crewai_knowledge():
    mock_knowledge = MagicMock(spec=Knowledge)

@@ -2143,7 +2143,7 @@ def test_agent_with_knowledge_with_no_crewai_knowledge():
    mock_knowledge.query.assert_called_once()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_with_only_crewai_knowledge():
    mock_knowledge = MagicMock(spec=Knowledge)

@@ -2168,7 +2168,7 @@ def test_agent_with_only_crewai_knowledge():
    mock_knowledge.query.assert_called_once()


-@pytest.mark.vcr(record_mode="none", filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_knowledege_with_crewai_knowledge():
    crew_knowledge = MagicMock(spec=Knowledge)
    agent_knowledge = MagicMock(spec=Knowledge)
@@ -2197,7 +2197,7 @@ def test_agent_knowledege_with_crewai_knowledge():
    crew_knowledge.query.assert_called_once()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_litellm_auth_error_handling():
    """Test that LiteLLM authentication errors are handled correctly and not retried."""
    from litellm import AuthenticationError as LiteLLMAuthenticationError
@@ -2326,7 +2326,7 @@ def test_litellm_anthropic_error_handling():
    mock_llm_call.assert_called_once()


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_get_knowledge_search_query():
    """Test that _get_knowledge_search_query calls the LLM with the correct prompts."""
    from crewai.utilities.i18n import I18N
--- a/lib/crewai/tests/agents/test_lite_agent.py
+++ b/lib/crewai/tests/agents/test_lite_agent.py
@@ -70,7 +70,7 @@ class ResearchResult(BaseModel):
    sources: list[str] = Field(description="List of sources used")


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
@pytest.mark.parametrize("verbose", [True, False])
 def test_lite_agent_created_with_correct_parameters(monkeypatch, verbose):
    """Test that LiteAgent is created with the correct parameters when Agent.kickoff() is called."""
@@ -130,7 +130,7 @@ def test_lite_agent_created_with_correct_parameters(monkeypatch, verbose):
    assert created_lite_agent["response_format"] == TestResponse


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_lite_agent_with_tools():
    """Test that Agent can use tools."""
    # Create a LiteAgent with tools
@@ -174,7 +174,7 @@ def test_lite_agent_with_tools():
    assert event.tool_name == "search_web"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_lite_agent_structured_output():
    """Test that Agent can return a simple structured output."""

@@ -217,7 +217,7 @@ def test_lite_agent_structured_output():
    return result


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_lite_agent_returns_usage_metrics():
    """Test that LiteAgent returns usage metrics."""
    llm = LLM(model="gpt-4o-mini")
@@ -238,7 +238,7 @@ def test_lite_agent_returns_usage_metrics():
    assert result.usage_metrics["total_tokens"] > 0


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_lite_agent_output_includes_messages():
    """Test that LiteAgentOutput includes messages from agent execution."""
    llm = LLM(model="gpt-4o-mini")
@@ -259,7 +259,7 @@ def test_lite_agent_output_includes_messages():
    assert len(result.messages) > 0


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
@pytest.mark.asyncio
 async def test_lite_agent_returns_usage_metrics_async():
    """Test that LiteAgent returns usage metrics when run asynchronously."""
@@ -354,9 +354,9 @@ def test_sets_parent_flow_when_inside_flow():
    assert captured_agent.parent_flow is flow


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_guardrail_is_called_using_string():
-    guardrail_events = defaultdict(list)
+    guardrail_events: dict[str, list] = defaultdict(list)
    from crewai.events.event_types import (
        LLMGuardrailCompletedEvent,
        LLMGuardrailStartedEvent,
@@ -369,35 +369,33 @@ def test_guardrail_is_called_using_string():
        guardrail="""Only include Brazilian players, both women and men""",
    )

-    all_events_received = threading.Event()
+    condition = threading.Condition()

    @crewai_event_bus.on(LLMGuardrailStartedEvent)
    def capture_guardrail_started(source, event):
        assert isinstance(source, LiteAgent)
        assert source.original_agent == agent
-        guardrail_events["started"].append(event)
-        if (
-            len(guardrail_events["started"]) == 2
-            and len(guardrail_events["completed"]) == 2
-        ):
-            all_events_received.set()
+        with condition:
+            guardrail_events["started"].append(event)
+            condition.notify()

    @crewai_event_bus.on(LLMGuardrailCompletedEvent)
    def capture_guardrail_completed(source, event):
        assert isinstance(source, LiteAgent)
        assert source.original_agent == agent
-        guardrail_events["completed"].append(event)
-        if (
-            len(guardrail_events["started"]) == 2
-            and len(guardrail_events["completed"]) == 2
-        ):
-            all_events_received.set()
+        with condition:
+            guardrail_events["completed"].append(event)
+            condition.notify()

    result = agent.kickoff(messages="Top 10 best players in the world?")

-    assert all_events_received.wait(timeout=10), (
-        "Timeout waiting for all guardrail events"
-    )
+    with condition:
+        success = condition.wait_for(
+            lambda: len(guardrail_events["started"]) >= 2
+            and len(guardrail_events["completed"]) >= 2,
+            timeout=10,
+        )
+    assert success, "Timeout waiting for all guardrail events"
    assert len(guardrail_events["started"]) == 2
    assert len(guardrail_events["completed"]) == 2
    assert not guardrail_events["completed"][0].success
@@ -408,33 +406,27 @@ def test_guardrail_is_called_using_string():
    )


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_guardrail_is_called_using_callable():
-    guardrail_events = defaultdict(list)
+    guardrail_events: dict[str, list] = defaultdict(list)
    from crewai.events.event_types import (
        LLMGuardrailCompletedEvent,
        LLMGuardrailStartedEvent,
    )

-    all_events_received = threading.Event()
+    condition = threading.Condition()

    @crewai_event_bus.on(LLMGuardrailStartedEvent)
    def capture_guardrail_started(source, event):
-        guardrail_events["started"].append(event)
-        if (
-            len(guardrail_events["started"]) == 1
-            and len(guardrail_events["completed"]) == 1
-        ):
-            all_events_received.set()
+        with condition:
+            guardrail_events["started"].append(event)
+            condition.notify()

    @crewai_event_bus.on(LLMGuardrailCompletedEvent)
    def capture_guardrail_completed(source, event):
-        guardrail_events["completed"].append(event)
-        if (
-            len(guardrail_events["started"]) == 1
-            and len(guardrail_events["completed"]) == 1
-        ):
-            all_events_received.set()
+        with condition:
+            guardrail_events["completed"].append(event)
+            condition.notify()

    agent = Agent(
        role="Sports Analyst",
@@ -445,42 +437,40 @@ def test_guardrail_is_called_using_callable():

    result = agent.kickoff(messages="Top 1 best players in the world?")

-    assert all_events_received.wait(timeout=10), (
-        "Timeout waiting for all guardrail events"
-    )
+    with condition:
+        success = condition.wait_for(
+            lambda: len(guardrail_events["started"]) >= 1
+            and len(guardrail_events["completed"]) >= 1,
+            timeout=10,
+        )
+    assert success, "Timeout waiting for all guardrail events"
    assert len(guardrail_events["started"]) == 1
    assert len(guardrail_events["completed"]) == 1
    assert guardrail_events["completed"][0].success
    assert "Pelé - Santos, 1958" in result.raw


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_guardrail_reached_attempt_limit():
-    guardrail_events = defaultdict(list)
+    guardrail_events: dict[str, list] = defaultdict(list)
    from crewai.events.event_types import (
        LLMGuardrailCompletedEvent,
        LLMGuardrailStartedEvent,
    )

-    all_events_received = threading.Event()
+    condition = threading.Condition()

    @crewai_event_bus.on(LLMGuardrailStartedEvent)
    def capture_guardrail_started(source, event):
-        guardrail_events["started"].append(event)
-        if (
-            len(guardrail_events["started"]) == 3
-            and len(guardrail_events["completed"]) == 3
-        ):
-            all_events_received.set()
+        with condition:
+            guardrail_events["started"].append(event)
+            condition.notify()

    @crewai_event_bus.on(LLMGuardrailCompletedEvent)
    def capture_guardrail_completed(source, event):
-        guardrail_events["completed"].append(event)
-        if (
-            len(guardrail_events["started"]) == 3
-            and len(guardrail_events["completed"]) == 3
-        ):
-            all_events_received.set()
+        with condition:
+            guardrail_events["completed"].append(event)
+            condition.notify()

    agent = Agent(
        role="Sports Analyst",
@@ -498,9 +488,13 @@ def test_guardrail_reached_attempt_limit():
    ):
        agent.kickoff(messages="Top 10 best players in the world?")

-    assert all_events_received.wait(timeout=10), (
-        "Timeout waiting for all guardrail events"
-    )
+    with condition:
+        success = condition.wait_for(
+            lambda: len(guardrail_events["started"]) >= 3
+            and len(guardrail_events["completed"]) >= 3,
+            timeout=10,
+        )
+    assert success, "Timeout waiting for all guardrail events"
    assert len(guardrail_events["started"]) == 3  # 2 retries + 1 initial call
    assert len(guardrail_events["completed"]) == 3  # 2 retries + 1 initial call
    assert not guardrail_events["completed"][0].success
@@ -508,7 +502,7 @@ def test_guardrail_reached_attempt_limit():
    assert not guardrail_events["completed"][2].success


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_output_when_guardrail_returns_base_model():
    class Player(BaseModel):
        name: str
@@ -599,7 +593,7 @@ def test_lite_agent_with_custom_llm_and_guardrails():
    assert result2.raw == "Modified by guardrail"


-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_lite_agent_with_invalid_llm():
    """Test that LiteAgent raises proper error when create_llm returns None."""
    with patch("crewai.lite_agent.create_llm", return_value=None):
@@ -615,7 +609,7 @@ def test_lite_agent_with_invalid_llm():

@patch.dict("os.environ", {"CREWAI_PLATFORM_INTEGRATION_TOKEN": "test_token"})
@patch("crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder.requests.get")
-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_kickoff_with_platform_tools(mock_get):
    """Test that Agent.kickoff() properly integrates platform tools with LiteAgent"""
    mock_response = Mock()
@@ -657,7 +651,7 @@ def test_agent_kickoff_with_platform_tools(mock_get):

@patch.dict("os.environ", {"EXA_API_KEY": "test_exa_key"})
@patch("crewai.agent.Agent._get_external_mcp_tools")
-@pytest.mark.vcr(filter_headers=["authorization"])
+@pytest.mark.vcr()
 def test_agent_kickoff_with_mcp_tools(mock_get_mcp_tools):
    """Test that Agent.kickoff() properly integrates MCP tools with LiteAgent"""
    # Setup mock MCP tools - create a proper BaseTool instance