wip

2025-12-17 21:08:29 +00:00 · 2025-01-10 17:16:10 -05:00
4 changed files with 325 additions and 111 deletions
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,32 +1,60 @@
 name: Run Tests
-on: [pull_request]
+on:
  pull_request:
  push:
    branches:
      - main
 permissions:
  contents: write
 env:
  OPENAI_API_KEY: fake-api-key
 jobs:
  tests:
    runs-on: ubuntu-latest
    timeout-minutes: 15
    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      MODEL: gpt-4o-mini
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
-      - name: Install uv
+      - name: Install UV
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.12.8
      - name: Install the project
        run: uv sync --dev --all-extras
-      - name: Run tests
+      - name: Run General Tests
-        run: uv run pytest tests -vv
+        run: uv run pytest tests -k "not main_branch_tests" -vv
  main_branch_tests:
    if: github.ref == 'refs/heads/main'
    runs-on: ubuntu-latest
    needs: tests
    timeout-minutes: 15
    env:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install UV
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
      - name: Set up Python
        run: uv python install 3.12.8
      - name: Install the project
        run: uv sync --dev --all-extras
      - name: Run Main Branch Specific Tests
        run: uv run pytest tests/main_branch_tests -vv
--- a/tests/e2e_crew_tests.py
+++ b/tests/e2e_crew_tests.py
@@ -0,0 +1,289 @@
 import asyncio
 import os
 import tempfile
 import pytest
 from crewai.agent import Agent
 from crewai.crew import Crew
 from crewai.crews.crew_output import CrewOutput
 from crewai.process import Process
 from crewai.task import Task
 from crewai.tasks.conditional_task import ConditionalTask
 def test_basic_crew_execution(default_agent):
    """Test basic crew execution using the default agent fixture."""
    # Initialize agents by copying the default agent fixture
    researcher = default_agent.copy()
    researcher.role = "Researcher"
    researcher.goal = "Research the latest advancements in AI."
    researcher.backstory = "An expert in AI technologies."
    writer = default_agent.copy()
    writer.role = "Writer"
    writer.goal = "Write an article based on research findings."
    writer.backstory = "A professional writer specializing in technology topics."
    # Define tasks
    research_task = Task(
        description="Provide a summary of the latest advancements in AI.",
        expected_output="A detailed summary of recent AI advancements.",
        agent=researcher,
    )
    writing_task = Task(
        description="Write an article based on the research summary.",
        expected_output="An engaging article on AI advancements.",
        agent=writer,
    )
    # Create the crew
    crew = Crew(
        agents=[researcher, writer],
        tasks=[research_task, writing_task],
        process=Process.sequential,
    )
    # Execute the crew
    result = crew.kickoff()
    # Assertions to verify the result
    assert result is not None, "Crew execution did not return a result."
    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
    assert (
        "AI advancements" in result.raw
        or "artificial intelligence" in result.raw.lower()
    ), "Result does not contain expected content."
 def test_hierarchical_crew_with_manager(default_llm_config):
    """Test hierarchical crew execution with a manager agent."""
    # Initialize agents using the default LLM config fixture
    ceo = Agent(
        role="CEO",
        goal="Oversee the project and ensure quality deliverables.",
        backstory="A seasoned executive with a keen eye for detail.",
        llm=default_llm_config,
    )
    developer = Agent(
        role="Developer",
        goal="Implement software features as per requirements.",
        backstory="An experienced software developer.",
        llm=default_llm_config,
    )
    tester = Agent(
        role="Tester",
        goal="Test software features and report bugs.",
        backstory="A meticulous QA engineer.",
        llm=default_llm_config,
    )
    # Define tasks
    development_task = Task(
        description="Develop the new authentication feature.",
        expected_output="Code implementation of the authentication feature.",
        agent=developer,
    )
    testing_task = Task(
        description="Test the authentication feature for vulnerabilities.",
        expected_output="A report on any found bugs or vulnerabilities.",
        agent=tester,
    )
    # Create the crew with hierarchical process
    crew = Crew(
        agents=[ceo, developer, tester],
        tasks=[development_task, testing_task],
        process=Process.hierarchical,
        manager_agent=ceo,
    )
    # Execute the crew
    result = crew.kickoff()
    # Assertions to verify the result
    assert result is not None, "Crew execution did not return a result."
    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
    assert (
        "authentication" in result.raw.lower()
    ), "Result does not contain expected content."
@pytest.mark.asyncio
 async def test_asynchronous_task_execution(default_llm_config):
    """Test crew execution with asynchronous tasks."""
    # Initialize agent
    data_processor = Agent(
        role="Data Processor",
        goal="Process large datasets efficiently.",
        backstory="An expert in data processing and analysis.",
        llm=default_llm_config,
    )
    # Define tasks with async_execution=True
    async_task1 = Task(
        description="Process dataset A asynchronously.",
        expected_output="Processed results of dataset A.",
        agent=data_processor,
        async_execution=True,
    )
    async_task2 = Task(
        description="Process dataset B asynchronously.",
        expected_output="Processed results of dataset B.",
        agent=data_processor,
        async_execution=True,
    )
    # Create the crew
    crew = Crew(
        agents=[data_processor],
        tasks=[async_task1, async_task2],
        process=Process.sequential,
    )
    # Execute the crew asynchronously
    result = await crew.kickoff_async()
    # Assertions to verify the result
    assert result is not None, "Crew execution did not return a result."
    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
    assert (
        "dataset a" in result.raw.lower() or "dataset b" in result.raw.lower()
    ), "Result does not contain expected content."
 def test_crew_with_conditional_task(default_llm_config):
    """Test crew execution that includes a conditional task."""
    # Initialize agents
    analyst = Agent(
        role="Analyst",
        goal="Analyze data and make decisions based on insights.",
        backstory="A data analyst with experience in predictive modeling.",
        llm=default_llm_config,
    )
    decision_maker = Agent(
        role="Decision Maker",
        goal="Make decisions based on analysis.",
        backstory="An executive responsible for strategic decisions.",
        llm=default_llm_config,
    )
    # Define tasks
    analysis_task = Task(
        description="Analyze the quarterly financial data.",
        expected_output="A report highlighting key financial insights.",
        agent=analyst,
    )
    decision_task = ConditionalTask(
        description="If the profit margin is below 10%, recommend cost-cutting measures.",
        expected_output="Recommendations for reducing costs.",
        agent=decision_maker,
        condition=lambda output: "profit margin below 10%" in output.lower(),
    )
    # Create the crew
    crew = Crew(
        agents=[analyst, decision_maker],
        tasks=[analysis_task, decision_task],
        process=Process.sequential,
    )
    # Execute the crew
    result = crew.kickoff()
    # Assertions to verify the result
    assert result is not None, "Crew execution did not return a result."
    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
    assert len(result.tasks_output) >= 1, "No tasks were executed."
 def test_crew_with_output_file():
    """Test crew execution that writes output to a file."""
    # Access the API key from environment variables
    openai_api_key = os.environ.get("OPENAI_API_KEY")
    assert openai_api_key, "OPENAI_API_KEY environment variable is not set."
    # Create a temporary directory for output files
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Initialize agent
        content_creator = Agent(
            role="Content Creator",
            goal="Generate engaging blog content.",
            backstory="A creative writer with a passion for storytelling.",
            llm={"provider": "openai", "model": "gpt-4", "api_key": openai_api_key},
        )
        # Define task with output file
        output_file_path = f"{tmpdirname}/blog_post.txt"
        blog_task = Task(
            description="Write a blog post about the benefits of remote work.",
            expected_output="An informative and engaging blog post.",
            agent=content_creator,
            output_file=output_file_path,
        )
        # Create the crew
        crew = Crew(
            agents=[content_creator],
            tasks=[blog_task],
            process=Process.sequential,
        )
        # Execute the crew
        crew.kickoff()
        # Assertions to verify the result
        assert os.path.exists(output_file_path), "Output file was not created."
        # Read the content from the file and perform assertions
        with open(output_file_path, "r") as file:
            content = file.read()
            assert (
                "remote work" in content.lower()
            ), "Output file does not contain expected content."
 def test_invalid_hierarchical_process():
    """Test that an error is raised when using hierarchical process without a manager agent or manager_llm."""
    with pytest.raises(ValueError) as exc_info:
        Crew(
            agents=[],
            tasks=[],
            process=Process.hierarchical,  # Hierarchical process without a manager
        )
    assert "manager_llm or manager_agent is required" in str(exc_info.value)
 def test_crew_with_memory(memory_agent, memory_tasks):
    """Test crew execution utilizing memory."""
    # Enable memory in the crew
    crew = Crew(
        agents=[memory_agent],
        tasks=memory_tasks,
        process=Process.sequential,
        memory=True,  # Enable memory
    )
    # Execute the crew
    result = crew.kickoff()
    # Assertions to verify the result
    assert result is not None, "Crew execution did not return a result."
    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
    assert (
        "history of ai" in result.raw.lower() and "future of ai" in result.raw.lower()
    ), "Result does not contain expected content."
--- a/tests/main_branch_tests/e2e_crew_tests.py
+++ b/tests/main_branch_tests/e2e_crew_tests.py
--- a/tests/pipeline/cassettes/test_router_with_empty_input.yaml
+++ b/tests/pipeline/cassettes/test_router_with_empty_input.yaml
@@ -1,103 +0,0 @@
 interactions:
 - request:
    body: '{"messages": [{"role": "system", "content": "You are Test Role. Test Backstory\nYour
      personal goal is: Test Goal\nTo give my best complete final answer to the task
      use the exact following format:\n\nThought: I now can give a great answer\nFinal
      Answer: Your final answer must be the great and the most complete as possible,
      it must be outcome described.\n\nI MUST use these formats, my job depends on
      it!"}, {"role": "user", "content": "\nCurrent Task: Return: Test output\n\nThis
      is the expect criteria for your final answer: Test output\nyou MUST return the
      actual complete content as the final answer, not a summary.\n\nBegin! This is
      VERY important to you, use the tools available and give your best Final Answer,
      your job depends on it!\n\nThought:"}], "model": "gpt-4o"}'
    headers:
      accept:
      - application/json
      accept-encoding:
      - gzip, deflate
      connection:
      - keep-alive
      content-length:
      - '776'
      content-type:
      - application/json
      cookie:
      - __cf_bm=9.8sBYBkvBR8R1K_bVF7xgU..80XKlEIg3N2OBbTSCU-1727214102-1.0.1.1-.qiTLXbPamYUMSuyNsOEB9jhGu.jOifujOrx9E2JZvStbIZ9RTIiE44xKKNfLPxQkOi6qAT3h6htK8lPDGV_5g;
        _cfuvid=lbRdAddVWV6W3f5Dm9SaOPWDUOxqtZBSPr_fTW26nEA-1727213194587-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
      - OpenAI/Python 1.47.0
      x-stainless-arch:
      - arm64
      x-stainless-async:
      - 'false'
      x-stainless-lang:
      - python
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
      - 1.47.0
      x-stainless-raw-response:
      - 'true'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
      - 3.11.7
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
    content: "{\n  \"id\": \"chatcmpl-AB7fr4aPstiFUArxwxTVdfJSFwxsC\",\n  \"object\":
      \"chat.completion\",\n  \"created\": 1727214471,\n  \"model\": \"gpt-4o-2024-05-13\",\n
      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
      \"assistant\",\n        \"content\": \"Thought: I now can give a great answer\\nFinal
      Answer: Test output\",\n        \"refusal\": null\n      },\n      \"logprobs\":
      null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
      155,\n    \"completion_tokens\": 15,\n    \"total_tokens\": 170,\n    \"completion_tokens_details\":
      {\n      \"reasoning_tokens\": 0\n    }\n  },\n  \"system_fingerprint\": \"fp_52a7f40b0b\"\n}\n"
    headers:
      CF-Cache-Status:
      - DYNAMIC
      CF-RAY:
      - 8c85f9a91e311cf3-GRU
      Connection:
      - keep-alive
      Content-Encoding:
      - gzip
      Content-Type:
      - application/json
      Date:
      - Tue, 24 Sep 2024 21:47:51 GMT
      Server:
      - cloudflare
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
      openai-organization:
      - crewai-iuxna1
      openai-processing-ms:
      - '216'
      openai-version:
      - '2020-10-01'
      strict-transport-security:
      - max-age=31536000; includeSubDomains; preload
      x-ratelimit-limit-requests:
      - '10000'
      x-ratelimit-limit-tokens:
      - '30000000'
      x-ratelimit-remaining-requests:
      - '9999'
      x-ratelimit-remaining-tokens:
      - '29999817'
      x-ratelimit-reset-requests:
      - 6ms
      x-ratelimit-reset-tokens:
      - 0s
      x-request-id:
      - req_88b1376917b345c976fdb03a55f7b6c1
    http_version: HTTP/1.1
    status_code: 200
 version: 1