From 54acbc9d0e09a0e750abd32ae4c416e522e1d198 Mon Sep 17 00:00:00 2001
From: Brandon Hancock
Date: Fri, 10 Jan 2025 17:16:10 -0500
Subject: [PATCH] wip

---
 .github/workflows/tests.yml           |  44 ++-
 tests/e2e_crew_tests.py               | 289 ++++++++++++++++++
 .../e2e_crew_tests.py}                |   0
 .../test_router_with_empty_input.yaml | 103 -------
 4 files changed, 325 insertions(+), 111 deletions(-)
 create mode 100644 tests/e2e_crew_tests.py
 rename tests/{pipeline/__init__.py => main_branch_tests/e2e_crew_tests.py} (100%)
 delete mode 100644 tests/pipeline/cassettes/test_router_with_empty_input.yaml

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index f655dcc64..4835de348 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -1,32 +1,60 @@
 name: Run Tests
-on: [pull_request]
+on:
+  pull_request:
+  push:
+    branches:
+      - main

 permissions:
   contents: write

-env:
-  OPENAI_API_KEY: fake-api-key
-
 jobs:
   tests:
     runs-on: ubuntu-latest
     timeout-minutes: 15
+    env:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+      MODEL: gpt-4o-mini
     steps:
       - name: Checkout code
         uses: actions/checkout@v4

-      - name: Install uv
+      - name: Install UV
         uses: astral-sh/setup-uv@v3
         with:
           enable-cache: true
-
       - name: Set up Python
         run: uv python install 3.12.8

       - name: Install the project
         run: uv sync --dev --all-extras

-      - name: Run tests
-        run: uv run pytest tests -vv
+      - name: Run General Tests
+        run: uv run pytest tests -k "not main_branch_tests" -vv
+
+  main_branch_tests:
+    if: github.ref == 'refs/heads/main'
+    runs-on: ubuntu-latest
+    needs: tests
+    timeout-minutes: 15
+    env:
+      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Install UV
+        uses: astral-sh/setup-uv@v3
+        with:
+          enable-cache: true
+
+      - name: Set up Python
+        run: uv python install 3.12.8
+
+      - name: Install the project
+        run: uv sync --dev --all-extras
+
+      - name: Run Main Branch Specific Tests
+        run: uv run pytest tests/main_branch_tests -vv
diff --git a/tests/e2e_crew_tests.py b/tests/e2e_crew_tests.py
new file mode 100644
index 000000000..244c94b77
--- /dev/null
+++ b/tests/e2e_crew_tests.py
@@ -0,0 +1,289 @@
+import asyncio
+import os
+import tempfile
+
+import pytest
+
+from crewai.agent import Agent
+from crewai.crew import Crew
+from crewai.crews.crew_output import CrewOutput
+from crewai.process import Process
+from crewai.task import Task
+from crewai.tasks.conditional_task import ConditionalTask
+
+
+def test_basic_crew_execution(default_agent):
+    """Test basic crew execution using the default agent fixture."""
+
+    # Initialize agents by copying the default agent fixture
+    researcher = default_agent.copy()
+    researcher.role = "Researcher"
+    researcher.goal = "Research the latest advancements in AI."
+    researcher.backstory = "An expert in AI technologies."
+
+    writer = default_agent.copy()
+    writer.role = "Writer"
+    writer.goal = "Write an article based on research findings."
+    writer.backstory = "A professional writer specializing in technology topics."
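+
+    # NOTE: `default_agent` is assumed to be a fixture provided in conftest.py
+    # that returns a baseline Agent; copy() gives each role an independent
+    # instance, so the edits above do not mutate the shared fixture.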
+
+    # Define tasks
+    research_task = Task(
+        description="Provide a summary of the latest advancements in AI.",
+        expected_output="A detailed summary of recent AI advancements.",
+        agent=researcher,
+    )
+
+    writing_task = Task(
+        description="Write an article based on the research summary.",
+        expected_output="An engaging article on AI advancements.",
+        agent=writer,
+    )
+
+    # Create the crew
+    crew = Crew(
+        agents=[researcher, writer],
+        tasks=[research_task, writing_task],
+        process=Process.sequential,
+    )
+
+    # Execute the crew
+    result = crew.kickoff()
+
+    # Assertions to verify the result
+    assert result is not None, "Crew execution did not return a result."
+    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
+    assert (
+        "AI advancements" in result.raw
+        or "artificial intelligence" in result.raw.lower()
+    ), "Result does not contain expected content."
+
+
+def test_hierarchical_crew_with_manager(default_llm_config):
+    """Test hierarchical crew execution with a manager agent."""
+
+    # Initialize agents using the default LLM config fixture
+    ceo = Agent(
+        role="CEO",
+        goal="Oversee the project and ensure quality deliverables.",
+        backstory="A seasoned executive with a keen eye for detail.",
+        llm=default_llm_config,
+    )
+
+    developer = Agent(
+        role="Developer",
+        goal="Implement software features as per requirements.",
+        backstory="An experienced software developer.",
+        llm=default_llm_config,
+    )
+
+    tester = Agent(
+        role="Tester",
+        goal="Test software features and report bugs.",
+        backstory="A meticulous QA engineer.",
+        llm=default_llm_config,
+    )
+
+    # Define tasks
+    development_task = Task(
+        description="Develop the new authentication feature.",
+        expected_output="Code implementation of the authentication feature.",
+        agent=developer,
+    )
+
+    testing_task = Task(
+        description="Test the authentication feature for vulnerabilities.",
+        expected_output="A report on any found bugs or vulnerabilities.",
+        agent=tester,
+    )
+
+    # Create the crew with hierarchical process
+    crew = Crew(
+        agents=[ceo, developer, tester],
+        tasks=[development_task, testing_task],
+        process=Process.hierarchical,
+        manager_agent=ceo,
+    )
+
+    # Execute the crew
+    result = crew.kickoff()
+
+    # Assertions to verify the result
+    assert result is not None, "Crew execution did not return a result."
+    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
+    assert (
+        "authentication" in result.raw.lower()
+    ), "Result does not contain expected content."
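+
+    # NOTE: some crewai versions reject a manager_agent that is also listed in
+    # `agents`; if the Crew() construction above raises a validation error,
+    # keep `ceo` only as manager_agent and drop it from the agents list.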
+
+
+@pytest.mark.asyncio
+async def test_asynchronous_task_execution(default_llm_config):
+    """Test crew execution with asynchronous tasks."""
+
+    # Initialize agent
+    data_processor = Agent(
+        role="Data Processor",
+        goal="Process large datasets efficiently.",
+        backstory="An expert in data processing and analysis.",
+        llm=default_llm_config,
+    )
+
+    # Define tasks with async_execution=True
+    async_task1 = Task(
+        description="Process dataset A asynchronously.",
+        expected_output="Processed results of dataset A.",
+        agent=data_processor,
+        async_execution=True,
+    )
+
+    async_task2 = Task(
+        description="Process dataset B asynchronously.",
+        expected_output="Processed results of dataset B.",
+        agent=data_processor,
+        async_execution=True,
+    )
+
+    # Create the crew
+    crew = Crew(
+        agents=[data_processor],
+        tasks=[async_task1, async_task2],
+        process=Process.sequential,
+    )
+
+    # Execute the crew asynchronously
+    result = await crew.kickoff_async()
+
+    # Assertions to verify the result
+    assert result is not None, "Crew execution did not return a result."
+    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
+    assert (
+        "dataset a" in result.raw.lower() or "dataset b" in result.raw.lower()
+    ), "Result does not contain expected content."
+
+
+def test_crew_with_conditional_task(default_llm_config):
+    """Test crew execution that includes a conditional task."""
+
+    # Initialize agents
+    analyst = Agent(
+        role="Analyst",
+        goal="Analyze data and make decisions based on insights.",
+        backstory="A data analyst with experience in predictive modeling.",
+        llm=default_llm_config,
+    )
+
+    decision_maker = Agent(
+        role="Decision Maker",
+        goal="Make decisions based on analysis.",
+        backstory="An executive responsible for strategic decisions.",
+        llm=default_llm_config,
+    )
+
+    # Define tasks
+    analysis_task = Task(
+        description="Analyze the quarterly financial data.",
+        expected_output="A report highlighting key financial insights.",
+        agent=analyst,
+    )
+
+    decision_task = ConditionalTask(
+        description="If the profit margin is below 10%, recommend cost-cutting measures.",
+        expected_output="Recommendations for reducing costs.",
+        agent=decision_maker,
+        # the condition callback receives a TaskOutput, so match on its raw text
+        condition=lambda output: "profit margin below 10%" in output.raw.lower(),
+    )
+
+    # Create the crew
+    crew = Crew(
+        agents=[analyst, decision_maker],
+        tasks=[analysis_task, decision_task],
+        process=Process.sequential,
+    )
+
+    # Execute the crew
+    result = crew.kickoff()
+
+    # Assertions to verify the result
+    assert result is not None, "Crew execution did not return a result."
+    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
+    assert len(result.tasks_output) >= 1, "No tasks were executed."
+
+
+def test_crew_with_output_file():
+    """Test crew execution that writes output to a file."""
+
+    # Access the API key from environment variables
+    openai_api_key = os.environ.get("OPENAI_API_KEY")
+    assert openai_api_key, "OPENAI_API_KEY environment variable is not set."
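+
+    # This test makes a live OpenAI call; the inline `llm` dict passed to the
+    # Agent below is assumed to work in this crewai version (an LLM instance
+    # or a plain model string is the more conventional configuration).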
+
+    # Create a temporary directory for output files
+    with tempfile.TemporaryDirectory() as tmpdirname:
+
+        # Initialize agent
+        content_creator = Agent(
+            role="Content Creator",
+            goal="Generate engaging blog content.",
+            backstory="A creative writer with a passion for storytelling.",
+            llm={"provider": "openai", "model": "gpt-4", "api_key": openai_api_key},
+        )
+
+        # Define task with output file
+        output_file_path = f"{tmpdirname}/blog_post.txt"
+        blog_task = Task(
+            description="Write a blog post about the benefits of remote work.",
+            expected_output="An informative and engaging blog post.",
+            agent=content_creator,
+            output_file=output_file_path,
+        )
+
+        # Create the crew
+        crew = Crew(
+            agents=[content_creator],
+            tasks=[blog_task],
+            process=Process.sequential,
+        )
+
+        # Execute the crew
+        crew.kickoff()
+
+        # Assertions to verify the result
+        assert os.path.exists(output_file_path), "Output file was not created."
+
+        # Read the content from the file and perform assertions
+        with open(output_file_path, "r") as file:
+            content = file.read()
+            assert (
+                "remote work" in content.lower()
+            ), "Output file does not contain expected content."
+
+
+def test_invalid_hierarchical_process():
+    """Test that an error is raised when using hierarchical process without a manager agent or manager_llm."""
+    with pytest.raises(ValueError) as exc_info:
+        Crew(
+            agents=[],
+            tasks=[],
+            process=Process.hierarchical,  # Hierarchical process without a manager
+        )
+    # The exact wording may differ across crewai versions (e.g. backticks
+    # around the attribute names); loosen this substring if it fails.
+    assert "manager_llm or manager_agent is required" in str(exc_info.value)
+
+
+def test_crew_with_memory(memory_agent, memory_tasks):
+    """Test crew execution utilizing memory."""
+
+    # Enable memory in the crew
+    crew = Crew(
+        agents=[memory_agent],
+        tasks=memory_tasks,
+        process=Process.sequential,
+        memory=True,  # Enable memory
+    )
+
+    # Execute the crew
+    result = crew.kickoff()
+
+    # Assertions to verify the result
+    assert result is not None, "Crew execution did not return a result."
+    assert isinstance(result, CrewOutput), "Result is not an instance of CrewOutput."
+    assert (
+        "history of ai" in result.raw.lower() and "future of ai" in result.raw.lower()
+    ), "Result does not contain expected content."
diff --git a/tests/pipeline/__init__.py b/tests/main_branch_tests/e2e_crew_tests.py
similarity index 100%
rename from tests/pipeline/__init__.py
rename to tests/main_branch_tests/e2e_crew_tests.py
diff --git a/tests/pipeline/cassettes/test_router_with_empty_input.yaml b/tests/pipeline/cassettes/test_router_with_empty_input.yaml
deleted file mode 100644
index ac64c5796..000000000
--- a/tests/pipeline/cassettes/test_router_with_empty_input.yaml
+++ /dev/null
@@ -1,103 +0,0 @@
-interactions:
-- request:
-    body: '{"messages": [{"role": "system", "content": "You are Test Role. Test Backstory\nYour
-      personal goal is: Test Goal\nTo give my best complete final answer to the task
-      use the exact following format:\n\nThought: I now can give a great answer\nFinal
-      Answer: Your final answer must be the great and the most complete as possible,
-      it must be outcome described.\n\nI MUST use these formats, my job depends on
-      it!"}, {"role": "user", "content": "\nCurrent Task: Return: Test output\n\nThis
-      is the expect criteria for your final answer: Test output\nyou MUST return the
-      actual complete content as the final answer, not a summary.\n\nBegin!
-      This is VERY important to you, use the tools available and give your best
-      Final Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o"}'
-    headers:
-      accept:
-      - application/json
-      accept-encoding:
-      - gzip, deflate
-      connection:
-      - keep-alive
-      content-length:
-      - '776'
-      content-type:
-      - application/json
-      cookie:
-      - __cf_bm=9.8sBYBkvBR8R1K_bVF7xgU..80XKlEIg3N2OBbTSCU-1727214102-1.0.1.1-.qiTLXbPamYUMSuyNsOEB9jhGu.jOifujOrx9E2JZvStbIZ9RTIiE44xKKNfLPxQkOi6qAT3h6htK8lPDGV_5g;
-        _cfuvid=lbRdAddVWV6W3f5Dm9SaOPWDUOxqtZBSPr_fTW26nEA-1727213194587-0.0.1.1-604800000
-      host:
-      - api.openai.com
-      user-agent:
-      - OpenAI/Python 1.47.0
-      x-stainless-arch:
-      - arm64
-      x-stainless-async:
-      - 'false'
-      x-stainless-lang:
-      - python
-      x-stainless-os:
-      - MacOS
-      x-stainless-package-version:
-      - 1.47.0
-      x-stainless-raw-response:
-      - 'true'
-      x-stainless-runtime:
-      - CPython
-      x-stainless-runtime-version:
-      - 3.11.7
-    method: POST
-    uri: https://api.openai.com/v1/chat/completions
-  response:
-    content: "{\n  \"id\": \"chatcmpl-AB7fr4aPstiFUArxwxTVdfJSFwxsC\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1727214471,\n  \"model\": \"gpt-4o-2024-05-13\",\n
-      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"Thought: I now can give a great answer\\nFinal
-      Answer: Test output\",\n        \"refusal\": null\n      },\n      \"logprobs\":
-      null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-      155,\n    \"completion_tokens\": 15,\n    \"total_tokens\": 170,\n    \"completion_tokens_details\":
-      {\n      \"reasoning_tokens\": 0\n    }\n  },\n  \"system_fingerprint\": \"fp_52a7f40b0b\"\n}\n"
-    headers:
-      CF-Cache-Status:
-      - DYNAMIC
-      CF-RAY:
-      - 8c85f9a91e311cf3-GRU
-      Connection:
-      - keep-alive
-      Content-Encoding:
-      - gzip
-      Content-Type:
-      - application/json
-      Date:
-      - Tue, 24 Sep 2024 21:47:51 GMT
-      Server:
-      - cloudflare
-      Transfer-Encoding:
-      - chunked
-      X-Content-Type-Options:
-      - nosniff
-      access-control-expose-headers:
-      - X-Request-ID
-      openai-organization:
-      - crewai-iuxna1
-      openai-processing-ms:
-      - '216'
-      openai-version:
-      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
-      x-ratelimit-limit-requests:
-      - '10000'
-      x-ratelimit-limit-tokens:
-      - '30000000'
-      x-ratelimit-remaining-requests:
-      - '9999'
-      x-ratelimit-remaining-tokens:
-      - '29999817'
-      x-ratelimit-reset-requests:
-      - 6ms
-      x-ratelimit-reset-tokens:
-      - 0s
-      x-request-id:
-      - req_88b1376917b345c976fdb03a55f7b6c1
-    http_version: HTTP/1.1
-    status_code: 200
-version: 1