feat: add regression tests and configure their workflow

2026-01-23 15:18:14 +00:00 · 2025-07-15 18:45:26 -03:00
parent 6ebb6c9b63
commit cbe570088e
7 changed files with 343 additions and 3 deletions
--- a/tests/regression/results/test_financial_analysis_regression.json
+++ b/tests/regression/results/test_financial_analysis_regression.json
@@ -0,0 +1,42 @@
+[
+  {
+    "timestamp": "2025-07-15T21:34:08.253410+00:00",
+    "metadata": {},
+    "results": [
+      {
+        "identifier": "72239c22b0cdde98ad5c588074ef6325",
+        "inputs": {
+          "company": "Apple Inc. (AAPL)"
+        },
+        "score": {
+          "goal_alignment": 10.0,
+          "semantic_quality": 9.0,
+          "tool_selection": 6.0,
+          "parameter_extraction": 5.0,
+          "tool_invocation": 10.0,
+          "reasoning_efficiency": 7.300000000000001
+        },
+        "expected_score": {
+          "goal_alignment": 8
+        },
+        "passed": true
+      },
+      {
+        "identifier": "test_2",
+        "inputs": {
+          "company": "Microsoft Corporation (MSFT)"
+        },
+        "score": {
+          "goal_alignment": 10.0,
+          "semantic_quality": 7.333333333333333,
+          "tool_selection": 6.25,
+          "parameter_extraction": 9.5,
+          "tool_invocation": 10.0,
+          "reasoning_efficiency": 6.0
+        },
+        "expected_score": 8,
+        "passed": true
+      }
+    ]
+  }
+]
--- a/tests/regression/results/test_history_teacher.json
+++ b/tests/regression/results/test_history_teacher.json
@@ -0,0 +1,24 @@
+[
+  {
+    "timestamp": "2025-07-15T21:31:05.916161+00:00",
+    "metadata": {},
+    "results": [
+      {
+        "identifier": "df0ea31ac4a7fb4a908b8319ec7b3719",
+        "inputs": {
+          "messages": "How was the Battle of Waterloo?"
+        },
+        "score": {
+          "goal_alignment": 10.0,
+          "semantic_quality": 10.0,
+          "tool_selection": 10.0,
+          "parameter_extraction": 10.0,
+          "tool_invocation": 10.0,
+          "reasoning_efficiency": 5.5
+        },
+        "expected_score": 8,
+        "passed": true
+      }
+    ]
+  }
+]
--- a/tests/regression/test_financial_analysis.py
+++ b/tests/regression/test_financial_analysis.py
@@ -0,0 +1,144 @@
+import pytest
+from crewai import Agent, Crew, Process, Task
+from crewai_tools import SerperDevTool
+
+from crewai.experimental.evaluation.testing import (
+    assert_experiment_successfully,
+    run_experiment,
+)
+
+
+@pytest.fixture
+def financial_analysis_crew():
+    """Build a three-agent financial-analysis crew that runs its tasks sequentially."""
+    serper = SerperDevTool()
+
+    researcher = Agent(
+        tools=[serper],
+        role="Financial Data Researcher",
+        goal="Efficiently collect and structure key financial metrics using multiple search strategies. Using only the search tool.",
+        backstory=(
+            "You are a precision-focused financial analyst who uses multiple targeted searches "
+            "to cross-verify data and ensure comprehensive coverage. You leverage different "
+            "search approaches to gather financial information from various authoritative sources."
+        ),
+    )
+
+    analyst = Agent(
+        tools=[serper],
+        role="Financial Analyst",
+        goal="Analyze financial data to assess company performance and outlook",
+        backstory=(
+            "You are a seasoned financial analyst with expertise in evaluating company "
+            "performance through quantitative analysis. You can interpret financial statements, "
+            "identify trends, and make reasoned assessments of a company's financial health."
+        ),
+    )
+
+    writer = Agent(
+        tools=[],
+        role="Financial Report Writer",
+        goal="Synthesize financial analysis into clear, actionable reports",
+        backstory=(
+            "You are an experienced financial writer who excels at turning complex financial "
+            "analyses into clear, concise, and impactful reports. You know how to highlight "
+            "key insights and present information in a way that's accessible to various audiences."
+        ),
+    )
+
+    gather_data = Task(
+        agent=researcher,
+        description=(
+            "Research {company} financial data using multiple targeted search queries:\n\n"
+            "**Search Strategy - Execute these searches sequentially:**\n"
+            "1. '{company} quarterly earnings Q4 2024 Q1 2025 financial results'\n"
+            "2. '{company} financial metrics P/E ratio profit margin debt equity'\n"
+            "3. '{company} revenue growth year over year earnings growth rate'\n"
+            "4. '{company} recent financial news SEC filings analyst reports'\n"
+            "5. '{company} stock performance market cap valuation 2024 2025'\n\n"
+            "**Data Collection Guidelines:**\n"
+            "- Use multiple search queries to cross-verify financial figures\n"
+            "- Prioritize official sources (SEC filings, earnings calls, company reports)\n"
+            "- Compare data across different financial platforms for accuracy\n"
+            "- Present findings in the exact format specified in expected_output."
+        ),
+        expected_output=(
+            "Financial data summary in this structure:\n\n"
+            "## Company Financial Overview\n"
+            "**Data Sources Used:** [List 3-5 sources from multiple searches]\n\n"
+            "**Latest Quarter:** [Period]\n"
+            "- Revenue: $X (YoY: +/-X%) [Source verification]\n"
+            "- Net Income: $X (YoY: +/-X%) [Source verification]\n"
+            "- EPS: $X (YoY: +/-X%) [Source verification]\n\n"
+            "**Key Metrics:**\n"
+            "- P/E Ratio: X [Current vs Historical]\n"
+            "- Profit Margin: X% [Trend indicator]\n"
+            "- Debt-to-Equity: X [Industry comparison]\n\n"
+            "**Growth Analysis:**\n"
+            "- Revenue Growth: X% (3-year trend)\n"
+            "- Earnings Growth: X% (consistency check)\n\n"
+            "**Material Developments:** [1-2 key items with impact assessment]\n"
+            "**Data Confidence:** [High/Medium/Low based on source consistency]"
+        ),
+    )
+
+    assess_data = Task(
+        agent=analyst,
+        context=[gather_data],
+        description=(
+            "Analyze the collected financial data to assess the company's performance and outlook. "
+            "Include the following in your analysis:\n"
+            "1. Evaluation of financial health based on key metrics\n"
+            "2. Trend analysis showing growth or decline patterns\n"
+            "3. Comparison with industry benchmarks or competitors\n"
+            "4. Identification of strengths and potential areas of concern\n"
+            "5. Short-term financial outlook based on current trends"
+        ),
+        expected_output=(
+            "A detailed financial analysis that includes assessment of key metrics, trends, "
+            "comparative analysis, and a reasoned outlook for the company's financial future."
+        ),
+    )
+
+    write_report = Task(
+        agent=writer,
+        context=[gather_data, assess_data],
+        description=(
+            "Create a professional financial report based on the research and analysis. "
+            "The report should:\n"
+            "1. Begin with an executive summary highlighting key findings\n"
+            "2. Present the financial analysis in a clear, logical structure\n"
+            "3. Include visual representations of key data points (described textually)\n"
+            "4. Provide actionable insights for potential investors\n"
+            "5. Conclude with a clear investment recommendation (buy, hold, or sell)"
+        ),
+        expected_output=(
+            "A professional, comprehensive financial report with executive summary, "
+            "structured analysis, visual elements, actionable insights, and a clear recommendation."
+        ),
+    )
+
+    # Task order mirrors the context chain: research feeds analysis, and both feed the report.
+    return Crew(
+        agents=[researcher, analyst, writer],
+        tasks=[gather_data, assess_data, write_report],
+        process=Process.sequential,
+    )
+
+
+def test_financial_analysis_regression(financial_analysis_crew):
+    """Run the crew on two company inputs and pass the results to the experiment assertion."""
+    # The cases use both `expected_score` shapes: a per-metric mapping and a bare number.
+    apple_case = {
+        "inputs": {"company": "Apple Inc. (AAPL)"},
+        "expected_score": {"goal_alignment": 8},
+    }
+    microsoft_case = {
+        "identifier": "test_2",
+        "inputs": {"company": "Microsoft Corporation (MSFT)"},
+        "expected_score": 8,
+    }
+    cases = [apple_case, microsoft_case]
+
+    outcome = run_experiment(dataset=cases, crew=financial_analysis_crew, verbose=True)
+    assert_experiment_successfully(outcome)
--- a/tests/regression/test_history_teacher.py
+++ b/tests/regression/test_history_teacher.py
@@ -0,0 +1,33 @@
+import pytest
+from crewai import Agent
+from crewai_tools import SerperDevTool
+
+from crewai.experimental.evaluation.testing import (
+    assert_experiment_successfully,
+    run_experiment,
+)
+
+@pytest.fixture
+def history_teacher():
+    """Return a verbose "History Educator" agent whose only tool is a `SerperDevTool`."""
+    return Agent(
+        tools=[SerperDevTool()],
+        verbose=True,
+        role="History Educator",
+        goal="Teach students about important historical events with clarity and context",
+        backstory=(
+            "As a renowned historian and educator, you have spent decades studying world history, "
+            "from ancient civilizations to modern events. You are passionate about making history "
+            "engaging and understandable for learners of all ages. Your mission is to educate, explain, "
+            "and spark curiosity about the past."
+        ),
+    )
+
+
+def test_history_teacher(history_teacher):
+    """Run a one-question experiment on the history agent and assert on its results."""
+    cases = [
+        {"inputs": {"messages": "How was the Battle of Waterloo?"}, "expected_score": 8}
+    ]
+    outcome = run_experiment(dataset=cases, agents=[history_teacher], verbose=True)
+    assert_experiment_successfully(outcome)