mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-28 17:48:13 +00:00
Compare commits
1 Commits
1.8.1
...
lg-agent-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cbe570088e |
75
.github/workflows/regression-tests.yml
vendored
Normal file
75
.github/workflows/regression-tests.yml
vendored
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
# Manually triggered workflow that runs the suite under tests/regression
# against a chosen branch.
name: Regression Tests

on:
  workflow_dispatch:
    inputs:
      branch:
        description: 'Branch to run tests on'
        required: true
        default: 'main'
        type: string

# Least privilege: every step below only reads the repository.
permissions:
  contents: read

env:
  OPENAI_API_KEY: fake-api-key
  PYTHONUNBUFFERED: 1

jobs:
  regression-tests:
    name: Regression - ${{ github.event.inputs.branch }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.branch }}
          fetch-depth: 0

      - name: Display execution info
        # The user-supplied branch name is passed through the environment
        # instead of being expanded into the script text, so shell
        # metacharacters in the input cannot be executed.
        env:
          TARGET_BRANCH: ${{ github.event.inputs.branch }}
        run: |
          echo "🚀 Running Regression Tests"
          echo "📂 Branch: ${TARGET_BRANCH}"
          echo "📊 Current commit: $(git rev-parse --short HEAD)"

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          cache-dependency-glob: |
            **/pyproject.toml
            **/uv.lock

      - name: Set up Python 3.13
        run: uv python install 3.13

      - name: Install the project
        run: uv sync --dev --all-extras

      - name: Install SQLite with FTS5 support
        run: |
          # WORKAROUND: GitHub Actions' Ubuntu runner uses SQLite without FTS5 support compiled in.
          # This is a temporary fix until the runner includes SQLite with FTS5 or Python's sqlite3
          # module is compiled with FTS5 support by default.
          # TODO: Remove this workaround once GitHub Actions runners include SQLite FTS5 support

          # Install pysqlite3-binary which has FTS5 support
          uv pip install pysqlite3-binary
          # Create a sitecustomize.py to override sqlite3 with pysqlite3
          mkdir -p .pytest_sqlite_override
          echo "import sys; import pysqlite3; sys.modules['sqlite3'] = pysqlite3" > .pytest_sqlite_override/sitecustomize.py
          # Test FTS5 availability
          PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; print(f'SQLite version: {sqlite3.sqlite_version}')"
          PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; conn = sqlite3.connect(':memory:'); conn.execute('CREATE VIRTUAL TABLE test USING fts5(content)'); print('FTS5 module available')"

      - name: Run Regression Tests
        # PYTHONPATH makes the sitecustomize.py written above load first, so
        # the test process uses the FTS5-enabled sqlite3 replacement.
        run: |
          PYTHONPATH=.pytest_sqlite_override uv run pytest \
            --block-network \
            --timeout=30 \
            -vv \
            --durations=10 \
            -n auto \
            --maxfail=5 \
            tests/regression
|
||||||
@@ -137,3 +137,6 @@ exclude = [
|
|||||||
"docs/**",
|
"docs/**",
|
||||||
"docs/",
|
"docs/",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
norecursedirs = ["tests/regression"]
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
import inspect
|
import inspect
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from typing_extensions import Any
|
from typing_extensions import Any
|
||||||
import warnings
|
import warnings
|
||||||
@@ -41,12 +42,30 @@ def run_experiment(dataset: list[dict[str, Any]], crew: Crew | None = None, agen
|
|||||||
return runner.run(agents=agents, crew=crew, print_summary=verbose)
|
return runner.run(agents=agents, crew=crew, print_summary=verbose)
|
||||||
|
|
||||||
def _get_baseline_filepath_fallback() -> str:
|
def _get_baseline_filepath_fallback() -> str:
|
||||||
test_func_name = "experiment_fallback"
|
filename = "experiment_fallback.json"
|
||||||
|
calling_file = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
current_frame = inspect.currentframe()
|
current_frame = inspect.currentframe()
|
||||||
if current_frame is not None:
|
if current_frame is not None:
|
||||||
test_func_name = current_frame.f_back.f_back.f_code.co_name # type: ignore[union-attr]
|
test_func_name = current_frame.f_back.f_back.f_code.co_name # type: ignore[union-attr]
|
||||||
|
filename = f"{test_func_name}.json"
|
||||||
|
calling_file = current_frame.f_back.f_back.f_code.co_filename # type: ignore[union-attr]
|
||||||
except Exception:
|
except Exception:
|
||||||
...
|
return filename
|
||||||
return f"{test_func_name}_results.json"
|
|
||||||
|
if not calling_file:
|
||||||
|
return filename
|
||||||
|
|
||||||
|
calling_path = Path(calling_file)
|
||||||
|
try:
|
||||||
|
baseline_dir_parts = calling_path.parts[:-1]
|
||||||
|
baseline_dir = Path(*baseline_dir_parts) / "results"
|
||||||
|
baseline_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
baseline_filepath = baseline_dir / filename
|
||||||
|
return str(baseline_filepath)
|
||||||
|
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return filename
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"timestamp": "2025-07-15T21:34:08.253410+00:00",
|
||||||
|
"metadata": {},
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"identifier": "72239c22b0cdde98ad5c588074ef6325",
|
||||||
|
"inputs": {
|
||||||
|
"company": "Apple Inc. (AAPL)"
|
||||||
|
},
|
||||||
|
"score": {
|
||||||
|
"goal_alignment": 10.0,
|
||||||
|
"semantic_quality": 9.0,
|
||||||
|
"tool_selection": 6.0,
|
||||||
|
"parameter_extraction": 5.0,
|
||||||
|
"tool_invocation": 10.0,
|
||||||
|
"reasoning_efficiency": 7.300000000000001
|
||||||
|
},
|
||||||
|
"expected_score": {
|
||||||
|
"goal_alignment": 8
|
||||||
|
},
|
||||||
|
"passed": true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"identifier": "test_2",
|
||||||
|
"inputs": {
|
||||||
|
"company": "Microsoft Corporation (MSFT)"
|
||||||
|
},
|
||||||
|
"score": {
|
||||||
|
"goal_alignment": 10.0,
|
||||||
|
"semantic_quality": 7.333333333333333,
|
||||||
|
"tool_selection": 6.25,
|
||||||
|
"parameter_extraction": 9.5,
|
||||||
|
"tool_invocation": 10.0,
|
||||||
|
"reasoning_efficiency": 6.0
|
||||||
|
},
|
||||||
|
"expected_score": 8,
|
||||||
|
"passed": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
24
tests/regression/results/test_history_teacher.json
Normal file
24
tests/regression/results/test_history_teacher.json
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"timestamp": "2025-07-15T21:31:05.916161+00:00",
|
||||||
|
"metadata": {},
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"identifier": "df0ea31ac4a7fb4a908b8319ec7b3719",
|
||||||
|
"inputs": {
|
||||||
|
"messages": "How was the Battle of Waterloo?"
|
||||||
|
},
|
||||||
|
"score": {
|
||||||
|
"goal_alignment": 10.0,
|
||||||
|
"semantic_quality": 10.0,
|
||||||
|
"tool_selection": 10.0,
|
||||||
|
"parameter_extraction": 10.0,
|
||||||
|
"tool_invocation": 10.0,
|
||||||
|
"reasoning_efficiency": 5.5
|
||||||
|
},
|
||||||
|
"expected_score": 8,
|
||||||
|
"passed": true
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
144
tests/regression/test_financial_analysis.py
Normal file
144
tests/regression/test_financial_analysis.py
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
import pytest
|
||||||
|
from crewai import Agent, Crew, Process, Task
|
||||||
|
from crewai_tools import SerperDevTool
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.testing import (
|
||||||
|
assert_experiment_successfully,
|
||||||
|
run_experiment,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def financial_analysis_crew():
    """Build the three-agent financial-analysis crew used by the regression test.

    The crew runs sequentially: a researcher gathers data with the search
    tool, an analyst evaluates it, and a writer turns both into a report.
    The ``{company}`` placeholders in the research task are presumably filled
    from each dataset entry's ``inputs`` — confirm against ``run_experiment``.

    Returns:
        A ``Crew`` with three agents and three chained tasks.
    """
    # One tool instance is shared by both agents that are allowed to search.
    search_tool = SerperDevTool()

    data_researcher = Agent(
        role="Financial Data Researcher",
        goal="Efficiently collect and structure key financial metrics using multiple search strategies. Using only the search tool.",
        backstory=(
            "You are a precision-focused financial analyst who uses multiple targeted searches "
            "to cross-verify data and ensure comprehensive coverage. You leverage different "
            "search approaches to gather financial information from various authoritative sources."
        ),
        tools=[search_tool],
    )

    financial_analyst = Agent(
        role="Financial Analyst",
        goal="Analyze financial data to assess company performance and outlook",
        backstory=(
            "You are a seasoned financial analyst with expertise in evaluating company "
            "performance through quantitative analysis. You can interpret financial statements, "
            "identify trends, and make reasoned assessments of a company's financial health."
        ),
        tools=[search_tool],
    )

    # The writer gets no tools: it works only from the upstream task outputs.
    report_writer = Agent(
        role="Financial Report Writer",
        goal="Synthesize financial analysis into clear, actionable reports",
        backstory=(
            "You are an experienced financial writer who excels at turning complex financial "
            "analyses into clear, concise, and impactful reports. You know how to highlight "
            "key insights and present information in a way that's accessible to various audiences."
        ),
        tools=[],
    )

    # Step 1: data collection, with an explicit search plan and output template.
    research_task = Task(
        description=(
            "Research {company} financial data using multiple targeted search queries:\n\n"
            "**Search Strategy - Execute these searches sequentially:**\n"
            "1. '{company} quarterly earnings Q4 2024 Q1 2025 financial results'\n"
            "2. '{company} financial metrics P/E ratio profit margin debt equity'\n"
            "3. '{company} revenue growth year over year earnings growth rate'\n"
            "4. '{company} recent financial news SEC filings analyst reports'\n"
            "5. '{company} stock performance market cap valuation 2024 2025'\n\n"
            "**Data Collection Guidelines:**\n"
            "- Use multiple search queries to cross-verify financial figures\n"
            "- Prioritize official sources (SEC filings, earnings calls, company reports)\n"
            "- Compare data across different financial platforms for accuracy\n"
            "- Present findings in the exact format specified in expected_output."
        ),
        expected_output=(
            "Financial data summary in this structure:\n\n"
            "## Company Financial Overview\n"
            "**Data Sources Used:** [List 3-5 sources from multiple searches]\n\n"
            "**Latest Quarter:** [Period]\n"
            "- Revenue: $X (YoY: +/-X%) [Source verification]\n"
            "- Net Income: $X (YoY: +/-X%) [Source verification]\n"
            "- EPS: $X (YoY: +/-X%) [Source verification]\n\n"
            "**Key Metrics:**\n"
            "- P/E Ratio: X [Current vs Historical]\n"
            "- Profit Margin: X% [Trend indicator]\n"
            "- Debt-to-Equity: X [Industry comparison]\n\n"
            "**Growth Analysis:**\n"
            "- Revenue Growth: X% (3-year trend)\n"
            "- Earnings Growth: X% (consistency check)\n\n"
            "**Material Developments:** [1-2 key items with impact assessment]\n"
            "**Data Confidence:** [High/Medium/Low based on source consistency]"
        ),
        agent=data_researcher,
    )

    # Step 2: analysis, fed by the research output.
    analysis_task = Task(
        description=(
            "Analyze the collected financial data to assess the company's performance and outlook. "
            "Include the following in your analysis:\n"
            "1. Evaluation of financial health based on key metrics\n"
            "2. Trend analysis showing growth or decline patterns\n"
            "3. Comparison with industry benchmarks or competitors\n"
            "4. Identification of strengths and potential areas of concern\n"
            "5. Short-term financial outlook based on current trends"
        ),
        expected_output=(
            "A detailed financial analysis that includes assessment of key metrics, trends, "
            "comparative analysis, and a reasoned outlook for the company's financial future."
        ),
        agent=financial_analyst,
        context=[research_task],
    )

    # Step 3: report, fed by both earlier tasks.
    report_task = Task(
        description=(
            "Create a professional financial report based on the research and analysis. "
            "The report should:\n"
            "1. Begin with an executive summary highlighting key findings\n"
            "2. Present the financial analysis in a clear, logical structure\n"
            "3. Include visual representations of key data points (described textually)\n"
            "4. Provide actionable insights for potential investors\n"
            "5. Conclude with a clear investment recommendation (buy, hold, or sell)"
        ),
        expected_output=(
            "A professional, comprehensive financial report with executive summary, "
            "structured analysis, visual elements, actionable insights, and a clear recommendation."
        ),
        agent=report_writer,
        context=[research_task, analysis_task],
    )

    crew = Crew(
        agents=[data_researcher, financial_analyst, report_writer],
        tasks=[research_task, analysis_task, report_task],
        process=Process.sequential,
    )

    return crew
|
||||||
|
|
||||||
|
|
||||||
|
def test_financial_analysis_regression(financial_analysis_crew):
    """Run the financial-analysis crew over two companies and check the scores.

    The dataset exercises both shapes of ``expected_score``: a per-metric
    mapping (first entry) and a single number (second entry, which also sets
    an explicit ``identifier``).

    NOTE(review): the baseline file name appears to be derived from the name
    of the function on the call stack, so renaming this test or wrapping the
    calls below in a helper may point it at a different baseline — confirm
    before refactoring.
    """
    dataset = [
        {
            # No identifier given; the stored baseline shows a hash-like id
            # for this entry, presumably generated by the runner.
            "inputs": {"company": "Apple Inc. (AAPL)"},
            "expected_score": {"goal_alignment": 8},
        },
        {
            "identifier": "test_2",
            "inputs": {"company": "Microsoft Corporation (MSFT)"},
            "expected_score": 8,
        },
    ]

    results = run_experiment(dataset=dataset, crew=financial_analysis_crew, verbose=True)

    assert_experiment_successfully(results)
|
||||||
33
tests/regression/test_history_teacher.py
Normal file
33
tests/regression/test_history_teacher.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
import pytest
|
||||||
|
from crewai import Agent
|
||||||
|
from crewai_tools import SerperDevTool
|
||||||
|
|
||||||
|
from crewai.experimental.evaluation.testing import (
|
||||||
|
assert_experiment_successfully,
|
||||||
|
run_experiment,
|
||||||
|
)
|
||||||
|
|
||||||
|
@pytest.fixture
def history_teacher():
    """Build a standalone "History Educator" agent equipped with a search tool.

    Returns:
        A verbose ``Agent`` whose only tool is a ``SerperDevTool`` instance.
    """
    search_tool = SerperDevTool()
    return Agent(
        role="History Educator",
        goal="Teach students about important historical events with clarity and context",
        backstory=(
            "As a renowned historian and educator, you have spent decades studying world history, "
            "from ancient civilizations to modern events. You are passionate about making history "
            "engaging and understandable for learners of all ages. Your mission is to educate, explain, "
            "and spark curiosity about the past."
        ),
        tools=[search_tool],
        verbose=True,
    )
|
||||||
|
def test_history_teacher(history_teacher):
    """Run the history agent on one question and check it meets the expected score.

    NOTE(review): the stored baseline is
    ``tests/regression/results/test_history_teacher.json``, which matches
    this function's name; the name appears to be derived from the call
    stack, so renaming the test or adding a wrapper around the calls below
    may detach it from that baseline — confirm before refactoring.
    """
    dataset = [
        {"inputs": {"messages": "How was the Battle of Waterloo?"}, "expected_score": 8}
    ]
    # A bare agent (no crew) is evaluated here, hence ``agents=[...]``.
    results = run_experiment(
        dataset=dataset, agents=[history_teacher], verbose=True
    )

    assert_experiment_successfully(results)
|
||||||
Reference in New Issue
Block a user