mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 04:18:35 +00:00
Compare commits
1 Commits
1.2.0
...
lg-agent-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cbe570088e |
75
.github/workflows/regression-tests.yml
vendored
Normal file
75
.github/workflows/regression-tests.yml
vendored
Normal file
@@ -0,0 +1,75 @@
|
||||
# Manually triggered workflow that runs the suite under tests/regression
# against a chosen branch.
name: Regression Tests

on:
  workflow_dispatch:
    inputs:
      branch:
        description: 'Branch to run tests on'
        required: true
        default: 'main'
        type: string

# NOTE(review): no step visible in this workflow pushes commits, tags or
# releases; `contents: read` may be sufficient - confirm before tightening.
permissions:
  contents: write

env:
  # Placeholder key: the test run below passes --block-network to pytest.
  OPENAI_API_KEY: fake-api-key
  PYTHONUNBUFFERED: 1

jobs:
  regression-tests:
    name: Regression - ${{ github.event.inputs.branch }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
        with:
          ref: ${{ github.event.inputs.branch }}
          fetch-depth: 0

      - name: Display execution info
        env:
          # Pass the user-supplied branch name through the environment instead
          # of expanding it inside the script, so that shell metacharacters in
          # the input are never interpreted as commands.
          TARGET_BRANCH: ${{ github.event.inputs.branch }}
        run: |
          echo "🚀 Running Regression Tests"
          echo "📂 Branch: ${TARGET_BRANCH}"
          echo "📊 Current commit: $(git rev-parse --short HEAD)"

      - name: Install uv
        uses: astral-sh/setup-uv@v3
        with:
          enable-cache: true
          cache-dependency-glob: |
            **/pyproject.toml
            **/uv.lock

      - name: Set up Python 3.13
        run: uv python install 3.13

      - name: Install the project
        run: uv sync --dev --all-extras

      - name: Install SQLite with FTS5 support
        run: |
          # WORKAROUND: GitHub Actions' Ubuntu runner uses SQLite without FTS5 support compiled in.
          # This is a temporary fix until the runner includes SQLite with FTS5 or Python's sqlite3
          # module is compiled with FTS5 support by default.
          # TODO: Remove this workaround once GitHub Actions runners include SQLite FTS5 support

          # Install pysqlite3-binary which has FTS5 support
          uv pip install pysqlite3-binary
          # Create a sitecustomize.py to override sqlite3 with pysqlite3
          mkdir -p .pytest_sqlite_override
          echo "import sys; import pysqlite3; sys.modules['sqlite3'] = pysqlite3" > .pytest_sqlite_override/sitecustomize.py
          # Test FTS5 availability
          PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; print(f'SQLite version: {sqlite3.sqlite_version}')"
          PYTHONPATH=.pytest_sqlite_override uv run python -c "import sqlite3; conn = sqlite3.connect(':memory:'); conn.execute('CREATE VIRTUAL TABLE test USING fts5(content)'); print('FTS5 module available')"

      - name: Run Regression Tests
        run: |
          PYTHONPATH=.pytest_sqlite_override uv run pytest \
            --block-network \
            --timeout=30 \
            -vv \
            --durations=10 \
            -n auto \
            --maxfail=5 \
            tests/regression
|
||||
@@ -137,3 +137,6 @@ exclude = [
|
||||
"docs/**",
|
||||
"docs/",
|
||||
]
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
norecursedirs = ["tests/regression"]
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import inspect
|
||||
from pathlib import Path
|
||||
|
||||
from typing_extensions import Any
|
||||
import warnings
|
||||
@@ -41,12 +42,30 @@ def run_experiment(dataset: list[dict[str, Any]], crew: Crew | None = None, agen
|
||||
return runner.run(agents=agents, crew=crew, print_summary=verbose)
|
||||
|
||||
def _get_baseline_filepath_fallback() -> str:
|
||||
test_func_name = "experiment_fallback"
|
||||
filename = "experiment_fallback.json"
|
||||
calling_file = None
|
||||
|
||||
try:
|
||||
current_frame = inspect.currentframe()
|
||||
if current_frame is not None:
|
||||
test_func_name = current_frame.f_back.f_back.f_code.co_name # type: ignore[union-attr]
|
||||
filename = f"{test_func_name}.json"
|
||||
calling_file = current_frame.f_back.f_back.f_code.co_filename # type: ignore[union-attr]
|
||||
except Exception:
|
||||
...
|
||||
return f"{test_func_name}_results.json"
|
||||
return filename
|
||||
|
||||
if not calling_file:
|
||||
return filename
|
||||
|
||||
calling_path = Path(calling_file)
|
||||
try:
|
||||
baseline_dir_parts = calling_path.parts[:-1]
|
||||
baseline_dir = Path(*baseline_dir_parts) / "results"
|
||||
baseline_dir.mkdir(parents=True, exist_ok=True)
|
||||
baseline_filepath = baseline_dir / filename
|
||||
return str(baseline_filepath)
|
||||
|
||||
except (ValueError, IndexError):
|
||||
pass
|
||||
|
||||
return filename
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
[
|
||||
{
|
||||
"timestamp": "2025-07-15T21:34:08.253410+00:00",
|
||||
"metadata": {},
|
||||
"results": [
|
||||
{
|
||||
"identifier": "72239c22b0cdde98ad5c588074ef6325",
|
||||
"inputs": {
|
||||
"company": "Apple Inc. (AAPL)"
|
||||
},
|
||||
"score": {
|
||||
"goal_alignment": 10.0,
|
||||
"semantic_quality": 9.0,
|
||||
"tool_selection": 6.0,
|
||||
"parameter_extraction": 5.0,
|
||||
"tool_invocation": 10.0,
|
||||
"reasoning_efficiency": 7.300000000000001
|
||||
},
|
||||
"expected_score": {
|
||||
"goal_alignment": 8
|
||||
},
|
||||
"passed": true
|
||||
},
|
||||
{
|
||||
"identifier": "test_2",
|
||||
"inputs": {
|
||||
"company": "Microsoft Corporation (MSFT)"
|
||||
},
|
||||
"score": {
|
||||
"goal_alignment": 10.0,
|
||||
"semantic_quality": 7.333333333333333,
|
||||
"tool_selection": 6.25,
|
||||
"parameter_extraction": 9.5,
|
||||
"tool_invocation": 10.0,
|
||||
"reasoning_efficiency": 6.0
|
||||
},
|
||||
"expected_score": 8,
|
||||
"passed": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
24
tests/regression/results/test_history_teacher.json
Normal file
24
tests/regression/results/test_history_teacher.json
Normal file
@@ -0,0 +1,24 @@
|
||||
[
|
||||
{
|
||||
"timestamp": "2025-07-15T21:31:05.916161+00:00",
|
||||
"metadata": {},
|
||||
"results": [
|
||||
{
|
||||
"identifier": "df0ea31ac4a7fb4a908b8319ec7b3719",
|
||||
"inputs": {
|
||||
"messages": "How was the Battle of Waterloo?"
|
||||
},
|
||||
"score": {
|
||||
"goal_alignment": 10.0,
|
||||
"semantic_quality": 10.0,
|
||||
"tool_selection": 10.0,
|
||||
"parameter_extraction": 10.0,
|
||||
"tool_invocation": 10.0,
|
||||
"reasoning_efficiency": 5.5
|
||||
},
|
||||
"expected_score": 8,
|
||||
"passed": true
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
144
tests/regression/test_financial_analysis.py
Normal file
144
tests/regression/test_financial_analysis.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import pytest
|
||||
from crewai import Agent, Crew, Process, Task
|
||||
from crewai_tools import SerperDevTool
|
||||
|
||||
from crewai.experimental.evaluation.testing import (
|
||||
assert_experiment_successfully,
|
||||
run_experiment,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def financial_analysis_crew():
    """Build a sequential three-agent crew: research, analysis, report writing.

    The researcher and the analyst share one ``SerperDevTool`` instance; the
    report writer has no tools. Each later task receives the earlier tasks as
    context.
    """
    serper = SerperDevTool()

    researcher = Agent(
        role="Financial Data Researcher",
        goal="Efficiently collect and structure key financial metrics using multiple search strategies. Using only the search tool.",
        backstory=(
            "You are a precision-focused financial analyst who uses multiple targeted searches "
            "to cross-verify data and ensure comprehensive coverage. You leverage different "
            "search approaches to gather financial information from various authoritative sources."
        ),
        tools=[serper],
    )
    analyst = Agent(
        role="Financial Analyst",
        goal="Analyze financial data to assess company performance and outlook",
        backstory=(
            "You are a seasoned financial analyst with expertise in evaluating company "
            "performance through quantitative analysis. You can interpret financial statements, "
            "identify trends, and make reasoned assessments of a company's financial health."
        ),
        tools=[serper],
    )
    writer = Agent(
        role="Financial Report Writer",
        goal="Synthesize financial analysis into clear, actionable reports",
        backstory=(
            "You are an experienced financial writer who excels at turning complex financial "
            "analyses into clear, concise, and impactful reports. You know how to highlight "
            "key insights and present information in a way that's accessible to various audiences."
        ),
        tools=[],
    )

    # Step 1: gather raw figures through a fixed sequence of search queries.
    gather_data = Task(
        description=(
            "Research {company} financial data using multiple targeted search queries:\n\n"
            "**Search Strategy - Execute these searches sequentially:**\n"
            "1. '{company} quarterly earnings Q4 2024 Q1 2025 financial results'\n"
            "2. '{company} financial metrics P/E ratio profit margin debt equity'\n"
            "3. '{company} revenue growth year over year earnings growth rate'\n"
            "4. '{company} recent financial news SEC filings analyst reports'\n"
            "5. '{company} stock performance market cap valuation 2024 2025'\n\n"
            "**Data Collection Guidelines:**\n"
            "- Use multiple search queries to cross-verify financial figures\n"
            "- Prioritize official sources (SEC filings, earnings calls, company reports)\n"
            "- Compare data across different financial platforms for accuracy\n"
            "- Present findings in the exact format specified in expected_output."
        ),
        expected_output=(
            "Financial data summary in this structure:\n\n"
            "## Company Financial Overview\n"
            "**Data Sources Used:** [List 3-5 sources from multiple searches]\n\n"
            "**Latest Quarter:** [Period]\n"
            "- Revenue: $X (YoY: +/-X%) [Source verification]\n"
            "- Net Income: $X (YoY: +/-X%) [Source verification]\n"
            "- EPS: $X (YoY: +/-X%) [Source verification]\n\n"
            "**Key Metrics:**\n"
            "- P/E Ratio: X [Current vs Historical]\n"
            "- Profit Margin: X% [Trend indicator]\n"
            "- Debt-to-Equity: X [Industry comparison]\n\n"
            "**Growth Analysis:**\n"
            "- Revenue Growth: X% (3-year trend)\n"
            "- Earnings Growth: X% (consistency check)\n\n"
            "**Material Developments:** [1-2 key items with impact assessment]\n"
            "**Data Confidence:** [High/Medium/Low based on source consistency]"
        ),
        agent=researcher,
    )

    # Step 2: interpret the gathered figures.
    analyze_data = Task(
        description=(
            "Analyze the collected financial data to assess the company's performance and outlook. "
            "Include the following in your analysis:\n"
            "1. Evaluation of financial health based on key metrics\n"
            "2. Trend analysis showing growth or decline patterns\n"
            "3. Comparison with industry benchmarks or competitors\n"
            "4. Identification of strengths and potential areas of concern\n"
            "5. Short-term financial outlook based on current trends"
        ),
        expected_output=(
            "A detailed financial analysis that includes assessment of key metrics, trends, "
            "comparative analysis, and a reasoned outlook for the company's financial future."
        ),
        agent=analyst,
        context=[gather_data],
    )

    # Step 3: turn research and analysis into the final report.
    write_report = Task(
        description=(
            "Create a professional financial report based on the research and analysis. "
            "The report should:\n"
            "1. Begin with an executive summary highlighting key findings\n"
            "2. Present the financial analysis in a clear, logical structure\n"
            "3. Include visual representations of key data points (described textually)\n"
            "4. Provide actionable insights for potential investors\n"
            "5. Conclude with a clear investment recommendation (buy, hold, or sell)"
        ),
        expected_output=(
            "A professional, comprehensive financial report with executive summary, "
            "structured analysis, visual elements, actionable insights, and a clear recommendation."
        ),
        agent=writer,
        context=[gather_data, analyze_data],
    )

    return Crew(
        agents=[researcher, analyst, writer],
        tasks=[gather_data, analyze_data, write_report],
        process=Process.sequential,
    )
|
||||
|
||||
|
||||
def test_financial_analysis_regression(financial_analysis_crew):
    """Run the financial-analysis crew on two companies and assert success.

    The first case gives a per-metric expected score and no identifier; the
    second gives an explicit identifier and a single overall expected score.
    """
    apple_case = {
        "inputs": {"company": "Apple Inc. (AAPL)"},
        "expected_score": {"goal_alignment": 8},
    }
    microsoft_case = {
        "identifier": "test_2",
        "inputs": {"company": "Microsoft Corporation (MSFT)"},
        "expected_score": 8,
    }

    outcome = run_experiment(
        dataset=[apple_case, microsoft_case],
        crew=financial_analysis_crew,
        verbose=True,
    )

    assert_experiment_successfully(outcome)
|
||||
33
tests/regression/test_history_teacher.py
Normal file
33
tests/regression/test_history_teacher.py
Normal file
@@ -0,0 +1,33 @@
|
||||
import pytest
|
||||
from crewai import Agent
|
||||
from crewai_tools import SerperDevTool
|
||||
|
||||
from crewai.experimental.evaluation.testing import (
|
||||
assert_experiment_successfully,
|
||||
run_experiment,
|
||||
)
|
||||
|
||||
@pytest.fixture
def history_teacher():
    """Provide a verbose "History Educator" agent equipped with a Serper search tool."""
    teacher = Agent(
        role="History Educator",
        goal="Teach students about important historical events with clarity and context",
        backstory=(
            "As a renowned historian and educator, you have spent decades studying world history, "
            "from ancient civilizations to modern events. You are passionate about making history "
            "engaging and understandable for learners of all ages. Your mission is to educate, explain, "
            "and spark curiosity about the past."
        ),
        tools=[SerperDevTool()],
        verbose=True,
    )
    return teacher
|
||||
def test_history_teacher(history_teacher):
    """Run the history-teacher agent on one question and assert success."""
    waterloo_case = {
        "inputs": {"messages": "How was the Battle of Waterloo?"},
        "expected_score": 8,
    }

    outcome = run_experiment(
        dataset=[waterloo_case],
        agents=[history_teacher],
        verbose=True,
    )

    assert_experiment_successfully(outcome)
|
||||
Reference in New Issue
Block a user