Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-29 18:18:13 +00:00
Address PR feedback: improve comments, maintain alphabetical order, and parameterize tests
Co-Authored-By: Joe Moura <joao@crewai.com>
@@ -87,8 +87,8 @@ LLM_CONTEXT_WINDOW_SIZES = {
     "gpt-4.1-nano-2025-04-14": 1047576,
     "o1-preview": 128000,
     "o1-mini": 128000,
-    "o4-mini": 200000,  # Based on o3-mini specifications
     "o3-mini": 200000,  # Based on official o3-mini specifications
+    "o4-mini": 200000,  # Token limit based on OpenAI's official specifications
     # gemini
     "gemini-2.0-flash": 1048576,
     "gemini-2.0-flash-thinking-exp-01-21": 32768,
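The tests later in this commit hint at how these entries are consumed: `get_context_window_size()` appears to look up the model's raw window and scale it by `CONTEXT_WINDOW_USAGE_RATIO` to leave headroom for the response. A minimal sketch of that lookup, with the ratio value and the fallback for unknown models assumed rather than taken from this diff:

```python
# Minimal sketch of a context-window lookup, based on the assertions in this
# commit's tests. The 0.75 ratio and the default fallback are assumptions,
# not values confirmed by the diff.
CONTEXT_WINDOW_USAGE_RATIO = 0.75  # assumed: reserve part of the window for output

LLM_CONTEXT_WINDOW_SIZES = {
    "o3-mini": 200000,
    "o4-mini": 200000,
    "gemini-2.0-flash": 1048576,
}

DEFAULT_CONTEXT_WINDOW_SIZE = 8192  # assumed fallback for unknown models


def get_context_window_size(model: str) -> int:
    """Return the usable context window for a model, scaled by the usage ratio."""
    raw = LLM_CONTEXT_WINDOW_SIZES.get(model, DEFAULT_CONTEXT_WINDOW_SIZE)
    return int(raw * CONTEXT_WINDOW_USAGE_RATIO)


# e.g. get_context_window_size("o4-mini") == int(200000 * 0.75) == 150000
```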
@@ -37,11 +37,11 @@ OPENAI_BIGGER_MODELS = [
     "gpt-4",
     "gpt-4o",
     "o1-preview",
-    "o1-mini",
     "o1",
+    "o1-mini",
     "o3",
-    "o4-mini",
     "o3-mini",
+    "o4-mini",  # Maintain alphabetical order for clarity
 ]
 
 
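Since the ordering of `OPENAI_BIGGER_MODELS` is maintained by hand (per the inline comment), a small guard test could catch accidental duplicates when new entries are added. A hypothetical example, not part of this commit, and with the import path assumed:

```python
# Hypothetical guard test, not in this commit: a duplicated entry would make
# the set smaller than the list. The import path below is an assumption;
# adjust it to wherever OPENAI_BIGGER_MODELS is actually defined.
from crewai.utilities.constants import OPENAI_BIGGER_MODELS


def test_openai_bigger_models_has_no_duplicates():
    assert len(OPENAI_BIGGER_MODELS) == len(set(OPENAI_BIGGER_MODELS))
```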
@@ -413,7 +413,9 @@ def test_agent_execution_with_specific_tools():
 
 
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_allows_skipping_tool(model):
+    """Test that o-series models can skip using tools when appropriate."""
     @tool
     def multiplier(first_number: int, second_number: int) -> float:
         """Useful for when you need to multiply two numbers together."""
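The feedback addressed here swaps duplicated per-model tests for a single parameterized one. For readers unfamiliar with the mechanism, a minimal self-contained sketch of how `pytest.mark.parametrize` expands one function into one test case per argument:

```python
# Minimal illustration of pytest.mark.parametrize, the mechanism this commit
# adopts. Running pytest on this file yields two test cases, one per model name.
import pytest


@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
def test_model_name_is_o_series(model):
    # Each parametrized case receives one value from the list above.
    assert model.startswith("o")
```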
@@ -423,7 +425,7 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm=LLM(model="o3-mini"),
+        llm=LLM(model=model),
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
@@ -438,36 +440,12 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
     assert output == "12"
 
 
-# @pytest.mark.vcr(filter_headers=["authorization"])
-# def test_agent_powered_by_o4_mini_that_allows_skipping_tool():
-#     @tool
-#     def multiplier(first_number: int, second_number: int) -> float:
-#         """Useful for when you need to multiply two numbers together."""
-#         return first_number * second_number
-#
-#     agent = Agent(
-#         role="test role",
-#         goal="test goal",
-#         backstory="test backstory",
-#         llm=LLM(model="o4-mini"),
-#         max_iter=3,
-#         use_system_prompt=False,
-#         allow_delegation=False,
-#     )
-#
-#     task = Task(
-#         description="What is 3 times 4?",
-#         agent=agent,
-#         expected_output="The result of the multiplication.",
-#     )
-#     output = agent.execute_task(task=task, tools=[multiplier])
-#     assert output == "12"
-
-
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_uses_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_uses_tool(model):
+    """Test that o-series models can use tools when appropriate."""
     @tool
-    def comapny_customer_data() -> float:
+    def company_customer_data() -> float:
         """Useful for getting customer related data."""
         return "The company has 42 customers"
 
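As an aside, the `@tool` decorator used throughout these tests converts a plain function into a tool the agent can call, with the docstring serving as the tool's description. A standalone sketch of that pattern: the import path is an assumption (it has moved between crewAI versions), and the return annotation is corrected to `str` here, since the diff keeps `-> float` even though the function returns a string.

```python
# Standalone sketch of the @tool pattern these tests rely on. The import path
# is an assumption; the test file already has `tool` in scope, so the diff
# does not show where it comes from.
from crewai.tools import tool


@tool
def company_customer_data() -> str:
    """Useful for getting customer related data."""
    # The docstring doubles as the tool description the LLM sees when
    # deciding whether to call this tool.
    return "The company has 42 customers"
```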
@@ -475,37 +453,19 @@ def test_agent_powered_by_new_o_model_family_that_uses_tool():
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm="o3-mini",
+        llm=model,
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
     )
 
+    task = Task(
+        description="How many customers does the company have?",
+        agent=agent,
+        expected_output="The number of customers",
+    )
+    output = agent.execute_task(task=task, tools=[company_customer_data])
+    assert output == "42"
 
-# @pytest.mark.vcr(filter_headers=["authorization"])
-# def test_agent_powered_by_o4_mini_that_uses_tool():
-#     @tool
-#     def company_customer_data() -> float:
-#         """Useful for getting customer related data."""
-#         return "The company has 42 customers"
-#
-#     agent = Agent(
-#         role="test role",
-#         goal="test goal",
-#         backstory="test backstory",
-#         llm="o4-mini",
-#         max_iter=3,
-#         use_system_prompt=False,
-#         allow_delegation=False,
-#     )
-#
-#     task = Task(
-#         description="How many customers does the company have?",
-#         agent=agent,
-#         expected_output="The number of customers",
-#     )
-#     output = agent.execute_task(task=task, tools=[company_customer_data])
-#     assert output == "42"
 
 
 @pytest.mark.vcr(filter_headers=["authorization"])
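Worth noting across these two tests: `Agent` receives `llm=LLM(model=model)` in the skipping-tool test but the bare string `llm=model` here, which suggests both forms are accepted, with a string apparently coerced into an `LLM` internally. A hedged illustration of the two styles:

```python
# Both Agent construction styles that appear in this diff. That a bare model
# string is coerced into an LLM internally is an inference from these tests,
# not something the diff states explicitly.
from crewai import LLM, Agent

explicit = Agent(
    role="test role",
    goal="test goal",
    backstory="test backstory",
    llm=LLM(model="o4-mini"),  # explicit wrapper, as in the skipping-tool test
)

shorthand = Agent(
    role="test role",
    goal="test goal",
    backstory="test backstory",
    llm="o4-mini",  # bare string, as in the uses-tool test
)
```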
@@ -335,16 +335,15 @@ def test_o3_mini_reasoning_effort_medium():
     assert "Paris" in result
 
 
-def test_context_window_validation():
-    """Test that context window validation works correctly."""
-    # Test valid window size for o3-mini
-    llm = LLM(model="o3-mini")
-    assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)
-
-    # Test valid window size for o4-mini
-    llm = LLM(model="o4-mini")
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_context_window_validation_for_o_models(model):
+    """Test that context window validation works correctly for o-series models."""
+    # Test valid window size for o-series models
+    llm = LLM(model=model)
     assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)
 
+def test_context_window_validation():
+    """Test that context window validation works correctly."""
     # Test invalid window size
     with pytest.raises(ValueError) as excinfo:
         with patch.dict(
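The hunk ends mid-statement at `with patch.dict(`; the actual patch target is cut off, so it is left as-is above. For readers unfamiliar with the idiom, a generic sketch of how `unittest.mock.patch.dict` temporarily overrides a dict and restores it afterwards (the dict and values here are hypothetical):

```python
# Generic illustration of the unittest.mock.patch.dict pattern the truncated
# test is using; SETTINGS and its values are hypothetical, since the real
# patch target is cut off at the end of the hunk.
from unittest.mock import patch

SETTINGS = {"window": 200000}


def test_patch_dict_restores_original_value():
    with patch.dict(SETTINGS, {"window": -1}):
        assert SETTINGS["window"] == -1  # override visible inside the block
    assert SETTINGS["window"] == 200000  # original value restored on exit
```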