Compare commits

...

2 Commits

Author    SHA1        Message                                                                                      Date
Devin AI  55a19a5c3b  Address PR feedback: improve comments, maintain alphabetical order, and parameterize tests  2025-05-05 05:23:30 +00:00
                      Co-Authored-By: Joe Moura <joao@crewai.com>
Devin AI  018fbd044d  Add support for OpenAI's o4-mini model                                                      2025-05-05 05:18:40 +00:00
                      Co-Authored-By: Joe Moura <joao@crewai.com>
4 changed files with 21 additions and 12 deletions

View File

@@ -88,6 +88,7 @@ LLM_CONTEXT_WINDOW_SIZES = {
     "o1-preview": 128000,
     "o1-mini": 128000,
     "o3-mini": 200000,  # Based on official o3-mini specifications
+    "o4-mini": 200000,  # Token limit based on OpenAI's official specifications
     # gemini
     "gemini-2.0-flash": 1048576,
     "gemini-2.0-flash-thinking-exp-01-21": 32768,

View File

@@ -37,10 +37,11 @@ OPENAI_BIGGER_MODELS = [
     "gpt-4",
     "gpt-4o",
     "o1-preview",
-    "o1-mini",
     "o1",
+    "o1-mini",
     "o3",
     "o3-mini",
+    "o4-mini",  # Maintain alphabetical order for clarity
 ]

View File

@@ -413,7 +413,9 @@ def test_agent_execution_with_specific_tools():
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_allows_skipping_tool(model):
+    """Test that o-series models can skip using tools when appropriate."""
     @tool
     def multiplier(first_number: int, second_number: int) -> float:
         """Useful for when you need to multiply two numbers together."""
@@ -423,7 +425,7 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm=LLM(model="o3-mini"),
+        llm=LLM(model=model),
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
@@ -439,9 +441,11 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_uses_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_uses_tool(model):
+    """Test that o-series models can use tools when appropriate."""
     @tool
-    def comapny_customer_data() -> float:
+    def company_customer_data() -> float:
         """Useful for getting customer related data."""
         return "The company has 42 customers"
@@ -449,7 +453,7 @@ def test_agent_powered_by_new_o_model_family_that_uses_tool():
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm="o3-mini",
+        llm=model,
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
@@ -460,7 +464,7 @@ def test_agent_powered_by_new_o_model_family_that_uses_tool():
         agent=agent,
         expected_output="The number of customers",
     )
-    output = agent.execute_task(task=task, tools=[comapny_customer_data])
+    output = agent.execute_task(task=task, tools=[company_customer_data])
     assert output == "42"

View File

@@ -335,12 +335,15 @@ def test_o3_mini_reasoning_effort_medium():
     assert "Paris" in result
 
 
-def test_context_window_validation():
-    """Test that context window validation works correctly."""
-    # Test valid window size
-    llm = LLM(model="o3-mini")
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_context_window_validation_for_o_models(model):
+    """Test that context window validation works correctly for o-series models."""
+    # Test valid window size for o-series models
+    llm = LLM(model=model)
     assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)
 
+def test_context_window_validation():
+    """Test that context window validation works correctly."""
     # Test invalid window size
     with pytest.raises(ValueError) as excinfo:
         with patch.dict(
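
The hunk above is truncated at patch.dict(; the pattern it sets up is patching the size table so the validation path raises. A self-contained sketch of that pattern under assumed names (the minimum-size bound and error message are illustrative, not from crewAI):

    from unittest.mock import patch

    import pytest

    LLM_CONTEXT_WINDOW_SIZES = {"o4-mini": 200000}
    MIN_CONTEXT_WINDOW = 1024  # assumed lower bound, for illustration

    def validated_window(model: str) -> int:
        size = LLM_CONTEXT_WINDOW_SIZES[model]
        if size < MIN_CONTEXT_WINDOW:
            raise ValueError(f"context window for {model} too small: {size}")
        return size

    def test_invalid_window_size():
        # patch.dict temporarily overrides the entry and restores it on exit
        with patch.dict(LLM_CONTEXT_WINDOW_SIZES, {"o4-mini": 0}):
            with pytest.raises(ValueError) as excinfo:
                validated_window("o4-mini")
        assert "too small" in str(excinfo.value)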