From 55a19a5c3b60b6a50f80c719b63579efa391afee Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 5 May 2025 05:23:30 +0000
Subject: [PATCH] Address PR feedback: improve comments, maintain alphabetical
 order, and parameterize tests

Co-Authored-By: Joe Moura
---
 src/crewai/llm.py              |  2 +-
 src/crewai/tools/tool_usage.py |  4 +-
 tests/agent_test.py            | 72 ++++++++--------------
 tests/llm_test.py              | 15 ++++---
 4 files changed, 26 insertions(+), 67 deletions(-)

diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index bddf35196..b8b398173 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -87,8 +87,8 @@ LLM_CONTEXT_WINDOW_SIZES = {
     "gpt-4.1-nano-2025-04-14": 1047576,
     "o1-preview": 128000,
     "o1-mini": 128000,
-    "o4-mini": 200000,  # Based on o3-mini specifications
     "o3-mini": 200000,  # Based on official o3-mini specifications
+    "o4-mini": 200000,  # Token limit based on OpenAI's official specifications
     # gemini
     "gemini-2.0-flash": 1048576,
     "gemini-2.0-flash-thinking-exp-01-21": 32768,
diff --git a/src/crewai/tools/tool_usage.py b/src/crewai/tools/tool_usage.py
index 27b37f057..45480b96e 100644
--- a/src/crewai/tools/tool_usage.py
+++ b/src/crewai/tools/tool_usage.py
@@ -37,11 +37,11 @@ OPENAI_BIGGER_MODELS = [
     "gpt-4",
     "gpt-4o",
     "o1-preview",
-    "o1-mini",
     "o1",
+    "o1-mini",
     "o3",
-    "o4-mini",
     "o3-mini",
+    "o4-mini",  # Maintain alphabetical order for clarity
 ]
diff --git a/tests/agent_test.py b/tests/agent_test.py
index e65132ca1..3b37c30b2 100644
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -413,7 +413,9 @@ def test_agent_execution_with_specific_tools():
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_allows_skipping_tool(model):
+    """Test that o-series models can skip using tools when appropriate."""
     @tool
     def multiplier(first_number: int, second_number: int) -> float:
         """Useful for when you need to multiply two numbers together."""
         return first_number * second_number
@@ -423,7 +425,7 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
     agent = Agent(
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm=LLM(model="o3-mini"),
+        llm=LLM(model=model),
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
     )
@@ -438,36 +440,12 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
     task = Task(
         description="What is 3 times 4?",
         agent=agent,
         expected_output="The result of the multiplication.",
     )
     output = agent.execute_task(task=task, tools=[multiplier])
     assert output == "12"
 
 
-# @pytest.mark.vcr(filter_headers=["authorization"])
-# def test_agent_powered_by_o4_mini_that_allows_skipping_tool():
-#     @tool
-#     def multiplier(first_number: int, second_number: int) -> float:
-#         """Useful for when you need to multiply two numbers together."""
-#         return first_number * second_number
-#
-#     agent = Agent(
-#         role="test role",
-#         goal="test goal",
-#         backstory="test backstory",
-#         llm=LLM(model="o4-mini"),
-#         max_iter=3,
-#         use_system_prompt=False,
-#         allow_delegation=False,
-#     )
-#
-#     task = Task(
-#         description="What is 3 times 4?",
-#         agent=agent,
-#         expected_output="The result of the multiplication.",
-#     )
-#     output = agent.execute_task(task=task, tools=[multiplier])
-#     assert output == "12"
-
-
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_uses_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_uses_tool(model):
+    """Test that o-series models can use tools when appropriate."""
     @tool
-    def comapny_customer_data() -> float:
+    def company_customer_data() -> str:
         """Useful for getting customer related data."""
         return "The company has 42 customers"
@@ -475,37 +453,19 @@ def test_agent_powered_by_new_o_model_family_that_uses_tool():
     agent = Agent(
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm="o3-mini",
+        llm=model,
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
     )
-
-# @pytest.mark.vcr(filter_headers=["authorization"])
-# def test_agent_powered_by_o4_mini_that_uses_tool():
-#     @tool
-#     def company_customer_data() -> float:
-#         """Useful for getting customer related data."""
-#         return "The company has 42 customers"
-#
-#     agent = Agent(
-#         role="test role",
-#         goal="test goal",
-#         backstory="test backstory",
-#         llm="o4-mini",
-#         max_iter=3,
-#         use_system_prompt=False,
-#         allow_delegation=False,
-#     )
-#
-#     task = Task(
-#         description="How many customers does the company have?",
-#         agent=agent,
-#         expected_output="The number of customers",
-#     )
-#     output = agent.execute_task(task=task, tools=[company_customer_data])
-#     assert output == "42"
+    task = Task(
+        description="How many customers does the company have?",
+        agent=agent,
+        expected_output="The number of customers",
+    )
+    output = agent.execute_task(task=task, tools=[company_customer_data])
+    assert output == "42"
 
 
 @pytest.mark.vcr(filter_headers=["authorization"])
diff --git a/tests/llm_test.py b/tests/llm_test.py
index 1c05073ae..728d099d6 100644
--- a/tests/llm_test.py
+++ b/tests/llm_test.py
@@ -335,16 +335,15 @@ def test_o3_mini_reasoning_effort_medium():
     assert "Paris" in result
 
 
-def test_context_window_validation():
-    """Test that context window validation works correctly."""
-    # Test valid window size for o3-mini
-    llm = LLM(model="o3-mini")
-    assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)
-
-    # Test valid window size for o4-mini
-    llm = LLM(model="o4-mini")
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_context_window_validation_for_o_models(model):
+    """Test that context window validation works correctly for o-series models."""
+    # Test valid window size for o-series models
+    llm = LLM(model=model)
     assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)
 
+def test_context_window_validation():
+    """Test that context window validation works correctly."""
     # Test invalid window size
     with pytest.raises(ValueError) as excinfo:
         with patch.dict(
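
Reviewer note on the parametrized context-window test: the assertion can be exercised outside the crewai test suite with a small stand-in. The sketch below is illustrative only; it assumes CONTEXT_WINDOW_USAGE_RATIO is 0.75 (the real constant lives in src/crewai/llm.py and may differ) and stubs get_context_window_size() instead of importing LLM.

import pytest

# Assumed value for illustration; crewai defines the real CONTEXT_WINDOW_USAGE_RATIO.
CONTEXT_WINDOW_USAGE_RATIO = 0.75

# Mirrors the two LLM_CONTEXT_WINDOW_SIZES entries touched by this patch.
LLM_CONTEXT_WINDOW_SIZES = {
    "o3-mini": 200000,
    "o4-mini": 200000,
}


def get_context_window_size(model: str) -> int:
    # Stand-in for LLM.get_context_window_size: the advertised window
    # scaled down by the usage ratio, truncated to an int.
    return int(LLM_CONTEXT_WINDOW_SIZES[model] * CONTEXT_WINDOW_USAGE_RATIO)


@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
def test_context_window_validation_for_o_models(model):
    assert get_context_window_size(model) == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)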