From 55a19a5c3b60b6a50f80c719b63579efa391afee Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 5 May 2025 05:23:30 +0000
Subject: [PATCH] Address PR feedback: improve comments, maintain alphabetical
 order, and parameterize tests

Co-Authored-By: Joe Moura
---
 src/crewai/llm.py              |  2 +-
 src/crewai/tools/tool_usage.py |  4 +-
 tests/agent_test.py            | 72 ++++++++--------------
 tests/llm_test.py              | 15 ++++---
 4 files changed, 26 insertions(+), 67 deletions(-)

diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index bddf35196..b8b398173 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -87,8 +87,8 @@ LLM_CONTEXT_WINDOW_SIZES = {
     "gpt-4.1-nano-2025-04-14": 1047576,
     "o1-preview": 128000,
     "o1-mini": 128000,
-    "o4-mini": 200000,  # Based on o3-mini specifications
     "o3-mini": 200000,  # Based on official o3-mini specifications
+    "o4-mini": 200000,  # Token limit based on OpenAI's official specifications
     # gemini
     "gemini-2.0-flash": 1048576,
     "gemini-2.0-flash-thinking-exp-01-21": 32768,
diff --git a/src/crewai/tools/tool_usage.py b/src/crewai/tools/tool_usage.py
index 27b37f057..45480b96e 100644
--- a/src/crewai/tools/tool_usage.py
+++ b/src/crewai/tools/tool_usage.py
@@ -37,11 +37,11 @@ OPENAI_BIGGER_MODELS = [
     "gpt-4",
     "gpt-4o",
     "o1-preview",
-    "o1-mini",
     "o1",
+    "o1-mini",
     "o3",
-    "o4-mini",
     "o3-mini",
+    "o4-mini",  # Maintain alphabetical order for clarity
 ]
diff --git a/tests/agent_test.py b/tests/agent_test.py
index e65132ca1..3b37c30b2 100644
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -413,7 +413,9 @@ def test_agent_execution_with_specific_tools():
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_allows_skipping_tool(model):
+    """Test that o-series models can skip using tools when appropriate."""
     @tool
     def multiplier(first_number: int, second_number: int) -> float:
         """Useful for when you need to multiply two numbers together."""
         return first_number * second_number
@@ -423,7 +425,7 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
     agent = Agent(
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm=LLM(model="o3-mini"),
+        llm=LLM(model=model),
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
     )
@@ -438,36 +440,12 @@ def test_agent_powered_by_new_o_model_family_that_allows_skipping_tool():
     task = Task(
         description="What is 3 times 4?",
         agent=agent,
         expected_output="The result of the multiplication.",
     )
     output = agent.execute_task(task=task, tools=[multiplier])
     assert output == "12"
 
 
-# @pytest.mark.vcr(filter_headers=["authorization"])
-# def test_agent_powered_by_o4_mini_that_allows_skipping_tool():
-#     @tool
-#     def multiplier(first_number: int, second_number: int) -> float:
-#         """Useful for when you need to multiply two numbers together."""
-#         return first_number * second_number
-#
-#     agent = Agent(
-#         role="test role",
-#         goal="test goal",
-#         backstory="test backstory",
-#         llm=LLM(model="o4-mini"),
-#         max_iter=3,
-#         use_system_prompt=False,
-#         allow_delegation=False,
-#     )
-#
-#     task = Task(
-#         description="What is 3 times 4?",
-#         agent=agent,
-#         expected_output="The result of the multiplication.",
-#     )
-#     output = agent.execute_task(task=task, tools=[multiplier])
-#     assert output == "12"
-
-
 @pytest.mark.vcr(filter_headers=["authorization"])
-def test_agent_powered_by_new_o_model_family_that_uses_tool():
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_agent_powered_by_o_model_that_uses_tool(model):
+    """Test that o-series models can use tools when appropriate."""
     @tool
-    def comapny_customer_data() -> float:
+    def company_customer_data() -> str:
         """Useful for getting customer related data."""
         return "The company has 42 customers"
@@ -475,37 +453,19 @@ def test_agent_powered_by_new_o_model_family_that_uses_tool():
     agent = Agent(
         role="test role",
         goal="test goal",
         backstory="test backstory",
-        llm="o3-mini",
+        llm=model,
         max_iter=3,
         use_system_prompt=False,
         allow_delegation=False,
     )
-
-# @pytest.mark.vcr(filter_headers=["authorization"])
-# def test_agent_powered_by_o4_mini_that_uses_tool():
-#     @tool
-#     def company_customer_data() -> float:
-#         """Useful for getting customer related data."""
-#         return "The company has 42 customers"
-#
-#     agent = Agent(
-#         role="test role",
-#         goal="test goal",
-#         backstory="test backstory",
-#         llm="o4-mini",
-#         max_iter=3,
-#         use_system_prompt=False,
-#         allow_delegation=False,
-#     )
-#
-#     task = Task(
-#         description="How many customers does the company have?",
-#         agent=agent,
-#         expected_output="The number of customers",
-#     )
-#     output = agent.execute_task(task=task, tools=[company_customer_data])
-#     assert output == "42"
+    task = Task(
+        description="How many customers does the company have?",
+        agent=agent,
+        expected_output="The number of customers",
+    )
+    output = agent.execute_task(task=task, tools=[company_customer_data])
+    assert output == "42"
 
 
 @pytest.mark.vcr(filter_headers=["authorization"])
diff --git a/tests/llm_test.py b/tests/llm_test.py
index 1c05073ae..728d099d6 100644
--- a/tests/llm_test.py
+++ b/tests/llm_test.py
@@ -335,16 +335,15 @@ def test_o3_mini_reasoning_effort_medium():
     assert "Paris" in result
 
 
-def test_context_window_validation():
-    """Test that context window validation works correctly."""
-    # Test valid window size for o3-mini
-    llm = LLM(model="o3-mini")
-    assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)
-
-    # Test valid window size for o4-mini
-    llm = LLM(model="o4-mini")
+@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
+def test_context_window_validation_for_o_models(model):
+    """Test that context window validation works correctly for o-series models."""
+    # Test valid window size for o-series models
+    llm = LLM(model=model)
     assert llm.get_context_window_size() == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)
 
+def test_context_window_validation():
+    """Test that context window validation works correctly."""
     # Test invalid window size
     with pytest.raises(ValueError) as excinfo:
         with patch.dict(
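
Reviewer note on the parametrized context-window test: the assertion can be exercised outside the crewai test suite with a small stand-in. The sketch below is illustrative only; it assumes CONTEXT_WINDOW_USAGE_RATIO is 0.75 (the real constant lives in src/crewai/llm.py and may differ) and stubs get_context_window_size() instead of importing LLM.

import pytest

# Assumed value for illustration; crewai defines the real CONTEXT_WINDOW_USAGE_RATIO.
CONTEXT_WINDOW_USAGE_RATIO = 0.75

# Mirrors the two LLM_CONTEXT_WINDOW_SIZES entries touched by this patch.
LLM_CONTEXT_WINDOW_SIZES = {
    "o3-mini": 200000,
    "o4-mini": 200000,
}


def get_context_window_size(model: str) -> int:
    # Stand-in for LLM.get_context_window_size: the advertised window
    # scaled down by the usage ratio, truncated to an int.
    return int(LLM_CONTEXT_WINDOW_SIZES[model] * CONTEXT_WINDOW_USAGE_RATIO)


@pytest.mark.parametrize("model", ["o3-mini", "o4-mini"])
def test_context_window_validation_for_o_models(model):
    assert get_context_window_size(model) == int(200000 * CONTEXT_WINDOW_USAGE_RATIO)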