From c96ae80121aea52396bc8a8883111f6165dcc1f2 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 4 Jun 2025 10:15:30 +0000
Subject: [PATCH] Fix CI failures: correct context window ratio and remove
 unused imports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix test expectations to use the 0.85 ratio instead of 0.75 (matches CONTEXT_WINDOW_USAGE_RATIO)
- Remove unused imports (pytest, Mock) from the test file
- Add a context window size warning for large models (>100K tokens)
- Update documentation with performance considerations and rate limiting best practices
- Address code review feedback from João regarding validation and error handling

Co-Authored-By: João
---
 docs/aiml_api_integration.md       | 62 ++++++++++++++++++++++++++++++
 src/crewai/llm.py                  |  6 +++++-
 tests/test_aiml_api_integration.py |  5 ++---
 3 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/docs/aiml_api_integration.md b/docs/aiml_api_integration.md
index 6e27a2ef2..f91b71ac2 100644
--- a/docs/aiml_api_integration.md
+++ b/docs/aiml_api_integration.md
@@ -133,6 +133,67 @@ Choose the right model for your use case:
 - **For vision tasks**: Use Llama 3.2 Vision models
 - **For coding**: Consider DeepSeek or specialized coding models
 
+## Performance Considerations
+
+### Context Window Management
+
+AI/ML API models support large context windows, but be mindful of:
+
+- **Memory Usage**: Large context windows (>100K tokens) may require significant memory
+- **Processing Time**: Larger contexts take longer to process
+- **Cost Impact**: Most providers charge based on token usage
+
+### Rate Limiting Best Practices
+
+AI/ML API implements rate limiting to ensure fair usage:
+
+- **Implement Retry Logic**: Use exponential backoff for rate limit errors (see the example below)
+- **Monitor Usage**: Track your API usage through the AI/ML API dashboard
+- **Batch Requests**: Group multiple requests when possible to optimize throughput
+- **Cache Results**: Store frequently used responses to reduce API calls (a caching sketch follows the retry example)
+
+Rate limit errors are raised when a request is sent, not when the `LLM` object is constructed, so wrap the call itself:
+
+```python
+import time
+
+from crewai import LLM
+
+def call_with_retry(llm: LLM, prompt: str, max_retries: int = 3) -> str:
+    """Call the LLM, retrying with exponential backoff on rate limit errors."""
+    for attempt in range(max_retries):
+        try:
+            return llm.call(prompt)
+        except Exception as e:
+            if "rate limit" in str(e).lower() and attempt < max_retries - 1:
+                time.sleep(2 ** attempt)  # Back off 1s, 2s, 4s, ...
+                continue
+            raise
+```
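+
+Caching helps when the same prompt recurs. A minimal in-memory sketch (the model name is a placeholder; a production cache would be persistent and size-aware):
+
+```python
+from functools import lru_cache
+
+from crewai import LLM
+
+llm = LLM(model="openai/gpt-4o")  # placeholder model name
+
+@lru_cache(maxsize=256)
+def cached_call(prompt: str) -> str:
+    # Identical prompts are served from memory instead of a new API call.
+    return llm.call(prompt)
+```
+
+Note that `lru_cache` keys on the exact prompt string, so caching only pays off when prompts repeat verbatim.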
+
+### Cost Optimization
+
+- **Model Selection**: Choose appropriate model size for your use case
+- **Context Management**: Trim unnecessary context to reduce token usage
+- **Streaming**: Use streaming for real-time applications to improve perceived performance
+
 ## Troubleshooting
 
 ### Common Issues
@@ -141,6 +202,7 @@ Choose the right model for your use case:
 2. **Model Not Found**: Verify the model name uses the correct `openai/` prefix
 3. **Rate Limits**: AI/ML API has rate limits; implement appropriate retry logic
 4. **Context Length**: Monitor context window usage for optimal performance
+5. **Memory Issues**: Large context windows may cause memory problems; monitor usage
 
 ### Getting Help
 
diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index 2d1fc5946..dc4394ae6 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -1119,7 +1119,7 @@ class LLM(BaseLLM):
 
     def get_context_window_size(self) -> int:
         """
-        Returns the context window size, using 75% of the maximum to avoid
+        Returns the context window size, using 85% of the maximum to avoid
         cutting off messages mid-thread.
 
         Raises:
@@ -1130,6 +1130,7 @@ class LLM(BaseLLM):
 
         MIN_CONTEXT = 1024
         MAX_CONTEXT = 2097152  # Current max from gemini-1.5-pro
+        MAX_SAFE_CONTEXT = 100000  # Warn for very large context windows
 
         # Validate all context window sizes
         for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
@@ -1144,6 +1145,9 @@ class LLM(BaseLLM):
         for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
             if self.model.startswith(key):
                 self.context_window_size = int(value * CONTEXT_WINDOW_USAGE_RATIO)
+                if value > MAX_SAFE_CONTEXT:
+                    import warnings
+                    warnings.warn(f"Model {self.model} uses large context window ({value}). Monitor memory usage.")
                 return self.context_window_size
 
     def set_callbacks(self, callbacks: List[Any]):
diff --git a/tests/test_aiml_api_integration.py b/tests/test_aiml_api_integration.py
index 220ba109a..9abcfcc81 100644
--- a/tests/test_aiml_api_integration.py
+++ b/tests/test_aiml_api_integration.py
@@ -1,7 +1,6 @@
 """Tests for AI/ML API integration with CrewAI."""
 
-import pytest
-from unittest.mock import Mock, patch
+from unittest.mock import patch
 
 from crewai.llm import LLM
 from crewai.utilities.llm_utils import create_llm
@@ -25,7 +24,7 @@ class TestAIMLAPIIntegration:
         for model_name, expected_context_size in test_cases:
             llm = LLM(model=model_name)
-            expected_usable_size = int(expected_context_size * 0.75)
+            expected_usable_size = int(expected_context_size * 0.85)
             actual_context_size = llm.get_context_window_size()
 
             assert actual_context_size == expected_usable_size, (
                 f"Model {model_name} should have context window size {expected_usable_size}, "