From c96ae80121aea52396bc8a8883111f6165dcc1f2 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 4 Jun 2025 10:15:30 +0000
Subject: [PATCH] Fix CI failures: correct context window ratio and remove
 unused imports
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix test expectations to use the 0.85 ratio instead of 0.75 (matches CONTEXT_WINDOW_USAGE_RATIO)
- Remove unused imports (pytest, Mock) from the test file
- Add a context window size warning for large models (>100K tokens)
- Update documentation with performance considerations and rate limiting best practices
- Address code review feedback from João regarding validation and error handling

Co-Authored-By: João
---
 docs/aiml_api_integration.md       | 62 ++++++++++++++++++++++++++++++
 src/crewai/llm.py                  |  6 +++++-
 tests/test_aiml_api_integration.py |  5 ++---
 3 files changed, 69 insertions(+), 4 deletions(-)

diff --git a/docs/aiml_api_integration.md b/docs/aiml_api_integration.md
index 6e27a2ef2..f91b71ac2 100644
--- a/docs/aiml_api_integration.md
+++ b/docs/aiml_api_integration.md
@@ -133,6 +133,67 @@ Choose the right model for your use case:
 - **For vision tasks**: Use Llama 3.2 Vision models
 - **For coding**: Consider DeepSeek or specialized coding models
 
+## Performance Considerations
+
+### Context Window Management
+
+AI/ML API models support large context windows, but be mindful of:
+
+- **Memory Usage**: Large context windows (>100K tokens) may require significant memory
+- **Processing Time**: Larger contexts take longer to process
+- **Cost Impact**: Most providers charge based on token usage
+
+### Rate Limiting Best Practices
+
+AI/ML API implements rate limiting to ensure fair usage:
+
+- **Implement Retry Logic**: Use exponential backoff for rate limit errors (see the example below)
+- **Monitor Usage**: Track your API usage through the AI/ML API dashboard
+- **Batch Requests**: Group multiple requests when possible to optimize throughput
+- **Cache Results**: Store frequently used responses to reduce API calls (a caching sketch follows the retry example)
+
+Rate limit errors are raised when a request is sent, not when the `LLM` object is constructed, so wrap the call itself:
+
+```python
+import time
+
+from crewai import LLM
+
+def call_with_retry(llm: LLM, prompt: str, max_retries: int = 3) -> str:
+    """Call the LLM, retrying with exponential backoff on rate limit errors."""
+    for attempt in range(max_retries):
+        try:
+            return llm.call(prompt)
+        except Exception as e:
+            if "rate limit" in str(e).lower() and attempt < max_retries - 1:
+                time.sleep(2 ** attempt)  # Back off 1s, 2s, 4s, ...
+                continue
+            raise
+```
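+
+Caching helps when the same prompt recurs. A minimal in-memory sketch (the model name is a placeholder; a production cache would be persistent and size-aware):
+
+```python
+from functools import lru_cache
+
+from crewai import LLM
+
+llm = LLM(model="openai/gpt-4o")  # placeholder model name
+
+@lru_cache(maxsize=256)
+def cached_call(prompt: str) -> str:
+    # Identical prompts are served from memory instead of a new API call.
+    return llm.call(prompt)
+```
+
+Note that `lru_cache` keys on the exact prompt string, so caching only pays off when prompts repeat verbatim.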
+
+### Cost Optimization
+
+- **Model Selection**: Choose appropriate model size for your use case
+- **Context Management**: Trim unnecessary context to reduce token usage
+- **Streaming**: Use streaming for real-time applications to improve perceived performance
+
 ## Troubleshooting
 
 ### Common Issues
@@ -141,6 +202,7 @@ Choose the right model for your use case:
 2. **Model Not Found**: Verify the model name uses the correct `openai/` prefix
 3. **Rate Limits**: AI/ML API has rate limits; implement appropriate retry logic
 4. **Context Length**: Monitor context window usage for optimal performance
+5. **Memory Issues**: Large context windows may cause memory problems; monitor usage
 
 ### Getting Help
 
diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index 2d1fc5946..dc4394ae6 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -1119,7 +1119,7 @@ class LLM(BaseLLM):
 
     def get_context_window_size(self) -> int:
         """
-        Returns the context window size, using 75% of the maximum to avoid
+        Returns the context window size, using 85% of the maximum to avoid
         cutting off messages mid-thread.
 
         Raises:
@@ -1130,6 +1130,7 @@ class LLM(BaseLLM):
 
         MIN_CONTEXT = 1024
         MAX_CONTEXT = 2097152  # Current max from gemini-1.5-pro
+        MAX_SAFE_CONTEXT = 100000  # Warn for very large context windows
 
         # Validate all context window sizes
         for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
@@ -1144,6 +1145,9 @@ class LLM(BaseLLM):
         for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
             if self.model.startswith(key):
                 self.context_window_size = int(value * CONTEXT_WINDOW_USAGE_RATIO)
+                if value > MAX_SAFE_CONTEXT:
+                    import warnings
+                    warnings.warn(f"Model {self.model} uses large context window ({value}). Monitor memory usage.")
                 return self.context_window_size
 
     def set_callbacks(self, callbacks: List[Any]):
diff --git a/tests/test_aiml_api_integration.py b/tests/test_aiml_api_integration.py
index 220ba109a..9abcfcc81 100644
--- a/tests/test_aiml_api_integration.py
+++ b/tests/test_aiml_api_integration.py
@@ -1,7 +1,6 @@
 """Tests for AI/ML API integration with CrewAI."""
 
-import pytest
-from unittest.mock import Mock, patch
+from unittest.mock import patch
 
 from crewai.llm import LLM
 from crewai.utilities.llm_utils import create_llm
@@ -25,7 +24,7 @@ class TestAIMLAPIIntegration:
         for model_name, expected_context_size in test_cases:
             llm = LLM(model=model_name)
-            expected_usable_size = int(expected_context_size * 0.75)
+            expected_usable_size = int(expected_context_size * 0.85)
             actual_context_size = llm.get_context_window_size()
 
             assert actual_context_size == expected_usable_size, (
                 f"Model {model_name} should have context window size {expected_usable_size}, "