mirror of https://github.com/crewAIInc/crewAI.git
synced 2026-01-08 15:48:29 +00:00
Fix CI failures: correct context window ratio and remove unused imports

- Fix test expectations to use 0.85 ratio instead of 0.75 (matches CONTEXT_WINDOW_USAGE_RATIO)
- Remove unused imports (pytest, Mock) from test file
- Add context window size warning for large models (>100K tokens)
- Update documentation with performance considerations and rate limiting best practices
- Address code review feedback from João regarding validation and error handling

Co-Authored-By: João <joao@crewai.com>
@@ -133,6 +133,47 @@ Choose the right model for your use case:

- **For vision tasks**: Use Llama 3.2 Vision models
- **For coding**: Consider DeepSeek or specialized coding models

## Performance Considerations

### Context Window Management

AI/ML API models support large context windows, but be mindful of:

- **Memory Usage**: Large context windows (>100K tokens) may require significant memory
- **Processing Time**: Larger contexts take longer to process
- **Cost Impact**: Most providers charge based on token usage (see the trimming sketch below)
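
To make the memory and cost points concrete, here is a minimal sketch of budget-aware trimming. The ~4-characters-per-token heuristic and the helper names are assumptions for illustration; only `LLM` and `get_context_window_size()` come from crewAI:

```python
from crewai import LLM


def estimate_tokens(text: str) -> int:
    # Rough heuristic (~4 characters per token for English prose);
    # a real tokenizer would give exact counts.
    return len(text) // 4


def trim_to_budget(text: str, max_tokens: int) -> str:
    # Keep the tail of the text so the most recent context survives.
    max_chars = max_tokens * 4
    return text[-max_chars:] if len(text) > max_chars else text


llm = LLM(model="openai/gpt-4o")            # placeholder model id
budget = llm.get_context_window_size()      # usable window (85% of the max)
history = "user/assistant turns ... " * 50_000  # stand-in conversation log
prompt = trim_to_budget(history, budget)
assert estimate_tokens(prompt) <= budget
```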

### Rate Limiting Best Practices

AI/ML API implements rate limiting to ensure fair usage:

- **Implement Retry Logic**: Use exponential backoff for rate limit errors, as in the example below
- **Monitor Usage**: Track your API usage through the AI/ML API dashboard
- **Batch Requests**: Group multiple requests when possible to optimize throughput
- **Cache Results**: Store frequently used responses to reduce API calls (a caching sketch follows the retry example)

```python
import time

from crewai import LLM


def create_llm_with_retry(model_name, max_retries=3):
    """Create an LLM, retrying with exponential backoff on rate-limit errors."""
    for attempt in range(max_retries):
        try:
            return LLM(model=model_name)
        except Exception as e:
            if "rate limit" in str(e).lower() and attempt < max_retries - 1:
                wait_time = 2 ** attempt  # Exponential backoff: 1s, 2s, 4s, ...
                time.sleep(wait_time)
                continue
            raise e
```
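
The **Cache Results** advice can be applied the same way. A minimal in-memory sketch, assuming `LLM.call()` accepts a plain prompt string (the helper and cache names are hypothetical; production code would want TTLs and size bounds):

```python
import hashlib

from crewai import LLM

_response_cache: dict[str, str] = {}


def cached_call(llm: LLM, prompt: str) -> str:
    # Hash the prompt so arbitrarily long inputs make cheap dictionary keys.
    key = hashlib.sha256(prompt.encode()).hexdigest()
    if key not in _response_cache:
        _response_cache[key] = llm.call(prompt)
    return _response_cache[key]
```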

### Cost Optimization

- **Model Selection**: Choose an appropriately sized model for your use case (see the routing sketch below)
- **Context Management**: Trim unnecessary context to reduce token usage
- **Streaming**: Use streaming for real-time applications to improve perceived performance
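
One way to act on the **Model Selection** point is a simple router that reserves the larger model for long prompts. Both model ids are placeholders, not recommendations:

```python
def pick_model(prompt: str) -> str:
    # Short prompts rarely need a large context window or a top-tier model.
    return "openai/gpt-4o-mini" if len(prompt) < 2_000 else "openai/gpt-4o"


model = pick_model("Summarize this paragraph ...")  # -> "openai/gpt-4o-mini"
```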

## Troubleshooting

### Common Issues
@@ -141,6 +182,7 @@ Choose the right model for your use case:

2. **Model Not Found**: Verify the model name uses the correct `openai/` prefix (see the snippet below)
3. **Rate Limits**: AI/ML API has rate limits; implement appropriate retry logic
4. **Context Length**: Monitor context window usage for optimal performance
5. **Memory Issues**: Large context windows may cause memory problems; monitor usage
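
For the model-not-found case, the fix is usually just the prefix (the model id below is a placeholder):

```python
from crewai import LLM

llm = LLM(model="openai/gpt-4o")  # note the openai/ prefix, per issue 2 above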

### Getting Help
@@ -1119,7 +1119,7 @@ class LLM(BaseLLM):
 
     def get_context_window_size(self) -> int:
         """
-        Returns the context window size, using 75% of the maximum to avoid
+        Returns the context window size, using 85% of the maximum to avoid
         cutting off messages mid-thread.
 
         Raises:
@@ -1130,6 +1130,7 @@ class LLM(BaseLLM):
 
         MIN_CONTEXT = 1024
         MAX_CONTEXT = 2097152  # Current max from gemini-1.5-pro
+        MAX_SAFE_CONTEXT = 100000  # Warn for very large context windows
 
         # Validate all context window sizes
         for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
@@ -1144,6 +1145,9 @@ class LLM(BaseLLM):
         for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
             if self.model.startswith(key):
                 self.context_window_size = int(value * CONTEXT_WINDOW_USAGE_RATIO)
+                if value > MAX_SAFE_CONTEXT:
+                    import warnings
+                    warnings.warn(f"Model {self.model} uses large context window ({value}). Monitor memory usage.")
                 return self.context_window_size
 
     def set_callbacks(self, callbacks: List[Any]):
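
A quick sanity check of the ratio logic in the hunk above, with an assumed 128K-token model:

```python
CONTEXT_WINDOW_USAGE_RATIO = 0.85  # value used by crewai.llm

window = 128_000                                   # hypothetical model maximum
usable = int(window * CONTEXT_WINDOW_USAGE_RATIO)
assert usable == 108_800  # 15% head-room avoids cutting messages off mid-thread
```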
@@ -1,7 +1,6 @@
 """Tests for AI/ML API integration with CrewAI."""
 
-import pytest
-from unittest.mock import Mock, patch
+from unittest.mock import patch
 
 from crewai.llm import LLM
 from crewai.utilities.llm_utils import create_llm
@@ -25,7 +24,7 @@ class TestAIMLAPIIntegration:
 
         for model_name, expected_context_size in test_cases:
             llm = LLM(model=model_name)
-            expected_usable_size = int(expected_context_size * 0.75)
+            expected_usable_size = int(expected_context_size * 0.85)
             actual_context_size = llm.get_context_window_size()
             assert actual_context_size == expected_usable_size, (
                 f"Model {model_name} should have context window size {expected_usable_size}, "
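
The `test_cases` list itself sits outside this hunk; a hypothetical entry pairs a model id with its advertised window:

```python
# Hypothetical shape only; the real entries live earlier in the test file.
test_cases = [
    ("openai/gpt-4o", 128_000),  # placeholder model/window pair
]
```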