From 3d667a75ddcdef1349b08379b36ad29b787c9055 Mon Sep 17 00:00:00 2001
From: Alex
Date: Tue, 31 Mar 2026 02:07:22 -0700
Subject: [PATCH] fix: add GPT-5, o3, o4-mini to multimodal vision prefixes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Added verified vision-capable models:
- gpt-5 (entire GPT-5 family; confirmed multimodal via openai.com)
- o3, o3-pro (full multimodal; openai.com/index/thinking-with-images)
- o4-mini, o4 (full multimodal)

Added a text-only exclusion list to prevent false positives:
- o3-mini (text-only; superseded by o4-mini)
- o1-mini (text-only)
- o1-preview (text-only)

Existing prefixes are unchanged (Claude 3+, Gemini, GPT-4).
---
 lib/crewai/src/crewai/llm.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py
index 352bec16d..1355dbf59 100644
--- a/lib/crewai/src/crewai/llm.py
+++ b/lib/crewai/src/crewai/llm.py
@@ -2521,18 +2521,37 @@ class LLM(BaseLLM):
             True if the model likely supports images.
         """
         vision_prefixes = (
+            # OpenAI - GPT-4 vision models
             "gpt-4o",
             "gpt-4-turbo",
             "gpt-4-vision",
             "gpt-4.1",
+            # OpenAI - GPT-5 family (all multimodal, see openai.com/index/introducing-gpt-5)
+            "gpt-5",
+            # Anthropic - all Claude 3+ models support vision (platform.claude.com/docs/en/build-with-claude/vision)
             "claude-3",
             "claude-4",
             "claude-sonnet-4",
             "claude-opus-4",
             "claude-haiku-4",
+            # Google - all Gemini models are multimodal
             "gemini",
+            # OpenAI - o-series reasoning models with vision
+            # o3 and o4-mini are multimodal (openai.com/index/thinking-with-images)
+            # o3-mini is text-only and is handled via the exclusion list below
+            "o3",
+            "o4-mini",
+            "o4",
         )
+        # Text-only models that would otherwise match a vision prefix
+        text_only_models = ("o3-mini", "o1-mini", "o1-preview")
+
         model_lower = self.model.lower()
+
+        # Check exclusions first: "o3-mini" starts with the "o3" vision prefix
+        if any(model_lower.startswith(m) or f"/{m}" in model_lower for m in text_only_models):
+            return False
+
         return any(
             model_lower.startswith(p) or f"/{p}" in model_lower for p in vision_prefixes
         )
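
Reviewer note: a minimal, self-contained sketch of why the exclusion check
must run before the prefix check ("o3-mini" starts with the "o3" prefix).
The helper name supports_vision and the trimmed tuples are illustrative
only, not the actual LLM method or the full prefix list:

    # Sketch only: mirrors the matching order added in this patch.
    TEXT_ONLY = ("o3-mini", "o1-mini", "o1-preview")
    VISION_PREFIXES = ("gpt-4o", "gpt-5", "o3", "o4-mini", "o4")

    def supports_vision(model: str) -> bool:
        m = model.lower()
        # Exclusions first: a pure prefix scan would classify "o3-mini"
        # as vision-capable because it starts with "o3".
        if any(m.startswith(t) or f"/{t}" in m for t in TEXT_ONLY):
            return False
        return any(m.startswith(p) or f"/{p}" in m for p in VISION_PREFIXES)

    assert supports_vision("o3") is True
    assert supports_vision("o3-mini") is False         # excluded despite the "o3" prefix
    assert supports_vision("openai/o3-mini") is False  # provider-qualified form
    assert supports_vision("gpt-5-mini") is True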