From c14abf1758dd3aafc57ad7f17569174c7cc1ea68 Mon Sep 17 00:00:00 2001 From: alex-clawd Date: Wed, 1 Apr 2026 14:08:37 -0700 Subject: [PATCH] fix: add GPT-5 and o-series to multimodal vision prefixes (#5183) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: add GPT-5, o3, o4-mini to multimodal vision prefixes Added verified vision-capable models: - gpt-5 (all GPT-5 family — confirmed multimodal via openai.com) - o3, o3-pro (full multimodal — openai.com/index/thinking-with-images) - o4-mini, o4 (full multimodal) Added text-only exclusion list to prevent false positives: - o3-mini (text-only, replaced by o4-mini) - o1-mini (text-only) - o1-preview (text-only) Existing prefixes unchanged (Claude 3+, Gemini, GPT-4). * fix: add o1 to vision prefixes + ruff format Co-Authored-By: Claude Opus 4.5 * fix: guard _sync_executor access in test utils for lazy-init event bus * fix: expand vision model coverage — Claude 5, Grok, Pixtral, Qwen VL, LLaVA Co-Authored-By: Claude Opus 4.5 * ci: retrigger — flaky test_hierarchical_verbose_false_manager_agent (ConnectionError) * fix: remove hallucinated claude-5 models from vision prefixes — verified against official docs Co-Authored-By: Claude Opus 4.5 --------- Co-authored-by: Claude Opus 4.5 Co-authored-by: João Moura --- lib/crewai/src/crewai/llm.py | 33 +++++++++++++++++++++++++++++++++ lib/crewai/tests/utils.py | 12 +++++++----- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/lib/crewai/src/crewai/llm.py b/lib/crewai/src/crewai/llm.py index 873c1b7dd..c294d6a84 100644 --- a/lib/crewai/src/crewai/llm.py +++ b/lib/crewai/src/crewai/llm.py @@ -2513,18 +2513,51 @@ class LLM(BaseLLM): True if the model likely supports images. """ vision_prefixes = ( + # OpenAI — GPT-4 vision models "gpt-4o", "gpt-4-turbo", "gpt-4-vision", "gpt-4.1", + # OpenAI — GPT-5 family (all variants support multimodal) + "gpt-5", + # OpenAI — o-series reasoning models with vision + # o1, o3, o4, o4-mini support multimodal + # o1-mini, o1-preview, o3-mini are text-only — handled via exclusion below + "o1", + "o3", + "o4-mini", + "o4", + # Anthropic — Claude 3+ models support vision "claude-3", "claude-4", "claude-sonnet-4", "claude-opus-4", "claude-haiku-4", + # Google — all Gemini models support multimodal "gemini", + # xAI — Grok models support vision + "grok", + # Mistral — Pixtral vision model + "pixtral", + # Open-source vision models + "llava", + # Alibaba — Qwen vision-language models + "qwen-vl", + "qwen2-vl", + "qwen3-vl", ) + # Text-only models that would otherwise match vision prefixes + text_only_models = ("o3-mini", "o1-mini", "o1-preview") + model_lower = self.model.lower() + + # Check exclusion first + if any( + model_lower.startswith(m) or f"/{m}" in model_lower + for m in text_only_models + ): + return False + return any( model_lower.startswith(p) or f"/{p}" in model_lower for p in vision_prefixes ) diff --git a/lib/crewai/tests/utils.py b/lib/crewai/tests/utils.py index a514634ae..68e01031a 100644 --- a/lib/crewai/tests/utils.py +++ b/lib/crewai/tests/utils.py @@ -32,8 +32,10 @@ def wait_for_event_handlers(timeout: float = 5.0) -> None: except Exception: # noqa: S110 pass - crewai_event_bus._sync_executor.shutdown(wait=True) - crewai_event_bus._sync_executor = ThreadPoolExecutor( - max_workers=10, - thread_name_prefix="CrewAISyncHandler", - ) + # Guard against lazy-initialized executor (may not exist if no events were emitted) + if getattr(crewai_event_bus, "_executor_initialized", False): + crewai_event_bus._sync_executor.shutdown(wait=True) + crewai_event_bus._sync_executor = ThreadPoolExecutor( + max_workers=10, + thread_name_prefix="CrewAISyncHandler", + )