Compare commits

...

1 Commit

Author SHA1 Message Date
Alex
3d667a75dd fix: add GPT-5, o3, o4-mini to multimodal vision prefixes
Added verified vision-capable models:
- gpt-5 (all GPT-5 family — confirmed multimodal via openai.com)
- o3, o3-pro (full multimodal — openai.com/index/thinking-with-images)
- o4-mini, o4 (full multimodal)

Added text-only exclusion list to prevent false positives from prefix matching (e.g. o3-mini would otherwise match the o3 prefix):
- o3-mini (text-only, replaced by o4-mini)
- o1-mini (text-only)
- o1-preview (text-only)

Existing prefixes unchanged (Claude 3+, Gemini, GPT-4).
2026-03-31 07:54:17 -07:00

View File

@@ -2521,18 +2521,37 @@ class LLM(BaseLLM):
True if the model likely supports images.
"""
vision_prefixes = (
# OpenAI — GPT-4 vision models
"gpt-4o",
"gpt-4-turbo",
"gpt-4-vision",
"gpt-4.1",
# OpenAI — GPT-5 family (all support multimodal, see openai.com/index/introducing-gpt-5)
"gpt-5",
# Anthropic — all Claude 3+ models support vision (platform.claude.com/docs/en/build-with-claude/vision)
"claude-3",
"claude-4",
"claude-sonnet-4",
"claude-opus-4",
"claude-haiku-4",
# Google — all Gemini models support multimodal
"gemini",
# OpenAI — o-series reasoning models with vision
# o3 and o4-mini support multimodal (openai.com/index/thinking-with-images)
# o3-mini is text-only — handled via exclusion below
"o3",
"o4-mini",
"o4",
)
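# Text-only models rejected before prefix matching: "o3-mini" would otherwise
# match the "o3" prefix; the o1 entries are listed explicitly even though no
# "o1" prefix appears in vision_prefixes.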
text_only_models = ("o3-mini", "o1-mini", "o1-preview")
model_lower = self.model.lower()
# Check exclusion first
if any(model_lower.startswith(m) or f"/{m}" in model_lower for m in text_only_models):
return False
return any(
model_lower.startswith(p) or f"/{p}" in model_lower for p in vision_prefixes
)