Fix #2541: Add support for multimodal content format in qwen2.5-vl model

Co-Authored-By: Joe Moura <joao@crewai.com>
2026-01-09 08:08:32 +00:00 · 2025-04-09 03:06:24 +00:00
parent b992ee9d6b
commit cdd5ebfb1a
2 changed files with 50 additions and 2 deletions
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -839,9 +839,13 @@ class LLM(BaseLLM):
        # Validate message format first
        for msg in messages:
-            if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
+            if not isinstance(msg, dict) or "role" not in msg:
                raise TypeError(
-                    "Invalid message format. Each message must be a dict with 'role' and 'content' keys"
+                    "Invalid message format. Each message must be a dict with 'role' key"
                )
            if "content" not in msg and msg["role"] != "system":
                raise TypeError(
                    "Invalid message format. Each non-system message must have a 'content' key"
                )
        # Handle O1 models specially
@@ -868,6 +872,19 @@ class LLM(BaseLLM):
                messages.append({"role": "user", "content": "Please continue."})
            return messages
        if "qwen" in self.model.lower():
            formatted_messages = []
            for msg in messages:
                if not isinstance(msg.get("content"), str):
                    formatted_messages.append(msg)
                    continue
                formatted_messages.append({
                    "role": msg["role"],
                    "content": [{"type": "text", "text": msg["content"]}]
                })
            return formatted_messages
        # Handle Anthropic models
        if not self.is_anthropic:
            return messages
--- a/tests/test_qwen_multimodal.py
+++ b/tests/test_qwen_multimodal.py
@@ -0,0 +1,31 @@
 import pytest
 from crewai.llm import LLM
@pytest.mark.vcr(filter_headers=["authorization"])
def test_qwen_multimodal_content_formatting():
    """Check how ``_format_messages_for_provider`` shapes content for Qwen models.

    Three cases are covered:

    1. Plain string content is wrapped into a single ``{"type": "text"}`` part.
    2. Content that is already a list of parts is passed through unchanged.
    3. In a mixed conversation, the string system prompt is wrapped while the
       list-valued user content is left as-is.

    In every case the message ``role`` must be preserved.
    """
    llm = LLM(model="sambanova/Qwen2.5-72B-Instruct", temperature=0.7)

    # Case 1: string content becomes a one-element list of text parts.
    message = {"role": "user", "content": "Describe this image"}
    formatted = llm._format_messages_for_provider([message])
    assert formatted[0]["role"] == "user"
    assert isinstance(formatted[0]["content"], list)
    assert formatted[0]["content"][0]["type"] == "text"
    assert formatted[0]["content"][0]["text"] == "Describe this image"

    # Case 2: content that is already a list of parts must not be re-wrapped.
    multimodal_content = [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url", "image_url": "https://example.com/image.jpg"},
    ]
    message = {"role": "user", "content": multimodal_content}
    formatted = llm._format_messages_for_provider([message])
    assert formatted[0]["role"] == "user"
    assert formatted[0]["content"] == multimodal_content

    # Case 3: mixed conversation. Checking only the type of the system content
    # would let a formatter that drops or alters the prompt text slip through,
    # so pin the exact wrapped value and the roles as well.
    system_prompt = "You are a visual analysis assistant."
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": multimodal_content},
    ]
    formatted = llm._format_messages_for_provider(messages)
    assert formatted[0]["role"] == "system"
    assert isinstance(formatted[0]["content"], list)
    assert formatted[0]["content"] == [{"type": "text", "text": system_prompt}]
    assert formatted[1]["role"] == "user"
    assert formatted[1]["content"] == multimodal_content