Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-08 15:48:29 +00:00.
Fix #2541: Add support for multimodal content format in qwen2.5-vl model
Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
@@ -839,9 +839,13 @@ class LLM(BaseLLM):
|
||||
|
||||
# Validate message format first
|
||||
for msg in messages:
|
||||
if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
|
||||
if not isinstance(msg, dict) or "role" not in msg:
|
||||
raise TypeError(
|
||||
"Invalid message format. Each message must be a dict with 'role' and 'content' keys"
|
||||
"Invalid message format. Each message must be a dict with 'role' key"
|
||||
)
|
||||
if "content" not in msg and msg["role"] != "system":
|
||||
raise TypeError(
|
||||
"Invalid message format. Each non-system message must have a 'content' key"
|
||||
)
|
||||
|
||||
# Handle O1 models specially
|
||||
@@ -868,6 +872,19 @@ class LLM(BaseLLM):
|
||||
messages.append({"role": "user", "content": "Please continue."})
|
||||
return messages
|
||||
|
||||
if "qwen" in self.model.lower():
|
||||
formatted_messages = []
|
||||
for msg in messages:
|
||||
if not isinstance(msg.get("content"), str):
|
||||
formatted_messages.append(msg)
|
||||
continue
|
||||
|
||||
formatted_messages.append({
|
||||
"role": msg["role"],
|
||||
"content": [{"type": "text", "text": msg["content"]}]
|
||||
})
|
||||
return formatted_messages
|
||||
|
||||
# Handle Anthropic models
|
||||
if not self.is_anthropic:
|
||||
return messages
|
||||
|
||||
31
tests/test_qwen_multimodal.py
Normal file
31
tests/test_qwen_multimodal.py
Normal file
@@ -0,0 +1,31 @@
|
||||
import pytest
|
||||
from crewai.llm import LLM
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
def test_qwen_multimodal_content_formatting():
    """Qwen models should receive message content in multimodal list form.

    Covers three cases:
    1. a plain-string user message is wrapped into ``[{"type": "text", ...}]``,
    2. content that is already a multimodal part list passes through untouched,
    3. a mixed system/user conversation keeps each message's formatting.
    """
    qwen_llm = LLM(model="sambanova/Qwen2.5-72B-Instruct", temperature=0.7)

    # Case 1: plain string content gets wrapped into a one-element text part.
    plain_msg = {"role": "user", "content": "Describe this image"}
    out = qwen_llm._format_messages_for_provider([plain_msg])
    first_content = out[0]["content"]
    assert isinstance(first_content, list)
    assert first_content[0]["type"] == "text"
    assert first_content[0]["text"] == "Describe this image"

    # Case 2: already-structured multimodal content is passed through as-is.
    parts = [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url", "image_url": "https://example.com/image.jpg"},
    ]
    out = qwen_llm._format_messages_for_provider([{"role": "user", "content": parts}])
    assert out[0]["content"] == parts

    # Case 3: a system + user conversation keeps per-message formatting —
    # the string system prompt is wrapped, the multimodal user turn is untouched.
    convo = [
        {"role": "system", "content": "You are a visual analysis assistant."},
        {"role": "user", "content": parts},
    ]
    out = qwen_llm._format_messages_for_provider(convo)
    assert isinstance(out[0]["content"], list)
    assert out[1]["content"] == parts
|
||||
Reference in New Issue
Block a user