mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-09 08:08:32 +00:00
Fix #2541: Add support for multimodal content format in qwen2.5-vl model
Co-Authored-By: Joe Moura <joao@crewai.com>
This commit is contained in:
@@ -839,9 +839,13 @@ class LLM(BaseLLM):
|
|||||||
|
|
||||||
# Validate message format first
|
# Validate message format first
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
|
if not isinstance(msg, dict) or "role" not in msg:
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"Invalid message format. Each message must be a dict with 'role' and 'content' keys"
|
"Invalid message format. Each message must be a dict with 'role' key"
|
||||||
|
)
|
||||||
|
if "content" not in msg and msg["role"] != "system":
|
||||||
|
raise TypeError(
|
||||||
|
"Invalid message format. Each non-system message must have a 'content' key"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Handle O1 models specially
|
# Handle O1 models specially
|
||||||
@@ -868,6 +872,19 @@ class LLM(BaseLLM):
|
|||||||
messages.append({"role": "user", "content": "Please continue."})
|
messages.append({"role": "user", "content": "Please continue."})
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
if "qwen" in self.model.lower():
|
||||||
|
formatted_messages = []
|
||||||
|
for msg in messages:
|
||||||
|
if not isinstance(msg.get("content"), str):
|
||||||
|
formatted_messages.append(msg)
|
||||||
|
continue
|
||||||
|
|
||||||
|
formatted_messages.append({
|
||||||
|
"role": msg["role"],
|
||||||
|
"content": [{"type": "text", "text": msg["content"]}]
|
||||||
|
})
|
||||||
|
return formatted_messages
|
||||||
|
|
||||||
# Handle Anthropic models
|
# Handle Anthropic models
|
||||||
if not self.is_anthropic:
|
if not self.is_anthropic:
|
||||||
return messages
|
return messages
|
||||||
|
|||||||
31
tests/test_qwen_multimodal.py
Normal file
31
tests/test_qwen_multimodal.py
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
import pytest
|
||||||
|
from crewai.llm import LLM
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.vcr(filter_headers=["authorization"])
def test_qwen_multimodal_content_formatting():
    """Test that multimodal content is properly formatted for Qwen models.

    Exercises ``LLM._format_messages_for_provider`` with a Qwen model name
    and checks three cases:

    * plain string content is wrapped in a one-element list containing a
      ``{"type": "text", "text": ...}`` part;
    * content that is already a list of parts is passed through unchanged;
    * in a mixed conversation every message is formatted on its own, with
      roles and ordering preserved.
    """
    llm = LLM(model="sambanova/Qwen2.5-72B-Instruct", temperature=0.7)

    # Case 1: a plain string is wrapped into a single text part.
    message = {"role": "user", "content": "Describe this image"}
    formatted = llm._format_messages_for_provider([message])
    assert isinstance(formatted[0]["content"], list)
    assert formatted[0]["content"][0]["type"] == "text"
    assert formatted[0]["content"][0]["text"] == "Describe this image"
    # Pin the whole structure so extra parts or a changed role are caught.
    assert formatted == [
        {
            "role": "user",
            "content": [{"type": "text", "text": "Describe this image"}],
        }
    ]

    # Case 2: content that is already a list of parts is left untouched.
    multimodal_content = [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url", "image_url": "https://example.com/image.jpg"},
    ]
    message = {"role": "user", "content": multimodal_content}
    formatted = llm._format_messages_for_provider([message])
    assert len(formatted) == 1
    assert formatted[0]["role"] == "user"
    assert formatted[0]["content"] == multimodal_content

    # Case 3: string and list content in the same conversation.
    messages = [
        {"role": "system", "content": "You are a visual analysis assistant."},
        {"role": "user", "content": multimodal_content},
    ]
    formatted = llm._format_messages_for_provider(messages)
    assert len(formatted) == len(messages)
    assert [msg["role"] for msg in formatted] == ["system", "user"]
    assert isinstance(formatted[0]["content"], list)
    # The system prompt must survive verbatim inside the text part; a bare
    # isinstance check would also accept an empty or incorrect list.
    assert formatted[0]["content"] == [
        {"type": "text", "text": "You are a visual analysis assistant."}
    ]
    assert formatted[1]["content"] == multimodal_content
|
||||||
Reference in New Issue
Block a user