diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index 741544662..aab6bc7c8 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -839,9 +839,13 @@ class LLM(BaseLLM):
 
         # Validate message format first
         for msg in messages:
-            if not isinstance(msg, dict) or "role" not in msg or "content" not in msg:
+            if not isinstance(msg, dict) or "role" not in msg:
                 raise TypeError(
-                    "Invalid message format. Each message must be a dict with 'role' and 'content' keys"
+                    "Invalid message format. Each message must be a dict with 'role' key"
+                )
+            if "content" not in msg and msg["role"] != "system":
+                raise TypeError(
+                    "Invalid message format. Each non-system message must have a 'content' key"
                 )
 
         # Handle O1 models specially
@@ -868,6 +872,19 @@ class LLM(BaseLLM):
                 messages.append({"role": "user", "content": "Please continue."})
             return messages
 
+        if "qwen" in self.model.lower():
+            formatted_messages = []
+            for msg in messages:
+                if not isinstance(msg.get("content"), str):
+                    formatted_messages.append(msg)
+                    continue
+
+                formatted_messages.append({
+                    "role": msg["role"],
+                    "content": [{"type": "text", "text": msg["content"]}]
+                })
+            return formatted_messages
+
         # Handle Anthropic models
         if not self.is_anthropic:
             return messages
diff --git a/tests/test_qwen_multimodal.py b/tests/test_qwen_multimodal.py
new file mode 100644
index 000000000..28ee80cde
--- /dev/null
+++ b/tests/test_qwen_multimodal.py
@@ -0,0 +1,31 @@
+import pytest
+from crewai.llm import LLM
+
+
+@pytest.mark.vcr(filter_headers=["authorization"])
+def test_qwen_multimodal_content_formatting():
+    """Test that multimodal content is properly formatted for Qwen models."""
+
+    llm = LLM(model="sambanova/Qwen2.5-72B-Instruct", temperature=0.7)
+
+    message = {"role": "user", "content": "Describe this image"}
+    formatted = llm._format_messages_for_provider([message])
+    assert isinstance(formatted[0]["content"], list)
+    assert formatted[0]["content"][0]["type"] == "text"
+    assert formatted[0]["content"][0]["text"] == "Describe this image"
+
+    multimodal_content = [
+        {"type": "text", "text": "What's in this image?"},
+        {"type": "image_url", "image_url": "https://example.com/image.jpg"}
+    ]
+    message = {"role": "user", "content": multimodal_content}
+    formatted = llm._format_messages_for_provider([message])
+    assert formatted[0]["content"] == multimodal_content
+
+    messages = [
+        {"role": "system", "content": "You are a visual analysis assistant."},
+        {"role": "user", "content": multimodal_content}
+    ]
+    formatted = llm._format_messages_for_provider(messages)
+    assert isinstance(formatted[0]["content"], list)
+    assert formatted[1]["content"] == multimodal_content
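
Usage sketch (not part of the patch): with the new Qwen branch above, any message whose "content" is a plain string should come back wrapped as a list of typed parts, while non-string content passes through untouched. This assumes constructing LLM and calling _format_messages_for_provider requires no live API call (the VCR-backed test relies on the same behavior); the model string is copied from the test and the messages are illustrative.

    from crewai.llm import LLM

    # Model name taken from the test file; any model containing "qwen" hits the new branch.
    llm = LLM(model="sambanova/Qwen2.5-72B-Instruct")

    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Summarize this document."},
    ]

    formatted = llm._format_messages_for_provider(messages)
    # Per the diff, each string content becomes a single-element list of text parts, e.g.
    # {"role": "user", "content": [{"type": "text", "text": "Summarize this document."}]}
    assert all(isinstance(m["content"], list) for m in formatted)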