refactor: improve multimodal file handling architecture

- Make crewai_files an optional dependency with graceful fallbacks
- Move file formatting from executor to LLM layer (_process_message_files)
- Add files field to LLMMessage type for cleaner message passing
- Add cache_control to Anthropic content blocks for prompt caching
- Clean up formatters: static methods for OpenAI/Gemini, proper error handling
- Remove unused ContentFormatter protocol
- Move test fixtures to lib/crewai-files/tests/fixtures
- Add Azure and Bedrock multimodal integration tests
- Fix mypy errors in crew_agent_executor.py
This commit is contained in:
Greyson LaLonde
2026-01-22 21:55:10 -05:00
parent dc015b14f9
commit a1cbb2f4e2
31 changed files with 320 additions and 1278 deletions

View File

@@ -7,7 +7,7 @@ from unittest.mock import patch
import pytest
from crewai.llm import LLM
from crewai.files import ImageFile, PDFFile, TextFile
from crewai_files import ImageFile, PDFFile, TextFile, format_multimodal_content
# Check for optional provider dependencies
try:
@@ -124,27 +124,18 @@ class TestLiteLLMMultimodal:
llm = LLM(model="gpt-4o", is_litellm=True)
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
assert "data:image/png;base64," in result[0]["image_url"]["url"]
def test_format_multimodal_content_non_multimodal(self) -> None:
"""Test non-multimodal model returns empty list."""
llm = LLM(model="gpt-3.5-turbo", is_litellm=True)
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
assert result == []
def test_format_multimodal_content_unsupported_type(self) -> None:
"""Test unsupported content type is skipped."""
llm = LLM(model="gpt-4o", is_litellm=True) # OpenAI doesn't support PDF
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert result == []
@@ -175,7 +166,7 @@ class TestAnthropicMultimodal:
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image"
@@ -188,7 +179,7 @@ class TestAnthropicMultimodal:
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "document"
@@ -230,7 +221,7 @@ class TestOpenAIMultimodal:
llm = LLM(model="openai/gpt-4o")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -264,7 +255,7 @@ class TestGeminiMultimodal:
llm = LLM(model="gemini/gemini-pro")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "inlineData" in result[0]
@@ -321,7 +312,7 @@ class TestAzureMultimodal:
llm = LLM(model="azure/gpt-4o")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -357,7 +348,7 @@ class TestBedrockMultimodal:
"""Test Bedrock supports images and PDFs."""
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
types = llm.supported_multimodal_content_types()
assert "image/" in types
assert any(t.startswith("image/") for t in types)
assert "application/pdf" in types
def test_format_multimodal_content_image(self) -> None:
@@ -365,7 +356,7 @@ class TestBedrockMultimodal:
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "image" in result[0]
@@ -378,7 +369,7 @@ class TestBedrockMultimodal:
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "document" in result[0]
@@ -411,18 +402,6 @@ class TestBaseLLMMultimodal:
llm = TestLLM(model="test")
assert llm.supported_multimodal_content_types() == []
def test_base_format_multimodal_content_empty(self) -> None:
"""Test base implementation returns empty list."""
from crewai.llms.base_llm import BaseLLM
class TestLLM(BaseLLM):
def call(self, messages, tools=None, callbacks=None):
return "test"
llm = TestLLM(model="test")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
assert llm.format_multimodal_content(files) == []
def test_base_format_text_content(self) -> None:
"""Test base text formatting uses OpenAI/Anthropic style."""
from crewai.llms.base_llm import BaseLLM
@@ -447,7 +426,7 @@ class TestMultipleFilesFormatting:
"chart2": ImageFile(source=MINIMAL_PNG),
}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 2
@@ -460,7 +439,7 @@ class TestMultipleFilesFormatting:
"text": TextFile(source=b"hello"), # Not supported
}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -469,6 +448,6 @@ class TestMultipleFilesFormatting:
"""Test empty files dict returns empty list."""
llm = LLM(model="gpt-4o")
result = llm.format_multimodal_content({})
result = format_multimodal_content({}, llm.model)
assert result == []