refactor: improve multimodal file handling architecture

- Make crewai_files an optional dependency with graceful fallbacks
- Move file formatting from executor to LLM layer (_process_message_files)
- Add files field to LLMMessage type for cleaner message passing
- Add cache_control to Anthropic content blocks for prompt caching
- Clean up formatters: static methods for OpenAI/Gemini, proper error handling
- Remove unused ContentFormatter protocol
- Move test fixtures to lib/crewai-files/tests/fixtures
- Add Azure and Bedrock multimodal integration tests
- Fix mypy errors in crew_agent_executor.py
This commit is contained in:
Greyson LaLonde
2026-01-22 21:55:10 -05:00
parent dc015b14f9
commit a1cbb2f4e2
31 changed files with 320 additions and 1278 deletions

View File

@@ -1,104 +0,0 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What
type of document is this? Answer in one word."},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stream":false}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '748'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.71.1
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAA/3WQTUvEMBCG/8ucW2jr7rL25sKCKHrQiyASYjJsw6ZJzUxEKf3vTheLX3hKeJ8n
8zIZoY8WPbRgvM4Wy7NyXXbaHXPZVM2qrpoGCnBWhJ4Oqqovd/nBnt92tF1dX+z3u6t7ffO8FYff
B5wtJNIHlCBFPweayBHrwBKZGBjl1j6Oi8/4NpPT0cIdUu4RpqcCiOOgEmqKQQAGqzinAJ+A8CVj
MDIhZO8LyKfSdgQXhsyK4xEDQVtvmo3UatOhMjKMXQzqp1ItXLD9jy1v5wYcOuwxaa/W/V//i9bd
bzoVEDN/j1ayDqZXZ1CxwySLzl9ldbIwTR/rySkqnAEAAA==
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 22 Jan 2026 00:18:50 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-01-22T00:18:50Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '750'
status:
code: 200
message: OK
version: 1

File diff suppressed because one or more lines are too long

View File

@@ -1,104 +0,0 @@
interactions:
- request:
body: '{"max_tokens":4096,"messages":[{"role":"user","content":[{"type":"text","text":"What
type of document is this? Answer in one word."},{"type":"document","source":{"type":"base64","media_type":"application/pdf","data":"JVBERi0xLjQKMSAwIG9iaiA8PCAvVHlwZSAvQ2F0YWxvZyAvUGFnZXMgMiAwIFIgPj4gZW5kb2JqCjIgMCBvYmogPDwgL1R5cGUgL1BhZ2VzIC9LaWRzIFszIDAgUl0gL0NvdW50IDEgPj4gZW5kb2JqCjMgMCBvYmogPDwgL1R5cGUgL1BhZ2UgL1BhcmVudCAyIDAgUiAvTWVkaWFCb3ggWzAgMCA2MTIgNzkyXSA+PiBlbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmCjAwMDAwMDAwMDkgMDAwMDAgbgowMDAwMDAwMDU4IDAwMDAwIG4KMDAwMDAwMDExNSAwMDAwMCBuCnRyYWlsZXIgPDwgL1NpemUgNCAvUm9vdCAxIDAgUiA+PgpzdGFydHhyZWYKMTk2CiUlRU9GCg=="},"cache_control":{"type":"ephemeral"}}]}],"model":"claude-3-5-haiku-20241022","stream":false}'
headers:
User-Agent:
- X-USER-AGENT-XXX
accept:
- application/json
accept-encoding:
- ACCEPT-ENCODING-XXX
anthropic-version:
- '2023-06-01'
connection:
- keep-alive
content-length:
- '748'
content-type:
- application/json
host:
- api.anthropic.com
x-api-key:
- X-API-KEY-XXX
x-stainless-arch:
- X-STAINLESS-ARCH-XXX
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- X-STAINLESS-OS-XXX
x-stainless-package-version:
- 0.71.1
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.10
x-stainless-timeout:
- NOT_GIVEN
method: POST
uri: https://api.anthropic.com/v1/messages
response:
body:
string: !!binary |
H4sIAAAAAAAA/3WQTUvEMBCG/8ucW2hju4eeRUU97EFRFAkhGbZh06Qmk1Up/e9OF4tf7CnhfZ7J
y2SCIRh00IF2Khssz8q27JXd51JUoqkrIaAAa1gY0k5W9bbptXo7PD60l/V1f/V0J+5vxQ079DHi
YmFKaoccxOCWQKVkEylPHOngCfnWPU+rT/i+kOPRwfb8AuaXAhKFUUZUKXhO0RtJOXr4AglfM3rN
4z47V0A+NnYTWD9mkhT26BN09UZsuFPpHqXmx8gGL38r1coZm1NsnV0acOxxwKicbIf//jet+790
LiBk+hk1vA7Gg9UoyWLkRZd/MioamOdP24g1JZkBAAA=
headers:
CF-RAY:
- CF-RAY-XXX
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Thu, 22 Jan 2026 00:18:56 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
X-Robots-Tag:
- none
anthropic-organization-id:
- ANTHROPIC-ORGANIZATION-ID-XXX
anthropic-ratelimit-input-tokens-limit:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-input-tokens-remaining:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-input-tokens-reset:
- ANTHROPIC-RATELIMIT-INPUT-TOKENS-RESET-XXX
anthropic-ratelimit-output-tokens-limit:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-LIMIT-XXX
anthropic-ratelimit-output-tokens-remaining:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-REMAINING-XXX
anthropic-ratelimit-output-tokens-reset:
- ANTHROPIC-RATELIMIT-OUTPUT-TOKENS-RESET-XXX
anthropic-ratelimit-requests-limit:
- '4000'
anthropic-ratelimit-requests-remaining:
- '3999'
anthropic-ratelimit-requests-reset:
- '2026-01-22T00:18:55Z'
anthropic-ratelimit-tokens-limit:
- ANTHROPIC-RATELIMIT-TOKENS-LIMIT-XXX
anthropic-ratelimit-tokens-remaining:
- ANTHROPIC-RATELIMIT-TOKENS-REMAINING-XXX
anthropic-ratelimit-tokens-reset:
- ANTHROPIC-RATELIMIT-TOKENS-RESET-XXX
cf-cache-status:
- DYNAMIC
request-id:
- REQUEST-ID-XXX
strict-transport-security:
- STS-XXX
x-envoy-upstream-service-time:
- '648'
status:
code: 200
message: OK
version: 1

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +0,0 @@
Quarter,Revenue ($M),Expenses ($M),Profit ($M)
Q1 2024,70,40,30
Q2 2024,75,42,33
Q3 2024,80,45,35
Q4 2024,75,44,31
1 Quarter Revenue ($M) Expenses ($M) Profit ($M)
2 Q1 2024 70 40 30
3 Q2 2024 75 42 33
4 Q3 2024 80 45 35
5 Q4 2024 75 44 31

Binary file not shown.

Before

Width:  |  Height:  |  Size: 27 KiB

View File

@@ -1,10 +0,0 @@
Review Guidelines
1. Be clear and concise: Write feedback that is easy to understand.
2. Focus on behavior and outcomes: Describe what happened and why it matters.
3. Be specific: Provide examples to support your points.
4. Balance positives and improvements: Highlight strengths and areas to grow.
5. Be respectful and constructive: Assume positive intent and offer solutions.
6. Use objective criteria: Reference goals, metrics, or expectations where possible.
7. Suggest next steps: Recommend actionable ways to improve.
8. Proofread: Check tone, grammar, and clarity before submitting.

Binary file not shown.

View File

@@ -7,7 +7,7 @@ from unittest.mock import patch
import pytest
from crewai.llm import LLM
from crewai.files import ImageFile, PDFFile, TextFile
from crewai_files import ImageFile, PDFFile, TextFile, format_multimodal_content
# Check for optional provider dependencies
try:
@@ -124,27 +124,18 @@ class TestLiteLLMMultimodal:
llm = LLM(model="gpt-4o", is_litellm=True)
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
assert "data:image/png;base64," in result[0]["image_url"]["url"]
def test_format_multimodal_content_non_multimodal(self) -> None:
"""Test non-multimodal model returns empty list."""
llm = LLM(model="gpt-3.5-turbo", is_litellm=True)
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
assert result == []
def test_format_multimodal_content_unsupported_type(self) -> None:
"""Test unsupported content type is skipped."""
llm = LLM(model="gpt-4o", is_litellm=True) # OpenAI doesn't support PDF
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert result == []
@@ -175,7 +166,7 @@ class TestAnthropicMultimodal:
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image"
@@ -188,7 +179,7 @@ class TestAnthropicMultimodal:
llm = LLM(model="anthropic/claude-3-sonnet-20240229")
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "document"
@@ -230,7 +221,7 @@ class TestOpenAIMultimodal:
llm = LLM(model="openai/gpt-4o")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -264,7 +255,7 @@ class TestGeminiMultimodal:
llm = LLM(model="gemini/gemini-pro")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "inlineData" in result[0]
@@ -321,7 +312,7 @@ class TestAzureMultimodal:
llm = LLM(model="azure/gpt-4o")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -357,7 +348,7 @@ class TestBedrockMultimodal:
"""Test Bedrock supports images and PDFs."""
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
types = llm.supported_multimodal_content_types()
assert "image/" in types
assert any(t.startswith("image/") for t in types)
assert "application/pdf" in types
def test_format_multimodal_content_image(self) -> None:
@@ -365,7 +356,7 @@ class TestBedrockMultimodal:
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "image" in result[0]
@@ -378,7 +369,7 @@ class TestBedrockMultimodal:
llm = LLM(model="bedrock/anthropic.claude-3-sonnet")
files = {"doc": PDFFile(source=MINIMAL_PDF)}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert "document" in result[0]
@@ -411,18 +402,6 @@ class TestBaseLLMMultimodal:
llm = TestLLM(model="test")
assert llm.supported_multimodal_content_types() == []
def test_base_format_multimodal_content_empty(self) -> None:
"""Test base implementation returns empty list."""
from crewai.llms.base_llm import BaseLLM
class TestLLM(BaseLLM):
def call(self, messages, tools=None, callbacks=None):
return "test"
llm = TestLLM(model="test")
files = {"chart": ImageFile(source=MINIMAL_PNG)}
assert llm.format_multimodal_content(files) == []
def test_base_format_text_content(self) -> None:
"""Test base text formatting uses OpenAI/Anthropic style."""
from crewai.llms.base_llm import BaseLLM
@@ -447,7 +426,7 @@ class TestMultipleFilesFormatting:
"chart2": ImageFile(source=MINIMAL_PNG),
}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 2
@@ -460,7 +439,7 @@ class TestMultipleFilesFormatting:
"text": TextFile(source=b"hello"), # Not supported
}
result = llm.format_multimodal_content(files)
result = format_multimodal_content(files, getattr(llm, "provider", None) or llm.model)
assert len(result) == 1
assert result[0]["type"] == "image_url"
@@ -469,6 +448,6 @@ class TestMultipleFilesFormatting:
"""Test empty files dict returns empty list."""
llm = LLM(model="gpt-4o")
result = llm.format_multimodal_content({})
result = format_multimodal_content({}, llm.model)
assert result == []

View File

@@ -9,13 +9,13 @@ from pathlib import Path
import pytest
from crewai.llm import LLM
from crewai.files import File, ImageFile, PDFFile, TextFile
from crewai_files import File, ImageFile, PDFFile, TextFile, format_multimodal_content
# Path to test data files
TEST_DATA_DIR = Path(__file__).parent.parent.parent.parent.parent / "data"
TEST_IMAGE_PATH = TEST_DATA_DIR / "revenue_chart.png"
TEST_TEXT_PATH = TEST_DATA_DIR / "review_guidelines.txt"
TEST_FIXTURES_DIR = Path(__file__).parent.parent.parent.parent / "crewai-files" / "tests" / "fixtures"
TEST_IMAGE_PATH = TEST_FIXTURES_DIR / "revenue_chart.png"
TEST_TEXT_PATH = TEST_FIXTURES_DIR / "review_guidelines.txt"
@pytest.fixture
@@ -50,7 +50,8 @@ startxref
def _build_multimodal_message(llm: LLM, prompt: str, files: dict) -> list[dict]:
"""Build a multimodal message with text and file content."""
content_blocks = llm.format_multimodal_content(files)
provider = getattr(llm, "provider", None) or llm.model
content_blocks = format_multimodal_content(files, provider)
return [
{
"role": "user",
@@ -124,6 +125,68 @@ class TestAnthropicMultimodalIntegration:
assert len(response) > 0
class TestAzureMultimodalIntegration:
    """Integration coverage for image input sent to an Azure-hosted model.

    Each test is decorated with the ``vcr`` pytest marker.
    """

    @pytest.mark.vcr()
    def test_describe_image(self, test_image_bytes: bytes) -> None:
        """Send one image with a short prompt and expect a non-empty string.

        Args:
            test_image_bytes: Raw image bytes supplied by the pytest fixture
                of the same name.
        """
        azure_llm = LLM(model="azure/gpt-4o")
        attachments = {"image": ImageFile(source=test_image_bytes)}

        # Build the user message first, then hand it to the model in one call.
        reply = azure_llm.call(
            _build_multimodal_message(
                azure_llm,
                "Describe this image in one sentence. Be brief.",
                attachments,
            )
        )

        assert reply
        assert isinstance(reply, str)
        assert len(reply) > 0
class TestBedrockMultimodalIntegration:
    """Integration coverage for image and PDF input sent through Bedrock.

    Each test is decorated with the ``vcr`` pytest marker.
    """

    @pytest.mark.vcr()
    def test_describe_image(self, test_image_bytes: bytes) -> None:
        """Send one image with a short prompt and expect a non-empty string.

        Args:
            test_image_bytes: Raw image bytes supplied by the pytest fixture
                of the same name.
        """
        bedrock_llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
        attachments = {"image": ImageFile(source=test_image_bytes)}

        reply = bedrock_llm.call(
            _build_multimodal_message(
                bedrock_llm,
                "Describe this image in one sentence. Be brief.",
                attachments,
            )
        )

        assert reply
        assert isinstance(reply, str)
        assert len(reply) > 0

    @pytest.mark.vcr()
    def test_analyze_pdf(self) -> None:
        """Send the module's minimal PDF and expect a non-empty string."""
        bedrock_llm = LLM(model="bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0")
        attachments = {"document": PDFFile(source=MINIMAL_PDF)}

        reply = bedrock_llm.call(
            _build_multimodal_message(
                bedrock_llm,
                "What type of document is this? Answer in one word.",
                attachments,
            )
        )

        assert reply
        assert isinstance(reply, str)
        assert len(reply) > 0
class TestGeminiMultimodalIntegration:
"""Integration tests for Gemini multimodal with real API calls."""

View File

@@ -5,7 +5,7 @@ import base64
import pytest
from crewai.tools.agent_tools.read_file_tool import ReadFileTool
from crewai.files import ImageFile, PDFFile, TextFile
from crewai_files import ImageFile, PDFFile, TextFile
class TestReadFileTool:

View File

@@ -13,7 +13,7 @@ from crewai.utilities.file_store import (
store_files,
store_task_files,
)
from crewai.files import TextFile
from crewai_files import TextFile
class TestFileStore:

View File

@@ -6,7 +6,7 @@ from pathlib import Path
import pytest
from crewai.files import (
from crewai_files import (
AudioFile,
File,
FileBytes,
@@ -20,7 +20,7 @@ from crewai.files import (
normalize_input_files,
wrap_file_source,
)
from crewai.files.file import detect_content_type
from crewai_files.core.sources import detect_content_type
class TestDetectContentType: