mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 08:12:39 +00:00
test: add file utilities tests
- Add tests for file processing constraints and validators - Add tests for FileProcessor and FileResolver - Add tests for resolved file types - Add tests for file store operations - Add unit tests for multimodal LLM support
This commit is contained in:
474
lib/crewai/tests/llms/test_multimodal.py
Normal file
474
lib/crewai/tests/llms/test_multimodal.py
Normal file
@@ -0,0 +1,474 @@
|
||||
"""Unit tests for LLM multimodal functionality across all providers."""
|
||||
|
||||
import base64
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.llm import LLM
|
||||
from crewai.utilities.files import ImageFile, PDFFile, TextFile
|
||||
|
||||
# Check for optional provider dependencies
|
||||
try:
|
||||
from crewai.llms.providers.anthropic.completion import AnthropicCompletion
|
||||
HAS_ANTHROPIC = True
|
||||
except ImportError:
|
||||
HAS_ANTHROPIC = False
|
||||
|
||||
try:
|
||||
from crewai.llms.providers.azure.completion import AzureCompletion
|
||||
HAS_AZURE = True
|
||||
except ImportError:
|
||||
HAS_AZURE = False
|
||||
|
||||
try:
|
||||
from crewai.llms.providers.bedrock.completion import BedrockCompletion
|
||||
HAS_BEDROCK = True
|
||||
except ImportError:
|
||||
HAS_BEDROCK = False
|
||||
|
||||
|
||||
# Minimal valid PNG for testing
|
||||
MINIMAL_PNG = (
|
||||
b"\x89PNG\r\n\x1a\n"
|
||||
b"\x00\x00\x00\rIHDR"
|
||||
b"\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00"
|
||||
b"\x90wS\xde"
|
||||
b"\x00\x00\x00\x00IEND\xaeB`\x82"
|
||||
)
|
||||
|
||||
MINIMAL_PDF = b"%PDF-1.4 test content"
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_api_keys():
    """Inject fake credentials for every provider into the environment.

    Autouse so that no test in this module ever reaches for real keys;
    patch.dict restores the original environment on teardown.
    """
    credential_names = (
        "ANTHROPIC_API_KEY",
        "OPENAI_API_KEY",
        "GOOGLE_API_KEY",
        "AZURE_API_KEY",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
    )
    with patch.dict(os.environ, dict.fromkeys(credential_names, "test-key")):
        yield
|
||||
|
||||
|
||||
class TestLiteLLMMultimodal:
    """Multimodal behavior of the LLM class routed through litellm.

    Every test passes `is_litellm=True` (or a `litellm/` model prefix) so
    the litellm wrapper is exercised rather than a native provider.
    """

    def test_supports_multimodal_gpt4o(self) -> None:
        """GPT-4o reports multimodal support."""
        assert LLM(model="gpt-4o", is_litellm=True).supports_multimodal() is True

    def test_supports_multimodal_gpt4_turbo(self) -> None:
        """GPT-4 Turbo reports multimodal support."""
        assert LLM(model="gpt-4-turbo", is_litellm=True).supports_multimodal() is True

    def test_supports_multimodal_claude3(self) -> None:
        """Claude 3 reports multimodal support via litellm."""
        # litellm/ prefix keeps the native Anthropic provider out of the path
        wrapper = LLM(model="litellm/claude-3-sonnet-20240229")
        assert wrapper.supports_multimodal() is True

    def test_supports_multimodal_gemini(self) -> None:
        """Gemini reports multimodal support."""
        assert LLM(model="gemini/gemini-pro", is_litellm=True).supports_multimodal() is True

    def test_supports_multimodal_gpt35_does_not(self) -> None:
        """GPT-3.5 reports no multimodal support."""
        assert LLM(model="gpt-3.5-turbo", is_litellm=True).supports_multimodal() is False

    def test_supported_content_types_openai(self) -> None:
        """OpenAI models accept images but not PDFs."""
        supported = LLM(model="gpt-4o", is_litellm=True).supported_multimodal_content_types()
        assert "image/" in supported
        assert "application/pdf" not in supported

    def test_supported_content_types_claude(self) -> None:
        """Claude models accept both images and PDFs via litellm."""
        # litellm/ prefix keeps the native Anthropic provider out of the path
        wrapper = LLM(model="litellm/claude-3-sonnet-20240229")
        supported = wrapper.supported_multimodal_content_types()
        assert "image/" in supported
        assert "application/pdf" in supported

    def test_supported_content_types_gemini(self) -> None:
        """Gemini models accept a broad range of content types."""
        supported = LLM(model="gemini/gemini-pro", is_litellm=True).supported_multimodal_content_types()
        for expected in ("image/", "audio/", "video/", "application/pdf", "text/"):
            assert expected in supported

    def test_supported_content_types_non_multimodal(self) -> None:
        """Non-multimodal models expose no supported content types."""
        assert LLM(model="gpt-3.5-turbo", is_litellm=True).supported_multimodal_content_types() == []

    def test_format_multimodal_content_image(self) -> None:
        """An image file becomes a single image_url part with a data URI."""
        wrapper = LLM(model="gpt-4o", is_litellm=True)

        parts = wrapper.format_multimodal_content({"chart": ImageFile(source=MINIMAL_PNG)})

        assert len(parts) == 1
        assert parts[0]["type"] == "image_url"
        assert "data:image/png;base64," in parts[0]["image_url"]["url"]

    def test_format_multimodal_content_non_multimodal(self) -> None:
        """A non-multimodal model produces no content parts."""
        wrapper = LLM(model="gpt-3.5-turbo", is_litellm=True)

        parts = wrapper.format_multimodal_content({"chart": ImageFile(source=MINIMAL_PNG)})

        assert parts == []

    def test_format_multimodal_content_unsupported_type(self) -> None:
        """Files of an unsupported content type are silently dropped."""
        wrapper = LLM(model="gpt-4o", is_litellm=True)  # OpenAI doesn't support PDF

        parts = wrapper.format_multimodal_content({"doc": PDFFile(source=MINIMAL_PDF)})

        assert parts == []
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_ANTHROPIC, reason="Anthropic SDK not installed")
class TestAnthropicMultimodal:
    """Multimodal behavior of the native Anthropic provider."""

    def test_supports_multimodal_claude3(self) -> None:
        """Claude 3 reports multimodal support."""
        assert LLM(model="anthropic/claude-3-sonnet-20240229").supports_multimodal() is True

    def test_supports_multimodal_claude4(self) -> None:
        """Claude 4 reports multimodal support."""
        assert LLM(model="anthropic/claude-4-opus").supports_multimodal() is True

    def test_supported_content_types(self) -> None:
        """Anthropic accepts images and PDFs."""
        supported = LLM(model="anthropic/claude-3-sonnet-20240229").supported_multimodal_content_types()
        assert "image/" in supported
        assert "application/pdf" in supported

    def test_format_multimodal_content_image(self) -> None:
        """Images use Anthropic's source-based block structure."""
        provider = LLM(model="anthropic/claude-3-sonnet-20240229")

        parts = provider.format_multimodal_content({"chart": ImageFile(source=MINIMAL_PNG)})

        assert len(parts) == 1
        block = parts[0]
        assert block["type"] == "image"
        assert block["source"]["type"] == "base64"
        assert block["source"]["media_type"] == "image/png"
        assert "data" in block["source"]

    def test_format_multimodal_content_pdf(self) -> None:
        """PDFs use Anthropic's document block structure."""
        provider = LLM(model="anthropic/claude-3-sonnet-20240229")

        parts = provider.format_multimodal_content({"doc": PDFFile(source=MINIMAL_PDF)})

        assert len(parts) == 1
        block = parts[0]
        assert block["type"] == "document"
        assert block["source"]["type"] == "base64"
        assert block["source"]["media_type"] == "application/pdf"
|
||||
|
||||
|
||||
class TestOpenAIMultimodal:
    """Multimodal behavior of the native OpenAI provider."""

    def test_supports_multimodal_gpt4o(self) -> None:
        """GPT-4o reports multimodal support."""
        assert LLM(model="openai/gpt-4o").supports_multimodal() is True

    def test_supports_multimodal_gpt4_vision(self) -> None:
        """GPT-4 Vision reports multimodal support."""
        assert LLM(model="openai/gpt-4-vision-preview").supports_multimodal() is True

    def test_supports_multimodal_o1(self) -> None:
        """O1 reports multimodal support."""
        assert LLM(model="openai/o1-preview").supports_multimodal() is True

    def test_does_not_support_gpt35(self) -> None:
        """GPT-3.5 reports no multimodal support."""
        assert LLM(model="openai/gpt-3.5-turbo").supports_multimodal() is False

    def test_supported_content_types(self) -> None:
        """OpenAI accepts images only."""
        assert LLM(model="openai/gpt-4o").supported_multimodal_content_types() == ["image/"]

    def test_format_multimodal_content_image(self) -> None:
        """Images become an image_url part carrying a base64 data URI."""
        provider = LLM(model="openai/gpt-4o")

        parts = provider.format_multimodal_content({"chart": ImageFile(source=MINIMAL_PNG)})

        assert len(parts) == 1
        assert parts[0]["type"] == "image_url"
        data_url = parts[0]["image_url"]["url"]
        prefix = "data:image/png;base64,"
        assert data_url.startswith(prefix)
        # Round-trip: the payload must decode back to the original bytes.
        assert base64.b64decode(data_url[len(prefix):]) == MINIMAL_PNG
|
||||
|
||||
|
||||
class TestGeminiMultimodal:
    """Multimodal behavior of the native Gemini provider."""

    def test_supports_multimodal_always_true(self) -> None:
        """Gemini models always report multimodal support."""
        assert LLM(model="gemini/gemini-pro").supports_multimodal() is True

    def test_supported_content_types(self) -> None:
        """Gemini accepts images, audio, video, PDFs and text."""
        supported = LLM(model="gemini/gemini-pro").supported_multimodal_content_types()
        for expected in ("image/", "audio/", "video/", "application/pdf", "text/"):
            assert expected in supported

    def test_format_multimodal_content_image(self) -> None:
        """Images use Gemini's inlineData structure."""
        provider = LLM(model="gemini/gemini-pro")

        parts = provider.format_multimodal_content({"chart": ImageFile(source=MINIMAL_PNG)})

        assert len(parts) == 1
        assert "inlineData" in parts[0]
        inline = parts[0]["inlineData"]
        assert inline["mimeType"] == "image/png"
        assert "data" in inline

    def test_format_text_content(self) -> None:
        """Text formats to Gemini's bare text-key structure."""
        provider = LLM(model="gemini/gemini-pro")

        formatted = provider.format_text_content("Hello world")

        assert formatted == {"text": "Hello world"}
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_AZURE, reason="Azure AI Inference SDK not installed")
class TestAzureMultimodal:
    """Multimodal behavior of the Azure OpenAI provider."""

    @pytest.fixture(autouse=True)
    def mock_azure_env(self):
        """Provide the Azure endpoint, key and API version for every test."""
        azure_settings = {
            "AZURE_API_KEY": "test-key",
            "AZURE_API_BASE": "https://test.openai.azure.com",
            "AZURE_API_VERSION": "2024-02-01",
        }
        with patch.dict(os.environ, azure_settings):
            yield

    def test_supports_multimodal_gpt4o(self) -> None:
        """Azure-hosted GPT-4o reports multimodal support."""
        assert LLM(model="azure/gpt-4o").supports_multimodal() is True

    def test_supports_multimodal_gpt4_turbo(self) -> None:
        """Azure-hosted GPT-4 Turbo reports multimodal support."""
        assert LLM(model="azure/gpt-4-turbo").supports_multimodal() is True

    def test_does_not_support_gpt35(self) -> None:
        """Azure-hosted GPT-3.5 reports no multimodal support."""
        assert LLM(model="azure/gpt-35-turbo").supports_multimodal() is False

    def test_supported_content_types(self) -> None:
        """Azure accepts images only."""
        assert LLM(model="azure/gpt-4o").supported_multimodal_content_types() == ["image/"]

    def test_format_multimodal_content_image(self) -> None:
        """Azure renders images the same way OpenAI does: image_url data URI."""
        provider = LLM(model="azure/gpt-4o")

        parts = provider.format_multimodal_content({"chart": ImageFile(source=MINIMAL_PNG)})

        assert len(parts) == 1
        assert parts[0]["type"] == "image_url"
        assert "data:image/png;base64," in parts[0]["image_url"]["url"]
|
||||
|
||||
|
||||
@pytest.mark.skipif(not HAS_BEDROCK, reason="AWS Bedrock SDK not installed")
class TestBedrockMultimodal:
    """Multimodal behavior of the AWS Bedrock provider."""

    @pytest.fixture(autouse=True)
    def mock_bedrock_env(self):
        """Provide the AWS credentials and default region for every test."""
        aws_settings = {
            "AWS_ACCESS_KEY_ID": "test-key",
            "AWS_SECRET_ACCESS_KEY": "test-secret",
            "AWS_DEFAULT_REGION": "us-east-1",
        }
        with patch.dict(os.environ, aws_settings):
            yield

    def test_supports_multimodal_claude3(self) -> None:
        """Bedrock Claude 3 reports multimodal support."""
        assert LLM(model="bedrock/anthropic.claude-3-sonnet").supports_multimodal() is True

    def test_does_not_support_claude2(self) -> None:
        """Bedrock Claude 2 reports no multimodal support."""
        assert LLM(model="bedrock/anthropic.claude-v2").supports_multimodal() is False

    def test_supported_content_types(self) -> None:
        """Bedrock accepts images and PDFs."""
        supported = LLM(model="bedrock/anthropic.claude-3-sonnet").supported_multimodal_content_types()
        assert "image/" in supported
        assert "application/pdf" in supported

    def test_format_multimodal_content_image(self) -> None:
        """Images use the Converse API image structure (format + byte source)."""
        provider = LLM(model="bedrock/anthropic.claude-3-sonnet")

        parts = provider.format_multimodal_content({"chart": ImageFile(source=MINIMAL_PNG)})

        assert len(parts) == 1
        assert "image" in parts[0]
        image_block = parts[0]["image"]
        assert image_block["format"] == "png"
        assert "source" in image_block
        assert "bytes" in image_block["source"]

    def test_format_multimodal_content_pdf(self) -> None:
        """PDFs use the Converse API document structure."""
        provider = LLM(model="bedrock/anthropic.claude-3-sonnet")

        parts = provider.format_multimodal_content({"doc": PDFFile(source=MINIMAL_PDF)})

        assert len(parts) == 1
        assert "document" in parts[0]
        doc_block = parts[0]["document"]
        assert doc_block["format"] == "pdf"
        assert "source" in doc_block
|
||||
|
||||
|
||||
class TestBaseLLMMultimodal:
    """Tests for BaseLLM default multimodal behavior.

    BaseLLM is the abstract fallback: unless a provider overrides the
    multimodal hooks, it advertises no multimodal support, formats no
    multimodal content, and emits OpenAI/Anthropic-style text parts.
    """

    @staticmethod
    def _make_stub_llm():
        """Build a minimal concrete BaseLLM instance for exercising defaults.

        The original tests copy-pasted the same throwaway subclass (named
        ``TestLLM``, which reads like a pytest-collectible class) four times;
        this helper defines it once. The import stays local so the class is
        still only pulled in when these tests actually run, and the leading
        underscore keeps pytest from ever trying to collect the stub.
        """
        from crewai.llms.base_llm import BaseLLM

        class _StubLLM(BaseLLM):
            def call(self, messages, tools=None, callbacks=None):
                return "test"

        return _StubLLM(model="test")

    def test_base_supports_multimodal_false(self) -> None:
        """Test base implementation returns False."""
        llm = self._make_stub_llm()
        assert llm.supports_multimodal() is False

    def test_base_supported_content_types_empty(self) -> None:
        """Test base implementation returns empty list."""
        llm = self._make_stub_llm()
        assert llm.supported_multimodal_content_types() == []

    def test_base_format_multimodal_content_empty(self) -> None:
        """Test base implementation returns empty list."""
        llm = self._make_stub_llm()
        files = {"chart": ImageFile(source=MINIMAL_PNG)}
        assert llm.format_multimodal_content(files) == []

    def test_base_format_text_content(self) -> None:
        """Test base text formatting uses OpenAI/Anthropic style."""
        llm = self._make_stub_llm()
        result = llm.format_text_content("Hello")
        assert result == {"type": "text", "text": "Hello"}
|
||||
|
||||
|
||||
class TestMultipleFilesFormatting:
    """Tests for formatting several files in a single call."""

    def test_format_multiple_images(self) -> None:
        """Each supported file yields exactly one content part."""
        model = LLM(model="gpt-4o")
        images = {
            "chart1": ImageFile(source=MINIMAL_PNG),
            "chart2": ImageFile(source=MINIMAL_PNG),
        }

        assert len(model.format_multimodal_content(images)) == 2

    def test_format_mixed_supported_and_unsupported(self) -> None:
        """Unsupported file types are filtered out; supported ones remain."""
        model = LLM(model="gpt-4o")  # OpenAI - images only
        mixed = {
            "chart": ImageFile(source=MINIMAL_PNG),
            "doc": PDFFile(source=MINIMAL_PDF),  # Not supported
            "text": TextFile(source=b"hello"),  # Not supported
        }

        parts = model.format_multimodal_content(mixed)

        assert len(parts) == 1
        assert parts[0]["type"] == "image_url"

    def test_format_empty_files_dict(self) -> None:
        """No files in, no content parts out."""
        assert LLM(model="gpt-4o").format_multimodal_content({}) == []
|
||||
Reference in New Issue
Block a user