"""Integration tests for Crew multimodal functionality with input_files. Tests crew.kickoff(input_files={...}) across different providers and file types. """ from pathlib import Path import pytest from crewai import Agent, Crew, LLM, Task from crewai_files import AudioFile, File, ImageFile, PDFFile, TextFile, VideoFile TEST_FIXTURES_DIR = ( Path(__file__).parent.parent.parent / "crewai-files" / "tests" / "fixtures" ) TEST_IMAGE_PATH = TEST_FIXTURES_DIR / "revenue_chart.png" TEST_TEXT_PATH = TEST_FIXTURES_DIR / "review_guidelines.txt" TEST_VIDEO_PATH = TEST_FIXTURES_DIR / "sample_video.mp4" TEST_AUDIO_PATH = TEST_FIXTURES_DIR / "sample_audio.wav" TEST_PDF_PATH = TEST_FIXTURES_DIR / "agents.pdf" OPENAI_IMAGE_MODELS = [ "openai/gpt-4o-mini", "openai/gpt-4o", "openai/o4-mini", "openai/gpt-4.1-mini", ] OPENAI_RESPONSES_MODELS = [ ("openai/gpt-4o-mini", "responses"), ("openai/o4-mini", "responses"), ] ANTHROPIC_MODELS = [ "anthropic/claude-3-5-haiku-20241022", "anthropic/claude-sonnet-4-20250514", ] GEMINI_MODELS = [ "gemini/gemini-2.0-flash", ] BEDROCK_MODELS = [ "bedrock/anthropic.claude-3-haiku-20240307-v1:0", ] @pytest.fixture def image_file() -> ImageFile: """Create an ImageFile from test fixture.""" return ImageFile(source=str(TEST_IMAGE_PATH)) @pytest.fixture def image_bytes() -> bytes: """Load test image bytes.""" return TEST_IMAGE_PATH.read_bytes() @pytest.fixture def text_file() -> TextFile: """Create a TextFile from test fixture.""" return TextFile(source=str(TEST_TEXT_PATH)) @pytest.fixture def text_bytes() -> bytes: """Load test text bytes.""" return TEST_TEXT_PATH.read_bytes() @pytest.fixture def pdf_file() -> PDFFile: """Create a PDFFile from test fixture.""" return PDFFile(source=str(TEST_PDF_PATH)) @pytest.fixture def video_file() -> VideoFile: """Create a VideoFile from test fixture.""" if not TEST_VIDEO_PATH.exists(): pytest.skip("sample_video.mp4 fixture not found") return VideoFile(source=str(TEST_VIDEO_PATH)) @pytest.fixture def audio_file() -> AudioFile: """Create an AudioFile from test fixture.""" if not TEST_AUDIO_PATH.exists(): pytest.skip("sample_audio.wav fixture not found") return AudioFile(source=str(TEST_AUDIO_PATH)) def _create_analyst_crew(llm: LLM) -> Crew: """Create a simple analyst crew for file analysis.""" agent = Agent( role="File Analyst", goal="Analyze and describe files accurately", backstory="Expert at analyzing various file types.", llm=llm, verbose=False, ) task = Task( description="Describe the file(s) you see. Be brief, one sentence max.", expected_output="A brief description of the file.", agent=agent, ) return Crew(agents=[agent], tasks=[task], verbose=False) class TestCrewMultimodalOpenAI: """Test Crew with input_files using OpenAI models.""" @pytest.mark.vcr() @pytest.mark.parametrize("model", OPENAI_IMAGE_MODELS) def test_image_file(self, model: str, image_file: ImageFile) -> None: """Test crew can process an image file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": image_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", OPENAI_IMAGE_MODELS) def test_image_bytes(self, model: str, image_bytes: bytes) -> None: """Test crew can process image bytes.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": ImageFile(source=image_bytes)}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", OPENAI_IMAGE_MODELS) def test_generic_file_image(self, model: str, image_bytes: bytes) -> None: """Test crew can process generic File with auto-detected image.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": File(source=image_bytes)}) assert result.raw assert len(result.raw) > 0 class TestCrewMultimodalOpenAIResponses: """Test Crew with input_files using OpenAI Responses API.""" @pytest.mark.vcr() @pytest.mark.parametrize("model,api", OPENAI_RESPONSES_MODELS) def test_image_file( self, model: str, api: str, image_file: ImageFile ) -> None: """Test crew can process an image file with Responses API.""" llm = LLM(model=model, api=api) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": image_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model,api", OPENAI_RESPONSES_MODELS) def test_pdf_file(self, model: str, api: str, pdf_file: PDFFile) -> None: """Test crew can process a PDF file with Responses API.""" llm = LLM(model=model, api=api) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"document": pdf_file}) assert result.raw assert len(result.raw) > 0 class TestCrewMultimodalAnthropic: """Test Crew with input_files using Anthropic models.""" @pytest.mark.vcr() @pytest.mark.parametrize("model", ANTHROPIC_MODELS) def test_image_file(self, model: str, image_file: ImageFile) -> None: """Test crew can process an image file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": image_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", ANTHROPIC_MODELS) def test_pdf_file(self, model: str, pdf_file: PDFFile) -> None: """Test crew can process a PDF file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"document": pdf_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", ANTHROPIC_MODELS) def test_mixed_files( self, model: str, image_file: ImageFile, pdf_file: PDFFile ) -> None: """Test crew can process multiple file types together.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff( input_files={"chart": image_file, "document": pdf_file} ) assert result.raw assert len(result.raw) > 0 class TestCrewMultimodalGemini: """Test Crew with input_files using Gemini models.""" @pytest.mark.vcr() @pytest.mark.parametrize("model", GEMINI_MODELS) def test_image_file(self, model: str, image_file: ImageFile) -> None: """Test crew can process an image file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": image_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", GEMINI_MODELS) def test_text_file(self, model: str, text_file: TextFile) -> None: """Test crew can process a text file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"readme": text_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", GEMINI_MODELS) def test_video_file(self, model: str, video_file: VideoFile) -> None: """Test crew can process a video file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"video": video_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", GEMINI_MODELS) def test_audio_file(self, model: str, audio_file: AudioFile) -> None: """Test crew can process an audio file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"audio": audio_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", GEMINI_MODELS) def test_mixed_files( self, model: str, image_file: ImageFile, text_file: TextFile, ) -> None: """Test crew can process multiple file types together.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff( input_files={"chart": image_file, "readme": text_file} ) assert result.raw assert len(result.raw) > 0 class TestCrewMultimodalBedrock: """Test Crew with input_files using Bedrock models.""" @pytest.mark.vcr() @pytest.mark.parametrize("model", BEDROCK_MODELS) def test_image_file(self, model: str, image_file: ImageFile) -> None: """Test crew can process an image file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": image_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() @pytest.mark.parametrize("model", BEDROCK_MODELS) def test_pdf_file(self, model: str, pdf_file: PDFFile) -> None: """Test crew can process a PDF file.""" llm = LLM(model=model) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"document": pdf_file}) assert result.raw assert len(result.raw) > 0 class TestCrewMultimodalFileTypes: """Test all file types with appropriate providers.""" @pytest.mark.vcr() def test_image_openai(self, image_file: ImageFile) -> None: """Test image file with OpenAI.""" llm = LLM(model="openai/gpt-4o-mini") crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"image": image_file}) assert result.raw @pytest.mark.vcr() def test_pdf_anthropic(self, pdf_file: PDFFile) -> None: """Test PDF file with Anthropic.""" llm = LLM(model="anthropic/claude-3-5-haiku-20241022") crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"document": pdf_file}) assert result.raw @pytest.mark.vcr() def test_pdf_openai_responses(self, pdf_file: PDFFile) -> None: """Test PDF file with OpenAI Responses API.""" llm = LLM(model="openai/gpt-4o-mini", api="responses") crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"document": pdf_file}) assert result.raw @pytest.mark.vcr() def test_text_gemini(self, text_file: TextFile) -> None: """Test text file with Gemini.""" llm = LLM(model="gemini/gemini-2.0-flash") crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"readme": text_file}) assert result.raw @pytest.mark.vcr() def test_video_gemini(self, video_file: VideoFile) -> None: """Test video file with Gemini.""" llm = LLM(model="gemini/gemini-2.0-flash") crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"video": video_file}) assert result.raw @pytest.mark.vcr() def test_audio_gemini(self, audio_file: AudioFile) -> None: """Test audio file with Gemini.""" llm = LLM(model="gemini/gemini-2.0-flash") crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"audio": audio_file}) assert result.raw class TestCrewMultimodalUnsupportedTypes: """Test that unsupported file types fall back to read_file tool.""" @pytest.mark.vcr() def test_video_with_openai_uses_tool(self, video_file: VideoFile) -> None: """Test video with OpenAI (no video support) uses read_file tool.""" llm = LLM(model="openai/gpt-4o-mini") agent = Agent( role="File Analyst", goal="Analyze files", backstory="Expert analyst.", llm=llm, verbose=False, ) task = Task( description="What type of file is this? Just name the file type.", expected_output="The file type.", agent=agent, ) crew = Crew(agents=[agent], tasks=[task], verbose=False) result = crew.kickoff(input_files={"video": video_file}) assert result.raw # Should mention video or the filename since it can't directly process it @pytest.mark.vcr() def test_audio_with_anthropic_uses_tool(self, audio_file: AudioFile) -> None: """Test audio with Anthropic (no audio support) uses read_file tool.""" llm = LLM(model="anthropic/claude-3-5-haiku-20241022") agent = Agent( role="File Analyst", goal="Analyze files", backstory="Expert analyst.", llm=llm, verbose=False, ) task = Task( description="What type of file is this? Just name the file type.", expected_output="The file type.", agent=agent, ) crew = Crew(agents=[agent], tasks=[task], verbose=False) result = crew.kickoff(input_files={"audio": audio_file}) assert result.raw class TestCrewMultimodalFileUpload: """Test file upload functionality with prefer_upload=True.""" @pytest.mark.vcr() def test_image_upload_anthropic(self, image_file: ImageFile) -> None: """Test image upload to Anthropic Files API.""" llm = LLM(model="anthropic/claude-3-5-haiku-20241022", prefer_upload=True) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": image_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() def test_image_upload_openai_responses(self, image_file: ImageFile) -> None: """Test image upload to OpenAI Files API via Responses API.""" llm = LLM(model="openai/gpt-4o-mini", api="responses", prefer_upload=True) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"chart": image_file}) assert result.raw assert len(result.raw) > 0 @pytest.mark.vcr() def test_pdf_upload_anthropic(self, pdf_file: PDFFile) -> None: """Test PDF upload to Anthropic Files API.""" llm = LLM(model="anthropic/claude-3-5-haiku-20241022", prefer_upload=True) crew = _create_analyst_crew(llm) result = crew.kickoff(input_files={"document": pdf_file}) assert result.raw assert len(result.raw) > 0