Files
crewAI/lib/crewai-files/tests/test_file_url.py

312 lines
12 KiB
Python

"""Tests for FileUrl source type and URL resolution."""
from unittest.mock import AsyncMock, MagicMock, patch
from crewai_files import FileBytes, FileUrl, ImageFile
from crewai_files.core.resolved import InlineBase64, UrlReference
from crewai_files.core.sources import FilePath, _normalize_source
from crewai_files.resolution.resolver import FileResolver
import pytest
class TestFileUrl:
"""Tests for FileUrl source type."""
def test_create_file_url(self):
"""Test creating FileUrl with valid URL."""
url = FileUrl(url="https://example.com/image.png")
assert url.url == "https://example.com/image.png"
assert url.filename is None
def test_create_file_url_with_filename(self):
"""Test creating FileUrl with custom filename."""
url = FileUrl(url="https://example.com/image.png", filename="custom.png")
assert url.url == "https://example.com/image.png"
assert url.filename == "custom.png"
def test_invalid_url_scheme_raises(self):
"""Test that non-http(s) URLs raise ValueError."""
with pytest.raises(ValueError, match="Invalid URL scheme"):
FileUrl(url="ftp://example.com/file.txt")
def test_invalid_url_scheme_file_raises(self):
"""Test that file:// URLs raise ValueError."""
with pytest.raises(ValueError, match="Invalid URL scheme"):
FileUrl(url="file:///path/to/file.txt")
def test_http_url_valid(self):
"""Test that HTTP URLs are valid."""
url = FileUrl(url="http://example.com/image.jpg")
assert url.url == "http://example.com/image.jpg"
def test_https_url_valid(self):
"""Test that HTTPS URLs are valid."""
url = FileUrl(url="https://example.com/image.jpg")
assert url.url == "https://example.com/image.jpg"
def test_content_type_guessing_png(self):
"""Test content type guessing for PNG files."""
url = FileUrl(url="https://example.com/image.png")
assert url.content_type == "image/png"
def test_content_type_guessing_jpeg(self):
"""Test content type guessing for JPEG files."""
url = FileUrl(url="https://example.com/photo.jpg")
assert url.content_type == "image/jpeg"
def test_content_type_guessing_pdf(self):
"""Test content type guessing for PDF files."""
url = FileUrl(url="https://example.com/document.pdf")
assert url.content_type == "application/pdf"
def test_content_type_guessing_with_query_params(self):
"""Test content type guessing with URL query parameters."""
url = FileUrl(url="https://example.com/image.png?v=123&token=abc")
assert url.content_type == "image/png"
def test_content_type_fallback_unknown(self):
"""Test content type falls back to octet-stream for unknown extensions."""
url = FileUrl(url="https://example.com/file.unknownext123")
assert url.content_type == "application/octet-stream"
def test_content_type_no_extension(self):
"""Test content type for URL without extension."""
url = FileUrl(url="https://example.com/file")
assert url.content_type == "application/octet-stream"
def test_read_fetches_content(self):
"""Test that read() fetches content from URL."""
url = FileUrl(url="https://example.com/image.png")
mock_response = MagicMock()
mock_response.content = b"fake image content"
mock_response.headers = {"content-type": "image/png"}
with patch("httpx.get", return_value=mock_response) as mock_get:
content = url.read()
mock_get.assert_called_once_with(
"https://example.com/image.png", follow_redirects=True
)
assert content == b"fake image content"
def test_read_caches_content(self):
"""Test that read() caches content."""
url = FileUrl(url="https://example.com/image.png")
mock_response = MagicMock()
mock_response.content = b"fake content"
mock_response.headers = {}
with patch("httpx.get", return_value=mock_response) as mock_get:
content1 = url.read()
content2 = url.read()
mock_get.assert_called_once()
assert content1 == content2
def test_read_updates_content_type_from_response(self):
"""Test that read() updates content type from response headers."""
url = FileUrl(url="https://example.com/file")
mock_response = MagicMock()
mock_response.content = b"fake content"
mock_response.headers = {"content-type": "image/webp; charset=utf-8"}
with patch("httpx.get", return_value=mock_response):
url.read()
assert url.content_type == "image/webp"
@pytest.mark.asyncio
async def test_aread_fetches_content(self):
"""Test that aread() fetches content from URL asynchronously."""
url = FileUrl(url="https://example.com/image.png")
mock_response = MagicMock()
mock_response.content = b"async fake content"
mock_response.headers = {"content-type": "image/png"}
mock_response.raise_for_status = MagicMock()
mock_client = MagicMock()
mock_client.get = AsyncMock(return_value=mock_response)
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=None)
with patch("httpx.AsyncClient", return_value=mock_client):
content = await url.aread()
assert content == b"async fake content"
@pytest.mark.asyncio
async def test_aread_caches_content(self):
"""Test that aread() caches content."""
url = FileUrl(url="https://example.com/image.png")
mock_response = MagicMock()
mock_response.content = b"cached content"
mock_response.headers = {}
mock_response.raise_for_status = MagicMock()
mock_client = MagicMock()
mock_client.get = AsyncMock(return_value=mock_response)
mock_client.__aenter__ = AsyncMock(return_value=mock_client)
mock_client.__aexit__ = AsyncMock(return_value=None)
with patch("httpx.AsyncClient", return_value=mock_client):
content1 = await url.aread()
content2 = await url.aread()
mock_client.get.assert_called_once()
assert content1 == content2
class TestNormalizeSource:
"""Tests for _normalize_source with URL detection."""
def test_normalize_url_string(self):
"""Test that URL strings are converted to FileUrl."""
result = _normalize_source("https://example.com/image.png")
assert isinstance(result, FileUrl)
assert result.url == "https://example.com/image.png"
def test_normalize_http_url_string(self):
"""Test that HTTP URL strings are converted to FileUrl."""
result = _normalize_source("http://example.com/file.pdf")
assert isinstance(result, FileUrl)
assert result.url == "http://example.com/file.pdf"
def test_normalize_file_path_string(self, tmp_path):
"""Test that file path strings are converted to FilePath."""
test_file = tmp_path / "test.png"
test_file.write_bytes(b"test content")
result = _normalize_source(str(test_file))
assert isinstance(result, FilePath)
def test_normalize_relative_path_is_not_url(self):
"""Test that relative path strings are not treated as URLs."""
result = _normalize_source("https://example.com/file.png")
assert isinstance(result, FileUrl)
assert not isinstance(result, FilePath)
def test_normalize_file_url_passthrough(self):
"""Test that FileUrl instances pass through unchanged."""
original = FileUrl(url="https://example.com/image.png")
result = _normalize_source(original)
assert result is original
class TestResolverUrlHandling:
"""Tests for FileResolver URL handling."""
def test_resolve_url_source_for_supported_provider(self):
"""Test URL source resolves to UrlReference for supported providers."""
resolver = FileResolver()
file = ImageFile(source=FileUrl(url="https://example.com/image.png"))
resolved = resolver.resolve(file, "anthropic")
assert isinstance(resolved, UrlReference)
assert resolved.url == "https://example.com/image.png"
assert resolved.content_type == "image/png"
def test_resolve_url_source_openai(self):
"""Test URL source resolves to UrlReference for OpenAI."""
resolver = FileResolver()
file = ImageFile(source=FileUrl(url="https://example.com/photo.jpg"))
resolved = resolver.resolve(file, "openai")
assert isinstance(resolved, UrlReference)
assert resolved.url == "https://example.com/photo.jpg"
def test_resolve_url_source_gemini(self):
"""Test URL source resolves to UrlReference for Gemini."""
resolver = FileResolver()
file = ImageFile(source=FileUrl(url="https://example.com/image.webp"))
resolved = resolver.resolve(file, "gemini")
assert isinstance(resolved, UrlReference)
assert resolved.url == "https://example.com/image.webp"
def test_resolve_url_source_azure(self):
"""Test URL source resolves to UrlReference for Azure."""
resolver = FileResolver()
file = ImageFile(source=FileUrl(url="https://example.com/image.gif"))
resolved = resolver.resolve(file, "azure")
assert isinstance(resolved, UrlReference)
assert resolved.url == "https://example.com/image.gif"
def test_resolve_url_source_bedrock_fetches_content(self):
"""Test URL source fetches content for Bedrock (unsupported URLs)."""
resolver = FileResolver()
file_url = FileUrl(url="https://example.com/image.png")
file = ImageFile(source=file_url)
mock_response = MagicMock()
mock_response.content = b"\x89PNG\r\n\x1a\n" + b"\x00" * 50
mock_response.headers = {"content-type": "image/png"}
with patch("httpx.get", return_value=mock_response):
resolved = resolver.resolve(file, "bedrock")
assert not isinstance(resolved, UrlReference)
def test_resolve_bytes_source_still_works(self):
"""Test that bytes source still resolves normally."""
resolver = FileResolver()
minimal_png = (
b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x08\x00\x00\x00\x08"
b"\x01\x00\x00\x00\x00\xf9Y\xab\xcd\x00\x00\x00\nIDATx\x9cc`\x00\x00"
b"\x00\x02\x00\x01\xe2!\xbc3\x00\x00\x00\x00IEND\xaeB`\x82"
)
file = ImageFile(source=FileBytes(data=minimal_png, filename="test.png"))
resolved = resolver.resolve(file, "anthropic")
assert isinstance(resolved, InlineBase64)
@pytest.mark.asyncio
async def test_aresolve_url_source(self):
"""Test async URL resolution for supported provider."""
resolver = FileResolver()
file = ImageFile(source=FileUrl(url="https://example.com/image.png"))
resolved = await resolver.aresolve(file, "anthropic")
assert isinstance(resolved, UrlReference)
assert resolved.url == "https://example.com/image.png"
class TestImageFileWithUrl:
"""Tests for creating ImageFile with URL source."""
def test_image_file_from_url_string(self):
"""Test creating ImageFile from URL string."""
file = ImageFile(source="https://example.com/image.png")
assert isinstance(file.source, FileUrl)
assert file.source.url == "https://example.com/image.png"
def test_image_file_from_file_url(self):
"""Test creating ImageFile from FileUrl instance."""
url = FileUrl(url="https://example.com/photo.jpg")
file = ImageFile(source=url)
assert file.source is url
assert file.content_type == "image/jpeg"