From 3dc8c45cc912ccfea46118eb8e16cf8a8ec54718 Mon Sep 17 00:00:00 2001 From: Iris Clawd Date: Tue, 5 May 2026 03:57:09 +0000 Subject: [PATCH] fix(security): validate IPs on every redirect hop to prevent SSRF bypass (OSS-51) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a custom HTTPAdapter (_SSRFSafeAdapter) that intercepts every request — including redirect hops — and validates the resolved IP against the private/reserved blocklist before the connection proceeds. New public API: - safe_request_session(): returns a Session with the adapter mounted - safe_get(url, **kwargs): drop-in replacement for requests.get() that validates the initial URL AND every redirect destination Updated tools to use safe_get() instead of validate_url() + requests.get(): - ScrapeWebsiteTool - ScrapeElementFromWebsiteTool - WebPageLoader (RAG) Closes OSS-51 --- .../rag/loaders/webpage_loader.py | 5 +- .../src/crewai_tools/security/safe_path.py | 70 +++++++++++++++++++ .../scrape_element_from_website.py | 7 +- .../scrape_website_tool.py | 7 +- .../tests/utilities/test_safe_path.py | 62 ++++++++++++++++ 5 files changed, 138 insertions(+), 13 deletions(-) diff --git a/lib/crewai-tools/src/crewai_tools/rag/loaders/webpage_loader.py b/lib/crewai-tools/src/crewai_tools/rag/loaders/webpage_loader.py index 5d9a2d180..1e7b1129a 100644 --- a/lib/crewai-tools/src/crewai_tools/rag/loaders/webpage_loader.py +++ b/lib/crewai-tools/src/crewai_tools/rag/loaders/webpage_loader.py @@ -2,9 +2,8 @@ import re from typing import Any, Final from bs4 import BeautifulSoup -import requests - from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.security.safe_path import safe_get from crewai_tools.rag.source_content import SourceContent @@ -25,7 +24,7 @@ class WebPageLoader(BaseLoader): ) try: - response = requests.get(url, timeout=15, headers=headers) + response = safe_get(url, timeout=15, headers=headers) response.encoding = response.apparent_encoding soup = BeautifulSoup(response.text, "html.parser") diff --git a/lib/crewai-tools/src/crewai_tools/security/safe_path.py b/lib/crewai-tools/src/crewai_tools/security/safe_path.py index 4dde68e12..582e92878 100644 --- a/lib/crewai-tools/src/crewai_tools/security/safe_path.py +++ b/lib/crewai-tools/src/crewai_tools/security/safe_path.py @@ -16,6 +16,9 @@ import os import socket from urllib.parse import urlparse +import requests +from requests.adapters import HTTPAdapter + logger = logging.getLogger(__name__) @@ -203,3 +206,70 @@ def validate_url(url: str) -> str: ) return url + + +# --------------------------------------------------------------------------- +# SSRF-safe HTTP requests (validates IPs on every redirect hop) +# --------------------------------------------------------------------------- + + +class _SSRFSafeAdapter(HTTPAdapter): + """HTTPAdapter that validates the resolved IP of every request — including + redirect hops — against the private/reserved blocklist before the + connection is made.""" + + def send(self, request, **kwargs): + parsed = urlparse(request.url) + if not _is_escape_hatch_enabled() and parsed.hostname: + try: + port = parsed.port or (443 if parsed.scheme == "https" else 80) + addrinfos = socket.getaddrinfo(parsed.hostname, port) + except socket.gaierror as exc: + raise ValueError( + f"Could not resolve hostname: '{parsed.hostname}'" + ) from exc + + for _family, _, _, _, sockaddr in addrinfos: + ip_str = str(sockaddr[0]) + if _is_private_or_reserved(ip_str): + raise ValueError( + f"Redirect to '{request.url}' blocked: resolves to " + f"private/reserved IP {ip_str}. Access to internal " + f"networks is not allowed. " + f"Set {_UNSAFE_PATHS_ENV}=true to bypass." + ) + + return super().send(request, **kwargs) + + +def safe_request_session() -> requests.Session: + """Return a :class:`requests.Session` that validates every connection + target (including redirect destinations) against the SSRF blocklist.""" + session = requests.Session() + adapter = _SSRFSafeAdapter() + session.mount("http://", adapter) + session.mount("https://", adapter) + return session + + +def safe_get(url: str, **kwargs) -> requests.Response: + """Drop-in replacement for ``requests.get()`` with SSRF protection. + + Validates the initial URL via :func:`validate_url`, then executes the + request through a session whose adapter re-checks every redirect hop. + + Args: + url: The URL to fetch. + **kwargs: Passed through to ``session.get()`` (headers, cookies, + timeout, etc.). + + Returns: + The :class:`requests.Response`. + + Raises: + ValueError: If the initial URL or any redirect target resolves to + a private/reserved IP. + """ + validate_url(url) + session = safe_request_session() + return session.get(url, **kwargs) diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index 7bba12b72..f56d00f94 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -3,9 +3,7 @@ from typing import Any from crewai.tools import BaseTool from pydantic import BaseModel, Field -import requests - -from crewai_tools.security.safe_path import validate_url +from crewai_tools.security.safe_path import safe_get try: @@ -83,8 +81,7 @@ class ScrapeElementFromWebsiteTool(BaseTool): if website_url is None or css_element is None: raise ValueError("Both website_url and css_element must be provided.") - website_url = validate_url(website_url) - page = requests.get( + page = safe_get( website_url, headers=self.headers, cookies=self.cookies if self.cookies else {}, diff --git a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index d297dfe08..0cacab59f 100644 --- a/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/lib/crewai-tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -3,9 +3,7 @@ import re from typing import Any from pydantic import Field -import requests - -from crewai_tools.security.safe_path import validate_url +from crewai_tools.security.safe_path import safe_get try: @@ -75,8 +73,7 @@ class ScrapeWebsiteTool(BaseTool): if website_url is None: raise ValueError("Website URL must be provided.") - website_url = validate_url(website_url) - page = requests.get( + page = safe_get( website_url, timeout=15, headers=self.headers, diff --git a/lib/crewai-tools/tests/utilities/test_safe_path.py b/lib/crewai-tools/tests/utilities/test_safe_path.py index 4fb5d1ec7..a7b717613 100644 --- a/lib/crewai-tools/tests/utilities/test_safe_path.py +++ b/lib/crewai-tools/tests/utilities/test_safe_path.py @@ -6,7 +6,10 @@ import os import pytest +from unittest.mock import MagicMock, patch + from crewai_tools.security.safe_path import ( + safe_get, validate_directory_path, validate_file_path, validate_url, @@ -168,3 +171,62 @@ class TestValidateUrl: # file:// would normally be blocked result = validate_url("file:///etc/passwd") assert result == "file:///etc/passwd" + + +# --------------------------------------------------------------------------- +# safe_get — redirect-aware SSRF protection +# --------------------------------------------------------------------------- + + +def _fake_getaddrinfo_factory(ip: str): + """Return a getaddrinfo replacement that always resolves to *ip*.""" + def _fake(host, port, *args, **kwargs): + return [(2, 1, 6, "", (ip, port or 80))] + return _fake + + +class TestSafeGet: + """Tests for safe_get (validates IPs on every redirect hop).""" + + @patch("crewai_tools.security.safe_path.socket.getaddrinfo", + side_effect=_fake_getaddrinfo_factory("93.184.216.34")) + @patch("requests.adapters.HTTPAdapter.send") + def test_allows_public_url(self, mock_send, mock_dns): + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_redirect = False + mock_response.headers = {} + mock_send.return_value = mock_response + resp = safe_get("https://example.com/page") + assert resp.status_code == 200 + + @patch("crewai_tools.security.safe_path.socket.getaddrinfo", + side_effect=_fake_getaddrinfo_factory("127.0.0.1")) + def test_blocks_redirect_to_localhost(self, mock_dns): + with pytest.raises(ValueError, match="private/reserved IP"): + safe_get("http://evil.com/redirect") + + @patch("crewai_tools.security.safe_path.socket.getaddrinfo", + side_effect=_fake_getaddrinfo_factory("169.254.169.254")) + def test_blocks_redirect_to_metadata(self, mock_dns): + with pytest.raises(ValueError, match="private/reserved IP"): + safe_get("http://evil.com/metadata") + + @patch("crewai_tools.security.safe_path.socket.getaddrinfo", + side_effect=_fake_getaddrinfo_factory("10.0.0.1")) + def test_blocks_redirect_to_private_range(self, mock_dns): + with pytest.raises(ValueError, match="private/reserved IP"): + safe_get("http://evil.com/internal") + + @patch("crewai_tools.security.safe_path.socket.getaddrinfo", + side_effect=_fake_getaddrinfo_factory("169.254.169.254")) + @patch("requests.adapters.HTTPAdapter.send") + def test_escape_hatch_bypasses_redirect_check(self, mock_send, mock_dns, monkeypatch): + monkeypatch.setenv("CREWAI_TOOLS_ALLOW_UNSAFE_PATHS", "true") + mock_response = MagicMock() + mock_response.status_code = 200 + mock_response.is_redirect = False + mock_response.headers = {} + mock_send.return_value = mock_response + resp = safe_get("http://evil.com/metadata") + assert resp.status_code == 200