From d1223a29cb3e4151cdcb6ba6c8431708d8ce40a6 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Fri, 18 Nov 2022 00:10:30 -0500 Subject: [PATCH] fix: use Loofah's scrub_uri_attribute method which correctly sanitizes data URL mediatypes --- lib/rails/html/scrubbers.rb | 6 +---- test/sanitizer_test.rb | 50 +++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 5 deletions(-) diff --git a/lib/rails/html/scrubbers.rb b/lib/rails/html/scrubbers.rb index 18bac0a..f9e14cd 100644 --- a/lib/rails/html/scrubbers.rb +++ b/lib/rails/html/scrubbers.rb @@ -139,11 +139,7 @@ def scrub_attribute(node, attr_node) end if Loofah::HTML5::SafeList::ATTR_VAL_IS_URI.include?(attr_name) - # this block lifted nearly verbatim from HTML5 sanitization - val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(Loofah::HTML5::Scrub::CONTROL_CHARACTERS,'').downcase - if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && ! Loofah::HTML5::SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(Loofah::HTML5::SafeList::PROTOCOL_SEPARATOR)[0]) - attr_node.remove - end + return if Loofah::HTML5::Scrub.scrub_uri_attribute(attr_node) end if Loofah::HTML5::SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) diff --git a/test/sanitizer_test.rb b/test/sanitizer_test.rb index 8a1d5ac..7a60956 100644 --- a/test/sanitizer_test.rb +++ b/test/sanitizer_test.rb @@ -600,6 +600,56 @@ def test_disallow_the_dangerous_safelist_combination_of_select_and_style refute_includes(sanitized, "style") end + %w[text/plain text/css image/png image/gif image/jpeg].each do |mediatype| + define_method "test_mediatype_#{mediatype}_allowed" do + input = %Q() + expected = input + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %Q() + expected = input + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + end + + def test_mediatype_text_html_disallowed + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + + def test_mediatype_image_svg_xml_disallowed + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %q() + expected = %q() + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + + def test_mediatype_other_disallowed + input = %q(foo) + expected = %q(foo) + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + + input = %q(foo) + expected = %q(foo) + actual = safe_list_sanitize(input) + assert_equal(expected, actual) + end + def test_scrubbing_svg_attr_values_that_allow_ref input = %Q(
hey
) expected = %Q(
hey
)