Skip to content

Commit

Permalink
fix: replace recursive approach to cdata with escaping solution
Browse files Browse the repository at this point in the history
  • Loading branch information
flavorjones committed Dec 11, 2022
1 parent 415677f commit 86f7f63
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 9 deletions.
40 changes: 40 additions & 0 deletions lib/loofah/html5/scrub.rb
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,46 @@ def force_correct_attribute_escaping!(node)
end.force_encoding(encoding)
end
end

def cdata_needs_escaping?(node)
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
end

def cdata_escape(node)
escaped_text = escape_tags(node.text)
if Nokogiri.jruby?
node.document.create_text_node(escaped_text)
else
node.document.create_cdata(escaped_text)
end
end

TABLE_FOR_ESCAPE_HTML__ = {
'<' => '&lt;',
'>' => '&gt;',
'&' => '&amp;',
}

def escape_tags(string)
# modified version of CGI.escapeHTML from ruby 3.1
enc = string.encoding
unless enc.ascii_compatible?
if enc.dummy?
origenc = enc
enc = Encoding::Converter.asciicompat_encoding(enc)
string = enc ? string.encode(enc) : string.b
end
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
string.encode!(origenc) if origenc
string
else
string = string.b
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
string.force_encoding(enc)
end
end
end
end
end
Expand Down
4 changes: 4 additions & 0 deletions lib/loofah/scrubber.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ def html5lib_sanitize(node)
return Scrubber::CONTINUE
end
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
if HTML5::Scrub.cdata_needs_escaping?(node)
node.before(HTML5::Scrub.cdata_escape(node))
return Scrubber::STOP
end
return Scrubber::CONTINUE
end
Scrubber::STOP
Expand Down
8 changes: 2 additions & 6 deletions lib/loofah/scrubbers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,9 @@ def initialize

def scrub(node)
return CONTINUE if html5lib_sanitize(node) == CONTINUE
if node.children.length == 1 && node.children.first.cdata?
sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
else
node.before node.children
end
node.before(node.children)
node.remove
return STOP
end
end

Expand Down
17 changes: 14 additions & 3 deletions test/integration/test_ad_hoc.rb
Original file line number Diff line number Diff line change
Expand Up @@ -100,17 +100,28 @@ def test_return_empty_string_when_nothing_left
end

def test_nested_script_cdata_tags_should_be_scrubbed
html = "<script><script src='malicious.js'></script>"
html = "<script><script src=\"malicious.js\">this & that</script>"
stripped = Loofah.fragment(html).scrub!(:strip)

assert_empty stripped.xpath("//script")
refute_match("<script", stripped.to_html)
assert_equal("&lt;script src=\"malicious.js\"&gt;this &amp; that", stripped.to_html)
end

def test_nested_script_cdata_tags_should_be_scrubbed_2
html = "<script><script>alert('a');</script></script>"
stripped = Loofah.fragment(html).scrub!(:strip)

assert_empty stripped.xpath("//script")
refute_match("<script", stripped.to_html)
assert_equal("&lt;script&gt;alert('a');", stripped.to_html)
end

def test_nested_script_cdata_tags_should_be_scrubbed_max_recursion
n = 40
html = "<div>" + ("<script>" * n) + "alert(1);" + ("</script>" * n) + "</div>"
expected = "<div>" + ("&lt;script&gt;" * (n-1)) + "alert(1);</div>"
actual = Loofah.fragment(html).scrub!(:strip).to_html

assert_equal(expected, actual)
end

def test_removal_of_all_tags
Expand Down

0 comments on commit 86f7f63

Please sign in to comment.