Skip to content

Commit

Permalink
LibWeb: Escape HTML text fragments with multi-byte code point awareness
Browse files Browse the repository at this point in the history
The UTF-8 encoding of U+00A0 (NBSP) is the bytes 0xc2 0xa0. By looping
over the string to escape byte-by-byte, we replace the second byte with
"&nbsp;", but leave the first byte in the resulting text. This creates
an invalid UTF-8 string, with a lone leading byte.
  • Loading branch information
trflynn89 authored and linusg committed Mar 13, 2023
1 parent 3219ecb commit f5f1a52
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3586,23 +3586,23 @@ DeprecatedString HTMLParser::serialize_html_fragment(DOM::Node const& node)
auto escape_string = [](StringView string, AttributeMode attribute_mode) -> DeprecatedString {
// https://html.spec.whatwg.org/multipage/parsing.html#escapingString
StringBuilder builder;
for (auto& ch : string) {
for (auto code_point : Utf8View { string }) {
// 1. Replace any occurrence of the "&" character by the string "&amp;".
if (ch == '&')
if (code_point == '&')
builder.append("&amp;"sv);
// 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the string "&nbsp;".
else if (ch == '\xA0')
else if (code_point == 0xA0)
builder.append("&nbsp;"sv);
// 3. If the algorithm was invoked in the attribute mode, replace any occurrences of the """ character by the string "&quot;".
else if (ch == '"' && attribute_mode == AttributeMode::Yes)
else if (code_point == '"' && attribute_mode == AttributeMode::Yes)
builder.append("&quot;"sv);
// 4. If the algorithm was not invoked in the attribute mode, replace any occurrences of the "<" character by the string "&lt;", and any occurrences of the ">" character by the string "&gt;".
else if (ch == '<' && attribute_mode == AttributeMode::No)
else if (code_point == '<' && attribute_mode == AttributeMode::No)
builder.append("&lt;"sv);
else if (ch == '>' && attribute_mode == AttributeMode::No)
else if (code_point == '>' && attribute_mode == AttributeMode::No)
builder.append("&gt;"sv);
else
builder.append(ch);
builder.append_code_point(code_point);
}
return builder.to_deprecated_string();
};
Expand Down

0 comments on commit f5f1a52

Please sign in to comment.