Skip to content

Commit

Permalink
AK+Everywhere: Fix data corruption due to code-point-to-char conversion
Browse files Browse the repository at this point in the history
In particular, StringView::contains(char) is often used with a u32
code point. When this is done, the compiler will for some reason allow
data corruption to occur silently.

In fact, this is one of two reasons for the following OSS Fuzz issue:
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=49184
This is probably a very old bug.

In the particular case of URLParser, AK::is_url_code_point got confused:
    return /* ... */ || "!$&'()*+,-./:;=?@_~"sv.contains(code_point);
If code_point is a large code point that happens to have the correct
lower bytes, AK::is_url_code_point is then convinced that the given
code point is okay, even if it is actually problematic.

This commit fixes *only* the silent data corruption due to the erroneous
conversion, and does not fully resolve OSS-Fuzz#49184.
  • Loading branch information
BenWiederhake authored and ADKaster committed Oct 9, 2022
1 parent f07e018 commit 3aeb57e
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 13 deletions.
15 changes: 15 additions & 0 deletions AK/StringView.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <AK/Find.h>
#include <AK/Function.h>
#include <AK/Memory.h>
#include <AK/StringBuilder.h>
#include <AK/StringView.h>
#include <AK/Vector.h>

Expand Down Expand Up @@ -137,6 +138,20 @@ bool StringView::contains(char needle) const
return false;
}

bool StringView::contains(u32 needle) const
{
// A code point should be at most four UTF-8 bytes, which easily fits into StringBuilder's inline-buffer.
// Therefore, this will not allocate.
StringBuilder needle_builder;
auto result = needle_builder.try_append_code_point(needle);
if (result.is_error()) {
// The needle is invalid, therefore the string does not contain it.
return false;
}

return contains(needle_builder.string_view());
}

bool StringView::contains(StringView needle, CaseSensitivity case_sensitivity) const
{
return StringUtils::contains(*this, needle, case_sensitivity);
Expand Down
1 change: 1 addition & 0 deletions AK/StringView.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class StringView {
[[nodiscard]] bool matches(StringView mask, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
[[nodiscard]] bool matches(StringView mask, Vector<MaskSpan>&, CaseSensitivity = CaseSensitivity::CaseInsensitive) const;
[[nodiscard]] bool contains(char) const;
[[nodiscard]] bool contains(u32) const;
[[nodiscard]] bool contains(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive) const;
[[nodiscard]] bool equals_ignoring_case(StringView other) const;

Expand Down
2 changes: 1 addition & 1 deletion AK/URL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,7 +406,7 @@ bool URL::code_point_is_in_percent_encode_set(u32 code_point, URL::PercentEncode
case URL::PercentEncodeSet::EncodeURI:
// NOTE: This is the same percent encode set that JS encodeURI() uses.
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI
return code_point >= 0x7E || (!is_ascii_alphanumeric(code_point) && !";,/?:@&=+$-_.!~*'()#"sv.contains(code_point));
return code_point >= 0x7E || (!is_ascii_alphanumeric(code_point) && !";,/?:@&=+$-_.!~*'()#"sv.contains(static_cast<char>(code_point)));
default:
VERIFY_NOT_REACHED();
}
Expand Down
12 changes: 6 additions & 6 deletions AK/URLParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@ static void report_validation_error(SourceLocation const& location = SourceLocat

static Optional<String> parse_opaque_host(StringView input)
{
auto forbidden_host_code_points_excluding_percent = "\0\t\n\r #/:<>?@[\\]^|"sv;
for (auto code_point : forbidden_host_code_points_excluding_percent) {
if (input.contains(code_point)) {
auto forbidden_host_characters_excluding_percent = "\0\t\n\r #/:<>?@[\\]^|"sv;
for (auto character : forbidden_host_characters_excluding_percent) {
if (input.contains(character)) {
report_validation_error();
return {};
}
Expand Down Expand Up @@ -72,9 +72,9 @@ static Optional<String> parse_host(StringView input, bool is_not_special = false
// FIXME: Let asciiDomain be the result of running domain to ASCII on domain.
auto& ascii_domain = domain;

auto forbidden_host_code_points = "\0\t\n\r #%/:<>?@[\\]^|"sv;
for (auto code_point : forbidden_host_code_points) {
if (ascii_domain.view().contains(code_point)) {
auto forbidden_host_characters = "\0\t\n\r #%/:<>?@[\\]^|"sv;
for (auto character : forbidden_host_characters) {
if (ascii_domain.view().contains(character)) {
report_validation_error();
return {};
}
Expand Down
8 changes: 4 additions & 4 deletions Userland/Libraries/LibJS/Runtime/GlobalObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -350,7 +350,7 @@ static ThrowCompletionOr<String> encode(VM& vm, String const& string, StringView
auto code_unit = utf16_string.code_unit_at(k);
// c. If C is in unescapedSet, then
// NOTE: We assume the unescaped set only contains ascii characters as unescaped_set is a StringView.
if (code_unit < 0x80 && unescaped_set.contains(code_unit)) {
if (code_unit < 0x80 && unescaped_set.contains(static_cast<char>(code_unit))) {
// i. Set k to k + 1.
k++;

Expand Down Expand Up @@ -420,8 +420,8 @@ static ThrowCompletionOr<String> decode(VM& vm, String const& string, StringView
continue;
}

if ((decoded_code_unit & 0x80) == 0) {
if (reserved_set.contains(decoded_code_unit))
if (decoded_code_unit < 0x80) {
if (reserved_set.contains(static_cast<char>(decoded_code_unit)))
decoded_builder.append(string.substring_view(k - 2, 3));
else
decoded_builder.append(decoded_code_unit);
Expand Down Expand Up @@ -480,7 +480,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
StringBuilder escaped;
for (auto code_point : utf8_to_utf16(string)) {
if (code_point < 256) {
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(code_point))
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast<char>(code_point)))
escaped.append(code_point);
else
escaped.appendff("%{:02X}", code_point);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ bool is_cors_safelisted_request_header(Header const& header)
else if (name.is_one_of_ignoring_case("accept-language"sv, "content-language"sv)) {
// If value contains a byte that is not in the range 0x30 (0) to 0x39 (9), inclusive, is not in the range 0x41 (A) to 0x5A (Z), inclusive, is not in the range 0x61 (a) to 0x7A (z), inclusive, and is not 0x20 (SP), 0x2A (*), 0x2C (,), 0x2D (-), 0x2E (.), 0x3B (;), or 0x3D (=), then return false.
if (any_of(value.span(), [](auto byte) {
return !(is_ascii_digit(byte) || is_ascii_alpha(byte) || " *,-.;="sv.contains(byte));
return !(is_ascii_digit(byte) || is_ascii_alpha(byte) || " *,-.;="sv.contains(static_cast<char>(byte)));
}))
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion Userland/Utilities/grep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ ErrorOr<int> serenity_main(Main::Arguments args)
// Human-readable indexes start at 1, so it's fine to increment already.
line_number += 1;
StringView line_view(line, nread);
bool is_binary = line_view.contains(0);
bool is_binary = line_view.contains('\0');

if (is_binary && binary_mode == BinaryFileMode::Skip)
return 1;
Expand Down

0 comments on commit 3aeb57e

Please sign in to comment.