Skip to content

Commit

Permalink
AK: Make the JsonParser use the new double parser for numbers
Browse files Browse the repository at this point in the history
Because we still support u64 and i64 (on top of i32 and u32), we still
have to parse the number ourselves first. Then, if we determine that the
number is a floating point value or is outside the range of i64 and u64,
we fall back and parse it as a double.

Previously, JsonParser had ifdefs guarding the double computation; now
the build simply fails with an #error when KERNEL is defined, so
JsonParser is no longer usable in the Kernel. This can be remedied fairly
easily, but since it is not needed we #error on that for now.
  • Loading branch information
davidot authored and linusg committed Oct 23, 2022
1 parent 35e52f7 commit c9aa664
Show file tree
Hide file tree
Showing 2 changed files with 199 additions and 111 deletions.
186 changes: 75 additions & 111 deletions AK/JsonParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
*/

#include <AK/CharacterTypes.h>
#include <AK/FloatingPointStringConversions.h>
#include <AK/JsonArray.h>
#include <AK/JsonObject.h>
#include <AK/JsonParser.h>
Expand Down Expand Up @@ -187,141 +188,104 @@ ErrorOr<JsonValue> JsonParser::parse_string()

// Parses a JSON number beginning at the current read position (m_index) and returns it
// as a JsonValue, or an Error describing why the text is not a valid number.
//
// NOTE(review): this listing is a rendered diff without +/- markers. Lines of the previous
// implementation (the ones using `value`, `is_double`, `fraction_buffer`, `exponent_buffer`
// and pow()) appear interleaved with lines of the replacement (the ones using
// `start_index`, `negative` and `fallback_to_double_parse`), so it does not compile as shown.
//
// Reading only the replacement lines, the flow appears to be:
//   1. Consume an optional '-' and require a digit after it.
//   2. Reject a leading '0' that is followed by another digit.
//   3. Collect integer digits into number_buffer; on '.' or 'e'/'E' (after checking that a
//      digit follows) hand the whole number over to fallback_to_double_parse.
//   4. Return -0.0 for a negative number whose digits are all zero.
//   5. Otherwise convert to u32/u64, then i32/i64, and fall back to the double parse if
//      neither integer conversion succeeds.
ErrorOr<JsonValue> JsonParser::parse_number()
{
JsonValue value;
Vector<char, 128> number_buffer;
Vector<char, 128> fraction_buffer;
Vector<char, 128> exponent_buffer;
Vector<char, 32> number_buffer;

auto start_index = tell();

bool negative = false;
if (peek() == '-') {
number_buffer.append('-');
++m_index;
negative = true;

if (!is_ascii_digit(peek()))
return Error::from_string_literal("JsonParser: Unexpected '-' without further digits");
}

// Re-parses the input from start_index (i.e. including any sign) with
// parse_first_floating_point and, on success, moves m_index to just past the
// characters that parser consumed.
auto fallback_to_double_parse = [&]() -> ErrorOr<JsonValue> {
#ifdef KERNEL
# error JSONParser is currently not available for the Kernel because it disallows floating point. \
If you want to make this KERNEL compatible you can just make this fallback_to_double \
function fail with an error in KERNEL mode.
#endif
// FIXME: Since we know all the characters so far are ascii digits (and one . or e) we could
// use that in the floating point parser.

// The first part should be just ascii digits
StringView view = m_input.substring_view(start_index);

char const* start = view.characters_without_null_termination();
auto parse_result = parse_first_floating_point(start, start + view.length());

if (parse_result.parsed_value()) {
auto characters_parsed = parse_result.end_ptr - start;
m_index = start_index + characters_parsed;

return JsonValue(parse_result.value);
}
return Error::from_string_literal("JsonParser: Invalid floating point");
};

if (peek() == '0') {
if (is_ascii_digit(peek(1)))
return Error::from_string_literal("JsonParser: Cannot have leading zeros");

// Leading zeros are not allowed, however we can have a '.' or 'e' with
// valid digits after just a zero. These cases will be detected by having the next element
// start with a '.' or 'e'.
}

bool is_double = false;
bool all_zero = true;
for (;;) {
char ch = peek();
if (ch == '.') {
if (is_double)
return Error::from_string_literal("JsonParser: Multiple '.' in number");
if (!is_ascii_digit(peek(1)))
return Error::from_string_literal("JsonParser: Must have digits after decimal point");

is_double = true;
++m_index;
continue;
return fallback_to_double_parse();
}
if (ch == '-' || (ch >= '0' && ch <= '9')) {
if (ch != '-' && ch != '0')
all_zero = false;

if (is_double) {
if (ch == '-')
return Error::from_string_literal("JsonParser: Error while parsing number");
if (ch == 'e' || ch == 'E') {
char next = peek(1);
// An exponent marker must be followed by a digit, or by a sign and then a digit.
if (!is_ascii_digit(next) && ((next != '+' && next != '-') || !is_ascii_digit(peek(2))))
return Error::from_string_literal("JsonParser: Must have digits after exponent with an optional sign inbetween");

fraction_buffer.append(ch);
} else {
if (number_buffer.size() > 0) {
if (number_buffer.at(0) == '0')
return Error::from_string_literal("JsonParser: Error while parsing number");
}
return fallback_to_double_parse();
}

if (number_buffer.size() > 1) {
if (number_buffer.at(0) == '-' && number_buffer.at(1) == '0')
return Error::from_string_literal("JsonParser: Error while parsing number");
}
if (is_ascii_digit(ch)) {
if (ch != '0')
all_zero = false;

number_buffer.append(ch);
}
number_buffer.append(ch);
++m_index;
continue;
}

break;
}

#ifndef KERNEL
if (peek() == 'e' || peek() == 'E') {
// Force it to be a double
is_double = true;
++m_index;

for (;;) {
char ch = peek();
if (ch == '.')
return Error::from_string_literal("JsonParser: Error while parsing number");
if (ch == '-' || ch == '+' || (ch >= '0' && ch <= '9')) {
exponent_buffer.append(ch);

++m_index;
continue;
}
break;
}

if (exponent_buffer.is_empty())
return Error::from_string_literal("JsonParser: Error while parsing number");
}
#endif
// Negative zero is always a double
if (negative && all_zero)
return JsonValue(-0.0);

StringView number_string(number_buffer.data(), number_buffer.size());

#ifndef KERNEL
// Check for negative zero which needs to be forced to be represented with a double
if (number_string.starts_with('-') && all_zero)
return JsonValue(-0.0);
// Prefer the narrowest unsigned representation: u32 when the value fits, u64 otherwise.
auto to_unsigned_result = number_string.to_uint<u64>();
if (to_unsigned_result.has_value()) {
if (*to_unsigned_result <= NumericLimits<u32>::max())
return JsonValue((u32)*to_unsigned_result);

if (is_double) {
// FIXME: This logic looks shaky.
int whole = 0;
auto to_signed_result = number_string.to_uint();
if (to_signed_result.has_value()) {
whole = to_signed_result.value();
} else {
auto number = number_string.to_int();
if (!number.has_value())
return Error::from_string_literal("JsonParser: Error while parsing number");
whole = number.value();
}
double number_value = whole;

if (!fraction_buffer.is_empty()) {
StringView fraction_string(fraction_buffer.data(), fraction_buffer.size());
auto fraction_string_uint = fraction_string.to_uint<u64>();
if (!fraction_string_uint.has_value())
return Error::from_string_literal("JsonParser: Error while parsing number");
auto fraction = static_cast<double>(fraction_string_uint.value());
double sign = (whole < 0) ? -1 : 1;
auto divider = pow(10.0, static_cast<double>(fraction_buffer.size()));
number_value += sign * (fraction / divider);
}
return JsonValue(*to_unsigned_result);
} else if (auto signed_number = number_string.to_int<i64>(); signed_number.has_value()) {

if (exponent_buffer.size() > 0) {
StringView exponent_string(exponent_buffer.data(), exponent_buffer.size());
auto exponent_string_uint = exponent_string.to_int();
if (!exponent_string_uint.has_value())
return Error::from_string_literal("JsonParser: Error while parsing number");
double exponent = pow(10.0, static_cast<double>(exponent_string_uint.value()));
number_value *= exponent;
}
if (*signed_number <= NumericLimits<i32>::max())
return JsonValue((i32)*signed_number);

value = JsonValue(number_value);
} else {
#endif
auto to_unsigned_result = number_string.to_uint<u64>();
if (to_unsigned_result.has_value()) {
auto number = *to_unsigned_result;
if (number <= NumericLimits<u32>::max())
value = JsonValue((u32)number);
else
value = JsonValue(number);
} else {
auto number = number_string.to_int<i64>();
if (!number.has_value())
return Error::from_string_literal("JsonParser: Error while parsing number");
if (number.value() <= NumericLimits<i32>::max()) {
value = JsonValue((i32)number.value());
} else {
value = JsonValue(number.value());
}
}
#ifndef KERNEL
return JsonValue(*signed_number);
}
#endif

return value;
// It's possible the unsigned value is bigger than u64 max
return fallback_to_double_parse();
}

ErrorOr<JsonValue> JsonParser::parse_true()
Expand Down
124 changes: 124 additions & 0 deletions Tests/AK/TestJSON.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,3 +143,127 @@ TEST_CASE(json_parse_number_with_exponent)
auto value_with_fraction = JsonValue::from_string("10.5e5"sv);
EXPECT_EQ(value_with_fraction.value().as_double(), 1050000.0);
}

// Checks that the various spellings of positive and negative zero (and of values too
// small to be represented as a double) parse to a number whose double representation
// has exactly the expected bit pattern.
TEST_CASE(json_parse_special_numbers)
{
// Parses `string_input` as JSON and compares the result with `double_input` bit for bit.
// A bitwise comparison is used because -0.0 and 0.0 compare equal with operator==, so an
// ordinary equality check could not tell them apart.
// The parse error, if any, is logged *before* VERIFY so that the message is actually
// emitted for the failure it describes instead of sitting behind the assertion.
// NOTE(review): 4321.0 is the argument passed to to_double(); presumably it is a default
// used when the value is not a number - confirm against JsonValue::to_double.
#define EXPECT_TO_MATCH_NUMBER_BIT_WISE(string_input, double_input) \
    do { \
        auto value_or_error = JsonValue::from_string(string_input##sv); \
        if (value_or_error.is_error()) \
            dbgln("got {}", value_or_error.error()); \
        VERIFY(!value_or_error.is_error()); \
        EXPECT(value_or_error.value().is_number()); \
        EXPECT_EQ(bit_cast<u64>(value_or_error.value().to_double(4321.0)), bit_cast<u64>(static_cast<double>(double_input))); \
    } while (false)

    // Negative zero in every notation must keep its sign.
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0", -0.);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0.0", -0.0);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0.00", -0.00);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0e0", -0e0);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0e1", -0e1);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0e2", -0e2);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0e1000", -0e1000);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-0e-1000", -0e-1000);

    // Positive zero in every notation.
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0", 0.);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0.0", 0.0);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0.00", 0.00);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0e0", 0e0);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0e1", 0e1);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0e2", 0e2);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0e1000", 0e1000);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("0e-1000", 0e-1000);

    // These technically can be non zero, but not in doubles
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("-1e-2000", -0.);
    EXPECT_TO_MATCH_NUMBER_BIT_WISE("1e-2000", 0.);

#undef EXPECT_TO_MATCH_NUMBER_BIT_WISE
}

// Checks that JsonValue::from_string reports an error for inputs that look like numbers
// but are not valid as a complete top-level JSON value.
TEST_CASE(json_parse_fails_on_invalid_number)
{
// Expects parsing of the string literal `value` to fail.
// Wrapped in do/while(false) without a trailing ';' so that each use, followed by its own
// semicolon, forms exactly one statement (no stray empty statement, safe in if/else).
#define EXPECT_JSON_PARSE_TO_FAIL(value) \
    do { \
        EXPECT(JsonValue::from_string(value##sv).is_error()); \
    } while (false)

    // Lone sign and leading zeros.
    EXPECT_JSON_PARSE_TO_FAIL("-");
    EXPECT_JSON_PARSE_TO_FAIL("00");
    EXPECT_JSON_PARSE_TO_FAIL("01");
    EXPECT_JSON_PARSE_TO_FAIL("-01");

    // Missing digits before the decimal point.
    EXPECT_JSON_PARSE_TO_FAIL(".1");
    EXPECT_JSON_PARSE_TO_FAIL("-.1");
    EXPECT_JSON_PARSE_TO_FAIL("-,1");
    EXPECT_JSON_PARSE_TO_FAIL(".1e1");
    EXPECT_JSON_PARSE_TO_FAIL(".1e-1");
    EXPECT_JSON_PARSE_TO_FAIL("-.1e1");
    EXPECT_JSON_PARSE_TO_FAIL("-.1e-1");

    // Missing digits after the decimal point.
    EXPECT_JSON_PARSE_TO_FAIL("1.e1");
    EXPECT_JSON_PARSE_TO_FAIL("1.e-1");
    EXPECT_JSON_PARSE_TO_FAIL("-1.e1");
    EXPECT_JSON_PARSE_TO_FAIL("-1.e-1");

    // Missing or malformed exponent digits.
    EXPECT_JSON_PARSE_TO_FAIL("1e");
    EXPECT_JSON_PARSE_TO_FAIL("1e+");
    EXPECT_JSON_PARSE_TO_FAIL("1e-");
    EXPECT_JSON_PARSE_TO_FAIL("1e-f");
    EXPECT_JSON_PARSE_TO_FAIL("1.e");
    EXPECT_JSON_PARSE_TO_FAIL("1.e+");
    EXPECT_JSON_PARSE_TO_FAIL("1.e-");
    EXPECT_JSON_PARSE_TO_FAIL("1.e-f");

    // Binary-exponent and hexadecimal notation.
    EXPECT_JSON_PARSE_TO_FAIL("1p2");
    EXPECT_JSON_PARSE_TO_FAIL("1.p2");
    EXPECT_JSON_PARSE_TO_FAIL("0x1.0p2");
    EXPECT_JSON_PARSE_TO_FAIL("0x1");
    EXPECT_JSON_PARSE_TO_FAIL("0x7");
    EXPECT_JSON_PARSE_TO_FAIL("0xA");
    EXPECT_JSON_PARSE_TO_FAIL("0x");
    EXPECT_JSON_PARSE_TO_FAIL("-0x");

    // Trailing garbage after an otherwise valid number.
    EXPECT_JSON_PARSE_TO_FAIL("1x");
    EXPECT_JSON_PARSE_TO_FAIL("100x");
    EXPECT_JSON_PARSE_TO_FAIL("1000000000000000000000x");
    EXPECT_JSON_PARSE_TO_FAIL("0e2x");
    EXPECT_JSON_PARSE_TO_FAIL("0.1e2x");
    EXPECT_JSON_PARSE_TO_FAIL("0.1x");
    EXPECT_JSON_PARSE_TO_FAIL("1e2x");
    EXPECT_JSON_PARSE_TO_FAIL("1.2x");
    EXPECT_JSON_PARSE_TO_FAIL("1.2e2x");

    // No integer part at all.
    EXPECT_JSON_PARSE_TO_FAIL(".0");
    EXPECT_JSON_PARSE_TO_FAIL(".e1");
    EXPECT_JSON_PARSE_TO_FAIL("-.0");
    EXPECT_JSON_PARSE_TO_FAIL("-.e1");

    // A leading '+' is expected to be rejected.
    EXPECT_JSON_PARSE_TO_FAIL("+0");
    EXPECT_JSON_PARSE_TO_FAIL("+0.0");
    EXPECT_JSON_PARSE_TO_FAIL("+0.00");
    EXPECT_JSON_PARSE_TO_FAIL("+0e0");
    EXPECT_JSON_PARSE_TO_FAIL("+0e1");
    EXPECT_JSON_PARSE_TO_FAIL("+0e2");
    EXPECT_JSON_PARSE_TO_FAIL("+0e1000");
    EXPECT_JSON_PARSE_TO_FAIL("+0e-1000");

    EXPECT_JSON_PARSE_TO_FAIL("+10");
    EXPECT_JSON_PARSE_TO_FAIL("+10e1");
    EXPECT_JSON_PARSE_TO_FAIL("+10.3");
    EXPECT_JSON_PARSE_TO_FAIL("+10.3e1");

    // Further hexadecimal spellings, including the upper-case prefix.
    EXPECT_JSON_PARSE_TO_FAIL("0x2");
    EXPECT_JSON_PARSE_TO_FAIL("0xB");
    EXPECT_JSON_PARSE_TO_FAIL("0xF");
    EXPECT_JSON_PARSE_TO_FAIL("0Xf");
    EXPECT_JSON_PARSE_TO_FAIL("0X3");

    // More than one exponent marker.
    EXPECT_JSON_PARSE_TO_FAIL("10ee1");
    EXPECT_JSON_PARSE_TO_FAIL("1e1e1");

    // These could be valid within an array but not as the top level value
    EXPECT_JSON_PARSE_TO_FAIL("0,0");
    EXPECT_JSON_PARSE_TO_FAIL(",1");
    EXPECT_JSON_PARSE_TO_FAIL("10e1,");
    EXPECT_JSON_PARSE_TO_FAIL("10e,1");
    EXPECT_JSON_PARSE_TO_FAIL("10,e1");
    EXPECT_JSON_PARSE_TO_FAIL("1,0e1");
    EXPECT_JSON_PARSE_TO_FAIL(",10e1");

#undef EXPECT_JSON_PARSE_TO_FAIL
}

0 comments on commit c9aa664

Please sign in to comment.