Skip to content

Commit

Permalink
Merge pull request #252 from LeszekSwirski/parse-error
Browse files Browse the repository at this point in the history
Record parse failure reason and location
  • Loading branch information
lemire authored Aug 3, 2024
2 parents 3838b00 + b6ce2c4 commit 0e7a10a
Show file tree
Hide file tree
Showing 2 changed files with 97 additions and 15 deletions.
62 changes: 50 additions & 12 deletions include/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,25 @@ void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t
}
}

// Reason attached to a rejected number string. Enumerators tagged
// [JSON-only] describe rules that apply to the JSON number format.
enum class parse_error {
  // Nothing went wrong. Pinned to zero, which is also the value a
  // value-initialized parse_error takes.
  no_error = 0,

  // [JSON-only] A minus sign has to be immediately followed by an integer.
  missing_integer_after_sign,

  // A sign has to be immediately followed by an integer or by the dot.
  missing_integer_or_dot_after_sign,

  // [JSON-only] Leading zeros are not allowed in the integer part.
  leading_zeros_in_integer_part,

  // [JSON-only] The integer part needs at least one digit.
  no_digits_in_integer_part,

  // [JSON-only] Once a decimal point is present, the fractional part needs
  // at least one digit.
  no_digits_in_fractional_part,

  // The mantissa needs at least one digit.
  no_digits_in_mantissa,

  // Scientific notation was requested but the exponential part is absent.
  missing_exponential_part,
};

template <typename UC>
struct parsed_number_string_t {
int64_t exponent{0};
Expand All @@ -245,11 +264,22 @@ struct parsed_number_string_t {
// contains the range of the significant digits
span<const UC> integer{}; // non-nullable
span<const UC> fraction{}; // nullable
parse_error error{parse_error::no_error};
};

// Convenience aliases for the plain `char` instantiations:
// a span over constant chars, and the parse result for char input.
using byte_span = span<const char>;
using parsed_number_string = parsed_number_string_t<char>;

// Builds the result handed back when a number string is rejected.
//
// p     - position stored in the result's `lastmatch` field.
// error - reason stored in the result's `error` field.
//
// Returns a default-constructed parsed_number_string_t<UC> whose `valid`
// flag is false and whose `lastmatch` / `error` fields carry the arguments;
// every other field keeps its default value.
template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
report_parse_error(UC const* p, parse_error error) {
  parsed_number_string_t<UC> failure;
  failure.error = error;
  failure.lastmatch = p;
  failure.valid = false;  // stated explicitly so the rejection is obvious
  return failure;
}

// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
template <typename UC>
Expand All @@ -269,15 +299,16 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
#endif
++p;
if (p == pend) {
return answer;
return report_parse_error<UC>(
p, parse_error::missing_integer_or_dot_after_sign);
}
if (fmt & FASTFLOAT_JSONFMT) {
if (!is_integer(*p)) { // a sign must be followed by an integer
return answer;
return report_parse_error<UC>(p, parse_error::missing_integer_after_sign);
}
} else {
if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
return answer;
return report_parse_error<UC>(p, parse_error::missing_integer_or_dot_after_sign);
}
}
}
Expand All @@ -297,8 +328,12 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
answer.integer = span<const UC>(start_digits, size_t(digit_count));
if (fmt & FASTFLOAT_JSONFMT) {
// at least 1 digit in integer part, without leading zeros
if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) {
return answer;
if (digit_count == 0) {
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
}
if ((start_digits[0] == UC('0') && digit_count > 1)) {
return report_parse_error<UC>(start_digits,
parse_error::leading_zeros_in_integer_part);
}
}

Expand All @@ -323,11 +358,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
if (fmt & FASTFLOAT_JSONFMT) {
// at least 1 digit in fractional part
if (has_decimal_point && exponent == 0) {
return answer;
return report_parse_error<UC>(p, parse_error::no_digits_in_fractional_part);
}
}
else if (digit_count == 0) { // we must have encountered at least one integer!
return answer;
} else if (digit_count == 0) { // we must have encountered at least one integer!
return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
}
int64_t exp_number = 0; // explicit exponential part
if ( ((fmt & chars_format::scientific) &&
Expand All @@ -350,8 +384,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
}
if ((p == pend) || !is_integer(*p)) {
if(!(fmt & chars_format::fixed)) {
// We are in error.
return answer;
// The exponential part is invalid for scientific notation, so it must
// be a trailing token for fixed notation. However, fixed notation is
// disabled, so report a scientific notation error.
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
}
// Otherwise, we will be ignoring the 'e'.
p = location_of_e;
Expand All @@ -368,7 +404,9 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
}
} else {
// If it is scientific and not fixed, we have to bail out.
if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) {
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
}
}
answer.lastmatch = p;
answer.valid = true;
Expand Down
50 changes: 47 additions & 3 deletions tests/json_fmt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ struct AcceptedValue {
ExpectedResult expected;
};

// Expected diagnostics for an input that the JSON parser must refuse.
// Kept as a plain aggregate: the table in main() brace-initializes it
// positionally, so the member order matters.
struct RejectReason {
// Error code the test compares against the parse result's `error` field.
fast_float::parse_error error;
// Expected distance of the parse result's `lastmatch` pointer from the
// start of the input.
intptr_t location_offset;
};
// One negative test case: an input string together with the diagnostics the
// parser is expected to produce for it. Brace-initialized positionally in
// main(), so the member order matters.
struct RejectedValue {
// Text handed to the parser.
std::string input;
// Error code and error location expected for `input`.
RejectReason reason;
};

int main() {
const std::vector<AcceptedValue> accept{
{"-0.2", {-0.2, ""}},
Expand All @@ -55,8 +64,18 @@ int main() {
{"1e", {1., "e"}},
{"1e+", {1., "e+"}},
{"inf", {std::numeric_limits<double>::infinity(), ""}}};
const std::vector<std::string> reject{"-.2", "00.02", "0.e+1", "00.e+1",
".25", "+0.25", "inf", "nan(snan)"};
const std::vector<RejectedValue> reject{
{"-.2", {fast_float::parse_error::missing_integer_after_sign, 1}},
{"00.02", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
{"0.e+1", {fast_float::parse_error::no_digits_in_fractional_part, 2}},
{"00.e+1", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
{".25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
// The following cases already start as invalid JSON, so they are
// handled as trailing junk and the error is for not having digits in the
// empty string before the invalid token.
{"+0.25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
{"inf", {fast_float::parse_error::no_digits_in_integer_part, 0}},
{"nan(snan)", {fast_float::parse_error::no_digits_in_integer_part, 0}}};

for (std::size_t i = 0; i < accept.size(); ++i)
{
Expand All @@ -80,7 +99,7 @@ int main() {

for (std::size_t i = 0; i < reject.size(); ++i)
{
const auto& s = reject[i];
const auto& s = reject[i].input;
double result;
auto answer = fast_float::from_chars(s.data(), s.data() + s.size(), result, fast_float::chars_format::json);
if (answer.ec == std::errc()) {
Expand All @@ -89,6 +108,31 @@ int main() {
}
}

for (std::size_t i = 0; i < reject.size(); ++i)
{
const auto& f = reject[i].input;
const auto& expected_reason = reject[i].reason;
auto answer = fast_float::parse_number_string(
f.data(), f.data() + f.size(),
fast_float::parse_options(fast_float::chars_format::json));
if (answer.valid) {
std::cerr << "json parse accepted invalid json " << f << std::endl;
return EXIT_FAILURE;
}
if (answer.error != expected_reason.error) {
std::cerr << "json parse failure had invalid error reason " << f
<< std::endl;
return EXIT_FAILURE;
}
intptr_t error_location = answer.lastmatch - f.data();
if (error_location != expected_reason.location_offset) {
std::cerr << "json parse failure had invalid error location " << f
<< " (expected " << expected_reason.location_offset << " got "
<< error_location << ")" << std::endl;
return EXIT_FAILURE;
}
}

if(main_readme() != EXIT_SUCCESS) { return EXIT_FAILURE; }
if(main_readme2() != EXIT_SUCCESS) { return EXIT_FAILURE; }

Expand Down

0 comments on commit 0e7a10a

Please sign in to comment.