Skip to content

Commit

Permalink
ICU-22746 Import ICU4J tests
Browse files Browse the repository at this point in the history
Includes code fixes for `numberingSystem`, `percent`,
and `precision` options in `:number`

Also includes a code fix for number selection:
  Refactor code to conform more closely to the steps in the spec,
  and call the number formatter before the selector so that a FormattedNumber
  with the right options is selected on

Some modifications were needed to add missing params
and to mark some tests as ignored (see ICU-22754).
Also added decimal arguments in JSON test reader

Finally, some redundant tests are removed:
all tests in messageformat2test_features and
messageformat2test_icu, and
messageformat2test_builtin are now covered by JSON tests
  • Loading branch information
catamorphism authored and mihnita committed May 13, 2024
1 parent 75ef0d9 commit 6d5555a
Show file tree
Hide file tree
Showing 16 changed files with 849 additions and 815 deletions.
230 changes: 126 additions & 104 deletions icu4c/source/i18n/messageformat2_function_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "unicode/messageformat2_data_model_names.h"
#include "unicode/messageformat2_function_registry.h"
#include "unicode/smpdtfmt.h"
#include "charstr.h"
#include "messageformat2_allocation.h"
#include "messageformat2_function_registry_internal.h"
#include "messageformat2_macros.h"
Expand Down Expand Up @@ -287,7 +288,7 @@ MFFunctionRegistry::~MFFunctionRegistry() {
// Style options -- specific to `:number`
if (!isInteger) {
if (number.usePercent(opts)) {
nf = nf.unit(NoUnit::percent());
nf = nf.unit(NoUnit::percent()).scale(Scale::powerOfTen(2));
}
}

Expand All @@ -296,18 +297,36 @@ MFFunctionRegistry::~MFFunctionRegistry() {
int32_t minFractionDigits = number.minimumFractionDigits(opts);
int32_t maxFractionDigits = number.maximumFractionDigits(opts);
int32_t minSignificantDigits = number.minimumSignificantDigits(opts);
Precision p = Precision::minMaxFraction(minFractionDigits, maxFractionDigits);
if (minSignificantDigits > 0) {
Precision p = Precision::unlimited();
bool precisionOptions = false;

// Returning -1 means the option wasn't provided
if (maxFractionDigits != -1 && minFractionDigits != -1) {
precisionOptions = true;
p = Precision::minMaxFraction(minFractionDigits, maxFractionDigits);
} else if (minFractionDigits != -1) {
precisionOptions = true;
p = Precision::minFraction(minFractionDigits);
} else if (maxFractionDigits != -1) {
precisionOptions = true;
p = Precision::maxFraction(maxFractionDigits);
}

if (minSignificantDigits != -1) {
precisionOptions = true;
p = p.minSignificantDigits(minSignificantDigits);
}
if (maxSignificantDigits > 0) {
if (maxSignificantDigits != -1) {
precisionOptions = true;
p = p.maxSignificantDigits(maxSignificantDigits);
}
nf = nf.precision(p);
if (precisionOptions) {
nf = nf.precision(p);
}
} else {
// maxSignificantDigits applies to `:integer`, but the other precision options don't
Precision p = Precision::integer();
if (maxSignificantDigits > 0) {
if (maxSignificantDigits != -1) {
p = p.maxSignificantDigits(maxSignificantDigits);
}
nf = nf.precision(p);
Expand Down Expand Up @@ -347,6 +366,24 @@ MFFunctionRegistry::~MFFunctionRegistry() {
grp = UNumberGroupingStrategy::UNUM_GROUPING_AUTO;
}
nf = nf.grouping(grp);

// numberingSystem
UnicodeString ns = opts.getStringFunctionOption(UnicodeString("numberingSystem"));
if (ns.length() > 0) {
ns = ns.toLower(Locale("en-US"));
CharString buffer;
// Ignore bad option values, so use a local status
UErrorCode localStatus = U_ZERO_ERROR;
// Copied from number_skeletons.cpp (helpers::parseNumberingSystemOption)
buffer.appendInvariantChars({false, ns.getBuffer(), ns.length()}, localStatus);
if (U_SUCCESS(localStatus)) {
LocalPointer<NumberingSystem> symbols
(NumberingSystem::createInstanceByName(buffer.data(), localStatus));
if (U_SUCCESS(localStatus)) {
nf = nf.adoptSymbols(symbols.orphan());
}
}
}
}
return LocalizedNumberFormatter(nf.locale(number.locale));
}
Expand Down Expand Up @@ -419,7 +456,10 @@ int32_t StandardFunctions::Number::maximumFractionDigits(const FunctionOptions&
return static_cast<int32_t>(val);
}
}
return number::impl::kMaxIntFracSig;
// Returning -1 indicates that the option wasn't provided or was a non-integer.
// The caller needs to check for that case, since passing -1 to Precision::maxFraction()
// is an error.
return -1;
}

int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions& opts) const {
Expand All @@ -434,7 +474,10 @@ int32_t StandardFunctions::Number::minimumFractionDigits(const FunctionOptions&
}
}
}
return 0;
// Returning -1 indicates that the option wasn't provided or was a non-integer.
// The caller needs to check for that case, since passing -1 to Precision::minFraction()
// is an error.
return -1;
}

int32_t StandardFunctions::Number::minimumIntegerDigits(const FunctionOptions& opts) const {
Expand Down Expand Up @@ -462,10 +505,10 @@ int32_t StandardFunctions::Number::minimumSignificantDigits(const FunctionOption
}
}
}
// Returning 0 indicates that the option wasn't provided or was a non-integer.
// The caller needs to check for that case, since passing 0 to Precision::minSignificantDigits()
// Returning -1 indicates that the option wasn't provided or was a non-integer.
// The caller needs to check for that case, since passing -1 to Precision::minSignificantDigits()
// is an error.
return 0;
return -1;
}

int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOptions& opts) const {
Expand All @@ -478,10 +521,10 @@ int32_t StandardFunctions::Number::maximumSignificantDigits(const FunctionOption
return static_cast<int32_t>(val);
}
}
// Returning 0 indicates that the option wasn't provided or was a non-integer.
// The caller needs to check for that case, since passing 0 to Precision::maxSignificantDigits()
// Returning -1 indicates that the option wasn't provided or was a non-integer.
// The caller needs to check for that case, since passing -1 to Precision::maxSignificantDigits()
// is an error.
return 0; // Not a valid value for Precision; has to be checked
return -1; // Not a valid value for Precision; has to be checked
}

bool StandardFunctions::Number::usePercent(const FunctionOptions& opts) const {
Expand Down Expand Up @@ -582,67 +625,17 @@ Selector* StandardFunctions::PluralFactory::createSelector(const Locale& locale,

Selector* result;
if (isInteger) {
result = new Plural(Plural::integer(locale));
result = new Plural(Plural::integer(locale, errorCode));
} else {
result = new Plural(locale);
result = new Plural(locale, errorCode);
}
NULL_ON_ERROR(errorCode);
if (result == nullptr) {
errorCode = U_MEMORY_ALLOCATION_ERROR;
return nullptr;
}
return result;
}

static double tryAsString(const UnicodeString& s, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
// Try parsing the inputString as a double
double valToCheck;
strToDouble(s, valToCheck, errorCode);
return valToCheck;
}

static double tryWithFormattable(const Formattable& value, UErrorCode& errorCode) {
if (U_FAILURE(errorCode)) {
return 0;
}
double valToCheck;
switch (value.getType()) {
case UFMT_DOUBLE: {
valToCheck = value.getDouble(errorCode);
break;
}
case UFMT_LONG: {
valToCheck = (double) value.getLong(errorCode);
break;
}
case UFMT_INT64: {
valToCheck = (double) value.getInt64(errorCode);
break;
}
case UFMT_STRING: {
const UnicodeString& s = value.getString(errorCode);
U_ASSERT(U_SUCCESS(errorCode));
return tryAsString(s, errorCode);
}
default: {
errorCode = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
}
U_ASSERT(U_SUCCESS(errorCode));
return valToCheck;
}

static UnicodeString toJSONString(double d) {
// TODO :(
char buffer[512];
// "Only integer matching is required in the Technical Preview."
snprintf(buffer, 512, "%" PRId64, static_cast<int64_t>(d));
return UnicodeString(buffer);
}

void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat,
FunctionOptions&& opts,
const UnicodeString* keys,
Expand All @@ -658,86 +651,115 @@ void StandardFunctions::Plural::selectKey(FormattedPlaceholder&& toFormat,
return;
}

// Only doubles and integers can match
double valToCheck;
// Handle any formatting options
PluralType type = pluralType(opts);
FormattedPlaceholder resolvedSelector = numberFormatter->format(std::move(toFormat),
std::move(opts),
errorCode);
CHECK_ERROR(errorCode);

bool isFormattedString = toFormat.isEvaluated() && toFormat.output().isString();
bool isFormattedNumber = toFormat.isEvaluated() && toFormat.output().isNumber();
U_ASSERT(resolvedSelector.isEvaluated() && resolvedSelector.output().isNumber());

if (isFormattedString) {
// Formatted string: try parsing it as a number
valToCheck = tryAsString(toFormat.output().getString(), errorCode);
} else {
// Already checked that contents can be formatted
valToCheck = tryWithFormattable(toFormat.asFormattable(), errorCode);
}
// See https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection
// 1. Let exact be the JSON string representation of the numeric value of resolvedSelector
const number::FormattedNumber& formattedNumber = resolvedSelector.output().getNumber();
UnicodeString exact = formattedNumber.toString(errorCode);

if (U_FAILURE(errorCode)) {
// Non-number => selector error
errorCode = U_MF_SELECTOR_ERROR;
return;
}
// TODO: This needs to be checked against https://github.com/unicode-org/message-format-wg/blob/main/spec/registry.md#number-selection
// Determine `exact`, per step 1 under "Number Selection"
UnicodeString exact = toJSONString(valToCheck);

// Generate the matches
// -----------------------
// Step 2. Let keyword be a string which is the result of rule selection on resolvedSelector.
// If the option select is set to exact, rule-based selection is not used. Return the empty string.
UnicodeString keyword;
if (type != PluralType::PLURAL_EXACT) {
UPluralType t = type == PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL;
// Look up plural rules by locale and type
LocalPointer<PluralRules> rules(PluralRules::forLocale(locale, t, errorCode));
CHECK_ERROR(errorCode);

keyword = rules->select(formattedNumber, errorCode);
}

// Steps 3-4 elided:
// 3. Let resultExact be a new empty list of strings.
// 4. Let resultKeyword be a new empty list of strings.
// Instead, we use `prefs` the concatenation of `resultExact`
// and `resultKeyword`.

prefsLen = 0;

// First, check for an exact match
// 5. For each string key in keys:
double keyAsDouble = 0;
for (int32_t i = 0; i < keysLen; i++) {
// Try parsing the key as a double
UErrorCode localErrorCode = U_ZERO_ERROR;
strToDouble(keys[i], keyAsDouble, localErrorCode);
// 5i. If the value of key matches the production number-literal, then
if (U_SUCCESS(localErrorCode)) {
// 5i(a). If key and exact consist of the same sequence of Unicode code points, then
if (exact == keys[i]) {
// 5i(a)(a) Append key as the last element of the list resultExact.
prefs[prefsLen] = keys[i];
prefsLen++;
break;
}
}
}

PluralType type = pluralType(opts);
// Return immediately if exact matching was requested
if (prefsLen == keysLen || type == PluralType::PLURAL_EXACT) {
return;
}

UPluralType t = type == PluralType::PLURAL_ORDINAL ? UPLURAL_TYPE_ORDINAL : UPLURAL_TYPE_CARDINAL;
// Look up plural rules by locale and type
LocalPointer<PluralRules> rules(PluralRules::forLocale(locale, t, errorCode));
CHECK_ERROR(errorCode);


// Check for a match based on the plural category
UnicodeString match;
if (isFormattedNumber) {
match = rules->select(toFormat.output().getNumber(), errorCode);
} else {
if (isInteger) {
match = rules->select(static_cast<int32_t>(trunc(valToCheck)));
} else {
match = rules->select(valToCheck);
}
}
CHECK_ERROR(errorCode);

for (int32_t i = 0; i < keysLen; i ++) {
if (prefsLen >= keysLen) {
break;
}
if (match == keys[i]) {
// 5ii. Else if key is one of the keywords zero, one, two, few, many, or other, then
// 5ii(a). If key and keyword consist of the same sequence of Unicode code points, then
if (keyword == keys[i]) {
// 5ii(a)(a) Append key as the last element of the list resultKeyword.
prefs[prefsLen] = keys[i];
prefsLen++;
}
}

// Note: Step 5(iii) "Else, emit a Selection Error" is omitted in both loops

// 6. Return a new list whose elements are the concatenation of the elements
// (in order) of resultExact followed by the elements (in order) of resultKeyword.
// (Implicit, since `prefs` is an out-parameter)
}

StandardFunctions::Plural::Plural(const Locale& loc, UErrorCode& status) : locale(loc) {
CHECK_ERROR(status);

numberFormatter.adoptInstead(new StandardFunctions::Number(loc));
if (!numberFormatter.isValid()) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}

StandardFunctions::Plural::Plural(const Locale& loc, bool isInt, UErrorCode& status) : locale(loc), isInteger(isInt) {
CHECK_ERROR(status);

if (isInteger) {
numberFormatter.adoptInstead(new StandardFunctions::Number(loc, true));
} else {
numberFormatter.adoptInstead(new StandardFunctions::Number(loc));
}

if (!numberFormatter.isValid()) {
status = U_MEMORY_ALLOCATION_ERROR;
}
}

StandardFunctions::Plural::~Plural() {}

StandardFunctions::PluralFactory::~PluralFactory() {}

// --------- DateTimeFactory
Expand Down
7 changes: 4 additions & 3 deletions icu4c/source/i18n/messageformat2_function_registry_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,12 +175,13 @@ namespace message2 {
PLURAL_CARDINAL,
PLURAL_EXACT
} PluralType;
Plural(const Locale& loc) : locale(loc) {}
Plural(const Locale& loc, bool isInt) : locale(loc), isInteger(isInt) {}
static Plural integer(const Locale& loc) { return Plural(loc, true); }
Plural(const Locale& loc, UErrorCode& errorCode);
Plural(const Locale& loc, bool isInt, UErrorCode& errorCode);
static Plural integer(const Locale& loc, UErrorCode& errorCode) { return Plural(loc, true, errorCode); }
PluralType pluralType(const FunctionOptions& opts) const;
const Locale& locale;
const bool isInteger = false;
LocalPointer<StandardFunctions::Number> numberFormatter;
};

class TextFactory : public SelectorFactory {
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/test/intltest/Makefile.in
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ fldset.o dadrfmt.o dadrcal.o dcfmapts.o decoll.o dtfmapts.o dtfmrgts.o dtfmtrtts
dtptngts.o encoll.o escoll.o ficoll.o frcoll.o g7coll.o intltest.o \
itercoll.o itformat.o itmajor.o itutil.o jacoll.o lcukocol.o \
loctest.o localebuildertest.o localematchertest.o \
messageformat2test.o messageformat2test_builtin.o messageformat2test_custom.o messageformat2test_features.o messageformat2test_read_json.o messageformat2test_icu.o \
messageformat2test.o messageformat2test_custom.o messageformat2test_read_json.o \
miscdtfm.o mnkytst.o msfmrgts.o nmfmapts.o nmfmtrt.o \
numfmtst.o numrgts.o plurults.o plurfmts.o pptest.o regcoll.o restest.o restsnew.o \
sdtfmtts.o svccoll.o tchcfmt.o selfmts.o \
Expand Down
3 changes: 0 additions & 3 deletions icu4c/source/test/intltest/intltest.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,8 @@
<ClCompile Include="itrbnfrt.cpp" />
<ClCompile Include="locnmtst.cpp" />
<ClCompile Include="measfmttest.cpp" />
<ClCompile Include="messageformat2test_builtin.cpp" />
<ClCompile Include="messageformat2test.cpp" />
<ClCompile Include="messageformat2test_custom.cpp" />
<ClCompile Include="messageformat2test_features.cpp" />
<ClCompile Include="messageformat2test_icu.cpp" />
<ClCompile Include="messageformat2test_read_json.cpp" />
<ClCompile Include="miscdtfm.cpp" />
<ClCompile Include="msfmrgts.cpp" />
Expand Down

0 comments on commit 6d5555a

Please sign in to comment.