Skip to content

Commit

Permalink
ICU-22707 Age 6:2, run generate.sh
Browse files Browse the repository at this point in the history
  • Loading branch information
markusicu committed Apr 30, 2024
1 parent d79c7bf commit 4d9612b
Show file tree
Hide file tree
Showing 24 changed files with 6,925 additions and 6,693 deletions.
2 changes: 1 addition & 1 deletion .bazeliskrc
Expand Up @@ -6,4 +6,4 @@
# for running Bazel commands while ensuring, through configuration, that only a
# specific version of Bazel is executed.

USE_BAZEL_VERSION=6.0.0
USE_BAZEL_VERSION=7.1.1
1,844 changes: 935 additions & 909 deletions icu4c/source/common/norm2_nfc_data.h

Large diffs are not rendered by default.

1,718 changes: 877 additions & 841 deletions icu4c/source/common/propname_data.h

Large diffs are not rendered by default.

1,379 changes: 709 additions & 670 deletions icu4c/source/common/ubidi_props_data.h

Large diffs are not rendered by default.

1,597 changes: 806 additions & 791 deletions icu4c/source/common/ucase_props_data.h

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions icu4c/source/common/uchar.cpp
Expand Up @@ -524,8 +524,8 @@ U_CAPI void U_EXPORT2
u_charAge(UChar32 c, UVersionInfo versionArray) {
    // Writes the Age of code point c into versionArray as major.minor.0.0.
    // A null versionArray is tolerated: nothing is written in that case.
    if(versionArray!=nullptr) {
        // After the shift, the value holds the packed Age field:
        // major version in the upper 6 bits, minor version in the lower 2 bits.
        uint32_t version=u_getUnicodeProperties(c, 0)>>UPROPS_AGE_SHIFT;
        versionArray[0]=static_cast<uint8_t>(version>>2);
        versionArray[1]=static_cast<uint8_t>(version&3);
        // Age only carries major.minor; the remaining fields are always zero.
        versionArray[2]=versionArray[3]=0;
    }
}
Expand Down
7,007 changes: 3,560 additions & 3,447 deletions icu4c/source/common/uchar_props_data.h

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions icu4c/source/common/uprops.h
Expand Up @@ -120,7 +120,8 @@ enum {
/*
* Properties in vector word 0
* Bits
* 31..24 DerivedAge version major/minor one nibble each
* 31..26 Age major version (0..63)
* 25..24 Age minor version (0..3)
* 23..22 3..1: Bits 21..20 & 7..0 = Script_Extensions index
* 3: Script value from Script_Extensions
* 2: Script=Inherited
Expand All @@ -132,7 +133,6 @@ enum {
* 7.. 0 UScriptCode, or index to Script_Extensions
*/

/* Age field of vector word 0 (bits 31..24): major version in bits 31..26, minor version in bits 25..24 */
#define UPROPS_AGE_MASK 0xff000000
#define UPROPS_AGE_SHIFT 24

Expand Down Expand Up @@ -164,6 +164,9 @@ enum {

namespace {

// Largest Age major version that fits the 6-bit field (vector word 0, bits 31..26).
inline constexpr uint8_t UPROPS_AGE_MAJOR_MAX = 63;
// Largest Age minor version that fits the 2-bit field (vector word 0, bits 25..24).
inline constexpr uint8_t UPROPS_AGE_MINOR_MAX = 3;

inline uint32_t uprops_mergeScriptCodeOrIndex(uint32_t scriptX) {
return
((scriptX & UPROPS_SCRIPT_HIGH_MASK) >> UPROPS_SCRIPT_HIGH_SHIFT) |
Expand Down Expand Up @@ -236,6 +239,8 @@ enum {

#ifdef __cplusplus

namespace {

// https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type
// The Identifier_Type maps each code point to a *set* of one or more values.
// Some can be combined with others, some can only occur alone.
Expand Down Expand Up @@ -296,6 +301,8 @@ inline constexpr uint8_t uprops_idTypeToEncoded[] = {
UPROPS_ID_TYPE_RECOMMENDED
};

} // namespace

#endif // __cplusplus

#define UPROPS_LB_MASK 0x03f00000
Expand Down
Binary file modified icu4c/source/data/in/nfc.nrm
Binary file not shown.
Binary file modified icu4c/source/data/in/nfkc.nrm
Binary file not shown.
Binary file modified icu4c/source/data/in/nfkc_cf.nrm
Binary file not shown.
Binary file modified icu4c/source/data/in/nfkc_scf.nrm
Binary file not shown.
Binary file modified icu4c/source/data/in/pnames.icu
Binary file not shown.
Binary file modified icu4c/source/data/in/ubidi.icu
Binary file not shown.
Binary file modified icu4c/source/data/in/ucase.icu
Binary file not shown.
Binary file modified icu4c/source/data/in/uemoji.icu
Binary file not shown.
Binary file modified icu4c/source/data/in/ulayout.icu
Binary file not shown.
Binary file modified icu4c/source/data/in/unames.icu
Binary file not shown.
Binary file modified icu4c/source/data/in/uprops.icu
Binary file not shown.
Binary file modified icu4c/source/data/in/uts46.nrm
Binary file not shown.
8 changes: 4 additions & 4 deletions icu4c/source/i18n/collationfcd.cpp
Expand Up @@ -148,10 +148,10 @@ const uint8_t CollationFCD::lcccIndex[2048]={

// Generated table: exactly 73 initializers (four rows of 16 plus a final row of 9),
// matching the declared array size. Do not edit values by hand; regenerate instead.
const uint32_t CollationFCD::lcccBits[73]={
0,0xffffffff,0xffff7fff,0xffff,0xf8,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0xfffff800,0x10000,0x9fc00000,0x3d9f,0x20000,0xffff0000,0x7ff,
0x200ff800,0xfbc00000,0x3eef,0xe000000,0xff800000,0xfffffc00,0xfffffffb,0x10000000,0x1e2000,0x2000,0x40000000,0x602000,0x18000000,0x400,0x7000000,0xf00,
0x3000000,0x2a00000,0x3c3e0000,0xdf,0x40,0x6800000,0xe0000000,0x300000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0xbfff0000,0x7fff,0x10,
0xff800,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,0x1fff0000,0x1ffe2,0x38000,0x80000000,0xfc00,0x6000000,0x3ff08000,0xc0000000,0x30000,0x1000,
0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0xd0000fd,0x7108000
};

const uint8_t CollationFCD::tcccIndex[2048]={
Expand Down Expand Up @@ -288,12 +288,12 @@ const uint8_t CollationFCD::tcccIndex[2048]={
// Generated table: exactly 123 initializers (seven rows of 16 plus a final row of 11),
// matching the declared array size. Do not edit values by hand; regenerate instead.
const uint32_t CollationFCD::tcccBits[123]={
0,0xffffffff,0x3e7effbf,0xbe7effbf,0xfffcffff,0x7ef1ff3f,0xfff3f1f8,0x7fffff3f,0x18003,0xdfffe000,0xff31ffcf,0xcfffffff,0xfffc0,0xffff7fff,0xffff,0x1d760,
0x1fc00,0x187c00,0x200708b,0x2000000,0x708b0000,0xc00000,0xf8,0xfccf0006,0x33ffcfc,0xfffe0000,0xbfffffff,0xb6,0x7ff0000,0x7c,0xfffff800,0x10000,
0x9fc80005,0x3d9f,0x20000,0xffff0000,0x7ff,0x200ff800,0xfbc00000,0x3eef,0xe000000,0xff800000,0xfffffc00,0xfffffffb,0x10120200,0xff1e2000,0x10000000,0xb0002000,
0x40000000,0x10480000,0x4e002000,0x2000,0x30002000,0x602100,0x18000000,0x24000400,0x7000000,0xf00,0x3000000,0x2a00000,0x3d7e0000,0xdf,0x40,0x6800000,
0xe0000000,0x300000,0x100000,0x20040000,0x200,0x1800000,0x9fe00001,0xbfff0000,0x7fff,0x10,0xff800,0xc00,0xc0040,0x800000,0xfff70000,0x31021fd,
0xbffffff,0x3ffffff,0x3f3fffff,0xaaff3f3f,0x3fffffff,0x1fdfffff,0xefcfffde,0x1fdc7fff,0x1fff0000,0x1ffe2,0x800,0xc000000,0x4000,0xe000,0x1210,0x50,
0x292,0x333e005,0x333,0xf000,0x3c0f,0x38000,0x80000000,0xfc00,0x55555000,0x36db02a5,0x46100000,0x47900000,0x3ff08000,0xc0000000,0x30000,0x1000,
0x3ffff,0x3800,0x80000,1,0xc19d0000,2,0x400000,0xd0000ff,0x7108000,0x5f7ffc00,0x7fdb
};

U_NAMESPACE_END
Expand Down
2 changes: 1 addition & 1 deletion icu4c/source/tools/toolutil/swapimpl.cpp
Expand Up @@ -182,7 +182,7 @@ uprops_swap(const UDataSwapper *ds,
pInfo->dataFormat[1]==0x50 &&
pInfo->dataFormat[2]==0x72 &&
pInfo->dataFormat[3]==0x6f &&
(3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=8) &&
(3<=pInfo->formatVersion[0] && pInfo->formatVersion[0]<=9) &&
(pInfo->formatVersion[0]>=7 ||
(pInfo->formatVersion[2]==UTRIE_SHIFT &&
pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT))
Expand Down
Expand Up @@ -294,10 +294,8 @@ public int getAdditional(int codepoint, int column) {
*/
public VersionInfo getAge(int codepoint)
{
    // Unsigned shift on purpose: a signed shift would sign-extend whenever the
    // top bit of the properties word is set and yield a negative major version.
    // NOTE(review): assumes AGE_SHIFT_ places the age field at the top of the
    // word, like the C-side UPROPS_AGE_SHIFT of 24 -- confirm.
    int version = getAdditional(codepoint, 0) >>> AGE_SHIFT_;
    // Packed age: major version above the low 2 bits, minor version in the low 2 bits.
    return VersionInfo.getInstance(version >> 2, version & 3, 0, 0);
}

private static final int GC_CN_MASK = getMask(UCharacter.UNASSIGNED);
Expand Down Expand Up @@ -1546,14 +1544,6 @@ public static final int mergeScriptCodeOrIndex(int scriptX) {
*/
private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f;

/**
* First nibble shift
*/
private static final int FIRST_NIBBLE_SHIFT_ = 0x4;
/**
* Second nibble mask
*/
private static final int LAST_NIBBLE_MASK_ = 0xF;
/**
* Age value shift
*/
Expand Down Expand Up @@ -1633,7 +1623,7 @@ private UCharacterProperty() throws IOException
private static final class IsAcceptable implements ICUBinary.Authenticate {
    @Override
    public boolean isDataVersionAcceptable(byte version[]) {
        // Accept only major version 9 (first byte). Version 9 is the format in
        // which the Age bit fields changed from 4:4 to 6:2, so older data would
        // be decoded incorrectly by this class.
        return version[0] == 9;
    }
}
private static final int DATA_FORMAT = 0x5550726F; // "UPro"
Expand Down
30 changes: 18 additions & 12 deletions tools/unicode/c/genprops/corepropsbuilder.cpp
Expand Up @@ -47,7 +47,7 @@ the udata API for loading ICU data. Especially, a UDataInfo structure
precedes the actual data. It contains platform properties values and the
file format version.
The following is a description of format version 8.0 .
The following is a description of format version 9.0 .
Data contents:
Expand Down Expand Up @@ -295,6 +295,10 @@ The 6 bits in vector word 2 that stored emoji properties are unused again.
ICU 75 uses the vector word 2 bits 31..26 for encoded Identifier_Type bit sets.
--- Changes in format version 9.0 (ICU 76) ---
Age major:minor version bit fields changed from 4:4 to 6:2 so that age=16.0 fits.
----------------------------------------------------------------------------- */

U_NAMESPACE_USE
Expand All @@ -312,8 +316,8 @@ UDataInfo dataInfo={
0,

{ 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */
{ 8, 0, 0, 0 }, /* formatVersion */
{ 15, 1, 0, 0 } /* dataVersion */
{ 9, 0, 0, 0 }, /* formatVersion */
{ 16, 0, 0, 0 } /* dataVersion */
};

inline uint32_t splitScriptCodeOrIndex(uint32_t v) {
Expand Down Expand Up @@ -712,9 +716,10 @@ CorePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,

UChar32 start=props.start;
UChar32 end=props.end;
UChar32 pvecEnd=end;
if(start==0 && end==0x10ffff) {
// Also set bits for initialValue and errorValue.
end=UPVEC_MAX_CP;
pvecEnd=UPVEC_MAX_CP;
}

if(newValues.containsSome(0, UCHAR_BINARY_LIMIT-1)) {
Expand All @@ -724,7 +729,7 @@ CorePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
if(newValues.contains(p2b.prop)) {
uint32_t mask=U_MASK(p2b.vecShift);
uint32_t value= props.binProps[p2b.prop] ? mask : 0;
upvec_setValue(pv, start, end, p2b.vecWord, value, mask, &errorCode);
upvec_setValue(pv, start, pvecEnd, p2b.vecWord, value, mask, &errorCode);
}
}
}
Expand All @@ -738,20 +743,21 @@ CorePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
uint32_t mask=p2e.vecMask;
uint32_t value=(uint32_t)(props.getIntProp(p2e.prop)<<p2e.vecShift);
U_ASSERT((value&mask)==value);
upvec_setValue(pv, start, end, p2e.vecWord, value, mask, &errorCode);
upvec_setValue(pv, start, pvecEnd, p2e.vecWord, value, mask, &errorCode);
}
}
}
if(newValues.contains(UCHAR_AGE)) {
if(props.age[0]>15 || props.age[1]>15 || props.age[2]!=0 || props.age[3]!=0) {
if(props.age[0]>UPROPS_AGE_MAJOR_MAX || props.age[1]>UPROPS_AGE_MINOR_MAX ||
props.age[2]!=0 || props.age[3]!=0) {
char buffer[U_MAX_VERSION_STRING_LENGTH];
u_versionToString(props.age, buffer);
fprintf(stderr, "genprops error: age %s cannot be encoded\n", buffer);
errorCode=U_ILLEGAL_ARGUMENT_ERROR;
return;
}
uint32_t version=(props.age[0]<<4)|props.age[1];
upvec_setValue(pv, start, end,
uint32_t version=(props.age[0]<<2)|props.age[1];
upvec_setValue(pv, start, pvecEnd,
0, version<<UPROPS_AGE_SHIFT, UPROPS_AGE_MASK,
&errorCode);
}
Expand All @@ -773,7 +779,7 @@ CorePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
// Use UPROPS_SCRIPT_X_MASK:
// When writing a Script code, remove Script_Extensions bits as well.
// If needed, they will get written again.
upvec_setValue(pv, start, end, 0, value, UPROPS_SCRIPT_X_MASK, &errorCode);
upvec_setValue(pv, start, pvecEnd, 0, value, UPROPS_SCRIPT_X_MASK, &errorCode);
}
// Write a new (Script, Script_Extensions) value if there are Script_Extensions
// and either Script or Script_Extensions are new on the current line.
Expand Down Expand Up @@ -820,12 +826,12 @@ CorePropsBuilder::setProps(const UniProps &props, const UnicodeSet &newValues,
return;
}
scriptX|=splitScriptCodeOrIndex(index);
upvec_setValue(pv, start, end, 0, scriptX, UPROPS_SCRIPT_X_MASK, &errorCode);
upvec_setValue(pv, start, pvecEnd, 0, scriptX, UPROPS_SCRIPT_X_MASK, &errorCode);
}
if(newValues.contains(UCHAR_IDENTIFIER_TYPE)) {
uint32_t encodedType=encodeIdentifierType(props.idType, start==0xA9CF && start==end, errorCode);
upvec_setValue(
pv, start, end, 2,
pv, start, pvecEnd, 2,
encodedType << UPROPS_2_ID_TYPE_SHIFT, UPROPS_2_ID_TYPE_MASK,
&errorCode);
}
Expand Down

0 comments on commit 4d9612b

Please sign in to comment.