Skip to content

Commit

Permalink
ICU-22403 Fix icuexportdata out-of-bounds during decomposition
Browse files Browse the repository at this point in the history
  • Loading branch information
Manishearth authored and markusicu committed May 26, 2023
1 parent 0fb1b55 commit 8bbb8f5
Showing 1 changed file with 30 additions and 53 deletions.
83 changes: 30 additions & 53 deletions icu4c/source/tools/icuexportdata/icuexportdata.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,6 @@ void writeDecompositionData(const char* basename, uint32_t baseSize16, uint32_t
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
uset_close(halfWidthCheck);

uset_close(iotaSubscript);
uset_close(halfWidthVoicing);
Expand Down Expand Up @@ -710,6 +709,34 @@ UBool permissibleBmpPair(UBool knownToRoundTrip, UChar32 c, UChar32 second) {
return false;
}


// Locates the `needleLen`-element sequence `needle` inside `storage`.
//
// Returns the index of the first position in `storage` at which every element
// equals the corresponding `needle` code point (each code point is converted
// to uint32_t for the comparison). If no such position exists, the code points
// are appended to the end of `storage`, each converted to `T`, and the index
// where the appended run begins (the previous size of `storage`) is returned.
//
// An empty needle (`needleLen == 0`) matches at index 0 and appends nothing.
template<typename T>
size_t findOrAppend(std::vector<T>& storage, const UChar32* needle, size_t needleLen) {
    const size_t storedLen = storage.size();

    // A complete match needs `needleLen` elements starting at the candidate
    // position, so only positions 0..(storedLen - needleLen) are examined.
    // When the needle is longer than the storage there is nothing to scan,
    // and no read past the end of `storage` can ever happen.
    if (needleLen <= storedLen) {
        const size_t lastStart = storedLen - needleLen;
        for (size_t start = 0; start <= lastStart; ++start) {
            // Count how many leading elements agree at this position.
            size_t matched = 0;
            while (matched < needleLen &&
                   storage[start + matched] == uint32_t(needle[matched])) {
                ++matched;
            }
            if (matched == needleLen) {
                return start; // the whole needle is present here
            }
        }
    }

    // Not present: append the needle; it begins at the old end of storage.
    for (size_t k = 0; k < needleLen; ++k) {
        storage.push_back(T(needle[k]));
    }
    return storedLen;
}


// Computes data for canonical decompositions
void computeDecompositions(const char* basename,
const USet* backwardCombiningStarters,
Expand Down Expand Up @@ -1027,49 +1054,11 @@ void computeDecompositions(const char* basename,
handleError(status, basename);
}
size_t index = 0;
bool writeToStorage = false;
// Sadly, C++ lacks break and continue by label, so using goto in the
// inner loops to break or continue the outer loop.
if (!supplementary) {
outer16: for (;;) {
if (index == storage16.size()) {
writeToStorage = true;
break;
}
if (storage16[index] == utf32[0]) {
for (int32_t i = 1; i < len; ++i) {
if (storage16[index + i] != uint32_t(utf32[i])) {
++index;
// continue outer
goto outer16;
}
}
// break outer
goto after;
}
++index;
}
index = findOrAppend(storage16, utf32, len);
} else {
outer32: for (;;) {
if (index == storage32.size()) {
writeToStorage = true;
break;
}
if (storage32[index] == uint32_t(utf32[0])) {
for (int32_t i = 1; i < len; ++i) {
if (storage32[index + i] != uint32_t(utf32[i])) {
++index;
// continue outer
goto outer32;
}
}
// break outer
goto after;
}
++index;
}
index = findOrAppend(storage32, utf32, len);
}
after:
if (index > 0xFFF) {
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
Expand All @@ -1081,18 +1070,6 @@ void computeDecompositions(const char* basename,
status.set(U_INTERNAL_PROGRAM_ERROR);
handleError(status, basename);
}
if (writeToStorage) {
if (!supplementary) {
for (int32_t i = 0; i < len; ++i) {
storage16.push_back(uint16_t(utf32[i]));
}
} else {
for (int32_t i = 0; i < len; ++i) {
storage32.push_back(uint32_t(utf32[i]));
}
}
}

uint32_t nonRoundTripMarker = 0;
if (!nonNfdOrRoundTrips) {
nonRoundTripMarker = (NON_ROUND_TRIP_MARKER << 16);
Expand Down

0 comments on commit 8bbb8f5

Please sign in to comment.