Skip to content

Commit

Permalink
ICU-22637 Rewrite custom timezone parser
Browse files Browse the repository at this point in the history
See #2792
  • Loading branch information
FrankYFTang committed Jan 19, 2024
1 parent 3eb8923 commit c833608
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 192 deletions.
189 changes: 83 additions & 106 deletions icu4c/source/i18n/timezone.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,9 @@
#include "unicode/utypes.h"
#include "unicode/ustring.h"
#include "uassert.h"
#include "uinvchar.h"
#include "ustr_imp.h"
#include "util.h"

#ifdef U_DEBUG_TZ
# include <stdio.h>
Expand Down Expand Up @@ -75,7 +77,6 @@ static char gStrBuf[256];
#include "unicode/gregocal.h"
#include "unicode/ures.h"
#include "unicode/tzfmt.h"
#include "unicode/numfmt.h"
#include "gregoimp.h"
#include "uresimp.h" // struct UResourceBundle
#include "olsontz.h"
Expand Down Expand Up @@ -1369,123 +1370,99 @@ TimeZone::getCustomID(const UnicodeString& id, UnicodeString& normalized, UError
UBool
TimeZone::parseCustomID(const UnicodeString& id, int32_t& sign,
int32_t& hour, int32_t& min, int32_t& sec) {
static const int32_t kParseFailed = -99999;

NumberFormat* numberFormat = 0;
UnicodeString idUppercase = id;
idUppercase.toUpper("");

if (id.length() > GMT_ID_LENGTH &&
idUppercase.startsWith(GMT_ID, GMT_ID_LENGTH))
{
ParsePosition pos(GMT_ID_LENGTH);
sign = 1;
hour = 0;
min = 0;
sec = 0;

if (id[pos.getIndex()] == MINUS /*'-'*/) {
sign = -1;
} else if (id[pos.getIndex()] != PLUS /*'+'*/) {
if (id.length() < GMT_ID_LENGTH) {
return false;
}
if (0 != u_strncasecmp(id.getBuffer(), GMT_ID, GMT_ID_LENGTH, 0)) {
return false;
}
// ICU_Utility::parseNumber also accepts non-ASCII digits, so we must first
// check that the ID contains only ASCII chars.
if (!uprv_isInvariantUString(id.getBuffer(), id.length())) {
return false;
}
sign = 1;
hour = 0;
min = 0;
sec = 0;

if (id[GMT_ID_LENGTH] == MINUS /*'-'*/) {
sign = -1;
} else if (id[GMT_ID_LENGTH] != PLUS /*'+'*/) {
return false;
}

int32_t start = GMT_ID_LENGTH + 1;
int32_t pos = start;
hour = ICU_Utility::parseNumber(id, pos, 10);
if (pos == id.length()) {
// Handle the following cases
// HHmmss
// Hmmss
// HHmm
// Hmm
// HH
// H

// Get all digits
// Should be 1 to 6 digits.
int32_t length = pos - start;
switch (length) {
case 1: // H
case 2: // HH
// already set to hour
break;
case 3: // Hmm
case 4: // HHmm
min = hour % 100;
hour /= 100;
break;
case 5: // Hmmss
case 6: // HHmmss
sec = hour % 100;
min = (hour/100) % 100;
hour /= 10000;
break;
default:
// invalid length: the digit count must be 1 to 6
return false;
}
} else {
// Handle the following cases
// HH:mm:ss
// H:mm:ss
// HH:mm
// H:mm
if (pos - start < 1 || pos - start > 2 || id[pos] != COLON) {
return false;
}
pos.setIndex(pos.getIndex() + 1);

UErrorCode success = U_ZERO_ERROR;
numberFormat = NumberFormat::createInstance(success);
if(U_FAILURE(success)){
pos++; // skip : after H or HH
if (id.length() == pos) {
return false;
}
numberFormat->setParseIntegerOnly(true);
//numberFormat->setLenient(true); // TODO: May need to set this, depends on latest timezone parsing

// Look for either hh:mm, hhmm, or hh
int32_t start = pos.getIndex();
Formattable n(kParseFailed);
numberFormat->parse(id, n, pos);
if (pos.getIndex() == start) {
delete numberFormat;
start = pos;
min = ICU_Utility::parseNumber(id, pos, 10);
if (pos - start != 2) {
return false;
}
hour = n.getLong();

if (pos.getIndex() < id.length()) {
if (pos.getIndex() - start > 2
|| id[pos.getIndex()] != COLON) {
delete numberFormat;
if (id.length() > pos) {
if (id[pos] != COLON) {
return false;
}
// hh:mm
pos.setIndex(pos.getIndex() + 1);
int32_t oldPos = pos.getIndex();
n.setLong(kParseFailed);
numberFormat->parse(id, n, pos);
if ((pos.getIndex() - oldPos) != 2) {
// must be 2 digits
delete numberFormat;
pos++; // skip : after mm
start = pos;
sec = ICU_Utility::parseNumber(id, pos, 10);
if (pos - start != 2 || id.length() > pos) {
return false;
}
min = n.getLong();
if (pos.getIndex() < id.length()) {
if (id[pos.getIndex()] != COLON) {
delete numberFormat;
return false;
}
// [:ss]
pos.setIndex(pos.getIndex() + 1);
oldPos = pos.getIndex();
n.setLong(kParseFailed);
numberFormat->parse(id, n, pos);
if (pos.getIndex() != id.length()
|| (pos.getIndex() - oldPos) != 2) {
delete numberFormat;
return false;
}
sec = n.getLong();
}
} else {
// Supported formats are below -
//
// HHmmss
// Hmmss
// HHmm
// Hmm
// HH
// H

int32_t length = pos.getIndex() - start;
if (length <= 0 || 6 < length) {
// invalid length
delete numberFormat;
return false;
}
switch (length) {
case 1:
case 2:
// already set to hour
break;
case 3:
case 4:
min = hour % 100;
hour /= 100;
break;
case 5:
case 6:
sec = hour % 100;
min = (hour/100) % 100;
hour /= 10000;
break;
}
}

delete numberFormat;

if (hour > kMAX_CUSTOM_HOUR || min > kMAX_CUSTOM_MIN || sec > kMAX_CUSTOM_SEC) {
return false;
}
return true;
}
return false;
if (hour > kMAX_CUSTOM_HOUR ||
min > kMAX_CUSTOM_MIN ||
sec > kMAX_CUSTOM_SEC) {
return false;
}
return true;
}

UnicodeString&
Expand Down
70 changes: 45 additions & 25 deletions icu4c/source/test/intltest/tztest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1171,36 +1171,56 @@ void TimeZoneTest::TestCustomParse()

struct
{
const char *customId;
const char16_t *customId;
int32_t expectedOffset;
}
kData[] =
{
// ID Expected offset in seconds
{"GMT", kUnparseable}, //Isn't custom. [returns normal GMT]
{"GMT-YOUR.AD.HERE", kUnparseable},
{"GMT0", kUnparseable},
{"GMT+0", (0)},
{"GMT+1", (1*60*60)},
{"GMT-0030", (-30*60)},
{"GMT+15:99", kUnparseable},
{"GMT+", kUnparseable},
{"GMT-", kUnparseable},
{"GMT+0:", kUnparseable},
{"GMT-:", kUnparseable},
{"GMT-YOUR.AD.HERE", kUnparseable},
{"GMT+0010", (10*60)}, // Interpret this as 00:10
{"GMT-10", (-10*60*60)},
{"GMT+30", kUnparseable},
{"GMT-3:30", (-(3*60+30)*60)},
{"GMT-230", (-(2*60+30)*60)},
{"GMT+05:13:05", ((5*60+13)*60+5)},
{"GMT-71023", (-((7*60+10)*60+23))},
{"GMT+01:23:45:67", kUnparseable},
{"GMT+01:234", kUnparseable},
{"GMT-2:31:123", kUnparseable},
{"GMT+3:75", kUnparseable},
{"GMT-01010101", kUnparseable},
{u"GMT", kUnparseable}, //Isn't custom. [returns normal GMT]
{u"GMT-YOUR.AD.HERE", kUnparseable},
{u"GMT0", kUnparseable},
{u"GMT+0", (0)},
{u"GMT+1", (1*60*60)},
{u"GMT-0030", (-30*60)},
{u"GMT+15:99", kUnparseable},
{u"GMT+", kUnparseable},
{u"GMT-", kUnparseable},
{u"GMT+0:", kUnparseable},
{u"GMT-:", kUnparseable},
{u"GMT-YOUR.AD.HERE", kUnparseable},
{u"GMT+0010", (10*60)}, // Interpret this as 00:10
{u"GMT-10", (-10*60*60)},
{u"GMT+30", kUnparseable},
{u"GMT-3:30", (-(3*60+30)*60)},
{u"GMT-230", (-(2*60+30)*60)},
{u"GMT+05:13:05", ((5*60+13)*60+5)},
{u"GMT-71023", (-((7*60+10)*60+23))},
{u"GMT+01:23:45:67", kUnparseable},
{u"GMT+01:234", kUnparseable},
{u"GMT-2:31:123", kUnparseable},
{u"GMT+3:75", kUnparseable},
{u"GMT-01010101", kUnparseable},
{u"GMT-4E58", kUnparseable}, // ICU-22637
{u"GMT-4e58", kUnparseable}, // ICU-22637
{u"GMT-1E01", kUnparseable}, // ICU-22637
{u"GMT-2E01", kUnparseable}, // ICU-22637
{u"GMT-2e01", kUnparseable}, // ICU-22637
{u"GMT-9e02", kUnparseable}, // ICU-22637
{u"GMT-1e03", kUnparseable}, // ICU-22637
{u"GMT-2e03", kUnparseable}, // ICU-22637
{u"GMT-500M", kUnparseable}, // ICU-22637
{u"GMT-500T", kUnparseable}, // ICU-22637
{u"GMT-9E00", kUnparseable}, // ICU-22637
{u"GMT-0X0F", kUnparseable}, // ICU-22637
{u"GMT-0x0F", kUnparseable}, // ICU-22637
{u"GMT-0x12", kUnparseable}, // ICU-22637
{u"GMT-B111", kUnparseable}, // ICU-22637
{u"GMT-b111", kUnparseable}, // ICU-22637
{u"GMT-0b11", kUnparseable}, // ICU-22637
{u"GMT-๑๒", kUnparseable}, // ICU-22637
{u"GMT-๑๒:๓๔", kUnparseable}, // ICU-22637
{u"GMT+๑๒:๓๔:๕๖", kUnparseable}, // ICU-22637
{0, 0}
};

Expand Down

0 comments on commit c833608

Please sign in to comment.