Skip to content

Commit

Permalink
ICU-22357 Update gb18030 mappings for the -2022 version
Browse files Browse the repository at this point in the history
See #2430
  • Loading branch information
pedberg-icu committed May 18, 2023
1 parent ba1c700 commit 7f5d679
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 29 deletions.
76 changes: 56 additions & 20 deletions icu4c/source/data/mappings/gb18030.ucm
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Copyright (C) 2000-2012, International Business Machines Corporation and others.
# All Rights Reserved.

# ICU codepage data for GB 18030
# ICU codepage data for GB 18030-2022

<code_set_name> "gb18030"
<char_name_mask> "AXXXX"
Expand Down Expand Up @@ -51,7 +51,7 @@

# GB 18030 BMP mappings that are not handled algorithmically are
# generated using gbmake4 and gbtoucm tools. Please see charset/source/gb18030/gb18030.html
# or http://source.icu-project.org/repos/icu/data/trunk/charset/source/gb18030/gb18030.html
# or https://htmlpreview.github.io/?https://github.com/unicode-org/icu-data/blob/main/charset/source/gb18030/gb18030.html
# for more information.

CHARMAP
Expand Down Expand Up @@ -28077,6 +28077,22 @@ CHARMAP
<U9FA3> \xFD\x99 |0
<U9FA4> \xFD\x9A |0
<U9FA5> \xFD\x9B |0
<U9FB4> \xFE\x59 |0
<U9FB4> \x82\x35\x90\x37 |3
<U9FB5> \xFE\x61 |0
<U9FB5> \x82\x35\x90\x38 |3
<U9FB6> \xFE\x66 |0
<U9FB6> \x82\x35\x90\x39 |3
<U9FB7> \xFE\x67 |0
<U9FB7> \x82\x35\x91\x30 |3
<U9FB8> \xFE\x6D |0
<U9FB8> \x82\x35\x91\x31 |3
<U9FB9> \xFE\x7E |0
<U9FB9> \x82\x35\x91\x32 |3
<U9FBA> \xFE\x90 |0
<U9FBA> \x82\x35\x91\x33 |3
<U9FBB> \xFE\xA0 |0
<U9FBB> \x82\x35\x91\x34 |3
<UE000> \xAA\xA1 |0
<UE001> \xAA\xA2 |0
<UE002> \xAA\xA3 |0
Expand Down Expand Up @@ -30010,16 +30026,16 @@ CHARMAP
<UE78A> \xA6\xBE |0
<UE78B> \xA6\xBF |0
<UE78C> \xA6\xC0 |0
<UE78D> \xA6\xD9 |0
<UE78E> \xA6\xDA |0
<UE78F> \xA6\xDB |0
<UE790> \xA6\xDC |0
<UE791> \xA6\xDD |0
<UE792> \xA6\xDE |0
<UE793> \xA6\xDF |0
<UE794> \xA6\xEC |0
<UE795> \xA6\xED |0
<UE796> \xA6\xF3 |0
<UE78D> \xA6\xD9 |1
<UE78E> \xA6\xDA |1
<UE78F> \xA6\xDB |1
<UE790> \xA6\xDC |1
<UE791> \xA6\xDD |1
<UE792> \xA6\xDE |1
<UE793> \xA6\xDF |1
<UE794> \xA6\xEC |1
<UE795> \xA6\xED |1
<UE796> \xA6\xF3 |1
<UE797> \xA6\xF6 |0
<UE798> \xA6\xF7 |0
<UE799> \xA6\xF8 |0
Expand Down Expand Up @@ -30155,27 +30171,27 @@ CHARMAP
<UE81B> \x83\x36\xC9\x37 |0
<UE81C> \x83\x36\xC9\x38 |0
<UE81D> \x83\x36\xC9\x39 |0
<UE81E> \xFE\x59 |0
<UE81E> \xFE\x59 |1
<UE81F> \x83\x36\xCA\x30 |0
<UE820> \x83\x36\xCA\x31 |0
<UE821> \x83\x36\xCA\x32 |0
<UE822> \x83\x36\xCA\x33 |0
<UE823> \x83\x36\xCA\x34 |0
<UE824> \x83\x36\xCA\x35 |0
<UE825> \x83\x36\xCA\x36 |0
<UE826> \xFE\x61 |0
<UE826> \xFE\x61 |1
<UE827> \x83\x36\xCA\x37 |0
<UE828> \x83\x36\xCA\x38 |0
<UE829> \x83\x36\xCA\x39 |0
<UE82A> \x83\x36\xCB\x30 |0
<UE82B> \xFE\x66 |0
<UE82C> \xFE\x67 |0
<UE82B> \xFE\x66 |1
<UE82C> \xFE\x67 |1
<UE82D> \x83\x36\xCB\x31 |0
<UE82E> \x83\x36\xCB\x32 |0
<UE82F> \x83\x36\xCB\x33 |0
<UE830> \x83\x36\xCB\x34 |0
<UE831> \xFE\x6C |0
<UE832> \xFE\x6D |0
<UE832> \xFE\x6D |1
<UE833> \x83\x36\xCB\x35 |0
<UE834> \x83\x36\xCB\x36 |0
<UE835> \x83\x36\xCB\x37 |0
Expand All @@ -30192,7 +30208,7 @@ CHARMAP
<UE840> \x83\x36\xCC\x37 |0
<UE841> \x83\x36\xCC\x38 |0
<UE842> \x83\x36\xCC\x39 |0
<UE843> \xFE\x7E |0
<UE843> \xFE\x7E |1
<UE844> \x83\x36\xCD\x30 |0
<UE845> \x83\x36\xCD\x31 |0
<UE846> \x83\x36\xCD\x32 |0
Expand All @@ -30209,7 +30225,7 @@ CHARMAP
<UE851> \x83\x36\xCE\x33 |0
<UE852> \x83\x36\xCE\x34 |0
<UE853> \x83\x36\xCE\x35 |0
<UE854> \xFE\x90 |0
<UE854> \xFE\x90 |1
<UE855> \xFE\x91 |0
<UE856> \x83\x36\xCE\x36 |0
<UE857> \x83\x36\xCE\x37 |0
Expand All @@ -30225,7 +30241,7 @@ CHARMAP
<UE861> \x83\x36\xCF\x37 |0
<UE862> \x83\x36\xCF\x38 |0
<UE863> \x83\x36\xCF\x39 |0
<UE864> \xFE\xA0 |0
<UE864> \xFE\xA0 |1
<UF92C> \xFD\x9C |0
<UF92D> \x84\x30\x85\x35 |0
<UF92E> \x84\x30\x85\x36 |0
Expand Down Expand Up @@ -30480,6 +30496,26 @@ CHARMAP
<UFA27> \xFE\x4D |0
<UFA28> \xFE\x4E |0
<UFA29> \xFE\x4F |0
<UFE10> \xA6\xD9 |0
<UFE10> \x84\x31\x82\x36 |3
<UFE11> \xA6\xDB |0
<UFE11> \x84\x31\x82\x37 |3
<UFE12> \xA6\xDA |0
<UFE12> \x84\x31\x82\x38 |3
<UFE13> \xA6\xDC |0
<UFE13> \x84\x31\x82\x39 |3
<UFE14> \xA6\xDD |0
<UFE14> \x84\x31\x83\x30 |3
<UFE15> \xA6\xDE |0
<UFE15> \x84\x31\x83\x31 |3
<UFE16> \xA6\xDF |0
<UFE16> \x84\x31\x83\x32 |3
<UFE17> \xA6\xEC |0
<UFE17> \x84\x31\x83\x33 |3
<UFE18> \xA6\xED |0
<UFE18> \x84\x31\x83\x34 |3
<UFE19> \xA6\xF3 |0
<UFE19> \x84\x31\x83\x35 |3
<UFE30> \xA9\x55 |0
<UFE31> \xA6\xF2 |0
<UFE32> \x84\x31\x85\x38 |0
Expand Down
50 changes: 46 additions & 4 deletions icu4c/source/test/testdata/conversion.txt
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,10 @@ conversion:table(nofallback) {
:intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 },
:int{1}, :int{0}, "", "&C", :bin{""}
}
{
{ // gb18030-2022 changes mapping for 0xFE90
"gb18030",
:bin{ 618130fc318130fc8181303c3e813cfc817afe90a8bc },
"a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z\ue854\u1e3f",
"a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z\u9fba\u1e3f",
:intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17,18,20 },
:int{1}, :int{0}, "", "&C", :bin{""}
}
Expand Down Expand Up @@ -826,6 +826,27 @@ conversion:table(nofallback) {
:intvector{ 0, 4, 8, 12 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{ // gb18030->U 2005 vs 2022 part 1 (gb18030 2-byte)
"gb18030",
:bin{ A6D9 A6DA A6DB A6DC A6DF A6EC A6ED A6F3 FE59 FE61 FE66 FE67 FE6D FE7E FE90 FEA0 },
"\uFE10\uFE12\uFE11\uFE13\uFE16\uFE17\uFE18\uFE19\u9FB4\u9FB5\u9FB6\u9FB7\u9FB8\u9FB9\u9FBA\u9FBB", // -2005: "\uE78D\uE78E\uE78F\uE790\uE793\uE794\uE795\uE796\uE81E\uE826\uE82B\uE82C\uE832\uE843\uE854\uE864"
:intvector{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{ // gb18030->U 2005 vs 2022 part 2 (gb18030 4-byte)
"gb18030",
:bin{ 82359037 82359038 82359039 82359130 82359131 82359132 82359133 82359134 84318236 84318239 84318332 84318335 },
"\u9FB4\u9FB5\u9FB6\u9FB7\u9FB8\u9FB9\u9FBA\u9FBB\uFE10\uFE13\uFE16\uFE19", // unchanged from 2005 mapping
:intvector{ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{ // gb18030->U 2005 vs 2022 part 3 (gb18030 4-byte), non-changing mappings next to or in linear ranges partially overridden by new explicit maps
"gb18030",
:bin{ 82358F33823590368235913584318235843183368431843684318537 },
"\u9FA6\u9FB3\u9FBC\uFE0F\uFE1A\uFE24\uFE2F",
:intvector{ 0, 4, 8, 12, 16, 20, 24 },
:int{1}, :int{0}, "", "?", :bin{""}
}
{
"x11-compound-text",
:bin{ 1b242944b5ac1b2d41a5e31b2d43d5f51b2d4dd01b2d41411b2d43bc1b2d42ff1b2d54df1b2d44c0b31b2d46b41b2d47b01b2d48e01b2d4ca1 },
Expand Down Expand Up @@ -1817,13 +1838,34 @@ conversion:table(nofallback) {
:intvector{},
:int{1}, :int{0}, "", "0", ""
}
{
{ // gb18030-2022 changes mappings for 0xA6DC,0xA6DB
"gb18030",
"\U00020087\ue790\ue78f\u1e3f",
"\U00020087\ufe13\ufe11\u1e3f",
:bin{ 95329031a6dca6dba8bc },
:intvector{ 0,0,0,0,2,2,3,3,4,4 },
:int{1}, :int{0}, "", "0", ""
}
{ // U->gb18030 2005 vs 2022 part 1 (gb18030 2-byte)
"gb18030",
"\uFE10\uFE12\uFE11\uFE13\uFE16\uFE17\uFE18\uFE19\u9FB4\u9FB5\u9FB6\u9FB7\u9FB8\u9FB9\u9FBA\u9FBB", // -2005: "\uE78D\uE78E\uE78F\uE790\uE793\uE794\uE795\uE796\uE81E\uE826\uE82B\uE82C\uE832\uE843\uE854\uE864"
:bin{ A6D9A6DAA6DBA6DCA6DFA6ECA6EDA6F3FE59FE61FE66FE67FE6DFE7EFE90FEA0 },
:intvector{ 0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15 },
:int{1}, :int{0}, "", "0", ""
}
{ // U->gb18030 2005 vs 2022 part 2 (gb18030 fallback mappings from Unicode PUA)
"gb18030",
"\uE78D\uE793\uE794\uE795\uE796\uE81E\uE826\uE82B\uE82C\uE832\uE843\uE854\uE864",
:bin{ A6D9A6DFA6ECA6EDA6F3FE59FE61FE66FE67FE6DFE7EFE90FEA0 },
:intvector{ 0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12 },
:int{1}, :int{0}, "", "0", ""
}
{ // U->gb18030 2005 vs 2022 part 3 (gb18030 4-byte), non-changing mappings next to or in linear ranges partially overridden by new explicit maps
"gb18030",
"\u9FA6\u9FB3\u9FBC\uFE0F\uFE1A\uFE24\uFE2F",
:bin{ 82358F33823590368235913584318235843183368431843684318537 },
:intvector{ 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6 },
:int{1}, :int{0}, "", "0", ""
}
{
"UTF-7",
"\u00a3I\u00a3\u00a4",
Expand Down
4 changes: 2 additions & 2 deletions icu4j/main/shared/data/icudata.jar
Git LFS file not shown
2 changes: 1 addition & 1 deletion icu4j/main/shared/data/icutzdata.jar
Git LFS file not shown
4 changes: 2 additions & 2 deletions icu4j/main/shared/data/testdata.jar
Git LFS file not shown
Binary file not shown.
Binary file not shown.

0 comments on commit 7f5d679

Please sign in to comment.