Browse Source
Bug#28916 LDML doesn't work for utf8
Bug#28916 LDML doesn't work for utf8 and is not described in the manual

- Adding missing initialization for utf8 collations.
- Minor code clean-ups: renaming variables, moving code into a new separate function.
- Adding a test to check that both ucs2 and utf8 user-defined collations work (ucs2_test_ci and utf8_test_ci).
- Adding a Vietnamese collation as a complex user-defined collation example.

include/m_ctype.h:
  Renaming variables to match collation names (for convenience).
mysys/charset-def.c:
  - Removing redundant declarations for variables declared in m_ctype.h.
  - Renaming variables to match collation names (for convenience).
mysys/charset.c:
  - Renaming "new" to "newcs", to avoid using a C++ reserved word as a variable name.
  - Moving UCA initialization code into a separate function.
  - The bug fix itself: adding initialization of utf8 collations.
strings/ctype-uca.c:
  Renaming variables to match collation names (for convenience).
strings/ctype.c:
  Increasing buffer size to fit tailoring for languages with complex rules (e.g. Vietnamese).
mysql-test/r/ctype_ldml.result:
  Adding test case.
mysql-test/std_data/Index.xml:
  Adding Index.xml example with user-defined collations.
mysql-test/t/ctype_ldml-master.opt:
  Adding OPT file for the test case, to use the example Index.xml file.
mysql-test/t/ctype_ldml.test:
  Adding test case.

pull/374/head
9 changed files with 464 additions and 19 deletions
-
3 include/m_ctype.h
-
296 mysql-test/r/ctype_ldml.result
-
71 mysql-test/std_data/Index.xml
-
2 mysql-test/t/ctype_ldml-master.opt
-
63 mysql-test/t/ctype_ldml.test
-
6 mysys/charset-def.c
-
36 mysys/charset.c
-
4 strings/ctype-uca.c
-
2 strings/ctype.c
@ -0,0 +1,296 @@ |
|||
drop table if exists t1; |
|||
set names utf8; |
|||
show variables like 'character_sets_dir%'; |
|||
Variable_name Value |
|||
character_sets_dir MYSQL_TEST_DIR/std_data/ |
|||
show collation like 'utf8_test_ci'; |
|||
Collation Charset Id Default Compiled Sortlen |
|||
utf8_test_ci utf8 240 8 |
|||
create table t1 (c1 char(1) character set utf8 collate utf8_test_ci); |
|||
insert into t1 values ('a'); |
|||
select * from t1 where c1='b'; |
|||
c1 |
|||
a |
|||
drop table t1; |
|||
show collation like 'ucs2_test_ci'; |
|||
Collation Charset Id Default Compiled Sortlen |
|||
ucs2_test_ci ucs2 241 8 |
|||
create table t1 (c1 char(1) character set ucs2 collate ucs2_test_ci); |
|||
insert into t1 values ('a'); |
|||
select * from t1 where c1='b'; |
|||
c1 |
|||
a |
|||
drop table t1; |
|||
show collation like 'ucs2_vn_ci'; |
|||
Collation Charset Id Default Compiled Sortlen |
|||
ucs2_vn_ci ucs2 242 8 |
|||
create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); |
|||
insert into t1 values (0x0061),(0x0041),(0x00E0),(0x00C0),(0x1EA3),(0x1EA2), |
|||
(0x00E3),(0x00C3),(0x00E1),(0x00C1),(0x1EA1),(0x1EA0); |
|||
insert into t1 values (0x0103),(0x0102),(0x1EB1),(0x1EB0),(0x1EB3),(0x1EB2), |
|||
(0x1EB5),(0x1EB4),(0x1EAF),(0x1EAE),(0x1EB7),(0x1EB6); |
|||
insert into t1 values (0x00E2),(0x00C2),(0x1EA7),(0x1EA6),(0x1EA9),(0x1EA8), |
|||
(0x1EAB),(0x1EAA),(0x1EA5),(0x1EA4),(0x1EAD),(0x1EAC); |
|||
insert into t1 values ('b'),('B'),('c'),('C'); |
|||
insert into t1 values ('d'),('D'),(0x0111),(0x0110); |
|||
insert into t1 values (0x0065),(0x0045),(0x00E8),(0x00C8),(0x1EBB),(0x1EBA), |
|||
(0x1EBD),(0x1EBC),(0x00E9),(0x00C9),(0x1EB9),(0x1EB8); |
|||
insert into t1 values (0x00EA),(0x00CA),(0x1EC1),(0x1EC0),(0x1EC3),(0x1EC2), |
|||
(0x1EC5),(0x1EC4),(0x1EBF),(0x1EBE),(0x1EC7),(0x1EC6); |
|||
insert into t1 values ('g'),('G'),('h'),('H'); |
|||
insert into t1 values (0x0069),(0x0049),(0x00EC),(0x00CC),(0x1EC9),(0x1EC8), |
|||
(0x0129),(0x0128),(0x00ED),(0x00CD),(0x1ECB),(0x1ECA); |
|||
insert into t1 values ('k'),('K'),('l'),('L'),('m'),('M'); |
|||
insert into t1 values (0x006F),(0x004F),(0x00F2),(0x00D2),(0x1ECF),(0x1ECE), |
|||
(0x00F5),(0x00D5),(0x00F3),(0x00D3),(0x1ECD),(0x1ECC); |
|||
insert into t1 values (0x00F4),(0x00D4),(0x1ED3),(0x1ED2),(0x1ED5),(0x1ED4), |
|||
(0x1ED7),(0x1ED6),(0x1ED1),(0x1ED0),(0x1ED9),(0x1ED8); |
|||
insert into t1 values (0x01A1),(0x01A0),(0x1EDD),(0x1EDC),(0x1EDF),(0x1EDE), |
|||
(0x1EE1),(0x1EE0),(0x1EDB),(0x1EDA),(0x1EE3),(0x1EE2); |
|||
insert into t1 values ('p'),('P'),('q'),('Q'),('r'),('R'),('s'),('S'),('t'),('T'); |
|||
insert into t1 values (0x0075),(0x0055),(0x00F9),(0x00D9),(0x1EE7),(0x1EE6), |
|||
(0x0169),(0x0168),(0x00FA),(0x00DA),(0x1EE5),(0x1EE4); |
|||
insert into t1 values (0x01B0),(0x01AF),(0x1EEB),(0x1EEA),(0x1EED),(0x1EEC), |
|||
(0x1EEF),(0x1EEE),(0x1EE9),(0x1EE8),(0x1EF1),(0x1EF0); |
|||
insert into t1 values ('v'),('V'),('x'),('X'); |
|||
insert into t1 values (0x0079),(0x0059),(0x1EF3),(0x1EF2),(0x1EF7),(0x1EF6), |
|||
(0x1EF9),(0x1EF8),(0x00FD),(0x00DD),(0x1EF5),(0x1EF4); |
|||
select hex(c1) as h, c1 from t1 order by c1, h; |
|||
h c1 |
|||
0041 A |
|||
0061 a |
|||
00C0 À |
|||
00C1 Á |
|||
00C3 Ã |
|||
00E0 à |
|||
00E1 á |
|||
00E3 ã |
|||
1EA0 Ạ |
|||
1EA1 ạ |
|||
1EA2 Ả |
|||
1EA3 ả |
|||
0102 Ă |
|||
0103 ă |
|||
1EAE Ắ |
|||
1EAF ắ |
|||
1EB0 Ằ |
|||
1EB1 ằ |
|||
1EB2 Ẳ |
|||
1EB3 ẳ |
|||
1EB4 Ẵ |
|||
1EB5 ẵ |
|||
1EB6 Ặ |
|||
1EB7 ặ |
|||
00C2 Â |
|||
00E2 â |
|||
1EA4 Ấ |
|||
1EA5 ấ |
|||
1EA6 Ầ |
|||
1EA7 ầ |
|||
1EA8 Ẩ |
|||
1EA9 ẩ |
|||
1EAA Ẫ |
|||
1EAB ẫ |
|||
1EAC Ậ |
|||
1EAD ậ |
|||
0042 B |
|||
0062 b |
|||
0043 C |
|||
0063 c |
|||
0044 D |
|||
0064 d |
|||
0110 Đ |
|||
0111 đ |
|||
0045 E |
|||
0065 e |
|||
00C8 È |
|||
00C9 É |
|||
00E8 è |
|||
00E9 é |
|||
1EB8 Ẹ |
|||
1EB9 ẹ |
|||
1EBA Ẻ |
|||
1EBB ẻ |
|||
1EBC Ẽ |
|||
1EBD ẽ |
|||
00CA Ê |
|||
00EA ê |
|||
1EBE Ế |
|||
1EBF ế |
|||
1EC0 Ề |
|||
1EC1 ề |
|||
1EC2 Ể |
|||
1EC3 ể |
|||
1EC4 Ễ |
|||
1EC5 ễ |
|||
1EC6 Ệ |
|||
1EC7 ệ |
|||
0047 G |
|||
0067 g |
|||
0048 H |
|||
0068 h |
|||
0049 I |
|||
0069 i |
|||
00CC Ì |
|||
00CD Í |
|||
00EC ì |
|||
00ED í |
|||
0128 Ĩ |
|||
0129 ĩ |
|||
1EC8 Ỉ |
|||
1EC9 ỉ |
|||
1ECA Ị |
|||
1ECB ị |
|||
004B K |
|||
006B k |
|||
004C L |
|||
006C l |
|||
004D M |
|||
006D m |
|||
004F O |
|||
006F o |
|||
00D2 Ò |
|||
00D3 Ó |
|||
00D5 Õ |
|||
00F2 ò |
|||
00F3 ó |
|||
00F5 õ |
|||
1ECC Ọ |
|||
1ECD ọ |
|||
1ECE Ỏ |
|||
1ECF ỏ |
|||
00D4 Ô |
|||
00F4 ô |
|||
1ED0 Ố |
|||
1ED1 ố |
|||
1ED2 Ồ |
|||
1ED3 ồ |
|||
1ED4 Ổ |
|||
1ED5 ổ |
|||
1ED6 Ỗ |
|||
1ED7 ỗ |
|||
1ED8 Ộ |
|||
1ED9 ộ |
|||
01A0 Ơ |
|||
01A1 ơ |
|||
1EDA Ớ |
|||
1EDB ớ |
|||
1EDC Ờ |
|||
1EDD ờ |
|||
1EDE Ở |
|||
1EDF ở |
|||
1EE0 Ỡ |
|||
1EE1 ỡ |
|||
1EE2 Ợ |
|||
1EE3 ợ |
|||
0050 P |
|||
0070 p |
|||
0051 Q |
|||
0071 q |
|||
0052 R |
|||
0072 r |
|||
0053 S |
|||
0073 s |
|||
0054 T |
|||
0074 t |
|||
0055 U |
|||
0075 u |
|||
00D9 Ù |
|||
00DA Ú |
|||
00F9 ù |
|||
00FA ú |
|||
0168 Ũ |
|||
0169 ũ |
|||
1EE4 Ụ |
|||
1EE5 ụ |
|||
1EE6 Ủ |
|||
1EE7 ủ |
|||
01AF Ư |
|||
01B0 ư |
|||
1EE8 Ứ |
|||
1EE9 ứ |
|||
1EEA Ừ |
|||
1EEB ừ |
|||
1EEC Ử |
|||
1EED ử |
|||
1EEE Ữ |
|||
1EEF ữ |
|||
1EF0 Ự |
|||
1EF1 ự |
|||
0056 V |
|||
0076 v |
|||
0058 X |
|||
0078 x |
|||
0059 Y |
|||
0079 y |
|||
00DD Ý |
|||
00FD ý |
|||
1EF2 Ỳ |
|||
1EF3 ỳ |
|||
1EF4 Ỵ |
|||
1EF5 ỵ |
|||
1EF6 Ỷ |
|||
1EF7 ỷ |
|||
1EF8 Ỹ |
|||
1EF9 ỹ |
|||
select group_concat(hex(c1) order by hex(c1)) from t1 group by c1; |
|||
group_concat(hex(c1) order by hex(c1)) |
|||
0041,0061,00C0,00C1,00C3,00E0,00E1,00E3,1EA0,1EA1,1EA2,1EA3 |
|||
0102,0103,1EAE,1EAF,1EB0,1EB1,1EB2,1EB3,1EB4,1EB5,1EB6,1EB7 |
|||
00C2,00E2,1EA4,1EA5,1EA6,1EA7,1EA8,1EA9,1EAA,1EAB,1EAC,1EAD |
|||
0042,0062 |
|||
0043,0063 |
|||
0044,0064 |
|||
0110,0111 |
|||
0045,0065,00C8,00C9,00E8,00E9,1EB8,1EB9,1EBA,1EBB,1EBC,1EBD |
|||
00CA,00EA,1EBE,1EBF,1EC0,1EC1,1EC2,1EC3,1EC4,1EC5,1EC6,1EC7 |
|||
0047,0067 |
|||
0048,0068 |
|||
0049,0069,00CC,00CD,00EC,00ED,0128,0129,1EC8,1EC9,1ECA,1ECB |
|||
004B,006B |
|||
004C,006C |
|||
004D,006D |
|||
004F,006F,00D2,00D3,00D5,00F2,00F3,00F5,1ECC,1ECD,1ECE,1ECF |
|||
00D4,00F4,1ED0,1ED1,1ED2,1ED3,1ED4,1ED5,1ED6,1ED7,1ED8,1ED9 |
|||
01A0,01A1,1EDA,1EDB,1EDC,1EDD,1EDE,1EDF,1EE0,1EE1,1EE2,1EE3 |
|||
0050,0070 |
|||
0051,0071 |
|||
0052,0072 |
|||
0053,0073 |
|||
0054,0074 |
|||
0055,0075,00D9,00DA,00F9,00FA,0168,0169,1EE4,1EE5,1EE6,1EE7 |
|||
01AF,01B0,1EE8,1EE9,1EEA,1EEB,1EEC,1EED,1EEE,1EEF,1EF0,1EF1 |
|||
0056,0076 |
|||
0058,0078 |
|||
0059,0079,00DD,00FD,1EF2,1EF3,1EF4,1EF5,1EF6,1EF7,1EF8,1EF9 |
|||
select group_concat(c1 order by hex(c1) SEPARATOR '') from t1 group by c1; |
|||
group_concat(c1 order by hex(c1) SEPARATOR '') |
|||
AaÀÁÃàáãẠạẢả |
|||
ĂăẮắẰằẲẳẴẵẶặ |
|||
ÂâẤấẦầẨẩẪẫẬậ |
|||
Bb |
|||
Cc |
|||
Dd |
|||
Đđ |
|||
EeÈÉèéẸẹẺẻẼẽ |
|||
ÊêẾếỀềỂểỄễỆệ |
|||
Gg |
|||
Hh |
|||
IiÌÍìíĨĩỈỉỊị |
|||
Kk |
|||
Ll |
|||
Mm |
|||
OoÒÓÕòóõỌọỎỏ |
|||
ÔôỐốỒồỔổỖỗỘộ |
|||
ƠơỚớỜờỞởỠỡỢợ |
|||
Pp |
|||
Qq |
|||
Rr |
|||
Ss |
|||
Tt |
|||
UuÙÚùúŨũỤụỦủ |
|||
ƯưỨứỪừỬửỮữỰự |
|||
Vv |
|||
Xx |
|||
YyÝýỲỳỴỵỶỷỸỹ |
|||
drop table t1; |
@ -0,0 +1,71 @@ |
|||
<charsets> |
|||
|
|||
<charset name="utf8"> |
|||
<collation name="utf8_test_ci" id="240"> |
|||
<rules> |
|||
<reset>a</reset> |
|||
<s>b</s> |
|||
</rules> |
|||
</collation> |
|||
|
|||
</charset> |
|||
|
|||
<charset name="ucs2"> |
|||
<collation name="ucs2_test_ci" id="241"> |
|||
<rules> |
|||
<reset>a</reset> |
|||
<s>b</s> |
|||
</rules> |
|||
</collation> |
|||
<collation name="ucs2_vn_ci" id="242"> |
|||
<!-- Vietnamese experimental collation --> |
|||
<rules> |
|||
<reset>A</reset> |
|||
<p>\u0103</p><t>\u0102</t> |
|||
<s>\u1EB1</s><t>\u1EB0</t> |
|||
<s>\u1EB3</s><t>\u1EB2</t> |
|||
<s>\u1EB5</s><t>\u1EB4</t> |
|||
<s>\u1EAF</s><t>\u1EAE</t> |
|||
<s>\u1EB7</s><t>\u1EB6</t> |
|||
<p>\u00E2</p><t>\u00C2</t> |
|||
<s>\u1EA7</s><t>\u1EA6</t> |
|||
<s>\u1EA9</s><t>\u1EA8</t> |
|||
<s>\u1EAB</s><t>\u1EAA</t> |
|||
<s>\u1EA5</s><t>\u1EA4</t> |
|||
<s>\u1EAD</s><t>\u1EAC</t> |
|||
<reset>D</reset> |
|||
<p>\u0111</p><t>\u0110</t> |
|||
<reset>E</reset> |
|||
<p>\u00EA</p><t>\u00CA</t> |
|||
<s>\u1EC1</s><t>\u1EC0</t> |
|||
<s>\u1EC3</s><t>\u1EC2</t> |
|||
<s>\u1EC5</s><t>\u1EC4</t> |
|||
<s>\u1EBF</s><t>\u1EBE</t> |
|||
<s>\u1EC7</s><t>\u1EC6</t> |
|||
<reset>O</reset> |
|||
<p>\u00F4</p><t>\u00D4</t> |
|||
<s>\u1ED3</s><t>\u1ED2</t> |
|||
<s>\u1ED5</s><t>\u1ED4</t> |
|||
<s>\u1ED7</s><t>\u1ED6</t> |
|||
<s>\u1ED1</s><t>\u1ED0</t> |
|||
<s>\u1ED9</s><t>\u1ED8</t> |
|||
<p>\u01A1</p><t>\u01A0</t> |
|||
<s>\u1EDD</s><t>\u1EDC</t> |
|||
<s>\u1EDF</s><t>\u1EDE</t> |
|||
<s>\u1EE1</s><t>\u1EE0</t> |
|||
<s>\u1EDB</s><t>\u1EDA</t> |
|||
<s>\u1EE3</s><t>\u1EE2</t> |
|||
<reset>U</reset> |
|||
<p>\u01B0</p><t>\u01AF</t> |
|||
<s>\u1EEB</s><t>\u1EEA</t> |
|||
<s>\u1EED</s><t>\u1EEC</t> |
|||
<s>\u1EEF</s><t>\u1EEE</t> |
|||
<s>\u1EE9</s><t>\u1EE8</t> |
|||
<s>\u1EF1</s><t>\u1EF0</t> |
|||
</rules> |
|||
|
|||
</collation> |
|||
|
|||
</charset> |
|||
|
|||
</charsets> |
@ -0,0 +1,2 @@ |
|||
--character-sets-dir=$MYSQL_TEST_DIR/std_data/ |
|||
|
@ -0,0 +1,63 @@ |
|||
--source include/have_ucs2.inc |
|||
|
|||
--disable_warnings |
|||
drop table if exists t1; |
|||
--enable_warnings |
|||
|
|||
set names utf8; |
|||
|
|||
--replace_result $MYSQL_TEST_DIR MYSQL_TEST_DIR |
|||
show variables like 'character_sets_dir%'; |
|||
|
|||
show collation like 'utf8_test_ci'; |
|||
create table t1 (c1 char(1) character set utf8 collate utf8_test_ci); |
|||
insert into t1 values ('a'); |
|||
select * from t1 where c1='b'; |
|||
drop table t1; |
|||
|
|||
show collation like 'ucs2_test_ci'; |
|||
create table t1 (c1 char(1) character set ucs2 collate ucs2_test_ci); |
|||
insert into t1 values ('a'); |
|||
select * from t1 where c1='b'; |
|||
drop table t1; |
|||
|
|||
# |
|||
# Vietnamese experimental collation |
|||
# |
|||
|
|||
show collation like 'ucs2_vn_ci'; |
|||
create table t1 (c1 char(1) character set ucs2 collate ucs2_vn_ci); |
|||
insert into t1 values (0x0061),(0x0041),(0x00E0),(0x00C0),(0x1EA3),(0x1EA2), |
|||
(0x00E3),(0x00C3),(0x00E1),(0x00C1),(0x1EA1),(0x1EA0); |
|||
insert into t1 values (0x0103),(0x0102),(0x1EB1),(0x1EB0),(0x1EB3),(0x1EB2), |
|||
(0x1EB5),(0x1EB4),(0x1EAF),(0x1EAE),(0x1EB7),(0x1EB6); |
|||
insert into t1 values (0x00E2),(0x00C2),(0x1EA7),(0x1EA6),(0x1EA9),(0x1EA8), |
|||
(0x1EAB),(0x1EAA),(0x1EA5),(0x1EA4),(0x1EAD),(0x1EAC); |
|||
insert into t1 values ('b'),('B'),('c'),('C'); |
|||
insert into t1 values ('d'),('D'),(0x0111),(0x0110); |
|||
insert into t1 values (0x0065),(0x0045),(0x00E8),(0x00C8),(0x1EBB),(0x1EBA), |
|||
(0x1EBD),(0x1EBC),(0x00E9),(0x00C9),(0x1EB9),(0x1EB8); |
|||
insert into t1 values (0x00EA),(0x00CA),(0x1EC1),(0x1EC0),(0x1EC3),(0x1EC2), |
|||
(0x1EC5),(0x1EC4),(0x1EBF),(0x1EBE),(0x1EC7),(0x1EC6); |
|||
insert into t1 values ('g'),('G'),('h'),('H'); |
|||
insert into t1 values (0x0069),(0x0049),(0x00EC),(0x00CC),(0x1EC9),(0x1EC8), |
|||
(0x0129),(0x0128),(0x00ED),(0x00CD),(0x1ECB),(0x1ECA); |
|||
insert into t1 values ('k'),('K'),('l'),('L'),('m'),('M'); |
|||
insert into t1 values (0x006F),(0x004F),(0x00F2),(0x00D2),(0x1ECF),(0x1ECE), |
|||
(0x00F5),(0x00D5),(0x00F3),(0x00D3),(0x1ECD),(0x1ECC); |
|||
insert into t1 values (0x00F4),(0x00D4),(0x1ED3),(0x1ED2),(0x1ED5),(0x1ED4), |
|||
(0x1ED7),(0x1ED6),(0x1ED1),(0x1ED0),(0x1ED9),(0x1ED8); |
|||
insert into t1 values (0x01A1),(0x01A0),(0x1EDD),(0x1EDC),(0x1EDF),(0x1EDE), |
|||
(0x1EE1),(0x1EE0),(0x1EDB),(0x1EDA),(0x1EE3),(0x1EE2); |
|||
insert into t1 values ('p'),('P'),('q'),('Q'),('r'),('R'),('s'),('S'),('t'),('T'); |
|||
insert into t1 values (0x0075),(0x0055),(0x00F9),(0x00D9),(0x1EE7),(0x1EE6), |
|||
(0x0169),(0x0168),(0x00FA),(0x00DA),(0x1EE5),(0x1EE4); |
|||
insert into t1 values (0x01B0),(0x01AF),(0x1EEB),(0x1EEA),(0x1EED),(0x1EEC), |
|||
(0x1EEF),(0x1EEE),(0x1EE9),(0x1EE8),(0x1EF1),(0x1EF0); |
|||
insert into t1 values ('v'),('V'),('x'),('X'); |
|||
insert into t1 values (0x0079),(0x0059),(0x1EF3),(0x1EF2),(0x1EF7),(0x1EF6), |
|||
(0x1EF9),(0x1EF8),(0x00FD),(0x00DD),(0x1EF5),(0x1EF4); |
|||
select hex(c1) as h, c1 from t1 order by c1, h; |
|||
select group_concat(hex(c1) order by hex(c1)) from t1 group by c1; |
|||
select group_concat(c1 order by hex(c1) SEPARATOR '') from t1 group by c1; |
|||
drop table t1; |
Write
Preview
Loading…
Cancel
Save
Reference in new issue