20 changed files with 18043 additions and 17 deletions
-
166mysql-test/include/ctype_unicode520.inc
-
45mysql-test/r/ctype_ldml.result
-
393mysql-test/r/ctype_like_range.result
-
715mysql-test/r/ctype_uca.result
-
507mysql-test/r/ctype_utf16_uca.result
-
507mysql-test/r/ctype_utf32_uca.result
-
209mysql-test/r/ctype_utf8mb4_uca.result
-
14mysql-test/std_data/Index.xml
-
1mysql-test/suite/engines/funcs/r/db_alter_character_set.result
-
10mysql-test/suite/engines/funcs/r/db_alter_collate_ascii.result
-
10mysql-test/suite/engines/funcs/r/db_alter_collate_utf8.result
-
22mysql-test/t/ctype_ldml.test
-
9mysql-test/t/ctype_like_range.test
-
9mysql-test/t/ctype_uca.test
-
9mysql-test/t/ctype_utf16_uca.test
-
9mysql-test/t/ctype_utf32_uca.test
-
2mysql-test/t/ctype_utf8mb4_uca.test
-
20mysys/charset-def.c
-
12998strings/ctype-uca.c
-
2405strings/ctype-utf8.c
@ -0,0 +1,166 @@ |
|||
# Include script: creates the scratch table t1 used by all statements below |
|||
# WL#2673 Unicode collation algorithm new version |
|||
# LIMIT 0 copies no rows, so t1 starts empty with a single string column c; NOTE(review): c's character set and collation presumably follow the including test's connection settings - confirm |
|||
CREATE TABLE t1 AS SELECT repeat('a', 10) as c LIMIT 0; |
|||
SHOW CREATE TABLE t1; |
|||
|
|||
# |
|||
# Unicode 5.0.0 characters |
|||
# Each code point is inserted as its own row through the _utf32 introducer, grouped by script |
|||
|
|||
# Latin Extended-B and IPA Extensions |
|||
INSERT INTO t1 VALUES (_utf32 0x0180),(_utf32 0x023A); |
|||
INSERT INTO t1 VALUES (_utf32 0x023B),(_utf32 0x023C); |
|||
INSERT INTO t1 VALUES (_utf32 0x023D),(_utf32 0x023E); |
|||
INSERT INTO t1 VALUES (_utf32 0x0241),(_utf32 0x0242); |
|||
INSERT INTO t1 VALUES (_utf32 0x0243),(_utf32 0x0244); |
|||
INSERT INTO t1 VALUES (_utf32 0x0245),(_utf32 0x0246); |
|||
INSERT INTO t1 VALUES (_utf32 0x0247),(_utf32 0x0248); |
|||
INSERT INTO t1 VALUES (_utf32 0x0249),(_utf32 0x024A); |
|||
INSERT INTO t1 VALUES (_utf32 0x024B),(_utf32 0x024C); |
|||
INSERT INTO t1 VALUES (_utf32 0x024D),(_utf32 0x024E); |
|||
INSERT INTO t1 VALUES (_utf32 0x024F),(_utf32 0x026B); |
|||
INSERT INTO t1 VALUES (_utf32 0x027D),(_utf32 0x0289); |
|||
INSERT INTO t1 VALUES (_utf32 0x028C); |
|||
|
|||
# Greek and Coptic |
|||
INSERT INTO t1 VALUES (_utf32 0x037B), (_utf32 0x037C); |
|||
INSERT INTO t1 VALUES (_utf32 0x037D), (_utf32 0x03FD); |
|||
INSERT INTO t1 VALUES (_utf32 0x03FE), (_utf32 0x03FF); |
|||
|
|||
# Cyrillic |
|||
INSERT INTO t1 VALUES (_utf32 0x04C0), (_utf32 0x04CF); |
|||
INSERT INTO t1 VALUES (_utf32 0x04F6), (_utf32 0x04F7); |
|||
INSERT INTO t1 VALUES (_utf32 0x04FA), (_utf32 0x04FB); |
|||
INSERT INTO t1 VALUES (_utf32 0x04FC), (_utf32 0x04FD); |
|||
INSERT INTO t1 VALUES (_utf32 0x04FE), (_utf32 0x04FF); |
|||
INSERT INTO t1 VALUES (_utf32 0x0510), (_utf32 0x0511); |
|||
INSERT INTO t1 VALUES (_utf32 0x0512), (_utf32 0x0513); |
|||
|
|||
# Georgian, Georgian Supplement |
|||
INSERT INTO t1 VALUES (_utf32 0x10A0), (_utf32 0x10A1); |
|||
INSERT INTO t1 VALUES (_utf32 0x10A2), (_utf32 0x10A3); |
|||
INSERT INTO t1 VALUES (_utf32 0x10A4), (_utf32 0x10A5); |
|||
INSERT INTO t1 VALUES (_utf32 0x10A6), (_utf32 0x10A7); |
|||
INSERT INTO t1 VALUES (_utf32 0x2D00), (_utf32 0x2D01); |
|||
INSERT INTO t1 VALUES (_utf32 0x2D02), (_utf32 0x2D03); |
|||
INSERT INTO t1 VALUES (_utf32 0x2D04), (_utf32 0x2D05); |
|||
INSERT INTO t1 VALUES (_utf32 0x2D06), (_utf32 0x2D07); |
|||
|
|||
# Phonetic Extensions |
|||
INSERT INTO t1 VALUES (_utf32 0x1D7D); |
|||
|
|||
# Letterlike Symbols |
|||
INSERT INTO t1 VALUES (_utf32 0x2132),(_utf32 0x214E); |
|||
|
|||
# Number Forms |
|||
INSERT INTO t1 VALUES (_utf32 0x2183),(_utf32 0x2184); |
|||
|
|||
# Coptic |
|||
INSERT INTO t1 VALUES (_utf32 0x2C80), (_utf32 0x2C81); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C82), (_utf32 0x2C83); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C84), (_utf32 0x2C85); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C86), (_utf32 0x2C87); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C88), (_utf32 0x2C89); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C8A), (_utf32 0x2C8B); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C8C), (_utf32 0x2C8D); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C8E), (_utf32 0x2C8F); |
|||
|
|||
# Latin Extended-C |
|||
INSERT INTO t1 VALUES (_utf32 0x2C60), (_utf32 0x2C61); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C62), (_utf32 0x2C63); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C64), (_utf32 0x2C65); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C66), (_utf32 0x2C67); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C68), (_utf32 0x2C69); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C6A), (_utf32 0x2C6B); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C6C), (_utf32 0x2C75); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C76); |
|||
|
|||
# Glagolitic |
|||
INSERT INTO t1 VALUES (_utf32 0x2C00), (_utf32 0x2C01); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C02), (_utf32 0x2C03); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C04), (_utf32 0x2C05); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C06), (_utf32 0x2C07); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C30), (_utf32 0x2C31); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C32), (_utf32 0x2C33); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C34), (_utf32 0x2C35); |
|||
INSERT INTO t1 VALUES (_utf32 0x2C36), (_utf32 0x2C37); |
|||
|
|||
# Deseret |
|||
INSERT INTO t1 VALUES (_utf32 0x10400), (_utf32 0x10401); |
|||
INSERT INTO t1 VALUES (_utf32 0x10402), (_utf32 0x10403); |
|||
INSERT INTO t1 VALUES (_utf32 0x10404), (_utf32 0x10405); |
|||
INSERT INTO t1 VALUES (_utf32 0x10406), (_utf32 0x10407); |
|||
INSERT INTO t1 VALUES (_utf32 0x10428), (_utf32 0x10429); |
|||
INSERT INTO t1 VALUES (_utf32 0x1042A), (_utf32 0x1042B); |
|||
INSERT INTO t1 VALUES (_utf32 0x1042C), (_utf32 0x1042D); |
|||
INSERT INTO t1 VALUES (_utf32 0x1042E), (_utf32 0x1042F); |
|||
|
|||
|
|||
# |
|||
# Unicode 5.1.0 characters |
|||
# One row per code point; the trailing comment on each INSERT gives the character name |
|||
|
|||
INSERT INTO t1 VALUES (_utf32 0x0370); # GREEK CAPITAL LETTER HETA |
|||
INSERT INTO t1 VALUES (_utf32 0x0371); # GREEK SMALL LETTER HETA |
|||
INSERT INTO t1 VALUES (_utf32 0x0372); # GREEK CAPITAL LETTER ARCHAIC SAMPI |
|||
INSERT INTO t1 VALUES (_utf32 0x0373); # GREEK SMALL LETTER ARCHAIC SAMPI |
|||
|
|||
INSERT INTO t1 VALUES (_utf32 0x0514); # CYRILLIC CAPITAL LETTER LHA |
|||
INSERT INTO t1 VALUES (_utf32 0x0515); # CYRILLIC SMALL LETTER LHA |
|||
INSERT INTO t1 VALUES (_utf32 0x0516); # CYRILLIC CAPITAL LETTER RHA |
|||
INSERT INTO t1 VALUES (_utf32 0x0517); # CYRILLIC SMALL LETTER RHA |
|||
|
|||
INSERT INTO t1 VALUES (_utf32 0xA640); # CYRILLIC CAPITAL LETTER ZEMLYA |
|||
INSERT INTO t1 VALUES (_utf32 0xA641); # CYRILLIC SMALL LETTER ZEMLYA |
|||
INSERT INTO t1 VALUES (_utf32 0xA642); # CYRILLIC CAPITAL LETTER DZELO |
|||
INSERT INTO t1 VALUES (_utf32 0xA643); # CYRILLIC SMALL LETTER DZELO |
|||
|
|||
INSERT INTO t1 VALUES (_utf32 0xA722); # LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF |
|||
INSERT INTO t1 VALUES (_utf32 0xA723); # LATIN SMALL LETTER EGYPTOLOGICAL ALEF |
|||
INSERT INTO t1 VALUES (_utf32 0xA724); # LATIN CAPITAL LETTER EGYPTOLOGICAL AIN |
|||
INSERT INTO t1 VALUES (_utf32 0xA725); # LATIN SMALL LETTER EGYPTOLOGICAL AIN |
|||
|
|||
INSERT INTO t1 VALUES (_utf32 0xA726); # LATIN CAPITAL LETTER HENG |
|||
INSERT INTO t1 VALUES (_utf32 0xA727); # LATIN SMALL LETTER HENG |
|||
INSERT INTO t1 VALUES (_utf32 0xA728); # LATIN CAPITAL LETTER TZ |
|||
INSERT INTO t1 VALUES (_utf32 0xA729); # LATIN SMALL LETTER TZ |
|||
INSERT INTO t1 VALUES (_utf32 0xA72A); # LATIN CAPITAL LETTER TRESILLO |
|||
INSERT INTO t1 VALUES (_utf32 0xA72B); # LATIN SMALL LETTER TRESILLO |
|||
|
|||
# |
|||
# Unicode 5.2.0 characters |
|||
# NOTE(review): the trailing comments look like semicolon-separated UnicodeData.txt records (name;category;...;case mappings) - confirm |
|||
|
|||
INSERT INTO t1 VALUES (_utf32 0x2CEB); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI;Lu;0;L;;;;;N;;;;2CEC; |
|||
INSERT INTO t1 VALUES (_utf32 0x2CEC); # COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI;Ll;0;L;;;;;N;;;2CEB;;2CEB |
|||
INSERT INTO t1 VALUES (_utf32 0x2CED); # COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA;Lu;0;L;;;;;N;;;;2CEE; |
|||
INSERT INTO t1 VALUES (_utf32 0x2CEE); # COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA;Ll;0;L;;;;;N;;;2CED;;2CED |
|||
|
|||
# Dump every row as hex: the stored value, its lower() and upper() forms, and its weight_string() |
|||
# Check case folding and UCA weights |
|||
# Ordering is by c first, then by BINARY c so that rows comparing equal on c still come out in a fixed order |
|||
SELECT hex(c), hex(lower(c)), hex(upper(c)), hex(weight_string(c)), c |
|||
FROM t1 ORDER BY c, BINARY c; |
|||
|
|||
|
|||
# Adds 'a' and 'a' followed by U+FFFF, U+10FFFF and U+10400 as candidates for the 'a%' prefix match |
|||
# Check that LIKE works fine with and without an index. |
|||
# This test makes sure that cs->min_sort_char and cs->max_sort_char |
|||
# are set properly |
|||
# Also check that LIKE is case insensitive for supplementary characters |
|||
# The same three SELECTs are run before and after ALTER TABLE ... ADD KEY(c); EXPLAIN shows the plan chosen once the key exists |
|||
INSERT INTO t1 VALUES ('a'); |
|||
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0xFFFF)); |
|||
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10FFFF)); |
|||
INSERT INTO t1 VALUES (concat(_utf32 0x61, _utf32 0x10400)); |
|||
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c; |
|||
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c; |
|||
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c; |
|||
ALTER TABLE t1 ADD KEY(c); |
|||
EXPLAIN SELECT hex(c) FROM t1 WHERE c LIKE 'a%' ORDER BY c; |
|||
SELECT hex(c), hex(weight_string(c)) FROM t1 WHERE c LIKE 'a%' ORDER BY c; |
|||
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10400 ORDER BY c, BINARY c; |
|||
SELECT hex(c), hex(weight_string(c)), c FROM t1 WHERE c LIKE _utf32 0x10428 ORDER BY c, BINARY c; |
|||
|
|||
DROP TABLE t1; |
12998
strings/ctype-uca.c
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
2405
strings/ctype-utf8.c
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
Write
Preview
Loading…
Cancel
Save
Reference in new issue