Browse Source

Backporting WL#1213

pull/73/head
Alexander Barkov 16 years ago
parent
commit
13c32771e7
  1. 65
      config/ac-macros/character_sets.m4
  2. 3
      include/config-win.h
  3. 62
      include/m_ctype.h
  4. 11
      mysql-test/include/ctype_datetime.inc
  5. 50
      mysql-test/include/ctype_like.inc
  6. 4
      mysql-test/include/have_utf16.inc
  7. 4
      mysql-test/include/have_utf32.inc
  8. 7
      mysql-test/include/have_utf8mb4.inc
  9. 33
      mysql-test/r/ctype_ldml.result
  10. 56
      mysql-test/r/ctype_many.result
  11. 1038
      mysql-test/r/ctype_utf16.result
  12. 2373
      mysql-test/r/ctype_utf16_uca.result
  13. 1052
      mysql-test/r/ctype_utf32.result
  14. 2373
      mysql-test/r/ctype_utf32_uca.result
  15. 14
      mysql-test/r/ctype_utf8.result
  16. 2250
      mysql-test/r/ctype_utf8mb4.result
  17. 2
      mysql-test/r/have_utf16.require
  18. 2
      mysql-test/r/have_utf32.require
  19. 2
      mysql-test/r/have_utf8mb4.require
  20. 30
      mysql-test/std_data/Index.xml
  21. 10
      mysql-test/suite/sys_vars/r/character_set_client_basic.result
  22. 2
      mysql-test/suite/sys_vars/r/character_set_connection_basic.result
  23. 2
      mysql-test/suite/sys_vars/r/character_set_database_basic.result
  24. 2
      mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result
  25. BIN
      mysql-test/suite/sys_vars/r/character_set_results_basic.result
  26. 9
      mysql-test/suite/sys_vars/t/character_set_client_basic.test
  27. 3
      mysql-test/suite/sys_vars/t/character_set_connection_basic.test
  28. 3
      mysql-test/suite/sys_vars/t/character_set_database_basic.test
  29. 3
      mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test
  30. 3
      mysql-test/suite/sys_vars/t/character_set_results_basic.test
  31. 21
      mysql-test/t/ctype_ldml.test
  32. 73
      mysql-test/t/ctype_many.test
  33. 731
      mysql-test/t/ctype_utf16.test
  34. 290
      mysql-test/t/ctype_utf16_uca.test
  35. 784
      mysql-test/t/ctype_utf32.test
  36. 291
      mysql-test/t/ctype_utf32_uca.test
  37. 11
      mysql-test/t/ctype_utf8.test
  38. 1670
      mysql-test/t/ctype_utf8mb4.test
  39. 155
      mysys/charset-def.c
  40. 69
      mysys/charset.c
  41. 4
      sql/field.cc
  42. 11
      sql/item.cc
  43. 33
      sql/item_strfunc.cc
  44. 38
      sql/sql_string.cc
  45. 2
      sql/sql_table.cc
  46. 38
      strings/ctype-mb.c
  47. 1986
      strings/ctype-uca.c
  48. 3652
      strings/ctype-ucs2.c
  49. 1054
      strings/ctype-utf8.c

65
config/ac-macros/character_sets.m4

@ -13,11 +13,11 @@ define(CHARSETS_AVAILABLE1,armscii8 ascii big5 cp1250 cp1251 cp1256 cp1257)
define(CHARSETS_AVAILABLE2,cp850 cp852 cp866 cp932 dec8 eucjpms euckr gb2312 gbk geostd8)
define(CHARSETS_AVAILABLE3,greek hebrew hp8 keybcs2 koi8r koi8u)
define(CHARSETS_AVAILABLE4,latin1 latin2 latin5 latin7 macce macroman)
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8)
define(CHARSETS_AVAILABLE5,sjis swe7 tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32)
DEFAULT_CHARSET=latin1
CHARSETS_AVAILABLE="CHARSETS_AVAILABLE0 CHARSETS_AVAILABLE1 CHARSETS_AVAILABLE2 CHARSETS_AVAILABLE3 CHARSETS_AVAILABLE4 CHARSETS_AVAILABLE5"
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8"
CHARSETS_COMPLEX="big5 cp1250 cp932 eucjpms euckr gb2312 gbk latin1 latin2 sjis tis620 ucs2 ujis utf8mb4 utf8 utf16 utf32"
AC_DIVERT_POP
@ -50,7 +50,7 @@ AC_ARG_WITH(extra-charsets,
AC_MSG_CHECKING("character sets")
CHARSETS="$default_charset latin1 utf8"
CHARSETS="$default_charset latin1 utf8mb4 utf8"
if test "$extra_charsets" = no; then
CHARSETS="$CHARSETS"
@ -195,8 +195,23 @@ do
AC_DEFINE([USE_MB], [1], [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf8mb4)
AC_DEFINE(HAVE_CHARSET_utf8mb4, 1, [Define to enable utf8mb4])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf8)
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable ut8])
AC_DEFINE(HAVE_CHARSET_utf8, 1, [Define to enable utf8])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf16)
AC_DEFINE(HAVE_CHARSET_utf16, 1, [Define to enable utf16])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
utf32)
AC_DEFINE(HAVE_CHARSET_utf32, 1, [Define to enable utf32])
AC_DEFINE([USE_MB], 1, [Use multi-byte character routines])
AC_DEFINE(USE_MB_IDENT, 1)
;;
@ -381,6 +396,48 @@ case $default_charset in
fi
default_charset_collations="$UTFC"
;;
utf8mb4)
default_charset_default_collation="utf8mb4_general_ci"
define(UTFC1, utf8mb4_general_ci utf8mb4_bin)
define(UTFC2, utf8mb4_czech_ci utf8mb4_danish_ci)
define(UTFC3, utf8mb4_esperanto_ci utf8mb4_estonian_ci utf8mb4_hungarian_ci)
define(UTFC4, utf8mb4_icelandic_ci utf8mb4_latvian_ci utf8mb4_lithuanian_ci)
define(UTFC5, utf8mb4_persian_ci utf8mb4_polish_ci utf8mb4_romanian_ci)
define(UTFC6, utf8mb4_sinhala_ci utf8mb4_slovak_ci utf8mb4_slovenian_ci)
define(UTFC7, utf8mb4_spanish2_ci utf8mb4_spanish_ci)
define(UTFC8, utf8mb4_swedish_ci utf8mb4_turkish_ci)
define(UTFC9, utf8mb4_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
utf16)
default_charset_default_collation="utf16_general_ci"
define(UTFC1, utf16_general_ci utf16_bin)
define(UTFC2, utf16_czech_ci utf16_danish_ci)
define(UTFC3, utf16_esperanto_ci utf16_estonian_ci utf16_hungarian_ci)
define(UTFC4, utf16_icelandic_ci utf16_latvian_ci utf16_lithuanian_ci)
define(UTFC5, utf16_persian_ci utf16_polish_ci utf16_romanian_ci)
define(UTFC6, utf16_sinhala_ci utf16_slovak_ci utf16_slovenian_ci)
define(UTFC7, utf16_spanish2_ci utf16_spanish_ci)
define(UTFC8, utf16_swedish_ci utf16_turkish_ci)
define(UTFC9, utf16_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
utf32)
default_charset_default_collation="utf32_general_ci"
define(UTFC1, utf32_general_ci utf32_bin)
define(UTFC2, utf32_czech_ci utf32_danish_ci)
define(UTFC3, utf32_esperanto_ci utf32_estonian_ci utf32_hungarian_ci)
define(UTFC4, utf32_icelandic_ci utf32_latvian_ci utf32_lithuanian_ci)
define(UTFC5, utf32_persian_ci utf32_polish_ci utf32_romanian_ci)
define(UTFC6, utf32_sinhala_ci utf32_slovak_ci utf32_slovenian_ci)
define(UTFC7, utf32_spanish2_ci utf32_spanish_ci)
define(UTFC8, utf32_swedish_ci utf32_turkish_ci)
define(UTFC9, utf32_unicode_ci)
UTFC="UTFC1 UTFC2 UTFC3 UTFC4 UTFC5 UTFC6 UTFC7 UTFC8 UTFC9"
default_charset_collations="$UTFC"
;;
*)
AC_MSG_ERROR([Charset $cs not available. (Available are: $CHARSETS_AVAILABLE).
See the Installation chapter in the Reference Manual.])

3
include/config-win.h

@ -432,6 +432,9 @@ inline ulonglong double2ulonglong(double d)
#define HAVE_CHARSET_ucs2 1
#define HAVE_CHARSET_ujis 1
#define HAVE_CHARSET_utf8 1
#define HAVE_CHARSET_utf8mb4 1
#define HAVE_CHARSET_utf16 1
#define HAVE_CHARSET_utf32 1
#define HAVE_UCA_COLLATIONS 1
#define HAVE_BOOL 1

62
include/m_ctype.h

@ -98,13 +98,14 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_CS_BINSORT 16 /* if binary sort order */
#define MY_CS_PRIMARY 32 /* if primary collation */
#define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */
#define MY_CS_UNICODE 128 /* is a charset is full unicode */
#define MY_CS_UNICODE 128 /* if a charset is BMP Unicode */
#define MY_CS_READY 256 /* if a charset is initialized */
#define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/
#define MY_CS_CSSORT 1024 /* if case sensitive sort order */
#define MY_CS_HIDDEN 2048 /* don't display in SHOW */
#define MY_CS_PUREASCII 4096 /* if a charset is pure ascii */
#define MY_CS_NONASCII 8192 /* if not ASCII-compatible */
#define MY_CS_UNICODE_SUPPLEMENT 16384 /* Non-BMP Unicode characters */
#define MY_CHARSET_UNDEFINED 0
/* Character repertoire flags */
@ -112,7 +113,6 @@ extern MY_UNI_CTYPE my_uni_ctype[256];
#define MY_REPERTOIRE_EXTENDED 2 /* Extended characters: U+0080..U+FFFF */
#define MY_REPERTOIRE_UNICODE30 3 /* ASCII | EXTENDED: U+0000..U+FFFF */
typedef struct my_uni_idx_st
{
uint16 from;
@ -304,10 +304,14 @@ typedef struct charset_info_st
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_bin;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
extern CHARSET_INFO my_charset_big5_chinese_ci;
extern CHARSET_INFO my_charset_big5_bin;
extern CHARSET_INFO my_charset_cp932_japanese_ci;
extern CHARSET_INFO my_charset_cp932_bin;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern CHARSET_INFO my_charset_eucjpms_japanese_ci;
extern CHARSET_INFO my_charset_eucjpms_bin;
extern CHARSET_INFO my_charset_euckr_korean_ci;
@ -316,7 +320,6 @@ extern CHARSET_INFO my_charset_gb2312_chinese_ci;
extern CHARSET_INFO my_charset_gb2312_bin;
extern CHARSET_INFO my_charset_gbk_chinese_ci;
extern CHARSET_INFO my_charset_gbk_bin;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_latin1;
extern CHARSET_INFO my_charset_latin1_german2_ci;
extern CHARSET_INFO my_charset_latin1_bin;
extern CHARSET_INFO my_charset_latin2_czech_ci;
@ -329,11 +332,22 @@ extern CHARSET_INFO my_charset_ucs2_bin;
extern CHARSET_INFO my_charset_ucs2_unicode_ci;
extern CHARSET_INFO my_charset_ujis_japanese_ci;
extern CHARSET_INFO my_charset_ujis_bin;
extern CHARSET_INFO my_charset_utf16_bin;
extern CHARSET_INFO my_charset_utf16_general_ci;
extern CHARSET_INFO my_charset_utf16_unicode_ci;
extern CHARSET_INFO my_charset_utf32_bin;
extern CHARSET_INFO my_charset_utf32_general_ci;
extern CHARSET_INFO my_charset_utf32_unicode_ci;
extern CHARSET_INFO my_charset_utf8_general_ci;
extern CHARSET_INFO my_charset_utf8_unicode_ci;
extern CHARSET_INFO my_charset_utf8_bin;
extern CHARSET_INFO my_charset_cp1250_czech_ci;
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO my_charset_filename;
extern CHARSET_INFO my_charset_utf8mb4_bin;
extern CHARSET_INFO my_charset_utf8mb4_general_ci;
extern CHARSET_INFO my_charset_utf8mb4_unicode_ci;
#define MY_UTF8MB3 "utf8"
#define MY_UTF8MB4 "utf8mb4"
/* declarations for simple charsets */
extern size_t my_strnxfrm_simple(CHARSET_INFO *, uchar *, size_t,
@ -430,6 +444,19 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf16(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
my_bool my_like_range_utf32(CHARSET_INFO *cs,
const char *ptr, size_t ptr_length,
pbool escape, pbool w_one, pbool w_many,
size_t res_length,
char *min_str, char *max_str,
size_t *min_length, size_t *max_length);
int my_wildcmp_8bit(CHARSET_INFO *,
const char *str,const char *str_end,
@ -480,6 +507,31 @@ uint my_instr_mb(struct charset_info_st *,
const char *s, size_t s_length,
my_match_t *match, uint nmatch);
int my_strnncoll_mb_bin(CHARSET_INFO * cs,
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
my_bool t_is_prefix);
int my_strnncollsp_mb_bin(CHARSET_INFO *cs,
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference);
int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many);
int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t);
void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2);
size_t my_strnxfrm_unicode(CHARSET_INFO *,
uchar *dst, size_t dstlen,
const uchar *src, size_t srclen);
int my_wildcmp_unicode(CHARSET_INFO *cs,
const char *str, const char *str_end,
const char *wildstr, const char *wildend,

11
mysql-test/include/ctype_datetime.inc

@ -0,0 +1,11 @@
#
# Bug#32390 Character sets: casting utf32 to/from date doesn't work
#
CREATE TABLE t1 AS SELECT repeat('a',20) AS s1 LIMIT 0;
SET timestamp=1216359724;
INSERT INTO t1 VALUES (current_date);
INSERT INTO t1 VALUES (current_time);
INSERT INTO t1 VALUES (current_timestamp);
SELECT s1, hex(s1) FROM t1;
DROP TABLE t1;
SET timestamp=0;

50
mysql-test/include/ctype_like.inc

@ -0,0 +1,50 @@
select @@collation_connection;
#
# Create a table with a nullable varchar(10) column
# using current character_set_connection.
create table t1 as select repeat(' ',10) as a union select null;
alter table t1 add key(a);
show create table t1;
insert into t1 values ("a"),("abc"),("abcd"),("hello"),("test");
explain select * from t1 where a like 'abc%';
explain select * from t1 where a like concat('abc','%');
select * from t1 where a like "abc%";
select * from t1 where a like concat("abc","%");
select * from t1 where a like "ABC%";
select * from t1 where a like "test%";
select * from t1 where a like "te_t";
select * from t1 where a like "%a%";
select * from t1 where a like "%abcd%";
select * from t1 where a like "%abc\d%";
drop table t1;
#
# Bug #2619 ucs2 LIKE comparison fails in some cases
#
select 'AA' like 'AA';
select 'AA' like 'A%A';
select 'AA' like 'A%%A';
select 'AA' like 'AA%';
select 'AA' like '%AA%';
select 'AA' like '%A';
select 'AA' like '%AA';
select 'AA' like 'A%A%';
select 'AA' like '_%_%';
select 'AA' like '%A%A';
select 'AAA'like 'A%A%A';
select 'AZ' like 'AZ';
select 'AZ' like 'A%Z';
select 'AZ' like 'A%%Z';
select 'AZ' like 'AZ%';
select 'AZ' like '%AZ%';
select 'AZ' like '%Z';
select 'AZ' like '%AZ';
select 'AZ' like 'A%Z%';
select 'AZ' like '_%_%';
select 'AZ' like '%A%Z';
select 'AZ' like 'A_';
select 'AZ' like '_Z';
select 'AMZ'like 'A%M%Z';

4
mysql-test/include/have_utf16.inc

@ -0,0 +1,4 @@
-- require r/have_utf16.require
disable_query_log;
show collation like 'utf16_general_ci';
enable_query_log;

4
mysql-test/include/have_utf32.inc

@ -0,0 +1,4 @@
-- require r/have_utf32.require
disable_query_log;
show collation like 'utf32_general_ci';
enable_query_log;

7
mysql-test/include/have_utf8mb4.inc

@ -0,0 +1,7 @@
--require r/have_utf8mb4.require
--disable_query_log
SHOW COLLATION LIKE 'utf8mb4_general_ci';
--enable_query_log

33
mysql-test/r/ctype_ldml.result

@ -53,6 +53,33 @@ select * from t1 where c1='b';
c1
a
drop table t1;
show collation like 'utf8mb4_test_ci';
Collation Charset Id Default Compiled Sortlen
utf8mb4_test_ci utf8mb4 326 8
create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
c1
a
drop table t1;
show collation like 'utf16_test_ci';
Collation Charset Id Default Compiled Sortlen
utf16_test_ci utf16 327 8
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
c1
a
drop table t1;
show collation like 'utf32_test_ci';
Collation Charset Id Default Compiled Sortlen
utf32_test_ci utf32 391 8
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
c1
a
drop table t1;
CREATE TABLE t1 (
col1 varchar(100) character set utf8 collate utf8_test_ci
);
@ -373,16 +400,22 @@ select "foo" = "foo " collate latin1_test;
The following tests check that two-byte collation IDs work
select * from information_schema.collations where id>256 order by id;
COLLATION_NAME CHARACTER_SET_NAME ID IS_DEFAULT IS_COMPILED SORTLEN
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf8_phone_ci utf8 352 8
utf8_test_ci utf8 353 8
ucs2_test_ci ucs2 358 8
ucs2_vn_ci ucs2 359 8
utf32_test_ci utf32 391 8
utf8_maxuserid_ci utf8 2047 8
show collation like '%test%';
Collation Charset Id Default Compiled Sortlen
latin1_test latin1 99 Yes 1
utf8_test_ci utf8 353 8
ucs2_test_ci ucs2 358 8
utf8mb4_test_ci utf8mb4 326 8
utf16_test_ci utf16 327 8
utf32_test_ci utf32 391 8
show collation like 'ucs2_vn_ci';
Collation Charset Id Default Compiled Sortlen
ucs2_vn_ci ucs2 359 8

56
mysql-test/r/ctype_many.result

@ -1683,3 +1683,59 @@ ARMENIAN CAPIT DA 2
ARMENIAN CAPIT ECH 2
ARMENIAN CAPIT ZA 2
DROP TABLE t1;
#
# WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
# Testing that only utf8mb4 is superset for utf8
# No other Unicode character set pairs have superset/subset relations
#
CREATE TABLE t1 (
utf8 CHAR CHARACTER SET utf8,
utf8mb4 CHAR CHARACTER SET utf8mb4,
ucs2 CHAR CHARACTER SET ucs2,
utf16 CHAR CHARACTER SET utf16,
utf32 CHAR CHARACTER SET utf32
);
INSERT INTO t1 VALUES ('','','','','');
SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
CHARSET(CONCAT(utf8, utf8mb4))
utf8mb4
SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
CHARSET(CONCAT(utf8mb4, utf8))
utf8mb4
SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (utf8mb4_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (ucs2_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
ERROR HY000: Illegal mix of collations (utf16_general_ci,IMPLICIT) and (utf32_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (ucs2_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf8mb4_general_ci,IMPLICIT) for operation 'concat'
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
ERROR HY000: Illegal mix of collations (utf32_general_ci,IMPLICIT) and (utf16_general_ci,IMPLICIT) for operation 'concat'
DROP TABLE t1;

1038
mysql-test/r/ctype_utf16.result
File diff suppressed because it is too large
View File

2373
mysql-test/r/ctype_utf16_uca.result
File diff suppressed because it is too large
View File

1052
mysql-test/r/ctype_utf32.result
File diff suppressed because it is too large
View File

2373
mysql-test/r/ctype_utf32_uca.result
File diff suppressed because it is too large
View File

14
mysql-test/r/ctype_utf8.result

@ -1899,6 +1899,20 @@ CONVERT(a, CHAR) CONVERT(b, CHAR)
DROP TABLE t1;
End of 5.0 tests
Start of 5.4 tests
SET NAMES utf8mb3;
SHOW VARIABLES LIKE 'character_set_results%';
Variable_name Value
character_set_results utf8
CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
SHOW CREATE TABLE t1;
Table Create Table
t1 CREATE TABLE `t1` (
`a` char(1) CHARACTER SET utf8 COLLATE utf8_bin DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1;
SELECT _utf8mb3'test';
test
test
CREATE TABLE t1 (
clipid INT NOT NULL,
Tape TINYTEXT,

2250
mysql-test/r/ctype_utf8mb4.result
File diff suppressed because it is too large
View File

2
mysql-test/r/have_utf16.require

@ -0,0 +1,2 @@
Collation Charset Id Default Compiled Sortlen
utf16_general_ci utf16 54 Yes Yes 1

2
mysql-test/r/have_utf32.require

@ -0,0 +1,2 @@
Collation Charset Id Default Compiled Sortlen
utf32_general_ci utf32 60 Yes Yes 1

2
mysql-test/r/have_utf8mb4.require

@ -0,0 +1,2 @@
Collation Charset Id Default Compiled Sortlen
utf8mb4_general_ci utf8mb4 45 Yes Yes 1

30
mysql-test/std_data/Index.xml

@ -33,6 +33,36 @@
</collation>
</charset>
<charset name="utf8mb4">
<collation name="utf8mb4_test_ci" id="326">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="utf16">
<collation name="utf16_test_ci" id="327">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="utf32">
<collation name="utf32_test_ci" id="391">
<rules>
<reset>a</reset>
<s>b</s>
</rules>
</collation>
</charset>
<charset name="ucs2">
<collation name="ucs2_test_ci" id="358">
<rules>

10
mysql-test/suite/sys_vars/r/character_set_client_basic.result

@ -162,8 +162,16 @@ SET @@character_set_client = utf8;
SELECT @@character_set_client;
@@character_set_client
utf8
SET @@character_set_client = utf8mb4;
SELECT @@character_set_client;
@@character_set_client
utf8mb4
SET @@character_set_client = ucs2;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'ucs2'
SET @@character_set_client = utf16;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf16'
SET @@character_set_client = utf32;
ERROR 42000: Variable 'character_set_client' can't be set to the value of 'utf32'
SET @@character_set_client = cp866;
SELECT @@character_set_client;
@@character_set_client
@ -422,7 +430,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_010_10-------------------------#'
SET @@character_set_client = abc;
ERROR 42000: Unknown character set: 'abc'

2
mysql-test/suite/sys_vars/r/character_set_connection_basic.result

@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_011_10-------------------------#'
SET @@character_set_connection = abc;
ERROR 42000: Unknown character set: 'abc'

2
mysql-test/suite/sys_vars/r/character_set_database_basic.result

@ -424,7 +424,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_012_10-------------------------#'
SET @@character_set_database = "grek";
ERROR 42000: Unknown character set: 'grek'

2
mysql-test/suite/sys_vars/r/character_set_filesystem_basic.result

@ -402,7 +402,7 @@ ERROR 42000: Unknown character set: '100'
SET @total_charset = (SELECT count(*) FROM INFORMATION_SCHEMA.CHARACTER_SETS);
SELECT @total_charset;
@total_charset
36
39
'#--------------------FN_DYNVARS_008_10-------------------------#'
SET @@character_set_filesystem = abc;
ERROR 42000: Unknown character set: 'abc'

BIN
mysql-test/suite/sys_vars/r/character_set_results_basic.result

9
mysql-test/suite/sys_vars/t/character_set_client_basic.test

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
###################################################
@ -163,9 +166,15 @@ SET @@character_set_client = armscii8;
SELECT @@character_set_client;
SET @@character_set_client = utf8;
SELECT @@character_set_client;
SET @@character_set_client = utf8mb4;
SELECT @@character_set_client;
--error ER_WRONG_VALUE_FOR_VAR
SET @@character_set_client = ucs2;
--error ER_WRONG_VALUE_FOR_VAR
SET @@character_set_client = utf16;
--error ER_WRONG_VALUE_FOR_VAR
SET @@character_set_client = utf32;
SET @@character_set_client = cp866;
SELECT @@character_set_client;

3
mysql-test/suite/sys_vars/t/character_set_connection_basic.test

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
###################################################

3
mysql-test/suite/sys_vars/t/character_set_database_basic.test

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
###################################################

3
mysql-test/suite/sys_vars/t/character_set_filesystem_basic.test

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc

3
mysql-test/suite/sys_vars/t/character_set_results_basic.test

@ -27,6 +27,9 @@
--source include/have_sjis.inc
--source include/have_utf8.inc
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--source include/load_sysvars.inc
################################################

21
mysql-test/t/ctype_ldml.test

@ -1,4 +1,7 @@
--source include/have_ucs2.inc
--source include/have_utf8mb4.inc
--source include/have_utf16.inc
--source include/have_utf32.inc
--disable_warnings
drop table if exists t1;
@ -40,6 +43,24 @@ insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
show collation like 'utf8mb4_test_ci';
create table t1 (c1 char(1) character set utf8mb4 collate utf8mb4_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
show collation like 'utf16_test_ci';
create table t1 (c1 char(1) character set utf16 collate utf16_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
show collation like 'utf32_test_ci';
create table t1 (c1 char(1) character set utf32 collate utf32_test_ci);
insert into t1 values ('a');
select * from t1 where c1='b';
drop table t1;
#
# Bug#41084 full-text index added to custom UCA collation not working

73
mysql-test/t/ctype_many.test

@ -1,4 +1,7 @@
-- source include/have_ucs2.inc
-- source include/have_utf8mb4.inc
-- source include/have_utf16.inc
-- source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
@ -211,3 +214,73 @@ SELECT min(comment),count(*) FROM t1 GROUP BY ucs2_f;
DROP TABLE t1;
# End of 4.1 tests
--echo #
--echo # WL#1213 Implement 4-byte UTF8, UTF16 and UTF32
--echo # Testing that only utf8mb4 is superset for utf8
--echo # No other Unicode character set pairs have superset/subset relations
--echo #
CREATE TABLE t1 (
utf8 CHAR CHARACTER SET utf8,
utf8mb4 CHAR CHARACTER SET utf8mb4,
ucs2 CHAR CHARACTER SET ucs2,
utf16 CHAR CHARACTER SET utf16,
utf32 CHAR CHARACTER SET utf32
);
INSERT INTO t1 VALUES ('','','','','');
# utf8mb4 is superset only for utf8
SELECT CHARSET(CONCAT(utf8, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8, utf16)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8, utf32)) FROM t1;
# utf8mb4 is superset only for utf8
SELECT CHARSET(CONCAT(utf8mb4, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8mb4, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8mb4, utf16)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf8mb4, utf32)) FROM t1;
# ucs2 is not a superset for the other Unicode character sets
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf16)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(ucs2, utf32)) FROM t1;
# utf16 is not a superset for the other Unicode character sets
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf16, utf32)) FROM t1;
# utf32 is not a superset for the other Unicode character sets
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, utf8)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, ucs2)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, utf8mb4)) FROM t1;
--error ER_CANT_AGGREGATE_2COLLATIONS
SELECT CHARSET(CONCAT(utf32, utf16)) FROM t1;
DROP TABLE t1;

731
mysql-test/t/ctype_utf16.test

@ -0,0 +1,731 @@
-- source include/have_utf16.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
SET NAMES latin1;
SET character_set_connection=utf16;
select hex('a'), hex('a ');
-- source include/endspace.inc
# Check that incomplete utf16 characters in HEX notation
# are left-padded with zeros
#
select hex(_utf16 0x44);
select hex(_utf16 0x3344);
select hex(_utf16 0x113344);
# Check that 0x20 is only trimmed when it is
# a part of real SPACE character, not just a part
# of a multibyte sequence.
# Note, CYRILLIC LETTER ER is used as an example, which
# is stored as 0x0420 in utf16, thus contains 0x20 in the
# low byte. The second character is THREE-PER-EM SPACE, U+2004,
# which contains 0x20 in the high byte.
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf16;
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DELETE FROM t1;
#
# Check that real spaces are correctly trimmed.
#
INSERT INTO t1 VALUES (X'042000200020',X'042000200020'), (X'200400200020', X'200400200020');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DROP TABLE t1;
#
# Check LPAD/RPAD
#
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'0421'));
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'04210422'));
SELECT hex(LPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
SELECT hex(LPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
SELECT hex(LPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
SELECT hex(LPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'0421'));
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'04210422'));
SELECT hex(RPAD(_utf16 X'0420',10,_utf16 X'042104220423'));
SELECT hex(RPAD(_utf16 X'0420042104220423042404250426042704280429042A042B',10,_utf16 X'042104220423'));
SELECT hex(RPAD(_utf16 X'D800DC00', 10, _utf16 X'0421'));
SELECT hex(RPAD(_utf16 X'0421', 10, _utf16 X'D800DC00'));
# Materialize LPAD/RPAD results with CREATE TABLE ... SELECT so that
# SHOW CREATE TABLE exposes the column definitions derived for them,
# then dump the stored values in hex.
CREATE TABLE t1 SELECT
LPAD(_utf16 X'0420',10,_utf16 X'0421') l,
RPAD(_utf16 X'0420',10,_utf16 X'0421') r;
SHOW CREATE TABLE t1;
select hex(l), hex(r) from t1;
DROP TABLE t1;
# LPAD of a CHAR column to 12 characters with a four-character pad string.
create table t1 (f1 char(30));
insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
select lpad(f1, 12, "-o-/") from t1;
drop table t1;
######################################################
#
# Test of like
#
SET NAMES latin1;
SET character_set_connection=utf16;
--source include/ctype_like.inc
SET NAMES utf8;
SET character_set_connection=utf16;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf16);
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf16_bin ORDER BY BINARY a;
DROP TABLE t1;
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
ENGINE=MyISAM CHARACTER SET utf16;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
SELECT * FROM t1 WHERE word LIKE "ca_";
SELECT * FROM t1 WHERE word LIKE "cat";
SELECT * FROM t1 WHERE word LIKE _utf16 x'00630025'; # "c%"
SELECT * FROM t1 WHERE word LIKE _utf16 x'00630061005F'; # "ca_"
DROP TABLE t1;
#
# Check that INSERT() works fine.
# This invokes charpos() function.
select insert(_utf16 0x006100620063,10,2,_utf16 0x006400650066);
select insert(_utf16 0x006100620063,1,2,_utf16 0x006400650066);
########################################################
#
# Bug 1264
#
# Description:
#
# When using a ucs2 table in MySQL,
# either with ucs2_general_ci or ucs2_bin collation,
# words are returned in an incorrect order when using ORDER BY
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
# the longest word *first* instead of last. I.E. The word "aardvark"
# is in the results before the word "a".
#
# If there is no index for the column, the problem does not occur.
#
# Interestingly, if there is no second column, the words are returned
# in the correct order.
#
# According to EXPLAIN, it looks like when the output includes columns that
# are not part of the index sorted on, it does a filesort, which fails.
# Using a straight index yields correct results.
SET NAMES latin1;
#
# Two fields, index
#
CREATE TABLE t1 (
word VARCHAR(64),
bar INT(11) default 0,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16
COLLATE utf16_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER by word;
DROP TABLE t1;
#
# One field, index
#
CREATE TABLE t1 (
word VARCHAR(64) ,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf16
COLLATE utf16_general_ci;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
SELECT * FROM t1 ORDER BY word;
DROP TABLE t1;
#
# Two fields, no index
#
CREATE TABLE t1 (
word TEXT,
bar INT(11) AUTO_INCREMENT,
PRIMARY KEY (bar))
ENGINE=MyISAM
CHARSET utf16
COLLATE utf16_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a" );
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER BY word;
DROP TABLE t1;
#
# END OF Bug 1264 test
#
########################################################
#
# Check alignment for from-binary-conversion with CAST and CONVERT
#
SELECT hex(cast(0xAA as char character set utf16));
SELECT hex(convert(0xAA using utf16));
#
# Check alignment for string types
#
CREATE TABLE t1 (a char(10) character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a varchar(10) character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a text character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a mediumtext character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a longtext character set utf16);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
##
## Bug #5024 Server crashes with queries on fields
## with certain charset/collation settings
##
##
#create table t1 (s1 char character set utf16 collate utf16_czech_ci);
#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
#select s1 from t1 where s1 > 'a' order by s1;
#drop table t1;
#
#
# Bug #5081 : UCS2 fields are filled with '0x2020'
# after extending field length
#
create table t1(a char(1)) default charset utf16;
insert into t1 values ('a'),('b'),('c');
alter table t1 modify a char(5);
select a, hex(a) from t1;
drop table t1;
#
# Check prepare statement from an UTF16 string
#
set @ivar= 1234;
set @str1 = 'select ?';
set @str2 = convert(@str1 using utf16);
prepare stmt1 from @str2;
execute stmt1 using @ivar;
#
# Check that utf16 works with ENUM and SET type
#
set names utf8;
create table t1 (a enum('x','y','z') character set utf16);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
select a, hex(a) from t1 order by a;
alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf16;
show create table t1;
insert into t1 values ('D');
insert into t1 values ('E ');
insert into t1 values ('ä');
insert into t1 values ('ö');
insert into t1 values ('ü');
select a, hex(a) from t1 order by a;
drop table t1;
create table t1 (a set ('x','y','z','ä','ö','ü') character set utf16);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
insert into t1 values ('x,y');
insert into t1 values ('x,y,z,ä,ö,ü');
select a, hex(a) from t1 order by a;
drop table t1;
#
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
#
create table t1(a enum('a','b','c')) default character set utf16;
insert into t1 values('a'),('b'),('c');
alter table t1 add b char(1);
show warnings;
select * from t1 order by a;
drop table t1;
# Run the shared include scripts ctype_filesort.inc and
# ctype_like_escape.inc twice: first with utf16_general_ci as the
# connection collation, then with utf16_bin. SET NAMES latin1 resets
# the connection settings before each run.
SET NAMES latin1;
SET collation_connection='utf16_general_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
SET NAMES latin1;
SET collation_connection='utf16_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
#
# Bug#10344 Some string functions fail for UCS2
#
select hex(substr(_utf16 0x00e400e50068,1));
select hex(substr(_utf16 0x00e400e50068,2));
select hex(substr(_utf16 0x00e400e50068,3));
select hex(substr(_utf16 0x00e400e50068,-1));
select hex(substr(_utf16 0x00e400e50068,-2));
select hex(substr(_utf16 0x00e400e50068,-3));
select hex(substr(_utf16 0x00e400e5D800DC00,1));
select hex(substr(_utf16 0x00e400e5D800DC00,2));
select hex(substr(_utf16 0x00e400e5D800DC00,3));
select hex(substr(_utf16 0x00e400e5D800DC00,-1));
select hex(substr(_utf16 0x00e400e5D800DC00,-2));
select hex(substr(_utf16 0x00e400e5D800DC00,-3));
SET NAMES latin1;
##
## Bug#8235
##
## This bug also helped to find another problem that
## INSERT of a UCS2 string containing a negative number
## into a unsigned int column didn't produce warnings.
## This test covers both problems.
##
##SET collation_connection='ucs2_swedish_ci';
##CREATE TABLE t1 (Field1 int(10) default '0');
### no warnings, negative numbers are allowed
##INSERT INTO t1 VALUES ('-1');
##SELECT * FROM t1;
##DROP TABLE t1;
##CREATE TABLE t1 (Field1 int(10) unsigned default '0');
### this should generate a "Data truncated" warning
##INSERT INTO t1 VALUES ('-1');
##DROP TABLE t1;
##SET NAMES latin1;
###
### Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
###
##--disable_warnings
##create table t1(f1 varchar(5) CHARACTER SET utf16 COLLATE utf16_bin NOT NULL) engine=InnoDB;
##--enable_warnings
##insert into t1 values('a');
##create index t1f1 on t1(f1);
##select f1 from t1 where f1 like 'a%';
##drop table t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
create table t1 (utext varchar(20) character set utf16);
insert into t1 values ("lily");
insert into t1 values ("river");
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
#
create table t1 (
a char(10) character set utf16 not null,
index a (a)
) engine=myisam;
insert into t1 values (repeat(0x201f, 10));
insert into t1 values (repeat(0x2020, 10));
insert into t1 values (repeat(0x2021, 10));
# make sure "index read" is used
explain select hex(a) from t1 order by a;
select hex(a) from t1 order by a;
alter table t1 drop index a;
select hex(a) from t1 order by a;
drop table t1;
##
## Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
## over a 'ucs2' field uses a temporary table
##
##CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
##INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
##SELECT id, MIN(s) FROM t1 GROUP BY id;
##DROP TABLE t1;
###
### Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
###
##
##--disable_warnings
##drop table if exists bug20536;
##--enable_warnings
##
##set names latin1;
##create table bug20536 (id bigint not null auto_increment primary key, name
##varchar(255) character set ucs2 not null);
##insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
##select md5(name) from bug20536;
##select sha1(name) from bug20536;
##select make_set(3, name, upper(name)) from bug20536;
##select export_set(5, name, upper(name)) from bug20536;
##select export_set(5, name, upper(name), ",", 5) from bug20536;
#
# Bug #20108: corrupted default enum value for a ucs2 field
#
CREATE TABLE t1 (
status enum('active','passive') character set utf16 collate utf16_general_ci
NOT NULL default 'passive'
);
SHOW CREATE TABLE t1;
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
SHOW CREATE TABLE t1;
DROP TABLE t1;
##CREATE TABLE t2 (
## status enum('active','passive') collate ucs2_turkish_ci
## NOT NULL default 'passive'
##);
##SHOW CREATE TABLE t2;
##ALTER TABLE t2 ADD a int NOT NULL AFTER status;
##DROP TABLE t2;
--echo End of 4.1 tests
#
# Conversion from an UTF16 string to a decimal column
#
CREATE TABLE t1 (a varchar(64) character set utf16, b decimal(10,3));
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
update t1 set b=a;
SELECT *, hex(a) FROM t1;
DROP TABLE t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
create table t1 (utext varchar(20) character set utf16);
insert into t1 values ("lily");
insert into t1 values ("river");
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22638 SOUNDEX broken for international characters
#
set names latin1;
set character_set_connection=utf16;
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
select 'mood' sounds like 'mud';
# Cyrillic A, BE, VE
select hex(soundex(_utf16 0x041004110412));
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
select hex(soundex(_utf16 0x00BF00C0));
set names latin1;
#
# Bug #14290: character_maximum_length for text fields
#
create table t1(a blob, b text charset utf16);
select data_type, character_octet_length, character_maximum_length
from information_schema.columns where table_name='t1';
drop table t1;
set names latin1;
set collation_connection=utf16_general_ci;
#
# Testing cs->coll->instr()
#
select position('bb' in 'abba');
#
# Testing cs->coll->hash_sort()
#
create table t1 (a varchar(10) character set utf16) engine=heap;
insert into t1 values ('a'),('A'),('b'),('B');
select * from t1 where a='a' order by binary a;
select hex(min(binary a)),count(*) from t1 group by a;
drop table t1;
#
# Testing cs->cset->numchars()
#
select char_length('abcd'), octet_length('abcd');
select char_length(_utf16 0xD800DC00), octet_length(_utf16 0xD800DC00);
select char_length(_utf16 0xD87FDFFF), octet_length(_utf16 0xD87FDFFF);
#
# Testing cs->cset->charpos()
#
select left('abcd',2);
select hex(left(_utf16 0xD800DC00D87FDFFF, 1));
select hex(right(_utf16 0xD800DC00D87FDFFF, 1));
#
# Testing cs->cset->well_formed_length()
#
create table t1 (a varchar(10) character set utf16);
# Bad sequences
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD800);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xDC00);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD800D800);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD800E800);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf16 0xD8000800);
# Good sequences
insert into t1 values (_utf16 0xD800DC00);
insert into t1 values (_utf16 0xD800DCFF);
insert into t1 values (_utf16 0xDBFFDC00);
insert into t1 values (_utf16 0xDBFFDCFF);
select hex(a) from t1;
drop table t1;
#
# Bug#32393 Character sets: illegal characters in utf16 columns
#
# Tests that cs->cset->wc_mb() doesn't accept surrogate parts
#
# via alter
#
create table t1 (s1 varchar(50) character set ucs2);
insert into t1 values (0xdf84);
alter table t1 modify column s1 varchar(50) character set utf16;
select hex(s1) from t1;
drop table t1;
#
# via update
#
create table t1 (s1 varchar(5) character set ucs2, s2 varchar(5) character set utf16);
insert into t1 (s1) values (0xdf84);
update t1 set s2 = s1;
select hex(s2) from t1;
drop table t1;
#
# Testing cs->cset->lengthsp()
#
create table t1 (a char(10)) character set utf16;
insert into t1 values ('a ');
select hex(a) from t1;
drop table t1;
#
# Testing cs->cset->caseup() and cs->cset->casedn()
#
select upper('abcd'), lower('ABCD');
#
# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf16
# Testing cs->cset->snprintf()
#
#create table t1 (a date);
#insert into t1 values ('2007-09-16');
#select * from t1;
#drop table t1;
#
# Testing cs->cset->l10tostr
# !!! Not used in the code
#
# Testing cs->cset->ll10tostr
#
create table t1 (a varchar(10) character set utf16);
insert into t1 values (123456);
select a, hex(a) from t1;
drop table t1;
# Testing cs->cset->fill
# SOUNDEX fills strings with DIGIT ZERO up to four characters
select hex(soundex('a'));
#
# Testing cs->cset->strntol
# !!! Not used in the code
#
# Testing cs->cset->strntoul
#
create table t1 (a enum ('a','b','c')) character set utf16;
insert into t1 values ('1');
select * from t1;
drop table t1;
#
# Testing cs->cset->strntoll and cs->cset->strntoull
#
set names latin1;
select hex(conv(convert('123' using utf16), -10, 16));
select hex(conv(convert('123' using utf16), 10, 16));
#
# Testing cs->cset->strntod
#
set names latin1;
set character_set_connection=utf16;
select 1.1 + '1.2';
select 1.1 + '1.2xxx';
# Testing strntoll10_utf16
# Testing cs->cset->strtoll10
select left('aaa','1');
#
# Testing cs->cset->strntoull10rnd
#
create table t1 (a int);
insert into t1 values ('-1234.1e2');
insert into t1 values ('-1234.1e2xxxx');
insert into t1 values ('-1234.1e2 ');
select * from t1;
drop table t1;
#
# Testing cs->cset->scan
#
create table t1 (a int);
insert into t1 values ('1 ');
insert into t1 values ('1 x');
select * from t1;
drop table t1;
#
# Testing auto-conversion to TEXT
#
create table t1 (a varchar(17000) character set utf16);
show create table t1;
drop table t1;
#
# Testing that maximum possible key length is 1000 bytes
#
create table t1 (a varchar(250) character set utf16 primary key);
show create table t1;
drop table t1;
--error ER_TOO_LONG_KEY
create table t1 (a varchar(334) character set utf16 primary key);
#
# Conversion to utf8mb4 and back to utf16
#
create table t1 (a char(1) character set utf16);
insert into t1 values (0xD800DC00),(0xD800DCFF),(0xDB7FDC00),(0xDB7FDCFF);
insert into t1 values (0x00C0), (0x00FF),(0xE000), (0xFFFF);
select hex(a), hex(@a:=convert(a using utf8mb4)), hex(convert(@a using utf16)) from t1;
drop table t1;
#
# Test basic regex functionality
#
set collation_connection=utf16_general_ci;
--source include/ctype_regex.inc
set names latin1;
#
# Test how character set works with date/time
#
SET collation_connection=utf16_general_ci;
--source include/ctype_datetime.inc
SET NAMES latin1;
#
# Bug#33073 Character sets: ordering fails with utf32
#
SET collation_connection=utf16_general_ci;
CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
SELECT * FROM t1 ORDER BY s1;
SET max_sort_length=4;
SELECT * FROM t1 ORDER BY s1;
DROP TABLE t1;
SET max_sort_length=DEFAULT;
SET NAMES latin1;
#
## TODO: add tests for all engines
#
--echo #
--echo # End of 5.5 tests
--echo #

290
mysql-test/t/ctype_utf16_uca.test

@ -0,0 +1,290 @@
-- source include/have_utf16.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
set names utf8;
set collation_connection=utf16_unicode_ci;
select hex('a'), hex('a ');
-- source include/endspace.inc
#
# Bug #6787 LIKE not working properly with _ and utf8 data
#
select 'c' like '\_' as want0;
#
# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
#
CREATE TABLE t (
c char(20) NOT NULL
) ENGINE=MyISAM DEFAULT CHARACTER SET utf16 COLLATE utf16_unicode_ci;
INSERT INTO t VALUES ('a'),('ab'),('aba');
ALTER TABLE t ADD INDEX (c);
SELECT c FROM t WHERE c LIKE 'a%';
DROP TABLE t;
create table t1 (c1 char(10) character set utf16 collate utf16_bin);
#
# Basic Latin
#
insert into t1 values ('A'),('a');
insert into t1 values ('B'),('b');
insert into t1 values ('C'),('c');
insert into t1 values ('D'),('d');
insert into t1 values ('E'),('e');
insert into t1 values ('F'),('f');
insert into t1 values ('G'),('g');
insert into t1 values ('H'),('h');
insert into t1 values ('I'),('i');
insert into t1 values ('J'),('j');
insert into t1 values ('K'),('k');
insert into t1 values ('L'),('l');
insert into t1 values ('M'),('m');
insert into t1 values ('N'),('n');
insert into t1 values ('O'),('o');
insert into t1 values ('P'),('p');
insert into t1 values ('Q'),('q');
insert into t1 values ('R'),('r');
insert into t1 values ('S'),('s');
insert into t1 values ('T'),('t');
insert into t1 values ('U'),('u');
insert into t1 values ('V'),('v');
insert into t1 values ('W'),('w');
insert into t1 values ('X'),('x');
insert into t1 values ('Y'),('y');
insert into t1 values ('Z'),('z');
#
# Latin-1 Supplement
#
insert into t1 values (0x00e0),(0x00c0);
insert into t1 values (0x00e1),(0x00c1);
insert into t1 values (0x00e2),(0x00c2);
insert into t1 values (0x00e3),(0x00c3);
insert into t1 values (0x00e4),(0x00c4);
insert into t1 values (0x00e5),(0x00c5);
insert into t1 values (0x00e6),(0x00c6);
insert into t1 values (0x00e7),(0x00c7);
insert into t1 values (0x00e8),(0x00c8);
insert into t1 values (0x00e9),(0x00c9);
insert into t1 values (0x00ea),(0x00ca);
insert into t1 values (0x00eb),(0x00cb);
insert into t1 values (0x00ec),(0x00cc);
insert into t1 values (0x00ed),(0x00cd);
insert into t1 values (0x00ee),(0x00ce);
insert into t1 values (0x00ef),(0x00cf);
insert into t1 values (0x00f0),(0x00d0);
insert into t1 values (0x00f1),(0x00d1);
insert into t1 values (0x00f2),(0x00d2);
insert into t1 values (0x00f3),(0x00d3);
insert into t1 values (0x00f4),(0x00d4);
insert into t1 values (0x00f5),(0x00d5);
insert into t1 values (0x00f6),(0x00d6);
insert into t1 values (0x00f7),(0x00d7);
insert into t1 values (0x00f8),(0x00d8);
insert into t1 values (0x00f9),(0x00d9);
insert into t1 values (0x00fa),(0x00da);
insert into t1 values (0x00fb),(0x00db);
insert into t1 values (0x00fc),(0x00dc);
insert into t1 values (0x00fd),(0x00dd);
insert into t1 values (0x00fe),(0x00de);
insert into t1 values (0x00ff),(0x00df);
#
# Latin extended-A, 0100-017F
#
insert into t1 values (0x0100),(0x0101),(0x0102),(0x0103);
insert into t1 values (0x0104),(0x0105),(0x0106),(0x0107);
insert into t1 values (0x0108),(0x0109),(0x010a),(0x010b);
insert into t1 values (0x010c),(0x010d),(0x010e),(0x010f);
insert into t1 values (0x0110),(0x0111),(0x0112),(0x0113);
insert into t1 values (0x0114),(0x0115),(0x0116),(0x0117);
insert into t1 values (0x0118),(0x0119),(0x011a),(0x011b);
insert into t1 values (0x011c),(0x011d),(0x011e),(0x011f);
insert into t1 values (0x0120),(0x0121),(0x0122),(0x0123);
insert into t1 values (0x0124),(0x0125),(0x0126),(0x0127);
insert into t1 values (0x0128),(0x0129),(0x012a),(0x012b);
insert into t1 values (0x012c),(0x012d),(0x012e),(0x012f);
insert into t1 values (0x0130),(0x0131),(0x0132),(0x0133);
insert into t1 values (0x0134),(0x0135),(0x0136),(0x0137);
insert into t1 values (0x0138),(0x0139),(0x013a),(0x013b);
insert into t1 values (0x013c),(0x013d),(0x013e),(0x013f);
insert into t1 values (0x0140),(0x0141),(0x0142),(0x0143);
insert into t1 values (0x0144),(0x0145),(0x0146),(0x0147);
insert into t1 values (0x0148),(0x0149),(0x014a),(0x014b);
insert into t1 values (0x014c),(0x014d),(0x014e),(0x014f);
insert into t1 values (0x0150),(0x0151),(0x0152),(0x0153);
insert into t1 values (0x0154),(0x0155),(0x0156),(0x0157);
insert into t1 values (0x0158),(0x0159),(0x015a),(0x015b);
insert into t1 values (0x015c),(0x015d),(0x015e),(0x015f);
insert into t1 values (0x0160),(0x0161),(0x0162),(0x0163);
insert into t1 values (0x0164),(0x0165),(0x0166),(0x0167);
insert into t1 values (0x0168),(0x0169),(0x016a),(0x016b);
insert into t1 values (0x016c),(0x016d),(0x016e),(0x016f);
insert into t1 values (0x0170),(0x0171),(0x0172),(0x0173);
insert into t1 values (0x0174),(0x0175),(0x0176),(0x0177);
insert into t1 values (0x0178),(0x0179),(0x017a),(0x017b);
insert into t1 values (0x017c),(0x017d),(0x017e),(0x017f);
#
# Latin extended-B, 0180-024F (only 0180-01FF is inserted here)
#
insert into t1 values (0x0180),(0x0181),(0x0182),(0x0183);
insert into t1 values (0x0184),(0x0185),(0x0186),(0x0187);
insert into t1 values (0x0188),(0x0189),(0x018a),(0x018b);
insert into t1 values (0x018c),(0x018d),(0x018e),(0x018f);
insert into t1 values (0x0190),(0x0191),(0x0192),(0x0193);
insert into t1 values (0x0194),(0x0195),(0x0196),(0x0197);
insert into t1 values (0x0198),(0x0199),(0x019a),(0x019b);
insert into t1 values (0x019c),(0x019d),(0x019e),(0x019f);
insert into t1 values (0x01a0),(0x01a1),(0x01a2),(0x01a3);
insert into t1 values (0x01a4),(0x01a5),(0x01a6),(0x01a7);
insert into t1 values (0x01a8),(0x01a9),(0x01aa),(0x01ab);
insert into t1 values (0x01ac),(0x01ad),(0x01ae),(0x01af);
insert into t1 values (0x01b0),(0x01b1),(0x01b2),(0x01b3);
insert into t1 values (0x01b4),(0x01b5),(0x01b6),(0x01b7);
insert into t1 values (0x01b8),(0x01b9),(0x01ba),(0x01bb);
insert into t1 values (0x01bc),(0x01bd),(0x01be),(0x01bf);
insert into t1 values (0x01c0),(0x01c1),(0x01c2),(0x01c3);
insert into t1 values (0x01c4),(0x01c5),(0x01c6),(0x01c7);
insert into t1 values (0x01c8),(0x01c9),(0x01ca),(0x01cb);
insert into t1 values (0x01cc),(0x01cd),(0x01ce),(0x01cf);
insert into t1 values (0x01d0),(0x01d1),(0x01d2),(0x01d3);
insert into t1 values (0x01d4),(0x01d5),(0x01d6),(0x01d7);
insert into t1 values (0x01d8),(0x01d9),(0x01da),(0x01db);
insert into t1 values (0x01dc),(0x01dd),(0x01de),(0x01df);
insert into t1 values (0x01e0),(0x01e1),(0x01e2),(0x01e3);
insert into t1 values (0x01e4),(0x01e5),(0x01e6),(0x01e7);
insert into t1 values (0x01e8),(0x01e9),(0x01ea),(0x01eb);
insert into t1 values (0x01ec),(0x01ed),(0x01ee),(0x01ef);
insert into t1 values (0x01f0),(0x01f1),(0x01f2),(0x01f3);
insert into t1 values (0x01f4),(0x01f5),(0x01f6),(0x01f7);
insert into t1 values (0x01f8),(0x01f9),(0x01fa),(0x01fb);
insert into t1 values (0x01fc),(0x01fd),(0x01fe),(0x01ff);
#
# Two-letter sequences, each in all four upper/lower case combinations
#
insert into t1 values ('AA'),('Aa'),('aa'),('aA');
insert into t1 values ('CH'),('Ch'),('ch'),('cH');
insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
insert into t1 values ('LL'),('Ll'),('ll'),('lL');
insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
insert into t1 values ('OE'),('Oe'),('oe'),('oE');
insert into t1 values ('SS'),('Ss'),('ss'),('sS');
insert into t1 values ('RR'),('Rr'),('rr'),('rR');
#
# For every utf16 UCA collation, group the rows by c1 under that
# collation and print each group's members concatenated in binary
# order. One output row therefore lists the strings that the
# collation groups together.
#
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_unicode_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_icelandic_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_latvian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_romanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovenian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_polish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_estonian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_swedish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_turkish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_czech_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_danish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_lithuanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_slovak_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_spanish2_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_roman_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_esperanto_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf16_hungarian_ci;
drop table t1;
#
# Bug#5324
#
SET NAMES utf8;
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_general_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_general_ci;
INSERT INTO t1 VALUES (0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
COLLATE utf16_general_ci ORDER BY c;
DROP TABLE t1;
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
INSERT INTO t1 VALUES (0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
COLLATE utf16_unicode_ci ORDER BY c;
DROP TABLE t1;
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf16 COLLATE utf16_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025 COLLATE utf16_unicode_ci;
INSERT INTO t1 VALUES (0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf16 0x039C0025
COLLATE utf16_unicode_ci ORDER BY c;
DROP TABLE t1;
#
# Run the shared include scripts for utf16.
# NOTE(review): ctype_common.inc presumably reads @test_character_set
# and @test_collation, which are set just before it is sourced;
# confirm against the include file.
#
SET NAMES utf8;
SET @test_character_set='utf16';
SET @test_collation='utf16_swedish_ci';
-- source include/ctype_common.inc
# The filesort and LIKE ... ESCAPE includes run with utf16_unicode_ci
# as the connection collation.
SET collation_connection='utf16_unicode_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
--echo End of 4.1 tests
#
# Check UPPER/LOWER changing length
#
# Result shorter than argument
CREATE TABLE t1 (id int, a varchar(30) character set utf16);
INSERT INTO t1 VALUES (1, 0x01310069), (2, 0x01310131);
INSERT INTO t1 VALUES (3, 0x00690069), (4, 0x01300049);
INSERT INTO t1 VALUES (5, 0x01300130), (6, 0x00490049);
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf16 collate utf16_turkish_ci;
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
DROP TABLE t1;
#
# Bug #27079 Crash while grouping empty ucs2 strings
#
CREATE TABLE t1 (
c1 text character set utf16 collate utf16_polish_ci NOT NULL
) ENGINE=MyISAM;
insert into t1 values (''),('a');
SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
DROP TABLE IF EXISTS t1;
#
# Test basic regex functionality
#
set collation_connection=utf16_unicode_ci;
--source include/ctype_regex.inc
--echo #
--echo # End of 5.5 tests
--echo #

784
mysql-test/t/ctype_utf32.test

@ -0,0 +1,784 @@
-- source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
SET NAMES latin1;
SET character_set_connection=utf32;
select hex('a'), hex('a ');
-- source include/endspace.inc
#
# Check that incomplete utf32 characters in HEX notation
# are left-padded with zeros
#
select hex(_utf32 0x44);
select hex(_utf32 0x3344);
select hex(_utf32 0x103344);
select hex(_utf32 X'44');
select hex(_utf32 X'3344');
select hex(_utf32 X'103344');
#
# Check that 0x20 is only trimmed when it is
# a part of real SPACE character, not just a part
# of a multibyte sequence.
# Note, CYRILLIC LETTER ER (U+0420) is used as an example; it
# is stored as 0x00000420 in UTF-32, thus contains 0x20 in the
# lowest byte. The second character is THREE-PER-EM SPACE, U+2004,
# stored as 0x00002004, which contains 0x20 in a non-final byte.
#
CREATE TABLE t1 (word VARCHAR(64), word2 CHAR(64)) CHARACTER SET utf32;
INSERT INTO t1 VALUES (_koi8r 0xF2, _koi8r 0xF2), (X'2004',X'2004');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DELETE FROM t1;
#
# Check that real spaces are correctly trimmed.
#
INSERT INTO t1 VALUES
(X'000004200000002000000020',X'000004200000002000000020'),
(X'000020040000002000000020',X'000020040000002000000020');
SELECT hex(word) FROM t1 ORDER BY word;
SELECT hex(word2) FROM t1 ORDER BY word2;
DROP TABLE t1;
#
# Check LPAD/RPAD
#
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0421'));
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
SELECT hex(LPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
SELECT hex(LPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0421'));
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'0000042100000422'));
SELECT hex(RPAD(_utf32 X'0420',10,_utf32 X'000004210000042200000423'));
SELECT hex(RPAD(_utf32 X'000004200000042100000422000004230000042400000425000004260000042700000428000004290000042A0000042B',10,_utf32 X'000004210000042200000423'));
CREATE TABLE t1 SELECT
LPAD(_utf32 X'0420',10,_utf32 X'0421') l,
RPAD(_utf32 X'0420',10,_utf32 X'0421') r;
SHOW CREATE TABLE t1;
select hex(l), hex(r) from t1;
DROP TABLE t1;
create table t1 (f1 char(30));
insert into t1 values ("103000"), ("22720000"), ("3401200"), ("78000");
select lpad(f1, 12, "-o-/") from t1;
drop table t1;
######################################################
#
# Test of like
#
SET NAMES latin1;
SET character_set_connection=utf32;
--source include/ctype_like.inc
SET NAMES utf8;
SET character_set_connection=utf32;
CREATE TABLE t1 (a VARCHAR(10) CHARACTER SET utf32);
INSERT INTO t1 VALUES ('фыва'),('Фыва'),('фЫва'),('фыВа'),('фывА'),('ФЫВА');
INSERT INTO t1 VALUES ('фывапролдж'),('Фывапролдж'),('фЫвапролдж'),('фыВапролдж');
INSERT INTO t1 VALUES ('фывАпролдж'),('фываПролдж'),('фывапРолдж'),('фывапрОлдж');
INSERT INTO t1 VALUES ('фывапроЛдж'),('фывапролДж'),('фывапролдЖ'),('ФЫВАПРОЛДЖ');
SELECT * FROM t1 WHERE a LIKE '%фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE '%фЫв%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' ORDER BY BINARY a;
SELECT * FROM t1 WHERE a LIKE 'фЫва%' COLLATE utf32_bin ORDER BY BINARY a;
DROP TABLE t1;
CREATE TABLE t1 (word varchar(64) NOT NULL, PRIMARY KEY (word))
ENGINE=MyISAM CHARACTER SET utf32;
INSERT INTO t1 (word) VALUES ("cat");
SELECT * FROM t1 WHERE word LIKE "c%";
SELECT * FROM t1 WHERE word LIKE "ca_";
SELECT * FROM t1 WHERE word LIKE "cat";
SELECT * FROM t1 WHERE word LIKE _utf32 x'0000006300000025'; # "c%"
SELECT * FROM t1 WHERE word LIKE _utf32 x'00000063000000610000005F'; # "ca_"
DROP TABLE t1;
#
# Check that INSERT() works fine.
# This invokes charpos() function.
select insert(_utf32 0x000000610000006200000063,10,2,_utf32 0x000000640000006500000066);
select insert(_utf32 0x000000610000006200000063,1,2,_utf32 0x000000640000006500000066);
#######################################################
#
# Bug 1264
#
# Description:
#
# When using a ucs2 table in MySQL,
# either with ucs2_general_ci or ucs2_bin collation,
# words are returned in an incorrect order when using ORDER BY
# on an _indexed_ CHAR or VARCHAR column. They are sorted with
# the longest word *first* instead of last. I.E. The word "aardvark"
# is in the results before the word "a".
#
# If there is no index for the column, the problem does not occur.
#
# Interestingly, if there is no second column, the words are returned
# in the correct order.
#
# According to EXPLAIN, it looks like when the output includes columns that
# are not part of the index sorted on, it does a filesort, which fails.
# Using a straight index yields correct results.
SET NAMES latin1;
#
# Two fields, index
#
CREATE TABLE t1 (
word VARCHAR(64),
bar INT(11) default 0,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf32
COLLATE utf32_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER by word;
DROP TABLE t1;
#
# One field, index
#
CREATE TABLE t1 (
word VARCHAR(64) ,
PRIMARY KEY (word))
ENGINE=MyISAM
CHARSET utf32
COLLATE utf32_general_ci;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a");
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY WORD;
SELECT * FROM t1 ORDER BY word;
DROP TABLE t1;
#
# Two fields, no index
#
CREATE TABLE t1 (
word TEXT,
bar INT(11) AUTO_INCREMENT,
PRIMARY KEY (bar))
ENGINE=MyISAM
CHARSET utf32
COLLATE utf32_general_ci ;
INSERT INTO t1 (word) VALUES ("aar");
INSERT INTO t1 (word) VALUES ("a" );
INSERT INTO t1 (word) VALUES ("aardvar");
INSERT INTO t1 (word) VALUES ("aardvark");
INSERT INTO t1 (word) VALUES ("aardvara");
INSERT INTO t1 (word) VALUES ("aardvarz");
EXPLAIN SELECT * FROM t1 ORDER BY word;
SELECT * FROM t1 ORDER BY word;
EXPLAIN SELECT word FROM t1 ORDER BY word;
SELECT word FROM t1 ORDER BY word;
DROP TABLE t1;
#
# END OF Bug 1264 test
#
########################################################
#
# Check alignment for from-binary-conversion with CAST and CONVERT
#
SELECT hex(cast(0xAA as char character set utf32));
SELECT hex(convert(0xAA using utf32));
#
# Check alignment for string types
#
CREATE TABLE t1 (a char(10) character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a varchar(10) character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a text character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a mediumtext character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
CREATE TABLE t1 (a longtext character set utf32);
INSERT INTO t1 VALUES (0x1),(0x11),(0x111),(0x1111),(0x11111);
SELECT HEX(a) FROM t1;
DROP TABLE t1;
##
## Bug #5024 Server crashes with queries on fields
## with certain charset/collation settings
##
#
#create table t1 (s1 char character set `ucs2` collate `ucs2_czech_ci`);
#insert into t1 values ('0'),('1'),('2'),('a'),('b'),('c');
#select s1 from t1 where s1 > 'a' order by s1;
#drop table t1;
#
# Bug #5081 : UCS2 fields are filled with '0x2020'
# after extending field length
#
create table t1(a char(1)) default charset utf32;
insert into t1 values ('a'),('b'),('c');
alter table t1 modify a char(5);
select a, hex(a) from t1;
drop table t1;
#
# Check prepare statement from an UTF32 string
#
# Build the statement text in the connection character set, convert it
# to utf32, then prepare and execute from the utf32 user variable.
set @ivar= 1234;
set @str1 = 'select ?';
set @str2 = convert(@str1 using utf32);
prepare stmt1 from @str2;
execute stmt1 using @ivar;
# NOTE(review): no "deallocate prepare stmt1" follows in this part of the
# script - confirm whether the statement is meant to stay allocated.
#
# Check that utf32 works with ENUM and SET type
#
set names utf8;
create table t1 (a enum('x','y','z') character set utf32);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
select a, hex(a) from t1 order by a;
alter table t1 change a a enum('x','y','z','d','e','ä','ö','ü') character set utf32;
show create table t1;
insert into t1 values ('D');
insert into t1 values ('E ');
insert into t1 values ('ä');
insert into t1 values ('ö');
insert into t1 values ('ü');
select a, hex(a) from t1 order by a;
drop table t1;
create table t1 (a set ('x','y','z','ä','ö','ü') character set utf32);
show create table t1;
insert into t1 values ('x');
insert into t1 values ('y');
insert into t1 values ('z');
insert into t1 values ('x,y');
insert into t1 values ('x,y,z,ä,ö,ü');
select a, hex(a) from t1 order by a;
drop table t1;
#
# Bug#7302 UCS2 data in ENUM fields get truncated when new column is added
#
create table t1(a enum('a','b','c')) default character set utf32;
insert into t1 values('a'),('b'),('c');
alter table t1 add b char(1);
show warnings;
select * from t1 order by a;
drop table t1;
SET NAMES latin1;
SET collation_connection='utf32_general_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
SET NAMES latin1;
SET collation_connection='utf32_bin';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
#
# Bug#10344 Some string functions fail for UCS2
#
select hex(substr(_utf32 0x000000e4000000e500000068,1));
select hex(substr(_utf32 0x000000e4000000e500000068,2));
select hex(substr(_utf32 0x000000e4000000e500000068,3));
select hex(substr(_utf32 0x000000e4000000e500000068,-1));
select hex(substr(_utf32 0x000000e4000000e500000068,-2));
select hex(substr(_utf32 0x000000e4000000e500000068,-3));
#SET NAMES latin1;
#
# Bug#8235
#
# This bug also helped to find another problem that
# INSERT of a UCS2 string containing a negative number
# into a unsigned int column didn't produce warnings.
# This test covers both problems.
#
#SET collation_connection='ucs2_swedish_ci';
#CREATE TABLE t1 (Field1 int(10) default '0');
## no warnings, negative numbers are allowed
#INSERT INTO t1 VALUES ('-1');
#SELECT * FROM t1;
#DROP TABLE t1;
#CREATE TABLE t1 (Field1 int(10) unsigned default '0');
## this should generate a "Data truncated" warning
#INSERT INTO t1 VALUES ('-1');
#DROP TABLE t1;
#SET NAMES latin1;
#
##
## Bug#18691 Converting number to UNICODE string returns invalid result
##
#SELECT CONVERT(103, CHAR(50) UNICODE);
#SELECT CONVERT(103.0, CHAR(50) UNICODE);
#SELECT CONVERT(-103, CHAR(50) UNICODE);
#SELECT CONVERT(-103.0, CHAR(50) UNICODE);
#
# Bug#9557 MyISAM utf8 table crash
#
CREATE TABLE t1 (
a varchar(250) NOT NULL default '',
KEY a (a)
) ENGINE=MyISAM DEFAULT CHARSET=utf32 COLLATE utf32_general_ci;
insert into t1 values (0x803d);
insert into t1 values (0x005b);
select hex(a) from t1;
drop table t1;
##
## Bug #14583 Bug on query using a LIKE on indexed field with ucs2_bin collation
##
#--disable_warnings
#create table t1(f1 varchar(5) CHARACTER SET utf32 COLLATE utf32_bin NOT NULL) engine=InnoDB;
#--enable_warnings
#insert into t1 values('a');
#create index t1f1 on t1(f1);
#select f1 from t1 where f1 like 'a%';
#drop table t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
create table t1 (utext varchar(20) character set utf32);
insert into t1 values ("lily");
insert into t1 values ("river");
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22052 Trailing spaces are not removed from UNICODE fields in an index
#
create table t1 (
a char(10) character set utf32 not null,
index a (a)
) engine=myisam;
insert into t1 values (repeat(0x0000201f, 10));
insert into t1 values (repeat(0x00002020, 10));
insert into t1 values (repeat(0x00002021, 10));
# make sure "index read" is used
explain select hex(a) from t1 order by a;
select hex(a) from t1 order by a;
alter table t1 drop index a;
select hex(a) from t1 order by a;
drop table t1;
#
# Bug #20076: server crashes for a query with GROUP BY if MIN/MAX aggregation
# over a 'ucs2' field uses a temporary table
#
#CREATE TABLE t1 (id int, s char(5) CHARACTER SET ucs2 COLLATE ucs2_unicode_ci);
#INSERT INTO t1 VALUES (1, 'ZZZZZ'), (1, 'ZZZ'), (2, 'ZZZ'), (2, 'ZZZZZ');
#SELECT id, MIN(s) FROM t1 GROUP BY id;
#DROP TABLE t1;
##
## Bug #20536: md5() with GROUP BY and UCS2 return different results on myisam/innodb
##
#
#--disable_warnings
#drop table if exists bug20536;
#--enable_warnings
#
#set names latin1;
#create table bug20536 (id bigint not null auto_increment primary key, name
#varchar(255) character set ucs2 not null);
#insert into `bug20536` (`id`,`name`) values (1, _latin1 x'7465737431'), (2, "'test\\_2'");
#select md5(name) from bug20536;
#select sha1(name) from bug20536;
#select make_set(3, name, upper(name)) from bug20536;
#select export_set(5, name, upper(name)) from bug20536;
#select export_set(5, name, upper(name), ",", 5) from bug20536;
#
# Bug #20108: corrupted default enum value for a ucs2 field
#
CREATE TABLE t1 (
status enum('active','passive') character set utf32 collate utf32_general_ci
NOT NULL default 'passive'
);
SHOW CREATE TABLE t1;
ALTER TABLE t1 ADD a int NOT NULL AFTER status;
SHOW CREATE TABLE t1;
DROP TABLE t1;
#CREATE TABLE t2 (
# status enum('active','passive') collate ucs2_turkish_ci
# NOT NULL default 'passive'
#);
#SHOW CREATE TABLE t2;
#ALTER TABLE t2 ADD a int NOT NULL AFTER status;
#DROP TABLE t2;
## Some broken functions: add these tests just to document current behavior.
#
## PASSWORD and OLD_PASSWORD don't work with UCS2 strings, but to fix it would
## not be backwards compatible in all cases, so it's best to leave it alone
#select password(name) from bug20536;
#select old_password(name) from bug20536;
#
## Disable test case as encrypt relies on 'crypt' function.
## "decrypt" is normally tested in func_crypt.test which has a
## "have_crypt.inc" test
#--disable_parsing
## ENCRYPT relies on OS function crypt() which takes a NUL-terminated string; it
## doesn't return good results for strings with embedded 0 bytes. It won't be
## fixed unless we choose to re-implement the crypt() function ourselves to take
## an extra size_t string_length argument.
#select encrypt(name, 'SALT') from bug20536;
#--enable_parsing
#
## QUOTE doesn't work with UCS2 data. It would require a total rewrite
## of Item_func_quote::val_str(), which isn't worthwhile until UCS2 is
## supported fully as a client character set.
#select quote(name) from bug20536;
#
#drop table bug20536;
#
--echo End of 4.1 tests
#
# Conversion from an UTF32 string to a decimal column
#
CREATE TABLE t1 (a varchar(64) character set utf32, b decimal(10,3));
INSERT INTO t1 VALUES ("1.1", 0), ("2.1", 0);
update t1 set b=a;
SELECT *, hex(a) FROM t1;
DROP TABLE t1;
#
# Bug#9442 Set parameter make query fail if column character set is UCS2
#
# NOTE(review): these statements repeat, one for one, the Bug#9442 block
# that appears earlier in this file (before "End of 4.1 tests").
create table t1 (utext varchar(20) character set utf32);
insert into t1 values ("lily");
insert into t1 values ("river");
# Bind the LIKE pattern through a parameter and execute twice.
prepare stmt from 'select utext from t1 where utext like ?';
set @param1='%%';
execute stmt using @param1;
execute stmt using @param1;
# Same query with the pattern written inline, for comparison.
select utext from t1 where utext like '%%';
drop table t1;
deallocate prepare stmt;
#
# Bug#22638 SOUNDEX broken for international characters
#
set names latin1;
set character_set_connection=utf32;
select soundex(''),soundex('he'),soundex('hello all folks'),soundex('#3556 in bugdb');
select hex(soundex('')),hex(soundex('he')),hex(soundex('hello all folks')),hex(soundex('#3556 in bugdb'));
select 'mood' sounds like 'mud';
# Cyrillic A, BE, VE
select hex(soundex(_utf32 0x000004100000041100000412));
# Make sure that "U+00BF INVERTED QUESTION MARK" is not considered as letter
select hex(soundex(_utf32 0x000000BF000000C0));
set names latin1;
#
# Bug #14290: character_maximum_length for text fields
#
create table t1(a blob, b text charset utf32);
select data_type, character_octet_length, character_maximum_length
from information_schema.columns where table_name='t1';
drop table t1;
set names latin1;
set collation_connection=utf32_general_ci;
#
# Testing cs->coll->instr()
#
select position('bb' in 'abba');
#
# Testing cs->coll->hash_sort()
#
create table t1 (a varchar(10) character set utf32) engine=heap;
insert into t1 values ('a'),('A'),('b'),('B');
select * from t1 where a='a' order by binary a;
select hex(min(binary a)),count(*) from t1 group by a;
drop table t1;
#
# Testing cs->cset->numchars()
#
select char_length('abcd'), octet_length('abcd');
#
# Testing cs->cset->charpos()
#
select left('abcd',2);
#
# Testing cs->cset->well_formed_length()
#
create table t1 (a varchar(10) character set utf32);
# U+10FFFF is the last valid Unicode code point; this insert carries no
# --error directive, so it is expected to succeed.
insert into t1 values (_utf32 0x0010FFFF);
# Every value below is above U+10FFFF and is expected to fail with
# ER_INVALID_CHARACTER_STRING.
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x00110000);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x00110101);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x01000101);
--error ER_INVALID_CHARACTER_STRING
insert into t1 values (_utf32 0x11000101);
# Show what was actually stored.
select hex(a) from t1;
drop table t1;
#
# Bug#32914 Character sets: illegal characters in utf8 and utf32 columns
#
# Insert hex literals that are not valid utf32 into a utf32 column.
# "with pad" cases use a literal whose length is not a multiple of four
# bytes; "without pad" cases spell out all four bytes of each character.
create table t1 (utf32 varchar(2) character set utf32);
--echo Wrong character with pad
insert into t1 values (0x110000);
--echo Wrong chsaracter without pad
insert into t1 values (0x00110000);
--echo Wrong character with pad followed by another wrong character
insert into t1 values (0x11000000110000);
--echo Good character with pad followed by bad character
insert into t1 values (0x10000000110000);
--echo Good character without pad followed by bad character
insert into t1 values (0x0010000000110000);
--echo Wrong character with the second byte higher than 0x10
insert into t1 values (0x00800037);
# NOTE(review): the literal below is identical to the unpadded case above
# (0x00800037); a padded variant would presumably be 0x800037 - confirm
# against the recorded result file before changing it.
--echo Wrong character with pad with the second byte higher than 0x10
insert into t1 values (0x00800037);
drop table t1;
#
# Bug#32394 Character sets: crash if comparison with 0xfffd
#
select _utf32'a' collate utf32_general_ci = 0xfffd;
select hex(concat(_utf32 0x0410 collate utf32_general_ci, 0x61));
create table t1 (s1 varchar(5) character set utf32);
insert into t1 values (0xfffd);
select case when s1 = 0xfffd then 1 else 0 end from t1;
select hex(s1) from t1 where s1 = 0xfffd;
drop table t1;
#
# Testing cs->cset->lengthsp()
#
create table t1 (a char(10)) character set utf32;
insert into t1 values ('a ');
select hex(a) from t1;
drop table t1;
#
# Testing cs->cset->caseup() and cs->cset->casedn()
#
select upper('abcd'), lower('ABCD');
#
# TODO: str_to_datetime() is broken and doesn't work with ucs2 and utf32
# Testing cs->cset->snprintf()
#
#create table t1 (a date);
#insert into t1 values ('2007-09-16');
#select * from t1;
#drop table t1;
#
# Testing cs->cset->l10tostr
# !!! Not used in the code
#
# Testing cs->cset->ll10tostr
#
create table t1 (a varchar(10) character set utf32);
insert into t1 values (123456);
select a, hex(a) from t1;
drop table t1;
#
# Testing cs->cset->fill
# SOUNDEX fills strings with DIGIT ZERO up to four characters
select hex(soundex('a'));
#
# Testing cs->cset->strntol
# !!! Not used in the code
#
# Testing cs->cset->strntoul
#
create table t1 (a enum ('a','b','c')) character set utf32;
insert into t1 values ('1');
select * from t1;
drop table t1;
#
# Testing cs->cset->strntoll and cs->cset->strntoull
#
set names latin1;
select hex(conv(convert('123' using utf32), -10, 16));
select hex(conv(convert('123' using utf32), 10, 16));
#
# Testing cs->cset->strntod
#
set names latin1;
set character_set_connection=utf32;
select 1.1 + '1.2';
select 1.1 + '1.2xxx';
# Testing strntoll10_utf32
# Testing cs->cset->strtoll10
select left('aaa','1');
#
# Testing cs->cset->strntoull10rnd
#
create table t1 (a int);
insert into t1 values ('-1234.1e2');
insert into t1 values ('-1234.1e2xxxx');
insert into t1 values ('-1234.1e2 ');
select * from t1;
drop table t1;
#
# Testing cs->cset->scan
#
create table t1 (a int);
insert into t1 values ('1 ');
insert into t1 values ('1 x');
select * from t1;
drop table t1;
#
# Testing auto-conversion to TEXT
#
create table t1 (a varchar(17000) character set utf32);
show create table t1;
drop table t1;
#
# Testing that the maximum possible key length is 1332 bytes
#
create table t1 (a varchar(250) character set utf32 primary key);
show create table t1;
drop table t1;
--error ER_TOO_LONG_KEY
create table t1 (a varchar(334) character set utf32 primary key);
#
# Testing mi_check with long key values
#
create table t1 (a varchar(333) character set utf32, key(a));
insert into t1 values (repeat('a',333)), (repeat('b',333));
flush tables;
check table t1;
drop table t1;
#
# Test how character set works with date/time
#
SET collation_connection=utf32_general_ci;
--source include/ctype_datetime.inc
SET NAMES latin1;
#
# Test basic regex functionality
#
set collation_connection=utf32_general_ci;
--source include/ctype_regex.inc
set names latin1;
# TODO: add tests for all engines
#
# Bug #36418 Character sets: crash if char(256 using utf32)
#
select hex(char(0x01 using utf32));
select hex(char(0x0102 using utf32));
select hex(char(0x010203 using utf32));
select hex(char(0x01020304 using utf32));
create table t1 (s1 varchar(1) character set utf32, s2 text character set utf32);
create index i on t1 (s1);
insert into t1 values (char(256 using utf32), char(256 using utf32));
select hex(s1), hex(s2) from t1;
drop table t1;
#
# Bug#33073 Character sets: ordering fails with utf32
#
SET collation_connection=utf32_general_ci;
# LIMIT 0 creates an empty table; SHOW CREATE TABLE then reveals the
# column type derived from the repeat('a',2) expression.
CREATE TABLE t1 AS SELECT repeat('a',2) as s1 LIMIT 0;
SHOW CREATE TABLE t1;
INSERT INTO t1 VALUES ('ab'),('AE'),('ab'),('AE');
# Order with the default max_sort_length first ...
SELECT * FROM t1 ORDER BY s1;
# ... then with max_sort_length=4 (presumably just one 4-byte utf32
# character takes part in the sort - confirm against the server docs).
SET max_sort_length=4;
SELECT * FROM t1 ORDER BY s1;
DROP TABLE t1;
# Restore the session default so later tests are unaffected.
SET max_sort_length=DEFAULT;
SET NAMES latin1;
--echo #
--echo # End of 5.5 tests
--echo #

291
mysql-test/t/ctype_utf32_uca.test

@ -0,0 +1,291 @@
-- source include/have_ucs2.inc
-- source include/have_utf32.inc
--disable_warnings
DROP TABLE IF EXISTS t1;
--enable_warnings
--echo #
--echo # Start of 5.5 tests
--echo #
set names utf8;
set collation_connection=utf32_unicode_ci;
select hex('a'), hex('a ');
-- source include/endspace.inc
#
# Bug #6787 LIKE not working properly with _ and utf8 data
#
select 'c' like '\_' as want0;
#
# Bug #5679 utf8_unicode_ci LIKE--trailing % doesn't equal zero characters
#
CREATE TABLE t (
c char(20) NOT NULL
) ENGINE=MyISAM DEFAULT CHARACTER SET utf32 COLLATE=utf32_unicode_ci;
INSERT INTO t VALUES ('a'),('ab'),('aba');
ALTER TABLE t ADD INDEX (c);
SELECT c FROM t WHERE c LIKE 'a%';
DROP TABLE t;
create table t1 (c1 char(10) character set utf32 collate utf32_bin);
#
# Basic Latin
#
insert into t1 values ('A'),('a');
insert into t1 values ('B'),('b');
insert into t1 values ('C'),('c');
insert into t1 values ('D'),('d');
insert into t1 values ('E'),('e');
insert into t1 values ('F'),('f');
insert into t1 values ('G'),('g');
insert into t1 values ('H'),('h');
insert into t1 values ('I'),('i');
insert into t1 values ('J'),('j');
insert into t1 values ('K'),('k');
insert into t1 values ('L'),('l');
insert into t1 values ('M'),('m');
insert into t1 values ('N'),('n');
insert into t1 values ('O'),('o');
insert into t1 values ('P'),('p');
insert into t1 values ('Q'),('q');
insert into t1 values ('R'),('r');
insert into t1 values ('S'),('s');
insert into t1 values ('T'),('t');
insert into t1 values ('U'),('u');
insert into t1 values ('V'),('v');
insert into t1 values ('W'),('w');
insert into t1 values ('X'),('x');
insert into t1 values ('Y'),('y');
insert into t1 values ('Z'),('z');
#
# Latin-1 Supplement
#
insert into t1 values (_ucs2 0x00e0),(_ucs2 0x00c0);
insert into t1 values (_ucs2 0x00e1),(_ucs2 0x00c1);
insert into t1 values (_ucs2 0x00e2),(_ucs2 0x00c2);
insert into t1 values (_ucs2 0x00e3),(_ucs2 0x00c3);
insert into t1 values (_ucs2 0x00e4),(_ucs2 0x00c4);
insert into t1 values (_ucs2 0x00e5),(_ucs2 0x00c5);
insert into t1 values (_ucs2 0x00e6),(_ucs2 0x00c6);
insert into t1 values (_ucs2 0x00e7),(_ucs2 0x00c7);
insert into t1 values (_ucs2 0x00e8),(_ucs2 0x00c8);
insert into t1 values (_ucs2 0x00e9),(_ucs2 0x00c9);
insert into t1 values (_ucs2 0x00ea),(_ucs2 0x00ca);
insert into t1 values (_ucs2 0x00eb),(_ucs2 0x00cb);
insert into t1 values (_ucs2 0x00ec),(_ucs2 0x00cc);
insert into t1 values (_ucs2 0x00ed),(_ucs2 0x00cd);
insert into t1 values (_ucs2 0x00ee),(_ucs2 0x00ce);
insert into t1 values (_ucs2 0x00ef),(_ucs2 0x00cf);
insert into t1 values (_ucs2 0x00f0),(_ucs2 0x00d0);
insert into t1 values (_ucs2 0x00f1),(_ucs2 0x00d1);
insert into t1 values (_ucs2 0x00f2),(_ucs2 0x00d2);
insert into t1 values (_ucs2 0x00f3),(_ucs2 0x00d3);
insert into t1 values (_ucs2 0x00f4),(_ucs2 0x00d4);
insert into t1 values (_ucs2 0x00f5),(_ucs2 0x00d5);
insert into t1 values (_ucs2 0x00f6),(_ucs2 0x00d6);
insert into t1 values (_ucs2 0x00f7),(_ucs2 0x00d7);
insert into t1 values (_ucs2 0x00f8),(_ucs2 0x00d8);
insert into t1 values (_ucs2 0x00f9),(_ucs2 0x00d9);
insert into t1 values (_ucs2 0x00fa),(_ucs2 0x00da);
insert into t1 values (_ucs2 0x00fb),(_ucs2 0x00db);
insert into t1 values (_ucs2 0x00fc),(_ucs2 0x00dc);
insert into t1 values (_ucs2 0x00fd),(_ucs2 0x00dd);
insert into t1 values (_ucs2 0x00fe),(_ucs2 0x00de);
insert into t1 values (_ucs2 0x00ff),(_ucs2 0x00df);
#
# Latin extended-A, 0100-017F
#
insert into t1 values (_ucs2 0x0100),(_ucs2 0x0101),(_ucs2 0x0102),(_ucs2 0x0103);
insert into t1 values (_ucs2 0x0104),(_ucs2 0x0105),(_ucs2 0x0106),(_ucs2 0x0107);
insert into t1 values (_ucs2 0x0108),(_ucs2 0x0109),(_ucs2 0x010a),(_ucs2 0x010b);
insert into t1 values (_ucs2 0x010c),(_ucs2 0x010d),(_ucs2 0x010e),(_ucs2 0x010f);
insert into t1 values (_ucs2 0x0110),(_ucs2 0x0111),(_ucs2 0x0112),(_ucs2 0x0113);
insert into t1 values (_ucs2 0x0114),(_ucs2 0x0115),(_ucs2 0x0116),(_ucs2 0x0117);
insert into t1 values (_ucs2 0x0118),(_ucs2 0x0119),(_ucs2 0x011a),(_ucs2 0x011b);
insert into t1 values (_ucs2 0x011c),(_ucs2 0x011d),(_ucs2 0x011e),(_ucs2 0x011f);
insert into t1 values (_ucs2 0x0120),(_ucs2 0x0121),(_ucs2 0x0122),(_ucs2 0x0123);
insert into t1 values (_ucs2 0x0124),(_ucs2 0x0125),(_ucs2 0x0126),(_ucs2 0x0127);
insert into t1 values (_ucs2 0x0128),(_ucs2 0x0129),(_ucs2 0x012a),(_ucs2 0x012b);
insert into t1 values (_ucs2 0x012c),(_ucs2 0x012d),(_ucs2 0x012e),(_ucs2 0x012f);
insert into t1 values (_ucs2 0x0130),(_ucs2 0x0131),(_ucs2 0x0132),(_ucs2 0x0133);
insert into t1 values (_ucs2 0x0134),(_ucs2 0x0135),(_ucs2 0x0136),(_ucs2 0x0137);
insert into t1 values (_ucs2 0x0138),(_ucs2 0x0139),(_ucs2 0x013a),(_ucs2 0x013b);
insert into t1 values (_ucs2 0x013c),(_ucs2 0x013d),(_ucs2 0x013e),(_ucs2 0x013f);
insert into t1 values (_ucs2 0x0140),(_ucs2 0x0141),(_ucs2 0x0142),(_ucs2 0x0143);
insert into t1 values (_ucs2 0x0144),(_ucs2 0x0145),(_ucs2 0x0146),(_ucs2 0x0147);
insert into t1 values (_ucs2 0x0148),(_ucs2 0x0149),(_ucs2 0x014a),(_ucs2 0x014b);
insert into t1 values (_ucs2 0x014c),(_ucs2 0x014d),(_ucs2 0x014e),(_ucs2 0x014f);
insert into t1 values (_ucs2 0x0150),(_ucs2 0x0151),(_ucs2 0x0152),(_ucs2 0x0153);
insert into t1 values (_ucs2 0x0154),(_ucs2 0x0155),(_ucs2 0x0156),(_ucs2 0x0157);
insert into t1 values (_ucs2 0x0158),(_ucs2 0x0159),(_ucs2 0x015a),(_ucs2 0x015b);
insert into t1 values (_ucs2 0x015c),(_ucs2 0x015d),(_ucs2 0x015e),(_ucs2 0x015f);
insert into t1 values (_ucs2 0x0160),(_ucs2 0x0161),(_ucs2 0x0162),(_ucs2 0x0163);
insert into t1 values (_ucs2 0x0164),(_ucs2 0x0165),(_ucs2 0x0166),(_ucs2 0x0167);
insert into t1 values (_ucs2 0x0168),(_ucs2 0x0169),(_ucs2 0x016a),(_ucs2 0x016b);
insert into t1 values (_ucs2 0x016c),(_ucs2 0x016d),(_ucs2 0x016e),(_ucs2 0x016f);
insert into t1 values (_ucs2 0x0170),(_ucs2 0x0171),(_ucs2 0x0172),(_ucs2 0x0173);
insert into t1 values (_ucs2 0x0174),(_ucs2 0x0175),(_ucs2 0x0176),(_ucs2 0x0177);
insert into t1 values (_ucs2 0x0178),(_ucs2 0x0179),(_ucs2 0x017a),(_ucs2 0x017b);
insert into t1 values (_ucs2 0x017c),(_ucs2 0x017d),(_ucs2 0x017e),(_ucs2 0x017f);
#
# Latin extended-B, 0180-024F
#
insert into t1 values (_ucs2 0x0180),(_ucs2 0x0181),(_ucs2 0x0182),(_ucs2 0x0183);
insert into t1 values (_ucs2 0x0184),(_ucs2 0x0185),(_ucs2 0x0186),(_ucs2 0x0187);
insert into t1 values (_ucs2 0x0188),(_ucs2 0x0189),(_ucs2 0x018a),(_ucs2 0x018b);
insert into t1 values (_ucs2 0x018c),(_ucs2 0x018d),(_ucs2 0x018e),(_ucs2 0x018f);
insert into t1 values (_ucs2 0x0190),(_ucs2 0x0191),(_ucs2 0x0192),(_ucs2 0x0193);
insert into t1 values (_ucs2 0x0194),(_ucs2 0x0195),(_ucs2 0x0196),(_ucs2 0x0197);
insert into t1 values (_ucs2 0x0198),(_ucs2 0x0199),(_ucs2 0x019a),(_ucs2 0x019b);
insert into t1 values (_ucs2 0x019c),(_ucs2 0x019d),(_ucs2 0x019e),(_ucs2 0x019f);
insert into t1 values (_ucs2 0x01a0),(_ucs2 0x01a1),(_ucs2 0x01a2),(_ucs2 0x01a3);
insert into t1 values (_ucs2 0x01a4),(_ucs2 0x01a5),(_ucs2 0x01a6),(_ucs2 0x01a7);
insert into t1 values (_ucs2 0x01a8),(_ucs2 0x01a9),(_ucs2 0x01aa),(_ucs2 0x01ab);
insert into t1 values (_ucs2 0x01ac),(_ucs2 0x01ad),(_ucs2 0x01ae),(_ucs2 0x01af);
insert into t1 values (_ucs2 0x01b0),(_ucs2 0x01b1),(_ucs2 0x01b2),(_ucs2 0x01b3);
insert into t1 values (_ucs2 0x01b4),(_ucs2 0x01b5),(_ucs2 0x01b6),(_ucs2 0x01b7);
insert into t1 values (_ucs2 0x01b8),(_ucs2 0x01b9),(_ucs2 0x01ba),(_ucs2 0x01bb);
insert into t1 values (_ucs2 0x01bc),(_ucs2 0x01bd),(_ucs2 0x01be),(_ucs2 0x01bf);
insert into t1 values (_ucs2 0x01c0),(_ucs2 0x01c1),(_ucs2 0x01c2),(_ucs2 0x01c3);
insert into t1 values (_ucs2 0x01c4),(_ucs2 0x01c5),(_ucs2 0x01c6),(_ucs2 0x01c7);
insert into t1 values (_ucs2 0x01c8),(_ucs2 0x01c9),(_ucs2 0x01ca),(_ucs2 0x01cb);
insert into t1 values (_ucs2 0x01cc),(_ucs2 0x01cd),(_ucs2 0x01ce),(_ucs2 0x01cf);
insert into t1 values (_ucs2 0x01d0),(_ucs2 0x01d1),(_ucs2 0x01d2),(_ucs2 0x01d3);
insert into t1 values (_ucs2 0x01d4),(_ucs2 0x01d5),(_ucs2 0x01d6),(_ucs2 0x01d7);
insert into t1 values (_ucs2 0x01d8),(_ucs2 0x01d9),(_ucs2 0x01da),(_ucs2 0x01db);
insert into t1 values (_ucs2 0x01dc),(_ucs2 0x01dd),(_ucs2 0x01de),(_ucs2 0x01df);
insert into t1 values (_ucs2 0x01e0),(_ucs2 0x01e1),(_ucs2 0x01e2),(_ucs2 0x01e3);
insert into t1 values (_ucs2 0x01e4),(_ucs2 0x01e5),(_ucs2 0x01e6),(_ucs2 0x01e7);
insert into t1 values (_ucs2 0x01e8),(_ucs2 0x01e9),(_ucs2 0x01ea),(_ucs2 0x01eb);
insert into t1 values (_ucs2 0x01ec),(_ucs2 0x01ed),(_ucs2 0x01ee),(_ucs2 0x01ef);
insert into t1 values (_ucs2 0x01f0),(_ucs2 0x01f1),(_ucs2 0x01f2),(_ucs2 0x01f3);
insert into t1 values (_ucs2 0x01f4),(_ucs2 0x01f5),(_ucs2 0x01f6),(_ucs2 0x01f7);
insert into t1 values (_ucs2 0x01f8),(_ucs2 0x01f9),(_ucs2 0x01fa),(_ucs2 0x01fb);
insert into t1 values (_ucs2 0x01fc),(_ucs2 0x01fd),(_ucs2 0x01fe),(_ucs2 0x01ff);
insert into t1 values ('AA'),('Aa'),('aa'),('aA');
insert into t1 values ('CH'),('Ch'),('ch'),('cH');
insert into t1 values ('DZ'),('Dz'),('dz'),('dZ');
insert into t1 values ('IJ'),('Ij'),('ij'),('iJ');
insert into t1 values ('LJ'),('Lj'),('lj'),('lJ');
insert into t1 values ('LL'),('Ll'),('ll'),('lL');
insert into t1 values ('NJ'),('Nj'),('nj'),('nJ');
insert into t1 values ('OE'),('Oe'),('oe'),('oE');
insert into t1 values ('SS'),('Ss'),('ss'),('sS');
insert into t1 values ('RR'),('Rr'),('rr'),('rR');
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_unicode_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_icelandic_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_latvian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_romanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovenian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_polish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_estonian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_swedish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_turkish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_czech_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_danish_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_lithuanian_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_slovak_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_spanish2_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_roman_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_esperanto_ci;
select group_concat(c1 order by binary c1 separator '') from t1 group by c1 collate utf32_hungarian_ci;
drop table t1;
#
# Bug#5324
#
SET NAMES utf8;
#test1
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_general_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_general_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
COLLATE utf32_general_ci ORDER BY c;
DROP TABLE t1;
#test2
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025 COLLATE utf32_unicode_ci;
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B4);
#Check two rows
SELECT * FROM t1 WHERE c LIKE _utf32 0x0000039C00000025
COLLATE utf32_unicode_ci ORDER BY c;
DROP TABLE t1;
#test 3
CREATE TABLE t1 (c varchar(200) CHARACTER SET utf32 COLLATE utf32_unicode_ci NOT NULL, INDEX (c));
INSERT INTO t1 VALUES (_ucs2 0x039C03C903B403B11F770308);
#Check one row
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32) COLLATE utf32_unicode_ci;
INSERT INTO t1 VALUES (CONVERT(_ucs2 0x039C03C903B4 USING utf8));
#Check two rows
SELECT * FROM t1 WHERE c LIKE CONVERT(_ucs2 0x039C0025 USING utf32)
COLLATE utf32_unicode_ci ORDER BY c;
DROP TABLE t1;
SET NAMES utf8;
SET @test_character_set='utf32';
SET @test_collation='utf32_swedish_ci';
-- source include/ctype_common.inc
SET collation_connection='utf32_unicode_ci';
-- source include/ctype_filesort.inc
-- source include/ctype_like_escape.inc
--echo End of 4.1 tests
#
# Check UPPER/LOWER changing length
#
# Result shorter than argument
CREATE TABLE t1 (id int, a varchar(30) character set utf32);
INSERT INTO t1 VALUES (1, _ucs2 0x01310069), (2, _ucs2 0x01310131);
INSERT INTO t1 VALUES (3, _ucs2 0x00690069), (4, _ucs2 0x01300049);
INSERT INTO t1 VALUES (5, _ucs2 0x01300130), (6, _ucs2 0x00490049);
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
ALTER TABLE t1 MODIFY a VARCHAR(30) character set utf32 collate utf32_turkish_ci;
SELECT a, length(a) la, @l:=lower(a) l, length(@l) ll, @u:=upper(a) u, length(@u) lu
FROM t1 ORDER BY id;
DROP TABLE t1;
#
# Bug #27079 Crash while grouping empty ucs2 strings
#
CREATE TABLE t1 (
c1 text character set utf32 collate utf32_polish_ci NOT NULL
) ENGINE=MyISAM;
insert into t1 values (''),('a');
SELECT COUNT(*), c1 FROM t1 GROUP BY c1;
DROP TABLE IF EXISTS t1;
#
# Test basic regex functionality
#
set collation_connection=utf32_unicode_ci;
--source include/ctype_regex.inc
--echo #
--echo # End of 5.5 tests
--echo #

11
mysql-test/t/ctype_utf8.test

@ -1440,6 +1440,17 @@ DROP TABLE t1;
--echo Start of 5.4 tests
#
# WL#1213: utf8mb3 is an alias for utf8
#
SET NAMES utf8mb3;
SHOW VARIABLES LIKE 'character_set_results%';
CREATE TABLE t1 (a CHAR CHARACTER SET utf8mb3 COLLATE utf8mb3_bin);
SHOW CREATE TABLE t1;
DROP TABLE t1;
SELECT _utf8mb3'test';
#
# Bug#26180: Can't add columns to tables created with utf8 text indexes
#

1670
mysql-test/t/ctype_utf8mb4.test
File diff suppressed because it is too large
View File

155
mysys/charset-def.c

@ -45,6 +45,53 @@ extern CHARSET_INFO my_charset_ucs2_hungarian_uca_ci;
extern CHARSET_INFO my_charset_ucs2_sinhala_uca_ci;
#endif
#ifdef HAVE_CHARSET_utf32
extern CHARSET_INFO my_charset_utf32_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf32_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf32_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf32_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf32_polish_uca_ci;
extern CHARSET_INFO my_charset_utf32_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf32_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf32_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf32_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf32_czech_uca_ci;
extern CHARSET_INFO my_charset_utf32_danish_uca_ci;
extern CHARSET_INFO my_charset_utf32_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf32_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf32_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf32_roman_uca_ci;
extern CHARSET_INFO my_charset_utf32_persian_uca_ci;
extern CHARSET_INFO my_charset_utf32_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf32_hungarian_uca_ci;
extern CHARSET_INFO my_charset_utf32_sinhala_uca_ci;
#endif /* HAVE_CHARSET_utf32 */
#ifdef HAVE_CHARSET_utf16
extern CHARSET_INFO my_charset_utf16_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf16_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf16_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf16_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf16_polish_uca_ci;
extern CHARSET_INFO my_charset_utf16_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf16_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf16_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf16_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf16_czech_uca_ci;
extern CHARSET_INFO my_charset_utf16_danish_uca_ci;
extern CHARSET_INFO my_charset_utf16_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf16_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf16_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf16_roman_uca_ci;
extern CHARSET_INFO my_charset_utf16_persian_uca_ci;
extern CHARSET_INFO my_charset_utf16_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf16_hungarian_uca_ci;
extern CHARSET_INFO my_charset_utf16_sinhala_uca_ci;
#endif /* HAVE_CHARSET_utf16 */
#ifdef HAVE_CHARSET_utf8
extern CHARSET_INFO my_charset_utf8_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf8_latvian_uca_ci;
@ -70,6 +117,28 @@ extern CHARSET_INFO my_charset_utf8_general_cs;
#endif
#endif
#ifdef HAVE_CHARSET_utf8mb4
extern CHARSET_INFO my_charset_utf8mb4_icelandic_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_latvian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_romanian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_slovenian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_polish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_estonian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_spanish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_swedish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_turkish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_czech_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_danish_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_lithuanian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_slovak_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_spanish2_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_roman_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_persian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_esperanto_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_hungarian_uca_ci;
extern CHARSET_INFO my_charset_utf8mb4_sinhala_uca_ci;
#endif /* HAVE_CHARSET_utf8mb4 */
#endif /* HAVE_UCA_COLLATIONS */
my_bool init_compiled_charsets(myf flags __attribute__((unused)))
@ -191,7 +260,91 @@ my_bool init_compiled_charsets(myf flags __attribute__((unused)))
add_compiled_collation(&my_charset_utf8_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8_sinhala_uca_ci);
#endif
#endif
#endif /* HAVE_CHARSET_utf8 */
#ifdef HAVE_CHARSET_utf8mb4
add_compiled_collation(&my_charset_utf8mb4_general_ci);
add_compiled_collation(&my_charset_utf8mb4_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf8mb4_unicode_ci);
add_compiled_collation(&my_charset_utf8mb4_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_latvian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_romanian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_polish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_estonian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_spanish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_swedish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_turkish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_czech_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_danish_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_slovak_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_spanish2_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_roman_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_persian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf8mb4_sinhala_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf8mb4 */
#ifdef HAVE_CHARSET_utf16
add_compiled_collation(&my_charset_utf16_general_ci);
add_compiled_collation(&my_charset_utf16_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf16_unicode_ci);
add_compiled_collation(&my_charset_utf16_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf16_latvian_uca_ci);
add_compiled_collation(&my_charset_utf16_romanian_uca_ci);
add_compiled_collation(&my_charset_utf16_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf16_polish_uca_ci);
add_compiled_collation(&my_charset_utf16_estonian_uca_ci);
add_compiled_collation(&my_charset_utf16_spanish_uca_ci);
add_compiled_collation(&my_charset_utf16_swedish_uca_ci);
add_compiled_collation(&my_charset_utf16_turkish_uca_ci);
add_compiled_collation(&my_charset_utf16_czech_uca_ci);
add_compiled_collation(&my_charset_utf16_danish_uca_ci);
add_compiled_collation(&my_charset_utf16_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf16_slovak_uca_ci);
add_compiled_collation(&my_charset_utf16_spanish2_uca_ci);
add_compiled_collation(&my_charset_utf16_roman_uca_ci);
add_compiled_collation(&my_charset_utf16_persian_uca_ci);
add_compiled_collation(&my_charset_utf16_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf16_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf16_sinhala_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf16 */
#ifdef HAVE_CHARSET_utf32
add_compiled_collation(&my_charset_utf32_general_ci);
add_compiled_collation(&my_charset_utf32_bin);
#ifdef HAVE_UCA_COLLATIONS
add_compiled_collation(&my_charset_utf32_unicode_ci);
add_compiled_collation(&my_charset_utf32_icelandic_uca_ci);
add_compiled_collation(&my_charset_utf32_latvian_uca_ci);
add_compiled_collation(&my_charset_utf32_romanian_uca_ci);
add_compiled_collation(&my_charset_utf32_slovenian_uca_ci);
add_compiled_collation(&my_charset_utf32_polish_uca_ci);
add_compiled_collation(&my_charset_utf32_estonian_uca_ci);
add_compiled_collation(&my_charset_utf32_spanish_uca_ci);
add_compiled_collation(&my_charset_utf32_swedish_uca_ci);
add_compiled_collation(&my_charset_utf32_turkish_uca_ci);
add_compiled_collation(&my_charset_utf32_czech_uca_ci);
add_compiled_collation(&my_charset_utf32_danish_uca_ci);
add_compiled_collation(&my_charset_utf32_lithuanian_uca_ci);
add_compiled_collation(&my_charset_utf32_slovak_uca_ci);
add_compiled_collation(&my_charset_utf32_spanish2_uca_ci);
add_compiled_collation(&my_charset_utf32_roman_uca_ci);
add_compiled_collation(&my_charset_utf32_persian_uca_ci);
add_compiled_collation(&my_charset_utf32_esperanto_uca_ci);
add_compiled_collation(&my_charset_utf32_hungarian_uca_ci);
add_compiled_collation(&my_charset_utf32_sinhala_uca_ci);
#endif /* HAVE_UCA_COLLATIONS */
#endif /* HAVE_CHARSET_utf32 */
/* Copy compiled charsets */
for (cs=compiled_charsets; cs->name; cs++)

69
mysys/charset.c

@ -252,13 +252,35 @@ static int add_collation(CHARSET_INFO *cs)
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
}
else if (!strcmp(cs->csname, "utf8"))
else if (!strcmp(cs->csname, "utf8") || !strcmp(cs->csname, "utf8mb3"))
{
#if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
newcs->ctype= my_charset_utf8_unicode_ci.ctype;
if (init_state_maps(newcs))
return MY_XML_ERROR;
#endif
}
else if (!strcmp(cs->csname, "utf8mb4"))
{
#if defined (HAVE_CHARSET_utf8mb4) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf8mb4_unicode_ci);
newcs->ctype= my_charset_utf8mb4_unicode_ci.ctype;
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED;
#endif
}
else if (!strcmp(cs->csname, "utf16"))
{
#if defined (HAVE_CHARSET_utf16) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf16_unicode_ci);
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
}
else if (!strcmp(cs->csname, "utf32"))
{
#if defined (HAVE_CHARSET_utf32) && defined(HAVE_UCA_COLLATIONS)
copy_uca_collation(newcs, &my_charset_utf32_unicode_ci);
newcs->state|= MY_CS_AVAILABLE | MY_CS_LOADED | MY_CS_NONASCII;
#endif
}
else
@ -433,17 +455,35 @@ static void init_available_charsets(void)
}
/*
  Translate a "utf8mb3_xxx" collation name into its "utf8_xxx" form.

  SYNOPSIS
    get_collation_name_alias()
    name     Collation name to examine.
    buf      Caller-supplied buffer that receives the rewritten name.
    bufsize  Size of buf, passed through to my_snprintf().

  RETURN
    buf   if name begins with "utf8mb3_" (compared case-insensitively);
          buf then holds "utf8_" followed by the rest of name.
    NULL  otherwise; buf is left untouched.
*/
static const char*
get_collation_name_alias(const char *name, char *buf, size_t bufsize)
{
  static const char mb3_prefix[]= "utf8mb3_";
  const size_t prefix_length= sizeof(mb3_prefix) - 1;  /* without the '\0' */

  if (strncasecmp(name, mb3_prefix, prefix_length) != 0)
    return NULL;

  /* Keep the suffix and replace only the character set part of the name. */
  my_snprintf(buf, bufsize, "utf8_%s", name + prefix_length);
  return buf;
}
uint get_collation_number(const char *name)
{
uint id;
char alias[64];
my_pthread_once(&charsets_initialized, init_available_charsets);
return get_collation_number_internal(name);
if ((id= get_collation_number_internal(name)))
return id;
if ((name= get_collation_name_alias(name, alias, sizeof(alias))))
return get_collation_number_internal(name);
return 0;
}
uint get_charset_number(const char *charset_name, uint cs_flags)
static uint
get_charset_number_internal(const char *charset_name, uint cs_flags)
{
CHARSET_INFO **cs;
my_pthread_once(&charsets_initialized, init_available_charsets);
for (cs= all_charsets;
cs < all_charsets + array_elements(all_charsets);
@ -457,6 +497,27 @@ uint get_charset_number(const char *charset_name, uint cs_flags)
}
/*
  Return the character set name that the given name is an alias of.

  RETURN
    "utf8"  if name equals "utf8mb3" according to my_strcasecmp()
            with my_charset_latin1.
    NULL    for any other name.
*/
static const char*
get_charset_name_alias(const char *name)
{
  const char *alias= NULL;

  if (my_strcasecmp(&my_charset_latin1, name, "utf8mb3") == 0)
    alias= "utf8";
  return alias;
}
/*
  Look up a character set number by name, falling back to a known alias.

  my_pthread_once() is called with init_available_charsets before any
  lookup. The name is tried as given first; only if that lookup yields 0
  is the name passed through get_charset_name_alias() and the lookup
  repeated with the aliased name.

  RETURN
    Non-zero character set number on success.
    0 if neither the name nor its alias (if any) is found.
*/
uint get_charset_number(const char *charset_name, uint cs_flags)
{
  uint id;
  const char *alias;

  my_pthread_once(&charsets_initialized, init_available_charsets);

  id= get_charset_number_internal(charset_name, cs_flags);
  if (id)
    return id;

  alias= get_charset_name_alias(charset_name);
  return alias ? get_charset_number_internal(alias, cs_flags) : 0;
}
const char *get_charset_name(uint charset_number)
{
CHARSET_INFO *cs;

4
sql/field.cc

@ -1836,7 +1836,9 @@ int Field::store_time(MYSQL_TIME *ltime, timestamp_type type_arg)
ASSERT_COLUMN_MARKED_FOR_WRITE;
char buff[MAX_DATE_STRING_REP_LENGTH];
uint length= (uint) my_TIME_to_str(ltime, buff);
return store(buff, length, &my_charset_bin);
return store(buff, length,
(charset()->state & MY_CS_NONASCII) ?
&my_charset_latin1 : &my_charset_bin);
}

11
sql/item.cc

@ -854,7 +854,7 @@ Item *Item_param::safe_charset_converter(CHARSET_INFO *tocs)
cnvitem->max_length= cnvitem->str_value.numchars() * tocs->mbmaxlen;
return cnvitem;
}
return NULL;
return Item::safe_charset_converter(tocs);
}
@ -1436,7 +1436,12 @@ left_is_superset(DTCollation *left, DTCollation *right)
if (left->collation->state & MY_CS_UNICODE &&
(left->derivation < right->derivation ||
(left->derivation == right->derivation &&
!(right->collation->state & MY_CS_UNICODE))))
(!(right->collation->state & MY_CS_UNICODE) ||
/* The code below makes 4-byte utf8 a superset over 3-byte utf8 */
(left->collation->state & MY_CS_UNICODE_SUPPLEMENT &&
!(right->collation->state & MY_CS_UNICODE_SUPPLEMENT) &&
left->collation->mbmaxlen > right->collation->mbmaxlen &&
left->collation->mbminlen == right->collation->mbminlen)))))
return TRUE;
/* Allow convert from ASCII */
if (right->repertoire == MY_REPERTOIRE_ASCII &&
@ -1695,7 +1700,7 @@ bool agg_item_set_converter(DTCollation &coll, const char *fname,
{
Item* conv;
uint32 dummy_offset;
if (!String::needs_conversion(0, (*arg)->collation.collation,
if (!String::needs_conversion(1, (*arg)->collation.collation,
coll.collation,
&dummy_offset))
continue;

33
sql/item_strfunc.cc

@ -2371,17 +2371,27 @@ String *Item_func_char::val_str(String *str)
int32 num=(int32) args[i]->val_int();
if (!args[i]->null_value)
{
char char_num= (char) num;
if (num&0xFF000000L) {
str->append((char)(num>>24));
goto b2;
} else if (num&0xFF0000L) {
b2: str->append((char)(num>>16));
goto b1;
} else if (num&0xFF00L) {
b1: str->append((char)(num>>8));
char tmp[4];
if (num & 0xFF000000L)
{
mi_int4store(tmp, num);
str->append(tmp, 4, &my_charset_bin);
}
else if (num & 0xFF0000L)
{
mi_int3store(tmp, num);
str->append(tmp, 3, &my_charset_bin);
}
else if (num & 0xFF00L)
{
mi_int2store(tmp, num);
str->append(tmp, 2, &my_charset_bin);
}
else
{
tmp[0]= (char) num;
str->append(tmp, 1, &my_charset_bin);
}
str->append(&char_num, 1);
}
}
str->realloc(str->length()); // Add end 0 (for Purify)
@ -2769,7 +2779,8 @@ String *Item_func_conv_charset::val_str(String *str)
void Item_func_conv_charset::fix_length_and_dec()
{
collation.set(conv_charset, DERIVATION_IMPLICIT);
max_length = args[0]->max_length*conv_charset->mbmaxlen;
max_length = args[0]->max_length / args[0]->collation.collation->mbmaxlen *
conv_charset->mbmaxlen;
}
void Item_func_conv_charset::print(String *str, enum_query_type query_type)

38
sql/sql_string.cc

@ -412,11 +412,25 @@ bool String::append(const char *s)
bool String::append(const char *s,uint32 arg_length, CHARSET_INFO *cs)
{
uint32 dummy_offset;
uint32 offset;
if (needs_conversion(arg_length, cs, str_charset, &dummy_offset))
if (needs_conversion(arg_length, cs, str_charset, &offset))
{
uint32 add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
uint32 add_length;
if ((cs == &my_charset_bin) && offset)
{
DBUG_ASSERT(str_charset->mbminlen > offset);
offset= str_charset->mbminlen - offset; // How many characters to pad
add_length= arg_length + offset;
if (realloc(str_length + add_length))
return TRUE;
bzero((char*) Ptr + str_length, offset);
memcpy(Ptr + str_length + offset, s, arg_length);
str_length+= add_length;
return FALSE;
}
add_length= arg_length / cs->mbminlen * str_charset->mbmaxlen;
uint dummy_errors;
if (realloc(str_length + add_length))
return TRUE;
@ -966,6 +980,24 @@ well_formed_copy_nchars(CHARSET_INFO *to_cs,
uint pad_length= to_cs->mbminlen - from_offset;
bzero(to, pad_length);
memmove(to + pad_length, from, from_offset);
/*
In some cases left zero-padding can create an incorrect character.
For example:
INSERT INTO t1 (utf32_column) VALUES (0x110000);
We'll pad the value to 0x00110000, which is a wrong UTF32 sequence!
The valid characters range is limited to 0x00000000..0x0010FFFF.
Make sure we didn't pad to an incorrect character.
*/
if (to_cs->cset->well_formed_len(to_cs,
to, to + to_cs->mbminlen, 1,
&well_formed_error) !=
to_cs->mbminlen)
{
*from_end_pos= *well_formed_error_pos= from;
*cannot_convert_error_pos= NULL;
return 0;
}
nchars--;
from+= from_offset;
from_length-= from_offset;

2
sql/sql_table.cc

@ -2776,7 +2776,7 @@ mysql_prepare_create_table(THD *thd, HA_CREATE_INFO *create_info,
sql_field->interval_list);
List_iterator<String> int_it(sql_field->interval_list);
String conv, *tmp;
char comma_buf[2];
char comma_buf[4]; /* 4 bytes for utf32 */
int comma_length= cs->cset->wc_mb(cs, ',', (uchar*) comma_buf,
(uchar*) comma_buf +
sizeof(comma_buf));

38
strings/ctype-mb.c

@ -467,10 +467,11 @@ uint my_instr_mb(CHARSET_INFO *cs,
/* BINARY collations handlers for MB charsets */
static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
my_bool t_is_prefix)
int
my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
my_bool t_is_prefix)
{
size_t len=min(slen,tlen);
int cmp= memcmp(s,t,len);
@ -503,10 +504,11 @@ static int my_strnncoll_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
0 if strings are equal
*/
static int my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
int
my_strnncollsp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *a, size_t a_length,
const uchar *b, size_t b_length,
my_bool diff_if_only_endspace_difference)
{
const uchar *end;
size_t length;
@ -562,14 +564,17 @@ static size_t my_strnxfrm_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
}
static int my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t)
int
my_strcasecmp_mb_bin(CHARSET_INFO * cs __attribute__((unused)),
const char *s, const char *t)
{
return strcmp(s,t);
}
static void my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
void
my_hash_sort_mb_bin(CHARSET_INFO *cs __attribute__((unused)),
const uchar *key, size_t len,ulong *nr1, ulong *nr2)
{
const uchar *pos = key;
@ -787,10 +792,11 @@ fill_max_and_min:
}
static int my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
int
my_wildcmp_mb_bin(CHARSET_INFO *cs,
const char *str,const char *str_end,
const char *wildstr,const char *wildend,
int escape, int w_one, int w_many)
{
int result= -1; /* Not found, using wildcards */

1986
strings/ctype-uca.c
File diff suppressed because it is too large
View File

3652
strings/ctype-ucs2.c
File diff suppressed because it is too large
View File

1054
strings/ctype-utf8.c
File diff suppressed because it is too large
View File

Loading…
Cancel
Save