Browse Source

MDEV-25904 New collation functions to compare InnoDB style trimmed NO PAD strings

pull/2000/head
Alexander Barkov 4 years ago
parent
commit
b915f79e4e
  1. 54
      include/m_ctype.h
  2. 47
      sql/field.cc
  3. 4
      strings/ctype-big5.c
  4. 25
      strings/ctype-bin.c
  5. 4
      strings/ctype-cp932.c
  6. 1
      strings/ctype-czech.c
  7. 4
      strings/ctype-euc_kr.c
  8. 4
      strings/ctype-eucjpms.c
  9. 4
      strings/ctype-gb2312.c
  10. 4
      strings/ctype-gbk.c
  11. 1
      strings/ctype-latin1.c
  12. 14
      strings/ctype-simple.c
  13. 4
      strings/ctype-sjis.c
  14. 2
      strings/ctype-tis620.c
  15. 179
      strings/ctype-uca-scanner_next.inl
  16. 38
      strings/ctype-uca.c
  17. 276
      strings/ctype-uca.ic
  18. 16
      strings/ctype-ucs2.c
  19. 4
      strings/ctype-ujis.c
  20. 11
      strings/ctype-utf8.c
  21. 1
      strings/ctype-win1250ch.c
  22. 29
      strings/ctype.c
  23. 50
      strings/strcoll.ic
  24. 10
      strings/strings_def.h
  25. 508
      unittest/strings/strings-t.c

54
include/m_ctype.h

@ -330,6 +330,60 @@ struct my_collation_handler_st
const uchar *, size_t, const uchar *, size_t, my_bool);
int (*strnncollsp)(CHARSET_INFO *,
const uchar *, size_t, const uchar *, size_t);
/*
strnncollsp_nchars() - similar to strnncollsp() but assumes that both
strings were originally CHAR(N) values with the
same N, then were optionally space-padded,
or optionally space-trimmed.
In other words, this function compares in the way
if we insert both values into a CHAR(N) column
and then compare the two column values.
It compares the same amount of characters from the two strings.
This is especially important for NOPAD collations.
If CHAR_LENGTH of the two strings are different,
the shorter string is virtually padded with trailing spaces
up to CHAR_LENGTH of the longer string, to guarantee that the
same amount of characters are compared.
This is important if the two CHAR(N) strings are space-trimmed
(e.g. like in InnoDB compact format for CHAR).
The function compares at most "nchars" characters.
This can be useful to compare CHAR(N) space-padded strings
(when the exact N is known) without having to truncate them before
the comparison.
For example, Field_string stores a "CHAR(3) CHARACTER SET utf8mb4" value
of "aaa" as 12 bytes in a record buffer:
- 3 bytes of the actual data, followed by
- 9 bytes of spaces (just fillers, not real data)
The caller can pass nchars=3 to compare CHAR(3) record values.
In such case, the comparator won't go inside the 9 bytes of the fillers.
If N is not known, the caller can pass max(len1,len2) as the "nchars" value
(i.e. the maximum of the OCTET_LENGTH of the two strings).
Notes on complex collations.
This function counts contraction parts as individual characters.
For example, the Czech letter 'ch' (in Czech collations)
is ordinarily counted by the "nchars" limit as TWO characters
(although it is only one letter).
This corresponds to what CHAR(N) does in INSERT.
If the "nchars" limit tears apart a contraction, only the part fitting
into "nchars" characters is used. For example, in case of a Czech collation,
the string "ach" with nchars=2 is compared as 'ac': the contraction
'ch' is torn apart and the letter 'c' acts as an individual character.
This emulates the same comparison result with the scenario when we insert
'ach' into a CHAR(2) column and then compare it.
*/
int (*strnncollsp_nchars)(CHARSET_INFO *,
const uchar *str1, size_t len1,
const uchar *str2, size_t len2,
size_t nchars);
size_t (*strnxfrm)(CHARSET_INFO *,
uchar *dst, size_t dstlen, uint nweights,
const uchar *src, size_t srclen, uint flags);

47
sql/field.cc

@ -7433,23 +7433,10 @@ Field_string::compatible_field_size(uint field_metadata,
int Field_string::cmp(const uchar *a_ptr, const uchar *b_ptr)
{
size_t a_len, b_len;
if (field_charset->mbmaxlen != 1)
{
size_t char_len= field_length/field_charset->mbmaxlen;
a_len= my_charpos(field_charset, a_ptr, a_ptr + field_length, char_len);
b_len= my_charpos(field_charset, b_ptr, b_ptr + field_length, char_len);
}
else
a_len= b_len= field_length;
/*
We have to remove end space to be able to compare multi-byte-characters
like in latin_de 'ae' and 0xe4
*/
return field_charset->coll->strnncollsp(field_charset,
a_ptr, a_len,
b_ptr, b_len);
return field_charset->coll->strnncollsp_nchars(field_charset,
a_ptr, field_length,
b_ptr, field_length,
Field_string::char_length());
}
@ -7848,19 +7835,6 @@ int Field_varstring::cmp(const uchar *a_ptr, const uchar *b_ptr)
}
static int cmp_str_prefix(const uchar *ua, size_t alen, const uchar *ub,
size_t blen, size_t prefix, CHARSET_INFO *cs)
{
const char *a= (char*)ua, *b= (char*)ub;
MY_STRCOPY_STATUS status;
prefix/= cs->mbmaxlen;
alen= cs->cset->well_formed_char_length(cs, a, a + alen, prefix, &status);
blen= cs->cset->well_formed_char_length(cs, b, b + blen, prefix, &status);
return cs->coll->strnncollsp(cs, ua, alen, ub, blen);
}
int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
size_t prefix_len)
{
@ -7880,8 +7854,12 @@ int Field_varstring::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
a_length= uint2korr(a_ptr);
b_length= uint2korr(b_ptr);
}
return cmp_str_prefix(a_ptr+length_bytes, a_length, b_ptr+length_bytes,
b_length, prefix_len, field_charset);
return field_charset->coll->strnncollsp_nchars(field_charset,
a_ptr + length_bytes,
a_length,
b_ptr + length_bytes,
b_length,
prefix_len / field_charset->mbmaxlen);
}
@ -8659,7 +8637,10 @@ int Field_blob::cmp_prefix(const uchar *a_ptr, const uchar *b_ptr,
memcpy(&blob1, a_ptr+packlength, sizeof(char*));
memcpy(&blob2, b_ptr+packlength, sizeof(char*));
size_t a_len= get_length(a_ptr), b_len= get_length(b_ptr);
return cmp_str_prefix(blob1, a_len, blob2, b_len, prefix_len, field_charset);
return field_charset->coll->strnncollsp_nchars(field_charset,
blob1, a_len,
blob2, b_len,
prefix_len / field_charset->mbmaxlen);
}

4
strings/ctype-big5.c

@ -6711,6 +6711,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_ci=
NULL, /* init */
my_strnncoll_big5_chinese_ci,
my_strnncollsp_big5_chinese_ci,
my_strnncollsp_nchars_big5_chinese_ci,
my_strnxfrm_big5_chinese_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -6727,6 +6728,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_bin=
NULL, /* init */
my_strnncoll_big5_bin,
my_strnncollsp_big5_bin,
my_strnncollsp_nchars_big5_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -6743,6 +6745,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_chinese_nopad_ci=
NULL, /* init */
my_strnncoll_big5_chinese_ci,
my_strnncollsp_big5_chinese_nopad_ci,
my_strnncollsp_nchars_big5_chinese_nopad_ci,
my_strnxfrm_big5_chinese_nopad_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -6759,6 +6762,7 @@ static MY_COLLATION_HANDLER my_collation_handler_big5_nopad_bin=
NULL, /* init */
my_strnncoll_big5_bin,
my_strnncollsp_big5_nopad_bin,
my_strnncollsp_nchars_big5_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

25
strings/ctype-bin.c

@ -125,6 +125,17 @@ static int my_strnncollsp_binary(CHARSET_INFO * cs __attribute__((unused)),
}
/*
  strnncollsp_nchars() implementation for the "binary" collation.

  The "nchars" limit is applied directly to the two byte lengths, then the
  (possibly shortened) strings are compared with my_strnncoll_binary().
*/
static int my_strnncollsp_nchars_binary(CHARSET_INFO * cs __attribute__((unused)),
                                        const uchar *s, size_t slen,
                                        const uchar *t, size_t tlen,
                                        size_t nchars)
{
  if (slen > nchars)
    slen= nchars;
  if (tlen > nchars)
    tlen= nchars;
  return my_strnncoll_binary(cs, s, slen, t, tlen, 0);
}
static int my_strnncoll_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
const uchar *s, size_t slen,
const uchar *t, size_t tlen,
@ -199,6 +210,17 @@ static int my_strnncollsp_8bit_bin(CHARSET_INFO * cs __attribute__((unused)),
}
/*
  strnncollsp_nchars() implementation for 8bit binary collations.

  Both lengths are cut down to at most "nchars" before the comparison is
  delegated to my_strnncollsp_8bit_bin().
*/
static int my_strnncollsp_nchars_8bit_bin(CHARSET_INFO * cs,
                                          const uchar *a, size_t a_length,
                                          const uchar *b, size_t b_length,
                                          size_t nchars)
{
  if (a_length > nchars)
    a_length= nchars;
  if (b_length > nchars)
    b_length= nchars;
  return my_strnncollsp_8bit_bin(cs, a, a_length, b, b_length);
}
static int my_strnncollsp_8bit_nopad_bin(CHARSET_INFO * cs
__attribute__((unused)),
const uchar *a, size_t a_length,
@ -487,6 +509,7 @@ MY_COLLATION_HANDLER my_collation_8bit_bin_handler =
my_coll_init_8bit_bin,
my_strnncoll_8bit_bin,
my_strnncollsp_8bit_bin,
my_strnncollsp_nchars_8bit_bin,
my_strnxfrm_8bit_bin,
my_strnxfrmlen_simple,
my_like_range_simple,
@ -503,6 +526,7 @@ MY_COLLATION_HANDLER my_collation_8bit_nopad_bin_handler =
my_coll_init_8bit_bin,
my_strnncoll_8bit_bin,
my_strnncollsp_8bit_nopad_bin,
my_strnncollsp_nchars_8bit_bin,
my_strnxfrm_8bit_nopad_bin,
my_strnxfrmlen_simple,
my_like_range_simple,
@ -519,6 +543,7 @@ static MY_COLLATION_HANDLER my_collation_binary_handler =
NULL, /* init */
my_strnncoll_binary,
my_strnncollsp_binary,
my_strnncollsp_nchars_binary,
my_strnxfrm_8bit_bin,
my_strnxfrmlen_simple,
my_like_range_simple,

4
strings/ctype-cp932.c

@ -34667,6 +34667,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_ci=
NULL, /* init */
my_strnncoll_cp932_japanese_ci,
my_strnncollsp_cp932_japanese_ci,
my_strnncollsp_nchars_cp932_japanese_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -34683,6 +34684,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_bin=
NULL, /* init */
my_strnncoll_cp932_bin,
my_strnncollsp_cp932_bin,
my_strnncollsp_nchars_cp932_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -34699,6 +34701,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_japanese_nopad_ci=
NULL, /* init */
my_strnncoll_cp932_japanese_ci,
my_strnncollsp_cp932_japanese_nopad_ci,
my_strnncollsp_nchars_cp932_japanese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -34715,6 +34718,7 @@ static MY_COLLATION_HANDLER my_collation_handler_cp932_nopad_bin=
NULL, /* init */
my_strnncoll_cp932_bin,
my_strnncollsp_cp932_nopad_bin,
my_strnncollsp_nchars_cp932_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

1
strings/ctype-czech.c

@ -610,6 +610,7 @@ static MY_COLLATION_HANDLER my_collation_latin2_czech_ci_handler =
NULL, /* init */
my_strnncoll_czech,
my_strnncollsp_czech,
my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_czech,
my_strnxfrmlen_czech,
my_like_range_czech,

4
strings/ctype-euc_kr.c

@ -9957,6 +9957,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_ci=
NULL, /* init */
my_strnncoll_euckr_korean_ci,
my_strnncollsp_euckr_korean_ci,
my_strnncollsp_nchars_euckr_korean_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -9973,6 +9974,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_bin=
NULL, /* init */
my_strnncoll_euckr_bin,
my_strnncollsp_euckr_bin,
my_strnncollsp_nchars_euckr_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -9989,6 +9991,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_korean_nopad_ci=
NULL, /* init */
my_strnncoll_euckr_korean_ci,
my_strnncollsp_euckr_korean_nopad_ci,
my_strnncollsp_nchars_euckr_korean_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -10005,6 +10008,7 @@ static MY_COLLATION_HANDLER my_collation_handler_euckr_nopad_bin=
NULL, /* init */
my_strnncoll_euckr_bin,
my_strnncollsp_euckr_nopad_bin,
my_strnncollsp_nchars_euckr_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

4
strings/ctype-eucjpms.c

@ -67495,6 +67495,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_ci_handler =
NULL, /* init */
my_strnncoll_eucjpms_japanese_ci,
my_strnncollsp_eucjpms_japanese_ci,
my_strnncollsp_nchars_eucjpms_japanese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@ -67511,6 +67512,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_bin_handler =
NULL, /* init */
my_strnncoll_eucjpms_bin,
my_strnncollsp_eucjpms_bin,
my_strnncollsp_nchars_eucjpms_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -67527,6 +67529,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_japanese_nopad_ci_handler =
NULL, /* init */
my_strnncoll_eucjpms_japanese_ci,
my_strnncollsp_eucjpms_japanese_nopad_ci,
my_strnncollsp_nchars_eucjpms_japanese_nopad_ci,
my_strnxfrm_mb_nopad, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@ -67543,6 +67546,7 @@ static MY_COLLATION_HANDLER my_collation_eucjpms_nopad_bin_handler =
NULL, /* init */
my_strnncoll_eucjpms_bin,
my_strnncollsp_eucjpms_nopad_bin,
my_strnncollsp_nchars_eucjpms_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

4
strings/ctype-gb2312.c

@ -6362,6 +6362,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_ci=
NULL, /* init */
my_strnncoll_gb2312_chinese_ci,
my_strnncollsp_gb2312_chinese_ci,
my_strnncollsp_nchars_gb2312_chinese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@ -6378,6 +6379,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_bin=
NULL, /* init */
my_strnncoll_gb2312_bin,
my_strnncollsp_gb2312_bin,
my_strnncollsp_nchars_gb2312_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -6394,6 +6396,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_chinese_nopad_ci=
NULL, /* init */
my_strnncoll_gb2312_chinese_ci,
my_strnncollsp_gb2312_chinese_nopad_ci,
my_strnncollsp_nchars_gb2312_chinese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -6410,6 +6413,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gb2312_nopad_bin=
NULL, /* init */
my_strnncoll_gb2312_bin,
my_strnncollsp_gb2312_nopad_bin,
my_strnncollsp_nchars_gb2312_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

4
strings/ctype-gbk.c

@ -10645,6 +10645,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_ci=
NULL, /* init */
my_strnncoll_gbk_chinese_ci,
my_strnncollsp_gbk_chinese_ci,
my_strnncollsp_nchars_gbk_chinese_ci,
my_strnxfrm_gbk_chinese_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -10661,6 +10662,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_bin=
NULL, /* init */
my_strnncoll_gbk_bin,
my_strnncollsp_gbk_bin,
my_strnncollsp_nchars_gbk_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -10677,6 +10679,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_chinese_nopad_ci=
NULL, /* init */
my_strnncoll_gbk_chinese_ci,
my_strnncollsp_gbk_chinese_nopad_ci,
my_strnncollsp_nchars_gbk_chinese_nopad_ci,
my_strnxfrm_gbk_chinese_nopad_ci,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -10693,6 +10696,7 @@ static MY_COLLATION_HANDLER my_collation_handler_gbk_nopad_bin=
NULL, /* init */
my_strnncoll_gbk_bin,
my_strnncollsp_gbk_nopad_bin,
my_strnncollsp_nchars_gbk_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

1
strings/ctype-latin1.c

@ -726,6 +726,7 @@ static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
NULL, /* init */
my_strnncoll_latin1_de,
my_strnncollsp_latin1_de,
my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_latin1_de,
my_strnxfrmlen_simple,
my_like_range_simple,

14
strings/ctype-simple.c

@ -208,6 +208,18 @@ int my_strnncollsp_simple(CHARSET_INFO * cs, const uchar *a, size_t a_length,
}
/*
  strnncollsp_nchars() implementation for simple 8bit collations.

  Both lengths are cut down to at most "nchars" before the comparison is
  delegated to my_strnncollsp_simple().
*/
static int
my_strnncollsp_nchars_simple(CHARSET_INFO * cs,
                             const uchar *a, size_t a_length,
                             const uchar *b, size_t b_length,
                             size_t nchars)
{
  if (a_length > nchars)
    a_length= nchars;
  if (b_length > nchars)
    b_length= nchars;
  return my_strnncollsp_simple(cs, a, a_length, b, b_length);
}
int my_strnncollsp_simple_nopad(CHARSET_INFO * cs,
const uchar *a, size_t a_length,
const uchar *b, size_t b_length)
@ -2096,6 +2108,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler =
my_coll_init_simple, /* init */
my_strnncoll_simple,
my_strnncollsp_simple,
my_strnncollsp_nchars_simple,
my_strnxfrm_simple,
my_strnxfrmlen_simple,
my_like_range_simple,
@ -2112,6 +2125,7 @@ MY_COLLATION_HANDLER my_collation_8bit_simple_nopad_ci_handler =
my_coll_init_simple, /* init */
my_strnncoll_simple,
my_strnncollsp_simple_nopad,
my_strnncollsp_nchars_simple,
my_strnxfrm_simple_nopad,
my_strnxfrmlen_simple,
my_like_range_simple,

4
strings/ctype-sjis.c

@ -34046,6 +34046,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_ci=
NULL, /* init */
my_strnncoll_sjis_japanese_ci,
my_strnncollsp_sjis_japanese_ci,
my_strnncollsp_nchars_sjis_japanese_ci,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -34062,6 +34063,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_bin=
NULL, /* init */
my_strnncoll_sjis_bin,
my_strnncollsp_sjis_bin,
my_strnncollsp_nchars_sjis_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -34078,6 +34080,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_japanese_nopad_ci=
NULL, /* init */
my_strnncoll_sjis_japanese_ci,
my_strnncollsp_sjis_japanese_nopad_ci,
my_strnncollsp_nchars_sjis_japanese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -34094,6 +34097,7 @@ static MY_COLLATION_HANDLER my_collation_handler_sjis_nopad_bin=
NULL, /* init */
my_strnncoll_sjis_bin,
my_strnncollsp_sjis_nopad_bin,
my_strnncollsp_nchars_sjis_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

2
strings/ctype-tis620.c

@ -852,6 +852,7 @@ static MY_COLLATION_HANDLER my_collation_ci_handler =
NULL, /* init */
my_strnncoll_tis620,
my_strnncollsp_tis620,
my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_tis620,
my_strnxfrmlen_simple,
my_like_range_simple,
@ -867,6 +868,7 @@ static MY_COLLATION_HANDLER my_collation_nopad_ci_handler =
NULL, /* init */
my_strnncoll_tis620,
my_strnncollsp_tis620_nopad,
my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_tis620_nopad,
my_strnxfrmlen_simple,
my_like_range_simple,

179
strings/ctype-uca-scanner_next.inl

@ -0,0 +1,179 @@
/* Copyright (c) 2004, 2013, Oracle and/or its affiliates.
Copyright (c) 2009, 2021, MariaDB
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public
License as published by the Free Software Foundation; version 2
of the License.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with this library; if not, write to the Free
Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
MA 02110-1335 USA */
/*
  Helper macros that build the return value of the scanner function
  defined below in this file.

  With SCANNER_NEXT_NCHARS defined, the function returns a
  weight_and_nchars_t pair, so both macro arguments are used.
  Without it, the function returns only the weight and the second
  argument is discarded (it is not even evaluated).
*/
#ifdef SCANNER_NEXT_NCHARS
/* Return the pair {_w, _n} */
#define SCANNER_NEXT_RETURN(_w,_n) \
do { weight_and_nchars_t rc= {_w, _n}; return rc; } while(0)
/*
  Return the first weight of the contraction _cnt, together with the
  character count: _ignorable_nchars plus the character length of _cnt.
*/
#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
do { \
weight_and_nchars_t rc= { _cnt->weight[0], \
_ignorable_nchars + \
my_contraction_char_length(_cnt) }; \
return rc; \
} while(0)
#else
/* Return the weight _w only */
#define SCANNER_NEXT_RETURN(_w,_n) do { return _w; } while (0)
/* Return the first weight of the contraction _cnt only */
#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
do { return _cnt->weight[0]; } while(0)
#endif
/*
  Scan the next weight from the string attached to "scanner".

  This body is compiled in two flavours, depending on SCANNER_NEXT_NCHARS:
  - not defined: scanner_next(scanner) returns the weight as int.
  - defined:     scanner_next_with_nchars(scanner, nchars) returns
                 a {weight, nchars} pair, where "nchars" tells how many
                 characters were consumed to produce the weight
                 (skipped ignorable characters included). The "nchars"
                 parameter limits the length of contractions searched for.

  Returned weights:
  - the next weight of the current expansion, if one is still pending
    (reported with nchars=0);
  - -1 when the end of the string is reached;
  - 0xFFFF for an incomplete or bad byte sequence;
  - 0xFFFD for a character above scanner->level->maxchar;
  - otherwise the first weight of the next non-ignorable collation element.
*/
static inline
#ifdef SCANNER_NEXT_NCHARS
weight_and_nchars_t
MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner,
                                           size_t nchars)
#else
int
MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
#endif
{
#ifdef SCANNER_NEXT_NCHARS
  uint ignorable_nchars;
/*
  The contraction lookup stores the scanned characters into the local
  wc[MY_UCA_MAX_CONTRACTION] buffer declared below, and uses this value
  as its scan bound. The caller's "nchars" may be larger than the buffer,
  so never pass more than MY_UCA_MAX_CONTRACTION.
  A limit smaller than MY_UCA_MAX_CONTRACTION is passed through unchanged.
*/
#define LOCAL_MAX_CONTRACTION_LENGTH \
  ((nchars) < (size_t) MY_UCA_MAX_CONTRACTION ? \
   (nchars) : (size_t) MY_UCA_MAX_CONTRACTION)
#else
#define LOCAL_MAX_CONTRACTION_LENGTH MY_UCA_MAX_CONTRACTION
#endif
  /*
    Check if the weights for the previous character have been
    already fully scanned. If yes, then get the next character and
    initialize wbeg and wlength to its weight string.
  */

  if (scanner->wbeg[0])
  {
    /*
      More weights left from the previous step.
      Return the next weight from the current expansion.
      Return "0" as "nchars". The real nchars was set on a previous
      iteration.
    */
    SCANNER_NEXT_RETURN(*scanner->wbeg++, 0);
  }

  /* Every loop iteration that does not return skips one ignorable character */
#ifdef SCANNER_NEXT_NCHARS
  for (ignorable_nchars= 0 ; ; ignorable_nchars++)
#else
  for ( ; ; )
#endif
  {
    const uint16 *wpage;
    my_wc_t wc[MY_UCA_MAX_CONTRACTION];
    int mblen;

    /* Get next character */
#if MY_UCA_ASCII_OPTIMIZE
    /* Get next ASCII character */
    if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
    {
      wc[0]= scanner->sbeg[0];
      scanner->sbeg+= 1;

#if MY_UCA_COMPILE_CONTRACTIONS
      if (my_uca_needs_context_handling(scanner->level, wc[0]))
      {
        const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
                                              LOCAL_MAX_CONTRACTION_LENGTH);
        if (cnt)
          SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
      }
#endif

      scanner->page= 0;
      scanner->code= (int) wc[0];
      scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
      if (scanner->wbeg[0])
        SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
      continue;
    }
    else
#endif
    /* Get next MB character */
    if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
                                       scanner->send)) <= 0))
    {
      if (scanner->sbeg >= scanner->send)
      {
        /* No more bytes, end of line reached */
        SCANNER_NEXT_RETURN(-1, ignorable_nchars);
      }
      /*
        There are some more bytes left. Non-positive mb_len means that
        we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
      */
      if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
      {
        /* For safety purposes don't go beyond the string range. */
        scanner->sbeg= scanner->send;
      }
      /*
        Treat every complete or incomplete mbminlen unit as a weight which is
        greater than weight for any possible normal character.
        0xFFFF is greater than any possible weight in the UCA weight table.
      */
      SCANNER_NEXT_RETURN(0xFFFF, ignorable_nchars + 1);
    }

    scanner->sbeg+= mblen;
    if (wc[0] > scanner->level->maxchar)
    {
      /* Return 0xFFFD as weight for all characters outside BMP */
      scanner->wbeg= nochar;
      SCANNER_NEXT_RETURN(0xFFFD, ignorable_nchars + 1);
    }

#if MY_UCA_COMPILE_CONTRACTIONS
    if (my_uca_needs_context_handling(scanner->level, wc[0]))
    {
      const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc,
                                            LOCAL_MAX_CONTRACTION_LENGTH);
      if (cnt)
        SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars);
    }
#endif

    /* Process single character */
    scanner->page= wc[0] >> 8;
    scanner->code= wc[0] & 0xFF;

    /* If weight page for w[0] does not exist, then calculate algorithmically */
    if (!(wpage= scanner->level->weights[scanner->page]))
      SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner),
                          ignorable_nchars + 1);

    /* Calculate pointer to w[0]'s weight, using page and offset */
    scanner->wbeg= wpage +
                   scanner->code * scanner->level->lengths[scanner->page];
    if (scanner->wbeg[0])
      break;
    /* Skip ignorable character and continue the loop */
  }

  SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1);
}
/*
  Drop the macros used by this file, so that it can be included again
  with a different SCANNER_NEXT_NCHARS setting.
*/
#undef SCANNER_NEXT_NCHARS
#undef SCANNER_NEXT_RETURN
#undef SCANNER_NEXT_RETURN_CONTRACTION
#undef LOCAL_MAX_CONTRACTION_LENGTH

38
strings/ctype-uca.c

@ -35,6 +35,12 @@
#include "strings_def.h"
#include <m_ctype.h>
/*
  A scanned weight together with a character count.
  Used as the return type of the "nchars"-aware UCA scanner functions.
*/
typedef struct
{
int weight;  /* The weight; a negative value is used for "end of string" */
uint nchars; /* The number of characters associated with this weight */
} weight_and_nchars_t;
#define MY_CS_COMMON_UCA_FLAGS (MY_CS_COMPILED|MY_CS_STRNXFRM|MY_CS_UNICODE|MY_CS_NON1TO1)
#define MY_UCA_CNT_FLAG_SIZE 4096
@ -31450,6 +31456,21 @@ my_wmemcmp(my_wc_t *a, my_wc_t *b, size_t len)
}
/*
  Tell how many characters a contraction consists of.

  The first two elements of cnt->ch are not examined. Starting from the
  third element, the first zero element terminates the contraction.
  If there is no zero element, the full size of the cnt->ch array is returned.
*/
static inline uint my_contraction_char_length(const MY_CONTRACTION *cnt)
{
  const uint capacity= array_elements(cnt->ch);
  uint len= 2;
  while (len < capacity && cnt->ch[len] != 0)
    len++;
  return len < capacity ? len : capacity;
}
/**
Check if a string is a contraction,
and return its weight array on success.
@ -31487,8 +31508,9 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
a contraction part. Then try to find real contraction among the
candidates, starting from the longest.
@param scanner Pointer to UCA scanner
@param[OUT] *wc Where to store the scanned string
@param scanner Pointer to UCA scanner
@param[OUT] *wc Where to store the scanned string
@param max_char_length The longest contraction character length allowed
@return Weight array
@retval NULL - no contraction found
@ -31496,7 +31518,8 @@ my_uca_contraction_find(const MY_CONTRACTIONS *list, my_wc_t *wc, size_t len)
*/
static const MY_CONTRACTION *
my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc,
size_t max_char_length)
{
size_t clen= 1;
int flag;
@ -31505,7 +31528,7 @@ my_uca_scanner_contraction_find(my_uca_scanner *scanner, my_wc_t *wc)
/* Scan all contraction candidates */
for (s= scanner->sbeg, flag= MY_UCA_CNT_MID1;
clen < MY_UCA_MAX_CONTRACTION;
clen < max_char_length;
flag<<= 1)
{
int mblen;
@ -31582,11 +31605,14 @@ my_uca_previous_context_find(my_uca_scanner *scanner,
If wc[0] and the previous character make a previous context
pair, then wc[1] is set to the previous character.
@param max_char_length - the longest contraction character length allowed.
@retval NULL if could not find any contextual weights for wc[0]
@retval non null pointer - the address of MY_CONTRACTION found
*/
static inline const MY_CONTRACTION *
my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc,
size_t max_char_length)
{
const MY_CONTRACTION *cnt;
DBUG_ASSERT(scanner->level->contractions.nitems);
@ -31614,7 +31640,7 @@ my_uca_context_weight_find(my_uca_scanner *scanner, my_wc_t *wc)
wc[0]))
{
/* Check if w[0] starts a contraction */
if ((cnt= my_uca_scanner_contraction_find(scanner, wc)))
if ((cnt= my_uca_scanner_contraction_find(scanner, wc, max_char_length)))
return cnt;
}
return NULL;

276
strings/ctype-uca.ic

@ -35,108 +35,9 @@
#error MY_UCA_COLL_INIT is not defined
#endif
/*
  Scan the next weight from the string attached to "scanner".

  Returns:
  - the next weight of the current expansion, if one is still pending;
  - -1 when the end of the string is reached;
  - 0xFFFF for an incomplete or bad byte sequence;
  - 0xFFFD for a character above scanner->level->maxchar;
  - otherwise the first weight of the next non-ignorable collation element
    (ignorable characters are skipped).
*/
static inline int
MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner)
{
/*
Check if the weights for the previous character have been
already fully scanned. If yes, then get the next character and
initialize wbeg and wlength to its weight string.
*/
if (scanner->wbeg[0]) /* More weights left from the previous step: */
return *scanner->wbeg++; /* return the next weight from expansion */
do
{
const uint16 *wpage;
my_wc_t wc[MY_UCA_MAX_CONTRACTION];
int mblen;
/* Get next character */
#if MY_UCA_ASCII_OPTIMIZE
/* Get next ASCII character */
if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80)
{
wc[0]= scanner->sbeg[0];
scanner->sbeg+= 1;
#if MY_UCA_COMPILE_CONTRACTIONS
if (my_uca_needs_context_handling(scanner->level, wc[0]))
{
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc);
if (cnt)
return cnt->weight[0];
}
#endif
scanner->page= 0;
scanner->code= (int) wc[0];
scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0];
if (scanner->wbeg[0])
return *scanner->wbeg++;
/* Ignorable ASCII character: jump to the loop condition, which is true here */
continue;
}
else
#endif
/* Get next MB character */
if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg,
scanner->send)) <= 0))
{
if (scanner->sbeg >= scanner->send)
return -1; /* No more bytes, end of line reached */
/*
There are some more bytes left. Non-positive mb_len means that
we got an incomplete or a bad byte sequence. Consume mbminlen bytes.
*/
if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send)
{
/* For safety purposes don't go beyond the string range. */
scanner->sbeg= scanner->send;
}
/*
Treat every complete or incomplete mbminlen unit as a weight which is
greater than weight for any possible normal character.
0xFFFF is greater than any possible weight in the UCA weight table.
*/
return 0xFFFF;
}
scanner->sbeg+= mblen;
if (wc[0] > scanner->level->maxchar)
{
/* Return 0xFFFD as weight for all characters outside BMP */
scanner->wbeg= nochar;
return 0xFFFD;
}
#if MY_UCA_COMPILE_CONTRACTIONS
if (my_uca_needs_context_handling(scanner->level, wc[0]))
{
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc);
if (cnt)
return cnt->weight[0];
}
#endif
/* Process single character */
scanner->page= wc[0] >> 8;
scanner->code= wc[0] & 0xFF;
/* If weight page for w[0] does not exist, then calculate algorithmically */
if (!(wpage= scanner->level->weights[scanner->page]))
return my_uca_scanner_next_implicit(scanner);
/* Calculate pointer to w[0]'s weight, using page and offset */
scanner->wbeg= wpage +
scanner->code * scanner->level->lengths[scanner->page];
} while (!scanner->wbeg[0]); /* Skip ignorable characters */
return *scanner->wbeg++;
}
#include "ctype-uca-scanner_next.inl"
#define SCANNER_NEXT_NCHARS
#include "ctype-uca-scanner_next.inl"
/*
Compares two strings according to the collation
@ -409,6 +310,173 @@ MY_FUNCTION_NAME(strnncollsp_nopad_multilevel)(CHARSET_INFO *cs,
}
/*
  Get the next weight from "scanner", honouring the "nchars" limit.

  A real weight is returned when it can be scanned within the limit.
  Otherwise a weight is generated instead, and *generated is incremented:
  - end of string reached:      the space weight, counted as one character;
  - element exceeds "nchars":   a trimming weight, counted as "nchars";
  - nchars is 0 and no weights
    are pending from a previous
    expansion:                  a trimming weight, counted as 0 characters.

  The trimming weight is 0 for NOPAD collations and the space weight
  for all other collations.
*/
static inline weight_and_nchars_t
MY_FUNCTION_NAME(scanner_next_pad_trim)(my_uca_scanner *scanner,
                                        size_t nchars,
                                        uint *generated)
{
  weight_and_nchars_t res;

  if (nchars == 0 && !scanner->wbeg[0])
  {
    /*
      No characters may be consumed any more, and there are no weights
      left from a previous expansion. Perform trimming.
    */
    res.weight= (scanner->cs->state & MY_CS_NOPAD) ?
                0 : my_space_weight(scanner->level);
    res.nchars= 0;
    (*generated)++;
    return res;
  }

  res= MY_FUNCTION_NAME(scanner_next_with_nchars)(scanner, nchars);

  if (res.weight < 0)
  {
    /*
      The string is over, but the caller still wants weights.
      Perform space padding.
    */
    res.weight= my_space_weight(scanner->level);
    res.nchars= 1;
    (*generated)++;
    return res;
  }

  if (res.nchars > nchars)
  {
    /*
      A collation element was scanned, but it is longer than "nchars".
      This can happen with:
      - A contraction, e.g. Czech 'ch' with nchars=1
      - Ignorable characters followed by a non-ignorable one,
        e.g. CONCAT(x'00','a') with nchars=1.
      Perform trimming.
    */
    res.weight= (scanner->cs->state & MY_CS_NOPAD) ?
                0 : my_space_weight(scanner->level);
    res.nchars= (uint) nchars;
    (*generated)++;
  }
  return res;
}
/*
  Compare two strings on one weight level, looking at no more than "nchars"
  characters of each string, with virtual space padding or trimming.

  Returns the difference of the first pair of weights that differ.
  When both sides run out of real data at the same step, returns 0,
  except for NOPAD collations, where the side with more remaining
  "nchars" budget is reported as greater.
*/
static int
MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(CHARSET_INFO *cs,
                                              const MY_UCA_WEIGHT_LEVEL *level,
                                              const uchar *s, size_t slen,
                                              const uchar *t, size_t tlen,
                                              size_t nchars)
{
  my_uca_scanner s_scan, t_scan;
  size_t s_budget= nchars;
  size_t t_budget= nchars;

  my_uca_scanner_init_any(&s_scan, cs, level, s, slen);
  my_uca_scanner_init_any(&t_scan, cs, level, t, tlen);

  for ( ; ; )
  {
    uint n_generated= 0;
    weight_and_nchars_t s_step=
      MY_FUNCTION_NAME(scanner_next_pad_trim)(&s_scan, s_budget,
                                              &n_generated);
    weight_and_nchars_t t_step=
      MY_FUNCTION_NAME(scanner_next_pad_trim)(&t_scan, t_budget,
                                              &n_generated);
    int delta= s_step.weight - t_step.weight;

    if (delta)
      return delta;

    if (n_generated == 2)
    {
      /*
        Both weights were generated: neither side has real data left.
        For non-NOPAD collations the strings are equal at this point.

        For NOPAD collations the remaining virtual trailing spaces matter:
        the two strings still have s_budget and t_budget imaginary trailing
        spaces, and different counts make the strings unequal.

        Example, with SHARP-S being "U+00DF LATIN SMALL LETTER SHARP S",
        in a CHAR(3) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_nopad_ci
        column:
          SHARP-S plus two trailing spaces   (one character)
          'ss' plus one trailing space       (two characters)
        The comparison goes as follows:
          1. SHARP-S == 'ss'
          2. ' '     == ' '
          3. ' '     >  ''
        so the SHARP-S value is the greater one. The same result must be
        produced when this function gets the space-trimmed values:
        the side with more remaining virtual spaces is greater.
      */
      if (!(cs->state & MY_CS_NOPAD))
        return 0;
      if (s_budget == t_budget)
        return 0;
      return s_budget < t_budget ? -1 : +1;
    }

    DBUG_ASSERT(s_budget >= s_step.nchars);
    DBUG_ASSERT(t_budget >= t_step.nchars);
    s_budget-= s_step.nchars;
    t_budget-= t_step.nchars;
  }

  return 0;
}
/*
  strnncollsp_nchars() for one-level collations:
  compare using the first weight level of the collation only.
*/
static int
MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs,
                                     const uchar *s, size_t slen,
                                     const uchar *t, size_t tlen,
                                     size_t nchars)
{
  const MY_UCA_WEIGHT_LEVEL *first_level= &cs->uca->level[0];
  return MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs, first_level,
                                                       s, slen, t, tlen,
                                                       nchars);
}
/*
  strnncollsp_nchars() for multi-level collations:
  compare level by level, for cs->levels_for_order levels.
  The first level reporting a difference determines the result.
*/
static int
MY_FUNCTION_NAME(strnncollsp_nchars_multilevel)(CHARSET_INFO *cs,
                                                const uchar *s, size_t slen,
                                                const uchar *t, size_t tlen,
                                                size_t nchars)
{
  const uint level_count= cs->levels_for_order;
  int rc= 0;
  uint lvl;

  for (lvl= 0; rc == 0 && lvl < level_count; lvl++)
  {
    rc= MY_FUNCTION_NAME(strnncollsp_nchars_onelevel)(cs,
                                                      &cs->uca->level[lvl],
                                                      s, slen,
                                                      t, tlen,
                                                      nchars);
  }
  return rc;
}
/*
Calculates hash value for the given string,
@ -752,6 +820,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll),
MY_FUNCTION_NAME(strnncollsp),
MY_FUNCTION_NAME(strnncollsp_nchars),
MY_FUNCTION_NAME(strnxfrm),
my_strnxfrmlen_any_uca,
MY_LIKE_RANGE,
@ -773,6 +842,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll),
MY_FUNCTION_NAME(strnncollsp_nopad),
MY_FUNCTION_NAME(strnncollsp_nchars),
MY_FUNCTION_NAME(strnxfrm_nopad),
my_strnxfrmlen_any_uca,
MY_LIKE_RANGE, /* my_like_range_mb or my_like_range_generic */
@ -792,6 +862,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_multilevel)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll_multilevel),
MY_FUNCTION_NAME(strnncollsp_multilevel),
MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
MY_FUNCTION_NAME(strnxfrm_multilevel),
my_strnxfrmlen_any_uca_multilevel,
MY_LIKE_RANGE,
@ -811,6 +882,7 @@ MY_COLLATION_HANDLER MY_FUNCTION_NAME(collation_handler_nopad_multilevel)=
MY_UCA_COLL_INIT,
MY_FUNCTION_NAME(strnncoll_multilevel),
MY_FUNCTION_NAME(strnncollsp_nopad_multilevel),
MY_FUNCTION_NAME(strnncollsp_nchars_multilevel),
MY_FUNCTION_NAME(strnxfrm_multilevel),
my_strnxfrmlen_any_uca_multilevel,
MY_LIKE_RANGE,

16
strings/ctype-ucs2.c

@ -1505,6 +1505,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_ci_handler =
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_ci,
my_strnncollsp_nchars_utf16_general_ci,
my_strnxfrm_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -1521,6 +1522,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_bin_handler =
NULL, /* init */
my_strnncoll_utf16_bin,
my_strnncollsp_utf16_bin,
my_strnncollsp_nchars_utf16_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@ -1537,6 +1539,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf16_general_ci,
my_strnncollsp_utf16_general_nopad_ci,
my_strnncollsp_nchars_utf16_general_nopad_ci,
my_strnxfrm_nopad_utf16_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -1553,6 +1556,7 @@ static MY_COLLATION_HANDLER my_collation_utf16_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf16_bin,
my_strnncollsp_utf16_nopad_bin,
my_strnncollsp_nchars_utf16_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@ -1845,6 +1849,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_ci_handler =
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_ci,
my_strnncollsp_nchars_utf16le_general_ci,
my_strnxfrm_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -1861,6 +1866,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_bin_handler =
NULL, /* init */
my_strnncoll_utf16le_bin,
my_strnncollsp_utf16le_bin,
my_strnncollsp_nchars_utf16le_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@ -1877,6 +1883,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf16le_general_ci,
my_strnncollsp_utf16le_general_nopad_ci,
my_strnncollsp_nchars_utf16le_general_nopad_ci,
my_strnxfrm_nopad_utf16le_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -1893,6 +1900,7 @@ static MY_COLLATION_HANDLER my_collation_utf16le_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf16le_bin,
my_strnncollsp_utf16le_nopad_bin,
my_strnncollsp_nchars_utf16le_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@ -2671,6 +2679,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_ci_handler =
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_ci,
my_strnncollsp_nchars_utf32_general_ci,
my_strnxfrm_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -2687,6 +2696,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_bin_handler =
NULL, /* init */
my_strnncoll_utf32_bin,
my_strnncollsp_utf32_bin,
my_strnncollsp_nchars_utf32_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@ -2703,6 +2713,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf32_general_ci,
my_strnncollsp_utf32_general_nopad_ci,
my_strnncollsp_nchars_utf32_general_nopad_ci,
my_strnxfrm_nopad_utf32_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -2719,6 +2730,7 @@ static MY_COLLATION_HANDLER my_collation_utf32_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf32_bin,
my_strnncollsp_utf32_nopad_bin,
my_strnncollsp_nchars_utf32_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_generic,
@ -3261,6 +3273,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_ci_handler =
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_ci,
my_strnncollsp_nchars_ucs2_general_ci,
my_strnxfrm_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -3277,6 +3290,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_bin_handler =
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_bin,
my_strnncollsp_nchars_ucs2_bin,
my_strnxfrm_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -3293,6 +3307,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_ucs2_general_ci,
my_strnncollsp_ucs2_general_nopad_ci,
my_strnncollsp_nchars_ucs2_general_nopad_ci,
my_strnxfrm_nopad_ucs2_general_ci,
my_strnxfrmlen_unicode,
my_like_range_generic,
@ -3309,6 +3324,7 @@ static MY_COLLATION_HANDLER my_collation_ucs2_nopad_bin_handler =
NULL, /* init */
my_strnncoll_ucs2_bin,
my_strnncollsp_ucs2_nopad_bin,
my_strnncollsp_nchars_ucs2_nopad_bin,
my_strnxfrm_nopad_ucs2_bin,
my_strnxfrmlen_unicode,
my_like_range_generic,

4
strings/ctype-ujis.c

@ -67239,6 +67239,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_ci_handler =
NULL, /* init */
my_strnncoll_ujis_japanese_ci,
my_strnncollsp_ujis_japanese_ci,
my_strnncollsp_nchars_ujis_japanese_ci,
my_strnxfrm_mb, /* strnxfrm */
my_strnxfrmlen_simple,
my_like_range_mb, /* like_range */
@ -67255,6 +67256,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_bin_handler =
NULL, /* init */
my_strnncoll_ujis_bin,
my_strnncollsp_ujis_bin,
my_strnncollsp_nchars_ujis_bin,
my_strnxfrm_mb,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -67271,6 +67273,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_japanese_nopad_ci_handler =
NULL, /* init */
my_strnncoll_ujis_japanese_ci,
my_strnncollsp_ujis_japanese_nopad_ci,
my_strnncollsp_nchars_ujis_japanese_nopad_ci,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,
@ -67287,6 +67290,7 @@ static MY_COLLATION_HANDLER my_collation_ujis_nopad_bin_handler =
NULL, /* init */
my_strnncoll_ujis_bin,
my_strnncollsp_ujis_nopad_bin,
my_strnncollsp_nchars_ujis_nopad_bin,
my_strnxfrm_mb_nopad,
my_strnxfrmlen_simple,
my_like_range_mb,

11
strings/ctype-utf8.c

@ -5357,6 +5357,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_ci,
my_strnncollsp_utf8_general_ci,
my_strnncollsp_nchars_utf8_general_ci,
my_strnxfrm_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -5373,6 +5374,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_mysql500_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_mysql500_ci,
my_strnncollsp_utf8_general_mysql500_ci,
my_strnncollsp_nchars_utf8_general_mysql500_ci,
my_strnxfrm_utf8_general_mysql500_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -5389,6 +5391,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_bin_handler =
NULL, /* init */
my_strnncoll_utf8_bin,
my_strnncollsp_utf8_bin,
my_strnncollsp_nchars_utf8_bin,
my_strnxfrm_utf8_bin,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -5405,6 +5408,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_general_nopad_ci_handler =
NULL, /* init */
my_strnncoll_utf8_general_ci,
my_strnncollsp_utf8_general_nopad_ci,
my_strnncollsp_nchars_utf8_general_nopad_ci,
my_strnxfrm_nopad_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -5421,6 +5425,7 @@ static MY_COLLATION_HANDLER my_collation_utf8_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf8_bin,
my_strnncollsp_utf8_nopad_bin,
my_strnncollsp_nchars_utf8_nopad_bin,
my_strnxfrm_nopad_utf8_bin,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -5750,6 +5755,7 @@ static MY_COLLATION_HANDLER my_collation_cs_handler =
NULL, /* init */
my_strnncoll_utf8_cs,
my_strnncollsp_utf8_cs,
my_strnncollsp_nchars_generic,
my_strnxfrm_utf8_general_ci,
my_strnxfrmlen_unicode,
my_like_range_simple,
@ -7058,6 +7064,7 @@ static MY_COLLATION_HANDLER my_collation_filename_handler =
NULL, /* init */
my_strnncoll_simple,
my_strnncollsp_simple,
my_strnncollsp_nchars_generic,
my_strnxfrm_filename,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -7697,6 +7704,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_ci_handler=
NULL, /* init */
my_strnncoll_utf8mb4_general_ci,
my_strnncollsp_utf8mb4_general_ci,
my_strnncollsp_nchars_utf8mb4_general_ci,
my_strnxfrm_utf8mb4_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -7713,6 +7721,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_bin_handler =
NULL, /* init */
my_strnncoll_utf8mb4_bin,
my_strnncollsp_utf8mb4_bin,
my_strnncollsp_nchars_utf8mb4_bin,
my_strnxfrm_unicode_full_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_mb,
@ -7729,6 +7738,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_general_nopad_ci_handler=
NULL, /* init */
my_strnncoll_utf8mb4_general_ci,
my_strnncollsp_utf8mb4_general_nopad_ci,
my_strnncollsp_nchars_utf8mb4_general_nopad_ci,
my_strnxfrm_nopad_utf8mb4_general_ci,
my_strnxfrmlen_unicode,
my_like_range_mb,
@ -7745,6 +7755,7 @@ static MY_COLLATION_HANDLER my_collation_utf8mb4_nopad_bin_handler =
NULL, /* init */
my_strnncoll_utf8mb4_bin,
my_strnncollsp_utf8mb4_nopad_bin,
my_strnncollsp_nchars_utf8mb4_nopad_bin,
my_strnxfrm_unicode_full_nopad_bin,
my_strnxfrmlen_unicode_full_bin,
my_like_range_mb,

1
strings/ctype-win1250ch.c

@ -674,6 +674,7 @@ static MY_COLLATION_HANDLER my_collation_czech_ci_handler =
NULL, /* init */
my_strnncoll_win1250ch,
my_strnncollsp_win1250ch,
my_strnncollsp_nchars_generic_8bit,
my_strnxfrm_win1250ch,
my_strnxfrmlen_simple,
my_like_range_win1250ch,

29
strings/ctype.c

@ -1210,3 +1210,32 @@ outp:
copy_status->m_source_end_pos= from;
return to - to_start;
}
/*
  Generic strnncollsp_nchars() implementation.

  Both byte lengths are replaced by the values returned from
  my_well_formed_length() called with the "nchars" limit, and the
  resulting prefixes are compared with the collation's own strnncollsp().

  The collation is asserted not to be a NOPAD one.

  @param cs      - the character set and collation
  @param str1    - the left string
  @param len1    - byte length of the left string
  @param str2    - the right string
  @param len2    - byte length of the right string
  @param nchars  - the character limit passed to my_well_formed_length()
  @return        - the result of cs->coll->strnncollsp() on the prefixes
*/
int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
                                  const uchar *str1, size_t len1,
                                  const uchar *str2, size_t len2,
                                  size_t nchars)
{
  int error; /* filled in by my_well_formed_length(), not examined here */
  const char *beg1= (const char *) str1;
  const char *beg2= (const char *) str2;
  size_t prefix1= my_well_formed_length(cs, beg1, beg1 + len1, nchars, &error);
  size_t prefix2= my_well_formed_length(cs, beg2, beg2 + len2, nchars, &error);

  DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0);
  return cs->coll->strnncollsp(cs, str1, prefix1, str2, prefix2);
}
/*
  Generic strnncollsp_nchars() implementation that clamps lengths directly.

  Each byte length is capped at "nchars" and the resulting prefixes are
  compared with the collation's own strnncollsp().

  The collation is asserted not to be a NOPAD one.

  @param cs      - the character set and collation
  @param str1    - the left string
  @param len1    - byte length of the left string
  @param str2    - the right string
  @param len2    - byte length of the right string
  @param nchars  - upper bound applied to both lengths
  @return        - the result of cs->coll->strnncollsp() on the prefixes
*/
int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
                                       const uchar *str1, size_t len1,
                                       const uchar *str2, size_t len2,
                                       size_t nchars)
{
  size_t prefix1= len1 > nchars ? nchars : len1;
  size_t prefix2= len2 > nchars ? nchars : len2;

  DBUG_ASSERT((cs->state & MY_CS_NOPAD) == 0);
  return cs->coll->strnncollsp(cs, str1, prefix1, str2, prefix2);
}

50
strings/strcoll.ic

@ -287,6 +287,56 @@ MY_FUNCTION_NAME(strnncollsp)(CHARSET_INFO *cs __attribute__((unused)),
}
#endif
/**
  Compare at most "nchars" weights of two strings according to
  the collation, with trailing space padding or trimming.

  Weights are fetched pairwise with scan_weight(). The first pair of
  different weights decides the result. When both scanners report a
  zero length (both strings are exhausted), the strings are equal.

  @param  cs       - the character set and collation
  @param  a        - the left string
  @param  a_length - the length of the left string
  @param  b        - the right string
  @param  b_length - the length of the right string
  @param  nchars   - compare this amount of characters only
  @return          - the comparison result: the difference of the first
                     pair of unequal weights, or 0 if none was found
*/
static int
MY_FUNCTION_NAME(strnncollsp_nchars)(CHARSET_INFO *cs __attribute__((unused)),
                                     const uchar *a, size_t a_length,
                                     const uchar *b, size_t b_length,
                                     size_t nchars)
{
  const uchar *const a_limit= a + a_length;
  const uchar *const b_limit= b + b_length;
  size_t remaining= nchars;

  while (remaining != 0)
  {
    int wa, wb;
    uint step_a= MY_FUNCTION_NAME(scan_weight)(&wa, a, a_limit);
    uint step_b= MY_FUNCTION_NAME(scan_weight)(&wb, b, b_limit);
    int diff= wa - wb;

    /* Two different weights: the result is known. */
    if (diff != 0)
      return diff;

    if (step_a == 0 && step_b == 0)
    {
      /* Both sides produced auto-generated trailing spaces: nothing left. */
      DBUG_ASSERT(a == a_limit);
      DBUG_ASSERT(b == b_limit);
      break;
    }

    /* At least one of the strings still has data, keep going. */
    DBUG_ASSERT(a < a_limit || b < b_limit);
    a+= step_a;
    b+= step_b;
    remaining--;
  }
  return 0;
}
#endif /* DEFINE_STRNNCOLL */

10
strings/strings_def.h

@ -105,6 +105,16 @@ static inline const uchar *skip_trailing_space(const uchar *ptr,size_t len)
}
/*
Generic strnncollsp_nchars() implementation: limits both strings using
my_well_formed_length() with "nchars", then calls cs->coll->strnncollsp().
Contains a DBUG_ASSERT that the collation is not a NOPAD one.
*/
int my_strnncollsp_nchars_generic(CHARSET_INFO *cs,
const uchar *str1, size_t len1,
const uchar *str2, size_t len2,
size_t nchars);
/*
Same as above, but caps len1 and len2 at "nchars" directly
(set_if_smaller) instead of calling my_well_formed_length().
*/
int my_strnncollsp_nchars_generic_8bit(CHARSET_INFO *cs,
const uchar *str1, size_t len1,
const uchar *str2, size_t len2,
size_t nchars);
uint my_8bit_charset_flags_from_data(CHARSET_INFO *cs);
uint my_8bit_collation_flags_from_data(CHARSET_INFO *cs);

508
unittest/strings/strings-t.c

@ -18,6 +18,30 @@
#include <my_sys.h>
/*
U+00DF LATIN SMALL LETTER SHARP S = _utf8 x'C39F' = _latin1 x'DF'
*/
#define UTF8_sz "\xC3\x9F"
#define LATIN1_sz "\xDF"
/*
U+00C5 LATIN CAPITAL LETTER A WITH RING ABOVE = _utf8 x'C385'
*/
#define UTF8_ARING "\xC3\x85"
/*
U+00E4 LATIN SMALL LETTER A WITH DIAERESIS = _utf8 x'C3A4' = _latin1 x'E4'
*/
#define UTF8_auml "\xC3\xA4"
#define LATIN1_auml "\xE4"
/*
Two-byte sequences used by the mbminlen2 (ucs2) tests, high byte first:
x'0061' (letter a), x'0062' (letter b), x'0020' (space).
*/
#define UCS2_a "\x00\x61"
#define UCS2_b "\x00\x62"
#define UCS2_sp "\x00\x20"
/*
Test that like_range() returns well-formed results.
*/
@ -758,11 +782,483 @@ test_strcollsp()
}
int main()
/*
One test case for cs->coll->strnncollsp_nchars().
strnncollsp_char_one() passes "a" and "b" as the two strings,
"nchars" as the character limit, and checks the returned value
against "res" using eqres().
*/
typedef struct
{
LEX_CSTRING a;   /* The left string */
LEX_CSTRING b;   /* The right string */
size_t nchars;   /* The "nchars" argument of strnncollsp_nchars() */
int res;         /* Expected result (printed as "ExpSign" in the output) */
} STRNNCOLLSP_CHAR_PARAM;
/*
Some lines in the below test data are marked as follows:
IF - An ignorable failure. The scanner finds an ignorable character
followed by a normal character (or by a contraction),
but the "nchars" limit allows only one character to be scanned.
The whole sequence is ignored and is treated as end-of-line.
CF - A contraction failure. The scanner finds a contraction consisting
of two characters, but the "nchars" limit allows only one character
to be scanned. The whole contraction is ignored and is treated
as end-of-line.
*/
/*
Tests for mbminlen1 character sets,
for both PAD SPACE and NOPAD collations.
Every entry is {a, b, nchars, res}, see STRNNCOLLSP_CHAR_PARAM.
The final {NULL, 0} entry terminates the list: strnncollsp_char()
stops at the first entry whose a.str is NULL.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_common[]=
{
{{CSTR("a")}, {CSTR("a")}, 0, 0},
{{CSTR("a")}, {CSTR("a")}, 1, 0},
{{CSTR("a")}, {CSTR("a")}, 2, 0},
{{CSTR("a")}, {CSTR("a")}, 3, 0},
{{CSTR("a")}, {CSTR("a")}, 100, 0},
{{CSTR("a")}, {CSTR("ab")}, 0, 0},
{{CSTR("a")}, {CSTR("ab")}, 1, 0},
{{CSTR("a")}, {CSTR("ab")}, 2, -1},
{{CSTR("a")}, {CSTR("ab")}, 3, -1},
{{CSTR("a")}, {CSTR("ab")}, 100, -1},
{{CSTR("a")}, {CSTR("a ")}, 0, 0},
{{CSTR("a")}, {CSTR("a ")}, 1, 0},
{{CSTR("a")}, {CSTR("a ")}, 2, 0},
{{CSTR("a")}, {CSTR("a ")}, 3, 0},
{{CSTR("a")}, {CSTR("a ")}, 100, 0},
{{CSTR("a")}, {CSTR("a ")}, 0, 0},
{{CSTR("a")}, {CSTR("a ")}, 1, 0},
{{CSTR("a")}, {CSTR("a ")}, 2, 0},
{{CSTR("a")}, {CSTR("a ")}, 3, 0},
{{CSTR("a")}, {CSTR("a ")}, 100, 0},
{{CSTR("ss")}, {CSTR("ss")}, 0, 0},
{{CSTR("ss")}, {CSTR("ss")}, 1, 0},
{{CSTR("ss")}, {CSTR("ss")}, 2, 0},
{{CSTR("ss")}, {CSTR("ss")}, 3, 0},
{{CSTR("ss")}, {CSTR("ss")}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests for utf8, for both PAD SPACE and NOPAD collations.
U+00DF (UTF8_sz) compared to itself must be equal for every "nchars".
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_common[]=
{
{{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 0, 0},
{{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 1, 0},
{{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 2, 0},
{{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 3, 0},
{{CSTR(UTF8_sz)}, {CSTR(UTF8_sz)}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests for latin1, for both PAD and NOPAD collations.
x'DF' (LATIN1_sz) compared to itself must be equal for every "nchars".
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_xpad_common[]=
{
{{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 0, 0},
{{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 1, 0},
{{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 2, 0},
{{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 3, 0},
{{CSTR(LATIN1_sz)}, {CSTR(LATIN1_sz)}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests for utf8 collations that sort "A WITH DIAERESIS" equal to "A".
strnncollsp_char_utf8mbx() skips this list for collations whose name
contains "_bin", "_german2" or "_danish".
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_xpad_a_eq_auml[]=
{
{{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 0, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 1, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 2, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 3, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah")}, 100, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 0, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 1, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 2, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 3, 0},
{{CSTR(UTF8_auml "h")}, {CSTR("ah ")}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests specific to utf8mb3_unicode_ci (see test_strnncollsp_char()).
Compares "ss" with "s" x'00' "s" and with U+00DF (UTF8_sz).
The IF mark is explained in the legend comment above the test data.
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_ci[]=
{
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 0, 0},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 1, 0},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}/*IF*/, 2, 1},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 3, 0},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 4, 0},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 100, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests specific to utf8mb3_unicode_nopad_ci (see test_strnncollsp_char()).
Same string pairs as in the utf8mb3_unicode_ci list, but with different
expectations: "ss" vs "s" x'00' "s" is expected to be greater for all
nchars >= 2, and "ss" vs U+00DF is expected to be less for all nchars >= 1.
The IF mark is explained in the legend comment above the test data.
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_unicode_nopad_ci[]=
{
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 0, 0},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 1, 0},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}/*IF*/, 2, 1},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 3, 1},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 4, 1},
{{CSTR("ss")}, {CSTR("s" "\x00" "s")}, 100, 1},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, -1},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, -1},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 4, -1},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, -1},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests specific to utf8mb3_danish_ci (see test_strnncollsp_char()).
Compares "aa" with the empty string, "a", "aa", strings containing
an x'00' byte, and U+00C5 (UTF8_ARING). "aa" is expected to be equal
to U+00C5 when nchars >= 2.
The CF and IF marks are explained in the legend comment above the test data.
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mb3_danish_ci[]=
{
{{CSTR("aa")}, {CSTR("")}, 0, 0},
{{CSTR("aa")}/*CF*/, {CSTR("")}, 1, 1},
{{CSTR("aa")}, {CSTR("")}, 2, 1},
{{CSTR("aa")}, {CSTR("")}, 3, 1},
{{CSTR("aa")}, {CSTR("")}, 100, 1},
{{CSTR("aa")}, {CSTR("a")}, 0, 0},
{{CSTR("aa")}/*CF*/, {CSTR("a")}, 1, 0},
{{CSTR("aa")}, {CSTR("a")}, 2, 1},
{{CSTR("aa")}, {CSTR("a")}, 3, 1},
{{CSTR("aa")}, {CSTR("a")}, 100, 1},
{{CSTR("aa")}, {CSTR("aa")}, 0, 0},
{{CSTR("aa")}/*CF*/, {CSTR("aa")}/*CF*/, 1, 0},
{{CSTR("aa")}, {CSTR("aa")}, 2, 0},
{{CSTR("aa")}, {CSTR("aa")}, 3, 0},
{{CSTR("aa")}, {CSTR("aa")}, 100, 0},
{{CSTR("aa")}, {CSTR("\x00" "a")}, 0, 0},
{{CSTR("aa")}/*CF*/, {CSTR("\x00" "a")}/*IF*/, 1, 1},
{{CSTR("aa")}, {CSTR("\x00" "a")}, 2, 1},
{{CSTR("aa")}, {CSTR("\x00" "a")}, 3, 1},
{{CSTR("aa")}, {CSTR("\x00" "a")}, 100, 1},
{{CSTR("aa")}, {CSTR("\x00" "aa")}, 0, 0},
{{CSTR("aa")}/*CF*/, {CSTR("\x00" "aa")}/*IF*/, 1, 1},
{{CSTR("aa")}, {CSTR("\x00" "aa")}/*IF*/, 2, 1},
{{CSTR("aa")}, {CSTR("\x00" "aa")}, 3, 0},
{{CSTR("aa")}, {CSTR("\x00" "aa")}, 100, 0},
{{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 0, 0},
{{CSTR("aa")}/*CF*/, {CSTR("a" "\x00" "a")}, 1, 0},
{{CSTR("aa")}, {CSTR("a" "\x00" "a")}/*IF*/, 2, 1},
{{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 3, 1},
{{CSTR("aa")}, {CSTR("a" "\x00" "a")}, 100, 1},
{{CSTR("aa")}, {CSTR(UTF8_ARING)}, 0, 0},
{{CSTR("aa")}/*CF*/, {CSTR(UTF8_ARING)}, 1, -1},
{{CSTR("aa")}, {CSTR(UTF8_ARING)}, 2, 0},
{{CSTR("aa")}, {CSTR(UTF8_ARING)}, 3, 0},
{{CSTR("aa")}, {CSTR(UTF8_ARING)}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests specific to latin1_german2_ci (see test_strnncollsp_char()).
"ss" vs x'DF' (LATIN1_sz) and "ae" vs x'E4' (LATIN1_auml) are expected
to be equal when nchars >= 2, and less when nchars is 1.
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_latin1_german2_ci[]=
{
{{CSTR("ss")}, {CSTR(LATIN1_sz)}, 0, 0},
{{CSTR("ss")}, {CSTR(LATIN1_sz)}, 1, -1},
{{CSTR("ss")}, {CSTR(LATIN1_sz)}, 2, 0},
{{CSTR("ss")}, {CSTR(LATIN1_sz)}, 3, 0},
{{CSTR("ss")}, {CSTR(LATIN1_sz)}, 100, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml)}, 0, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml)}, 1, -1},
{{CSTR("ae")}, {CSTR(LATIN1_auml)}, 2, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml)}, 3, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml)}, 100, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 0, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 1, -1},
{{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 2, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 3, 0},
{{CSTR("ae")}, {CSTR(LATIN1_auml " ")}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests specific to utf8mb3_german2_ci (see test_strnncollsp_char()).
"ss" vs U+00DF (UTF8_sz) and "ae" vs U+00E4 (UTF8_auml) are expected
to be equal when nchars >= 2, and less when nchars is 1.
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_utf8mbx_german2_ci[]=
{
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 0, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 1, -1},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 2, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 3, 0},
{{CSTR("ss")}, {CSTR(UTF8_sz)}, 100, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml)}, 0, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml)}, 1, -1},
{{CSTR("ae")}, {CSTR(UTF8_auml)}, 2, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml)}, 3, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml)}, 100, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 0, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 1, -1},
{{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 2, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 3, 0},
{{CSTR("ae")}, {CSTR(UTF8_auml " ")}, 100, 0},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Tests for Czech collations. test_strnncollsp_char() runs this list for
cp1250_czech_cs, latin2_czech_cs and utf8mb3_czech_ci.
Compares "c", "h" and "i" with "ch" for nchars 0, 1 and 2.
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen1_xpad_czech[]=
{
{{CSTR("c")}, {CSTR("ch")}, 0, 0},
{{CSTR("c")}, {CSTR("ch")}, 1, 0},
{{CSTR("c")}, {CSTR("ch")}, 2, -1},
{{CSTR("h")}, {CSTR("ch")}, 0, 0},
{{CSTR("h")}, {CSTR("ch")}, 1, 1},
{{CSTR("h")}, {CSTR("ch")}, 2, -1},
{{CSTR("i")}, {CSTR("ch")}, 0, 0},
{{CSTR("i")}, {CSTR("ch")}, 1, 1},
{{CSTR("i")}, {CSTR("ch")}, 2, 1},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
Common tests run by strnncollsp_char_mbminlen2(), which
test_strnncollsp_char() calls for the ucs2 collations.
The strings are built from the two-byte UCS2_a, UCS2_b and UCS2_sp macros.
The list is terminated by the {NULL, 0} entry.
*/
static STRNNCOLLSP_CHAR_PARAM strnncollsp_char_mbminlen2_xpad_common[]=
{
{{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 0, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 1, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 2, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 3, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a)}, 100, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 0, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 1, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 2, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 3, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp)}, 100, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 0, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 1, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 2, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 3, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_sp UCS2_sp)}, 100, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 0, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 1, 0},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 2, -1},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 3, -1},
{{CSTR(UCS2_a)}, {CSTR(UCS2_a UCS2_b)}, 100, -1},
{{NULL, 0}, {NULL, 0}, 0, 0}
};
/*
  Run a single strnncollsp_nchars() test case.

  The comparison is executed as (a, b) first. One diagnostic line with the
  collation name, both strings in hex, nchars, the expected and the actual
  result is always printed. If the result matches, the comparison is
  repeated as (b, a) and checked against the negated expectation.

  @param cs  - the collation to test
  @param p   - the test case
  @return    - 0 on success, 1 if either comparison did not match
*/
static int
strnncollsp_char_one(CHARSET_INFO *cs, const STRNNCOLLSP_CHAR_PARAM *p)
{
  char ahex[64], bhex[64];
  int backward;
  int forward= cs->coll->strnncollsp_nchars(cs,
                                            (uchar *) p->a.str, p->a.length,
                                            (uchar *) p->b.str, p->b.length,
                                            p->nchars);

  str2hex(ahex, sizeof(ahex), p->a.str, p->a.length);
  str2hex(bhex, sizeof(bhex), p->b.str, p->b.length);
  diag("%-25s %-12s %-12s %3d %7d %7d%s",
       cs->name, ahex, bhex, (int) p->nchars, p->res, forward,
       eqres(forward, p->res) ? "" : " FAILED");

  /* The direct comparison is wrong: no need to try the reverse one */
  if (!eqres(forward, p->res))
    return 1;

  /* Test in reverse order */
  backward= cs->coll->strnncollsp_nchars(cs,
                                         (uchar *) p->b.str, p->b.length,
                                         (uchar *) p->a.str, p->a.length,
                                         p->nchars);
  if (eqres(backward, -p->res))
    return 0;

  diag("Comparison in reverse order failed. Expected %d, got %d",
       -p->res, backward);
  return 1;
}
/*
  Run a list of strnncollsp_nchars() test cases against one collation.

  @param collation - the collation name, resolved with get_charset_by_name()
  @param param     - the test cases, terminated by an entry with a.str == NULL
  @return          - the number of failed test cases,
                     or 1 if the collation could not be resolved
*/
static int
strnncollsp_char(const char *collation, const STRNNCOLLSP_CHAR_PARAM *param)
{
  CHARSET_INFO *cs= get_charset_by_name(collation, MYF(0));
  const STRNNCOLLSP_CHAR_PARAM *entry= param;
  int failed= 0;

  if (cs == NULL)
  {
    diag("get_charset_by_name() failed");
    return 1;
  }

  /* Column headers for the lines printed by strnncollsp_char_one() */
  diag("%-25s %-12s %-12s %-3s %7s %7s",
       "Collation", "a", "b", "Nch", "ExpSign", "Actual");

  while (entry->a.str != NULL)
  {
    failed+= strnncollsp_char_one(cs, entry);
    entry++;
  }
  return failed;
}
/*
  Test a collation with the common mbminlen1 test cases,
  followed by an optional collation specific list.

  @param collation - the collation name
  @param specific  - additional test cases, or NULL
  @return          - the total number of failures
*/
static int
strnncollsp_char_mbminlen1(const char *collation,
                           const STRNNCOLLSP_CHAR_PARAM *specific)
{
  int failed= strnncollsp_char(collation,
                               strnncollsp_char_mbminlen1_xpad_common);
  if (specific != NULL)
    failed+= strnncollsp_char(collation, specific);
  return failed;
}
/*
  Test a collation with the common mbminlen2 test cases,
  followed by an optional collation specific list.

  @param collation - the collation name
  @param specific  - additional test cases, or NULL
  @return          - the total number of failures
*/
static int
strnncollsp_char_mbminlen2(const char *collation,
                           const STRNNCOLLSP_CHAR_PARAM *specific)
{
  int failed= strnncollsp_char(collation,
                               strnncollsp_char_mbminlen2_xpad_common);
  if (specific != NULL)
    failed+= strnncollsp_char(collation, specific);
  return failed;
}
/*
  Test a latin1 collation: the common mbminlen1 test cases first,
  then the common latin1 ones, then an optional collation specific list.

  @param collation - the collation name
  @param specific  - additional test cases, or NULL
  @return          - the total number of failures
*/
static int
strnncollsp_char_latin1(const char *collation,
                        const STRNNCOLLSP_CHAR_PARAM *specific)
{
  int failed= strnncollsp_char(collation,
                               strnncollsp_char_mbminlen1_xpad_common);
  failed+= strnncollsp_char(collation, strnncollsp_char_latin1_xpad_common);
  if (specific != NULL)
    failed+= strnncollsp_char(collation, specific);
  return failed;
}
/*
  Test a utf8 collation: the common mbminlen1 test cases first,
  then the common utf8 ones, then the "a equals a-umlaut" list
  (unless the collation name contains "_bin", "_german2" or "_danish"),
  then an optional collation specific list.

  @param collation - the collation name
  @param specific  - additional test cases, or NULL
  @return          - the total number of failures
*/
static int
strnncollsp_char_utf8mbx(const char *collation,
                         const STRNNCOLLSP_CHAR_PARAM *specific)
{
  int failed= strnncollsp_char(collation,
                               strnncollsp_char_mbminlen1_xpad_common);
  failed+= strnncollsp_char(collation, strnncollsp_char_utf8mbx_xpad_common);

  if (!(strstr(collation, "_bin") ||
        strstr(collation, "_german2") ||
        strstr(collation, "_danish")))
  {
    failed+= strnncollsp_char(collation,
                              strnncollsp_char_utf8mbx_xpad_a_eq_auml);
  }

  if (specific != NULL)
    failed+= strnncollsp_char(collation, specific);
  return failed;
}
static int
test_strnncollsp_char()
{
int failed= 0;
failed+= strnncollsp_char_latin1("latin1_swedish_ci", NULL);
failed+= strnncollsp_char_latin1("latin1_swedish_nopad_ci", NULL);
failed+= strnncollsp_char_latin1("latin1_bin", NULL);
failed+= strnncollsp_char_latin1("latin1_nopad_bin", NULL);
failed+= strnncollsp_char_latin1("latin1_german2_ci",
strnncollsp_char_latin1_german2_ci);
#ifdef HAVE_CHARSET_cp1250
failed+= strnncollsp_char_mbminlen1("cp1250_czech_cs",
strnncollsp_char_mbminlen1_xpad_czech);
#endif
#ifdef HAVE_CHARSET_latin2
failed+= strnncollsp_char_mbminlen1("latin2_czech_cs",
strnncollsp_char_mbminlen1_xpad_czech);
#endif
#ifdef HAVE_CHARSET_tis620
failed+= strnncollsp_char_mbminlen1("tis620_thai_ci", NULL);
#endif
#ifdef HAVE_CHARSET_big5
failed+= strnncollsp_char_mbminlen1("big5_chinese_ci", NULL);
failed+= strnncollsp_char_mbminlen1("big5_chinese_nopad_ci", NULL);
failed+= strnncollsp_char_mbminlen1("big5_bin", NULL);
failed+= strnncollsp_char_mbminlen1("big5_nopad_bin", NULL);
#endif
failed+= strnncollsp_char_utf8mbx("utf8mb3_general_ci", NULL);
failed+= strnncollsp_char_utf8mbx("utf8mb3_general_nopad_ci", NULL);
failed+= strnncollsp_char_utf8mbx("utf8mb3_bin", NULL);
failed+= strnncollsp_char_utf8mbx("utf8mb3_nopad_bin", NULL);
failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_ci",
strnncollsp_char_utf8mb3_unicode_ci);
failed+= strnncollsp_char_utf8mbx("utf8mb3_unicode_nopad_ci",
strnncollsp_char_utf8mb3_unicode_nopad_ci);
failed+= strnncollsp_char_utf8mbx("utf8mb3_danish_ci",
strnncollsp_char_utf8mb3_danish_ci);
failed+= strnncollsp_char_utf8mbx("utf8mb3_german2_ci",
strnncollsp_char_utf8mbx_german2_ci);
failed+= strnncollsp_char_utf8mbx("utf8mb3_czech_ci",
strnncollsp_char_mbminlen1_xpad_czech);
#ifdef HAVE_CHARSET_ucs2
failed+= strnncollsp_char_mbminlen2("ucs2_general_ci", NULL);
failed+= strnncollsp_char_mbminlen2("ucs2_general_nopad_ci", NULL);
failed+= strnncollsp_char_mbminlen2("ucs2_bin", NULL);
failed+= strnncollsp_char_mbminlen2("ucs2_nopad_bin", NULL);
failed+= strnncollsp_char_mbminlen2("ucs2_unicode_ci", NULL);
failed+= strnncollsp_char_mbminlen2("ucs2_unicode_nopad_ci", NULL);
#endif
return failed;
}
int main(int ac, char **av)
{
size_t i, failed= 0;
plan(2);
MY_INIT(av[0]);
plan(3);
diag("Testing my_like_range_xxx() functions");
for (i= 0; i < array_elements(charset_list); i++)
@ -780,5 +1276,11 @@ int main()
failed= test_strcollsp();
ok(failed == 0, "Testing cs->coll->strnncollsp()");
diag("Testing cs->coll->strnncollsp_char()");
failed= test_strnncollsp_char();
ok(failed == 0, "Testing cs->coll->strnncollsp_char()");
my_end(0);
return exit_status();
}
Loading…
Cancel
Save