Browse Source
MDEV-25904 New collation functions to compare InnoDB style trimmed NO PAD strings
pull/2000/head
MDEV-25904 New collation functions to compare InnoDB style trimmed NO PAD strings
pull/2000/head
25 changed files with 1150 additions and 144 deletions
-
54include/m_ctype.h
-
47sql/field.cc
-
4strings/ctype-big5.c
-
25strings/ctype-bin.c
-
4strings/ctype-cp932.c
-
1strings/ctype-czech.c
-
4strings/ctype-euc_kr.c
-
4strings/ctype-eucjpms.c
-
4strings/ctype-gb2312.c
-
4strings/ctype-gbk.c
-
1strings/ctype-latin1.c
-
14strings/ctype-simple.c
-
4strings/ctype-sjis.c
-
2strings/ctype-tis620.c
-
179strings/ctype-uca-scanner_next.inl
-
38strings/ctype-uca.c
-
276strings/ctype-uca.ic
-
16strings/ctype-ucs2.c
-
4strings/ctype-ujis.c
-
11strings/ctype-utf8.c
-
1strings/ctype-win1250ch.c
-
29strings/ctype.c
-
50strings/strcoll.ic
-
10strings/strings_def.h
-
508unittest/strings/strings-t.c
@ -0,0 +1,179 @@ |
|||
/* Copyright (c) 2004, 2013, Oracle and/or its affiliates. |
|||
Copyright (c) 2009, 2021, MariaDB |
|||
|
|||
This library is free software; you can redistribute it and/or |
|||
modify it under the terms of the GNU Library General Public |
|||
License as published by the Free Software Foundation; version 2 |
|||
of the License. |
|||
|
|||
This library is distributed in the hope that it will be useful, |
|||
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|||
Library General Public License for more details. |
|||
|
|||
You should have received a copy of the GNU Library General Public |
|||
License along with this library; if not, write to the Free |
|||
Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, |
|||
MA 02110-1335 USA */ |
|||
|
|||
|
|||
/*
  Return helpers for the scanner "next" function that follows.

  When SCANNER_NEXT_NCHARS is defined, the function returns
  a weight_and_nchars_t value, built from a weight and a character count.
  Otherwise it returns only the weight as an int, and the character
  count argument is discarded at preprocessing time.

  SCANNER_NEXT_RETURN(_w,_n)
    Return the weight _w together with the character count _n.

  SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars)
    Return the first weight of the contraction _cnt. The character count
    is _ignorable_nchars plus my_contraction_char_length(_cnt).

  Every macro parameter is parenthesized in the expansion, so that
  an argument which is a compound expression (e.g. "&x" or "a ? b : c")
  is expanded with the intended precedence.
*/
#ifdef SCANNER_NEXT_NCHARS

#define SCANNER_NEXT_RETURN(_w,_n) \
  do { weight_and_nchars_t rc= {(_w), (_n)}; return rc; } while(0)

#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
  do { \
    weight_and_nchars_t rc= { (_cnt)->weight[0], \
                              (_ignorable_nchars) + \
                              my_contraction_char_length(_cnt) }; \
    return rc; \
  } while(0)

#else

#define SCANNER_NEXT_RETURN(_w,_n) do { return (_w); } while (0)

#define SCANNER_NEXT_RETURN_CONTRACTION(_cnt,_ignorable_nchars) \
  do { return (_cnt)->weight[0]; } while(0)

#endif
|||
|
|||
static inline |
|||
#ifdef SCANNER_NEXT_NCHARS |
|||
weight_and_nchars_t |
|||
MY_FUNCTION_NAME(scanner_next_with_nchars)(my_uca_scanner *scanner, |
|||
size_t nchars) |
|||
#else |
|||
int |
|||
MY_FUNCTION_NAME(scanner_next)(my_uca_scanner *scanner) |
|||
#endif |
|||
{ |
|||
#ifdef SCANNER_NEXT_NCHARS |
|||
uint ignorable_nchars; |
|||
#define LOCAL_MAX_CONTRACTION_LENGTH nchars |
|||
#else |
|||
#define LOCAL_MAX_CONTRACTION_LENGTH MY_UCA_MAX_CONTRACTION |
|||
#endif |
|||
/* |
|||
Check if the weights for the previous character have been |
|||
already fully scanned. If yes, then get the next character and |
|||
initialize wbeg and wlength to its weight string. |
|||
*/ |
|||
|
|||
if (scanner->wbeg[0]) |
|||
{ |
|||
/* |
|||
More weights left from the previous step. |
|||
Return the next weight from the current expansion. |
|||
Return "0" as "nchars". The real nchars was set on a previous |
|||
iteration. |
|||
*/ |
|||
SCANNER_NEXT_RETURN(*scanner->wbeg++, 0); |
|||
} |
|||
|
|||
#ifdef SCANNER_NEXT_NCHARS |
|||
for (ignorable_nchars= 0 ; ; ignorable_nchars++) |
|||
#else |
|||
for ( ; ; ) |
|||
#endif |
|||
{ |
|||
const uint16 *wpage; |
|||
my_wc_t wc[MY_UCA_MAX_CONTRACTION]; |
|||
int mblen; |
|||
|
|||
/* Get next character */ |
|||
#if MY_UCA_ASCII_OPTIMIZE |
|||
/* Get next ASCII character */ |
|||
if (scanner->sbeg < scanner->send && scanner->sbeg[0] < 0x80) |
|||
{ |
|||
wc[0]= scanner->sbeg[0]; |
|||
scanner->sbeg+= 1; |
|||
|
|||
#if MY_UCA_COMPILE_CONTRACTIONS |
|||
if (my_uca_needs_context_handling(scanner->level, wc[0])) |
|||
{ |
|||
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc, |
|||
LOCAL_MAX_CONTRACTION_LENGTH); |
|||
if (cnt) |
|||
SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars); |
|||
} |
|||
#endif |
|||
|
|||
scanner->page= 0; |
|||
scanner->code= (int) wc[0]; |
|||
scanner->wbeg= scanner->level->weights[0] + scanner->code * scanner->level->lengths[0]; |
|||
if (scanner->wbeg[0]) |
|||
SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1); |
|||
continue; |
|||
} |
|||
else |
|||
#endif |
|||
/* Get next MB character */ |
|||
if (((mblen= MY_MB_WC(scanner, wc, scanner->sbeg, |
|||
scanner->send)) <= 0)) |
|||
{ |
|||
if (scanner->sbeg >= scanner->send) |
|||
{ |
|||
/* No more bytes, end of line reached */ |
|||
SCANNER_NEXT_RETURN(-1, ignorable_nchars); |
|||
} |
|||
/* |
|||
There are some more bytes left. Non-positive mb_len means that |
|||
we got an incomplete or a bad byte sequence. Consume mbminlen bytes. |
|||
*/ |
|||
if ((scanner->sbeg+= scanner->cs->mbminlen) > scanner->send) |
|||
{ |
|||
/* For safety purposes don't go beyond the string range. */ |
|||
scanner->sbeg= scanner->send; |
|||
} |
|||
/* |
|||
Treat every complete or incomplete mbminlen unit as a weight which is |
|||
greater than weight for any possible normal character. |
|||
0xFFFF is greater than any possible weight in the UCA weight table. |
|||
*/ |
|||
SCANNER_NEXT_RETURN(0xFFFF, ignorable_nchars + 1); |
|||
} |
|||
|
|||
scanner->sbeg+= mblen; |
|||
if (wc[0] > scanner->level->maxchar) |
|||
{ |
|||
/* Return 0xFFFD as weight for all characters outside BMP */ |
|||
scanner->wbeg= nochar; |
|||
SCANNER_NEXT_RETURN(0xFFFD, ignorable_nchars + 1); |
|||
} |
|||
|
|||
#if MY_UCA_COMPILE_CONTRACTIONS |
|||
if (my_uca_needs_context_handling(scanner->level, wc[0])) |
|||
{ |
|||
const MY_CONTRACTION *cnt= my_uca_context_weight_find(scanner, wc, |
|||
LOCAL_MAX_CONTRACTION_LENGTH); |
|||
if (cnt) |
|||
SCANNER_NEXT_RETURN_CONTRACTION(cnt, ignorable_nchars); |
|||
} |
|||
#endif |
|||
|
|||
/* Process single character */ |
|||
scanner->page= wc[0] >> 8; |
|||
scanner->code= wc[0] & 0xFF; |
|||
|
|||
/* If weight page for w[0] does not exist, then calculate algoritmically */ |
|||
if (!(wpage= scanner->level->weights[scanner->page])) |
|||
SCANNER_NEXT_RETURN(my_uca_scanner_next_implicit(scanner), |
|||
ignorable_nchars + 1); |
|||
|
|||
/* Calculate pointer to w[0]'s weight, using page and offset */ |
|||
scanner->wbeg= wpage + |
|||
scanner->code * scanner->level->lengths[scanner->page]; |
|||
if (scanner->wbeg[0]) |
|||
break; |
|||
/* Skip ignorable character and continue the loop */ |
|||
} |
|||
|
|||
SCANNER_NEXT_RETURN(*scanner->wbeg++, ignorable_nchars + 1); |
|||
} |
|||
|
|||
/*
  Remove the configuration macro and the helper macros used by this file,
  so that none of them stays defined after it. Presumably this allows
  the file to be included again with a different SCANNER_NEXT_NCHARS
  setting.
*/
#undef LOCAL_MAX_CONTRACTION_LENGTH
#undef SCANNER_NEXT_RETURN_CONTRACTION
#undef SCANNER_NEXT_RETURN
#undef SCANNER_NEXT_NCHARS
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue