|
|
|
@ -1303,7 +1303,28 @@ create_fromuni(struct charset_info_st *cs, |
|
|
|
if (wc >= idx[i].uidx.from && wc <= idx[i].uidx.to && wc) |
|
|
|
{ |
|
|
|
int ofs= wc - idx[i].uidx.from; |
|
|
|
tab[ofs]= ch; |
|
|
|
if (!tab[ofs] || tab[ofs] > 0x7F) /* Prefer ASCII*/ |
|
|
|
{ |
|
|
|
/* |
|
|
|
Some character sets can have double encoding. For example, |
|
|
|
in ARMSCII8, the following characters are encoded twice: |
|
|
|
|
|
|
|
Encoding#1 Encoding#2 Unicode Character Name |
|
|
|
---------- ---------- ------- -------------- |
|
|
|
0x27 0xFF U+0027 APOSTROPHE |
|
|
|
0x28 0xA5 U+0028 LEFT PARENTHESIS |
|
|
|
0x29 0xA4 U+0029 RIGHT PARENTHESIS |
|
|
|
0x2C 0xAB U+002C COMMA |
|
|
|
0x2D 0xAC U+002D HYPHEN-MINUS |
|
|
|
0x2E 0xA9 U+002E FULL STOP |
|
|
|
|
|
|
|
That is, both 0x27 and 0xFF convert to Unicode U+0027. |
|
|
|
When converting back from Unicode to ARMSCII, |
|
|
|
we prefer the ASCII range; that is, we want U+0027 |
|
|
|
to convert to 0x27 rather than to 0xFF. |
|
|
|
*/ |
|
|
|
tab[ofs]= ch; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|