You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

783 lines
28 KiB

23 years ago
  1. /* Copyright (C) 2000 MySQL AB
  2. This program is free software; you can redistribute it and/or modify
  3. it under the terms of the GNU General Public License as published by
  4. the Free Software Foundation; version 2 of the License.
  5. This program is distributed in the hope that it will be useful,
  6. but WITHOUT ANY WARRANTY; without even the implied warranty of
  7. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  8. GNU General Public License for more details.
  9. You should have received a copy of the GNU General Public License
  10. along with this program; if not, write to the Free Software
  11. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
  12. #include <my_global.h>
  13. #include "m_string.h"
  14. #include "m_ctype.h"
  15. static uchar ctype_latin1[] = {
  16. 0,
  17. 32, 32, 32, 32, 32, 32, 32, 32, 32, 40, 40, 40, 40, 40, 32, 32,
  18. 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  19. 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  20. 132,132,132,132,132,132,132,132,132,132, 16, 16, 16, 16, 16, 16,
  21. 16,129,129,129,129,129,129, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  22. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 16, 16, 16, 16, 16,
  23. 16,130,130,130,130,130,130, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  24. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 16, 16, 16, 16, 32,
  25. 16, 0, 16, 2, 16, 16, 16, 16, 16, 16, 1, 16, 1, 0, 1, 0,
  26. 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 2, 16, 2, 0, 2, 1,
  27. 72, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  28. 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
  29. 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  30. 1, 1, 1, 1, 1, 1, 1, 16, 1, 1, 1, 1, 1, 1, 1, 2,
  31. 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  32. 2, 2, 2, 2, 2, 2, 2, 16, 2, 2, 2, 2, 2, 2, 2, 2
  33. };
  34. static uchar to_lower_latin1[] = {
  35. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  36. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  37. 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  38. 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  39. 64, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  40. 112,113,114,115,116,117,118,119,120,121,122, 91, 92, 93, 94, 95,
  41. 96, 97, 98, 99,100,101,102,103,104,105,106,107,108,109,110,111,
  42. 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
  43. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  44. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  45. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  46. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  47. 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  48. 240,241,242,243,244,245,246,215,248,249,250,251,252,253,254,223,
  49. 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
  50. 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255
  51. };
  52. static uchar to_upper_latin1[] = {
  53. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  54. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  55. 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  56. 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  57. 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  58. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  59. 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  60. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  61. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  62. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  63. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  64. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  65. 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  66. 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
  67. 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
  68. 208,209,210,211,212,213,214,247,216,217,218,219,220,221,222,255
  69. };
  70. static uchar sort_order_latin1[] = {
  71. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  72. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  73. 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  74. 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  75. 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  76. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  77. 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  78. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  79. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  80. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  81. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  82. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  83. 65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  84. 68, 78, 79, 79, 79, 79, 93,215,216, 85, 85, 85, 89, 89,222,223,
  85. 65, 65, 65, 65, 92, 91, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  86. 68, 78, 79, 79, 79, 79, 93,247,216, 85, 85, 85, 89, 89,222,255
  87. };
/*
  WL#1494 notes:

  We'll use cp1252 instead of iso-8859-1.
  cp1252 contains printable characters in the range 0x80-0x9F.
  In ISO 8859-1, these code points have no associated printable
  characters. Therefore, by converting from CP1252 to ISO 8859-1,
  one would lose the euro (for instance). Since most people are
  unaware of the difference, and since we don't really want a
  "Windows ANSI" to differ from a "Unix ANSI", we will:

  - continue to pretend the latin1 character set is ISO 8859-1
  - actually allow the storage of euro etc. so it's actually cp1252

  Also we'll map these five undefined cp1252 characters:
    0x81, 0x8D, 0x8F, 0x90, 0x9D
  into the corresponding control characters:
    U+0081, U+008D, U+008F, U+0090, U+009D,
  like ISO-8859-1 does. Otherwise, loading "mysqldump"
  output doesn't reproduce these undefined characters.
*/
  106. unsigned short cs_to_uni[256]={
  107. 0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,
  108. 0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F,
  109. 0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,
  110. 0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F,
  111. 0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,
  112. 0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F,
  113. 0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,
  114. 0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F,
  115. 0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,
  116. 0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F,
  117. 0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,
  118. 0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F,
  119. 0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,
  120. 0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F,
  121. 0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,
  122. 0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F,
  123. 0x20AC,0x0081,0x201A,0x0192,0x201E,0x2026,0x2020,0x2021,
  124. 0x02C6,0x2030,0x0160,0x2039,0x0152,0x008D,0x017D,0x008F,
  125. 0x0090,0x2018,0x2019,0x201C,0x201D,0x2022,0x2013,0x2014,
  126. 0x02DC,0x2122,0x0161,0x203A,0x0153,0x009D,0x017E,0x0178,
  127. 0x00A0,0x00A1,0x00A2,0x00A3,0x00A4,0x00A5,0x00A6,0x00A7,
  128. 0x00A8,0x00A9,0x00AA,0x00AB,0x00AC,0x00AD,0x00AE,0x00AF,
  129. 0x00B0,0x00B1,0x00B2,0x00B3,0x00B4,0x00B5,0x00B6,0x00B7,
  130. 0x00B8,0x00B9,0x00BA,0x00BB,0x00BC,0x00BD,0x00BE,0x00BF,
  131. 0x00C0,0x00C1,0x00C2,0x00C3,0x00C4,0x00C5,0x00C6,0x00C7,
  132. 0x00C8,0x00C9,0x00CA,0x00CB,0x00CC,0x00CD,0x00CE,0x00CF,
  133. 0x00D0,0x00D1,0x00D2,0x00D3,0x00D4,0x00D5,0x00D6,0x00D7,
  134. 0x00D8,0x00D9,0x00DA,0x00DB,0x00DC,0x00DD,0x00DE,0x00DF,
  135. 0x00E0,0x00E1,0x00E2,0x00E3,0x00E4,0x00E5,0x00E6,0x00E7,
  136. 0x00E8,0x00E9,0x00EA,0x00EB,0x00EC,0x00ED,0x00EE,0x00EF,
  137. 0x00F0,0x00F1,0x00F2,0x00F3,0x00F4,0x00F5,0x00F6,0x00F7,
  138. 0x00F8,0x00F9,0x00FA,0x00FB,0x00FC,0x00FD,0x00FE,0x00FF
  139. };
  140. unsigned char pl00[256]={
  141. 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,
  142. 0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,
  143. 0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,
  144. 0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,
  145. 0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,
  146. 0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,
  147. 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,
  148. 0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,
  149. 0x40,0x41,0x42,0x43,0x44,0x45,0x46,0x47,
  150. 0x48,0x49,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,
  151. 0x50,0x51,0x52,0x53,0x54,0x55,0x56,0x57,
  152. 0x58,0x59,0x5A,0x5B,0x5C,0x5D,0x5E,0x5F,
  153. 0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,
  154. 0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,
  155. 0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,
  156. 0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F,
  157. 0x00,0x81,0x00,0x00,0x00,0x00,0x00,0x00,
  158. 0x00,0x00,0x00,0x00,0x00,0x8D,0x00,0x8F,
  159. 0x90,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  160. 0x00,0x00,0x00,0x00,0x00,0x9D,0x00,0x00,
  161. 0xA0,0xA1,0xA2,0xA3,0xA4,0xA5,0xA6,0xA7,
  162. 0xA8,0xA9,0xAA,0xAB,0xAC,0xAD,0xAE,0xAF,
  163. 0xB0,0xB1,0xB2,0xB3,0xB4,0xB5,0xB6,0xB7,
  164. 0xB8,0xB9,0xBA,0xBB,0xBC,0xBD,0xBE,0xBF,
  165. 0xC0,0xC1,0xC2,0xC3,0xC4,0xC5,0xC6,0xC7,
  166. 0xC8,0xC9,0xCA,0xCB,0xCC,0xCD,0xCE,0xCF,
  167. 0xD0,0xD1,0xD2,0xD3,0xD4,0xD5,0xD6,0xD7,
  168. 0xD8,0xD9,0xDA,0xDB,0xDC,0xDD,0xDE,0xDF,
  169. 0xE0,0xE1,0xE2,0xE3,0xE4,0xE5,0xE6,0xE7,
  170. 0xE8,0xE9,0xEA,0xEB,0xEC,0xED,0xEE,0xEF,
  171. 0xF0,0xF1,0xF2,0xF3,0xF4,0xF5,0xF6,0xF7,
  172. 0xF8,0xF9,0xFA,0xFB,0xFC,0xFD,0xFE,0xFF
  173. };
  174. unsigned char pl01[256]={
  175. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  176. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  177. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  178. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  179. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  180. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  181. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  182. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  183. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  184. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  185. 0x00,0x00,0x8C,0x9C,0x00,0x00,0x00,0x00,
  186. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  187. 0x8A,0x9A,0x00,0x00,0x00,0x00,0x00,0x00,
  188. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  189. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  190. 0x9F,0x00,0x00,0x00,0x00,0x8E,0x9E,0x00,
  191. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  192. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  193. 0x00,0x00,0x83,0x00,0x00,0x00,0x00,0x00,
  194. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  195. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  196. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  197. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  198. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  199. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  200. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  201. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  202. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  203. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  204. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  205. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  206. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
  207. };
  208. unsigned char pl02[256]={
  209. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  210. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  211. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  212. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  213. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  214. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  215. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  216. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  217. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  218. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  219. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  220. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  221. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  222. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  223. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  224. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  225. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  226. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  227. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  228. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  229. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  230. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  231. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  232. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  233. 0x00,0x00,0x00,0x00,0x00,0x00,0x88,0x00,
  234. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  235. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  236. 0x00,0x00,0x00,0x00,0x98,0x00,0x00,0x00,
  237. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  238. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  239. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  240. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
  241. };
  242. unsigned char pl20[256]={
  243. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  244. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  245. 0x00,0x00,0x00,0x96,0x97,0x00,0x00,0x00,
  246. 0x91,0x92,0x82,0x00,0x93,0x94,0x84,0x00,
  247. 0x86,0x87,0x95,0x00,0x00,0x00,0x85,0x00,
  248. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  249. 0x89,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  250. 0x00,0x8B,0x9B,0x00,0x00,0x00,0x00,0x00,
  251. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  252. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  253. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  254. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  255. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  256. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  257. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  258. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  259. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  260. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  261. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  262. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  263. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  264. 0x00,0x00,0x00,0x00,0x80,0x00,0x00,0x00,
  265. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  266. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  267. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  268. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  269. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  270. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  271. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  272. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  273. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  274. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
  275. };
  276. unsigned char pl21[256]={
  277. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  278. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  279. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  280. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  281. 0x00,0x00,0x99,0x00,0x00,0x00,0x00,0x00,
  282. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  283. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  284. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  285. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  286. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  287. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  288. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  289. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  290. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  291. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  292. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  293. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  294. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  295. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  296. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  297. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  298. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  299. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  300. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  301. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  302. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  303. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  304. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  305. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  306. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  307. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
  308. 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
  309. };
  310. unsigned char *uni_to_cs[256]={
  311. pl00,pl01,pl02,NULL,NULL,NULL,NULL,NULL,
  312. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  313. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  314. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  315. pl20,pl21,NULL,NULL,NULL,NULL,NULL,NULL,
  316. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  317. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  318. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  319. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  320. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  321. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  322. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  323. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  324. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  325. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  326. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  327. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  328. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  329. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  330. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  331. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  332. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  333. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  334. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  335. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  336. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  337. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  338. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  339. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  340. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  341. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,
  342. NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL
  343. };
  344. static
  345. int my_mb_wc_latin1(CHARSET_INFO *cs __attribute__((unused)),
  346. my_wc_t *wc,
  347. const unsigned char *str,
  348. const unsigned char *end __attribute__((unused)))
  349. {
  350. if (str >= end)
  351. return MY_CS_TOOSMALL;
  352. *wc=cs_to_uni[*str];
  353. return (!wc[0] && str[0]) ? -1 : 1;
  354. }
  355. static
  356. int my_wc_mb_latin1(CHARSET_INFO *cs __attribute__((unused)),
  357. my_wc_t wc,
  358. unsigned char *str,
  359. unsigned char *end __attribute__((unused)))
  360. {
  361. unsigned char *pl;
  362. if (str >= end)
  363. return MY_CS_TOOSMALL;
  364. pl= uni_to_cs[(wc>>8) & 0xFF];
  365. str[0]= pl ? pl[wc & 0xFF] : '\0';
  366. return (!str[0] && wc) ? MY_CS_ILUNI : 1;
  367. }
  368. static MY_CHARSET_HANDLER my_charset_handler=
  369. {
  370. NULL, /* init */
  371. NULL,
  372. my_mbcharlen_8bit,
  373. my_numchars_8bit,
  374. my_charpos_8bit,
  375. my_well_formed_len_8bit,
  376. my_lengthsp_8bit,
  377. my_numcells_8bit,
  378. my_mb_wc_latin1,
  379. my_wc_mb_latin1,
  380. my_mb_ctype_8bit,
  381. my_caseup_str_8bit,
  382. my_casedn_str_8bit,
  383. my_caseup_8bit,
  384. my_casedn_8bit,
  385. my_snprintf_8bit,
  386. my_long10_to_str_8bit,
  387. my_longlong10_to_str_8bit,
  388. my_fill_8bit,
  389. my_strntol_8bit,
  390. my_strntoul_8bit,
  391. my_strntoll_8bit,
  392. my_strntoull_8bit,
  393. my_strntod_8bit,
  394. my_strtoll10_8bit,
  395. my_strntoull10rnd_8bit,
  396. my_scan_8bit
  397. };
  398. CHARSET_INFO my_charset_latin1=
  399. {
  400. 8,0,0, /* number */
  401. MY_CS_COMPILED | MY_CS_PRIMARY, /* state */
  402. "latin1", /* cs name */
  403. "latin1_swedish_ci", /* name */
  404. "", /* comment */
  405. NULL, /* tailoring */
  406. ctype_latin1,
  407. to_lower_latin1,
  408. to_upper_latin1,
  409. sort_order_latin1,
  410. NULL, /* contractions */
  411. NULL, /* sort_order_big*/
  412. cs_to_uni, /* tab_to_uni */
  413. NULL, /* tab_from_uni */
  414. my_unicase_default, /* caseinfo */
  415. NULL, /* state_map */
  416. NULL, /* ident_map */
  417. 1, /* strxfrm_multiply */
  418. 1, /* caseup_multiply */
  419. 1, /* casedn_multiply */
  420. 1, /* mbminlen */
  421. 1, /* mbmaxlen */
  422. 0, /* min_sort_char */
  423. 255, /* max_sort_char */
  424. ' ', /* pad char */
  425. 0, /* escape_with_backslash_is_dangerous */
  426. &my_charset_handler,
  427. &my_collation_8bit_simple_ci_handler
  428. };
/*
 * This file is the latin1 character set with German sorting
 *
 * The modern sort order is used, where:
 *
 * 'ä' -> "ae"
 * 'ö' -> "oe"
 * 'ü' -> "ue"
 * 'ß' -> "ss"
 */
  439. /*
  440. * This is a simple latin1 mapping table, which maps all accented
  441. * characters to their non-accented equivalents. Note: in this
  442. * table, '' is mapped to 'A', '' is mapped to 'Y', etc. - all
  443. * accented characters except the following are treated the same way.
  444. * , , , , ,
  445. */
  446. static uchar sort_order_latin1_de[] = {
  447. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  448. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  449. 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  450. 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  451. 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  452. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  453. 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  454. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  455. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  456. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  457. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  458. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  459. 65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  460. 68, 78, 79, 79, 79, 79,214,215,216, 85, 85, 85,220, 89,222,223,
  461. 65, 65, 65, 65,196, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  462. 68, 78, 79, 79, 79, 79,214,247,216, 85, 85, 85,220, 89,222, 89
  463. };
  464. /*
  465. same as sort_order_latin_de, but maps ALL accented chars to unaccented ones
  466. */
  467. uchar combo1map[]={
  468. 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
  469. 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
  470. 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
  471. 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
  472. 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  473. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95,
  474. 96, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
  475. 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,123,124,125,126,127,
  476. 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
  477. 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
  478. 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
  479. 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
  480. 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  481. 68, 78, 79, 79, 79, 79, 79,215,216, 85, 85, 85, 85, 89,222, 83,
  482. 65, 65, 65, 65, 65, 65, 92, 67, 69, 69, 69, 69, 73, 73, 73, 73,
  483. 68, 78, 79, 79, 79, 79, 79,247,216, 85, 85, 85, 85, 89,222, 89
  484. };
  485. uchar combo2map[]={
  486. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  487. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  488. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  489. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  490. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  491. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  492. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  493. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  494. 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0,83, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,
  495. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,69, 0, 0, 0, 0, 0,69, 0, 0, 0, 0
  496. };
/*
  Some notes about the following comparison rules:
  By definition, my_strnncoll_latin1_de() must work exactly as if we had
  called my_strnxfrm_latin1_de() on both strings and compared the
  resulting strings.

  This means that:
  'Ä' must also match "ÁE" and "Aè", because my_strnxfrm_latin1_de()
  will convert all of them to "AE".

  The other option would be to not do any accent removal in
  sort_order_latin1_de[] at all
*/
  507. static int my_strnncoll_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
  508. const uchar *a, uint a_length,
  509. const uchar *b, uint b_length,
  510. my_bool b_is_prefix)
  511. {
  512. const uchar *a_end= a + a_length;
  513. const uchar *b_end= b + b_length;
  514. uchar a_char, a_extend= 0, b_char, b_extend= 0;
  515. while ((a < a_end || a_extend) && (b < b_end || b_extend))
  516. {
  517. if (a_extend)
  518. {
  519. a_char=a_extend; a_extend=0;
  520. }
  521. else
  522. {
  523. a_extend=combo2map[*a];
  524. a_char=combo1map[*a++];
  525. }
  526. if (b_extend)
  527. {
  528. b_char=b_extend; b_extend=0;
  529. }
  530. else
  531. {
  532. b_extend=combo2map[*b];
  533. b_char=combo1map[*b++];
  534. }
  535. if (a_char != b_char)
  536. return (int) a_char - (int) b_char;
  537. }
  538. /*
  539. A simple test of string lengths won't work -- we test to see
  540. which string ran out first
  541. */
  542. return ((a < a_end || a_extend) ? (b_is_prefix ? 0 : 1) :
  543. (b < b_end || b_extend) ? -1 : 0);
  544. }
  545. static int my_strnncollsp_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
  546. const uchar *a, uint a_length,
  547. const uchar *b, uint b_length,
  548. my_bool diff_if_only_endspace_difference)
  549. {
  550. const uchar *a_end= a + a_length, *b_end= b + b_length;
  551. uchar a_char, a_extend= 0, b_char, b_extend= 0;
  552. int res;
  553. #ifndef VARCHAR_WITH_DIFF_ENDSPACE_ARE_DIFFERENT_FOR_UNIQUE
  554. diff_if_only_endspace_difference= 0;
  555. #endif
  556. while ((a < a_end || a_extend) && (b < b_end || b_extend))
  557. {
  558. if (a_extend)
  559. {
  560. a_char=a_extend;
  561. a_extend= 0;
  562. }
  563. else
  564. {
  565. a_extend= combo2map[*a];
  566. a_char= combo1map[*a++];
  567. }
  568. if (b_extend)
  569. {
  570. b_char= b_extend;
  571. b_extend= 0;
  572. }
  573. else
  574. {
  575. b_extend= combo2map[*b];
  576. b_char= combo1map[*b++];
  577. }
  578. if (a_char != b_char)
  579. return (int) a_char - (int) b_char;
  580. }
  581. /* Check if double character last */
  582. if (a_extend)
  583. return 1;
  584. if (b_extend)
  585. return -1;
  586. res= 0;
  587. if (a != a_end || b != b_end)
  588. {
  589. int swap= 1;
  590. if (diff_if_only_endspace_difference)
  591. res= 1; /* Assume 'a' is bigger */
  592. /*
  593. Check the next not space character of the longer key. If it's < ' ',
  594. then it's smaller than the other key.
  595. */
  596. if (a == a_end)
  597. {
  598. /* put shorter key in a */
  599. a_end= b_end;
  600. a= b;
  601. swap= -1; /* swap sign of result */
  602. res= -res;
  603. }
  604. for ( ; a < a_end ; a++)
  605. {
  606. if (*a != ' ')
  607. return (*a < ' ') ? -swap : swap;
  608. }
  609. }
  610. return res;
  611. }
  612. static int my_strnxfrm_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
  613. uchar * dest, uint len,
  614. const uchar * src, uint srclen)
  615. {
  616. const uchar *de = dest + len;
  617. const uchar *se = src + srclen;
  618. for ( ; src < se && dest < de ; src++)
  619. {
  620. uchar chr=combo1map[*src];
  621. *dest++=chr;
  622. if ((chr=combo2map[*src]) && dest < de)
  623. *dest++=chr;
  624. }
  625. if (dest < de)
  626. bfill(dest, de - dest, ' ');
  627. return (int) len;
  628. }
  629. void my_hash_sort_latin1_de(CHARSET_INFO *cs __attribute__((unused)),
  630. const uchar *key, uint len,
  631. ulong *nr1, ulong *nr2)
  632. {
  633. const uchar *end= key+len;
  634. /*
  635. Remove end space. We have to do this to be able to compare
  636. 'AE' and '' as identical
  637. */
  638. while (end > key && end[-1] == ' ')
  639. end--;
  640. for (; key < end ; key++)
  641. {
  642. uint X= (uint) combo1map[(uint) *key];
  643. nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
  644. nr2[0]+=3;
  645. if ((X= combo2map[*key]))
  646. {
  647. nr1[0]^=(ulong) ((((uint) nr1[0] & 63)+nr2[0]) * X) + (nr1[0] << 8);
  648. nr2[0]+=3;
  649. }
  650. }
  651. }
  652. static MY_COLLATION_HANDLER my_collation_german2_ci_handler=
  653. {
  654. NULL, /* init */
  655. my_strnncoll_latin1_de,
  656. my_strnncollsp_latin1_de,
  657. my_strnxfrm_latin1_de,
  658. my_strnxfrmlen_simple,
  659. my_like_range_simple,
  660. my_wildcmp_8bit,
  661. my_strcasecmp_8bit,
  662. my_instr_simple,
  663. my_hash_sort_latin1_de,
  664. my_propagate_complex
  665. };
  666. CHARSET_INFO my_charset_latin1_german2_ci=
  667. {
  668. 31,0,0, /* number */
  669. MY_CS_COMPILED|MY_CS_STRNXFRM, /* state */
  670. "latin1", /* cs name */
  671. "latin1_german2_ci", /* name */
  672. "", /* comment */
  673. NULL, /* tailoring */
  674. ctype_latin1,
  675. to_lower_latin1,
  676. to_upper_latin1,
  677. sort_order_latin1_de,
  678. NULL, /* contractions */
  679. NULL, /* sort_order_big*/
  680. cs_to_uni, /* tab_to_uni */
  681. NULL, /* tab_from_uni */
  682. my_unicase_default, /* caseinfo */
  683. NULL, /* state_map */
  684. NULL, /* ident_map */
  685. 2, /* strxfrm_multiply */
  686. 1, /* caseup_multiply */
  687. 1, /* casedn_multiply */
  688. 1, /* mbminlen */
  689. 1, /* mbmaxlen */
  690. 0, /* min_sort_char */
  691. 247, /* max_sort_char */
  692. ' ', /* pad char */
  693. 0, /* escape_with_backslash_is_dangerous */
  694. &my_charset_handler,
  695. &my_collation_german2_ci_handler
  696. };
  697. CHARSET_INFO my_charset_latin1_bin=
  698. {
  699. 47,0,0, /* number */
  700. MY_CS_COMPILED|MY_CS_BINSORT, /* state */
  701. "latin1", /* cs name */
  702. "latin1_bin", /* name */
  703. "", /* comment */
  704. NULL, /* tailoring */
  705. ctype_latin1,
  706. to_lower_latin1,
  707. to_upper_latin1,
  708. NULL, /* sort_order */
  709. NULL, /* contractions */
  710. NULL, /* sort_order_big*/
  711. cs_to_uni, /* tab_to_uni */
  712. NULL, /* tab_from_uni */
  713. my_unicase_default, /* caseinfo */
  714. NULL, /* state_map */
  715. NULL, /* ident_map */
  716. 1, /* strxfrm_multiply */
  717. 1, /* caseup_multiply */
  718. 1, /* casedn_multiply */
  719. 1, /* mbminlen */
  720. 1, /* mbmaxlen */
  721. 0, /* min_sort_char */
  722. 255, /* max_sort_char */
  723. ' ', /* pad char */
  724. 0, /* escape_with_backslash_is_dangerous */
  725. &my_charset_handler,
  726. &my_collation_8bit_bin_handler
  727. };