You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

3300 lines
95 KiB

  1. /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
  2. // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4:
  3. #ident "$Id$"
  4. /*
  5. COPYING CONDITIONS NOTICE:
  6. This program is free software; you can redistribute it and/or modify
  7. it under the terms of version 2 of the GNU General Public License as
  8. published by the Free Software Foundation, and provided that the
  9. following conditions are met:
  10. * Redistributions of source code must retain this COPYING
  11. CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
  12. DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
  13. PATENT MARKING NOTICE (below), and the PATENT RIGHTS
  14. GRANT (below).
  15. * Redistributions in binary form must reproduce this COPYING
  16. CONDITIONS NOTICE, the COPYRIGHT NOTICE (below), the
  17. DISCLAIMER (below), the UNIVERSITY PATENT NOTICE (below), the
  18. PATENT MARKING NOTICE (below), and the PATENT RIGHTS
  19. GRANT (below) in the documentation and/or other materials
  20. provided with the distribution.
  21. You should have received a copy of the GNU General Public License
  22. along with this program; if not, write to the Free Software
  23. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  24. 02110-1301, USA.
  25. COPYRIGHT NOTICE:
  26. TokuDB, Tokutek Fractal Tree Indexing Library.
  27. Copyright (C) 2007-2013 Tokutek, Inc.
  28. DISCLAIMER:
  29. This program is distributed in the hope that it will be useful, but
  30. WITHOUT ANY WARRANTY; without even the implied warranty of
  31. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  32. General Public License for more details.
  33. UNIVERSITY PATENT NOTICE:
  34. The technology is licensed by the Massachusetts Institute of
  35. Technology, Rutgers State University of New Jersey, and the Research
  36. Foundation of State University of New York at Stony Brook under
  37. United States of America Serial No. 11/760379 and to the patents
  38. and/or patent applications resulting from it.
  39. PATENT MARKING NOTICE:
  40. This software is covered by US Patent No. 8,185,551.
  41. PATENT RIGHTS GRANT:
  42. "THIS IMPLEMENTATION" means the copyrightable works distributed by
  43. Tokutek as part of the Fractal Tree project.
  44. "PATENT CLAIMS" means the claims of patents that are owned or
  45. licensable by Tokutek, both currently or in the future; and that in
  46. the absence of this license would be infringed by THIS
  47. IMPLEMENTATION or by using or running THIS IMPLEMENTATION.
  48. "PATENT CHALLENGE" shall mean a challenge to the validity,
  49. patentability, enforceability and/or non-infringement of any of the
  50. PATENT CLAIMS or otherwise opposing any of the PATENT CLAIMS.
  51. Tokutek hereby grants to you, for the term and geographical scope of
  52. the PATENT CLAIMS, a non-exclusive, no-charge, royalty-free,
  53. irrevocable (except as stated in this section) patent license to
  54. make, have made, use, offer to sell, sell, import, transfer, and
  55. otherwise run, modify, and propagate the contents of THIS
  56. IMPLEMENTATION, where such license applies only to the PATENT
  57. CLAIMS. This grant does not include claims that would be infringed
  58. only as a consequence of further modifications of THIS
  59. IMPLEMENTATION. If you or your agent or licensee institute or order
  60. or agree to the institution of patent litigation against any entity
  61. (including a cross-claim or counterclaim in a lawsuit) alleging that
  62. THIS IMPLEMENTATION constitutes direct or contributory patent
  63. infringement, or inducement of patent infringement, then any rights
  64. granted to you under this License shall terminate as of the date
  65. such litigation is filed. If you or your agent or exclusive
  66. licensee institute or order or agree to the institution of a PATENT
  67. CHALLENGE, then Tokutek may terminate any rights granted to you
  68. under this License.
  69. */
  70. #ident "Copyright (c) 2007-2013 Tokutek Inc. All rights reserved."
  71. #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
  72. #include "hatoku_cmp.h"
  73. #ifdef WORDS_BIGENDIAN
  74. #error "WORDS_BIGENDIAN not supported"
  75. #endif
  76. // returns true if the field is a valid field to be used
  77. // in a TokuDB table. The non-valid fields are those
  78. // that have been deprecated since before 5.1, and can
  79. // only exist through upgrades of old versions of MySQL
  80. bool field_valid_for_tokudb_table(Field* field) {
  81. bool ret_val = false;
  82. enum_field_types mysql_type = field->real_type();
  83. switch (mysql_type) {
  84. case MYSQL_TYPE_LONG:
  85. case MYSQL_TYPE_LONGLONG:
  86. case MYSQL_TYPE_TINY:
  87. case MYSQL_TYPE_SHORT:
  88. case MYSQL_TYPE_INT24:
  89. case MYSQL_TYPE_DATE:
  90. case MYSQL_TYPE_YEAR:
  91. case MYSQL_TYPE_NEWDATE:
  92. case MYSQL_TYPE_ENUM:
  93. case MYSQL_TYPE_SET:
  94. case MYSQL_TYPE_TIME:
  95. case MYSQL_TYPE_DATETIME:
  96. case MYSQL_TYPE_TIMESTAMP:
  97. case MYSQL_TYPE_DOUBLE:
  98. case MYSQL_TYPE_FLOAT:
  99. #if 50600 <= MYSQL_VERSION_ID
  100. case MYSQL_TYPE_DATETIME2:
  101. case MYSQL_TYPE_TIMESTAMP2:
  102. case MYSQL_TYPE_TIME2:
  103. #endif
  104. case MYSQL_TYPE_NEWDECIMAL:
  105. case MYSQL_TYPE_BIT:
  106. case MYSQL_TYPE_STRING:
  107. case MYSQL_TYPE_VARCHAR:
  108. case MYSQL_TYPE_TINY_BLOB:
  109. case MYSQL_TYPE_MEDIUM_BLOB:
  110. case MYSQL_TYPE_BLOB:
  111. case MYSQL_TYPE_LONG_BLOB:
  112. ret_val = true;
  113. goto exit;
  114. //
  115. // I believe these are old types that are no longer
  116. // in any 5.1 tables, so tokudb does not need
  117. // to worry about them
  118. // Putting in this assert in case I am wrong.
  119. // Do not support geometry yet.
  120. //
  121. case MYSQL_TYPE_GEOMETRY:
  122. case MYSQL_TYPE_DECIMAL:
  123. case MYSQL_TYPE_VAR_STRING:
  124. case MYSQL_TYPE_NULL:
  125. ret_val = false;
  126. }
  127. exit:
  128. return ret_val;
  129. }
  130. void get_var_field_info(
  131. uint32_t* field_len, // output: length of field
  132. uint32_t* start_offset, // output, length of offset where data starts
  133. uint32_t var_field_index, //input, index of var field we want info on
  134. const uchar* var_field_offset_ptr, //input, pointer to where offset information for all var fields begins
  135. uint32_t num_offset_bytes //input, number of bytes used to store offsets starting at var_field_offset_ptr
  136. )
  137. {
  138. uint32_t data_start_offset = 0;
  139. uint32_t data_end_offset = 0;
  140. switch (num_offset_bytes) {
  141. case (1):
  142. data_end_offset = (var_field_offset_ptr + var_field_index)[0];
  143. break;
  144. case (2):
  145. data_end_offset = uint2korr(var_field_offset_ptr + 2*var_field_index);
  146. break;
  147. default:
  148. assert(false);
  149. break;
  150. }
  151. if (var_field_index) {
  152. switch (num_offset_bytes) {
  153. case (1):
  154. data_start_offset = (var_field_offset_ptr + var_field_index - 1)[0];
  155. break;
  156. case (2):
  157. data_start_offset = uint2korr(var_field_offset_ptr + 2*(var_field_index-1));
  158. break;
  159. default:
  160. assert(false);
  161. break;
  162. }
  163. }
  164. else {
  165. data_start_offset = 0;
  166. }
  167. *start_offset = data_start_offset;
  168. assert(data_end_offset >= data_start_offset);
  169. *field_len = data_end_offset - data_start_offset;
  170. }
  171. void get_blob_field_info(
  172. uint32_t* start_offset,
  173. uint32_t len_of_offsets,
  174. const uchar* var_field_data_ptr,
  175. uint32_t num_offset_bytes
  176. )
  177. {
  178. uint32_t data_end_offset;
  179. //
  180. // need to set var_field_data_ptr to point to beginning of blobs, which
  181. // is at the end of the var stuff (if they exist), if var stuff does not exist
  182. // then the bottom variable will be 0, and var_field_data_ptr is already
  183. // set correctly
  184. //
  185. if (len_of_offsets) {
  186. switch (num_offset_bytes) {
  187. case (1):
  188. data_end_offset = (var_field_data_ptr - 1)[0];
  189. break;
  190. case (2):
  191. data_end_offset = uint2korr(var_field_data_ptr - 2);
  192. break;
  193. default:
  194. assert(false);
  195. break;
  196. }
  197. }
  198. else {
  199. data_end_offset = 0;
  200. }
  201. *start_offset = data_end_offset;
  202. }
  203. // this function is pattern matched from
  204. // InnoDB's get_innobase_type_from_mysql_type
  205. TOKU_TYPE mysql_to_toku_type (Field* field) {
  206. TOKU_TYPE ret_val = toku_type_unknown;
  207. enum_field_types mysql_type = field->real_type();
  208. switch (mysql_type) {
  209. case MYSQL_TYPE_LONG:
  210. case MYSQL_TYPE_LONGLONG:
  211. case MYSQL_TYPE_TINY:
  212. case MYSQL_TYPE_SHORT:
  213. case MYSQL_TYPE_INT24:
  214. case MYSQL_TYPE_DATE:
  215. case MYSQL_TYPE_YEAR:
  216. case MYSQL_TYPE_NEWDATE:
  217. case MYSQL_TYPE_ENUM:
  218. case MYSQL_TYPE_SET:
  219. ret_val = toku_type_int;
  220. goto exit;
  221. case MYSQL_TYPE_TIME:
  222. case MYSQL_TYPE_DATETIME:
  223. case MYSQL_TYPE_TIMESTAMP:
  224. #ifdef MARIADB_BASE_VERSION
  225. // case to handle fractional seconds in MariaDB
  226. //
  227. if (field->key_type() == HA_KEYTYPE_BINARY) {
  228. ret_val = toku_type_fixbinary;
  229. goto exit;
  230. }
  231. #endif
  232. ret_val = toku_type_int;
  233. goto exit;
  234. case MYSQL_TYPE_DOUBLE:
  235. ret_val = toku_type_double;
  236. goto exit;
  237. case MYSQL_TYPE_FLOAT:
  238. ret_val = toku_type_float;
  239. goto exit;
  240. #if 50600 <= MYSQL_VERSION_ID
  241. case MYSQL_TYPE_DATETIME2:
  242. case MYSQL_TYPE_TIMESTAMP2:
  243. case MYSQL_TYPE_TIME2:
  244. #endif
  245. case MYSQL_TYPE_NEWDECIMAL:
  246. case MYSQL_TYPE_BIT:
  247. ret_val = toku_type_fixbinary;
  248. goto exit;
  249. case MYSQL_TYPE_STRING:
  250. if (field->binary()) {
  251. ret_val = toku_type_fixbinary;
  252. }
  253. else {
  254. ret_val = toku_type_fixstring;
  255. }
  256. goto exit;
  257. case MYSQL_TYPE_VARCHAR:
  258. if (field->binary()) {
  259. ret_val = toku_type_varbinary;
  260. }
  261. else {
  262. ret_val = toku_type_varstring;
  263. }
  264. goto exit;
  265. case MYSQL_TYPE_TINY_BLOB:
  266. case MYSQL_TYPE_MEDIUM_BLOB:
  267. case MYSQL_TYPE_BLOB:
  268. case MYSQL_TYPE_LONG_BLOB:
  269. ret_val = toku_type_blob;
  270. goto exit;
  271. //
  272. // I believe these are old types that are no longer
  273. // in any 5.1 tables, so tokudb does not need
  274. // to worry about them
  275. // Putting in this assert in case I am wrong.
  276. // Do not support geometry yet.
  277. //
  278. case MYSQL_TYPE_GEOMETRY:
  279. case MYSQL_TYPE_DECIMAL:
  280. case MYSQL_TYPE_VAR_STRING:
  281. case MYSQL_TYPE_NULL:
  282. assert(false);
  283. }
  284. exit:
  285. return ret_val;
  286. }
  287. static inline CHARSET_INFO* get_charset_from_num (uint32_t charset_number) {
  288. //
  289. // patternmatched off of InnoDB, due to MySQL bug 42649
  290. //
  291. if (charset_number == default_charset_info->number) {
  292. return default_charset_info;
  293. }
  294. else if (charset_number == my_charset_latin1.number) {
  295. return &my_charset_latin1;
  296. }
  297. else {
  298. return get_charset(charset_number, MYF(MY_WME));
  299. }
  300. }
  301. //
  302. // used to read the length of a variable sized field in a tokudb key (buf).
  303. //
  304. static inline uint32_t get_length_from_var_tokudata (uchar* buf, uint32_t length_bytes) {
  305. uint32_t length = (uint32_t)(buf[0]);
  306. if (length_bytes == 2) {
  307. uint32_t rest_of_length = (uint32_t)buf[1];
  308. length += rest_of_length<<8;
  309. }
  310. return length;
  311. }
  //
  // Deduces how many bytes are used to store the length of a
  // varstring/varbinary in a key field stored in tokudb.
  // Parameters:
  //      [in]    max_num_bytes - maximum number of bytes the field can hold
  // Returns:
  //      1 if the maximum fits in one byte (at most 255), 2 otherwise
  //
  static inline uint32_t get_length_bytes_from_max(uint32_t max_num_bytes) {
      if (max_num_bytes <= 255) {
          return 1;
      }
      return 2;
  }
  319. //
  320. // assuming MySQL in little endian, and we are storing in little endian
  321. //
  322. static inline uchar* pack_toku_int (uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
  323. switch (num_bytes) {
  324. case (1):
  325. memcpy(to_tokudb, from_mysql, 1);
  326. break;
  327. case (2):
  328. memcpy(to_tokudb, from_mysql, 2);
  329. break;
  330. case (3):
  331. memcpy(to_tokudb, from_mysql, 3);
  332. break;
  333. case (4):
  334. memcpy(to_tokudb, from_mysql, 4);
  335. break;
  336. case (8):
  337. memcpy(to_tokudb, from_mysql, 8);
  338. break;
  339. default:
  340. assert(false);
  341. }
  342. return to_tokudb+num_bytes;
  343. }
  344. //
  345. // assuming MySQL in little endian, and we are unpacking to little endian
  346. //
  347. static inline uchar* unpack_toku_int(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
  348. switch (num_bytes) {
  349. case (1):
  350. memcpy(to_mysql, from_tokudb, 1);
  351. break;
  352. case (2):
  353. memcpy(to_mysql, from_tokudb, 2);
  354. break;
  355. case (3):
  356. memcpy(to_mysql, from_tokudb, 3);
  357. break;
  358. case (4):
  359. memcpy(to_mysql, from_tokudb, 4);
  360. break;
  361. case (8):
  362. memcpy(to_mysql, from_tokudb, 8);
  363. break;
  364. default:
  365. assert(false);
  366. }
  367. return from_tokudb+num_bytes;
  368. }
  369. static inline int cmp_toku_int (uchar* a_buf, uchar* b_buf, bool is_unsigned, uint32_t num_bytes) {
  370. int ret_val = 0;
  371. //
  372. // case for unsigned integers
  373. //
  374. if (is_unsigned) {
  375. uint32_t a_num, b_num = 0;
  376. uint64_t a_big_num, b_big_num = 0;
  377. switch (num_bytes) {
  378. case (1):
  379. a_num = *a_buf;
  380. b_num = *b_buf;
  381. ret_val = a_num-b_num;
  382. goto exit;
  383. case (2):
  384. a_num = uint2korr(a_buf);
  385. b_num = uint2korr(b_buf);
  386. ret_val = a_num-b_num;
  387. goto exit;
  388. case (3):
  389. a_num = uint3korr(a_buf);
  390. b_num = uint3korr(b_buf);
  391. ret_val = a_num-b_num;
  392. goto exit;
  393. case (4):
  394. a_num = uint4korr(a_buf);
  395. b_num = uint4korr(b_buf);
  396. if (a_num < b_num) {
  397. ret_val = -1; goto exit;
  398. }
  399. if (a_num > b_num) {
  400. ret_val = 1; goto exit;
  401. }
  402. ret_val = 0;
  403. goto exit;
  404. case (8):
  405. a_big_num = uint8korr(a_buf);
  406. b_big_num = uint8korr(b_buf);
  407. if (a_big_num < b_big_num) {
  408. ret_val = -1; goto exit;
  409. }
  410. else if (a_big_num > b_big_num) {
  411. ret_val = 1; goto exit;
  412. }
  413. ret_val = 0;
  414. goto exit;
  415. default:
  416. assert(false);
  417. }
  418. }
  419. //
  420. // case for signed integers
  421. //
  422. else {
  423. int32_t a_num, b_num = 0;
  424. int64_t a_big_num, b_big_num = 0;
  425. switch (num_bytes) {
  426. case (1):
  427. a_num = *(signed char *)a_buf;
  428. b_num = *(signed char *)b_buf;
  429. ret_val = a_num-b_num;
  430. goto exit;
  431. case (2):
  432. a_num = sint2korr(a_buf);
  433. b_num = sint2korr(b_buf);
  434. ret_val = a_num-b_num;
  435. goto exit;
  436. case (3):
  437. a_num = sint3korr(a_buf);
  438. b_num = sint3korr(b_buf);
  439. ret_val = a_num - b_num;
  440. goto exit;
  441. case (4):
  442. a_num = sint4korr(a_buf);
  443. b_num = sint4korr(b_buf);
  444. if (a_num < b_num) {
  445. ret_val = -1; goto exit;
  446. }
  447. if (a_num > b_num) {
  448. ret_val = 1; goto exit;
  449. }
  450. ret_val = 0;
  451. goto exit;
  452. case (8):
  453. a_big_num = sint8korr(a_buf);
  454. b_big_num = sint8korr(b_buf);
  455. if (a_big_num < b_big_num) {
  456. ret_val = -1; goto exit;
  457. }
  458. else if (a_big_num > b_big_num) {
  459. ret_val = 1; goto exit;
  460. }
  461. ret_val = 0;
  462. goto exit;
  463. default:
  464. assert(false);
  465. }
  466. }
  467. //
  468. // if this is hit, indicates bug in writing of this function
  469. //
  470. assert(false);
  471. exit:
  472. return ret_val;
  473. }
  474. static inline uchar* pack_toku_double (uchar* to_tokudb, uchar* from_mysql) {
  475. memcpy(to_tokudb, from_mysql, sizeof(double));
  476. return to_tokudb + sizeof(double);
  477. }
  478. static inline uchar* unpack_toku_double(uchar* to_mysql, uchar* from_tokudb) {
  479. memcpy(to_mysql, from_tokudb, sizeof(double));
  480. return from_tokudb + sizeof(double);
  481. }
  482. static inline int cmp_toku_double(uchar* a_buf, uchar* b_buf) {
  483. int ret_val;
  484. double a_num;
  485. double b_num;
  486. doubleget(a_num, a_buf);
  487. doubleget(b_num, b_buf);
  488. if (a_num < b_num) {
  489. ret_val = -1;
  490. goto exit;
  491. }
  492. else if (a_num > b_num) {
  493. ret_val = 1;
  494. goto exit;
  495. }
  496. ret_val = 0;
  497. exit:
  498. return ret_val;
  499. }
  500. static inline uchar* pack_toku_float (uchar* to_tokudb, uchar* from_mysql) {
  501. memcpy(to_tokudb, from_mysql, sizeof(float));
  502. return to_tokudb + sizeof(float);
  503. }
  504. static inline uchar* unpack_toku_float(uchar* to_mysql, uchar* from_tokudb) {
  505. memcpy(to_mysql, from_tokudb, sizeof(float));
  506. return from_tokudb + sizeof(float);
  507. }
  508. static inline int cmp_toku_float(uchar* a_buf, uchar* b_buf) {
  509. int ret_val;
  510. float a_num;
  511. float b_num;
  512. //
  513. // This is the way Field_float::cmp gets the floats from the buffers
  514. //
  515. memcpy(&a_num, a_buf, sizeof(float));
  516. memcpy(&b_num, b_buf, sizeof(float));
  517. if (a_num < b_num) {
  518. ret_val = -1;
  519. goto exit;
  520. }
  521. else if (a_num > b_num) {
  522. ret_val = 1;
  523. goto exit;
  524. }
  525. ret_val = 0;
  526. exit:
  527. return ret_val;
  528. }
  529. static inline uchar* pack_toku_binary(uchar* to_tokudb, uchar* from_mysql, uint32_t num_bytes) {
  530. memcpy(to_tokudb, from_mysql, num_bytes);
  531. return to_tokudb + num_bytes;
  532. }
  533. static inline uchar* unpack_toku_binary(uchar* to_mysql, uchar* from_tokudb, uint32_t num_bytes) {
  534. memcpy(to_mysql, from_tokudb, num_bytes);
  535. return from_tokudb + num_bytes;
  536. }
  537. static inline int cmp_toku_binary(
  538. uchar* a_buf,
  539. uint32_t a_num_bytes,
  540. uchar* b_buf,
  541. uint32_t b_num_bytes
  542. )
  543. {
  544. int ret_val = 0;
  545. uint32_t num_bytes_to_cmp = (a_num_bytes < b_num_bytes) ? a_num_bytes : b_num_bytes;
  546. ret_val = memcmp(a_buf, b_buf, num_bytes_to_cmp);
  547. if ((ret_val != 0) || (a_num_bytes == b_num_bytes)) {
  548. goto exit;
  549. }
  550. if (a_num_bytes < b_num_bytes) {
  551. ret_val = -1;
  552. goto exit;
  553. }
  554. else {
  555. ret_val = 1;
  556. goto exit;
  557. }
  558. exit:
  559. return ret_val;
  560. }
  561. //
  562. // partially copied from below
  563. //
  564. uchar* pack_toku_varbinary_from_desc(
  565. uchar* to_tokudb,
  566. const uchar* from_desc,
  567. uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
  568. uint32_t field_length //length of field
  569. )
  570. {
  571. uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
  572. uint32_t length = field_length;
  573. set_if_smaller(length, key_part_length);
  574. //
  575. // copy the length bytes, assuming both are in little endian
  576. //
  577. to_tokudb[0] = (uchar)length & 255;
  578. if (length_bytes_in_tokudb > 1) {
  579. to_tokudb[1] = (uchar) (length >> 8);
  580. }
  581. //
  582. // copy the string
  583. //
  584. memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
  585. return to_tokudb + length + length_bytes_in_tokudb;
  586. }
  587. static inline uchar* pack_toku_varbinary(
  588. uchar* to_tokudb,
  589. uchar* from_mysql,
  590. uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
  591. uint32_t max_num_bytes
  592. )
  593. {
  594. uint32_t length = 0;
  595. uint32_t length_bytes_in_tokudb;
  596. switch (length_bytes_in_mysql) {
  597. case (0):
  598. length = max_num_bytes;
  599. break;
  600. case (1):
  601. length = (uint32_t)(*from_mysql);
  602. break;
  603. case (2):
  604. length = uint2korr(from_mysql);
  605. break;
  606. case (3):
  607. length = uint3korr(from_mysql);
  608. break;
  609. case (4):
  610. length = uint4korr(from_mysql);
  611. break;
  612. }
  613. //
  614. // from this point on, functionality equivalent to pack_toku_varbinary_from_desc
  615. //
  616. set_if_smaller(length,max_num_bytes);
  617. length_bytes_in_tokudb = get_length_bytes_from_max(max_num_bytes);
  618. //
  619. // copy the length bytes, assuming both are in little endian
  620. //
  621. to_tokudb[0] = (uchar)length & 255;
  622. if (length_bytes_in_tokudb > 1) {
  623. to_tokudb[1] = (uchar) (length >> 8);
  624. }
  625. //
  626. // copy the string
  627. //
  628. memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
  629. return to_tokudb + length + length_bytes_in_tokudb;
  630. }
  631. static inline uchar* unpack_toku_varbinary(
  632. uchar* to_mysql,
  633. uchar* from_tokudb,
  634. uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
  635. uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
  636. )
  637. {
  638. uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
  639. //
  640. // copy the length into the mysql buffer
  641. //
  642. switch (length_bytes_in_mysql) {
  643. case (0):
  644. break;
  645. case (1):
  646. *to_mysql = (uchar) length;
  647. break;
  648. case (2):
  649. int2store(to_mysql, length);
  650. break;
  651. case (3):
  652. int3store(to_mysql, length);
  653. break;
  654. case (4):
  655. int4store(to_mysql, length);
  656. break;
  657. default:
  658. assert(false);
  659. }
  660. //
  661. // copy the binary data
  662. //
  663. memcpy(to_mysql + length_bytes_in_mysql, from_tokudb + length_bytes_in_tokudb, length);
  664. return from_tokudb + length_bytes_in_tokudb+ length;
  665. }
  666. static inline int cmp_toku_varbinary(
  667. uchar* a_buf,
  668. uchar* b_buf,
  669. uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
  670. uint32_t* a_bytes_read,
  671. uint32_t* b_bytes_read
  672. )
  673. {
  674. int ret_val = 0;
  675. uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
  676. uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
  677. ret_val = cmp_toku_binary(
  678. a_buf + length_bytes,
  679. a_len,
  680. b_buf + length_bytes,
  681. b_len
  682. );
  683. *a_bytes_read = a_len + length_bytes;
  684. *b_bytes_read = b_len + length_bytes;
  685. return ret_val;
  686. }
  687. static inline uchar* pack_toku_blob(
  688. uchar* to_tokudb,
  689. uchar* from_mysql,
  690. uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
  691. uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
  692. uint32_t max_num_bytes,
  693. #if MYSQL_VERSION_ID >= 50600
  694. const CHARSET_INFO* charset
  695. #else
  696. CHARSET_INFO* charset
  697. #endif
  698. )
  699. {
  700. uint32_t length = 0;
  701. uint32_t local_char_length = 0;
  702. uchar* blob_buf = NULL;
  703. switch (length_bytes_in_mysql) {
  704. case (0):
  705. length = max_num_bytes;
  706. break;
  707. case (1):
  708. length = (uint32_t)(*from_mysql);
  709. break;
  710. case (2):
  711. length = uint2korr(from_mysql);
  712. break;
  713. case (3):
  714. length = uint3korr(from_mysql);
  715. break;
  716. case (4):
  717. length = uint4korr(from_mysql);
  718. break;
  719. }
  720. set_if_smaller(length,max_num_bytes);
  721. memcpy(&blob_buf,from_mysql+length_bytes_in_mysql,sizeof(uchar *));
  722. local_char_length= ((charset->mbmaxlen > 1) ?
  723. max_num_bytes/charset->mbmaxlen : max_num_bytes);
  724. if (length > local_char_length)
  725. {
  726. local_char_length= my_charpos(
  727. charset,
  728. blob_buf,
  729. blob_buf+length,
  730. local_char_length
  731. );
  732. set_if_smaller(length, local_char_length);
  733. }
  734. //
  735. // copy the length bytes, assuming both are in little endian
  736. //
  737. to_tokudb[0] = (uchar)length & 255;
  738. if (length_bytes_in_tokudb > 1) {
  739. to_tokudb[1] = (uchar) (length >> 8);
  740. }
  741. //
  742. // copy the string
  743. //
  744. memcpy(to_tokudb + length_bytes_in_tokudb, blob_buf, length);
  745. return to_tokudb + length + length_bytes_in_tokudb;
  746. }
  747. static inline uchar* unpack_toku_blob(
  748. uchar* to_mysql,
  749. uchar* from_tokudb,
  750. uint32_t length_bytes_in_tokudb, // number of bytes used to encode length in from_tokudb
  751. uint32_t length_bytes_in_mysql // number of bytes used to encode length in to_mysql
  752. )
  753. {
  754. uint32_t length = get_length_from_var_tokudata(from_tokudb, length_bytes_in_tokudb);
  755. uchar* blob_pos = NULL;
  756. //
  757. // copy the length into the mysql buffer
  758. //
  759. switch (length_bytes_in_mysql) {
  760. case (0):
  761. break;
  762. case (1):
  763. *to_mysql = (uchar) length;
  764. break;
  765. case (2):
  766. int2store(to_mysql, length);
  767. break;
  768. case (3):
  769. int3store(to_mysql, length);
  770. break;
  771. case (4):
  772. int4store(to_mysql, length);
  773. break;
  774. default:
  775. assert(false);
  776. }
  777. //
  778. // copy the binary data
  779. //
  780. blob_pos = from_tokudb + length_bytes_in_tokudb;
  781. memcpy(to_mysql + length_bytes_in_mysql, &blob_pos, sizeof(uchar *));
  782. return from_tokudb + length_bytes_in_tokudb+ length;
  783. }
  784. //
  785. // partially copied from below
  786. //
  787. uchar* pack_toku_varstring_from_desc(
  788. uchar* to_tokudb,
  789. const uchar* from_desc,
  790. uint32_t key_part_length, //number of bytes to use to encode the length in to_tokudb
  791. uint32_t field_length,
  792. uint32_t charset_num//length of field
  793. )
  794. {
  795. CHARSET_INFO* charset = NULL;
  796. uint32_t length_bytes_in_tokudb = get_length_bytes_from_max(key_part_length);
  797. uint32_t length = field_length;
  798. uint32_t local_char_length = 0;
  799. set_if_smaller(length, key_part_length);
  800. charset = get_charset_from_num(charset_num);
  801. //
  802. // copy the string
  803. //
  804. local_char_length= ((charset->mbmaxlen > 1) ?
  805. key_part_length/charset->mbmaxlen : key_part_length);
  806. if (length > local_char_length)
  807. {
  808. local_char_length= my_charpos(
  809. charset,
  810. from_desc,
  811. from_desc+length,
  812. local_char_length
  813. );
  814. set_if_smaller(length, local_char_length);
  815. }
  816. //
  817. // copy the length bytes, assuming both are in little endian
  818. //
  819. to_tokudb[0] = (uchar)length & 255;
  820. if (length_bytes_in_tokudb > 1) {
  821. to_tokudb[1] = (uchar) (length >> 8);
  822. }
  823. //
  824. // copy the string
  825. //
  826. memcpy(to_tokudb + length_bytes_in_tokudb, from_desc, length);
  827. return to_tokudb + length + length_bytes_in_tokudb;
  828. }
  829. static inline uchar* pack_toku_varstring(
  830. uchar* to_tokudb,
  831. uchar* from_mysql,
  832. uint32_t length_bytes_in_tokudb, //number of bytes to use to encode the length in to_tokudb
  833. uint32_t length_bytes_in_mysql, //number of bytes used to encode the length in from_mysql
  834. uint32_t max_num_bytes,
  835. #if MYSQL_VERSION_ID >= 50600
  836. const CHARSET_INFO *charset
  837. #else
  838. CHARSET_INFO* charset
  839. #endif
  840. )
  841. {
  842. uint32_t length = 0;
  843. uint32_t local_char_length = 0;
  844. switch (length_bytes_in_mysql) {
  845. case (0):
  846. length = max_num_bytes;
  847. break;
  848. case (1):
  849. length = (uint32_t)(*from_mysql);
  850. break;
  851. case (2):
  852. length = uint2korr(from_mysql);
  853. break;
  854. case (3):
  855. length = uint3korr(from_mysql);
  856. break;
  857. case (4):
  858. length = uint4korr(from_mysql);
  859. break;
  860. }
  861. set_if_smaller(length,max_num_bytes);
  862. local_char_length= ((charset->mbmaxlen > 1) ?
  863. max_num_bytes/charset->mbmaxlen : max_num_bytes);
  864. if (length > local_char_length)
  865. {
  866. local_char_length= my_charpos(
  867. charset,
  868. from_mysql+length_bytes_in_mysql,
  869. from_mysql+length_bytes_in_mysql+length,
  870. local_char_length
  871. );
  872. set_if_smaller(length, local_char_length);
  873. }
  874. //
  875. // copy the length bytes, assuming both are in little endian
  876. //
  877. to_tokudb[0] = (uchar)length & 255;
  878. if (length_bytes_in_tokudb > 1) {
  879. to_tokudb[1] = (uchar) (length >> 8);
  880. }
  881. //
  882. // copy the string
  883. //
  884. memcpy(to_tokudb + length_bytes_in_tokudb, from_mysql + length_bytes_in_mysql, length);
  885. return to_tokudb + length + length_bytes_in_tokudb;
  886. }
  887. static inline int cmp_toku_string(
  888. uchar* a_buf,
  889. uint32_t a_num_bytes,
  890. uchar* b_buf,
  891. uint32_t b_num_bytes,
  892. uint32_t charset_number
  893. )
  894. {
  895. int ret_val = 0;
  896. CHARSET_INFO* charset = NULL;
  897. charset = get_charset_from_num(charset_number);
  898. ret_val = charset->coll->strnncollsp(
  899. charset,
  900. a_buf,
  901. a_num_bytes,
  902. b_buf,
  903. b_num_bytes,
  904. 0
  905. );
  906. return ret_val;
  907. }
  908. static inline int cmp_toku_varstring(
  909. uchar* a_buf,
  910. uchar* b_buf,
  911. uint32_t length_bytes, //number of bytes used to encode length in a_buf and b_buf
  912. uint32_t charset_num,
  913. uint32_t* a_bytes_read,
  914. uint32_t* b_bytes_read
  915. )
  916. {
  917. int ret_val = 0;
  918. uint32_t a_len = get_length_from_var_tokudata(a_buf, length_bytes);
  919. uint32_t b_len = get_length_from_var_tokudata(b_buf, length_bytes);
  920. ret_val = cmp_toku_string(
  921. a_buf + length_bytes,
  922. a_len,
  923. b_buf + length_bytes,
  924. b_len,
  925. charset_num
  926. );
  927. *a_bytes_read = a_len + length_bytes;
  928. *b_bytes_read = b_len + length_bytes;
  929. return ret_val;
  930. }
  931. static inline int tokudb_compare_two_hidden_keys(
  932. const void* new_key_data,
  933. const uint32_t new_key_size,
  934. const void* saved_key_data,
  935. const uint32_t saved_key_size
  936. ) {
  937. assert( (new_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) && (saved_key_size >= TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH) );
  938. ulonglong a = hpk_char_to_num((uchar *) new_key_data);
  939. ulonglong b = hpk_char_to_num((uchar *) saved_key_data);
  940. return a < b ? -1 : (a > b ? 1 : 0);
  941. }
  942. //
  943. // Returns number of bytes used for a given TOKU_TYPE
  944. // in a key descriptor. The number of bytes returned
  945. // here MUST match the number of bytes used for the encoding
  946. // in create_toku_key_descriptor_for_key
  947. // Parameters:
  948. // [in] row_desc - buffer that contains portion of descriptor
  949. // created in create_toku_key_descriptor_for_key. The first
  950. // byte points to the TOKU_TYPE.
  951. //
  952. uint32_t skip_field_in_descriptor(uchar* row_desc) {
  953. uchar* row_desc_pos = row_desc;
  954. TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
  955. row_desc_pos++;
  956. switch (toku_type) {
  957. case (toku_type_hpk):
  958. case (toku_type_double):
  959. case (toku_type_float):
  960. break;
  961. case (toku_type_int):
  962. row_desc_pos += 2;
  963. break;
  964. case (toku_type_fixbinary):
  965. case (toku_type_varbinary):
  966. row_desc_pos++;
  967. break;
  968. case (toku_type_fixstring):
  969. case (toku_type_varstring):
  970. case (toku_type_blob):
  971. row_desc_pos++;
  972. row_desc_pos += sizeof(uint32_t);
  973. break;
  974. default:
  975. assert(false);
  976. break;
  977. }
  978. return (uint32_t)(row_desc_pos - row_desc);
  979. }
  980. //
  981. // outputs a descriptor for key into buf. Returns number of bytes used in buf
  982. // to store the descriptor. Number of bytes used MUST match number of bytes
  983. // we would skip in skip_field_in_descriptor
  984. //
  985. int create_toku_key_descriptor_for_key(KEY* key, uchar* buf) {
  986. uchar* pos = buf;
  987. uint32_t num_bytes_in_field = 0;
  988. uint32_t charset_num = 0;
  989. for (uint i = 0; i < get_key_parts(key); i++){
  990. Field* field = key->key_part[i].field;
  991. //
  992. // The first byte states if there is a null byte
  993. // 0 means no null byte, non-zer means there
  994. // is one
  995. //
  996. *pos = field->null_bit;
  997. pos++;
  998. //
  999. // The second byte for each field is the type
  1000. //
  1001. TOKU_TYPE type = mysql_to_toku_type(field);
  1002. assert (type < 256);
  1003. *pos = (uchar)(type & 255);
  1004. pos++;
  1005. //
  1006. // based on the type, extra data follows afterwards
  1007. //
  1008. switch (type) {
  1009. //
  1010. // two bytes follow for ints, first one states how many
  1011. // bytes the int is (1 , 2, 3, 4 or 8)
  1012. // next one states if it is signed or not
  1013. //
  1014. case (toku_type_int):
  1015. num_bytes_in_field = field->pack_length();
  1016. assert (num_bytes_in_field < 256);
  1017. *pos = (uchar)(num_bytes_in_field & 255);
  1018. pos++;
  1019. *pos = (field->flags & UNSIGNED_FLAG) ? 1 : 0;
  1020. pos++;
  1021. break;
  1022. //
  1023. // nothing follows floats and doubles
  1024. //
  1025. case (toku_type_double):
  1026. case (toku_type_float):
  1027. break;
  1028. //
  1029. // one byte follow stating the length of the field
  1030. //
  1031. case (toku_type_fixbinary):
  1032. num_bytes_in_field = field->pack_length();
  1033. set_if_smaller(num_bytes_in_field, key->key_part[i].length);
  1034. assert(num_bytes_in_field < 256);
  1035. pos[0] = (uchar)(num_bytes_in_field & 255);
  1036. pos++;
  1037. break;
  1038. //
  1039. // one byte follows: the number of bytes used to encode the length
  1040. //
  1041. case (toku_type_varbinary):
  1042. *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
  1043. pos++;
  1044. break;
  1045. //
  1046. // five bytes follow: one for the number of bytes to encode the length,
  1047. // four for the charset number
  1048. //
  1049. case (toku_type_fixstring):
  1050. case (toku_type_varstring):
  1051. case (toku_type_blob):
  1052. *pos = (uchar)(get_length_bytes_from_max(key->key_part[i].length) & 255);
  1053. pos++;
  1054. charset_num = field->charset()->number;
  1055. pos[0] = (uchar)(charset_num & 255);
  1056. pos[1] = (uchar)((charset_num >> 8) & 255);
  1057. pos[2] = (uchar)((charset_num >> 16) & 255);
  1058. pos[3] = (uchar)((charset_num >> 24) & 255);
  1059. pos += 4;
  1060. break;
  1061. default:
  1062. assert(false);
  1063. }
  1064. }
  1065. return pos - buf;
  1066. }
  1067. //
  1068. // Creates a descriptor for a DB. That contains all information necessary
  1069. // to do both key comparisons and data comparisons (for dup-sort databases).
  1070. //
  1071. // There are two types of descriptors we care about:
  1072. // 1) Primary key, (in a no-dup database)
  1073. // 2) secondary keys, which are a secondary key followed by a primary key,
  1074. // but in a no-dup database.
  1075. //
  1076. // I realize this may be confusing, but here is how it works.
  1077. // All DB's have a key compare.
  1078. // The format of the descriptor must be able to handle both.
  1079. //
  1080. // The first four bytes store an offset into the descriptor to the second piece
  1081. // used for data comparisons. So, if in the future we want to append something
  1082. // to the descriptor, we can.
  1083. //
  1084. //
  1085. int create_toku_key_descriptor(
  1086. uchar* buf,
  1087. bool is_first_hpk,
  1088. KEY* first_key,
  1089. bool is_second_hpk,
  1090. KEY* second_key
  1091. )
  1092. {
  1093. //
  1094. // The first four bytes always contain the offset of where the first key
  1095. // ends.
  1096. //
  1097. uchar* pos = buf + 4;
  1098. uint32_t num_bytes = 0;
  1099. uint32_t offset = 0;
  1100. if (is_first_hpk) {
  1101. pos[0] = 0; //say there is NO infinity byte
  1102. pos[1] = 0; //field cannot be NULL, stating it
  1103. pos[2] = toku_type_hpk;
  1104. pos += 3;
  1105. }
  1106. else {
  1107. //
  1108. // first key is NOT a hidden primary key, so we now pack first_key
  1109. //
  1110. pos[0] = 1; //say there is an infinity byte
  1111. pos++;
  1112. num_bytes = create_toku_key_descriptor_for_key(first_key, pos);
  1113. pos += num_bytes;
  1114. }
  1115. //
  1116. // if we do not have a second key, we can jump to exit right now
  1117. // we do not have a second key if it is not a hidden primary key
  1118. // and if second_key is NULL
  1119. //
  1120. if (is_first_hpk || (!is_second_hpk && (second_key == NULL)) ) {
  1121. goto exit;
  1122. }
  1123. //
  1124. // if we have a second key, and it is an hpk, we need to pack it, and
  1125. // write in the offset to this position in the first four bytes
  1126. //
  1127. if (is_second_hpk) {
  1128. pos[0] = 0; //field cannot be NULL, stating it
  1129. pos[1] = toku_type_hpk;
  1130. pos += 2;
  1131. }
  1132. else {
  1133. //
  1134. // second key is NOT a hidden primary key, so we now pack second_key
  1135. //
  1136. num_bytes = create_toku_key_descriptor_for_key(second_key, pos);
  1137. pos += num_bytes;
  1138. }
  1139. exit:
  1140. offset = pos - buf;
  1141. buf[0] = (uchar)(offset & 255);
  1142. buf[1] = (uchar)((offset >> 8) & 255);
  1143. buf[2] = (uchar)((offset >> 16) & 255);
  1144. buf[3] = (uchar)((offset >> 24) & 255);
  1145. return pos - buf;
  1146. }
  1147. static inline int compare_toku_field(
  1148. uchar* a_buf,
  1149. uchar* b_buf,
  1150. uchar* row_desc,
  1151. uint32_t* a_bytes_read,
  1152. uint32_t* b_bytes_read,
  1153. uint32_t* row_desc_bytes_read
  1154. )
  1155. {
  1156. int ret_val = 0;
  1157. uchar* row_desc_pos = row_desc;
  1158. uint32_t num_bytes = 0;
  1159. uint32_t length_bytes = 0;
  1160. uint32_t charset_num = 0;
  1161. bool is_unsigned = false;
  1162. TOKU_TYPE toku_type = (TOKU_TYPE)row_desc_pos[0];
  1163. row_desc_pos++;
  1164. switch (toku_type) {
  1165. case (toku_type_hpk):
  1166. ret_val = tokudb_compare_two_hidden_keys(
  1167. a_buf,
  1168. TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH,
  1169. b_buf,
  1170. TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH
  1171. );
  1172. *a_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
  1173. *b_bytes_read = TOKUDB_HIDDEN_PRIMARY_KEY_LENGTH;
  1174. break;
  1175. case (toku_type_int):
  1176. num_bytes = row_desc_pos[0];
  1177. is_unsigned = row_desc_pos[1];
  1178. ret_val = cmp_toku_int(
  1179. a_buf,
  1180. b_buf,
  1181. is_unsigned,
  1182. num_bytes
  1183. );
  1184. *a_bytes_read = num_bytes;
  1185. *b_bytes_read = num_bytes;
  1186. row_desc_pos += 2;
  1187. break;
  1188. case (toku_type_double):
  1189. ret_val = cmp_toku_double(a_buf, b_buf);
  1190. *a_bytes_read = sizeof(double);
  1191. *b_bytes_read = sizeof(double);
  1192. break;
  1193. case (toku_type_float):
  1194. ret_val = cmp_toku_float(a_buf, b_buf);
  1195. *a_bytes_read = sizeof(float);
  1196. *b_bytes_read = sizeof(float);
  1197. break;
  1198. case (toku_type_fixbinary):
  1199. num_bytes = row_desc_pos[0];
  1200. ret_val = cmp_toku_binary(a_buf, num_bytes, b_buf,num_bytes);
  1201. *a_bytes_read = num_bytes;
  1202. *b_bytes_read = num_bytes;
  1203. row_desc_pos++;
  1204. break;
  1205. case (toku_type_varbinary):
  1206. length_bytes = row_desc_pos[0];
  1207. ret_val = cmp_toku_varbinary(
  1208. a_buf,
  1209. b_buf,
  1210. length_bytes,
  1211. a_bytes_read,
  1212. b_bytes_read
  1213. );
  1214. row_desc_pos++;
  1215. break;
  1216. case (toku_type_fixstring):
  1217. case (toku_type_varstring):
  1218. case (toku_type_blob):
  1219. length_bytes = row_desc_pos[0];
  1220. row_desc_pos++;
  1221. //
  1222. // not sure we want to read charset_num like this
  1223. //
  1224. charset_num = *(uint32_t *)row_desc_pos;
  1225. row_desc_pos += sizeof(uint32_t);
  1226. ret_val = cmp_toku_varstring(
  1227. a_buf,
  1228. b_buf,
  1229. length_bytes,
  1230. charset_num,
  1231. a_bytes_read,
  1232. b_bytes_read
  1233. );
  1234. break;
  1235. default:
  1236. assert(false);
  1237. break;
  1238. }
  1239. *row_desc_bytes_read = row_desc_pos - row_desc;
  1240. return ret_val;
  1241. }
  1242. //
  1243. // packs a field from a MySQL buffer into a tokudb buffer.
  1244. // Used for inserts/updates
  1245. //
  1246. uchar* pack_toku_key_field(
  1247. uchar* to_tokudb,
  1248. uchar* from_mysql,
  1249. Field* field,
  1250. uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
  1251. )
  1252. {
  1253. uchar* new_pos = NULL;
  1254. uint32_t num_bytes = 0;
  1255. TOKU_TYPE toku_type = mysql_to_toku_type(field);
  1256. switch(toku_type) {
  1257. case (toku_type_int):
  1258. assert(key_part_length == field->pack_length());
  1259. new_pos = pack_toku_int(
  1260. to_tokudb,
  1261. from_mysql,
  1262. field->pack_length()
  1263. );
  1264. goto exit;
  1265. case (toku_type_double):
  1266. assert(field->pack_length() == sizeof(double));
  1267. assert(key_part_length == sizeof(double));
  1268. new_pos = pack_toku_double(to_tokudb, from_mysql);
  1269. goto exit;
  1270. case (toku_type_float):
  1271. assert(field->pack_length() == sizeof(float));
  1272. assert(key_part_length == sizeof(float));
  1273. new_pos = pack_toku_float(to_tokudb, from_mysql);
  1274. goto exit;
  1275. case (toku_type_fixbinary):
  1276. num_bytes = field->pack_length();
  1277. set_if_smaller(num_bytes, key_part_length);
  1278. new_pos = pack_toku_binary(
  1279. to_tokudb,
  1280. from_mysql,
  1281. num_bytes
  1282. );
  1283. goto exit;
  1284. case (toku_type_fixstring):
  1285. num_bytes = field->pack_length();
  1286. set_if_smaller(num_bytes, key_part_length);
  1287. new_pos = pack_toku_varstring(
  1288. to_tokudb,
  1289. from_mysql,
  1290. get_length_bytes_from_max(key_part_length),
  1291. 0,
  1292. num_bytes,
  1293. field->charset()
  1294. );
  1295. goto exit;
  1296. case (toku_type_varbinary):
  1297. new_pos = pack_toku_varbinary(
  1298. to_tokudb,
  1299. from_mysql,
  1300. ((Field_varstring *)field)->length_bytes,
  1301. key_part_length
  1302. );
  1303. goto exit;
  1304. case (toku_type_varstring):
  1305. new_pos = pack_toku_varstring(
  1306. to_tokudb,
  1307. from_mysql,
  1308. get_length_bytes_from_max(key_part_length),
  1309. ((Field_varstring *)field)->length_bytes,
  1310. key_part_length,
  1311. field->charset()
  1312. );
  1313. goto exit;
  1314. case (toku_type_blob):
  1315. new_pos = pack_toku_blob(
  1316. to_tokudb,
  1317. from_mysql,
  1318. get_length_bytes_from_max(key_part_length),
  1319. ((Field_blob *)field)->row_pack_length(), //only calling this because packlength is returned
  1320. key_part_length,
  1321. field->charset()
  1322. );
  1323. goto exit;
  1324. default:
  1325. assert(false);
  1326. }
  1327. assert(false);
  1328. exit:
  1329. return new_pos;
  1330. }
  1331. //
  1332. // packs a field from a MySQL buffer into a tokudb buffer.
  1333. // Used for queries. The only difference between this function
  1334. // and pack_toku_key_field is that all variable sized columns
  1335. // use 2 bytes to encode the length, regardless of the field
  1336. // So varchar(4) will still use 2 bytes to encode the field
  1337. //
  1338. uchar* pack_key_toku_key_field(
  1339. uchar* to_tokudb,
  1340. uchar* from_mysql,
  1341. Field* field,
  1342. uint32_t key_part_length //I really hope this is temporary as I phase out the pack_cmp stuff
  1343. )
  1344. {
  1345. uchar* new_pos = NULL;
  1346. TOKU_TYPE toku_type = mysql_to_toku_type(field);
  1347. switch(toku_type) {
  1348. case (toku_type_int):
  1349. case (toku_type_double):
  1350. case (toku_type_float):
  1351. case (toku_type_fixbinary):
  1352. case (toku_type_fixstring):
  1353. new_pos = pack_toku_key_field(to_tokudb, from_mysql, field, key_part_length);
  1354. goto exit;
  1355. case (toku_type_varbinary):
  1356. new_pos = pack_toku_varbinary(
  1357. to_tokudb,
  1358. from_mysql,
  1359. 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
  1360. key_part_length
  1361. );
  1362. goto exit;
  1363. case (toku_type_varstring):
  1364. case (toku_type_blob):
  1365. new_pos = pack_toku_varstring(
  1366. to_tokudb,
  1367. from_mysql,
  1368. get_length_bytes_from_max(key_part_length),
  1369. 2, // for some idiotic reason, 2 bytes are always used here, regardless of length of field
  1370. key_part_length,
  1371. field->charset()
  1372. );
  1373. goto exit;
  1374. default:
  1375. assert(false);
  1376. }
  1377. assert(false);
  1378. exit:
  1379. return new_pos;
  1380. }
  1381. uchar* unpack_toku_key_field(
  1382. uchar* to_mysql,
  1383. uchar* from_tokudb,
  1384. Field* field,
  1385. uint32_t key_part_length
  1386. )
  1387. {
  1388. uchar* new_pos = NULL;
  1389. uint32_t num_bytes = 0;
  1390. uint32_t num_bytes_copied;
  1391. TOKU_TYPE toku_type = mysql_to_toku_type(field);
  1392. switch(toku_type) {
  1393. case (toku_type_int):
  1394. assert(key_part_length == field->pack_length());
  1395. new_pos = unpack_toku_int(
  1396. to_mysql,
  1397. from_tokudb,
  1398. field->pack_length()
  1399. );
  1400. goto exit;
  1401. case (toku_type_double):
  1402. assert(field->pack_length() == sizeof(double));
  1403. assert(key_part_length == sizeof(double));
  1404. new_pos = unpack_toku_double(to_mysql, from_tokudb);
  1405. goto exit;
  1406. case (toku_type_float):
  1407. assert(field->pack_length() == sizeof(float));
  1408. assert(key_part_length == sizeof(float));
  1409. new_pos = unpack_toku_float(to_mysql, from_tokudb);
  1410. goto exit;
  1411. case (toku_type_fixbinary):
  1412. num_bytes = field->pack_length();
  1413. set_if_smaller(num_bytes, key_part_length);
  1414. new_pos = unpack_toku_binary(
  1415. to_mysql,
  1416. from_tokudb,
  1417. num_bytes
  1418. );
  1419. goto exit;
  1420. case (toku_type_fixstring):
  1421. num_bytes = field->pack_length();
  1422. new_pos = unpack_toku_varbinary(
  1423. to_mysql,
  1424. from_tokudb,
  1425. get_length_bytes_from_max(key_part_length),
  1426. 0
  1427. );
  1428. num_bytes_copied = new_pos - (from_tokudb + get_length_bytes_from_max(key_part_length));
  1429. assert(num_bytes_copied <= num_bytes);
  1430. memset(to_mysql+num_bytes_copied, field->charset()->pad_char, num_bytes - num_bytes_copied);
  1431. goto exit;
  1432. case (toku_type_varbinary):
  1433. case (toku_type_varstring):
  1434. new_pos = unpack_toku_varbinary(
  1435. to_mysql,
  1436. from_tokudb,
  1437. get_length_bytes_from_max(key_part_length),
  1438. ((Field_varstring *)field)->length_bytes
  1439. );
  1440. goto exit;
  1441. case (toku_type_blob):
  1442. new_pos = unpack_toku_blob(
  1443. to_mysql,
  1444. from_tokudb,
  1445. get_length_bytes_from_max(key_part_length),
  1446. ((Field_blob *)field)->row_pack_length() //only calling this because packlength is returned
  1447. );
  1448. goto exit;
  1449. default:
  1450. assert(false);
  1451. }
  1452. assert(false);
  1453. exit:
  1454. return new_pos;
  1455. }
  1456. int tokudb_compare_two_keys(
  1457. const void* new_key_data,
  1458. const uint32_t new_key_size,
  1459. const void* saved_key_data,
  1460. const uint32_t saved_key_size,
  1461. const void* row_desc,
  1462. const uint32_t row_desc_size,
  1463. bool cmp_prefix
  1464. )
  1465. {
  1466. int ret_val = 0;
  1467. int8_t new_key_inf_val = COL_NEG_INF;
  1468. int8_t saved_key_inf_val = COL_NEG_INF;
  1469. uchar* row_desc_ptr = (uchar *)row_desc;
  1470. uchar *new_key_ptr = (uchar *)new_key_data;
  1471. uchar *saved_key_ptr = (uchar *)saved_key_data;
  1472. uint32_t new_key_bytes_left = new_key_size;
  1473. uint32_t saved_key_bytes_left = saved_key_size;
  1474. //
  1475. // if the keys have an infinity byte, set it
  1476. //
  1477. if (row_desc_ptr[0]) {
  1478. new_key_inf_val = (int8_t)new_key_ptr[0];
  1479. saved_key_inf_val = (int8_t)saved_key_ptr[0];
  1480. new_key_ptr++;
  1481. saved_key_ptr++;
  1482. }
  1483. row_desc_ptr++;
  1484. while ( (uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
  1485. (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
  1486. (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size
  1487. )
  1488. {
  1489. uint32_t new_key_field_length;
  1490. uint32_t saved_key_field_length;
  1491. uint32_t row_desc_field_length;
  1492. //
  1493. // if there is a null byte at this point in the key
  1494. //
  1495. if (row_desc_ptr[0]) {
  1496. //
  1497. // compare null bytes. If different, return
  1498. //
  1499. if (new_key_ptr[0] != saved_key_ptr[0]) {
  1500. ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
  1501. goto exit;
  1502. }
  1503. saved_key_ptr++;
  1504. //
  1505. // in case we just read the fact that new_key_ptr and saved_key_ptr
  1506. // have NULL as their next field
  1507. //
  1508. if (!*new_key_ptr++) {
  1509. //
  1510. // skip row_desc_ptr[0] read in if clause
  1511. //
  1512. row_desc_ptr++;
  1513. //
  1514. // skip data that describes rest of field
  1515. //
  1516. row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
  1517. continue;
  1518. }
  1519. }
  1520. row_desc_ptr++;
  1521. ret_val = compare_toku_field(
  1522. new_key_ptr,
  1523. saved_key_ptr,
  1524. row_desc_ptr,
  1525. &new_key_field_length,
  1526. &saved_key_field_length,
  1527. &row_desc_field_length
  1528. );
  1529. new_key_ptr += new_key_field_length;
  1530. saved_key_ptr += saved_key_field_length;
  1531. row_desc_ptr += row_desc_field_length;
  1532. if (ret_val) {
  1533. goto exit;
  1534. }
  1535. assert((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
  1536. assert((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
  1537. assert((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
  1538. }
  1539. new_key_bytes_left = new_key_size - ((uint32_t)(new_key_ptr - (uchar *)new_key_data));
  1540. saved_key_bytes_left = saved_key_size - ((uint32_t)(saved_key_ptr - (uchar *)saved_key_data));
  1541. if (cmp_prefix) {
  1542. ret_val = 0;
  1543. }
  1544. //
  1545. // in this case, read both keys to completion, now read infinity byte
  1546. //
  1547. else if (new_key_bytes_left== 0 && saved_key_bytes_left== 0) {
  1548. ret_val = new_key_inf_val - saved_key_inf_val;
  1549. }
  1550. //
  1551. // at this point, one SHOULD be 0
  1552. //
  1553. else if (new_key_bytes_left == 0 && saved_key_bytes_left > 0) {
  1554. ret_val = (new_key_inf_val == COL_POS_INF ) ? 1 : -1;
  1555. }
  1556. else if (new_key_bytes_left > 0 && saved_key_bytes_left == 0) {
  1557. ret_val = (saved_key_inf_val == COL_POS_INF ) ? -1 : 1;
  1558. }
  1559. //
  1560. // this should never happen, perhaps we should assert(false)
  1561. //
  1562. else {
  1563. assert(false);
  1564. ret_val = new_key_bytes_left - saved_key_bytes_left;
  1565. }
  1566. exit:
  1567. return ret_val;
  1568. }
  1569. int tokudb_cmp_dbt_key(DB* file, const DBT *keya, const DBT *keyb) {
  1570. int cmp;
  1571. if (file->cmp_descriptor->dbt.size == 0) {
  1572. int num_bytes_cmp = keya->size < keyb->size ?
  1573. keya->size : keyb->size;
  1574. cmp = memcmp(keya->data,keyb->data,num_bytes_cmp);
  1575. if (cmp == 0 && (keya->size != keyb->size)) {
  1576. cmp = keya->size < keyb->size ? -1 : 1;
  1577. }
  1578. }
  1579. else {
  1580. cmp = tokudb_compare_two_keys(
  1581. keya->data,
  1582. keya->size,
  1583. keyb->data,
  1584. keyb->size,
  1585. (uchar *)file->cmp_descriptor->dbt.data + 4,
  1586. (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
  1587. false
  1588. );
  1589. }
  1590. return cmp;
  1591. }
  1592. //TODO: QQQ Only do one direction for prefix.
  1593. int tokudb_prefix_cmp_dbt_key(DB *file, const DBT *keya, const DBT *keyb) {
  1594. int cmp = tokudb_compare_two_keys(
  1595. keya->data,
  1596. keya->size,
  1597. keyb->data,
  1598. keyb->size,
  1599. (uchar *)file->cmp_descriptor->dbt.data + 4,
  1600. *(uint32_t *)file->cmp_descriptor->dbt.data - 4,
  1601. true
  1602. );
  1603. return cmp;
  1604. }
  1605. static int tokudb_compare_two_key_parts(
  1606. const void* new_key_data,
  1607. const uint32_t new_key_size,
  1608. const void* saved_key_data,
  1609. const uint32_t saved_key_size,
  1610. const void* row_desc,
  1611. const uint32_t row_desc_size,
  1612. uint max_parts
  1613. )
  1614. {
  1615. int ret_val = 0;
  1616. uchar* row_desc_ptr = (uchar *)row_desc;
  1617. uchar *new_key_ptr = (uchar *)new_key_data;
  1618. uchar *saved_key_ptr = (uchar *)saved_key_data;
  1619. //
  1620. // if the keys have an infinity byte, set it
  1621. //
  1622. if (row_desc_ptr[0]) {
  1623. // new_key_inf_val = (int8_t)new_key_ptr[0];
  1624. // saved_key_inf_val = (int8_t)saved_key_ptr[0];
  1625. new_key_ptr++;
  1626. saved_key_ptr++;
  1627. }
  1628. row_desc_ptr++;
  1629. for (uint i = 0; i < max_parts; i++) {
  1630. if (!((uint32_t)(new_key_ptr - (uchar *)new_key_data) < new_key_size &&
  1631. (uint32_t)(saved_key_ptr - (uchar *)saved_key_data) < saved_key_size &&
  1632. (uint32_t)(row_desc_ptr - (uchar *)row_desc) < row_desc_size))
  1633. break;
  1634. uint32_t new_key_field_length;
  1635. uint32_t saved_key_field_length;
  1636. uint32_t row_desc_field_length;
  1637. //
  1638. // if there is a null byte at this point in the key
  1639. //
  1640. if (row_desc_ptr[0]) {
  1641. //
  1642. // compare null bytes. If different, return
  1643. //
  1644. if (new_key_ptr[0] != saved_key_ptr[0]) {
  1645. ret_val = ((int) *new_key_ptr - (int) *saved_key_ptr);
  1646. goto exit;
  1647. }
  1648. saved_key_ptr++;
  1649. //
  1650. // in case we just read the fact that new_key_ptr and saved_key_ptr
  1651. // have NULL as their next field
  1652. //
  1653. if (!*new_key_ptr++) {
  1654. //
  1655. // skip row_desc_ptr[0] read in if clause
  1656. //
  1657. row_desc_ptr++;
  1658. //
  1659. // skip data that describes rest of field
  1660. //
  1661. row_desc_ptr += skip_field_in_descriptor(row_desc_ptr);
  1662. continue;
  1663. }
  1664. }
  1665. row_desc_ptr++;
  1666. ret_val = compare_toku_field(
  1667. new_key_ptr,
  1668. saved_key_ptr,
  1669. row_desc_ptr,
  1670. &new_key_field_length,
  1671. &saved_key_field_length,
  1672. &row_desc_field_length
  1673. );
  1674. new_key_ptr += new_key_field_length;
  1675. saved_key_ptr += saved_key_field_length;
  1676. row_desc_ptr += row_desc_field_length;
  1677. if (ret_val) {
  1678. goto exit;
  1679. }
  1680. assert((uint32_t)(new_key_ptr - (uchar *)new_key_data) <= new_key_size);
  1681. assert((uint32_t)(saved_key_ptr - (uchar *)saved_key_data) <= saved_key_size);
  1682. assert((uint32_t)(row_desc_ptr - (uchar *)row_desc) <= row_desc_size);
  1683. }
  1684. ret_val = 0;
  1685. exit:
  1686. return ret_val;
  1687. }
  1688. static int tokudb_cmp_dbt_key_parts(DB *file, const DBT *keya, const DBT *keyb, uint max_parts) {
  1689. assert(file->cmp_descriptor->dbt.size);
  1690. return tokudb_compare_two_key_parts(
  1691. keya->data,
  1692. keya->size,
  1693. keyb->data,
  1694. keyb->size,
  1695. (uchar *)file->cmp_descriptor->dbt.data + 4,
  1696. (*(uint32_t *)file->cmp_descriptor->dbt.data) - 4,
  1697. max_parts);
  1698. }
  1699. uint32_t create_toku_main_key_pack_descriptor (
  1700. uchar* buf
  1701. )
  1702. {
  1703. //
  1704. // The first four bytes always contain the offset of where the first key
  1705. // ends.
  1706. //
  1707. uchar* pos = buf + 4;
  1708. uint32_t offset = 0;
  1709. //
  1710. // one byte states if this is the main dictionary
  1711. //
  1712. pos[0] = 1;
  1713. pos++;
  1714. goto exit;
  1715. exit:
  1716. offset = pos - buf;
  1717. buf[0] = (uchar)(offset & 255);
  1718. buf[1] = (uchar)((offset >> 8) & 255);
  1719. buf[2] = (uchar)((offset >> 16) & 255);
  1720. buf[3] = (uchar)((offset >> 24) & 255);
  1721. return pos - buf;
  1722. }
  1723. #define COL_FIX_FIELD 0x11
  1724. #define COL_VAR_FIELD 0x22
  1725. #define COL_BLOB_FIELD 0x33
  1726. #define COL_HAS_NO_CHARSET 0x44
  1727. #define COL_HAS_CHARSET 0x55
  1728. #define COL_FIX_PK_OFFSET 0x66
  1729. #define COL_VAR_PK_OFFSET 0x77
  1730. #define CK_FIX_RANGE 0x88
  1731. #define CK_VAR_RANGE 0x99
  1732. #define COPY_OFFSET_TO_BUF memcpy ( \
  1733. pos, \
  1734. &kc_info->cp_info[pk_index][field_index].col_pack_val, \
  1735. sizeof(uint32_t) \
  1736. ); \
  1737. pos += sizeof(uint32_t);
  1738. uint32_t pack_desc_pk_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
  1739. uchar* pos = buf;
  1740. uint16 field_index = key_part->field->field_index;
  1741. Field* field = table_share->field[field_index];
  1742. TOKU_TYPE toku_type = mysql_to_toku_type(field);
  1743. uint32_t key_part_length = key_part->length;
  1744. uint32_t field_length;
  1745. uchar len_bytes = 0;
  1746. switch(toku_type) {
  1747. case (toku_type_int):
  1748. case (toku_type_double):
  1749. case (toku_type_float):
  1750. pos[0] = COL_FIX_FIELD;
  1751. pos++;
  1752. assert(kc_info->field_lengths[field_index] < 256);
  1753. pos[0] = kc_info->field_lengths[field_index];
  1754. pos++;
  1755. break;
  1756. case (toku_type_fixbinary):
  1757. pos[0] = COL_FIX_FIELD;
  1758. pos++;
  1759. field_length = field->pack_length();
  1760. set_if_smaller(key_part_length, field_length);
  1761. assert(key_part_length < 256);
  1762. pos[0] = (uchar)key_part_length;
  1763. pos++;
  1764. break;
  1765. case (toku_type_fixstring):
  1766. case (toku_type_varbinary):
  1767. case (toku_type_varstring):
  1768. case (toku_type_blob):
  1769. pos[0] = COL_VAR_FIELD;
  1770. pos++;
  1771. len_bytes = (key_part_length > 255) ? 2 : 1;
  1772. pos[0] = len_bytes;
  1773. pos++;
  1774. break;
  1775. default:
  1776. assert(false);
  1777. }
  1778. return pos - buf;
  1779. }
  1780. uint32_t pack_desc_pk_offset_info(
  1781. uchar* buf,
  1782. KEY_AND_COL_INFO* kc_info,
  1783. TABLE_SHARE* table_share,
  1784. KEY_PART_INFO* key_part,
  1785. KEY* prim_key,
  1786. uchar* pk_info
  1787. )
  1788. {
  1789. uchar* pos = buf;
  1790. uint16 field_index = key_part->field->field_index;
  1791. bool found_col_in_pk = false;
  1792. uint32_t index_in_pk;
  1793. bool is_constant_offset = true;
  1794. uint32_t offset = 0;
  1795. for (uint i = 0; i < get_key_parts(prim_key); i++) {
  1796. KEY_PART_INFO curr = prim_key->key_part[i];
  1797. uint16 curr_field_index = curr.field->field_index;
  1798. if (pk_info[2*i] == COL_VAR_FIELD) {
  1799. is_constant_offset = false;
  1800. }
  1801. if (curr_field_index == field_index) {
  1802. found_col_in_pk = true;
  1803. index_in_pk = i;
  1804. break;
  1805. }
  1806. offset += pk_info[2*i + 1];
  1807. }
  1808. assert(found_col_in_pk);
  1809. if (is_constant_offset) {
  1810. pos[0] = COL_FIX_PK_OFFSET;
  1811. pos++;
  1812. memcpy (pos, &offset, sizeof(offset));
  1813. pos += sizeof(offset);
  1814. }
  1815. else {
  1816. pos[0] = COL_VAR_PK_OFFSET;
  1817. pos++;
  1818. memcpy(pos, &index_in_pk, sizeof(index_in_pk));
  1819. pos += sizeof(index_in_pk);
  1820. }
  1821. return pos - buf;
  1822. }
  1823. uint32_t pack_desc_offset_info(uchar* buf, KEY_AND_COL_INFO* kc_info, uint pk_index, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
  1824. uchar* pos = buf;
  1825. uint16 field_index = key_part->field->field_index;
  1826. Field* field = table_share->field[field_index];
  1827. TOKU_TYPE toku_type = mysql_to_toku_type(field);
  1828. bool found_index = false;
  1829. switch(toku_type) {
  1830. case (toku_type_int):
  1831. case (toku_type_double):
  1832. case (toku_type_float):
  1833. case (toku_type_fixbinary):
  1834. case (toku_type_fixstring):
  1835. pos[0] = COL_FIX_FIELD;
  1836. pos++;
  1837. // copy the offset
  1838. COPY_OFFSET_TO_BUF;
  1839. break;
  1840. case (toku_type_varbinary):
  1841. case (toku_type_varstring):
  1842. pos[0] = COL_VAR_FIELD;
  1843. pos++;
  1844. // copy the offset
  1845. COPY_OFFSET_TO_BUF;
  1846. break;
  1847. case (toku_type_blob):
  1848. pos[0] = COL_BLOB_FIELD;
  1849. pos++;
  1850. for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
  1851. uint32_t blob_index = kc_info->blob_fields[i];
  1852. if (blob_index == field_index) {
  1853. uint32_t val = i;
  1854. memcpy(pos, &val, sizeof(uint32_t));
  1855. pos += sizeof(uint32_t);
  1856. found_index = true;
  1857. break;
  1858. }
  1859. }
  1860. assert(found_index);
  1861. break;
  1862. default:
  1863. assert(false);
  1864. }
  1865. return pos - buf;
  1866. }
  1867. uint32_t pack_desc_key_length_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
  1868. uchar* pos = buf;
  1869. uint16 field_index = key_part->field->field_index;
  1870. Field* field = table_share->field[field_index];
  1871. TOKU_TYPE toku_type = mysql_to_toku_type(field);
  1872. uint32_t key_part_length = key_part->length;
  1873. uint32_t field_length;
  1874. switch(toku_type) {
  1875. case (toku_type_int):
  1876. case (toku_type_double):
  1877. case (toku_type_float):
  1878. // copy the key_part length
  1879. field_length = kc_info->field_lengths[field_index];
  1880. memcpy(pos, &field_length, sizeof(field_length));
  1881. pos += sizeof(key_part_length);
  1882. break;
  1883. case (toku_type_fixbinary):
  1884. case (toku_type_fixstring):
  1885. field_length = field->pack_length();
  1886. set_if_smaller(key_part_length, field_length);
  1887. case (toku_type_varbinary):
  1888. case (toku_type_varstring):
  1889. case (toku_type_blob):
  1890. // copy the key_part length
  1891. memcpy(pos, &key_part_length, sizeof(key_part_length));
  1892. pos += sizeof(key_part_length);
  1893. break;
  1894. default:
  1895. assert(false);
  1896. }
  1897. return pos - buf;
  1898. }
  1899. uint32_t pack_desc_char_info(uchar* buf, KEY_AND_COL_INFO* kc_info, TABLE_SHARE* table_share, KEY_PART_INFO* key_part) {
  1900. uchar* pos = buf;
  1901. uint16 field_index = key_part->field->field_index;
  1902. Field* field = table_share->field[field_index];
  1903. TOKU_TYPE toku_type = mysql_to_toku_type(field);
  1904. uint32_t charset_num = 0;
  1905. switch(toku_type) {
  1906. case (toku_type_int):
  1907. case (toku_type_double):
  1908. case (toku_type_float):
  1909. case (toku_type_fixbinary):
  1910. case (toku_type_varbinary):
  1911. pos[0] = COL_HAS_NO_CHARSET;
  1912. pos++;
  1913. break;
  1914. case (toku_type_fixstring):
  1915. case (toku_type_varstring):
  1916. case (toku_type_blob):
  1917. pos[0] = COL_HAS_CHARSET;
  1918. pos++;
  1919. // copy the charset
  1920. charset_num = field->charset()->number;
  1921. pos[0] = (uchar)(charset_num & 255);
  1922. pos[1] = (uchar)((charset_num >> 8) & 255);
  1923. pos[2] = (uchar)((charset_num >> 16) & 255);
  1924. pos[3] = (uchar)((charset_num >> 24) & 255);
  1925. pos += 4;
  1926. break;
  1927. default:
  1928. assert(false);
  1929. }
  1930. return pos - buf;
  1931. }
  1932. uint32_t pack_some_row_info (
  1933. uchar* buf,
  1934. uint pk_index,
  1935. TABLE_SHARE* table_share,
  1936. KEY_AND_COL_INFO* kc_info
  1937. )
  1938. {
  1939. uchar* pos = buf;
  1940. uint32_t num_null_bytes = 0;
  1941. //
  1942. // four bytes stating number of null bytes
  1943. //
  1944. num_null_bytes = table_share->null_bytes;
  1945. memcpy(pos, &num_null_bytes, sizeof(num_null_bytes));
  1946. pos += sizeof(num_null_bytes);
  1947. //
  1948. // eight bytes stating mcp_info
  1949. //
  1950. memcpy(pos, &kc_info->mcp_info[pk_index], sizeof(MULTI_COL_PACK_INFO));
  1951. pos += sizeof(MULTI_COL_PACK_INFO);
  1952. //
  1953. // one byte for the number of offset bytes
  1954. //
  1955. pos[0] = (uchar)kc_info->num_offset_bytes;
  1956. pos++;
  1957. return pos - buf;
  1958. }
  1959. uint32_t get_max_clustering_val_pack_desc_size(
  1960. TABLE_SHARE* table_share
  1961. )
  1962. {
  1963. uint32_t ret_val = 0;
  1964. //
  1965. // the fixed stuff:
  1966. // first the things in pack_some_row_info
  1967. // second another mcp_info
  1968. // third a byte that states if blobs exist
  1969. ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
  1970. ret_val += sizeof(MULTI_COL_PACK_INFO);
  1971. ret_val++;
  1972. //
  1973. // now the variable stuff
  1974. // an upper bound is, for each field, byte stating if it is fixed or var, followed
  1975. // by 8 bytes for endpoints
  1976. //
  1977. ret_val += (table_share->fields)*(1 + 2*sizeof(uint32_t));
  1978. //
  1979. // four bytes storing the length of this portion
  1980. //
  1981. ret_val += 4;
  1982. return ret_val;
  1983. }
  1984. uint32_t create_toku_clustering_val_pack_descriptor (
  1985. uchar* buf,
  1986. uint pk_index,
  1987. TABLE_SHARE* table_share,
  1988. KEY_AND_COL_INFO* kc_info,
  1989. uint32_t keynr,
  1990. bool is_clustering
  1991. )
  1992. {
  1993. uchar* pos = buf + 4;
  1994. uint32_t offset = 0;
  1995. bool start_range_set = false;
  1996. uint32_t last_col = 0;
  1997. //
  1998. // do not need to write anything if the key is not clustering
  1999. //
  2000. if (!is_clustering) {
  2001. goto exit;
  2002. }
  2003. pos += pack_some_row_info(
  2004. pos,
  2005. pk_index,
  2006. table_share,
  2007. kc_info
  2008. );
  2009. //
  2010. // eight bytes stating mcp_info of clustering key
  2011. //
  2012. memcpy(pos, &kc_info->mcp_info[keynr], sizeof(MULTI_COL_PACK_INFO));
  2013. pos += sizeof(MULTI_COL_PACK_INFO);
  2014. //
  2015. // store bit that states if blobs exist
  2016. //
  2017. pos[0] = (kc_info->num_blobs) ? 1 : 0;
  2018. pos++;
  2019. //
  2020. // descriptor assumes that all fields filtered from pk are
  2021. // also filtered from clustering key val. Doing check here to
  2022. // make sure something unexpected does not happen
  2023. //
  2024. for (uint i = 0; i < table_share->fields; i++) {
  2025. bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
  2026. bool col_filtered_in_pk = bitmap_is_set(&kc_info->key_filters[pk_index],i);
  2027. if (col_filtered_in_pk) {
  2028. assert(col_filtered);
  2029. }
  2030. }
  2031. //
  2032. // first handle the fixed fields
  2033. //
  2034. start_range_set = false;
  2035. last_col = 0;
  2036. for (uint i = 0; i < table_share->fields; i++) {
  2037. bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
  2038. if (kc_info->field_lengths[i] == 0) {
  2039. //
  2040. // not a fixed field, continue
  2041. //
  2042. continue;
  2043. }
  2044. if (col_filtered && start_range_set) {
  2045. //
  2046. // need to set the end range
  2047. //
  2048. start_range_set = false;
  2049. uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val + kc_info->field_lengths[last_col];
  2050. memcpy(pos, &end_offset, sizeof(end_offset));
  2051. pos += sizeof(end_offset);
  2052. }
  2053. else if (!col_filtered) {
  2054. if (!start_range_set) {
  2055. pos[0] = CK_FIX_RANGE;
  2056. pos++;
  2057. start_range_set = true;
  2058. uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
  2059. memcpy(pos, &start_offset , sizeof(start_offset));
  2060. pos += sizeof(start_offset);
  2061. }
  2062. last_col = i;
  2063. }
  2064. else {
  2065. continue;
  2066. }
  2067. }
  2068. if (start_range_set) {
  2069. //
  2070. // need to set the end range
  2071. //
  2072. start_range_set = false;
  2073. uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val+ kc_info->field_lengths[last_col];
  2074. memcpy(pos, &end_offset, sizeof(end_offset));
  2075. pos += sizeof(end_offset);
  2076. }
  2077. //
  2078. // now handle the var fields
  2079. //
  2080. start_range_set = false;
  2081. last_col = 0;
  2082. for (uint i = 0; i < table_share->fields; i++) {
  2083. bool col_filtered = bitmap_is_set(&kc_info->key_filters[keynr],i);
  2084. if (kc_info->length_bytes[i] == 0) {
  2085. //
  2086. // not a var field, continue
  2087. //
  2088. continue;
  2089. }
  2090. if (col_filtered && start_range_set) {
  2091. //
  2092. // need to set the end range
  2093. //
  2094. start_range_set = false;
  2095. uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
  2096. memcpy(pos, &end_offset, sizeof(end_offset));
  2097. pos += sizeof(end_offset);
  2098. }
  2099. else if (!col_filtered) {
  2100. if (!start_range_set) {
  2101. pos[0] = CK_VAR_RANGE;
  2102. pos++;
  2103. start_range_set = true;
  2104. uint32_t start_offset = kc_info->cp_info[pk_index][i].col_pack_val;
  2105. memcpy(pos, &start_offset , sizeof(start_offset));
  2106. pos += sizeof(start_offset);
  2107. }
  2108. last_col = i;
  2109. }
  2110. else {
  2111. continue;
  2112. }
  2113. }
  2114. if (start_range_set) {
  2115. start_range_set = false;
  2116. uint32_t end_offset = kc_info->cp_info[pk_index][last_col].col_pack_val;
  2117. memcpy(pos, &end_offset, sizeof(end_offset));
  2118. pos += sizeof(end_offset);
  2119. }
  2120. exit:
  2121. offset = pos - buf;
  2122. buf[0] = (uchar)(offset & 255);
  2123. buf[1] = (uchar)((offset >> 8) & 255);
  2124. buf[2] = (uchar)((offset >> 16) & 255);
  2125. buf[3] = (uchar)((offset >> 24) & 255);
  2126. return pos - buf;
  2127. }
  2128. uint32_t pack_clustering_val_from_desc(
  2129. uchar* buf,
  2130. void* row_desc,
  2131. uint32_t row_desc_size,
  2132. const DBT* pk_val
  2133. )
  2134. {
  2135. uchar* null_bytes_src_ptr = NULL;
  2136. uchar* fixed_src_ptr = NULL;
  2137. uchar* var_src_offset_ptr = NULL;
  2138. uchar* var_src_data_ptr = NULL;
  2139. uchar* fixed_dest_ptr = NULL;
  2140. uchar* var_dest_offset_ptr = NULL;
  2141. uchar* var_dest_data_ptr = NULL;
  2142. uchar* orig_var_dest_data_ptr = NULL;
  2143. uchar* desc_pos = (uchar *)row_desc;
  2144. uint32_t num_null_bytes = 0;
  2145. uint32_t num_offset_bytes;
  2146. MULTI_COL_PACK_INFO src_mcp_info, dest_mcp_info;
  2147. uchar has_blobs;
  2148. memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
  2149. desc_pos += sizeof(num_null_bytes);
  2150. memcpy(&src_mcp_info, desc_pos, sizeof(src_mcp_info));
  2151. desc_pos += sizeof(src_mcp_info);
  2152. num_offset_bytes = desc_pos[0];
  2153. desc_pos++;
  2154. memcpy(&dest_mcp_info, desc_pos, sizeof(dest_mcp_info));
  2155. desc_pos += sizeof(dest_mcp_info);
  2156. has_blobs = desc_pos[0];
  2157. desc_pos++;
  2158. //
  2159. //set the variables
  2160. //
  2161. null_bytes_src_ptr = (uchar *)pk_val->data;
  2162. fixed_src_ptr = null_bytes_src_ptr + num_null_bytes;
  2163. var_src_offset_ptr = fixed_src_ptr + src_mcp_info.fixed_field_size;
  2164. var_src_data_ptr = var_src_offset_ptr + src_mcp_info.len_of_offsets;
  2165. fixed_dest_ptr = buf + num_null_bytes;
  2166. var_dest_offset_ptr = fixed_dest_ptr + dest_mcp_info.fixed_field_size;
  2167. var_dest_data_ptr = var_dest_offset_ptr + dest_mcp_info.len_of_offsets;
  2168. orig_var_dest_data_ptr = var_dest_data_ptr;
  2169. //
  2170. // copy the null bytes
  2171. //
  2172. memcpy(buf, null_bytes_src_ptr, num_null_bytes);
  2173. while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
  2174. uint32_t start, end, length;
  2175. uchar curr = desc_pos[0];
  2176. desc_pos++;
  2177. memcpy(&start, desc_pos, sizeof(start));
  2178. desc_pos += sizeof(start);
  2179. memcpy(&end, desc_pos, sizeof(end));
  2180. desc_pos += sizeof(end);
  2181. assert (start <= end);
  2182. if (curr == CK_FIX_RANGE) {
  2183. length = end - start;
  2184. memcpy(fixed_dest_ptr, fixed_src_ptr + start, length);
  2185. fixed_dest_ptr += length;
  2186. }
  2187. else if (curr == CK_VAR_RANGE) {
  2188. uint32_t start_data_size;
  2189. uint32_t start_data_offset;
  2190. uint32_t end_data_size;
  2191. uint32_t end_data_offset;
  2192. uint32_t offset_diffs;
  2193. get_var_field_info(
  2194. &start_data_size,
  2195. &start_data_offset,
  2196. start,
  2197. var_src_offset_ptr,
  2198. num_offset_bytes
  2199. );
  2200. get_var_field_info(
  2201. &end_data_size,
  2202. &end_data_offset,
  2203. end,
  2204. var_src_offset_ptr,
  2205. num_offset_bytes
  2206. );
  2207. length = end_data_offset + end_data_size - start_data_offset;
  2208. //
  2209. // copy the data
  2210. //
  2211. memcpy(
  2212. var_dest_data_ptr,
  2213. var_src_data_ptr + start_data_offset,
  2214. length
  2215. );
  2216. var_dest_data_ptr += length;
  2217. //
  2218. // put in offset info
  2219. //
  2220. offset_diffs = (end_data_offset + end_data_size) - (uint32_t)(var_dest_data_ptr - orig_var_dest_data_ptr);
  2221. for (uint32_t i = start; i <= end; i++) {
  2222. if ( num_offset_bytes == 1 ) {
  2223. assert(offset_diffs < 256);
  2224. var_dest_offset_ptr[0] = var_src_offset_ptr[i] - (uchar)offset_diffs;
  2225. var_dest_offset_ptr++;
  2226. }
  2227. else if ( num_offset_bytes == 2 ) {
  2228. uint32_t tmp = uint2korr(var_src_offset_ptr + 2*i);
  2229. uint32_t new_offset = tmp - offset_diffs;
  2230. assert(new_offset < 1<<16);
  2231. int2store(var_dest_offset_ptr,new_offset);
  2232. var_dest_offset_ptr += 2;
  2233. }
  2234. else {
  2235. assert(false);
  2236. }
  2237. }
  2238. }
  2239. else {
  2240. assert(false);
  2241. }
  2242. }
  2243. //
  2244. // copy blobs
  2245. // at this point, var_dest_data_ptr is pointing to the end, where blobs should be located
  2246. // so, we put the blobs at var_dest_data_ptr
  2247. //
  2248. if (has_blobs) {
  2249. uint32_t num_blob_bytes;
  2250. uint32_t start_offset;
  2251. uchar* src_blob_ptr = NULL;
  2252. get_blob_field_info(
  2253. &start_offset,
  2254. src_mcp_info.len_of_offsets,
  2255. var_src_data_ptr,
  2256. num_offset_bytes
  2257. );
  2258. src_blob_ptr = var_src_data_ptr + start_offset;
  2259. num_blob_bytes = pk_val->size - (start_offset + (var_src_data_ptr - null_bytes_src_ptr));
  2260. memcpy(var_dest_data_ptr, src_blob_ptr, num_blob_bytes);
  2261. var_dest_data_ptr += num_blob_bytes;
  2262. }
  2263. return var_dest_data_ptr - buf;
  2264. }
  2265. uint32_t get_max_secondary_key_pack_desc_size(
  2266. KEY_AND_COL_INFO* kc_info
  2267. )
  2268. {
  2269. uint32_t ret_val = 0;
  2270. //
  2271. // the fixed stuff:
  2272. // byte that states if main dictionary
  2273. // byte that states if hpk
  2274. // the things in pack_some_row_info
  2275. ret_val++;
  2276. ret_val++;
  2277. ret_val += sizeof(uint32_t) + sizeof(MULTI_COL_PACK_INFO) + 1;
  2278. //
  2279. // now variable sized stuff
  2280. //
  2281. // first the blobs
  2282. ret_val += sizeof(kc_info->num_blobs);
  2283. ret_val+= kc_info->num_blobs;
  2284. // then the pk
  2285. // one byte for num key parts
  2286. // two bytes for each key part
  2287. ret_val++;
  2288. ret_val += MAX_REF_PARTS*2;
  2289. // then the key
  2290. // null bit, then null byte,
  2291. // then 1 byte stating what it is, then 4 for offset, 4 for key length,
  2292. // 1 for if charset exists, and 4 for charset
  2293. ret_val += MAX_REF_PARTS*(1 + sizeof(uint32_t) + 1 + 3*sizeof(uint32_t) + 1);
  2294. //
  2295. // four bytes storing the length of this portion
  2296. //
  2297. ret_val += 4;
  2298. return ret_val;
  2299. }
  2300. uint32_t create_toku_secondary_key_pack_descriptor (
  2301. uchar* buf,
  2302. bool has_hpk,
  2303. uint pk_index,
  2304. TABLE_SHARE* table_share,
  2305. TABLE* table,
  2306. KEY_AND_COL_INFO* kc_info,
  2307. KEY* key_info,
  2308. KEY* prim_key
  2309. )
  2310. {
  2311. //
  2312. // The first four bytes always contain the offset of where the first key
  2313. // ends.
  2314. //
  2315. uchar* pk_info = NULL;
  2316. uchar* pos = buf + 4;
  2317. uint32_t offset = 0;
  2318. //
  2319. // first byte states that it is NOT main dictionary
  2320. //
  2321. pos[0] = 0;
  2322. pos++;
  2323. //
  2324. // one byte states if main dictionary has an hpk or not
  2325. //
  2326. if (has_hpk) {
  2327. pos[0] = 1;
  2328. }
  2329. else {
  2330. pos[0] = 0;
  2331. }
  2332. pos++;
  2333. pos += pack_some_row_info(
  2334. pos,
  2335. pk_index,
  2336. table_share,
  2337. kc_info
  2338. );
  2339. //
  2340. // store blob information
  2341. //
  2342. memcpy(pos, &kc_info->num_blobs, sizeof(kc_info->num_blobs));
  2343. pos += sizeof(uint32_t);
  2344. for (uint32_t i = 0; i < kc_info->num_blobs; i++) {
  2345. //
  2346. // store length bytes for each blob
  2347. //
  2348. Field* field = table_share->field[kc_info->blob_fields[i]];
  2349. pos[0] = (uchar)field->row_pack_length();
  2350. pos++;
  2351. }
  2352. //
  2353. // store the pk information
  2354. //
  2355. if (has_hpk) {
  2356. pos[0] = 0;
  2357. pos++;
  2358. }
  2359. else {
  2360. //
  2361. // store number of parts
  2362. //
  2363. assert(get_key_parts(prim_key) < 128);
  2364. pos[0] = 2 * get_key_parts(prim_key);
  2365. pos++;
  2366. //
  2367. // for each part, store if it is a fixed field or var field
  2368. // if fixed, store number of bytes, if var, store
  2369. // number of length bytes
  2370. // total should be two bytes per key part stored
  2371. //
  2372. pk_info = pos;
  2373. uchar* tmp = pos;
  2374. for (uint i = 0; i < get_key_parts(prim_key); i++) {
  2375. tmp += pack_desc_pk_info(
  2376. tmp,
  2377. kc_info,
  2378. table_share,
  2379. &prim_key->key_part[i]
  2380. );
  2381. }
  2382. //
  2383. // asserting that we moved forward as much as we think we have
  2384. //
  2385. assert(tmp - pos == (2 * get_key_parts(prim_key)));
  2386. pos = tmp;
  2387. }
  2388. for (uint i = 0; i < get_key_parts(key_info); i++) {
  2389. KEY_PART_INFO curr_kpi = key_info->key_part[i];
  2390. uint16 field_index = curr_kpi.field->field_index;
  2391. Field* field = table_share->field[field_index];
  2392. bool is_col_in_pk = false;
  2393. if (bitmap_is_set(&kc_info->key_filters[pk_index],field_index)) {
  2394. assert(!has_hpk && prim_key != NULL);
  2395. is_col_in_pk = true;
  2396. }
  2397. else {
  2398. is_col_in_pk = false;
  2399. }
  2400. pos[0] = field->null_bit;
  2401. pos++;
  2402. if (is_col_in_pk) {
  2403. //
  2404. // assert that columns in pk do not have a null bit
  2405. // because in MySQL, pk columns cannot be null
  2406. //
  2407. assert(!field->null_bit);
  2408. }
  2409. if (field->null_bit) {
  2410. uint32_t null_offset = get_null_offset(table,table->field[field_index]);
  2411. memcpy(pos, &null_offset, sizeof(uint32_t));
  2412. pos += sizeof(uint32_t);
  2413. }
  2414. if (is_col_in_pk) {
  2415. pos += pack_desc_pk_offset_info(
  2416. pos,
  2417. kc_info,
  2418. table_share,
  2419. &curr_kpi,
  2420. prim_key,
  2421. pk_info
  2422. );
  2423. }
  2424. else {
  2425. pos += pack_desc_offset_info(
  2426. pos,
  2427. kc_info,
  2428. pk_index,
  2429. table_share,
  2430. &curr_kpi
  2431. );
  2432. }
  2433. pos += pack_desc_key_length_info(
  2434. pos,
  2435. kc_info,
  2436. table_share,
  2437. &curr_kpi
  2438. );
  2439. pos += pack_desc_char_info(
  2440. pos,
  2441. kc_info,
  2442. table_share,
  2443. &curr_kpi
  2444. );
  2445. }
  2446. offset = pos - buf;
  2447. buf[0] = (uchar)(offset & 255);
  2448. buf[1] = (uchar)((offset >> 8) & 255);
  2449. buf[2] = (uchar)((offset >> 16) & 255);
  2450. buf[3] = (uchar)((offset >> 24) & 255);
  2451. return pos - buf;
  2452. }
  2453. uint32_t skip_key_in_desc(
  2454. uchar* row_desc
  2455. )
  2456. {
  2457. uchar* pos = row_desc;
  2458. uchar col_bin_or_char;
  2459. //
  2460. // skip the byte that states if it is a fix field or var field, we do not care
  2461. //
  2462. pos++;
  2463. //
  2464. // skip the offset information
  2465. //
  2466. pos += sizeof(uint32_t);
  2467. //
  2468. // skip the key_part_length info
  2469. //
  2470. pos += sizeof(uint32_t);
  2471. col_bin_or_char = pos[0];
  2472. pos++;
  2473. if (col_bin_or_char == COL_HAS_NO_CHARSET) {
  2474. goto exit;
  2475. }
  2476. //
  2477. // skip the charset info
  2478. //
  2479. pos += 4;
  2480. exit:
  2481. return (uint32_t)(pos-row_desc);
  2482. }
  2483. uint32_t max_key_size_from_desc(
  2484. void* row_desc,
  2485. uint32_t row_desc_size
  2486. )
  2487. {
  2488. uchar* desc_pos = (uchar *)row_desc;
  2489. uint32_t num_blobs;
  2490. uint32_t num_pk_columns;
  2491. //
  2492. // start at 1 for the infinity byte
  2493. //
  2494. uint32_t max_size = 1;
  2495. // skip byte that states if main dictionary
  2496. bool is_main_dictionary = desc_pos[0];
  2497. desc_pos++;
  2498. assert(!is_main_dictionary);
  2499. // skip hpk byte
  2500. desc_pos++;
  2501. // skip num_null_bytes
  2502. desc_pos += sizeof(uint32_t);
  2503. // skip mcp_info
  2504. desc_pos += sizeof(MULTI_COL_PACK_INFO);
  2505. // skip offset_bytes
  2506. desc_pos++;
  2507. // skip over blobs
  2508. memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
  2509. desc_pos += sizeof(num_blobs);
  2510. desc_pos += num_blobs;
  2511. // skip over pk info
  2512. num_pk_columns = desc_pos[0]/2;
  2513. desc_pos++;
  2514. desc_pos += 2*num_pk_columns;
  2515. while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
  2516. uchar has_charset;
  2517. uint32_t key_length = 0;
  2518. uchar null_bit = desc_pos[0];
  2519. desc_pos++;
  2520. if (null_bit) {
  2521. //
  2522. // column is NULLable, skip null_offset, and add a null byte
  2523. //
  2524. max_size++;
  2525. desc_pos += sizeof(uint32_t);
  2526. }
  2527. //
  2528. // skip over byte that states if fix or var
  2529. //
  2530. desc_pos++;
  2531. // skip over offset
  2532. desc_pos += sizeof(uint32_t);
  2533. //
  2534. // get the key length and add it to return value
  2535. //
  2536. memcpy(&key_length, desc_pos, sizeof(key_length));
  2537. desc_pos += sizeof(key_length);
  2538. max_size += key_length;
  2539. max_size += 2; // 2 bytes for a potential length bytes, we are upperbounding, does not need to be super tight
  2540. has_charset = desc_pos[0];
  2541. desc_pos++;
  2542. uint32_t charset_num;
  2543. if (has_charset == COL_HAS_CHARSET) {
  2544. // skip over charsent num
  2545. desc_pos += sizeof(charset_num);
  2546. }
  2547. else {
  2548. assert(has_charset == COL_HAS_NO_CHARSET);
  2549. }
  2550. }
  2551. return max_size;
  2552. }
  2553. uint32_t pack_key_from_desc(
  2554. uchar* buf,
  2555. void* row_desc,
  2556. uint32_t row_desc_size,
  2557. const DBT* pk_key,
  2558. const DBT* pk_val
  2559. )
  2560. {
  2561. MULTI_COL_PACK_INFO mcp_info;
  2562. uint32_t num_null_bytes;
  2563. uint32_t num_blobs;
  2564. uint32_t num_pk_columns;
  2565. uchar* blob_lengths = NULL;
  2566. uchar* pk_info = NULL;
  2567. uchar* pk_data_ptr = NULL;
  2568. uchar* null_bytes_ptr = NULL;
  2569. uchar* fixed_field_ptr = NULL;
  2570. uchar* var_field_offset_ptr = NULL;
  2571. const uchar* var_field_data_ptr = NULL;
  2572. uint32_t num_offset_bytes;
  2573. uchar* packed_key_pos = buf;
  2574. uchar* desc_pos = (uchar *)row_desc;
  2575. bool is_main_dictionary = desc_pos[0];
  2576. desc_pos++;
  2577. assert(!is_main_dictionary);
  2578. //
  2579. // get the constant info out of descriptor
  2580. //
  2581. bool hpk = desc_pos[0];
  2582. desc_pos++;
  2583. memcpy(&num_null_bytes, desc_pos, sizeof(num_null_bytes));
  2584. desc_pos += sizeof(num_null_bytes);
  2585. memcpy(&mcp_info, desc_pos, sizeof(mcp_info));
  2586. desc_pos += sizeof(mcp_info);
  2587. num_offset_bytes = desc_pos[0];
  2588. desc_pos++;
  2589. memcpy(&num_blobs, desc_pos, sizeof(num_blobs));
  2590. desc_pos += sizeof(num_blobs);
  2591. blob_lengths = desc_pos;
  2592. desc_pos += num_blobs;
  2593. num_pk_columns = desc_pos[0]/2;
  2594. desc_pos++;
  2595. pk_info = desc_pos;
  2596. desc_pos += 2*num_pk_columns;
  2597. //
  2598. // now start packing the key
  2599. //
  2600. //
  2601. // pack the infinity byte
  2602. //
  2603. packed_key_pos[0] = COL_ZERO;
  2604. packed_key_pos++;
  2605. //
  2606. // now start packing each column of the key, as described in descriptor
  2607. //
  2608. if (!hpk) {
  2609. // +1 for the infinity byte
  2610. pk_data_ptr = (uchar *)pk_key->data + 1;
  2611. }
  2612. null_bytes_ptr = (uchar *)pk_val->data;
  2613. fixed_field_ptr = null_bytes_ptr + num_null_bytes;
  2614. var_field_offset_ptr = fixed_field_ptr + mcp_info.fixed_field_size;
  2615. var_field_data_ptr = var_field_offset_ptr + mcp_info.len_of_offsets;
  2616. while ( (uint32_t)(desc_pos - (uchar *)row_desc) < row_desc_size) {
  2617. uchar col_fix_val;
  2618. uchar has_charset;
  2619. uint32_t col_pack_val = 0;
  2620. uint32_t key_length = 0;
  2621. uchar null_bit = desc_pos[0];
  2622. desc_pos++;
  2623. if (null_bit) {
  2624. //
  2625. // column is NULLable, need to check the null bytes to see if it is NULL
  2626. //
  2627. uint32_t null_offset = 0;
  2628. bool is_field_null;
  2629. memcpy(&null_offset, desc_pos, sizeof(null_offset));
  2630. desc_pos += sizeof(null_offset);
  2631. is_field_null = (null_bytes_ptr[null_offset] & null_bit) ? true: false;
  2632. if (is_field_null) {
  2633. packed_key_pos[0] = NULL_COL_VAL;
  2634. packed_key_pos++;
  2635. desc_pos += skip_key_in_desc(desc_pos);
  2636. continue;
  2637. }
  2638. else {
  2639. packed_key_pos[0] = NONNULL_COL_VAL;
  2640. packed_key_pos++;
  2641. }
  2642. }
  2643. //
  2644. // now pack the column (unless it was NULL, and we continued)
  2645. //
  2646. col_fix_val = desc_pos[0];
  2647. desc_pos++;
  2648. memcpy(&col_pack_val, desc_pos, sizeof(col_pack_val));
  2649. desc_pos += sizeof(col_pack_val);
  2650. memcpy(&key_length, desc_pos, sizeof(key_length));
  2651. desc_pos += sizeof(key_length);
  2652. has_charset = desc_pos[0];
  2653. desc_pos++;
  2654. uint32_t charset_num = 0;
  2655. if (has_charset == COL_HAS_CHARSET) {
  2656. memcpy(&charset_num, desc_pos, sizeof(charset_num));
  2657. desc_pos += sizeof(charset_num);
  2658. }
  2659. else {
  2660. assert(has_charset == COL_HAS_NO_CHARSET);
  2661. }
  2662. //
  2663. // case where column is in pk val
  2664. //
  2665. if (col_fix_val == COL_FIX_FIELD || col_fix_val == COL_VAR_FIELD || col_fix_val == COL_BLOB_FIELD) {
  2666. if (col_fix_val == COL_FIX_FIELD && has_charset == COL_HAS_NO_CHARSET) {
  2667. memcpy(packed_key_pos, &fixed_field_ptr[col_pack_val], key_length);
  2668. packed_key_pos += key_length;
  2669. }
  2670. else if (col_fix_val == COL_VAR_FIELD && has_charset == COL_HAS_NO_CHARSET) {
  2671. uint32_t data_start_offset = 0;
  2672. uint32_t data_size = 0;
  2673. get_var_field_info(
  2674. &data_size,
  2675. &data_start_offset,
  2676. col_pack_val,
  2677. var_field_offset_ptr,
  2678. num_offset_bytes
  2679. );
  2680. //
  2681. // length of this field in this row is data_size
  2682. // data is located beginning at var_field_data_ptr + data_start_offset
  2683. //
  2684. packed_key_pos = pack_toku_varbinary_from_desc(
  2685. packed_key_pos,
  2686. var_field_data_ptr + data_start_offset,
  2687. key_length, //number of bytes to use to encode the length in to_tokudb
  2688. data_size //length of field
  2689. );
  2690. }
  2691. else {
  2692. const uchar* data_start = NULL;
  2693. uint32_t data_start_offset = 0;
  2694. uint32_t data_size = 0;
  2695. if (col_fix_val == COL_FIX_FIELD) {
  2696. data_start_offset = col_pack_val;
  2697. data_size = key_length;
  2698. data_start = fixed_field_ptr + data_start_offset;
  2699. }
  2700. else if (col_fix_val == COL_VAR_FIELD){
  2701. get_var_field_info(
  2702. &data_size,
  2703. &data_start_offset,
  2704. col_pack_val,
  2705. var_field_offset_ptr,
  2706. num_offset_bytes
  2707. );
  2708. data_start = var_field_data_ptr + data_start_offset;
  2709. }
  2710. else if (col_fix_val == COL_BLOB_FIELD) {
  2711. uint32_t blob_index = col_pack_val;
  2712. uint32_t blob_offset;
  2713. const uchar* blob_ptr = NULL;
  2714. uint32_t field_len;
  2715. uint32_t field_len_bytes = blob_lengths[blob_index];
  2716. get_blob_field_info(
  2717. &blob_offset,
  2718. mcp_info.len_of_offsets,
  2719. var_field_data_ptr,
  2720. num_offset_bytes
  2721. );
  2722. blob_ptr = var_field_data_ptr + blob_offset;
  2723. assert(num_blobs > 0);
  2724. //
  2725. // skip over other blobs to get to the one we want to make a key out of
  2726. //
  2727. for (uint32_t i = 0; i < blob_index; i++) {
  2728. blob_ptr = unpack_toku_field_blob(
  2729. NULL,
  2730. blob_ptr,
  2731. blob_lengths[i],
  2732. true
  2733. );
  2734. }
  2735. //
  2736. // at this point, blob_ptr is pointing to the blob we want to make a key from
  2737. //
  2738. field_len = get_blob_field_len(blob_ptr, field_len_bytes);
  2739. //
  2740. // now we set the variables to make the key
  2741. //
  2742. data_start = blob_ptr + field_len_bytes;
  2743. data_size = field_len;
  2744. }
  2745. else {
  2746. assert(false);
  2747. }
  2748. packed_key_pos = pack_toku_varstring_from_desc(
  2749. packed_key_pos,
  2750. data_start,
  2751. key_length,
  2752. data_size,
  2753. charset_num
  2754. );
  2755. }
  2756. }
  2757. //
  2758. // case where column is in pk key
  2759. //
  2760. else {
  2761. if (col_fix_val == COL_FIX_PK_OFFSET) {
  2762. memcpy(packed_key_pos, &pk_data_ptr[col_pack_val], key_length);
  2763. packed_key_pos += key_length;
  2764. }
  2765. else if (col_fix_val == COL_VAR_PK_OFFSET) {
  2766. uchar* tmp_pk_data_ptr = pk_data_ptr;
  2767. uint32_t index_in_pk = col_pack_val;
  2768. //
  2769. // skip along in pk to the right column
  2770. //
  2771. for (uint32_t i = 0; i < index_in_pk; i++) {
  2772. if (pk_info[2*i] == COL_FIX_FIELD) {
  2773. tmp_pk_data_ptr += pk_info[2*i + 1];
  2774. }
  2775. else if (pk_info[2*i] == COL_VAR_FIELD) {
  2776. uint32_t len_bytes = pk_info[2*i + 1];
  2777. uint32_t len;
  2778. if (len_bytes == 1) {
  2779. len = tmp_pk_data_ptr[0];
  2780. tmp_pk_data_ptr++;
  2781. }
  2782. else if (len_bytes == 2) {
  2783. len = uint2korr(tmp_pk_data_ptr);
  2784. tmp_pk_data_ptr += 2;
  2785. }
  2786. else {
  2787. assert(false);
  2788. }
  2789. tmp_pk_data_ptr += len;
  2790. }
  2791. else {
  2792. assert(false);
  2793. }
  2794. }
  2795. //
  2796. // at this point, tmp_pk_data_ptr is pointing at the column
  2797. //
  2798. uint32_t is_fix_field = pk_info[2*index_in_pk];
  2799. if (is_fix_field == COL_FIX_FIELD) {
  2800. memcpy(packed_key_pos, tmp_pk_data_ptr, key_length);
  2801. packed_key_pos += key_length;
  2802. }
  2803. else if (is_fix_field == COL_VAR_FIELD) {
  2804. const uchar* data_start = NULL;
  2805. uint32_t data_size = 0;
  2806. uint32_t len_bytes = pk_info[2*index_in_pk + 1];
  2807. if (len_bytes == 1) {
  2808. data_size = tmp_pk_data_ptr[0];
  2809. tmp_pk_data_ptr++;
  2810. }
  2811. else if (len_bytes == 2) {
  2812. data_size = uint2korr(tmp_pk_data_ptr);
  2813. tmp_pk_data_ptr += 2;
  2814. }
  2815. else {
  2816. assert(false);
  2817. }
  2818. data_start = tmp_pk_data_ptr;
  2819. if (has_charset == COL_HAS_CHARSET) {
  2820. packed_key_pos = pack_toku_varstring_from_desc(
  2821. packed_key_pos,
  2822. data_start,
  2823. key_length,
  2824. data_size,
  2825. charset_num
  2826. );
  2827. }
  2828. else if (has_charset == COL_HAS_NO_CHARSET) {
  2829. packed_key_pos = pack_toku_varbinary_from_desc(
  2830. packed_key_pos,
  2831. data_start,
  2832. key_length,
  2833. data_size //length of field
  2834. );
  2835. }
  2836. else {
  2837. assert(false);
  2838. }
  2839. }
  2840. else {
  2841. assert(false);
  2842. }
  2843. }
  2844. else {
  2845. assert(false);
  2846. }
  2847. }
  2848. }
  2849. assert( (uint32_t)(desc_pos - (uchar *)row_desc) == row_desc_size);
  2850. //
  2851. // now append the primary key to the end of the key
  2852. //
  2853. if (hpk) {
  2854. memcpy(packed_key_pos, pk_key->data, pk_key->size);
  2855. packed_key_pos += pk_key->size;
  2856. }
  2857. else {
  2858. memcpy(packed_key_pos, (uchar *)pk_key->data + 1, pk_key->size - 1);
  2859. packed_key_pos += (pk_key->size - 1);
  2860. }
  2861. return (uint32_t)(packed_key_pos - buf); //
  2862. }
  2863. bool fields_have_same_name(
  2864. Field* a,
  2865. Field* b
  2866. )
  2867. {
  2868. return strcmp(a->field_name, b->field_name) == 0;
  2869. }
  2870. bool fields_are_same_type(
  2871. Field* a,
  2872. Field* b
  2873. )
  2874. {
  2875. bool retval = true;
  2876. enum_field_types a_mysql_type = a->real_type();
  2877. enum_field_types b_mysql_type = b->real_type();
  2878. TOKU_TYPE a_toku_type = mysql_to_toku_type(a);
  2879. TOKU_TYPE b_toku_type = mysql_to_toku_type(b);
  2880. // make sure have same names
  2881. // make sure have same types
  2882. if (a_mysql_type != b_mysql_type) {
  2883. retval = false;
  2884. goto cleanup;
  2885. }
  2886. // Thanks to MariaDB 5.5, we can have two fields
  2887. // be the same MySQL type but not the same toku type,
  2888. // This is an issue introduced with MariaDB's fractional time
  2889. // implementation
  2890. if (a_toku_type != b_toku_type) {
  2891. retval = false;
  2892. goto cleanup;
  2893. }
  2894. // make sure that either both are nullable, or both not nullable
  2895. if ((a->null_bit && !b->null_bit) || (!a->null_bit && b->null_bit)) {
  2896. retval = false;
  2897. goto cleanup;
  2898. }
  2899. switch (a_mysql_type) {
  2900. case MYSQL_TYPE_TINY:
  2901. case MYSQL_TYPE_SHORT:
  2902. case MYSQL_TYPE_INT24:
  2903. case MYSQL_TYPE_LONG:
  2904. case MYSQL_TYPE_LONGLONG:
  2905. // length, unsigned, auto increment
  2906. if (a->pack_length() != b->pack_length() ||
  2907. (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
  2908. (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
  2909. retval = false;
  2910. goto cleanup;
  2911. }
  2912. break;
  2913. case MYSQL_TYPE_DOUBLE:
  2914. case MYSQL_TYPE_FLOAT:
  2915. // length, unsigned, auto increment
  2916. if (a->pack_length() != b->pack_length() ||
  2917. (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG) ||
  2918. (a->flags & AUTO_INCREMENT_FLAG) != (b->flags & AUTO_INCREMENT_FLAG)) {
  2919. retval = false;
  2920. goto cleanup;
  2921. }
  2922. break;
  2923. case MYSQL_TYPE_NEWDECIMAL:
  2924. // length, unsigned
  2925. if (a->pack_length() != b->pack_length() ||
  2926. (a->flags & UNSIGNED_FLAG) != (b->flags & UNSIGNED_FLAG)) {
  2927. retval = false;
  2928. goto cleanup;
  2929. }
  2930. break;
  2931. case MYSQL_TYPE_ENUM:
  2932. case MYSQL_TYPE_SET:
  2933. case MYSQL_TYPE_BIT:
  2934. // length
  2935. if (a->pack_length() != b->pack_length()) {
  2936. retval = false;
  2937. goto cleanup;
  2938. }
  2939. break;
  2940. case MYSQL_TYPE_DATE:
  2941. case MYSQL_TYPE_DATETIME:
  2942. case MYSQL_TYPE_YEAR:
  2943. case MYSQL_TYPE_NEWDATE:
  2944. case MYSQL_TYPE_TIME:
  2945. case MYSQL_TYPE_TIMESTAMP:
  2946. #if 50600 <= MYSQL_VERSION_ID
  2947. case MYSQL_TYPE_DATETIME2:
  2948. case MYSQL_TYPE_TIMESTAMP2:
  2949. case MYSQL_TYPE_TIME2:
  2950. #endif
  2951. // length
  2952. if (a->pack_length() != b->pack_length()) {
  2953. retval = false;
  2954. goto cleanup;
  2955. }
  2956. break;
  2957. case MYSQL_TYPE_TINY_BLOB:
  2958. case MYSQL_TYPE_MEDIUM_BLOB:
  2959. case MYSQL_TYPE_BLOB:
  2960. case MYSQL_TYPE_LONG_BLOB:
  2961. // test the charset
  2962. if (a->charset()->number != b->charset()->number) {
  2963. retval = false;
  2964. goto cleanup;
  2965. }
  2966. if (a->row_pack_length() != b->row_pack_length()) {
  2967. retval = false;
  2968. goto cleanup;
  2969. }
  2970. break;
  2971. case MYSQL_TYPE_STRING:
  2972. if (a->pack_length() != b->pack_length()) {
  2973. retval = false;
  2974. goto cleanup;
  2975. }
  2976. // if both are binary, we know have same pack lengths,
  2977. // so we can goto end
  2978. if (a->binary() && b->binary()) {
  2979. // nothing to do, we are good
  2980. }
  2981. else if (!a->binary() && !b->binary()) {
  2982. // test the charset
  2983. if (a->charset()->number != b->charset()->number) {
  2984. retval = false;
  2985. goto cleanup;
  2986. }
  2987. }
  2988. else {
  2989. // one is binary and the other is not, so not the same
  2990. retval = false;
  2991. goto cleanup;
  2992. }
  2993. break;
  2994. case MYSQL_TYPE_VARCHAR:
  2995. if (a->field_length != b->field_length) {
  2996. retval = false;
  2997. goto cleanup;
  2998. }
  2999. // if both are binary, we know have same pack lengths,
  3000. // so we can goto end
  3001. if (a->binary() && b->binary()) {
  3002. // nothing to do, we are good
  3003. }
  3004. else if (!a->binary() && !b->binary()) {
  3005. // test the charset
  3006. if (a->charset()->number != b->charset()->number) {
  3007. retval = false;
  3008. goto cleanup;
  3009. }
  3010. }
  3011. else {
  3012. // one is binary and the other is not, so not the same
  3013. retval = false;
  3014. goto cleanup;
  3015. }
  3016. break;
  3017. //
  3018. // I believe these are old types that are no longer
  3019. // in any 5.1 tables, so tokudb does not need
  3020. // to worry about them
  3021. // Putting in this assert in case I am wrong.
  3022. // Do not support geometry yet.
  3023. //
  3024. case MYSQL_TYPE_GEOMETRY:
  3025. case MYSQL_TYPE_DECIMAL:
  3026. case MYSQL_TYPE_VAR_STRING:
  3027. case MYSQL_TYPE_NULL:
  3028. assert(false);
  3029. }
  3030. cleanup:
  3031. return retval;
  3032. }
  3033. bool are_two_fields_same(
  3034. Field* a,
  3035. Field* b
  3036. )
  3037. {
  3038. return fields_have_same_name(a, b) && fields_are_same_type(a, b);
  3039. }