/*****************************************************************************

Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/********************************************************************//**
@file page/page0cur.cc
The page cursor

Created 10/4/1994 Heikki Tuuri
*************************************************************************/

#include "page0cur.h"
#ifdef UNIV_NONINL
#include "page0cur.ic"
#endif

#include "page0zip.h"
#include "btr0btr.h"
#include "mtr0log.h"
#include "log0recv.h"
#include "ut0ut.h"
#ifndef UNIV_HOTBACKUP
#include "rem0cmp.h"

#ifdef PAGE_CUR_ADAPT
# ifdef UNIV_SEARCH_PERF_STAT
static ulint	page_cur_short_succ	= 0;
# endif /* UNIV_SEARCH_PERF_STAT */

/*******************************************************************//**
This is a linear congruential generator PRNG. Returns a pseudo-random
number between 0 and 2^64-1 inclusive. The formula and the constants
being used are:
X[n+1] = (a * X[n] + c) mod m
where:
X[0] = ut_time_us(NULL)
a = 1103515245 (3^5 * 5 * 7 * 129749)
c = 12345 (3 * 5 * 823)
m = 18446744073709551616 (2^64)
@return	number between 0 and 2^64-1 */
static
ib_uint64_t
page_cur_lcg_prng(void)
/*===================*/
{
#define LCG_a	1103515245
#define LCG_c	12345
	static ib_uint64_t	lcg_current = 0;
	static ibool		initialized = FALSE;

	if (!initialized) {
		lcg_current = (ib_uint64_t) ut_time_us(NULL);
		initialized = TRUE;
	}

	/* no need to "% 2^64" explicitly because lcg_current is
	64 bit and this will be done anyway */
	lcg_current = LCG_a * lcg_current + LCG_c;

	return(lcg_current);
}
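
/* Illustrative sketch (not part of the original source): one step of the
recurrence above in standalone C, making explicit that the "mod 2^64" is
just unsigned 64-bit wraparound. The helper name lcg_step is hypothetical. */
#if 0
#include <stdint.h>

static uint64_t
lcg_step(uint64_t x)
{
	/* X[n+1] = (a * X[n] + c) mod 2^64; the modulo is implicit
	in uint64_t arithmetic */
	return(UINT64_C(1103515245) * x + UINT64_C(12345));
}
#endif
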
/****************************************************************//**
Tries a search shortcut based on the last insert.
@return	TRUE on success */
UNIV_INLINE
ibool
page_cur_try_search_shortcut(
/*=========================*/
	const buf_block_t*	block,	/*!< in: index page */
	const dict_index_t*	index,	/*!< in: record descriptor */
	const dtuple_t*		tuple,	/*!< in: data tuple */
	ulint*			iup_matched_fields,
					/*!< in/out: already matched
					fields in upper limit record */
	ulint*			iup_matched_bytes,
					/*!< in/out: already matched
					bytes in a field not yet
					completely matched */
	ulint*			ilow_matched_fields,
					/*!< in/out: already matched
					fields in lower limit record */
	ulint*			ilow_matched_bytes,
					/*!< in/out: already matched
					bytes in a field not yet
					completely matched */
	page_cur_t*		cursor)	/*!< out: page cursor */
{
	const rec_t*	rec;
	const rec_t*	next_rec;
	ulint		low_match;
	ulint		low_bytes;
	ulint		up_match;
	ulint		up_bytes;
#ifdef UNIV_SEARCH_DEBUG
	page_cur_t	cursor2;
#endif
	ibool		success		= FALSE;
	const page_t*	page		= buf_block_get_frame(block);
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	ut_ad(dtuple_check_typed(tuple));

	rec = page_header_get_ptr(page, PAGE_LAST_INSERT);
	offsets = rec_get_offsets(rec, index, offsets,
				  dtuple_get_n_fields(tuple), &heap);

	ut_ad(rec);
	ut_ad(page_rec_is_user_rec(rec));

	ut_pair_min(&low_match, &low_bytes,
		    *ilow_matched_fields, *ilow_matched_bytes,
		    *iup_matched_fields, *iup_matched_bytes);

	up_match = low_match;
	up_bytes = low_bytes;

	if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets,
					   &low_match, &low_bytes) < 0) {
		goto exit_func;
	}

	next_rec = page_rec_get_next_const(rec);
	offsets = rec_get_offsets(next_rec, index, offsets,
				  dtuple_get_n_fields(tuple), &heap);

	if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets,
					   &up_match, &up_bytes) >= 0) {
		goto exit_func;
	}

	page_cur_position(rec, block, cursor);

#ifdef UNIV_SEARCH_DEBUG
	page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG,
				   iup_matched_fields,
				   iup_matched_bytes,
				   ilow_matched_fields,
				   ilow_matched_bytes,
				   &cursor2);
	ut_a(cursor2.rec == cursor->rec);

	if (!page_rec_is_supremum(next_rec)) {

		ut_a(*iup_matched_fields == up_match);
		ut_a(*iup_matched_bytes == up_bytes);
	}

	ut_a(*ilow_matched_fields == low_match);
	ut_a(*ilow_matched_bytes == low_bytes);
#endif
	if (!page_rec_is_supremum(next_rec)) {

		*iup_matched_fields = up_match;
		*iup_matched_bytes = up_bytes;
	}

	*ilow_matched_fields = low_match;
	*ilow_matched_bytes = low_bytes;

#ifdef UNIV_SEARCH_PERF_STAT
	page_cur_short_succ++;
#endif
	success = TRUE;
exit_func:
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(success);
}

#endif
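
/* Note on the shortcut above (a sketch of the invariant, not new
behavior): the shortcut succeeds exactly when the record rec pointed to
by PAGE_LAST_INSERT and its successor next_rec bracket the search tuple,

	rec <= tuple < next_rec

which is precisely the PAGE_CUR_LE position, so the cursor can be set
on rec without consulting the page directory at all. */
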
#ifdef PAGE_CUR_LE_OR_EXTENDS
/****************************************************************//**
Checks if the nth field in a record is a character type field which extends
the nth field in tuple, i.e., the field is longer or equal in length and has
common first characters.
@return	TRUE if rec field extends tuple field */
static
ibool
page_cur_rec_field_extends(
/*=======================*/
	const dtuple_t*	tuple,	/*!< in: data tuple */
	const rec_t*	rec,	/*!< in: record */
	const ulint*	offsets,/*!< in: array returned by rec_get_offsets() */
	ulint		n)	/*!< in: compare nth field */
{
	const dtype_t*	type;
	const dfield_t*	dfield;
	const byte*	rec_f;
	ulint		rec_f_len;

	ut_ad(rec_offs_validate(rec, NULL, offsets));
	dfield = dtuple_get_nth_field(tuple, n);

	type = dfield_get_type(dfield);

	rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len);

	if (type->mtype == DATA_VARCHAR
	    || type->mtype == DATA_CHAR
	    || type->mtype == DATA_FIXBINARY
	    || type->mtype == DATA_BINARY
	    || type->mtype == DATA_BLOB
	    || type->mtype == DATA_VARMYSQL
	    || type->mtype == DATA_MYSQL) {

		if (dfield_get_len(dfield) != UNIV_SQL_NULL
		    && rec_f_len != UNIV_SQL_NULL
		    && rec_f_len >= dfield_get_len(dfield)
		    && !cmp_data_data_slow(type->mtype, type->prtype,
					   dfield_get_data(dfield),
					   dfield_get_len(dfield),
					   rec_f, dfield_get_len(dfield))) {

			return(TRUE);
		}
	}

	return(FALSE);
}
#endif /* PAGE_CUR_LE_OR_EXTENDS */

/****************************************************************//**
Searches the right position for a page cursor. */
UNIV_INTERN
void
page_cur_search_with_match(
/*=======================*/
	const buf_block_t*	block,	/*!< in: buffer block */
	const dict_index_t*	index,	/*!< in: record descriptor */
	const dtuple_t*		tuple,	/*!< in: data tuple */
	ulint			mode,	/*!< in: PAGE_CUR_L,
					PAGE_CUR_LE, PAGE_CUR_G, or
					PAGE_CUR_GE */
	ulint*			iup_matched_fields,
					/*!< in/out: already matched
					fields in upper limit record */
	ulint*			iup_matched_bytes,
					/*!< in/out: already matched
					bytes in a field not yet
					completely matched */
	ulint*			ilow_matched_fields,
					/*!< in/out: already matched
					fields in lower limit record */
	ulint*			ilow_matched_bytes,
					/*!< in/out: already matched
					bytes in a field not yet
					completely matched */
	page_cur_t*		cursor)	/*!< out: page cursor */
{
	ulint		up;
	ulint		low;
	ulint		mid;
	const page_t*	page;
	const page_dir_slot_t*	slot;
	const rec_t*	up_rec;
	const rec_t*	low_rec;
	const rec_t*	mid_rec;
	ulint		up_matched_fields;
	ulint		up_matched_bytes;
	ulint		low_matched_fields;
	ulint		low_matched_bytes;
	ulint		cur_matched_fields;
	ulint		cur_matched_bytes;
	int		cmp;
#ifdef UNIV_SEARCH_DEBUG
	int		dbg_cmp;
	ulint		dbg_matched_fields;
	ulint		dbg_matched_bytes;
#endif
#ifdef UNIV_ZIP_DEBUG
	const page_zip_des_t*	page_zip = buf_block_get_page_zip(block);
#endif /* UNIV_ZIP_DEBUG */
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes
	      && ilow_matched_fields && ilow_matched_bytes && cursor);
	ut_ad(dtuple_validate(tuple));
#ifdef UNIV_DEBUG
# ifdef PAGE_CUR_DBG
	if (mode != PAGE_CUR_DBG)
# endif /* PAGE_CUR_DBG */
# ifdef PAGE_CUR_LE_OR_EXTENDS
		if (mode != PAGE_CUR_LE_OR_EXTENDS)
# endif /* PAGE_CUR_LE_OR_EXTENDS */
			ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE
			      || mode == PAGE_CUR_G || mode == PAGE_CUR_GE);
#endif /* UNIV_DEBUG */
	page = buf_block_get_frame(block);
#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */

	page_check_dir(page);

#ifdef PAGE_CUR_ADAPT
	if (page_is_leaf(page)
	    && (mode == PAGE_CUR_LE)
	    && (page_header_get_field(page, PAGE_N_DIRECTION) > 3)
	    && (page_header_get_ptr(page, PAGE_LAST_INSERT))
	    && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) {

		if (page_cur_try_search_shortcut(
			    block, index, tuple,
			    iup_matched_fields, iup_matched_bytes,
			    ilow_matched_fields, ilow_matched_bytes,
			    cursor)) {
			return;
		}
	}
# ifdef PAGE_CUR_DBG
	if (mode == PAGE_CUR_DBG) {
		mode = PAGE_CUR_LE;
	}
# endif
#endif

	/* The following flag does not work for non-latin1 char sets because
	cmp_full_field does not tell how many bytes matched */
#ifdef PAGE_CUR_LE_OR_EXTENDS
	ut_a(mode != PAGE_CUR_LE_OR_EXTENDS);
#endif /* PAGE_CUR_LE_OR_EXTENDS */

	/* If mode PAGE_CUR_G is specified, we are trying to position the
	cursor to answer a query of the form "tuple < X", where tuple is
	the input parameter, and X denotes an arbitrary physical record on
	the page. We want to position the cursor on the first X which
	satisfies the condition. */

	up_matched_fields  = *iup_matched_fields;
	up_matched_bytes   = *iup_matched_bytes;
	low_matched_fields = *ilow_matched_fields;
	low_matched_bytes  = *ilow_matched_bytes;

	/* Perform binary search. First the search is done through the page
	directory, after that as a linear search in the list of records
	owned by the upper limit directory slot. */

	low = 0;
	up = page_dir_get_n_slots(page) - 1;

	/* Perform binary search until the lower and upper limit directory
	slots come within a distance of 1 of each other */

	while (up - low > 1) {
		mid = (low + up) / 2;
		slot = page_dir_get_nth_slot(page, mid);
		mid_rec = page_dir_slot_get_rec(slot);

		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
			    low_matched_fields, low_matched_bytes,
			    up_matched_fields, up_matched_bytes);

		offsets = rec_get_offsets(mid_rec, index, offsets,
					  dtuple_get_n_fields_cmp(tuple),
					  &heap);

		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
						&cur_matched_fields,
						&cur_matched_bytes);
		if (UNIV_LIKELY(cmp > 0)) {
low_slot_match:
			low = mid;
			low_matched_fields = cur_matched_fields;
			low_matched_bytes = cur_matched_bytes;

		} else if (UNIV_EXPECT(cmp, -1)) {
#ifdef PAGE_CUR_LE_OR_EXTENDS
			if (mode == PAGE_CUR_LE_OR_EXTENDS
			    && page_cur_rec_field_extends(
				    tuple, mid_rec, offsets,
				    cur_matched_fields)) {

				goto low_slot_match;
			}
#endif /* PAGE_CUR_LE_OR_EXTENDS */
up_slot_match:
			up = mid;
			up_matched_fields = cur_matched_fields;
			up_matched_bytes = cur_matched_bytes;

		} else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
#ifdef PAGE_CUR_LE_OR_EXTENDS
			   || mode == PAGE_CUR_LE_OR_EXTENDS
#endif /* PAGE_CUR_LE_OR_EXTENDS */
			   ) {

			goto low_slot_match;
		} else {

			goto up_slot_match;
		}
	}

	slot = page_dir_get_nth_slot(page, low);
	low_rec = page_dir_slot_get_rec(slot);
	slot = page_dir_get_nth_slot(page, up);
	up_rec = page_dir_slot_get_rec(slot);

	/* Perform a linear search until the upper and lower records come
	within a distance of 1 of each other. */

	while (page_rec_get_next_const(low_rec) != up_rec) {

		mid_rec = page_rec_get_next_const(low_rec);

		ut_pair_min(&cur_matched_fields, &cur_matched_bytes,
			    low_matched_fields, low_matched_bytes,
			    up_matched_fields, up_matched_bytes);

		offsets = rec_get_offsets(mid_rec, index, offsets,
					  dtuple_get_n_fields_cmp(tuple),
					  &heap);

		cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets,
						&cur_matched_fields,
						&cur_matched_bytes);
		if (UNIV_LIKELY(cmp > 0)) {
low_rec_match:
			low_rec = mid_rec;
			low_matched_fields = cur_matched_fields;
			low_matched_bytes = cur_matched_bytes;

		} else if (UNIV_EXPECT(cmp, -1)) {
#ifdef PAGE_CUR_LE_OR_EXTENDS
			if (mode == PAGE_CUR_LE_OR_EXTENDS
			    && page_cur_rec_field_extends(
				    tuple, mid_rec, offsets,
				    cur_matched_fields)) {

				goto low_rec_match;
			}
#endif /* PAGE_CUR_LE_OR_EXTENDS */
up_rec_match:
			up_rec = mid_rec;
			up_matched_fields = cur_matched_fields;
			up_matched_bytes = cur_matched_bytes;
		} else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE
#ifdef PAGE_CUR_LE_OR_EXTENDS
			   || mode == PAGE_CUR_LE_OR_EXTENDS
#endif /* PAGE_CUR_LE_OR_EXTENDS */
			   ) {

			goto low_rec_match;
		} else {

			goto up_rec_match;
		}
	}

#ifdef UNIV_SEARCH_DEBUG

	/* Check that the lower and upper limit records have the
	right alphabetical order compared to tuple. */
	dbg_matched_fields = 0;
	dbg_matched_bytes = 0;

	offsets = rec_get_offsets(low_rec, index, offsets,
				  ULINT_UNDEFINED, &heap);
	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets,
						 &dbg_matched_fields,
						 &dbg_matched_bytes);
	if (mode == PAGE_CUR_G) {
		ut_a(dbg_cmp >= 0);
	} else if (mode == PAGE_CUR_GE) {
		ut_a(dbg_cmp == 1);
	} else if (mode == PAGE_CUR_L) {
		ut_a(dbg_cmp == 1);
	} else if (mode == PAGE_CUR_LE) {
		ut_a(dbg_cmp >= 0);
	}

	if (!page_rec_is_infimum(low_rec)) {

		ut_a(low_matched_fields == dbg_matched_fields);
		ut_a(low_matched_bytes == dbg_matched_bytes);
	}

	dbg_matched_fields = 0;
	dbg_matched_bytes = 0;

	offsets = rec_get_offsets(up_rec, index, offsets,
				  ULINT_UNDEFINED, &heap);
	dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets,
						 &dbg_matched_fields,
						 &dbg_matched_bytes);
	if (mode == PAGE_CUR_G) {
		ut_a(dbg_cmp == -1);
	} else if (mode == PAGE_CUR_GE) {
		ut_a(dbg_cmp <= 0);
	} else if (mode == PAGE_CUR_L) {
		ut_a(dbg_cmp <= 0);
	} else if (mode == PAGE_CUR_LE) {
		ut_a(dbg_cmp == -1);
	}

	if (!page_rec_is_supremum(up_rec)) {

		ut_a(up_matched_fields == dbg_matched_fields);
		ut_a(up_matched_bytes == dbg_matched_bytes);
	}
#endif
	if (mode <= PAGE_CUR_GE) {
		page_cur_position(up_rec, block, cursor);
	} else {
		page_cur_position(low_rec, block, cursor);
	}

	*iup_matched_fields  = up_matched_fields;
	*iup_matched_bytes   = up_matched_bytes;
	*ilow_matched_fields = low_matched_fields;
	*ilow_matched_bytes  = low_matched_bytes;
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
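
/* Sketch of the two-phase search above (illustrative only; slot_rec()
and cmp() are hypothetical shorthands for page_dir_slot_get_rec() and
cmp_dtuple_rec_with_match()): */
#if 0
	low = 0;
	up = page_dir_get_n_slots(page) - 1;

	while (up - low > 1) {		/* phase 1: directory slots */
		mid = (low + up) / 2;

		if (cmp(tuple, slot_rec(page, mid)) > 0) {
			low = mid;	/* tuple is above the middle slot */
		} else {
			up = mid;	/* tuple is at or below it */
		}
	}

	/* phase 2: walk the few records owned by slot "up", maintaining
	low_rec/up_rec exactly as the slots were maintained above */
#endif
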
/***********************************************************//**
Positions a page cursor on a randomly chosen user record on a page. If there
are no user records, sets the cursor on the infimum record. */
UNIV_INTERN
void
page_cur_open_on_rnd_user_rec(
/*==========================*/
	buf_block_t*	block,	/*!< in: page */
	page_cur_t*	cursor)	/*!< out: page cursor */
{
	ulint	rnd;
	ulint	n_recs = page_get_n_recs(buf_block_get_frame(block));

	page_cur_set_before_first(block, cursor);

	if (UNIV_UNLIKELY(n_recs == 0)) {

		return;
	}

	rnd = (ulint) (page_cur_lcg_prng() % n_recs);

	do {
		page_cur_move_to_next(cursor);
	} while (rnd--);
}

/***********************************************************//**
Writes the log record of a record insert on a page. */
static
void
page_cur_insert_rec_write_log(
/*==========================*/
	rec_t*		insert_rec,	/*!< in: inserted physical record */
	ulint		rec_size,	/*!< in: insert_rec size */
	rec_t*		cursor_rec,	/*!< in: record the
					cursor is pointing to */
	dict_index_t*	index,		/*!< in: record descriptor */
	mtr_t*		mtr)		/*!< in: mini-transaction handle */
{
	ulint		cur_rec_size;
	ulint		extra_size;
	ulint		cur_extra_size;
	const byte*	ins_ptr;
	byte*		log_ptr;
	const byte*	log_end;
	ulint		i;

	ut_a(rec_size < UNIV_PAGE_SIZE);
	ut_ad(page_align(insert_rec) == page_align(cursor_rec));
	ut_ad(!page_rec_is_comp(insert_rec)
	      == !dict_table_is_comp(index->table));

	{
		mem_heap_t*	heap		= NULL;
		ulint		cur_offs_[REC_OFFS_NORMAL_SIZE];
		ulint		ins_offs_[REC_OFFS_NORMAL_SIZE];

		ulint*		cur_offs;
		ulint*		ins_offs;

		rec_offs_init(cur_offs_);
		rec_offs_init(ins_offs_);

		cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_,
					   ULINT_UNDEFINED, &heap);
		ins_offs = rec_get_offsets(insert_rec, index, ins_offs_,
					   ULINT_UNDEFINED, &heap);

		extra_size = rec_offs_extra_size(ins_offs);
		cur_extra_size = rec_offs_extra_size(cur_offs);
		ut_ad(rec_size == rec_offs_size(ins_offs));
		cur_rec_size = rec_offs_size(cur_offs);

		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
	}

	ins_ptr = insert_rec - extra_size;

	i = 0;

	if (cur_extra_size == extra_size) {
		ulint		min_rec_size = ut_min(cur_rec_size, rec_size);

		const byte*	cur_ptr = cursor_rec - cur_extra_size;

		/* Find out the first byte in insert_rec which differs from
		cursor_rec; skip the bytes in the record info */

		do {
			if (*ins_ptr == *cur_ptr) {
				i++;
				ins_ptr++;
				cur_ptr++;
			} else if ((i < extra_size)
				   && (i >= extra_size
				       - page_rec_get_base_extra_size
				       (insert_rec))) {
				i = extra_size;
				ins_ptr = insert_rec;
				cur_ptr = cursor_rec;
			} else {
				break;
			}
		} while (i < min_rec_size);
	}

	if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) {

		if (page_rec_is_comp(insert_rec)) {
			log_ptr = mlog_open_and_write_index(
				mtr, insert_rec, index, MLOG_COMP_REC_INSERT,
				2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
			if (UNIV_UNLIKELY(!log_ptr)) {
				/* Logging in mtr is switched off
				during crash recovery: in that case
				mlog_open returns NULL */
				return;
			}
		} else {
			log_ptr = mlog_open(mtr, 11
					    + 2 + 5 + 1 + 5 + 5
					    + MLOG_BUF_MARGIN);
			if (UNIV_UNLIKELY(!log_ptr)) {
				/* Logging in mtr is switched off
				during crash recovery: in that case
				mlog_open returns NULL */
				return;
			}

			log_ptr = mlog_write_initial_log_record_fast(
				insert_rec, MLOG_REC_INSERT, log_ptr, mtr);
		}

		log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
		/* Write the cursor rec offset as a 2-byte ulint */
		mach_write_to_2(log_ptr, page_offset(cursor_rec));
		log_ptr += 2;
	} else {
		log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN);
		if (!log_ptr) {
			/* Logging in mtr is switched off during crash
			recovery: in that case mlog_open returns NULL */
			return;
		}
		log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN];
	}

	if (page_rec_is_comp(insert_rec)) {
		if (UNIV_UNLIKELY
		    (rec_get_info_and_status_bits(insert_rec, TRUE)
		     != rec_get_info_and_status_bits(cursor_rec, TRUE))) {

			goto need_extra_info;
		}
	} else {
		if (UNIV_UNLIKELY
		    (rec_get_info_and_status_bits(insert_rec, FALSE)
		     != rec_get_info_and_status_bits(cursor_rec, FALSE))) {

			goto need_extra_info;
		}
	}

	if (extra_size != cur_extra_size || rec_size != cur_rec_size) {
need_extra_info:
		/* Write the record end segment length
		and the extra info storage flag */
		log_ptr += mach_write_compressed(log_ptr,
						 2 * (rec_size - i) + 1);

		/* Write the info bits */
		mach_write_to_1(log_ptr,
				rec_get_info_and_status_bits(
					insert_rec,
					page_rec_is_comp(insert_rec)));
		log_ptr++;

		/* Write the record origin offset */
		log_ptr += mach_write_compressed(log_ptr, extra_size);

		/* Write the mismatch index */
		log_ptr += mach_write_compressed(log_ptr, i);

		ut_a(i < UNIV_PAGE_SIZE);
		ut_a(extra_size < UNIV_PAGE_SIZE);
	} else {
		/* Write the record end segment length
		and the extra info storage flag */
		log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i));
	}

	/* Write to the log the inserted index record end segment which
	differs from the cursor record */

	rec_size -= i;

	if (log_ptr + rec_size <= log_end) {
		memcpy(log_ptr, ins_ptr, rec_size);
		mlog_close(mtr, log_ptr + rec_size);
	} else {
		mlog_close(mtr, log_ptr);
		ut_a(rec_size < UNIV_PAGE_SIZE);
		mlog_catenate_string(mtr, ins_ptr, rec_size);
	}
}
#else /* !UNIV_HOTBACKUP */
# define page_cur_insert_rec_write_log(ins_rec,size,cur,index,mtr) ((void) 0)
#endif /* !UNIV_HOTBACKUP */
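
/* Byte layout of the insert record body written above (assembled from
the mach_write_* calls in page_cur_insert_rec_write_log; "c" denotes a
compressed variable-length integer):

	2	offset of the cursor record on the page
		(omitted in MTR_LOG_SHORT_INSERTS mode)
	c	2 * (rec_size - mismatch index), low bit set if the
		extra info fields below are present
	1	info and status bits		(only if the flag is set)
	c	record origin offset (extra_size)
						(only if the flag is set)
	c	mismatch index i		(only if the flag is set)
	...	the last (rec_size - i) bytes of the inserted record */
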
/***********************************************************//**
Parses a log record of a record insert on a page.
@return	end of log record or NULL */
UNIV_INTERN
byte*
page_cur_parse_insert_rec(
/*======================*/
	ibool		is_short,/*!< in: TRUE if short inserts */
	byte*		ptr,	/*!< in: buffer */
	byte*		end_ptr,/*!< in: buffer end */
	buf_block_t*	block,	/*!< in: page or NULL */
	dict_index_t*	index,	/*!< in: record descriptor */
	mtr_t*		mtr)	/*!< in: mtr or NULL */
{
	ulint	origin_offset;
	ulint	end_seg_len;
	ulint	mismatch_index;
	page_t*	page;
	rec_t*	cursor_rec;
	byte	buf1[1024];
	byte*	buf;
	byte*	ptr2			= ptr;
	ulint	info_and_status_bits	= 0; /* remove warning */
	page_cur_t	cursor;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	page = block ? buf_block_get_frame(block) : NULL;

	if (is_short) {
		cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
	} else {
		ulint	offset;

		/* Read the cursor rec offset as a 2-byte ulint */

		if (UNIV_UNLIKELY(end_ptr < ptr + 2)) {

			return(NULL);
		}

		offset = mach_read_from_2(ptr);
		ptr += 2;

		cursor_rec = page + offset;

		if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) {

			recv_sys->found_corrupt_log = TRUE;

			return(NULL);
		}
	}

	ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);

	if (ptr == NULL) {

		return(NULL);
	}

	if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (end_seg_len & 0x1UL) {
		/* Read the info bits */

		if (end_ptr < ptr + 1) {

			return(NULL);
		}

		info_and_status_bits = mach_read_from_1(ptr);
		ptr++;

		ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);

		if (ptr == NULL) {

			return(NULL);
		}

		ut_a(origin_offset < UNIV_PAGE_SIZE);

		ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);

		if (ptr == NULL) {

			return(NULL);
		}

		ut_a(mismatch_index < UNIV_PAGE_SIZE);
	}

	if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) {

		return(NULL);
	}

	if (!block) {

		return(ptr + (end_seg_len >> 1));
	}

	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
	ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));

	/* Read from the log the inserted index record end segment which
	differs from the cursor record */

	offsets = rec_get_offsets(cursor_rec, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (!(end_seg_len & 0x1UL)) {
		info_and_status_bits = rec_get_info_and_status_bits(
			cursor_rec, page_is_comp(page));
		origin_offset = rec_offs_extra_size(offsets);
		mismatch_index = rec_offs_size(offsets) - (end_seg_len >> 1);
	}

	end_seg_len >>= 1;

	if (mismatch_index + end_seg_len < sizeof buf1) {
		buf = buf1;
	} else {
		buf = static_cast<byte*>(
			mem_alloc(mismatch_index + end_seg_len));
	}

	/* Build the inserted record to buf */

	if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) {
		fprintf(stderr,
			"Is short %lu, info_and_status_bits %lu, offset %lu, "
			"o_offset %lu\n"
			"mismatch index %lu, end_seg_len %lu\n"
			"parsed len %lu\n",
			(ulong) is_short, (ulong) info_and_status_bits,
			(ulong) page_offset(cursor_rec),
			(ulong) origin_offset,
			(ulong) mismatch_index, (ulong) end_seg_len,
			(ulong) (ptr - ptr2));

		fputs("Dump of 300 bytes of log:\n", stderr);
		ut_print_buf(stderr, ptr2, 300);
		putc('\n', stderr);

		buf_page_print(page, 0, 0);

		ut_error;
	}

	ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
	ut_memcpy(buf + mismatch_index, ptr, end_seg_len);

	if (page_is_comp(page)) {
		rec_set_info_and_status_bits(buf + origin_offset,
					     info_and_status_bits);
	} else {
		rec_set_info_bits_old(buf + origin_offset,
				      info_and_status_bits);
	}

	page_cur_position(cursor_rec, block, &cursor);

	offsets = rec_get_offsets(buf + origin_offset, index, offsets,
				  ULINT_UNDEFINED, &heap);
	if (UNIV_UNLIKELY(!page_cur_rec_insert(&cursor,
					       buf + origin_offset,
					       index, offsets, mtr))) {
		/* The redo log record should only have been written
		after the write was successful. */
		ut_error;
	}

	if (buf != buf1) {
		mem_free(buf);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ptr + end_seg_len);
}
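
/* Reconstruction sketch for the parser above (restating the two
ut_memcpy() calls): the inserted record shares its first mismatch_index
bytes with the cursor record, so recovery rebuilds it as

	buf[0 .. mismatch_index)		copied from cursor_rec
	buf[mismatch_index .. + end_seg_len)	copied from the log body

and then inserts buf + origin_offset with page_cur_rec_insert(). */
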
/***********************************************************//**
Inserts a record next to page cursor on an uncompressed page.
Returns a pointer to the inserted record on success, i.e., if there is
enough space available; NULL otherwise. The cursor stays at the same
position.
@return	pointer to record on success, NULL otherwise */
UNIV_INTERN
rec_t*
page_cur_insert_rec_low(
/*====================*/
	rec_t*		current_rec,/*!< in: pointer to current record after
				which the new record is inserted */
	dict_index_t*	index,	/*!< in: record descriptor */
	const rec_t*	rec,	/*!< in: pointer to a physical record */
	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
{
	byte*		insert_buf;
	ulint		rec_size;
	page_t*		page;		/*!< the relevant page */
	rec_t*		last_insert;	/*!< cursor position at previous
					insert */
	rec_t*		free_rec;	/*!< a free record that was reused,
					or NULL */
	rec_t*		insert_rec;	/*!< inserted record */
	ulint		heap_no;	/*!< heap number of the inserted
					record */

	ut_ad(rec_offs_validate(rec, index, offsets));

	page = page_align(current_rec);
	ut_ad(dict_table_is_comp(index->table)
	      == (ibool) !!page_is_comp(page));
	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);

	ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
	      == index->id || recv_recovery_is_on()
	      || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));

	ut_ad(!page_rec_is_supremum(current_rec));

	/* 1. Get the size of the physical record in the page */
	rec_size = rec_offs_size(offsets);

#ifdef UNIV_DEBUG_VALGRIND
	{
		const void*	rec_start
			= rec - rec_offs_extra_size(offsets);
		ulint		extra_size
			= rec_offs_extra_size(offsets)
			- (rec_offs_comp(offsets)
			   ? REC_N_NEW_EXTRA_BYTES
			   : REC_N_OLD_EXTRA_BYTES);

		/* All data bytes of the record must be valid. */
		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
		/* The variable-length header must be valid. */
		UNIV_MEM_ASSERT_RW(rec_start, extra_size);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	/* 2. Try to find suitable space from page memory management */

	free_rec = page_header_get_ptr(page, PAGE_FREE);
	if (UNIV_LIKELY_NULL(free_rec)) {
		/* Try to allocate from the head of the free list. */
		ulint		foffsets_[REC_OFFS_NORMAL_SIZE];
		ulint*		foffsets	= foffsets_;
		mem_heap_t*	heap		= NULL;

		rec_offs_init(foffsets_);

		foffsets = rec_get_offsets(
			free_rec, index, foffsets, ULINT_UNDEFINED, &heap);
		if (rec_offs_size(foffsets) < rec_size) {
			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}

			goto use_heap;
		}

		insert_buf = free_rec - rec_offs_extra_size(foffsets);

		if (page_is_comp(page)) {
			heap_no = rec_get_heap_no_new(free_rec);
			page_mem_alloc_free(page, NULL,
					    rec_get_next_ptr(free_rec, TRUE),
					    rec_size);
		} else {
			heap_no = rec_get_heap_no_old(free_rec);
			page_mem_alloc_free(page, NULL,
					    rec_get_next_ptr(free_rec, FALSE),
					    rec_size);
		}

		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
	} else {
use_heap:
		free_rec = NULL;
		insert_buf = page_mem_alloc_heap(page, NULL,
						 rec_size, &heap_no);

		if (UNIV_UNLIKELY(insert_buf == NULL)) {
			return(NULL);
		}
	}

	/* 3. Create the record */
	insert_rec = rec_copy(insert_buf, rec, offsets);
	rec_offs_make_valid(insert_rec, index, offsets);

	/* 4. Insert the record in the linked list of records */
	ut_ad(current_rec != insert_rec);

	{
		/* next record after current before the insertion */
		rec_t*	next_rec = page_rec_get_next(current_rec);
#ifdef UNIV_DEBUG
		if (page_is_comp(page)) {
			ut_ad(rec_get_status(current_rec)
			      <= REC_STATUS_INFIMUM);
			ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
			ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);
		}
#endif
		page_rec_set_next(insert_rec, next_rec);
		page_rec_set_next(current_rec, insert_rec);
	}

	page_header_set_field(page, NULL, PAGE_N_RECS,
			      1 + page_get_n_recs(page));

	/* 5. Set the n_owned field in the inserted record to zero,
	and set the heap_no field */
	if (page_is_comp(page)) {
		rec_set_n_owned_new(insert_rec, NULL, 0);
		rec_set_heap_no_new(insert_rec, heap_no);
	} else {
		rec_set_n_owned_old(insert_rec, 0);
		rec_set_heap_no_old(insert_rec, heap_no);
	}

	UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
			   rec_offs_size(offsets));

	/* 6. Update the last insertion info in page header */

	last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
	ut_ad(!last_insert || !page_is_comp(page)
	      || rec_get_node_ptr_flag(last_insert)
	      == rec_get_node_ptr_flag(insert_rec));

	if (UNIV_UNLIKELY(last_insert == NULL)) {
		page_header_set_field(page, NULL, PAGE_DIRECTION,
				      PAGE_NO_DIRECTION);
		page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);

	} else if ((last_insert == current_rec)
		   && (page_header_get_field(page, PAGE_DIRECTION)
		       != PAGE_LEFT)) {

		page_header_set_field(page, NULL, PAGE_DIRECTION,
				      PAGE_RIGHT);
		page_header_set_field(page, NULL, PAGE_N_DIRECTION,
				      page_header_get_field(
					      page, PAGE_N_DIRECTION) + 1);

	} else if ((page_rec_get_next(insert_rec) == last_insert)
		   && (page_header_get_field(page, PAGE_DIRECTION)
		       != PAGE_RIGHT)) {

		page_header_set_field(page, NULL, PAGE_DIRECTION,
				      PAGE_LEFT);
		page_header_set_field(page, NULL, PAGE_N_DIRECTION,
				      page_header_get_field(
					      page, PAGE_N_DIRECTION) + 1);
	} else {
		page_header_set_field(page, NULL, PAGE_DIRECTION,
				      PAGE_NO_DIRECTION);
		page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0);
	}

	page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec);

	/* 7. It remains to update the owner record. */
	{
		rec_t*	owner_rec	= page_rec_find_owner_rec(insert_rec);
		ulint	n_owned;
		if (page_is_comp(page)) {
			n_owned = rec_get_n_owned_new(owner_rec);
			rec_set_n_owned_new(owner_rec, NULL, n_owned + 1);
		} else {
			n_owned = rec_get_n_owned_old(owner_rec);
			rec_set_n_owned_old(owner_rec, n_owned + 1);
		}

		/* 8. Now we have incremented the n_owned field of the owner
		record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
		we have to split the corresponding directory slot in two. */

		if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
			page_dir_split_slot(
				page, NULL,
				page_dir_find_owner_slot(owner_rec));
		}
	}

	/* 9. Write log record of the insert */
	if (UNIV_LIKELY(mtr != NULL)) {
		page_cur_insert_rec_write_log(insert_rec, rec_size,
					      current_rec, index, mtr);
	}

	btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert");

	return(insert_rec);
}
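
/* Summary of step 2 above (a restatement of the code, not new
behavior): if the head of the PAGE_FREE list is at least rec_size bytes,
the new record reuses that freed slot and inherits its heap number;
otherwise the record is carved from PAGE_HEAP_TOP by
page_mem_alloc_heap(), which assigns a fresh heap number. */
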
/***********************************************************//**
Inserts a record next to the page cursor on a compressed page, keeping
the uncompressed copy of the page in sync. Returns a pointer to the
inserted record on success, i.e., if there is enough space available;
NULL otherwise. The cursor stays at the same position.

IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
if this is a compressed leaf page in a secondary index.
This has to be done either within the same mini-transaction,
or by invoking ibuf_reset_free_bits() before mtr_commit().
@return	pointer to record on success, NULL otherwise */
UNIV_INTERN
rec_t*
page_cur_insert_rec_zip(
/*====================*/
	page_cur_t*	cursor,	/*!< in/out: page cursor */
	dict_index_t*	index,	/*!< in: record descriptor */
	const rec_t*	rec,	/*!< in: pointer to a physical record */
	ulint*		offsets,/*!< in/out: rec_get_offsets(rec, index) */
	mtr_t*		mtr)	/*!< in: mini-transaction handle, or NULL */
{
	byte*		insert_buf;
	ulint		rec_size;
	page_t*		page;		/*!< the relevant page */
	rec_t*		last_insert;	/*!< cursor position at previous
					insert */
	rec_t*		free_rec;	/*!< a free record that was reused,
					or NULL */
	rec_t*		insert_rec;	/*!< inserted record */
	ulint		heap_no;	/*!< heap number of the inserted
					record */
	page_zip_des_t*	page_zip;

	page_zip = page_cur_get_page_zip(cursor);
	ut_ad(page_zip);

	ut_ad(rec_offs_validate(rec, index, offsets));

	page = page_cur_get_page(cursor);
	ut_ad(dict_table_is_comp(index->table));
	ut_ad(page_is_comp(page));
	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
	ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
	      == index->id || recv_recovery_is_on()
	      || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));

	ut_ad(!page_cur_is_after_last(cursor));
#ifdef UNIV_ZIP_DEBUG
	ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */

	/* 1. Get the size of the physical record in the page */
	rec_size = rec_offs_size(offsets);

#ifdef UNIV_DEBUG_VALGRIND
	{
		const void*	rec_start
			= rec - rec_offs_extra_size(offsets);
		ulint		extra_size
			= rec_offs_extra_size(offsets)
			- (rec_offs_comp(offsets)
			   ? REC_N_NEW_EXTRA_BYTES
			   : REC_N_OLD_EXTRA_BYTES);

		/* All data bytes of the record must be valid. */
		UNIV_MEM_ASSERT_RW(rec, rec_offs_data_size(offsets));
		/* The variable-length header must be valid. */
		UNIV_MEM_ASSERT_RW(rec_start, extra_size);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	const bool reorg_before_insert = page_has_garbage(page)
		&& rec_size > page_get_max_insert_size(page, 1)
		&& rec_size <= page_get_max_insert_size_after_reorganize(
			page, 1);

	/* 2. Try to find suitable space from page memory management */
	if (!page_zip_available(page_zip, dict_index_is_clust(index),
				rec_size, 1)
	    || reorg_before_insert) {
		/* The values can change dynamically. */
		bool	log_compressed	= page_zip_log_pages;
		ulint	level		= page_zip_level;
#ifdef UNIV_DEBUG
		rec_t*	cursor_rec	= page_cur_get_rec(cursor);
#endif /* UNIV_DEBUG */

		/* If we are not writing compressed page images, we
		must reorganize the page before attempting the
		insert. */
		if (recv_recovery_is_on()) {
			/* Insert into the uncompressed page only.
			The page reorganization or creation that we
			would attempt outside crash recovery would
			have been covered by a previous redo log record. */
		} else if (page_is_empty(page)) {
			ut_ad(page_cur_is_before_first(cursor));

			/* This is an empty page. Recreate it to
			get rid of the modification log. */
			page_create_zip(page_cur_get_block(cursor), index,
					page_header_get_field(page,
							      PAGE_LEVEL),
					0, mtr);
			ut_ad(!page_header_get_ptr(page, PAGE_FREE));

			if (page_zip_available(
				    page_zip, dict_index_is_clust(index),
				    rec_size, 1)) {
				goto use_heap;
			}

			/* The cursor should remain on the page infimum. */
			return(NULL);
		} else if (!page_zip->m_nonempty && !page_has_garbage(page)) {
			/* The page has been freshly compressed, so
			reorganizing it will not help. */
		} else if (log_compressed && !reorg_before_insert) {
			/* Insert into uncompressed page only, and
			try page_zip_reorganize() afterwards. */
		} else if (btr_page_reorganize_low(
				   recv_recovery_is_on(), level,
				   cursor, index, mtr)) {
			ut_ad(!page_header_get_ptr(page, PAGE_FREE));

			if (page_zip_available(
				    page_zip, dict_index_is_clust(index),
				    rec_size, 1)) {
				/* After reorganizing, there is space
				available. */
				goto use_heap;
			}
		} else {
			ut_ad(cursor->rec == cursor_rec);
			return(NULL);
		}

		/* Try compressing the whole page afterwards. */
		insert_rec = page_cur_insert_rec_low(
			cursor->rec, index, rec, offsets, NULL);

		/* If recovery is on, this implies that the compression
		of the page was successful during runtime. Had that not
		been the case or had the redo logging of compressed
		pages been enabled during runtime then we'd have seen
		a MLOG_ZIP_PAGE_COMPRESS redo record. Therefore, we
		know that we don't need to reorganize the page. We,
		however, do need to recompress the page. That will
		happen when the next redo record is read, which must
		be of type MLOG_ZIP_PAGE_COMPRESS_NO_DATA and must
		contain a valid compression level value.

		This implies that during recovery from this point till
		the next redo is applied, the uncompressed and
		compressed versions are not identical and
		page_zip_validate will fail, but that is OK because
		we call page_zip_validate only after processing
		all changes to a page under a single mtr during
		recovery. */
		if (insert_rec == NULL) {
			/* Out of space.
			This should never occur during crash recovery,
			because the MLOG_COMP_REC_INSERT should only
			be logged after a successful operation. */
			ut_ad(!recv_recovery_is_on());
		} else if (recv_recovery_is_on()) {
			/* This should be followed by
			MLOG_ZIP_PAGE_COMPRESS_NO_DATA,
			which should succeed. */
			rec_offs_make_valid(insert_rec, index, offsets);
		} else {
			ulint	pos = page_rec_get_n_recs_before(insert_rec);
			ut_ad(pos > 0);

			if (!log_compressed) {
				if (page_zip_compress(
					    page_zip, page, index,
					    level, NULL)) {
					page_cur_insert_rec_write_log(
						insert_rec, rec_size,
						cursor->rec, index, mtr);
					page_zip_compress_write_log_no_data(
						level, page, index, mtr);

					rec_offs_make_valid(
						insert_rec, index, offsets);
					return(insert_rec);
				}

				ut_ad(cursor->rec
				      == (pos > 1
					  ? page_rec_get_nth(
						  page, pos - 1)
					  : page + PAGE_NEW_INFIMUM));
			} else {
				/* We are writing entire page images
				to the log. Reduce the redo log volume
				by reorganizing the page at the same time. */
				if (page_zip_reorganize(
					    cursor->block, index, mtr)) {
					/* The page was reorganized:
					Seek to pos. */
					if (pos > 1) {
						cursor->rec = page_rec_get_nth(
							page, pos - 1);
					} else {
						cursor->rec = page
							+ PAGE_NEW_INFIMUM;
					}

					insert_rec = page + rec_get_next_offs(
						cursor->rec, TRUE);
					rec_offs_make_valid(
						insert_rec, index, offsets);
					return(insert_rec);
				}

				/* Theoretically, we could try one
				last resort of btr_page_reorganize_low()
				followed by page_zip_available(), but
				that would be very unlikely to
				succeed. (If the full reorganized page
				failed to compress, why would it
				succeed to compress the page, plus log
				the insert of this record?) */
			}

			/* Out of space: restore the page */
			btr_blob_dbg_remove(page, index, "insert_zip_fail");
			if (!page_zip_decompress(page_zip, page, FALSE)) {
				ut_error; /* Memory corrupted? */
			}
			ut_ad(page_validate(page, index));
			btr_blob_dbg_add(page, index, "insert_zip_fail");
			insert_rec = NULL;
		}

		return(insert_rec);
	}

	free_rec = page_header_get_ptr(page, PAGE_FREE);
	if (UNIV_LIKELY_NULL(free_rec)) {
		/* Try to allocate from the head of the free list. */
		lint		extra_size_diff;
		ulint		foffsets_[REC_OFFS_NORMAL_SIZE];
		ulint*		foffsets	= foffsets_;
		mem_heap_t*	heap		= NULL;

		rec_offs_init(foffsets_);

		foffsets = rec_get_offsets(free_rec, index, foffsets,
					   ULINT_UNDEFINED, &heap);
		if (rec_offs_size(foffsets) < rec_size) {
too_small:
			if (UNIV_LIKELY_NULL(heap)) {
				mem_heap_free(heap);
			}

			goto use_heap;
		}

		insert_buf = free_rec - rec_offs_extra_size(foffsets);

		/* On compressed pages, do not relocate records from
		the free list. If extra_size would grow, use the heap. */
		extra_size_diff
			= rec_offs_extra_size(offsets)
			- rec_offs_extra_size(foffsets);

		if (UNIV_UNLIKELY(extra_size_diff < 0)) {
			/* Add an offset to the extra_size. */
			if (rec_offs_size(foffsets)
			    < rec_size - extra_size_diff) {

				goto too_small;
			}

			insert_buf -= extra_size_diff;
		} else if (UNIV_UNLIKELY(extra_size_diff)) {
			/* Do not allow extra_size to grow */
			goto too_small;
		}

		heap_no = rec_get_heap_no_new(free_rec);
		page_mem_alloc_free(page, page_zip,
				    rec_get_next_ptr(free_rec, TRUE),
				    rec_size);

		if (!page_is_leaf(page)) {
			/* Zero out the node pointer of free_rec,
			in case it will not be overwritten by
			insert_rec. */

			ut_ad(rec_size > REC_NODE_PTR_SIZE);

			if (rec_offs_extra_size(foffsets)
			    + rec_offs_data_size(foffsets) > rec_size) {

				memset(rec_get_end(free_rec, foffsets)
				       - REC_NODE_PTR_SIZE, 0,
				       REC_NODE_PTR_SIZE);
			}
		} else if (dict_index_is_clust(index)) {
			/* Zero out the DB_TRX_ID and DB_ROLL_PTR
			columns of free_rec, in case it will not be
			overwritten by insert_rec. */

			ulint	trx_id_col;
			ulint	trx_id_offs;
			ulint	len;

			trx_id_col = dict_index_get_sys_col_pos(index,
								DATA_TRX_ID);
			ut_ad(trx_id_col > 0);
			ut_ad(trx_id_col != ULINT_UNDEFINED);

			trx_id_offs = rec_get_nth_field_offs(foffsets,
							     trx_id_col,
							     &len);
			ut_ad(len == DATA_TRX_ID_LEN);

			if (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + trx_id_offs
			    + rec_offs_extra_size(foffsets) > rec_size) {
				/* We will have to zero out the
				DB_TRX_ID and DB_ROLL_PTR, because
				they will not be fully overwritten by
				insert_rec. */

				memset(free_rec + trx_id_offs, 0,
				       DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN);
			}

			ut_ad(free_rec + trx_id_offs + DATA_TRX_ID_LEN
			      == rec_get_nth_field(free_rec, foffsets,
						   trx_id_col + 1, &len));
			ut_ad(len == DATA_ROLL_PTR_LEN);
		}

		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
	} else {
use_heap:
		free_rec = NULL;
		insert_buf = page_mem_alloc_heap(page, page_zip,
						 rec_size, &heap_no);

		if (UNIV_UNLIKELY(insert_buf == NULL)) {
			return(NULL);
		}

		page_zip_dir_add_slot(page_zip, dict_index_is_clust(index));
	}

	/* 3. Create the record */
	insert_rec = rec_copy(insert_buf, rec, offsets);
	rec_offs_make_valid(insert_rec, index, offsets);

	/* 4. Insert the record in the linked list of records */
	ut_ad(cursor->rec != insert_rec);

	{
		/* next record after current before the insertion */
		const rec_t*	next_rec = page_rec_get_next_low(
			cursor->rec, TRUE);
		ut_ad(rec_get_status(cursor->rec)
		      <= REC_STATUS_INFIMUM);
		ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM);
		ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM);

		page_rec_set_next(insert_rec, next_rec);
		page_rec_set_next(cursor->rec, insert_rec);
	}

	page_header_set_field(page, page_zip, PAGE_N_RECS,
			      1 + page_get_n_recs(page));

	/* 5. Set the n_owned field in the inserted record to zero,
	and set the heap_no field */
	rec_set_n_owned_new(insert_rec, NULL, 0);
	rec_set_heap_no_new(insert_rec, heap_no);

	UNIV_MEM_ASSERT_RW(rec_get_start(insert_rec, offsets),
			   rec_offs_size(offsets));

	page_zip_dir_insert(page_zip, cursor->rec, free_rec, insert_rec);

	/* 6. Update the last insertion info in page header */

	last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT);
	ut_ad(!last_insert
	      || rec_get_node_ptr_flag(last_insert)
	      == rec_get_node_ptr_flag(insert_rec));

	if (UNIV_UNLIKELY(last_insert == NULL)) {
		page_header_set_field(page, page_zip, PAGE_DIRECTION,
				      PAGE_NO_DIRECTION);
		page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);

	} else if ((last_insert == cursor->rec)
		   && (page_header_get_field(page, PAGE_DIRECTION)
		       != PAGE_LEFT)) {

		page_header_set_field(page, page_zip, PAGE_DIRECTION,
				      PAGE_RIGHT);
		page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
				      page_header_get_field(
					      page, PAGE_N_DIRECTION) + 1);

	} else if ((page_rec_get_next(insert_rec) == last_insert)
		   && (page_header_get_field(page, PAGE_DIRECTION)
		       != PAGE_RIGHT)) {

		page_header_set_field(page, page_zip, PAGE_DIRECTION,
				      PAGE_LEFT);
		page_header_set_field(page, page_zip, PAGE_N_DIRECTION,
				      page_header_get_field(
					      page, PAGE_N_DIRECTION) + 1);
	} else {
		page_header_set_field(page, page_zip, PAGE_DIRECTION,
				      PAGE_NO_DIRECTION);
		page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
	}

	page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec);

	/* 7. It remains to update the owner record. */
	{
		rec_t*	owner_rec	= page_rec_find_owner_rec(insert_rec);
		ulint	n_owned;

		n_owned = rec_get_n_owned_new(owner_rec);
		rec_set_n_owned_new(owner_rec, page_zip, n_owned + 1);

		/* 8. Now we have incremented the n_owned field of the owner
		record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED,
		we have to split the corresponding directory slot in two. */

		if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) {
			page_dir_split_slot(
				page, page_zip,
				page_dir_find_owner_slot(owner_rec));
		}
	}

	page_zip_write_rec(page_zip, insert_rec, index, offsets, 1);

	btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok");

	/* 9. Write log record of the insert */
	if (UNIV_LIKELY(mtr != NULL)) {
		page_cur_insert_rec_write_log(insert_rec, rec_size,
					      cursor->rec, index, mtr);
	}

	return(insert_rec);
}
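
/* Simplified decision sketch for the compressed-page insert above
(derived from the branches in page_cur_insert_rec_zip; the empty-page
recreation and reorganize-before-insert cases are omitted):

	enough space in page_zip and no reorganize needed
		-> plain insert + page_zip_write_rec()
	otherwise insert into the uncompressed copy only, then:
		crash recovery	-> done; recompression comes from a later
				   MLOG_ZIP_PAGE_COMPRESS_NO_DATA record
		!log_compressed	-> page_zip_compress(); on failure,
				   decompress to restore the page
		log_compressed	-> page_zip_reorganize(); on failure,
				   decompress to restore the page */
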
  1343. #ifndef UNIV_HOTBACKUP
  1344. /**********************************************************//**
  1345. Writes a log record of copying a record list end to a new created page.
  1346. @return 4-byte field where to write the log data length, or NULL if
  1347. logging is disabled */
  1348. UNIV_INLINE
  1349. byte*
  1350. page_copy_rec_list_to_created_page_write_log(
  1351. /*=========================================*/
  1352. page_t* page, /*!< in: index page */
  1353. dict_index_t* index, /*!< in: record descriptor */
  1354. mtr_t* mtr) /*!< in: mtr */
  1355. {
  1356. byte* log_ptr;
  1357. ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
  1358. log_ptr = mlog_open_and_write_index(mtr, page, index,
  1359. page_is_comp(page)
  1360. ? MLOG_COMP_LIST_END_COPY_CREATED
  1361. : MLOG_LIST_END_COPY_CREATED, 4);
  1362. if (UNIV_LIKELY(log_ptr != NULL)) {
  1363. mlog_close(mtr, log_ptr + 4);
  1364. }
  1365. return(log_ptr);
  1366. }
  1367. #endif /* !UNIV_HOTBACKUP */
  1368. /**********************************************************//**
  1369. Parses a log record of copying a record list end to a new created page.
  1370. @return end of log record or NULL */
  1371. UNIV_INTERN
  1372. byte*
  1373. page_parse_copy_rec_list_to_created_page(
  1374. /*=====================================*/
  1375. byte* ptr, /*!< in: buffer */
  1376. byte* end_ptr,/*!< in: buffer end */
  1377. buf_block_t* block, /*!< in: page or NULL */
  1378. dict_index_t* index, /*!< in: record descriptor */
  1379. mtr_t* mtr) /*!< in: mtr or NULL */
  1380. {
  1381. byte* rec_end;
  1382. ulint log_data_len;
  1383. page_t* page;
  1384. page_zip_des_t* page_zip;
  1385. if (ptr + 4 > end_ptr) {
  1386. return(NULL);
  1387. }
  1388. log_data_len = mach_read_from_4(ptr);
  1389. ptr += 4;
  1390. rec_end = ptr + log_data_len;
  1391. if (rec_end > end_ptr) {
  1392. return(NULL);
  1393. }
  1394. if (!block) {
  1395. return(rec_end);
  1396. }
  1397. while (ptr < rec_end) {
  1398. ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
  1399. block, index, mtr);
  1400. }
  1401. ut_a(ptr == rec_end);
  1402. page = buf_block_get_frame(block);
  1403. page_zip = buf_block_get_page_zip(block);
  1404. page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);
  1405. page_header_set_field(page, page_zip, PAGE_DIRECTION,
  1406. PAGE_NO_DIRECTION);
  1407. page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0);
  1408. return(rec_end);
  1409. }
  1410. #ifndef UNIV_HOTBACKUP
  1411. /*************************************************************//**
  1412. Copies records from page to a newly created page, from a given record onward,
  1413. including that record. Infimum and supremum records are not copied.
  1414. IMPORTANT: The caller will have to update IBUF_BITMAP_FREE
  1415. if this is a compressed leaf page in a secondary index.
  1416. This has to be done either within the same mini-transaction,
  1417. or by invoking ibuf_reset_free_bits() before mtr_commit(). */
UNIV_INTERN
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
	page_t*		new_page,	/*!< in/out: index page to copy to */
	rec_t*		rec,		/*!< in: first record to copy */
	dict_index_t*	index,		/*!< in: record descriptor */
	mtr_t*		mtr)		/*!< in: mtr */
{
	page_dir_slot_t* slot = 0; /* remove warning */
	byte*		heap_top;
	rec_t*		insert_rec = 0; /* remove warning */
	rec_t*		prev_rec;
	ulint		count;
	ulint		n_recs;
	ulint		slot_index;
	ulint		rec_size;
	ulint		log_mode;
	byte*		log_ptr;
	ulint		log_data_len;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	rec_offs_init(offsets_);

	ut_ad(page_dir_get_n_heap(new_page) == PAGE_HEAP_NO_USER_LOW);
	ut_ad(page_align(rec) != new_page);
	ut_ad(page_rec_is_comp(rec) == page_is_comp(new_page));

	if (page_rec_is_infimum(rec)) {

		rec = page_rec_get_next(rec);
	}

	if (page_rec_is_supremum(rec)) {

		return;
	}

#ifdef UNIV_DEBUG
	/* To pass the debug tests we have to set these dummy values
	in the debug version */
	page_dir_set_n_slots(new_page, NULL, UNIV_PAGE_SIZE / 2);
	page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP,
			    new_page + UNIV_PAGE_SIZE - 1);
#endif

	log_ptr = page_copy_rec_list_to_created_page_write_log(new_page,
							       index, mtr);

	log_data_len = dyn_array_get_data_size(&(mtr->log));

	/* Individual inserts are logged in a shorter form */

	log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS);

	prev_rec = page_get_infimum_rec(new_page);
	if (page_is_comp(new_page)) {
		heap_top = new_page + PAGE_NEW_SUPREMUM_END;
	} else {
		heap_top = new_page + PAGE_OLD_SUPREMUM_END;
	}
	count = 0;
	slot_index = 0;
	n_recs = 0;

	do {
		offsets = rec_get_offsets(rec, index, offsets,
					  ULINT_UNDEFINED, &heap);
		insert_rec = rec_copy(heap_top, rec, offsets);

		if (page_is_comp(new_page)) {
			rec_set_next_offs_new(prev_rec,
					      page_offset(insert_rec));

			rec_set_n_owned_new(insert_rec, NULL, 0);
			rec_set_heap_no_new(insert_rec,
					    PAGE_HEAP_NO_USER_LOW + n_recs);
		} else {
			rec_set_next_offs_old(prev_rec,
					      page_offset(insert_rec));

			rec_set_n_owned_old(insert_rec, 0);
			rec_set_heap_no_old(insert_rec,
					    PAGE_HEAP_NO_USER_LOW + n_recs);
		}

		count++;
		n_recs++;

		if (UNIV_UNLIKELY
		    (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2)) {

			slot_index++;

			slot = page_dir_get_nth_slot(new_page, slot_index);

			page_dir_slot_set_rec(slot, insert_rec);
			page_dir_slot_set_n_owned(slot, NULL, count);

			count = 0;
		}

		rec_size = rec_offs_size(offsets);

		ut_ad(heap_top < new_page + UNIV_PAGE_SIZE);

		heap_top += rec_size;

		rec_offs_make_valid(insert_rec, index, offsets);
		btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end");

		page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec,
					      index, mtr);
		prev_rec = insert_rec;
		rec = page_rec_get_next(rec);
	} while (!page_rec_is_supremum(rec));
	if ((slot_index > 0) && (count + 1
				 + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2
				 <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
		/* We can merge the two last dir slots. This is done here
		so that this function imitates exactly the behaviour of
		the equivalent operation performed with
		page_cur_insert_rec, which database recovery uses to
		redo the work of this function. Exact imitation makes it
		possible to check the correctness of recovery. */

		count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2;

		page_dir_slot_set_n_owned(slot, NULL, 0);

		slot_index--;
	}
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len;

	ut_a(log_data_len < 100 * UNIV_PAGE_SIZE);

	if (UNIV_LIKELY(log_ptr != NULL)) {
		mach_write_to_4(log_ptr, log_data_len);
	}

	if (page_is_comp(new_page)) {
		rec_set_next_offs_new(insert_rec, PAGE_NEW_SUPREMUM);
	} else {
		rec_set_next_offs_old(insert_rec, PAGE_OLD_SUPREMUM);
	}

	slot = page_dir_get_nth_slot(new_page, 1 + slot_index);

	page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page));
	page_dir_slot_set_n_owned(slot, NULL, count + 1);

	page_dir_set_n_slots(new_page, NULL, 2 + slot_index);
	page_header_set_ptr(new_page, NULL, PAGE_HEAP_TOP, heap_top);
	page_dir_set_n_heap(new_page, NULL, PAGE_HEAP_NO_USER_LOW + n_recs);
	page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs);

	page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL);
	page_header_set_field(new_page, NULL, PAGE_DIRECTION,
			      PAGE_NO_DIRECTION);
	page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0);

	/* Restore the log mode */

	mtr_set_log_mode(mtr, log_mode);
}
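/* Illustrative note, not from the original source: with the stock
values PAGE_DIR_SLOT_MAX_N_OWNED == 8 and PAGE_DIR_SLOT_MIN_N_OWNED
== 4, the loop above closes a directory slot after every
(8 + 1) / 2 == 4 copied records. For example, copying 10 records
closes two slots owning 4 records each and leaves count == 2; the
merge test (2 + 1 + 4 <= 8) then succeeds, so the second slot is
dissolved and the supremum slot ends up owning 2 + 4 + 1 == 7
records: the last 6 user records plus the supremum itself. */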
/***********************************************************//**
Writes log record of a record delete on a page. */
UNIV_INLINE
void
page_cur_delete_rec_write_log(
/*==========================*/
	rec_t*			rec,	/*!< in: record to be deleted */
	const dict_index_t*	index,	/*!< in: record descriptor */
	mtr_t*			mtr)	/*!< in: mini-transaction handle */
{
	byte*	log_ptr;

	ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table));

	log_ptr = mlog_open_and_write_index(mtr, rec, index,
					    page_rec_is_comp(rec)
					    ? MLOG_COMP_REC_DELETE
					    : MLOG_REC_DELETE, 2);

	if (!log_ptr) {
		/* Logging in mtr is switched off during crash recovery:
		in that case mlog_open returns NULL */
		return;
	}

	/* Write the cursor rec offset as a 2-byte ulint */
	mach_write_to_2(log_ptr, page_offset(rec));

	mlog_close(mtr, log_ptr + 2);
}
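/* Illustrative note, not from the original source: after the index
information emitted by mlog_open_and_write_index(), the record body
written above is just the 2-byte page offset of the record to be
deleted, which page_cur_parse_delete_rec() below reads back with
mach_read_from_2(). */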
#else /* !UNIV_HOTBACKUP */
# define page_cur_delete_rec_write_log(rec,index,mtr) ((void) 0)
#endif /* !UNIV_HOTBACKUP */

/***********************************************************//**
Parses log record of a record delete on a page.
@return pointer to record end or NULL */
UNIV_INTERN
byte*
page_cur_parse_delete_rec(
/*======================*/
	byte*		ptr,	/*!< in: buffer */
	byte*		end_ptr,/*!< in: buffer end */
	buf_block_t*	block,	/*!< in: page or NULL */
	dict_index_t*	index,	/*!< in: record descriptor */
	mtr_t*		mtr)	/*!< in: mtr or NULL */
{
	ulint		offset;
	page_cur_t	cursor;

	if (end_ptr < ptr + 2) {

		return(NULL);
	}

	/* Read the cursor rec offset as a 2-byte ulint */
	offset = mach_read_from_2(ptr);
	ptr += 2;

	ut_a(offset <= UNIV_PAGE_SIZE);

	if (block) {
		page_t*		page		= buf_block_get_frame(block);
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		rec_t*		rec		= page + offset;
		rec_offs_init(offsets_);

		page_cur_position(rec, block, &cursor);
		ut_ad(!buf_block_get_page_zip(block) || page_is_comp(page));

		page_cur_delete_rec(&cursor, index,
				    rec_get_offsets(rec, index, offsets_,
						    ULINT_UNDEFINED, &heap),
				    mtr);
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
	}

	return(ptr);
}
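/* A minimal usage sketch, not from the original source (recovery
actually reaches this function through its redo log dispatcher; the
variable names here are hypothetical):

	byte*	new_ptr = page_cur_parse_delete_rec(ptr, end_ptr,
						    block, index, mtr);
	if (new_ptr == NULL) {
		the log record is still incomplete: more log must
		be read before it can be parsed
	}
*/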
/***********************************************************//**
Deletes a record at the page cursor. The cursor is moved to the next
record after the deleted one. */
UNIV_INTERN
void
page_cur_delete_rec(
/*================*/
	page_cur_t*		cursor,	/*!< in/out: a page cursor */
	const dict_index_t*	index,	/*!< in: record descriptor */
	const ulint*		offsets,/*!< in: rec_get_offsets(
					cursor->rec, index) */
	mtr_t*			mtr)	/*!< in: mini-transaction handle
					or NULL */
{
	page_dir_slot_t* cur_dir_slot;
	page_dir_slot_t* prev_slot;
	page_t*		page;
	page_zip_des_t*	page_zip;
	rec_t*		current_rec;
	rec_t*		prev_rec	= NULL;
	rec_t*		next_rec;
	ulint		cur_slot_no;
	ulint		cur_n_owned;
	rec_t*		rec;

	page = page_cur_get_page(cursor);
	page_zip = page_cur_get_page_zip(cursor);

	/* page_zip_validate() will fail here when
	btr_cur_pessimistic_delete() invokes btr_set_min_rec_mark().
	Then, both "page_zip" and "page" would have the min-rec-mark
	set on the smallest user record, but "page" would additionally
	have it set on the smallest-but-one record. Because sloppy
	page_zip_validate_low() only ignores min-rec-flag differences
	in the smallest user record, it cannot be used here either. */

	current_rec = cursor->rec;
	ut_ad(rec_offs_validate(current_rec, index, offsets));
	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
	ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX);
	ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)
	      == index->id || recv_recovery_is_on()
	      || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index)));

	/* The record must not be the supremum or infimum record. */
	ut_ad(page_rec_is_user_rec(current_rec));

	if (page_get_n_recs(page) == 1 && !recv_recovery_is_on()) {
		/* Empty the page, unless we are applying the redo log
		during crash recovery. During normal operation, the
		page_create_empty() gets logged as one of MLOG_PAGE_CREATE,
		MLOG_COMP_PAGE_CREATE, MLOG_ZIP_PAGE_COMPRESS. */
		ut_ad(page_is_leaf(page));
		/* Usually, this should be the root page,
		and the whole index tree should become empty.
		However, this could also be a call in
		btr_cur_pessimistic_update() to delete the only
		record in the page and to insert another one. */
		page_cur_move_to_next(cursor);
		ut_ad(page_cur_is_after_last(cursor));
		page_create_empty(page_cur_get_block(cursor),
				  const_cast<dict_index_t*>(index), mtr);
		return;
	}

	/* Save to local variables some data associated with current_rec */
	cur_slot_no = page_dir_find_owner_slot(current_rec);
	ut_ad(cur_slot_no > 0);
	cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);
	cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot);

	/* 0. Write the log record */
	if (mtr != 0) {
		page_cur_delete_rec_write_log(current_rec, index, mtr);
	}
	/* 1. Reset the last insert info in the page header and increment
	the modify clock for the frame */

	page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL);

	/* The page becomes invalid for optimistic searches: increment
	the frame modify clock only if there is a mini-transaction
	covering the change. During IMPORT we allocate local blocks
	that are not part of the buffer pool. */

	if (mtr != 0) {
		buf_block_modify_clock_inc(page_cur_get_block(cursor));
	}
	/* 2. Find the next and the previous record. Note that the cursor is
	left at the next record. */

	ut_ad(cur_slot_no > 0);
	prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1);

	rec = (rec_t*) page_dir_slot_get_rec(prev_slot);

	/* rec now points to the record of the previous directory slot. Look
	for the immediate predecessor of current_rec in a loop. */

	while (current_rec != rec) {
		prev_rec = rec;
		rec = page_rec_get_next(rec);
	}
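	/* Note, added for clarity: the loop above always executes at
	least once, because the record anchored in the previous
	directory slot is owned by that slot and therefore strictly
	precedes current_rec, which is owned by cur_dir_slot. Hence
	prev_rec is guaranteed to be non-NULL from here on. */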
	page_cur_move_to_next(cursor);

	next_rec = cursor->rec;

	/* 3. Remove the record from the linked list of records */

	page_rec_set_next(prev_rec, next_rec);

	/* 4. If the deleted record is pointed to by a dir slot, update the
	record pointer in slot. In the following if-clause we assume that
	prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED
	>= 2. */

#if PAGE_DIR_SLOT_MIN_N_OWNED < 2
# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2"
#endif
	ut_ad(cur_n_owned > 1);

	if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) {
		page_dir_slot_set_rec(cur_dir_slot, prev_rec);
	}

	/* 5. Update the number of owned records of the slot */

	page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1);

	/* 6. Free the memory occupied by the record */
	btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index),
				offsets, "delete");
	page_mem_free(page, page_zip, current_rec, index, offsets);

	/* 7. Now we have decremented the number of owned records of the slot.
	If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the
	slots. */

	if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) {
		page_dir_balance_slot(page, page_zip, cur_slot_no);
	}

#ifdef UNIV_ZIP_DEBUG
	ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}
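/* A minimal usage sketch, not from the original source (hypothetical
variable names): deleting the record a cursor is positioned on,
computing the offsets array that the function requires:

	mem_heap_t*	heap = NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets = offsets_;
	rec_offs_init(offsets_);

	offsets = rec_get_offsets(page_cur_get_rec(&cursor), index,
				  offsets, ULINT_UNDEFINED, &heap);
	page_cur_delete_rec(&cursor, index, offsets, mtr);
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
*/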
#ifdef UNIV_COMPILE_TEST_FUNCS

/*******************************************************************//**
Print the first n numbers generated by page_cur_lcg_prng(), so that it
can be checked visually that the generator works properly. */
void
test_page_cur_lcg_prng(
/*===================*/
	int	n)	/*!< in: print first n numbers */
{
	int			i;
	unsigned long long	rnd;

	for (i = 0; i < n; i++) {
		rnd = page_cur_lcg_prng();
		printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n",
		       rnd,
		       rnd % 2,
		       rnd % 3,
		       rnd % 5,
		       rnd % 7,
		       rnd % 11);
	}
}

#endif /* UNIV_COMPILE_TEST_FUNCS */