You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

517 lines
19 KiB

20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
  1. /*****************************************************************************
  2. Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
  3. This program is free software; you can redistribute it and/or modify it under
  4. the terms of the GNU General Public License as published by the Free Software
  5. Foundation; version 2 of the License.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. You should have received a copy of the GNU General Public License along with
  10. this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  11. Place, Suite 330, Boston, MA 02111-1307 USA
  12. *****************************************************************************/
  13. /**************************************************//**
  14. @file include/btr0btr.h
  15. The B-tree
  16. Created 6/2/1994 Heikki Tuuri
  17. *******************************************************/
  18. #ifndef btr0btr_h
  19. #define btr0btr_h
  20. #include "univ.i"
  21. #include "dict0dict.h"
  22. #include "data0data.h"
  23. #include "page0cur.h"
  24. #include "mtr0mtr.h"
  25. #include "btr0types.h"
  26. #ifndef UNIV_HOTBACKUP
  27. /** Maximum record size which can be stored on a page, without using the
  28. special big record storage structure */
  29. #define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
  30. /** @brief Maximum depth of a B-tree in InnoDB.
  31. Note that this isn't a maximum as such; none of the tree operations
  32. avoid producing trees bigger than this. It is instead a "max depth
  33. that other code must work with", useful for e.g. fixed-size arrays
  34. that must store some information about each level in a tree. In other
  35. words: if a B-tree with bigger depth than this is encountered, it is
  36. not acceptable for it to lead to mysterious memory corruption, but it
  37. is acceptable for the program to die with a clear assert failure. */
  38. #define BTR_MAX_LEVELS 100
  39. /** Latching modes for btr_cur_search_to_nth_level(). The first three modes take the values of the RW_* latch constants; the remaining modes are numbered explicitly from 33 to 36. NOTE(review): 33..36 are presumably chosen so that they do not collide with the RW_* values - confirm against the RW_* definitions. */
  40. enum btr_latch_mode {
  41. /** Search a record on a leaf page and S-latch the page. */
  42. BTR_SEARCH_LEAF = RW_S_LATCH,
  43. /** (Prepare to) modify a record on a leaf page and X-latch the page. */
  44. BTR_MODIFY_LEAF = RW_X_LATCH,
  45. /** Obtain no latches. */
  46. BTR_NO_LATCHES = RW_NO_LATCH,
  47. /** Start modifying the entire B-tree. */
  48. BTR_MODIFY_TREE = 33,
  49. /** Continue modifying the entire B-tree. */
  50. BTR_CONT_MODIFY_TREE = 34,
  51. /** Search the previous record. */
  52. BTR_SEARCH_PREV = 35,
  53. /** Modify the previous record. */
  54. BTR_MODIFY_PREV = 36
  55. };
  56. /** If this flag is ORed to btr_latch_mode, it means that the search tuple
  57. will be inserted into the index, at the searched position */
  58. #define BTR_INSERT 512
  59. /** This flag ORed to btr_latch_mode says that the search is done for query
  60. optimization */
  61. #define BTR_ESTIMATE 1024
  62. /** This flag ORed to btr_latch_mode says that we can ignore a possible
  63. UNIQUE definition on secondary indexes when we decide if we can use
  64. the insert buffer to speed up inserts */
  65. #define BTR_IGNORE_SEC_UNIQUE 2048
  66. /**************************************************************//**
  67. Gets the root node of a tree and x-latches it.
  68. @return root page, x-latched */
  69. UNIV_INTERN
  70. page_t*
  71. btr_root_get(
  72. /*=========*/
  73. dict_index_t* index, /*!< in: index tree */
  74. mtr_t* mtr); /*!< in: mtr */
  75. /**************************************************************//**
  76. Gets a buffer page and declares its latching order level. */
  77. UNIV_INLINE
  78. buf_block_t*
  79. btr_block_get(
  80. /*==========*/
  81. ulint space, /*!< in: space id */
  82. ulint zip_size, /*!< in: compressed page size in bytes
  83. or 0 for uncompressed pages */
  84. ulint page_no, /*!< in: page number */
  85. ulint mode, /*!< in: latch mode */
  86. mtr_t* mtr); /*!< in: mtr */
  87. /**************************************************************//**
  88. Gets a buffer page and declares its latching order level. */
  89. UNIV_INLINE
  90. page_t*
  91. btr_page_get(
  92. /*=========*/
  93. ulint space, /*!< in: space id */
  94. ulint zip_size, /*!< in: compressed page size in bytes
  95. or 0 for uncompressed pages */
  96. ulint page_no, /*!< in: page number */
  97. ulint mode, /*!< in: latch mode */
  98. mtr_t* mtr); /*!< in: mtr */
  99. #endif /* !UNIV_HOTBACKUP */
  100. /**************************************************************//**
  101. Gets the index id field of a page.
  102. @return index id */
  103. UNIV_INLINE
  104. dulint
  105. btr_page_get_index_id(
  106. /*==================*/
  107. const page_t* page); /*!< in: index page */
  108. #ifndef UNIV_HOTBACKUP
  109. /********************************************************//**
  110. Gets the node level field in an index page.
  111. @return level, leaf level == 0 */
  112. UNIV_INLINE
  113. ulint
  114. btr_page_get_level_low(
  115. /*===================*/
  116. const page_t* page); /*!< in: index page */
  117. /********************************************************//**
  118. Gets the node level field in an index page.
  119. @return level, leaf level == 0 */
  120. UNIV_INLINE
  121. ulint
  122. btr_page_get_level(
  123. /*===============*/
  124. const page_t* page, /*!< in: index page */
  125. mtr_t* mtr); /*!< in: mini-transaction handle */
  126. /********************************************************//**
  127. Gets the next index page number.
  128. @return next page number */
  129. UNIV_INLINE
  130. ulint
  131. btr_page_get_next(
  132. /*==============*/
  133. const page_t* page, /*!< in: index page */
  134. mtr_t* mtr); /*!< in: mini-transaction handle */
  135. /********************************************************//**
  136. Gets the previous index page number.
  137. @return prev page number */
  138. UNIV_INLINE
  139. ulint
  140. btr_page_get_prev(
  141. /*==============*/
  142. const page_t* page, /*!< in: index page */
  143. mtr_t* mtr); /*!< in: mini-transaction handle */
  144. /*************************************************************//**
  145. Gets pointer to the previous user record in the tree. It is assumed
  146. that the caller has appropriate latches on the page and its neighbor.
  147. @return previous user record, NULL if there is none */
  148. UNIV_INTERN
  149. rec_t*
  150. btr_get_prev_user_rec(
  151. /*==================*/
  152. rec_t* rec, /*!< in: record on leaf level */
  153. mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
  154. needed, also to the previous page */
  155. /*************************************************************//**
  156. Gets pointer to the next user record in the tree. It is assumed
  157. that the caller has appropriate latches on the page and its neighbor.
  158. @return next user record, NULL if there is none */
  159. UNIV_INTERN
  160. rec_t*
  161. btr_get_next_user_rec(
  162. /*==================*/
  163. rec_t* rec, /*!< in: record on leaf level */
  164. mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if
  165. needed, also to the next page */
  166. /**************************************************************//**
  167. Releases the latch on a leaf page and buffer-unfixes it. */
  168. UNIV_INLINE
  169. void
  170. btr_leaf_page_release(
  171. /*==================*/
  172. buf_block_t* block, /*!< in: buffer block */
  173. ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or
  174. BTR_MODIFY_LEAF */
  175. mtr_t* mtr); /*!< in: mtr */
  176. /**************************************************************//**
  177. Gets the child page number stored in a node pointer.
  178. NOTE: the offsets array must contain all offsets for the record since
  179. we read the last field according to offsets and assume that it contains
  180. the child page number. In other words offsets must have been retrieved
  181. with rec_get_offsets(n_fields=ULINT_UNDEFINED).
  182. @return child page number */
  183. UNIV_INLINE
  184. ulint
  185. btr_node_ptr_get_child_page_no(
  186. /*===========================*/
  187. const rec_t* rec, /*!< in: node pointer record */
  188. const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
  189. /************************************************************//**
  190. Creates the root node for a new index tree.
  191. @return page number of the created root, FIL_NULL if did not succeed */
  192. UNIV_INTERN
  193. ulint
  194. btr_create(
  195. /*=======*/
  196. ulint type, /*!< in: type of the index */
  197. ulint space, /*!< in: space where created */
  198. ulint zip_size,/*!< in: compressed page size in bytes
  199. or 0 for uncompressed pages */
  200. dulint index_id,/*!< in: index id */
  201. dict_index_t* index, /*!< in: index */
  202. mtr_t* mtr); /*!< in: mini-transaction handle */
  203. /************************************************************//**
  204. Frees a B-tree except the root page, which MUST be freed after this
  205. by calling btr_free_root. */
  206. UNIV_INTERN
  207. void
  208. btr_free_but_not_root(
  209. /*==================*/
  210. ulint space, /*!< in: space where created */
  211. ulint zip_size, /*!< in: compressed page size in bytes
  212. or 0 for uncompressed pages */
  213. ulint root_page_no); /*!< in: root page number */
  214. /************************************************************//**
  215. Frees the B-tree root page. The rest of the tree MUST already have been freed. */
  216. UNIV_INTERN
  217. void
  218. btr_free_root(
  219. /*==========*/
  220. ulint space, /*!< in: space where created */
  221. ulint zip_size, /*!< in: compressed page size in bytes
  222. or 0 for uncompressed pages */
  223. ulint root_page_no, /*!< in: root page number */
  224. mtr_t* mtr); /*!< in: a mini-transaction which has already
  225. been started */
  226. /*************************************************************//**
  227. Makes tree one level higher by splitting the root, and inserts
  228. the tuple. It is assumed that mtr contains an x-latch on the tree.
  229. NOTE that the operation of this function must always succeed,
  230. we cannot reverse it: therefore enough free disk space must be
  231. guaranteed to be available before this function is called.
  232. @return inserted record */
  233. UNIV_INTERN
  234. rec_t*
  235. btr_root_raise_and_insert(
  236. /*======================*/
  237. btr_cur_t* cursor, /*!< in: cursor at which to insert: must be
  238. on the root page; when the function returns,
  239. the cursor is positioned on the predecessor
  240. of the inserted record */
  241. const dtuple_t* tuple, /*!< in: tuple to insert */
  242. ulint n_ext, /*!< in: number of externally stored columns */
  243. mtr_t* mtr); /*!< in: mtr */
  244. /*************************************************************//**
  245. Reorganizes an index page.
  246. IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf
  247. page of a non-clustered index, the caller must update the insert
  248. buffer free bits in the same mini-transaction in such a way that the
  249. modification will be redo-logged.
  250. @return TRUE on success, FALSE on failure */
  251. UNIV_INTERN
  252. ibool
  253. btr_page_reorganize(
  254. /*================*/
  255. buf_block_t* block, /*!< in: page to be reorganized */
  256. dict_index_t* index, /*!< in: record descriptor */
  257. mtr_t* mtr); /*!< in: mtr */
  258. /*************************************************************//**
  259. Decides if the page should be split at the convergence point of
  260. inserts converging to left.
  261. @return TRUE if split recommended */
  262. UNIV_INTERN
  263. ibool
  264. btr_page_get_split_rec_to_left(
  265. /*===========================*/
  266. btr_cur_t* cursor, /*!< in: cursor at which to insert */
  267. rec_t** split_rec);/*!< out: if split recommended,
  268. the first record on upper half page,
  269. or NULL if tuple should be first */
  270. /*************************************************************//**
  271. Decides if the page should be split at the convergence point of
  272. inserts converging to right.
  273. @return TRUE if split recommended */
  274. UNIV_INTERN
  275. ibool
  276. btr_page_get_split_rec_to_right(
  277. /*============================*/
  278. btr_cur_t* cursor, /*!< in: cursor at which to insert */
  279. rec_t** split_rec);/*!< out: if split recommended,
  280. the first record on upper half page,
  281. or NULL if tuple should be first */
  282. /*************************************************************//**
  283. Splits an index page into halves and inserts the tuple. It is assumed
  284. that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is
  285. released within this function! NOTE that the operation of this
  286. function must always succeed, we cannot reverse it: therefore enough
  287. free disk space (2 pages) must be guaranteed to be available before
  288. this function is called.
  289. @return inserted record */
  290. UNIV_INTERN
  291. rec_t*
  292. btr_page_split_and_insert(
  293. /*======================*/
  294. btr_cur_t* cursor, /*!< in: cursor at which to insert; when the
  295. function returns, the cursor is positioned
  296. on the predecessor of the inserted record */
  297. const dtuple_t* tuple, /*!< in: tuple to insert */
  298. ulint n_ext, /*!< in: number of externally stored columns */
  299. mtr_t* mtr); /*!< in: mtr */
  300. /*******************************************************//**
  301. Inserts a data tuple to a tree on a non-leaf level. It is assumed
  302. that mtr holds an x-latch on the tree. The btr_insert_on_non_leaf_level() macro below calls this function with __FILE__ and __LINE__ of the call site as the file and line arguments. */
  303. UNIV_INTERN
  304. void
  305. btr_insert_on_non_leaf_level_func(
  306. /*==============================*/
  307. dict_index_t* index, /*!< in: index */
  308. ulint level, /*!< in: level, must be > 0 */
  309. dtuple_t* tuple, /*!< in: the record to be inserted */
  310. const char* file, /*!< in: file name (__FILE__ when called via the macro) */
  311. ulint line, /*!< in: line where called (__LINE__ when called via the macro) */
  312. mtr_t* mtr); /*!< in: mtr */
  313. # define btr_insert_on_non_leaf_level(i,l,t,m) \
  314. btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
  315. #endif /* !UNIV_HOTBACKUP */
  316. /****************************************************************//**
  317. Sets a record as the predefined minimum record. */
  318. UNIV_INTERN
  319. void
  320. btr_set_min_rec_mark(
  321. /*=================*/
  322. rec_t* rec, /*!< in/out: record */
  323. mtr_t* mtr); /*!< in: mtr */
  324. #ifndef UNIV_HOTBACKUP
  325. /*************************************************************//**
  326. Deletes on the upper level the node pointer to a page. */
  327. UNIV_INTERN
  328. void
  329. btr_node_ptr_delete(
  330. /*================*/
  331. dict_index_t* index, /*!< in: index tree */
  332. buf_block_t* block, /*!< in: page whose node pointer is deleted */
  333. mtr_t* mtr); /*!< in: mtr */
  334. #ifdef UNIV_DEBUG
  335. /************************************************************//**
  336. Checks that the node pointer to a page is appropriate.
  337. @return TRUE */
  338. UNIV_INTERN
  339. ibool
  340. btr_check_node_ptr(
  341. /*===============*/
  342. dict_index_t* index, /*!< in: index tree */
  343. buf_block_t* block, /*!< in: index page */
  344. mtr_t* mtr); /*!< in: mtr */
  345. #endif /* UNIV_DEBUG */
  346. /*************************************************************//**
  347. Tries to merge the page first to the left immediate brother if such a
  348. brother exists, and the node pointers to the current page and to the
  349. brother reside on the same page. If the left brother does not satisfy these
  350. conditions, looks at the right brother. If the page is the only one on that
  351. level, lifts the records of the page to the father page, thus reducing the
  352. tree height. It is assumed that mtr holds an x-latch on the tree and on the
  353. page. If cursor is on the leaf level, mtr must also hold x-latches to
  354. the brothers, if they exist.
  355. @return TRUE on success */
  356. UNIV_INTERN
  357. ibool
  358. btr_compress(
  359. /*=========*/
  360. btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift;
  361. the page must not be empty: in record delete
  362. use btr_discard_page if the page would become
  363. empty */
  364. mtr_t* mtr); /*!< in: mtr */
  365. /*************************************************************//**
  366. Discards a page from a B-tree. This is used to remove the last record from
  367. a B-tree page: the whole page must be removed at the same time. This cannot
  368. be used for the root page, which is allowed to be empty. */
  369. UNIV_INTERN
  370. void
  371. btr_discard_page(
  372. /*=============*/
  373. btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on
  374. the root page */
  375. mtr_t* mtr); /*!< in: mtr */
  376. #endif /* !UNIV_HOTBACKUP */
  377. /****************************************************************//**
  378. Parses the redo log record for setting an index record as the predefined
  379. minimum record.
  380. @return end of log record or NULL */
  381. UNIV_INTERN
  382. byte*
  383. btr_parse_set_min_rec_mark(
  384. /*=======================*/
  385. byte* ptr, /*!< in: buffer */
  386. byte* end_ptr,/*!< in: buffer end */
  387. ulint comp, /*!< in: nonzero=compact page format */
  388. page_t* page, /*!< in: page or NULL */
  389. mtr_t* mtr); /*!< in: mtr or NULL */
  390. /***********************************************************//**
  391. Parses a redo log record of reorganizing a page.
  392. @return end of log record or NULL */
  393. UNIV_INTERN
  394. byte*
  395. btr_parse_page_reorganize(
  396. /*======================*/
  397. byte* ptr, /*!< in: buffer */
  398. byte* end_ptr,/*!< in: buffer end */
  399. dict_index_t* index, /*!< in: record descriptor */
  400. buf_block_t* block, /*!< in: page to be reorganized, or NULL */
  401. mtr_t* mtr); /*!< in: mtr or NULL */
  402. #ifndef UNIV_HOTBACKUP
  403. /**************************************************************//**
  404. Gets the number of pages in a B-tree.
  405. @return number of pages */
  406. UNIV_INTERN
  407. ulint
  408. btr_get_size(
  409. /*=========*/
  410. dict_index_t* index, /*!< in: index */
  411. ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
  412. /**************************************************************//**
  413. Allocates a new file page to be used in an index tree. NOTE: we assume
  414. that the caller has made the reservation for free extents!
  415. @return new allocated block, x-latched; NULL if out of space */
  416. UNIV_INTERN
  417. buf_block_t*
  418. btr_page_alloc(
  419. /*===========*/
  420. dict_index_t* index, /*!< in: index tree */
  421. ulint hint_page_no, /*!< in: hint of a good page */
  422. byte file_direction, /*!< in: direction where a possible
  423. page split is made */
  424. ulint level, /*!< in: level where the page is placed
  425. in the tree */
  426. mtr_t* mtr); /*!< in: mtr */
  427. /**************************************************************//**
  428. Frees a file page used in an index tree. NOTE: cannot free field external
  429. storage pages because the page must contain info on its level. */
  430. UNIV_INTERN
  431. void
  432. btr_page_free(
  433. /*==========*/
  434. dict_index_t* index, /*!< in: index tree */
  435. buf_block_t* block, /*!< in: block to be freed, x-latched */
  436. mtr_t* mtr); /*!< in: mtr */
  437. /**************************************************************//**
  438. Frees a file page used in an index tree. Can also be used to free BLOB
  439. external storage pages, because the page level 0 can be given as an
  440. argument. */
  441. UNIV_INTERN
  442. void
  443. btr_page_free_low(
  444. /*==============*/
  445. dict_index_t* index, /*!< in: index tree */
  446. buf_block_t* block, /*!< in: block to be freed, x-latched */
  447. ulint level, /*!< in: page level */
  448. mtr_t* mtr); /*!< in: mtr */
  449. #ifdef UNIV_BTR_PRINT
  450. /*************************************************************//**
  451. Prints size info of a B-tree. */
  452. UNIV_INTERN
  453. void
  454. btr_print_size(
  455. /*===========*/
  456. dict_index_t* index); /*!< in: index tree */
  457. /**************************************************************//**
  458. Prints directories and other info of all nodes in the index. */
  459. UNIV_INTERN
  460. void
  461. btr_print_index(
  462. /*============*/
  463. dict_index_t* index, /*!< in: index */
  464. ulint width); /*!< in: print this many entries from start
  465. and end */
  466. #endif /* UNIV_BTR_PRINT */
  467. /************************************************************//**
  468. Checks the size and number of fields in a record based on the definition of
  469. the index.
  470. @return TRUE if ok */
  471. UNIV_INTERN
  472. ibool
  473. btr_index_rec_validate(
  474. /*===================*/
  475. const rec_t* rec, /*!< in: index record */
  476. const dict_index_t* index, /*!< in: index */
  477. ibool dump_on_error); /*!< in: TRUE if the function
  478. should print hex dump of record
  479. and page on error */
  480. /**************************************************************//**
  481. Checks the consistency of an index tree.
  482. @return TRUE if ok */
  483. UNIV_INTERN
  484. ibool
  485. btr_validate_index(
  486. /*===============*/
  487. dict_index_t* index, /*!< in: index */
  488. trx_t* trx); /*!< in: transaction or NULL */
  489. #define BTR_N_LEAF_PAGES 1 /*!< value for the flag parameter of btr_get_size(); presumably selects the number of leaf pages - confirm in the implementation */
  490. #define BTR_TOTAL_SIZE 2 /*!< value for the flag parameter of btr_get_size(); presumably selects the total number of pages - confirm in the implementation */
  491. #endif /* !UNIV_HOTBACKUP */
  492. #ifndef UNIV_NONINL
  493. #include "btr0btr.ic"
  494. #endif
  495. #endif