You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

387 lines
12 KiB

17 years ago
17 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
17 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
  1. /*****************************************************************************
  2. Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
  3. This program is free software; you can redistribute it and/or modify it under
  4. the terms of the GNU General Public License as published by the Free Software
  5. Foundation; version 2 of the License.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. You should have received a copy of the GNU General Public License along with
  10. this program; if not, write to the Free Software Foundation, Inc., 59 Temple
  11. Place, Suite 330, Boston, MA 02111-1307 USA
  12. *****************************************************************************/
  13. /**************************************************//**
  14. @file include/page0zip.ic
  15. Compressed page interface
  16. Created June 2005 by Marko Makela
  17. *******************************************************/
  18. #ifdef UNIV_MATERIALIZE
  19. # undef UNIV_INLINE
  20. # define UNIV_INLINE
  21. #endif
  22. #include "page0zip.h"
  23. #include "page0page.h"
  24. /* The format of compressed pages is as follows.
  25. The header and trailer of the uncompressed pages, excluding the page
  26. directory in the trailer, are copied as is to the header and trailer
  27. of the compressed page.
  28. At the end of the compressed page, there is a dense page directory
  29. pointing to every user record contained on the page, including deleted
  30. records on the free list. The dense directory is indexed in the
  31. collation order, i.e., in the order in which the record list is
  32. linked on the uncompressed page. The infimum and supremum records are
  33. excluded. The two most significant bits of the entries are allocated
  34. for the delete-mark and an n_owned flag indicating the last record in
  35. a chain of records pointed to from the sparse page directory on the
  36. uncompressed page.
  37. The data between PAGE_ZIP_START and the last page directory entry will
  38. be written in compressed format, starting at offset PAGE_DATA.
  39. Infimum and supremum records are not stored. We exclude the
  40. REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered
  41. from the dense page directory stored at the end of the compressed
  42. page.
  43. The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
  44. roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
  45. externally stored columns are stored separately, in ascending order of
  46. heap_no and column index, starting backwards from the dense page
  47. directory.
  48. The compressed data stream may be followed by a modification log
  49. covering the compressed portion of the page, as follows.
  50. MODIFICATION LOG ENTRY FORMAT
  51. - write record:
  52. - (heap_no - 1) << 1 (1..2 bytes)
  53. - extra bytes backwards
  54. - data bytes
  55. - clear record:
  56. - (heap_no - 1) << 1 | 1 (1..2 bytes)
  57. The integer values are stored in a variable-length format:
  58. - 0xxxxxxx: 0..127
  59. - 1xxxxxxx xxxxxxxx: 0..32767
  60. The end of the modification log is marked by a 0 byte.
  61. In summary, the compressed page looks like this:
  62. (1) Uncompressed page header (PAGE_DATA bytes)
  63. (2) Compressed index information
  64. (3) Compressed page data
  65. (4) Page modification log (page_zip->m_start..page_zip->m_end)
  66. (5) Empty zero-filled space
  67. (6) BLOB pointers (on leaf pages)
  68. - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
  69. - in descending collation order
  70. (7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
  71. - indexed by heap_no
  72. - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
  73. - REC_NODE_PTR_SIZE for non-leaf pages
  74. - 0 otherwise
  75. (8) dense page directory, stored backwards
  76. - n_dense = n_heap - 2
  77. - existing records in ascending collation order
  78. - deleted records (free list) in link order
  79. */
  /** Offset at which the compressed portion of the page begins */
  #define PAGE_ZIP_START              PAGE_NEW_SUPREMUM_END

  /** Number of bytes occupied by one entry of the compressed page directory */
  #define PAGE_ZIP_DIR_SLOT_SIZE      2

  /** Bits of a directory entry that hold the record offset; the two
  most significant bits of the entry are taken by the flags below */
  #define PAGE_ZIP_DIR_SLOT_MASK      0x3fff

  /** Directory entry flag: 'owned' */
  #define PAGE_ZIP_DIR_SLOT_OWNED     0x4000

  /** Directory entry flag: 'deleted' */
  #define PAGE_ZIP_DIR_SLOT_DEL       0x8000
  90. /**********************************************************************//**
  91. Determine the size of a compressed page in bytes.
  92. @return size in bytes */
  93. UNIV_INLINE
  94. ulint
  95. page_zip_get_size(
  96. /*==============*/
  97. const page_zip_des_t* page_zip) /*!< in: compressed page */
  98. {
  99. ulint size;
  100. if (UNIV_UNLIKELY(!page_zip->ssize)) {
  101. return(0);
  102. }
  103. size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize;
  104. ut_ad(size >= PAGE_ZIP_MIN_SIZE);
  105. ut_ad(size <= UNIV_PAGE_SIZE);
  106. return(size);
  107. }
  108. /**********************************************************************//**
  109. Set the size of a compressed page in bytes. */
  110. UNIV_INLINE
  111. void
  112. page_zip_set_size(
  113. /*==============*/
  114. page_zip_des_t* page_zip, /*!< in/out: compressed page */
  115. ulint size) /*!< in: size in bytes */
  116. {
  117. if (size) {
  118. int ssize;
  119. ut_ad(ut_is_2pow(size));
  120. for (ssize = 1; size > (ulint) (512 << ssize); ssize++) {
  121. }
  122. page_zip->ssize = ssize;
  123. } else {
  124. page_zip->ssize = 0;
  125. }
  126. ut_ad(page_zip_get_size(page_zip) == size);
  127. }
  128. #ifndef UNIV_HOTBACKUP
  129. /**********************************************************************//**
  130. Determine if a record is so big that it needs to be stored externally.
  131. @return FALSE if the entire record can be stored locally on the page */
  132. UNIV_INLINE
  133. ibool
  134. page_zip_rec_needs_ext(
  135. /*===================*/
  136. ulint rec_size, /*!< in: length of the record in bytes */
  137. ulint comp, /*!< in: nonzero=compact format */
  138. ulint n_fields, /*!< in: number of fields in the record;
  139. ignored if zip_size == 0 */
  140. ulint zip_size) /*!< in: compressed page size in bytes, or 0 */
  141. {
  142. ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES);
  143. ut_ad(ut_is_2pow(zip_size));
  144. ut_ad(comp || !zip_size);
  145. #if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
  146. if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) {
  147. return(TRUE);
  148. }
  149. #endif
  150. if (UNIV_UNLIKELY(zip_size)) {
  151. ut_ad(comp);
  152. /* On a compressed page, there is a two-byte entry in
  153. the dense page directory for every record. But there
  154. is no record header. There should be enough room for
  155. one record on an empty leaf page. Subtract 1 byte for
  156. the encoded heap number. Check also the available space
  157. on the uncompressed page. */
  158. return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
  159. >= page_zip_empty_size(n_fields, zip_size)
  160. || rec_size >= page_get_free_space_of_empty(TRUE) / 2);
  161. }
  162. return(rec_size >= page_get_free_space_of_empty(comp) / 2);
  163. }
  164. #endif /* !UNIV_HOTBACKUP */
  165. #ifdef UNIV_DEBUG
  166. /**********************************************************************//**
  167. Validate a compressed page descriptor.
  168. @return TRUE if ok */
  169. UNIV_INLINE
  170. ibool
  171. page_zip_simple_validate(
  172. /*=====================*/
  173. const page_zip_des_t* page_zip)/*!< in: compressed page descriptor */
  174. {
  175. ut_ad(page_zip);
  176. ut_ad(page_zip->data);
  177. ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE);
  178. ut_ad(page_zip_get_size(page_zip)
  179. > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
  180. ut_ad(page_zip->m_start <= page_zip->m_end);
  181. ut_ad(page_zip->m_end < page_zip_get_size(page_zip));
  182. ut_ad(page_zip->n_blobs
  183. < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE);
  184. return(TRUE);
  185. }
  186. #endif /* UNIV_DEBUG */
  187. /**********************************************************************//**
  188. Determine if the length of the page trailer.
  189. @return length of the page trailer, in bytes, not including the
  190. terminating zero byte of the modification log */
  191. UNIV_INLINE
  192. ibool
  193. page_zip_get_trailer_len(
  194. /*=====================*/
  195. const page_zip_des_t* page_zip,/*!< in: compressed page */
  196. ibool is_clust)/*!< in: TRUE if clustered index */
  197. {
  198. ulint uncompressed_size;
  199. ut_ad(page_zip_simple_validate(page_zip));
  200. UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
  201. if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) {
  202. uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
  203. + REC_NODE_PTR_SIZE;
  204. ut_ad(!page_zip->n_blobs);
  205. } else if (UNIV_UNLIKELY(is_clust)) {
  206. uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
  207. + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
  208. } else {
  209. uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
  210. ut_ad(!page_zip->n_blobs);
  211. }
  212. return((page_dir_get_n_heap(page_zip->data) - 2)
  213. * uncompressed_size
  214. + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
  215. }
  216. /**********************************************************************//**
  217. Determine how big record can be inserted without recompressing the page.
  218. @return a positive number indicating the maximum size of a record
  219. whose insertion is guaranteed to succeed, or zero or negative */
  220. UNIV_INLINE
  221. lint
  222. page_zip_max_ins_size(
  223. /*==================*/
  224. const page_zip_des_t* page_zip,/*!< in: compressed page */
  225. ibool is_clust)/*!< in: TRUE if clustered index */
  226. {
  227. ulint trailer_len;
  228. trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
  229. /* When a record is created, a pointer may be added to
  230. the dense directory.
  231. Likewise, space for the columns that will not be
  232. compressed will be allocated from the page trailer.
  233. Also the BLOB pointers will be allocated from there, but
  234. we may as well count them in the length of the record. */
  235. trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
  236. return((lint) page_zip_get_size(page_zip)
  237. - trailer_len - page_zip->m_end
  238. - (REC_N_NEW_EXTRA_BYTES - 2));
  239. }
  240. /**********************************************************************//**
  241. Determine if enough space is available in the modification log.
  242. @return TRUE if enough space is available */
  243. UNIV_INLINE
  244. ibool
  245. page_zip_available(
  246. /*===============*/
  247. const page_zip_des_t* page_zip,/*!< in: compressed page */
  248. ibool is_clust,/*!< in: TRUE if clustered index */
  249. ulint length, /*!< in: combined size of the record */
  250. ulint create) /*!< in: nonzero=add the record to
  251. the heap */
  252. {
  253. ulint trailer_len;
  254. ut_ad(length > REC_N_NEW_EXTRA_BYTES);
  255. trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
  256. /* Subtract the fixed extra bytes and add the maximum
  257. space needed for identifying the record (encoded heap_no). */
  258. length -= REC_N_NEW_EXTRA_BYTES - 2;
  259. if (UNIV_UNLIKELY(create)) {
  260. /* When a record is created, a pointer may be added to
  261. the dense directory.
  262. Likewise, space for the columns that will not be
  263. compressed will be allocated from the page trailer.
  264. Also the BLOB pointers will be allocated from there, but
  265. we may as well count them in the length of the record. */
  266. trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
  267. }
  268. return(UNIV_LIKELY(length
  269. + trailer_len
  270. + page_zip->m_end
  271. < page_zip_get_size(page_zip)));
  272. }
  273. /**********************************************************************//**
  274. Initialize a compressed page descriptor. */
  275. UNIV_INLINE
  276. void
  277. page_zip_des_init(
  278. /*==============*/
  279. page_zip_des_t* page_zip) /*!< in/out: compressed page
  280. descriptor */
  281. {
  282. memset(page_zip, 0, sizeof *page_zip);
  283. }
  284. /**********************************************************************//**
  285. Write a log record of writing to the uncompressed header portion of a page. */
  286. UNIV_INTERN
  287. void
  288. page_zip_write_header_log(
  289. /*======================*/
  290. const byte* data,/*!< in: data on the uncompressed page */
  291. ulint length, /*!< in: length of the data */
  292. mtr_t* mtr); /*!< in: mini-transaction */
  293. /**********************************************************************//**
  294. Write data to the uncompressed header portion of a page. The data must
  295. already have been written to the uncompressed page.
  296. However, the data portion of the uncompressed page may differ from
  297. the compressed page when a record is being inserted in
  298. page_cur_insert_rec_zip(). */
  299. UNIV_INLINE
  300. void
  301. page_zip_write_header(
  302. /*==================*/
  303. page_zip_des_t* page_zip,/*!< in/out: compressed page */
  304. const byte* str, /*!< in: address on the uncompressed page */
  305. ulint length, /*!< in: length of the data */
  306. mtr_t* mtr) /*!< in: mini-transaction, or NULL */
  307. {
  308. ulint pos;
  309. ut_ad(PAGE_ZIP_MATCH(str, page_zip));
  310. ut_ad(page_zip_simple_validate(page_zip));
  311. UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
  312. pos = page_offset(str);
  313. ut_ad(pos < PAGE_DATA);
  314. memcpy(page_zip->data + pos, str, length);
  315. /* The following would fail in page_cur_insert_rec_zip(). */
  316. /* ut_ad(page_zip_validate(page_zip, str - pos)); */
  317. if (UNIV_LIKELY_NULL(mtr)) {
  318. #ifndef UNIV_HOTBACKUP
  319. page_zip_write_header_log(str, length, mtr);
  320. #endif /* !UNIV_HOTBACKUP */
  321. }
  322. }
  323. #ifdef UNIV_MATERIALIZE
  324. # undef UNIV_INLINE
  325. # define UNIV_INLINE UNIV_INLINE_ORIGINAL
  326. #endif