mirror of https://github.com/MariaDB/server
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							337 lines
						
					
					
						
							10 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							337 lines
						
					
					
						
							10 KiB
						
					
					
				
								/******************************************************
							 | 
						|
								Compressed page interface
							 | 
						|
								
							 | 
						|
								(c) 2005 Innobase Oy
							 | 
						|
								
							 | 
						|
								Created June 2005 by Marko Makela
							 | 
						|
								*******************************************************/
							 | 
						|
								
							 | 
						|
								#ifdef UNIV_MATERIALIZE
							 | 
						|
								# undef UNIV_INLINE
							 | 
						|
								# define UNIV_INLINE
							 | 
						|
								#endif
							 | 
						|
								
							 | 
						|
								#include "page0zip.h"
							 | 
						|
								#include "page0page.h"
							 | 
						|
								
							 | 
						|
								/* The format of compressed pages is as follows.
							 | 
						|
								
							 | 
						|
								The header and trailer of the uncompressed pages, excluding the page
							 | 
						|
								directory in the trailer, are copied as is to the header and trailer
							 | 
						|
								of the compressed page.
							 | 
						|
								
							 | 
						|
								At the end of the compressed page, there is a dense page directory
							 | 
						|
								pointing to every user record contained on the page, including deleted
							 | 
						|
								records on the free list.  The dense directory is indexed in the
							 | 
						|
								collation order, i.e., in the order in which the record list is
							 | 
						|
								linked on the uncompressed page.  The infimum and supremum records are
							 | 
						|
								excluded.  The two most significant bits of the entries are allocated
							 | 
						|
								for the delete-mark and an n_owned flag indicating the last record in
							 | 
						|
								a chain of records pointed to from the sparse page directory on the
							 | 
						|
								uncompressed page.
							 | 
						|
								
							 | 
						|
								The data between PAGE_ZIP_START and the last page directory entry will
							 | 
						|
								be written in compressed format, starting at offset PAGE_DATA.
							 | 
						|
								Infimum and supremum records are not stored.  We exclude the
							 | 
						|
								REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
							 | 
						|
								from the dense page directory stored at the end of the compressed
							 | 
						|
								page.
							 | 
						|
								
							 | 
						|
								The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
							 | 
						|
								roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
							 | 
						|
								externally stored columns are stored separately, in ascending order of
							 | 
						|
								heap_no and column index, starting backwards from the dense page
							 | 
						|
								directory.
							 | 
						|
								
							 | 
						|
								The compressed data stream may be followed by a modification log
							 | 
						|
								covering the compressed portion of the page, as follows.
							 | 
						|
								
							 | 
						|
								MODIFICATION LOG ENTRY FORMAT
							 | 
						|
								- write record:
							 | 
						|
								  - (heap_no - 1) << 1 (1..2 bytes)
							 | 
						|
								  - extra bytes backwards
							 | 
						|
								  - data bytes
							 | 
						|
								- clear record:
							 | 
						|
								  - (heap_no - 1) << 1 | 1 (1..2 bytes)
							 | 
						|
								
							 | 
						|
								The integer values are stored in a variable-length format:
							 | 
						|
								- 0xxxxxxx: 0..127
							 | 
						|
								- 1xxxxxxx xxxxxxxx: 0..32767
							 | 
						|
								
							 | 
						|
								The end of the modification log is marked by a 0 byte.
							 | 
						|
								
							 | 
						|
								In summary, the compressed page looks like this:
							 | 
						|
								
							 | 
						|
								(1) Uncompressed page header (PAGE_DATA bytes)
							 | 
						|
								(2) Compressed index information
							 | 
						|
								(3) Compressed page data
							 | 
						|
								(4) Page modification log (page_zip->m_start..page_zip->m_end)
							 | 
						|
								(5) Empty zero-filled space
							 | 
						|
								(6) BLOB pointers (on leaf pages)
							 | 
						|
								  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
							 | 
						|
								  - in descending collation order
							 | 
						|
								(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
							 | 
						|
								  - indexed by heap_no
							 | 
						|
								  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
							 | 
						|
								  - REC_NODE_PTR_SIZE for non-leaf pages
							 | 
						|
								  - 0 otherwise
							 | 
						|
								(8) dense page directory, stored backwards
							 | 
						|
								  - n_dense = n_heap - 2
							 | 
						|
								  - existing records in ascending collation order
							 | 
						|
								  - deleted records (free list) in link order
							 | 
						|
								*/
							 | 
						|
								
							 | 
						|
								/* Start offset of the area that will be compressed: the data that follows PAGE_NEW_SUPREMUM_END */
							 | 
						|
								#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
							 | 
						|
								/* Size of a compressed page directory entry, in bytes */
							 | 
						|
								#define PAGE_ZIP_DIR_SLOT_SIZE	2
							 | 
						|
								/* Mask of record offsets: the low 14 bits of a dense directory entry */
							 | 
						|
								#define PAGE_ZIP_DIR_SLOT_MASK	0x3fff
							 | 
						|
								/* 'owned' flag: marks the last record in a chain of records pointed to from the sparse page directory */
							 | 
						|
								#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
							 | 
						|
								/* 'deleted' flag: the delete-mark of the record, kept in the most significant bit of the entry */
							 | 
						|
								#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
							 | 
						|
								
							 | 
						|
								/**************************************************************************
							 | 
						|
								Determine if a record is so big that it needs to be stored externally. */
							 | 
						|
								UNIV_INLINE
							 | 
						|
								ibool
							 | 
						|
								page_zip_rec_needs_ext(
							 | 
						|
								/*===================*/
							 | 
						|
												/* out: FALSE if the entire record
							 | 
						|
												can be stored locally on the page */
							 | 
						|
									ulint	rec_size,	/* in: length of the record in bytes */
							 | 
						|
									ulint	comp,		/* in: nonzero=compact format */
							 | 
						|
									ulint	zip_size)	/* in: compressed page size in bytes, or 0 */
							 | 
						|
								{
							 | 
						|
									ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES);
							 | 
						|
									ut_ad(ut_is_2pow(zip_size));
							 | 
						|
								
							 | 
						|
								#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
							 | 
						|
									if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) {
							 | 
						|
										return(TRUE);
							 | 
						|
									}
							 | 
						|
								#endif
							 | 
						|
								
							 | 
						|
									if (UNIV_UNLIKELY(!comp)) {
							 | 
						|
										ut_ad(!zip_size);
							 | 
						|
										return(rec_size >= page_get_free_space_of_empty(FALSE) / 2);
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									/* If zip_size != 0, the record should fit on the compressed page.
							 | 
						|
									If not, the right-hand-side of the comparison will overwrap
							 | 
						|
									and the condition will not hold.  Thus, we do not need to test
							 | 
						|
									for zip_size != 0.  We subtract the size of the page header and
							 | 
						|
									assume that compressing the index information takes 50 bytes. */
							 | 
						|
									if (rec_size >= zip_size - (PAGE_DATA + 50)) {
							 | 
						|
										return(TRUE);
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return(rec_size >= page_get_free_space_of_empty(TRUE) / 2);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								#ifdef UNIV_DEBUG
							 | 
						|
								/**************************************************************************
							 | 
						|
								Validate a compressed page descriptor. */
							 | 
						|
								UNIV_INLINE
							 | 
						|
								ibool
							 | 
						|
								page_zip_simple_validate(
							 | 
						|
								/*=====================*/
							 | 
						|
													/* out: TRUE if ok */
							 | 
						|
									const page_zip_des_t*	page_zip)/* in: compressed page descriptor */
							 | 
						|
								{
							 | 
						|
									ut_ad(page_zip);
							 | 
						|
									ut_ad(page_zip->data);
							 | 
						|
									ut_ad(ut_is_2pow(page_zip->size));
							 | 
						|
									ut_ad(page_zip->size <= UNIV_PAGE_SIZE);
							 | 
						|
									ut_ad(page_zip->size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
							 | 
						|
									ut_ad(page_zip->m_start <= page_zip->m_end);
							 | 
						|
									ut_ad(page_zip->m_end < page_zip->size);
							 | 
						|
									ut_ad(page_zip->n_blobs < page_zip->size / BTR_EXTERN_FIELD_REF_SIZE);
							 | 
						|
									return(TRUE);
							 | 
						|
								}
							 | 
						|
								#endif /* UNIV_DEBUG */
							 | 
						|
								
							 | 
						|
								/**************************************************************************
							 | 
						|
								Determine if the length of the page trailer. */
							 | 
						|
								UNIV_INLINE
							 | 
						|
								ibool
							 | 
						|
								page_zip_get_trailer_len(
							 | 
						|
								/*=====================*/
							 | 
						|
													/* out: length of the page trailer,
							 | 
						|
													in bytes, not including the terminating
							 | 
						|
													zero byte of the modification log */
							 | 
						|
									const page_zip_des_t*	page_zip,/* in: compressed page */
							 | 
						|
									dict_index_t*		index,	/* in: index of the B-tree node */
							 | 
						|
									ulint*			entry_size)/* out: size of the uncompressed
							 | 
						|
													portion of a user record */
							 | 
						|
								{
							 | 
						|
									ulint	uncompressed_size;
							 | 
						|
								
							 | 
						|
									ut_ad(page_zip_simple_validate(page_zip));
							 | 
						|
								
							 | 
						|
									if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) {
							 | 
						|
										uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
							 | 
						|
											+ REC_NODE_PTR_SIZE;
							 | 
						|
										ut_ad(!page_zip->n_blobs);
							 | 
						|
									} else if (dict_index_is_clust(index)) {
							 | 
						|
										uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
							 | 
						|
											+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
							 | 
						|
									} else {
							 | 
						|
										uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
							 | 
						|
										ut_ad(!page_zip->n_blobs);
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									if (entry_size) {
							 | 
						|
										*entry_size = uncompressed_size;
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return((page_dir_get_n_heap(page_zip->data) - 2)
							 | 
						|
									       * uncompressed_size
							 | 
						|
									       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								/**************************************************************************
							 | 
						|
								Determine if enough space is available in the modification log. */
							 | 
						|
								UNIV_INLINE
							 | 
						|
								ibool
							 | 
						|
								page_zip_available(
							 | 
						|
								/*===============*/
							 | 
						|
													/* out: TRUE if enough space
							 | 
						|
													is available */
							 | 
						|
									const page_zip_des_t*	page_zip,/* in: compressed page */
							 | 
						|
									dict_index_t*		index,	/* in: index of the B-tree node */
							 | 
						|
									ulint			length,	/* in: combined size of the record */
							 | 
						|
									ulint			create)	/* in: nonzero=add the record to
							 | 
						|
													the heap */
							 | 
						|
								{
							 | 
						|
									ulint	uncompressed_size;
							 | 
						|
									ulint	trailer_len;
							 | 
						|
								
							 | 
						|
									ut_ad(length > REC_N_NEW_EXTRA_BYTES);
							 | 
						|
								
							 | 
						|
									trailer_len = page_zip_get_trailer_len(page_zip, index,
							 | 
						|
													       &uncompressed_size);
							 | 
						|
								
							 | 
						|
									/* Subtract the fixed extra bytes and add the maximum
							 | 
						|
									space needed for identifying the record (encoded heap_no). */
							 | 
						|
									length -= REC_N_NEW_EXTRA_BYTES - 2;
							 | 
						|
								
							 | 
						|
									if (UNIV_UNLIKELY(create)) {
							 | 
						|
										/* When a record is created, a pointer may be added to
							 | 
						|
										the dense directory.
							 | 
						|
										Likewise, space for the columns that will not be
							 | 
						|
										compressed will be allocated from the page trailer.
							 | 
						|
										Also the BLOB pointers will be allocated from there, but
							 | 
						|
										we may as well count them in the length of the record. */
							 | 
						|
								
							 | 
						|
										trailer_len += uncompressed_size;
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									return(UNIV_LIKELY(length
							 | 
						|
											   + trailer_len
							 | 
						|
											   + page_zip->m_end
							 | 
						|
											   < page_zip->size));
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								/**************************************************************************
							 | 
						|
								Initialize a compressed page descriptor. */
							 | 
						|
								UNIV_INLINE
							 | 
						|
								void
							 | 
						|
								page_zip_des_init(
							 | 
						|
								/*==============*/
							 | 
						|
									page_zip_des_t*	page_zip)	/* in/out: compressed page
							 | 
						|
													descriptor */
							 | 
						|
								{
							 | 
						|
									memset(page_zip, 0, sizeof *page_zip);
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								/**************************************************************************
							 | 
						|
								Ensure that enough space is available in the modification log.
							 | 
						|
								If not, try to compress the page. */
							 | 
						|
								UNIV_INLINE
							 | 
						|
								ibool
							 | 
						|
								page_zip_alloc(
							 | 
						|
								/*===========*/
							 | 
						|
												/* out: TRUE if enough space is available */
							 | 
						|
									page_zip_des_t*	page_zip,/* in/out: compressed page;
							 | 
						|
												will only be modified if compression is needed
							 | 
						|
												and successful */
							 | 
						|
									const page_t*	page,	/* in: uncompressed page */
							 | 
						|
									dict_index_t*	index,	/* in: index of the B-tree node */
							 | 
						|
									ulint		length,	/* in: combined size of the record */
							 | 
						|
									ulint		create,	/* in: nonzero=add the record to the heap */
							 | 
						|
									mtr_t*		mtr)	/* in: mini-transaction, or NULL */
							 | 
						|
								{
							 | 
						|
									ut_ad(page_is_comp(page));
							 | 
						|
								#ifdef UNIV_ZIP_DEBUG
							 | 
						|
									ut_a(page_zip_validate(page_zip, page));
							 | 
						|
								#endif /* UNIV_ZIP_DEBUG */
							 | 
						|
								
							 | 
						|
									if (page_zip_available(page_zip, index, length, create)) {
							 | 
						|
										return(TRUE);
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									if (page_zip->m_start == page_zip->m_end) {
							 | 
						|
										/* The page has been freshly compressed, so
							 | 
						|
										recompressing it will not help. */
							 | 
						|
										return(FALSE);
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									if (!page_zip_compress(page_zip, page, index, mtr)) {
							 | 
						|
										/* Unable to compress the page */
							 | 
						|
										return(FALSE);
							 | 
						|
									}
							 | 
						|
								
							 | 
						|
									/* Check if there is enough space available after compression. */
							 | 
						|
									return(page_zip_available(page_zip, index, length, create));
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								/**************************************************************************
							 | 
						|
								Write a log record of writing to the uncompressed header portion of a page.
								This is only a declaration; it lets the inline function
								page_zip_write_header() call the routine when a mini-transaction
								is supplied. */
							 | 
						|
								
							 | 
						|
								void
							 | 
						|
								page_zip_write_header_log(
							 | 
						|
								/*======================*/
							 | 
						|
									const byte*	data,/* in: pointer to the data on the uncompressed page */
							 | 
						|
									ulint		length,	/* in: length of the data, in bytes */
							 | 
						|
									mtr_t*		mtr);	/* in: mini-transaction */
							 | 
						|
								
							 | 
						|
								/**************************************************************************
							 | 
						|
								Write data to the uncompressed header portion of a page.  The data must
							 | 
						|
								already have been written to the uncompressed page.
							 | 
						|
								However, the data portion of the uncompressed page may differ from
							 | 
						|
								the compressed page when a record is being inserted in
							 | 
						|
								page_cur_insert_rec_low(). */
							 | 
						|
								UNIV_INLINE
							 | 
						|
								void
							 | 
						|
								page_zip_write_header(
							 | 
						|
								/*==================*/
							 | 
						|
									page_zip_des_t*	page_zip,/* in/out: compressed page */
							 | 
						|
									const byte*	str,	/* in: address on the uncompressed page */
							 | 
						|
									ulint		length,	/* in: length of the data */
							 | 
						|
									mtr_t*		mtr)	/* in: mini-transaction, or NULL */
							 | 
						|
								{
							 | 
						|
									ulint	pos;
							 | 
						|
								
							 | 
						|
									ut_ad(buf_frame_get_page_zip((byte*)str) == page_zip);
							 | 
						|
									ut_ad(page_zip_simple_validate(page_zip));
							 | 
						|
								
							 | 
						|
									pos = page_offset(str);
							 | 
						|
								
							 | 
						|
									ut_ad(pos < PAGE_DATA);
							 | 
						|
								
							 | 
						|
									memcpy(page_zip->data + pos, str, length);
							 | 
						|
								
							 | 
						|
									/* The following would fail in page_cur_insert_rec_low(). */
							 | 
						|
									/* ut_ad(page_zip_validate(page_zip, str - pos)); */
							 | 
						|
								
							 | 
						|
									if (UNIV_LIKELY_NULL(mtr)) {
							 | 
						|
										page_zip_write_header_log(str, length, mtr);
							 | 
						|
									}
							 | 
						|
								}
							 | 
						|
								
							 | 
						|
								#ifdef UNIV_MATERIALIZE
							 | 
						|
								# undef UNIV_INLINE
							 | 
						|
								# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
							 | 
						|
								#endif
							 |