mirror of https://github.com/MariaDB/server
				
				
			
			You can not select more than 25 topics
			Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
		
		
		
		
		
			
		
			
				
					
					
						
							416 lines
						
					
					
						
							12 KiB
						
					
					
				
			
		
		
		
			
			
			
		
		
	
	
							416 lines
						
					
					
						
							12 KiB
						
					
					
				| /****************************************************** | |
| Compressed page interface | |
| 
 | |
| (c) 2005 Innobase Oy | |
| 
 | |
| Created June 2005 by Marko Makela | |
| *******************************************************/ | |
| 
 | |
| #ifdef UNIV_MATERIALIZE | |
| # undef UNIV_INLINE | |
| # define UNIV_INLINE | |
| #endif | |
| 
 | |
| #include "page0zip.h" | |
| #include "page0page.h" | |
| 
 | |
| /* The format of compressed pages is as follows. | |
| 
 | |
| The header and trailer of the uncompressed pages, excluding the page | |
| directory in the trailer, are copied as is to the header and trailer | |
| of the compressed page. | |
| 
 | |
| At the end of the compressed page, there is a dense page directory | |
| pointing to every user record contained on the page, including deleted | |
| records on the free list.  The dense directory is indexed in the | |
| collation order, i.e., in the order in which the record list is | |
| linked on the uncompressed page.  The infimum and supremum records are | |
| excluded.  The two most significant bits of the entries are allocated | |
| for the delete-mark and an n_owned flag indicating the last record in | |
| a chain of records pointed to from the sparse page directory on the | |
| uncompressed page. | |
| 
 | |
| The data between PAGE_ZIP_START and the last page directory entry will | |
| be written in compressed format, starting at offset PAGE_DATA. | |
| Infimum and supremum records are not stored.  We exclude the | |
| REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered | |
| from the dense page directory stored at the end of the compressed | |
| page. | |
| 
 | |
| The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and | |
| roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of | |
| externally stored columns are stored separately, in ascending order of | |
| heap_no and column index, starting backwards from the dense page | |
| directory. | |
| 
 | |
| The compressed data stream may be followed by a modification log | |
| covering the compressed portion of the page, as follows. | |
| 
 | |
| MODIFICATION LOG ENTRY FORMAT | |
| - write record: | |
|   - (heap_no - 1) << 1 (1..2 bytes) | |
|   - extra bytes backwards | |
|   - data bytes | |
| - clear record: | |
|   - (heap_no - 1) << 1 | 1 (1..2 bytes) | |
| 
 | |
| The integer values are stored in a variable-length format: | |
| - 0xxxxxxx: 0..127 | |
| - 1xxxxxxx xxxxxxxx: 0..32767 | |
| 
 | |
| The end of the modification log is marked by a 0 byte. | |
| 
 | |
| In summary, the compressed page looks like this: | |
| 
 | |
| (1) Uncompressed page header (PAGE_DATA bytes) | |
| (2) Compressed index information | |
| (3) Compressed page data | |
| (4) Page modification log (page_zip->m_start..page_zip->m_end) | |
| (5) Empty zero-filled space | |
| (6) BLOB pointers (on leaf pages) | |
|   - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column | |
|   - in descending collation order | |
| (7) Uncompressed columns of user records, n_dense * uncompressed_size bytes, | |
|   - indexed by heap_no | |
|   - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes | |
|   - REC_NODE_PTR_SIZE for non-leaf pages | |
|   - 0 otherwise | |
| (8) dense page directory, stored backwards | |
|   - n_dense = n_heap - 2 | |
|   - existing records in ascending collation order | |
|   - deleted records (free list) in link order | |
| */ | |
| 
 | |
/* Start offset of the area that will be compressed */
#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
/* Size of a compressed page directory entry, in bytes */
#define PAGE_ZIP_DIR_SLOT_SIZE	2
/* Mask of record offsets (the low 14 bits of a directory entry) */
#define PAGE_ZIP_DIR_SLOT_MASK	0x3fff
/* 'owned' flag (bit 14 of a directory entry) */
#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
/* 'deleted' flag (bit 15, the most significant bit of a directory entry) */
#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
| 
 | |
| /************************************************************************** | |
| Determine the size of a compressed page in bytes. */ | |
| UNIV_INLINE | |
| ulint | |
| page_zip_get_size( | |
| /*==============*/ | |
| 						/* out: size in bytes */ | |
| 	const page_zip_des_t*	page_zip)	/* in: compressed page */ | |
| { | |
| 	ulint	size; | |
| 
 | |
| 	if (UNIV_UNLIKELY(!page_zip->ssize)) { | |
| 		return(0); | |
| 	} | |
| 
 | |
| 	size = 512 << page_zip->ssize; | |
| 
 | |
| 	ut_ad(size >= PAGE_ZIP_MIN_SIZE); | |
| 	ut_ad(size <= UNIV_PAGE_SIZE); | |
| 
 | |
| 	return(size); | |
| } | |
| /************************************************************************** | |
| Set the size of a compressed page in bytes. */ | |
| UNIV_INLINE | |
| void | |
| page_zip_set_size( | |
| /*==============*/ | |
| 	page_zip_des_t*	page_zip,	/* in/out: compressed page */ | |
| 	ulint		size)		/* in: size in bytes */ | |
| { | |
| 	if (size) { | |
| 		int	ssize; | |
| 
 | |
| 		ut_ad(ut_is_2pow(size)); | |
| 
 | |
| 		for (ssize = 1; size > (ulint) (512 << ssize); ssize++); | |
| 
 | |
| 		page_zip->ssize = ssize; | |
| 	} else { | |
| 		page_zip->ssize = 0; | |
| 	} | |
| 
 | |
| 	ut_ad(page_zip_get_size(page_zip) == size); | |
| } | |
| 
 | |
| /************************************************************************** | |
| Determine if a record is so big that it needs to be stored externally. */ | |
| UNIV_INLINE | |
| ibool | |
| page_zip_rec_needs_ext( | |
| /*===================*/ | |
| 				/* out: FALSE if the entire record | |
| 				can be stored locally on the page */ | |
| 	ulint	rec_size,	/* in: length of the record in bytes */ | |
| 	ulint	comp,		/* in: nonzero=compact format */ | |
| 	ulint	zip_size)	/* in: compressed page size in bytes, or 0 */ | |
| { | |
| 	ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES); | |
| 	ut_ad(ut_is_2pow(zip_size)); | |
| 
 | |
| #if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE | |
| 	if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) { | |
| 		return(TRUE); | |
| 	} | |
| #endif | |
| 
 | |
| 	if (UNIV_UNLIKELY(!comp)) { | |
| 		ut_ad(!zip_size); | |
| 		return(rec_size >= page_get_free_space_of_empty(FALSE) / 2); | |
| 	} | |
| 
 | |
| 	/* If zip_size != 0, the record should fit on the compressed page. | |
| 	If not, the right-hand-side of the comparison will overwrap | |
| 	and the condition will not hold.  Thus, we do not need to test | |
| 	for zip_size != 0.  We subtract the size of the page header and | |
| 	assume that compressing the index information takes 50 bytes. */ | |
| 	if (rec_size >= zip_size - (PAGE_DATA + 50)) { | |
| 		return(TRUE); | |
| 	} | |
| 
 | |
| 	return(rec_size >= page_get_free_space_of_empty(TRUE) / 2); | |
| } | |
| 
 | |
#ifdef UNIV_DEBUG
/**************************************************************************
Run basic sanity assertions on a compressed page descriptor. */
UNIV_INLINE
ibool
page_zip_simple_validate(
/*=====================*/
					/* out: TRUE if ok */
	const page_zip_des_t*	page_zip)/* in: compressed page descriptor */
{
	ulint	zip_size;

	ut_ad(page_zip);
	ut_ad(page_zip->data);

	zip_size = page_zip_get_size(page_zip);

	/* The page must have room for more than the header and one
	dense directory slot. */
	ut_ad(zip_size > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);

	/* The modification log bounds must be ordered and lie
	within the page. */
	ut_ad(page_zip->m_end >= page_zip->m_start);
	ut_ad(page_zip->m_end < zip_size);

	/* The BLOB pointers must fit on the page. */
	ut_ad(page_zip->n_blobs < zip_size / BTR_EXTERN_FIELD_REF_SIZE);

	return(TRUE);
}
#endif /* UNIV_DEBUG */
| 
 | |
| /************************************************************************** | |
| Determine if the length of the page trailer. */ | |
| UNIV_INLINE | |
| ibool | |
| page_zip_get_trailer_len( | |
| /*=====================*/ | |
| 					/* out: length of the page trailer, | |
| 					in bytes, not including the terminating | |
| 					zero byte of the modification log */ | |
| 	const page_zip_des_t*	page_zip,/* in: compressed page */ | |
| 	ibool			is_clust,/* in: TRUE if clustered index */ | |
| 	ulint*			entry_size)/* out: size of the uncompressed | |
| 					portion of a user record */ | |
| { | |
| 	ulint	uncompressed_size; | |
| 
 | |
| 	ut_ad(page_zip_simple_validate(page_zip)); | |
| 
 | |
| 	if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) { | |
| 		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE | |
| 			+ REC_NODE_PTR_SIZE; | |
| 		ut_ad(!page_zip->n_blobs); | |
| 	} else if (UNIV_UNLIKELY(is_clust)) { | |
| 		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE | |
| 			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; | |
| 	} else { | |
| 		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; | |
| 		ut_ad(!page_zip->n_blobs); | |
| 	} | |
| 
 | |
| 	if (entry_size) { | |
| 		*entry_size = uncompressed_size; | |
| 	} | |
| 
 | |
| 	return((page_dir_get_n_heap(page_zip->data) - 2) | |
| 	       * uncompressed_size | |
| 	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); | |
| } | |
| 
 | |
| /************************************************************************** | |
| Determine how big record can be inserted without recompressing the page. */ | |
| UNIV_INLINE | |
| lint | |
| page_zip_max_ins_size( | |
| /*==================*/ | |
| 					/* out: TRUE if page_zip_write_rec() | |
| 					will succeed */ | |
| 	const page_zip_des_t*	page_zip,/* in: compressed page */ | |
| 	ibool			is_clust)/* in: TRUE if clustered index */ | |
| { | |
| 	ulint	uncompressed_size; | |
| 	ulint	trailer_len; | |
| 
 | |
| 	trailer_len = page_zip_get_trailer_len(page_zip, is_clust, | |
| 					       &uncompressed_size); | |
| 
 | |
| 	/* When a record is created, a pointer may be added to | |
| 	the dense directory. | |
| 	Likewise, space for the columns that will not be | |
| 	compressed will be allocated from the page trailer. | |
| 	Also the BLOB pointers will be allocated from there, but | |
| 	we may as well count them in the length of the record. */ | |
| 
 | |
| 	trailer_len += uncompressed_size; | |
| 
 | |
| 	return((lint) page_zip_get_size(page_zip) | |
| 	       - trailer_len - page_zip->m_end | |
| 	       - (REC_N_NEW_EXTRA_BYTES - 2)); | |
| } | |
| 
 | |
| /************************************************************************** | |
| Determine if enough space is available in the modification log. */ | |
| UNIV_INLINE | |
| ibool | |
| page_zip_available( | |
| /*===============*/ | |
| 					/* out: TRUE if enough space | |
| 					is available */ | |
| 	const page_zip_des_t*	page_zip,/* in: compressed page */ | |
| 	ibool			is_clust,/* in: TRUE if clustered index */ | |
| 	ulint			length,	/* in: combined size of the record */ | |
| 	ulint			create)	/* in: nonzero=add the record to | |
| 					the heap */ | |
| { | |
| 	ulint	uncompressed_size; | |
| 	ulint	trailer_len; | |
| 
 | |
| 	ut_ad(length > REC_N_NEW_EXTRA_BYTES); | |
| 
 | |
| 	trailer_len = page_zip_get_trailer_len(page_zip, is_clust, | |
| 					       &uncompressed_size); | |
| 
 | |
| 	/* Subtract the fixed extra bytes and add the maximum | |
| 	space needed for identifying the record (encoded heap_no). */ | |
| 	length -= REC_N_NEW_EXTRA_BYTES - 2; | |
| 
 | |
| 	if (UNIV_UNLIKELY(create)) { | |
| 		/* When a record is created, a pointer may be added to | |
| 		the dense directory. | |
| 		Likewise, space for the columns that will not be | |
| 		compressed will be allocated from the page trailer. | |
| 		Also the BLOB pointers will be allocated from there, but | |
| 		we may as well count them in the length of the record. */ | |
| 
 | |
| 		trailer_len += uncompressed_size; | |
| 	} | |
| 
 | |
| 	return(UNIV_LIKELY(length | |
| 			   + trailer_len | |
| 			   + page_zip->m_end | |
| 			   < page_zip_get_size(page_zip))); | |
| } | |
| 
 | |
| /************************************************************************** | |
| Initialize a compressed page descriptor. */ | |
| UNIV_INLINE | |
| void | |
| page_zip_des_init( | |
| /*==============*/ | |
| 	page_zip_des_t*	page_zip)	/* in/out: compressed page | |
| 					descriptor */ | |
| { | |
| 	memset(page_zip, 0, sizeof *page_zip); | |
| } | |
| 
 | |
| /************************************************************************** | |
| Ensure that enough space is available in the modification log. | |
| If not, try to compress the page. */ | |
| UNIV_INLINE | |
| ibool | |
| page_zip_alloc( | |
| /*===========*/ | |
| 				/* out: TRUE if enough space is available */ | |
| 	page_zip_des_t*	page_zip,/* in/out: compressed page; | |
| 				will only be modified if compression is needed | |
| 				and successful */ | |
| 	const page_t*	page,	/* in: uncompressed page */ | |
| 	dict_index_t*	index,	/* in: index of the B-tree node */ | |
| 	ulint		length,	/* in: combined size of the record */ | |
| 	ulint		create,	/* in: nonzero=add the record to the heap */ | |
| 	mtr_t*		mtr)	/* in: mini-transaction, or NULL */ | |
| { | |
| 	ut_ad(page_is_comp(page)); | |
| #ifdef UNIV_ZIP_DEBUG | |
| 	ut_a(page_zip_validate(page_zip, page)); | |
| #endif /* UNIV_ZIP_DEBUG */ | |
| 
 | |
| 	if (page_zip_available(page_zip, dict_index_is_clust(index), | |
| 			       length, create)) { | |
| 		return(TRUE); | |
| 	} | |
| 
 | |
| 	if (!page_zip->m_nonempty) { | |
| 		/* The page has been freshly compressed, so | |
| 		recompressing it will not help. */ | |
| 		return(FALSE); | |
| 	} | |
| 
 | |
| 	if (!page_zip_compress(page_zip, page, index, mtr)) { | |
| 		/* Unable to compress the page */ | |
| 		return(FALSE); | |
| 	} | |
| 
 | |
| 	/* Check if there is enough space available after compression. */ | |
| 	return(page_zip_available(page_zip, dict_index_is_clust(index), | |
| 				  length, create)); | |
| } | |
| 
 | |
/**************************************************************************
Write a log record of writing to the uncompressed header portion of a page.
Only declared here; page_zip_write_header() calls it when a
mini-transaction is supplied. */

void
page_zip_write_header_log(
/*======================*/
	const byte*	data,/* in: data on the uncompressed page */
	ulint		length,	/* in: length of the data */
	mtr_t*		mtr);	/* in: mini-transaction */
| 
 | |
| /************************************************************************** | |
| Write data to the uncompressed header portion of a page.  The data must | |
| already have been written to the uncompressed page. | |
| However, the data portion of the uncompressed page may differ from | |
| the compressed page when a record is being inserted in | |
| page_cur_insert_rec_zip(). */ | |
| UNIV_INLINE | |
| void | |
| page_zip_write_header( | |
| /*==================*/ | |
| 	page_zip_des_t*	page_zip,/* in/out: compressed page */ | |
| 	const byte*	str,	/* in: address on the uncompressed page */ | |
| 	ulint		length,	/* in: length of the data */ | |
| 	mtr_t*		mtr)	/* in: mini-transaction, or NULL */ | |
| { | |
| 	ulint	pos; | |
| 
 | |
| 	ut_ad(buf_frame_get_page_zip((byte*)str) == page_zip); | |
| 	ut_ad(page_zip_simple_validate(page_zip)); | |
| 
 | |
| 	pos = page_offset(str); | |
| 
 | |
| 	ut_ad(pos < PAGE_DATA); | |
| 
 | |
| 	memcpy(page_zip->data + pos, str, length); | |
| 
 | |
| 	/* The following would fail in page_cur_insert_rec_zip(). */ | |
| 	/* ut_ad(page_zip_validate(page_zip, str - pos)); */ | |
| 
 | |
| 	if (UNIV_LIKELY_NULL(mtr)) { | |
| 		page_zip_write_header_log(str, length, mtr); | |
| 	} | |
| } | |
| 
 | |
| #ifdef UNIV_MATERIALIZE | |
| # undef UNIV_INLINE | |
| # define UNIV_INLINE	UNIV_INLINE_ORIGINAL | |
| #endif
 |