You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

5330 lines
148 KiB

17 years ago
17 years ago
17 years ago
17 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
17 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
17 years ago
17 years ago
14 years ago
14 years ago
17 years ago
17 years ago
17 years ago
17 years ago
17 years ago
14 years ago
17 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
17 years ago
14 years ago
17 years ago
14 years ago
17 years ago
14 years ago
17 years ago
14 years ago
17 years ago
14 years ago
17 years ago
14 years ago
17 years ago
14 years ago
16 years ago
14 years ago
14 years ago
16 years ago
16 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
16 years ago
16 years ago
16 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
17 years ago
17 years ago
17 years ago
17 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
17 years ago
17 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
17 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
17 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
17 years ago
14 years ago
14 years ago
17 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
  1. /*****************************************************************************
  2. Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
  3. Copyright (c) 2008, Google Inc.
  4. Portions of this file contain modifications contributed and copyrighted by
  5. Google, Inc. Those modifications are gratefully acknowledged and are described
  6. briefly in the InnoDB documentation. The contributions by Google are
  7. incorporated with their permission, and subject to the conditions contained in
  8. the file COPYING.Google.
  9. This program is free software; you can redistribute it and/or modify it under
  10. the terms of the GNU General Public License as published by the Free Software
  11. Foundation; version 2 of the License.
  12. This program is distributed in the hope that it will be useful, but WITHOUT
  13. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  14. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License along with
  16. this program; if not, write to the Free Software Foundation, Inc.,
  17. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  18. *****************************************************************************/
  19. /**************************************************//**
  20. @file buf/buf0buf.c
  21. The database buffer buf_pool
  22. Created 11/5/1995 Heikki Tuuri
  23. *******************************************************/
  24. #include "buf0buf.h"
  25. #ifdef UNIV_NONINL
  26. #include "buf0buf.ic"
  27. #endif
  28. #include "mem0mem.h"
  29. #include "btr0btr.h"
  30. #include "fil0fil.h"
  31. #ifndef UNIV_HOTBACKUP
  32. #include "buf0buddy.h"
  33. #include "lock0lock.h"
  34. #include "btr0sea.h"
  35. #include "ibuf0ibuf.h"
  36. #include "trx0undo.h"
  37. #include "log0log.h"
  38. #endif /* !UNIV_HOTBACKUP */
  39. #include "srv0srv.h"
  40. #include "dict0dict.h"
  41. #include "log0recv.h"
  42. #include "page0zip.h"
  43. #include "trx0trx.h"
  44. #include "srv0start.h"
  45. /* prototypes for new functions added to ha_innodb.cc */
  46. trx_t* innobase_get_trx();
  47. static inline
  48. void
  49. _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
  50. {
  51. ulint block_hash;
  52. ulint block_hash_byte;
  53. byte block_hash_offset;
  54. ut_ad(block);
  55. ut_ad(trx && trx->take_stats);
  56. if (!trx->distinct_page_access_hash) {
  57. trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
  58. memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
  59. }
  60. block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space +
  61. block->page.offset, DPAH_SIZE << 3);
  62. block_hash_byte = block_hash >> 3;
  63. block_hash_offset = (byte) block_hash & 0x07;
  64. if (block_hash_byte >= DPAH_SIZE)
  65. fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset);
  66. if (block_hash_offset > 7)
  67. fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset);
  68. if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0)
  69. trx->distinct_page_access++;
  70. trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset;
  71. return;
  72. }
  73. /*
  74. IMPLEMENTATION OF THE BUFFER POOL
  75. =================================
  76. Performance improvement:
  77. ------------------------
  78. Thread scheduling in NT may be so slow that the OS wait mechanism should
  79. not be used even in waiting for disk reads to complete.
  80. Rather, we should put waiting query threads to the queue of
  81. waiting jobs, and let the OS thread do something useful while the i/o
  82. is processed. In this way we could remove most OS thread switches in
  83. an i/o-intensive benchmark like TPC-C.
  84. A possibility is to put a user space thread library between the database
  85. and NT. User space thread libraries might be very fast.
  86. SQL Server 7.0 can be configured to use 'fibers' which are lightweight
  87. threads in NT. These should be studied.
  88. Buffer frames and blocks
  89. ------------------------
  90. Following the terminology of Gray and Reuter, we call the memory
  91. blocks where file pages are loaded buffer frames. For each buffer
  92. frame there is a control block, or shortly, a block, in the buffer
  93. control array. The control info which does not need to be stored
  94. in the file along with the file page, resides in the control block.
  95. Buffer pool struct
  96. ------------------
  97. The buffer buf_pool contains a single mutex which protects all the
  98. control data structures of the buf_pool. The content of a buffer frame is
  99. protected by a separate read-write lock in its control block, though.
  100. These locks can be locked and unlocked without owning the buf_pool->mutex.
  101. The OS events in the buf_pool struct can be waited for without owning the
  102. buf_pool->mutex.
  103. The buf_pool->mutex is a hot-spot in main memory, causing a lot of
  104. memory bus traffic on multiprocessor systems when processors
  105. alternately access the mutex. On our Pentium, the mutex is accessed
  106. maybe every 10 microseconds. We gave up the solution to have mutexes
  107. for each control block, for instance, because it seemed to be
  108. complicated.
  109. A solution to reduce mutex contention of the buf_pool->mutex is to
  110. create a separate mutex for the page hash table. On Pentium,
  111. accessing the hash table takes 2 microseconds, about half
  112. of the total buf_pool->mutex hold time.
  113. Control blocks
  114. --------------
  115. The control block contains, for instance, the bufferfix count
  116. which is incremented when a thread wants a file page to be fixed
  117. in a buffer frame. The bufferfix operation does not lock the
  118. contents of the frame, however. For this purpose, the control
  119. block contains a read-write lock.
  120. The buffer frames have to be aligned so that the start memory
  121. address of a frame is divisible by the universal page size, which
  122. is a power of two.
  123. We intend to make the buffer buf_pool size on-line reconfigurable,
  124. that is, the buf_pool size can be changed without closing the database.
  125. Then the database administrator may adjust it to be bigger
  126. at night, for example. The control block array must
  127. contain enough control blocks for the maximum buffer buf_pool size
  128. which is used in the particular database.
  129. If the buf_pool size is cut, we exploit the virtual memory mechanism of
  130. the OS, and just refrain from using frames at high addresses. Then the OS
  131. can swap them to disk.
  132. The control blocks containing file pages are put to a hash table
  133. according to the file address of the page.
  134. We could speed up the access to an individual page by using
  135. "pointer swizzling": we could replace the page references on
  136. non-leaf index pages by direct pointers to the page, if it exists
  137. in the buf_pool. We could make a separate hash table where we could
  138. chain all the page references in non-leaf pages residing in the buf_pool,
  139. using the page reference as the hash key,
  140. and at the time of reading of a page update the pointers accordingly.
  141. Drawbacks of this solution are added complexity and,
  142. possibly, extra space required on non-leaf pages for memory pointers.
  143. A simpler solution is just to speed up the hash table mechanism
  144. in the database, using tables whose size is a power of 2.
  145. Lists of blocks
  146. ---------------
  147. There are several lists of control blocks.
  148. The free list (buf_pool->free) contains blocks which are currently not
  149. used.
  150. The common LRU list contains all the blocks holding a file page
  151. except those for which the bufferfix count is non-zero.
  152. The pages are in the LRU list roughly in the order of the last
  153. access to the page, so that the oldest pages are at the end of the
  154. list. We also keep a pointer to near the end of the LRU list,
  155. which we can use when we want to artificially age a page in the
  156. buf_pool. This is used if we know that some page is not needed
  157. again for some time: we insert the block right after the pointer,
  158. causing it to be replaced sooner than would normally be the case.
  159. Currently this aging mechanism is used for read-ahead mechanism
  160. of pages, and it can also be used when there is a scan of a full
  161. table which cannot fit in the memory. Putting the pages near the
  162. end of the LRU list, we make sure that most of the buf_pool stays
  163. in the main memory, undisturbed.
  164. The unzip_LRU list contains a subset of the common LRU list. The
  165. blocks on the unzip_LRU list hold a compressed file page and the
  166. corresponding uncompressed page frame. A block is in unzip_LRU if and
  167. only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
  168. holds. The blocks in unzip_LRU will be in same order as they are in
  169. the common LRU list. That is, each manipulation of the common LRU
  170. list will result in the same manipulation of the unzip_LRU list.
  171. The chain of modified blocks (buf_pool->flush_list) contains the blocks
  172. holding file pages that have been modified in the memory
  173. but not written to disk yet. The block with the oldest modification
  174. which has not yet been written to disk is at the end of the chain.
  175. The access to this list is protected by buf_pool->flush_list_mutex.
  176. The chain of unmodified compressed blocks (buf_pool->zip_clean)
  177. contains the control blocks (buf_page_t) of those compressed pages
  178. that are not in buf_pool->flush_list and for which no uncompressed
  179. page has been allocated in the buffer pool. The control blocks for
  180. uncompressed pages are accessible via buf_block_t objects that are
  181. reachable via buf_pool->chunks[].
  182. The chains of free memory blocks (buf_pool->zip_free[]) are used by
  183. the buddy allocator (buf0buddy.c) to keep track of currently unused
  184. memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
  185. blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
  186. BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
  187. pool. The buddy allocator is solely used for allocating control
  188. blocks for compressed pages (buf_page_t) and compressed page frames.
  189. Loading a file page
  190. -------------------
  191. First, a victim block for replacement has to be found in the
  192. buf_pool. It is taken from the free list or searched for from the
  193. end of the LRU-list. An exclusive lock is reserved for the frame,
  194. the io_fix field is set in the block fixing the block in buf_pool,
  195. and the io-operation for loading the page is queued. The io-handler thread
  196. releases the X-lock on the frame and resets the io_fix field
  197. when the io operation completes.
  198. A thread may request the above operation using the function
  199. buf_page_get(). It may then continue to request a lock on the frame.
  200. The lock is granted when the io-handler releases the x-lock.
  201. Read-ahead
  202. ----------
  203. The read-ahead mechanism is intended to be intelligent and
  204. isolated from the semantically higher levels of the database
  205. index management. From the higher level we only need the
  206. information if a file page has a natural successor or
  207. predecessor page. On the leaf level of a B-tree index,
  208. these are the next and previous pages in the natural
  209. order of the pages.
  210. Let us first explain the read-ahead mechanism when the leafs
  211. of a B-tree are scanned in an ascending or descending order.
  212. When a read page is the first time referenced in the buf_pool,
  213. the buffer manager checks if it is at the border of a so-called
  214. linear read-ahead area. The tablespace is divided into these
  215. areas of size 64 blocks, for example. So if the page is at the
  216. border of such an area, the read-ahead mechanism checks if
  217. all the other blocks in the area have been accessed in an
  218. ascending or descending order. If this is the case, the system
  219. looks at the natural successor or predecessor of the page,
  220. checks if that is at the border of another area, and in this case
  221. issues read-requests for all the pages in that area. Maybe
  222. we could relax the condition that all the pages in the area
  223. have to be accessed: if data is deleted from a table, there may
  224. appear holes of unused pages in the area.
  225. A different read-ahead mechanism is used when there appears
  226. to be a random access pattern to a file.
  227. If a new page is referenced in the buf_pool, and several pages
  228. of its random access area (for instance, 32 consecutive pages
  229. in a tablespace) have recently been referenced, we may predict
  230. that the whole area may be needed in the near future, and issue
  231. the read requests for the whole area.
  232. */
  233. #ifndef UNIV_HOTBACKUP
  234. /** Value in microseconds */
  235. static const int WAIT_FOR_READ = 100;
  236. /** Number of attempts made to read in a page in the buffer pool */
  237. static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
  238. /** The buffer pools of the database */
  239. UNIV_INTERN buf_pool_t* buf_pool_ptr;
  240. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  241. static ulint buf_dbg_counter = 0; /*!< This is used to insert validation
  242. operations in execution in the
  243. debug version */
  244. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  245. #ifdef UNIV_DEBUG
  246. /** If this is set TRUE, the program prints info whenever
  247. read-ahead or flush occurs */
  248. UNIV_INTERN ibool buf_debug_prints = FALSE;
  249. #endif /* UNIV_DEBUG */
  250. #ifdef UNIV_PFS_RWLOCK
  251. /* Keys to register buffer block related rwlocks and mutexes with
  252. performance schema */
  253. UNIV_INTERN mysql_pfs_key_t buf_pool_page_hash_key;
  254. UNIV_INTERN mysql_pfs_key_t buf_block_lock_key;
  255. # ifdef UNIV_SYNC_DEBUG
  256. UNIV_INTERN mysql_pfs_key_t buf_block_debug_latch_key;
  257. # endif /* UNIV_SYNC_DEBUG */
  258. #endif /* UNIV_PFS_RWLOCK */
  259. #ifdef UNIV_PFS_MUTEX
  260. UNIV_INTERN mysql_pfs_key_t buffer_block_mutex_key;
  261. UNIV_INTERN mysql_pfs_key_t buf_pool_mutex_key;
  262. UNIV_INTERN mysql_pfs_key_t buf_pool_zip_mutex_key;
  263. UNIV_INTERN mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
  264. UNIV_INTERN mysql_pfs_key_t buf_pool_free_list_mutex_key;
  265. UNIV_INTERN mysql_pfs_key_t buf_pool_zip_free_mutex_key;
  266. UNIV_INTERN mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
  267. UNIV_INTERN mysql_pfs_key_t flush_list_mutex_key;
  268. #endif /* UNIV_PFS_MUTEX */
  269. #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
  270. # ifndef PFS_SKIP_BUFFER_MUTEX_RWLOCK
  271. /* Buffer block mutexes and rwlocks can be registered
  272. in one group rather than individually. If PFS_GROUP_BUFFER_SYNC
  273. is defined, register buffer block mutex and rwlock
  274. in one group after their initialization. */
  275. # define PFS_GROUP_BUFFER_SYNC
  276. /* This define caps the number of mutexes/rwlocks that can
  277. be registered with performance schema. Developers can
  278. modify this define if necessary. Please note, this would
  279. be effective only if PFS_GROUP_BUFFER_SYNC is defined. */
  280. # define PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER ULINT_MAX
  281. # endif /* !PFS_SKIP_BUFFER_MUTEX_RWLOCK */
  282. #endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
  283. /********************************************************************//**
  284. Gets the smallest oldest_modification lsn for any page in the pool. Returns
  285. zero if all modified pages have been flushed to disk.
  286. @return oldest modification in pool, zero if none */
  287. UNIV_INTERN
  288. ib_uint64_t
  289. buf_pool_get_oldest_modification(void)
  290. /*==================================*/
  291. {
  292. ulint i;
  293. buf_page_t* bpage;
  294. ib_uint64_t lsn = 0;
  295. ib_uint64_t oldest_lsn = 0;
  296. /* When we traverse all the flush lists we don't want another
  297. thread to add a dirty page to any flush list. */
  298. if (srv_buf_pool_instances > 1)
  299. log_flush_order_mutex_enter();
  300. for (i = 0; i < srv_buf_pool_instances; i++) {
  301. buf_pool_t* buf_pool;
  302. buf_pool = buf_pool_from_array(i);
  303. buf_flush_list_mutex_enter(buf_pool);
  304. bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
  305. if (bpage != NULL) {
  306. ut_ad(bpage->in_flush_list);
  307. lsn = bpage->oldest_modification;
  308. }
  309. buf_flush_list_mutex_exit(buf_pool);
  310. if (!oldest_lsn || oldest_lsn > lsn) {
  311. oldest_lsn = lsn;
  312. }
  313. }
  314. if (srv_buf_pool_instances > 1)
  315. log_flush_order_mutex_exit();
  316. /* The returned answer may be out of date: the flush_list can
  317. change after the mutex has been released. */
  318. return(oldest_lsn);
  319. }
  320. /********************************************************************//**
  321. Get total buffer pool statistics. */
  322. UNIV_INTERN
  323. void
  324. buf_get_total_list_len(
  325. /*===================*/
  326. ulint* LRU_len, /*!< out: length of all LRU lists */
  327. ulint* free_len, /*!< out: length of all free lists */
  328. ulint* flush_list_len) /*!< out: length of all flush lists */
  329. {
  330. ulint i;
  331. *LRU_len = 0;
  332. *free_len = 0;
  333. *flush_list_len = 0;
  334. for (i = 0; i < srv_buf_pool_instances; i++) {
  335. buf_pool_t* buf_pool;
  336. buf_pool = buf_pool_from_array(i);
  337. *LRU_len += UT_LIST_GET_LEN(buf_pool->LRU);
  338. *free_len += UT_LIST_GET_LEN(buf_pool->free);
  339. *flush_list_len += UT_LIST_GET_LEN(buf_pool->flush_list);
  340. }
  341. }
  342. /********************************************************************//**
  343. Get total list size in bytes from all buffer pools. */
  344. UNIV_INTERN
  345. void
  346. buf_get_total_list_size_in_bytes(
  347. /*=============================*/
  348. buf_pools_list_size_t* buf_pools_list_size) /*!< out: list sizes
  349. in all buffer pools */
  350. {
  351. ulint i;
  352. ut_ad(buf_pools_list_size);
  353. memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));
  354. for (i = 0; i < srv_buf_pool_instances; i++) {
  355. buf_pool_t* buf_pool;
  356. buf_pool = buf_pool_from_array(i);
  357. /* We don't need mutex protection since this is
  358. for statistics purpose */
  359. buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
  360. buf_pools_list_size->unzip_LRU_bytes +=
  361. UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE;
  362. buf_pools_list_size->flush_list_bytes +=
  363. buf_pool->stat.flush_list_bytes;
  364. }
  365. }
  366. /********************************************************************//**
  367. Get total buffer pool statistics. */
  368. UNIV_INTERN
  369. void
  370. buf_get_total_stat(
  371. /*===============*/
  372. buf_pool_stat_t* tot_stat) /*!< out: buffer pool stats */
  373. {
  374. ulint i;
  375. memset(tot_stat, 0, sizeof(*tot_stat));
  376. for (i = 0; i < srv_buf_pool_instances; i++) {
  377. buf_pool_stat_t*buf_stat;
  378. buf_pool_t* buf_pool;
  379. buf_pool = buf_pool_from_array(i);
  380. buf_stat = &buf_pool->stat;
  381. tot_stat->n_page_gets += buf_stat->n_page_gets;
  382. tot_stat->n_pages_read += buf_stat->n_pages_read;
  383. tot_stat->n_pages_written += buf_stat->n_pages_written;
  384. tot_stat->n_pages_created += buf_stat->n_pages_created;
  385. tot_stat->n_ra_pages_read_rnd += buf_stat->n_ra_pages_read_rnd;
  386. tot_stat->n_ra_pages_read += buf_stat->n_ra_pages_read;
  387. tot_stat->n_ra_pages_evicted += buf_stat->n_ra_pages_evicted;
  388. tot_stat->n_pages_made_young += buf_stat->n_pages_made_young;
  389. tot_stat->n_pages_not_made_young +=
  390. buf_stat->n_pages_not_made_young;
  391. }
  392. }
  393. /********************************************************************//**
  394. Allocates a buffer block.
  395. @return own: the allocated block, in state BUF_BLOCK_MEMORY */
  396. UNIV_INTERN
  397. buf_block_t*
  398. buf_block_alloc(
  399. /*============*/
  400. buf_pool_t* buf_pool) /*!< in/out: buffer pool instance,
  401. or NULL for round-robin selection
  402. of the buffer pool */
  403. {
  404. buf_block_t* block;
  405. ulint index;
  406. static ulint buf_pool_index;
  407. if (buf_pool == NULL) {
  408. /* We are allocating memory from any buffer pool, ensure
  409. we spread the grace on all buffer pool instances. */
  410. index = buf_pool_index++ % srv_buf_pool_instances;
  411. buf_pool = buf_pool_from_array(index);
  412. }
  413. block = buf_LRU_get_free_block(buf_pool);
  414. buf_block_set_state(block, BUF_BLOCK_MEMORY);
  415. return(block);
  416. }
  417. #endif /* !UNIV_HOTBACKUP */
  418. /********************************************************************//**
  419. Calculates a page checksum which is stored to the page when it is written
  420. to a file. Note that we must be careful to calculate the same value on
  421. 32-bit and 64-bit architectures.
  422. @return checksum */
  423. UNIV_INTERN
  424. ulint
  425. buf_calc_page_new_checksum(
  426. /*=======================*/
  427. const byte* page) /*!< in: buffer page */
  428. {
  429. ulint checksum;
  430. /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
  431. ..._ARCH_LOG_NO, are written outside the buffer pool to the first
  432. pages of data files, we have to skip them in the page checksum
  433. calculation.
  434. We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
  435. checksum is stored, and also the last 8 bytes of page because
  436. there we store the old formula checksum. */
  437. checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
  438. FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
  439. + ut_fold_binary(page + FIL_PAGE_DATA,
  440. UNIV_PAGE_SIZE - FIL_PAGE_DATA
  441. - FIL_PAGE_END_LSN_OLD_CHKSUM);
  442. checksum = checksum & 0xFFFFFFFFUL;
  443. return(checksum);
  444. }
  445. UNIV_INTERN
  446. ulint
  447. buf_calc_page_new_checksum_32(
  448. /*==========================*/
  449. const byte* page) /*!< in: buffer page */
  450. {
  451. ulint checksum;
  452. checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
  453. FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
  454. + ut_fold_binary(page + FIL_PAGE_DATA,
  455. FIL_PAGE_DATA_ALIGN_32 - FIL_PAGE_DATA)
  456. + ut_fold_binary_32(page + FIL_PAGE_DATA_ALIGN_32,
  457. UNIV_PAGE_SIZE - FIL_PAGE_DATA_ALIGN_32
  458. - FIL_PAGE_END_LSN_OLD_CHKSUM);
  459. checksum = checksum & 0xFFFFFFFFUL;
  460. return(checksum);
  461. }
  462. /********************************************************************//**
  463. In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
  464. looked at the first few bytes of the page. This calculates that old
  465. checksum.
  466. NOTE: we must first store the new formula checksum to
  467. FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
  468. because this takes that field as an input!
  469. @return checksum */
  470. UNIV_INTERN
  471. ulint
  472. buf_calc_page_old_checksum(
  473. /*=======================*/
  474. const byte* page) /*!< in: buffer page */
  475. {
  476. ulint checksum;
  477. checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
  478. checksum = checksum & 0xFFFFFFFFUL;
  479. return(checksum);
  480. }
  481. /********************************************************************//**
  482. Checks if a page is corrupt.
  483. @return TRUE if corrupted */
  484. UNIV_INTERN
  485. ibool
  486. buf_page_is_corrupted(
  487. /*==================*/
  488. ibool check_lsn, /*!< in: TRUE if we need to check
  489. and complain about the LSN */
  490. const byte* read_buf, /*!< in: a database page */
  491. ulint zip_size) /*!< in: size of compressed page;
  492. 0 for uncompressed pages */
  493. {
  494. ulint checksum_field;
  495. ulint old_checksum_field;
  496. if (UNIV_LIKELY(!zip_size)
  497. && memcmp(read_buf + FIL_PAGE_LSN + 4,
  498. read_buf + UNIV_PAGE_SIZE
  499. - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {
  500. /* Stored log sequence numbers at the start and the end
  501. of page do not match */
  502. return(TRUE);
  503. }
  504. #ifndef UNIV_HOTBACKUP
  505. if (check_lsn && recv_lsn_checks_on) {
  506. ib_uint64_t current_lsn;
  507. if (log_peek_lsn(&current_lsn)
  508. && UNIV_UNLIKELY
  509. (current_lsn
  510. < mach_read_from_8(read_buf + FIL_PAGE_LSN))) {
  511. ut_print_timestamp(stderr);
  512. fprintf(stderr,
  513. " InnoDB: Error: page %lu log sequence number"
  514. " %llu\n"
  515. "InnoDB: is in the future! Current system "
  516. "log sequence number %llu.\n"
  517. "InnoDB: Your database may be corrupt or "
  518. "you may have copied the InnoDB\n"
  519. "InnoDB: tablespace but not the InnoDB "
  520. "log files. See\n"
  521. "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
  522. "InnoDB: for more information.\n",
  523. (ulong) mach_read_from_4(read_buf
  524. + FIL_PAGE_OFFSET),
  525. mach_read_from_8(read_buf + FIL_PAGE_LSN),
  526. current_lsn);
  527. }
  528. }
  529. #endif
  530. /* If we use checksums validation, make additional check before
  531. returning TRUE to ensure that the checksum is not equal to
  532. BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums
  533. disabled. Otherwise, skip checksum calculation and return FALSE */
  534. if (UNIV_LIKELY(srv_use_checksums)) {
  535. checksum_field = mach_read_from_4(read_buf
  536. + FIL_PAGE_SPACE_OR_CHKSUM);
  537. if (UNIV_UNLIKELY(zip_size)) {
  538. return(checksum_field != BUF_NO_CHECKSUM_MAGIC
  539. && checksum_field
  540. != page_zip_calc_checksum(read_buf, zip_size));
  541. }
  542. old_checksum_field = mach_read_from_4(
  543. read_buf + UNIV_PAGE_SIZE
  544. - FIL_PAGE_END_LSN_OLD_CHKSUM);
  545. /* There are 2 valid formulas for old_checksum_field:
  546. 1. Very old versions of InnoDB only stored 8 byte lsn to the
  547. start and the end of the page.
  548. 2. Newer InnoDB versions store the old formula checksum
  549. there. */
  550. if (old_checksum_field != mach_read_from_4(read_buf
  551. + FIL_PAGE_LSN)
  552. && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
  553. && old_checksum_field
  554. != buf_calc_page_old_checksum(read_buf)) {
  555. return(TRUE);
  556. }
  557. /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
  558. (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
  559. if (!srv_fast_checksum
  560. && checksum_field != 0
  561. && checksum_field != BUF_NO_CHECKSUM_MAGIC
  562. && checksum_field
  563. != buf_calc_page_new_checksum(read_buf)) {
  564. return(TRUE);
  565. }
  566. if (srv_fast_checksum
  567. && checksum_field != 0
  568. && checksum_field != BUF_NO_CHECKSUM_MAGIC
  569. && checksum_field
  570. != buf_calc_page_new_checksum_32(read_buf)
  571. && checksum_field
  572. != buf_calc_page_new_checksum(read_buf)) {
  573. return(TRUE);
  574. }
  575. }
  576. return(FALSE);
  577. }
  578. /********************************************************************//**
  579. Prints a page to stderr. */
  580. UNIV_INTERN
  581. void
  582. buf_page_print(
  583. /*===========*/
  584. const byte* read_buf, /*!< in: a database page */
  585. ulint zip_size, /*!< in: compressed page size, or
  586. 0 for uncompressed pages */
  587. ulint flags) /*!< in: 0 or
  588. BUF_PAGE_PRINT_NO_CRASH or
  589. BUF_PAGE_PRINT_NO_FULL */
  590. {
  591. #ifndef UNIV_HOTBACKUP
  592. dict_index_t* index;
  593. #endif /* !UNIV_HOTBACKUP */
  594. ulint checksum;
  595. ulint checksum_32;
  596. ulint old_checksum;
  597. ulint size = zip_size;
  598. if (!size) {
  599. size = UNIV_PAGE_SIZE;
  600. }
  601. if (!(flags & BUF_PAGE_PRINT_NO_FULL)) {
  602. ut_print_timestamp(stderr);
  603. fprintf(stderr,
  604. " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
  605. (ulong) size);
  606. ut_print_buf(stderr, read_buf, size);
  607. fputs("\nInnoDB: End of page dump\n", stderr);
  608. }
  609. if (zip_size) {
  610. /* Print compressed page. */
  611. switch (fil_page_get_type(read_buf)) {
  612. case FIL_PAGE_TYPE_ZBLOB:
  613. case FIL_PAGE_TYPE_ZBLOB2:
  614. checksum = srv_use_checksums
  615. ? page_zip_calc_checksum(read_buf, zip_size)
  616. : BUF_NO_CHECKSUM_MAGIC;
  617. ut_print_timestamp(stderr);
  618. fprintf(stderr,
  619. " InnoDB: Compressed BLOB page"
  620. " checksum %lu, stored %lu\n"
  621. "InnoDB: Page lsn %lu %lu\n"
  622. "InnoDB: Page number (if stored"
  623. " to page already) %lu,\n"
  624. "InnoDB: space id (if stored"
  625. " to page already) %lu\n",
  626. (ulong) checksum,
  627. (ulong) mach_read_from_4(
  628. read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
  629. (ulong) mach_read_from_4(
  630. read_buf + FIL_PAGE_LSN),
  631. (ulong) mach_read_from_4(
  632. read_buf + (FIL_PAGE_LSN + 4)),
  633. (ulong) mach_read_from_4(
  634. read_buf + FIL_PAGE_OFFSET),
  635. (ulong) mach_read_from_4(
  636. read_buf
  637. + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
  638. return;
  639. default:
  640. ut_print_timestamp(stderr);
  641. fprintf(stderr,
  642. " InnoDB: unknown page type %lu,"
  643. " assuming FIL_PAGE_INDEX\n",
  644. fil_page_get_type(read_buf));
  645. /* fall through */
  646. case FIL_PAGE_INDEX:
  647. checksum = srv_use_checksums
  648. ? page_zip_calc_checksum(read_buf, zip_size)
  649. : BUF_NO_CHECKSUM_MAGIC;
  650. ut_print_timestamp(stderr);
  651. fprintf(stderr,
  652. " InnoDB: Compressed page checksum %lu,"
  653. " stored %lu\n"
  654. "InnoDB: Page lsn %lu %lu\n"
  655. "InnoDB: Page number (if stored"
  656. " to page already) %lu,\n"
  657. "InnoDB: space id (if stored"
  658. " to page already) %lu\n",
  659. (ulong) checksum,
  660. (ulong) mach_read_from_4(
  661. read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
  662. (ulong) mach_read_from_4(
  663. read_buf + FIL_PAGE_LSN),
  664. (ulong) mach_read_from_4(
  665. read_buf + (FIL_PAGE_LSN + 4)),
  666. (ulong) mach_read_from_4(
  667. read_buf + FIL_PAGE_OFFSET),
  668. (ulong) mach_read_from_4(
  669. read_buf
  670. + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
  671. return;
  672. case FIL_PAGE_TYPE_XDES:
  673. /* This is an uncompressed page. */
  674. break;
  675. }
  676. }
  677. checksum = srv_use_checksums
  678. ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
  679. checksum_32 = srv_use_checksums
  680. ? buf_calc_page_new_checksum_32(read_buf) : BUF_NO_CHECKSUM_MAGIC;
  681. old_checksum = srv_use_checksums
  682. ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
  683. ut_print_timestamp(stderr);
  684. fprintf(stderr,
  685. " InnoDB: Page checksum %lu (32bit_calc: %lu), prior-to-4.0.14-form"
  686. " checksum %lu\n"
  687. "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
  688. " stored checksum %lu\n"
  689. "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
  690. " at page end %lu\n"
  691. "InnoDB: Page number (if stored to page already) %lu,\n"
  692. "InnoDB: space id (if created with >= MySQL-4.1.1"
  693. " and stored already) %lu\n",
  694. (ulong) checksum, (ulong) checksum_32, (ulong) old_checksum,
  695. (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
  696. (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
  697. - FIL_PAGE_END_LSN_OLD_CHKSUM),
  698. (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
  699. (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
  700. (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
  701. - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
  702. (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
  703. (ulong) mach_read_from_4(read_buf
  704. + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
  705. #ifndef UNIV_HOTBACKUP
  706. if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
  707. == TRX_UNDO_INSERT) {
  708. fprintf(stderr,
  709. "InnoDB: Page may be an insert undo log page\n");
  710. } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
  711. + TRX_UNDO_PAGE_TYPE)
  712. == TRX_UNDO_UPDATE) {
  713. fprintf(stderr,
  714. "InnoDB: Page may be an update undo log page\n");
  715. }
  716. #endif /* !UNIV_HOTBACKUP */
  717. switch (fil_page_get_type(read_buf)) {
  718. index_id_t index_id;
  719. case FIL_PAGE_INDEX:
  720. index_id = btr_page_get_index_id(read_buf);
  721. fprintf(stderr,
  722. "InnoDB: Page may be an index page where"
  723. " index id is %llu\n",
  724. (ullint) index_id);
  725. #ifndef UNIV_HOTBACKUP
  726. index = dict_index_find_on_id_low(index_id);
  727. if (index) {
  728. fputs("InnoDB: (", stderr);
  729. dict_index_name_print(stderr, NULL, index);
  730. fputs(")\n", stderr);
  731. }
  732. #endif /* !UNIV_HOTBACKUP */
  733. break;
  734. case FIL_PAGE_INODE:
  735. fputs("InnoDB: Page may be an 'inode' page\n", stderr);
  736. break;
  737. case FIL_PAGE_IBUF_FREE_LIST:
  738. fputs("InnoDB: Page may be an insert buffer free list page\n",
  739. stderr);
  740. break;
  741. case FIL_PAGE_TYPE_ALLOCATED:
  742. fputs("InnoDB: Page may be a freshly allocated page\n",
  743. stderr);
  744. break;
  745. case FIL_PAGE_IBUF_BITMAP:
  746. fputs("InnoDB: Page may be an insert buffer bitmap page\n",
  747. stderr);
  748. break;
  749. case FIL_PAGE_TYPE_SYS:
  750. fputs("InnoDB: Page may be a system page\n",
  751. stderr);
  752. break;
  753. case FIL_PAGE_TYPE_TRX_SYS:
  754. fputs("InnoDB: Page may be a transaction system page\n",
  755. stderr);
  756. break;
  757. case FIL_PAGE_TYPE_FSP_HDR:
  758. fputs("InnoDB: Page may be a file space header page\n",
  759. stderr);
  760. break;
  761. case FIL_PAGE_TYPE_XDES:
  762. fputs("InnoDB: Page may be an extent descriptor page\n",
  763. stderr);
  764. break;
  765. case FIL_PAGE_TYPE_BLOB:
  766. fputs("InnoDB: Page may be a BLOB page\n",
  767. stderr);
  768. break;
  769. case FIL_PAGE_TYPE_ZBLOB:
  770. case FIL_PAGE_TYPE_ZBLOB2:
  771. fputs("InnoDB: Page may be a compressed BLOB page\n",
  772. stderr);
  773. break;
  774. }
  775. ut_ad(flags & BUF_PAGE_PRINT_NO_CRASH);
  776. }
  777. #ifndef UNIV_HOTBACKUP
  778. # ifdef PFS_GROUP_BUFFER_SYNC
  779. /********************************************************************//**
  780. This function registers mutexes and rwlocks in buffer blocks with
  781. performance schema. If PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER is
  782. defined to be a value less than chunk->size, then only mutexes
  783. and rwlocks in the first PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER
  784. blocks are registered. */
  785. static
  786. void
  787. pfs_register_buffer_block(
  788. /*======================*/
  789. buf_chunk_t* chunk) /*!< in/out: chunk of buffers */
  790. {
  791. ulint i;
  792. ulint num_to_register;
  793. buf_block_t* block;
  794. block = chunk->blocks;
  795. num_to_register = ut_min(chunk->size,
  796. PFS_MAX_BUFFER_MUTEX_LOCK_REGISTER);
  797. for (i = 0; i < num_to_register; i++) {
  798. mutex_t* mutex;
  799. rw_lock_t* rwlock;
  800. # ifdef UNIV_PFS_MUTEX
  801. mutex = &block->mutex;
  802. ut_a(!mutex->pfs_psi);
  803. mutex->pfs_psi = (PSI_server)
  804. ? PSI_server->init_mutex(buffer_block_mutex_key, mutex)
  805. : NULL;
  806. # endif /* UNIV_PFS_MUTEX */
  807. # ifdef UNIV_PFS_RWLOCK
  808. rwlock = &block->lock;
  809. ut_a(!rwlock->pfs_psi);
  810. rwlock->pfs_psi = (PSI_server)
  811. ? PSI_server->init_rwlock(buf_block_lock_key, rwlock)
  812. : NULL;
  813. # ifdef UNIV_SYNC_DEBUG
  814. rwlock = &block->debug_latch;
  815. ut_a(!rwlock->pfs_psi);
  816. rwlock->pfs_psi = (PSI_server)
  817. ? PSI_server->init_rwlock(buf_block_debug_latch_key,
  818. rwlock)
  819. : NULL;
  820. # endif /* UNIV_SYNC_DEBUG */
  821. # endif /* UNIV_PFS_RWLOCK */
  822. block++;
  823. }
  824. }
  825. # endif /* PFS_GROUP_BUFFER_SYNC */
  826. /********************************************************************//**
  827. Initializes a buffer control block when the buf_pool is created. */
  828. static
  829. void
  830. buf_block_init(
  831. /*===========*/
  832. buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  833. buf_block_t* block, /*!< in: pointer to control block */
  834. byte* frame) /*!< in: pointer to buffer frame */
  835. {
  836. UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);
  837. block->frame = frame;
  838. block->page.buf_pool_index = buf_pool_index(buf_pool);
  839. block->page.state = BUF_BLOCK_NOT_USED;
  840. block->page.buf_fix_count = 0;
  841. block->page.io_fix = BUF_IO_NONE;
  842. block->modify_clock = 0;
  843. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  844. block->page.file_page_was_freed = FALSE;
  845. #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
  846. block->check_index_page_at_flush = FALSE;
  847. block->index = NULL;
  848. #ifdef UNIV_DEBUG
  849. block->page.in_page_hash = FALSE;
  850. block->page.in_zip_hash = FALSE;
  851. block->page.in_flush_list = FALSE;
  852. block->page.in_free_list = FALSE;
  853. #endif /* UNIV_DEBUG */
  854. block->page.flush_list.prev = NULL;
  855. block->page.flush_list.next = NULL;
  856. block->page.zip_list.prev = NULL;
  857. block->page.zip_list.next = NULL;
  858. block->page.in_LRU_list = FALSE;
  859. block->in_unzip_LRU_list = FALSE;
  860. #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
  861. block->n_pointers = 0;
  862. #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
  863. page_zip_des_init(&block->page.zip);
  864. #if defined PFS_SKIP_BUFFER_MUTEX_RWLOCK || defined PFS_GROUP_BUFFER_SYNC
  865. /* If PFS_SKIP_BUFFER_MUTEX_RWLOCK is defined, skip registration
  866. of buffer block mutex/rwlock with performance schema. If
  867. PFS_GROUP_BUFFER_SYNC is defined, skip the registration
  868. since buffer block mutex/rwlock will be registered later in
  869. pfs_register_buffer_block() */
  870. mutex_create(PFS_NOT_INSTRUMENTED, &block->mutex, SYNC_BUF_BLOCK);
  871. rw_lock_create(PFS_NOT_INSTRUMENTED, &block->lock, SYNC_LEVEL_VARYING);
  872. # ifdef UNIV_SYNC_DEBUG
  873. rw_lock_create(PFS_NOT_INSTRUMENTED,
  874. &block->debug_latch, SYNC_NO_ORDER_CHECK);
  875. # endif /* UNIV_SYNC_DEBUG */
  876. #else /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
  877. mutex_create(buffer_block_mutex_key, &block->mutex, SYNC_BUF_BLOCK);
  878. rw_lock_create(buf_block_lock_key, &block->lock, SYNC_LEVEL_VARYING);
  879. # ifdef UNIV_SYNC_DEBUG
  880. rw_lock_create(buf_block_debug_latch_key,
  881. &block->debug_latch, SYNC_NO_ORDER_CHECK);
  882. # endif /* UNIV_SYNC_DEBUG */
  883. #endif /* PFS_SKIP_BUFFER_MUTEX_RWLOCK || PFS_GROUP_BUFFER_SYNC */
  884. ut_ad(rw_lock_validate(&(block->lock)));
  885. }
  886. /********************************************************************//**
  887. Allocates a chunk of buffer frames.
  888. @return chunk, or NULL on failure */
  889. static
  890. buf_chunk_t*
  891. buf_chunk_init(
  892. /*===========*/
  893. buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  894. buf_chunk_t* chunk, /*!< out: chunk of buffers */
  895. ulint mem_size, /*!< in: requested size in bytes */
  896. ibool populate) /*!< in: virtual page preallocation */
  897. {
  898. buf_block_t* block;
  899. byte* frame;
  900. ulint i;
  901. ulint size_target;
  902. /* Round down to a multiple of page size,
  903. although it already should be. */
  904. mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
  905. size_target = (mem_size / UNIV_PAGE_SIZE) - 1;
  906. /* Reserve space for the block descriptors. */
  907. mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
  908. + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
  909. chunk->mem_size = mem_size;
  910. chunk->mem = os_mem_alloc_large(&chunk->mem_size, populate);
  911. if (UNIV_UNLIKELY(chunk->mem == NULL)) {
  912. return(NULL);
  913. }
  914. /* Allocate the block descriptors from
  915. the start of the memory block. */
  916. chunk->blocks = chunk->mem;
  917. /* Align a pointer to the first frame. Note that when
  918. os_large_page_size is smaller than UNIV_PAGE_SIZE,
  919. we may allocate one fewer block than requested. When
  920. it is bigger, we may allocate more blocks than requested. */
  921. frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);
  922. chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
  923. - (frame != chunk->mem);
  924. /* Subtract the space needed for block descriptors. */
  925. {
  926. ulint size = chunk->size;
  927. while (frame < (byte*) (chunk->blocks + size)) {
  928. frame += UNIV_PAGE_SIZE;
  929. size--;
  930. }
  931. chunk->size = size;
  932. }
  933. if (chunk->size > size_target) {
  934. chunk->size = size_target;
  935. }
  936. /* Init block structs and assign frames for them. Then we
  937. assign the frames to the first blocks (we already mapped the
  938. memory above). */
  939. block = chunk->blocks;
  940. for (i = chunk->size; i--; ) {
  941. buf_block_init(buf_pool, block, frame);
  942. UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
  943. /* Add the block to the free list */
  944. mutex_enter(&buf_pool->free_list_mutex);
  945. UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));
  946. ut_d(block->page.in_free_list = TRUE);
  947. mutex_exit(&buf_pool->free_list_mutex);
  948. ut_ad(buf_pool_from_block(block) == buf_pool);
  949. block++;
  950. frame += UNIV_PAGE_SIZE;
  951. }
  952. #ifdef PFS_GROUP_BUFFER_SYNC
  953. pfs_register_buffer_block(chunk);
  954. #endif
  955. return(chunk);
  956. }
  957. #ifdef UNIV_DEBUG
  958. /*********************************************************************//**
  959. Finds a block in the given buffer chunk that points to a
  960. given compressed page.
  961. @return buffer block pointing to the compressed page, or NULL */
  962. static
  963. buf_block_t*
  964. buf_chunk_contains_zip(
  965. /*===================*/
  966. buf_chunk_t* chunk, /*!< in: chunk being checked */
  967. const void* data) /*!< in: pointer to compressed page */
  968. {
  969. buf_block_t* block;
  970. ulint i;
  971. block = chunk->blocks;
  972. for (i = chunk->size; i--; block++) {
  973. if (block->page.zip.data == data) {
  974. return(block);
  975. }
  976. }
  977. return(NULL);
  978. }
  979. /*********************************************************************//**
  980. Finds a block in the buffer pool that points to a
  981. given compressed page.
  982. @return buffer block pointing to the compressed page, or NULL */
  983. UNIV_INTERN
  984. buf_block_t*
  985. buf_pool_contains_zip(
  986. /*==================*/
  987. buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  988. const void* data) /*!< in: pointer to compressed page */
  989. {
  990. ulint n;
  991. buf_chunk_t* chunk = buf_pool->chunks;
  992. ut_ad(buf_pool);
  993. //ut_ad(buf_pool_mutex_own(buf_pool));
  994. ut_ad(mutex_own(&buf_pool->zip_free_mutex));
  995. for (n = buf_pool->n_chunks; n--; chunk++) {
  996. buf_block_t* block = buf_chunk_contains_zip(chunk, data);
  997. if (block) {
  998. return(block);
  999. }
  1000. }
  1001. return(NULL);
  1002. }
  1003. #endif /* UNIV_DEBUG */
  1004. /*********************************************************************//**
  1005. Checks that all file pages in the buffer chunk are in a replaceable state.
  1006. @return address of a non-free block, or NULL if all freed */
  1007. static
  1008. const buf_block_t*
  1009. buf_chunk_not_freed(
  1010. /*================*/
  1011. buf_chunk_t* chunk) /*!< in: chunk being checked */
  1012. {
  1013. buf_block_t* block;
  1014. ulint i;
  1015. block = chunk->blocks;
  1016. for (i = chunk->size; i--; block++) {
  1017. ibool ready;
  1018. switch (buf_block_get_state(block)) {
  1019. case BUF_BLOCK_ZIP_FREE:
  1020. case BUF_BLOCK_ZIP_PAGE:
  1021. case BUF_BLOCK_ZIP_DIRTY:
  1022. /* The uncompressed buffer pool should never
  1023. contain compressed block descriptors. */
  1024. ut_error;
  1025. break;
  1026. case BUF_BLOCK_NOT_USED:
  1027. case BUF_BLOCK_READY_FOR_USE:
  1028. case BUF_BLOCK_MEMORY:
  1029. case BUF_BLOCK_REMOVE_HASH:
  1030. /* Skip blocks that are not being used for
  1031. file pages. */
  1032. break;
  1033. case BUF_BLOCK_FILE_PAGE:
  1034. mutex_enter(&block->mutex);
  1035. ready = buf_flush_ready_for_replace(&block->page);
  1036. mutex_exit(&block->mutex);
  1037. if (block->page.is_corrupt) {
  1038. /* corrupt page may remain, it can be skipped */
  1039. break;
  1040. }
  1041. if (!ready) {
  1042. return(block);
  1043. }
  1044. break;
  1045. }
  1046. }
  1047. return(NULL);
  1048. }
  1049. /********************************************************************//**
  1050. Set buffer pool size variables after resizing it */
  1051. static
  1052. void
  1053. buf_pool_set_sizes(void)
  1054. /*====================*/
  1055. {
  1056. ulint i;
  1057. ulint curr_size = 0;
  1058. buf_pool_mutex_enter_all();
  1059. for (i = 0; i < srv_buf_pool_instances; i++) {
  1060. buf_pool_t* buf_pool;
  1061. buf_pool = buf_pool_from_array(i);
  1062. curr_size += buf_pool->curr_pool_size;
  1063. }
  1064. srv_buf_pool_curr_size = curr_size;
  1065. srv_buf_pool_old_size = srv_buf_pool_size;
  1066. buf_pool_mutex_exit_all();
  1067. }
  1068. /********************************************************************//**
  1069. Initialize a buffer pool instance.
  1070. @return DB_SUCCESS if all goes well. */
  1071. UNIV_INTERN
  1072. ulint
  1073. buf_pool_init_instance(
  1074. /*===================*/
  1075. buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  1076. ulint buf_pool_size, /*!< in: size in bytes */
  1077. ibool populate, /*!< in: virtual page preallocation */
  1078. ulint instance_no) /*!< in: id of the instance */
  1079. {
  1080. ulint i;
  1081. buf_chunk_t* chunk;
  1082. /* 1. Initialize general fields
  1083. ------------------------------- */
  1084. mutex_create(buf_pool_mutex_key,
  1085. &buf_pool->mutex, SYNC_BUF_POOL);
  1086. mutex_create(buf_pool_LRU_list_mutex_key,
  1087. &buf_pool->LRU_list_mutex, SYNC_BUF_LRU_LIST);
  1088. rw_lock_create(buf_pool_page_hash_key,
  1089. &buf_pool->page_hash_latch, SYNC_BUF_PAGE_HASH);
  1090. mutex_create(buf_pool_free_list_mutex_key,
  1091. &buf_pool->free_list_mutex, SYNC_BUF_FREE_LIST);
  1092. mutex_create(buf_pool_zip_free_mutex_key,
  1093. &buf_pool->zip_free_mutex, SYNC_BUF_ZIP_FREE);
  1094. mutex_create(buf_pool_zip_hash_mutex_key,
  1095. &buf_pool->zip_hash_mutex, SYNC_BUF_ZIP_HASH);
  1096. mutex_create(buf_pool_zip_mutex_key,
  1097. &buf_pool->zip_mutex, SYNC_BUF_BLOCK);
  1098. mutex_enter(&buf_pool->LRU_list_mutex);
  1099. rw_lock_x_lock(&buf_pool->page_hash_latch);
  1100. buf_pool_mutex_enter(buf_pool);
  1101. if (buf_pool_size > 0) {
  1102. buf_pool->n_chunks = 1;
  1103. buf_pool->chunks = chunk = mem_zalloc(sizeof *chunk);
  1104. UT_LIST_INIT(buf_pool->free);
  1105. if (!buf_chunk_init(buf_pool, chunk, buf_pool_size, populate)) {
  1106. mem_free(chunk);
  1107. mem_free(buf_pool);
  1108. mutex_exit(&buf_pool->LRU_list_mutex);
  1109. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  1110. buf_pool_mutex_exit(buf_pool);
  1111. return(DB_ERROR);
  1112. }
  1113. buf_pool->instance_no = instance_no;
  1114. buf_pool->old_pool_size = buf_pool_size;
  1115. buf_pool->curr_size = chunk->size;
  1116. buf_pool->curr_pool_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
  1117. buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
  1118. buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
  1119. buf_pool->last_printout_time = ut_time();
  1120. }
  1121. /* 2. Initialize flushing fields
  1122. -------------------------------- */
  1123. mutex_create(flush_list_mutex_key, &buf_pool->flush_list_mutex,
  1124. SYNC_BUF_FLUSH_LIST);
  1125. for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
  1126. buf_pool->no_flush[i] = os_event_create(NULL);
  1127. }
  1128. /* 3. Initialize LRU fields
  1129. --------------------------- */
  1130. /* All fields are initialized by mem_zalloc(). */
  1131. mutex_exit(&buf_pool->LRU_list_mutex);
  1132. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  1133. buf_pool_mutex_exit(buf_pool);
  1134. return(DB_SUCCESS);
  1135. }
  1136. /********************************************************************//**
  1137. free one buffer pool instance */
  1138. static
  1139. void
  1140. buf_pool_free_instance(
  1141. /*===================*/
  1142. buf_pool_t* buf_pool) /* in,own: buffer pool instance
  1143. to free */
  1144. {
  1145. buf_chunk_t* chunk;
  1146. buf_chunk_t* chunks;
  1147. buf_page_t* bpage;
  1148. bpage = UT_LIST_GET_LAST(buf_pool->LRU);
  1149. while (bpage != NULL) {
  1150. buf_page_t* prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
  1151. enum buf_page_state state = buf_page_get_state(bpage);
  1152. ut_ad(buf_page_in_file(bpage));
  1153. ut_ad(bpage->in_LRU_list);
  1154. if (state != BUF_BLOCK_FILE_PAGE) {
  1155. /* We must not have any dirty block except
  1156. when doing a fast shutdown. */
  1157. ut_ad(state == BUF_BLOCK_ZIP_PAGE
  1158. || srv_fast_shutdown == 2);
  1159. buf_page_free_descriptor(bpage);
  1160. }
  1161. bpage = prev_bpage;
  1162. }
  1163. chunks = buf_pool->chunks;
  1164. chunk = chunks + buf_pool->n_chunks;
  1165. while (--chunk >= chunks) {
  1166. os_mem_free_large(chunk->mem, chunk->mem_size);
  1167. }
  1168. mem_free(buf_pool->chunks);
  1169. hash_table_free(buf_pool->page_hash);
  1170. hash_table_free(buf_pool->zip_hash);
  1171. }
  1172. /********************************************************************//**
  1173. Creates the buffer pool.
  1174. @return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
  1175. UNIV_INTERN
  1176. ulint
  1177. buf_pool_init(
  1178. /*==========*/
  1179. ulint total_size, /*!< in: size of the total pool in bytes */
  1180. ibool populate, /*!< in: virtual page preallocation */
  1181. ulint n_instances) /*!< in: number of instances */
  1182. {
  1183. ulint i;
  1184. const ulint size = total_size / n_instances;
  1185. ut_ad(n_instances > 0);
  1186. ut_ad(n_instances <= MAX_BUFFER_POOLS);
  1187. ut_ad(n_instances == srv_buf_pool_instances);
  1188. /* We create an extra buffer pool instance, this instance is used
  1189. for flushing the flush lists, to keep track of n_flush for all
  1190. the buffer pools and also used as a waiting object during flushing. */
  1191. buf_pool_ptr = mem_zalloc(n_instances * sizeof *buf_pool_ptr);
  1192. for (i = 0; i < n_instances; i++) {
  1193. buf_pool_t* ptr = &buf_pool_ptr[i];
  1194. if (buf_pool_init_instance(ptr, size, populate, i) != DB_SUCCESS) {
  1195. /* Free all the instances created so far. */
  1196. buf_pool_free(i);
  1197. return(DB_ERROR);
  1198. }
  1199. }
  1200. buf_pool_set_sizes();
  1201. buf_LRU_old_ratio_update(100 * 3/ 8, FALSE);
  1202. btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
  1203. return(DB_SUCCESS);
  1204. }
  1205. /********************************************************************//**
  1206. Frees the buffer pool at shutdown. This must not be invoked before
  1207. freeing all mutexes. */
  1208. UNIV_INTERN
  1209. void
  1210. buf_pool_free(
  1211. /*==========*/
  1212. ulint n_instances) /*!< in: numbere of instances to free */
  1213. {
  1214. ulint i;
  1215. for (i = 0; i < n_instances; i++) {
  1216. buf_pool_free_instance(buf_pool_from_array(i));
  1217. }
  1218. mem_free(buf_pool_ptr);
  1219. buf_pool_ptr = NULL;
  1220. }
  1221. /********************************************************************//**
  1222. Clears the adaptive hash index on all pages in the buffer pool. */
  1223. UNIV_INTERN
  1224. void
  1225. buf_pool_clear_hash_index(void)
  1226. /*===========================*/
  1227. {
  1228. ulint p;
  1229. #ifdef UNIV_SYNC_DEBUG
  1230. ulint j;
  1231. for (j = 0; j < btr_search_index_num; j++) {
  1232. ut_ad(rw_lock_own(&btr_search_latch_arr[j], RW_LOCK_EX));
  1233. }
  1234. #endif /* UNIV_SYNC_DEBUG */
  1235. ut_ad(!btr_search_enabled);
  1236. for (p = 0; p < srv_buf_pool_instances; p++) {
  1237. buf_pool_t* buf_pool = buf_pool_from_array(p);
  1238. buf_chunk_t* chunks = buf_pool->chunks;
  1239. buf_chunk_t* chunk = chunks + buf_pool->n_chunks;
  1240. while (--chunk >= chunks) {
  1241. buf_block_t* block = chunk->blocks;
  1242. ulint i = chunk->size;
  1243. for (; i--; block++) {
  1244. dict_index_t* index = block->index;
  1245. /* We can set block->index = NULL
  1246. when we have an x-latch on btr_search_latch;
  1247. see the comment in buf0buf.h */
  1248. if (!index) {
  1249. /* Not hashed */
  1250. continue;
  1251. }
  1252. block->index = NULL;
  1253. # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
  1254. block->n_pointers = 0;
  1255. # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
  1256. }
  1257. }
  1258. }
  1259. }
  1260. /********************************************************************//**
  1261. Relocate a buffer control block. Relocates the block on the LRU list
  1262. and in buf_pool->page_hash. Does not relocate bpage->list.
  1263. The caller must take care of relocating bpage->list. */
  1264. UNIV_INTERN
  1265. void
  1266. buf_relocate(
  1267. /*=========*/
  1268. buf_page_t* bpage, /*!< in/out: control block being relocated;
  1269. buf_page_get_state(bpage) must be
  1270. BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
  1271. buf_page_t* dpage) /*!< in/out: destination control block */
  1272. {
  1273. buf_page_t* b;
  1274. ulint fold;
  1275. buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
  1276. //ut_ad(buf_pool_mutex_own(buf_pool));
  1277. ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
  1278. #ifdef UNIV_SYNC_DEBUG
  1279. ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
  1280. #endif
  1281. ut_ad(mutex_own(buf_page_get_mutex(bpage)));
  1282. ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
  1283. ut_a(bpage->buf_fix_count == 0);
  1284. ut_ad(bpage->in_LRU_list);
  1285. ut_ad(!bpage->in_zip_hash);
  1286. ut_ad(bpage->in_page_hash);
  1287. ut_ad(bpage == buf_page_hash_get(buf_pool,
  1288. bpage->space, bpage->offset));
  1289. ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
  1290. #ifdef UNIV_DEBUG
  1291. switch (buf_page_get_state(bpage)) {
  1292. case BUF_BLOCK_ZIP_FREE:
  1293. case BUF_BLOCK_NOT_USED:
  1294. case BUF_BLOCK_READY_FOR_USE:
  1295. case BUF_BLOCK_FILE_PAGE:
  1296. case BUF_BLOCK_MEMORY:
  1297. case BUF_BLOCK_REMOVE_HASH:
  1298. ut_error;
  1299. case BUF_BLOCK_ZIP_DIRTY:
  1300. case BUF_BLOCK_ZIP_PAGE:
  1301. break;
  1302. }
  1303. #endif /* UNIV_DEBUG */
  1304. memcpy(dpage, bpage, sizeof *dpage);
  1305. bpage->in_LRU_list = FALSE;
  1306. ut_d(bpage->in_page_hash = FALSE);
  1307. /* relocate buf_pool->LRU */
  1308. b = UT_LIST_GET_PREV(LRU, bpage);
  1309. UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
  1310. if (b) {
  1311. UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
  1312. } else {
  1313. UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
  1314. }
  1315. if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
  1316. buf_pool->LRU_old = dpage;
  1317. #ifdef UNIV_LRU_DEBUG
  1318. /* buf_pool->LRU_old must be the first item in the LRU list
  1319. whose "old" flag is set. */
  1320. ut_a(buf_pool->LRU_old->old);
  1321. ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
  1322. || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
  1323. ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
  1324. || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
  1325. } else {
  1326. /* Check that the "old" flag is consistent in
  1327. the block and its neighbours. */
  1328. buf_page_set_old(dpage, buf_page_is_old(dpage));
  1329. #endif /* UNIV_LRU_DEBUG */
  1330. }
  1331. ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
  1332. ut_ad(ut_list_node_313->in_LRU_list)));
  1333. /* relocate buf_pool->page_hash */
  1334. fold = buf_page_address_fold(bpage->space, bpage->offset);
  1335. HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
  1336. HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
  1337. }
  1338. /********************************************************************//**
  1339. Determine if a block is a sentinel for a buffer pool watch.
  1340. @return TRUE if a sentinel for a buffer pool watch, FALSE if not */
  1341. UNIV_INTERN
  1342. ibool
  1343. buf_pool_watch_is_sentinel(
  1344. /*=======================*/
  1345. buf_pool_t* buf_pool, /*!< buffer pool instance */
  1346. const buf_page_t* bpage) /*!< in: block */
  1347. {
  1348. ut_ad(buf_page_in_file(bpage));
  1349. if (bpage < &buf_pool->watch[0]
  1350. || bpage >= &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
  1351. ut_ad(buf_page_get_state(bpage) != BUF_BLOCK_ZIP_PAGE
  1352. || bpage->zip.data != NULL);
  1353. return(FALSE);
  1354. }
  1355. ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_PAGE);
  1356. ut_ad(!bpage->in_zip_hash);
  1357. ut_ad(bpage->in_page_hash);
  1358. ut_ad(bpage->zip.data == NULL);
  1359. ut_ad(bpage->buf_fix_count > 0);
  1360. return(TRUE);
  1361. }
/****************************************************************//**
Add watch for the given page to be read in.
This function acquires buf_pool->page_hash_latch in exclusive mode by
itself and releases it on every return path; the assertion that the
caller owns the buffer pool mutex is commented out below.
If the page is already in the buffer pool, the block is returned with
the mutex obtained from buf_page_get_mutex_enter() still held: the
caller is responsible for releasing it.
@return NULL if watch set, block if the page is in the buffer pool */
UNIV_INTERN
buf_page_t*
buf_pool_watch_set(
/*===============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: page number */
	ulint	fold)	/*!< in: buf_page_address_fold(space, offset) */
{
	buf_page_t*	bpage;
	ulint		i;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);
	mutex_t*	block_mutex;

	//ut_ad(buf_pool_mutex_own(buf_pool));

	/* The exclusive latch covers both the lookup and the
	HASH_INSERT of a new sentinel further down. */
	rw_lock_x_lock(&buf_pool->page_hash_latch);

	bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);

	if (UNIV_LIKELY_NULL(bpage)) {

		block_mutex = buf_page_get_mutex_enter(bpage);
		ut_a(block_mutex);

		if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) {
			/* The page was loaded meanwhile. */
			/* Only the hash latch is released here;
			block_mutex stays held for the caller. */
			rw_lock_x_unlock(&buf_pool->page_hash_latch);
			return(bpage);
		}
		/* Add to an existing watch. */
		bpage->buf_fix_count++;
		rw_lock_x_unlock(&buf_pool->page_hash_latch);
		mutex_exit(block_mutex);
		return(NULL);
	}

	/* buf_pool->watch is protected by zip_mutex for now */
	mutex_enter(&buf_pool->zip_mutex);

	/* Scan the watch array for a slot that is not in use. */
	for (i = 0; i < BUF_POOL_WATCH_SIZE; i++) {
		bpage = &buf_pool->watch[i];

		ut_ad(bpage->access_time == 0);
		ut_ad(bpage->newest_modification == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->zip.data == NULL);
		ut_ad(!bpage->in_zip_hash);

		switch (bpage->state) {
		case BUF_BLOCK_POOL_WATCH:
			/* Free slot: turn it into a sentinel for
			(space, offset) and publish it in the page hash. */
			ut_ad(!bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count == 0);

			/* bpage is pointing to buf_pool->watch[],
			which is modified here while holding
			buf_pool->zip_mutex (acquired above).
			Normally, buf_page_t objects are protected by
			buf_block_t::mutex or buf_pool->zip_mutex or both. */

			bpage->state = BUF_BLOCK_ZIP_PAGE;
			bpage->space = space;
			bpage->offset = offset;
			bpage->buf_fix_count = 1;
			bpage->buf_pool_index = buf_pool_index(buf_pool);

			ut_d(bpage->in_page_hash = TRUE);
			HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
				    fold, bpage);
			rw_lock_x_unlock(&buf_pool->page_hash_latch);
			mutex_exit(&buf_pool->zip_mutex);
			return(NULL);
		case BUF_BLOCK_ZIP_PAGE:
			/* Slot already serves as a sentinel for
			some other page; try the next one. */
			ut_ad(bpage->in_page_hash);
			ut_ad(bpage->buf_fix_count > 0);
			break;
		default:
			ut_error;
		}
	}

	/* Allocation failed.  Either the maximum number of purge
	threads should never exceed BUF_POOL_WATCH_SIZE, or this code
	should be modified to return a special non-NULL value and the
	caller should purge the record directly. */
	ut_error;

	/* Fix compiler warning */
	rw_lock_x_unlock(&buf_pool->page_hash_latch);
	mutex_exit(&buf_pool->zip_mutex);
	return(NULL);
}
/****************************************************************//**
Remove the sentinel block for the watch before replacing it with a real block.
buf_pool_watch_unset() or buf_pool_watch_occurred() will notice that
the block has been replaced with the real block.
The function returns nothing: it resets watch->buf_fix_count to 0, so a
caller that needs the old reference count must read it before the call.
The debug assertions require the caller to hold buf_pool->page_hash_latch
in exclusive mode and buf_pool->zip_mutex. */
static
void
buf_pool_watch_remove(
/*==================*/
	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
	ulint		fold,		/*!< in: buf_page_address_fold(
					space, offset) */
	buf_page_t*	watch)		/*!< in/out: sentinel for watch */
{
	//ut_ad(buf_pool_mutex_own(buf_pool));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
#endif
	ut_ad(mutex_own(&buf_pool->zip_mutex)); /* for now */

	/* Unpublish the sentinel first, then mark the slot as free
	(BUF_BLOCK_POOL_WATCH is the state buf_pool_watch_set()
	looks for when allocating a slot). */
	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, watch);
	ut_d(watch->in_page_hash = FALSE);
	watch->buf_fix_count = 0;
	watch->state = BUF_BLOCK_POOL_WATCH;
}
  1465. /****************************************************************//**
  1466. Stop watching if the page has been read in.
  1467. buf_pool_watch_set(space,offset) must have returned NULL before. */
  1468. UNIV_INTERN
  1469. void
  1470. buf_pool_watch_unset(
  1471. /*=================*/
  1472. ulint space, /*!< in: space id */
  1473. ulint offset) /*!< in: page number */
  1474. {
  1475. buf_page_t* bpage;
  1476. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  1477. ulint fold = buf_page_address_fold(space, offset);
  1478. //buf_pool_mutex_enter(buf_pool);
  1479. rw_lock_x_lock(&buf_pool->page_hash_latch);
  1480. bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
  1481. /* The page must exist because buf_pool_watch_set()
  1482. increments buf_fix_count. */
  1483. ut_a(bpage);
  1484. if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) {
  1485. mutex_t* mutex = buf_page_get_mutex_enter(bpage);
  1486. ut_a(bpage->buf_fix_count > 0);
  1487. bpage->buf_fix_count--;
  1488. mutex_exit(mutex);
  1489. } else {
  1490. mutex_enter(&buf_pool->zip_mutex);
  1491. ut_a(bpage->buf_fix_count > 0);
  1492. if (UNIV_LIKELY(!--bpage->buf_fix_count)) {
  1493. buf_pool_watch_remove(buf_pool, fold, bpage);
  1494. }
  1495. mutex_exit(&buf_pool->zip_mutex);
  1496. }
  1497. //buf_pool_mutex_exit(buf_pool);
  1498. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  1499. }
  1500. /****************************************************************//**
  1501. Check if the page has been read in.
  1502. This may only be called after buf_pool_watch_set(space,offset)
  1503. has returned NULL and before invoking buf_pool_watch_unset(space,offset).
  1504. @return FALSE if the given page was not read in, TRUE if it was */
  1505. UNIV_INTERN
  1506. ibool
  1507. buf_pool_watch_occurred(
  1508. /*====================*/
  1509. ulint space, /*!< in: space id */
  1510. ulint offset) /*!< in: page number */
  1511. {
  1512. ibool ret;
  1513. buf_page_t* bpage;
  1514. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  1515. ulint fold = buf_page_address_fold(space, offset);
  1516. //buf_pool_mutex_enter(buf_pool);
  1517. rw_lock_s_lock(&buf_pool->page_hash_latch);
  1518. bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
  1519. /* The page must exist because buf_pool_watch_set()
  1520. increments buf_fix_count. */
  1521. ut_a(bpage);
  1522. ret = !buf_pool_watch_is_sentinel(buf_pool, bpage);
  1523. //buf_pool_mutex_exit(buf_pool);
  1524. rw_lock_s_unlock(&buf_pool->page_hash_latch);
  1525. return(ret);
  1526. }
  1527. /********************************************************************//**
  1528. Moves a page to the start of the buffer pool LRU list. This high-level
  1529. function can be used to prevent an important page from slipping out of
  1530. the buffer pool. */
  1531. UNIV_INTERN
  1532. void
  1533. buf_page_make_young(
  1534. /*================*/
  1535. buf_page_t* bpage) /*!< in: buffer block of a file page */
  1536. {
  1537. buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
  1538. //buf_pool_mutex_enter(buf_pool);
  1539. mutex_enter(&buf_pool->LRU_list_mutex);
  1540. ut_a(buf_page_in_file(bpage));
  1541. buf_LRU_make_block_young(bpage);
  1542. //buf_pool_mutex_exit(buf_pool);
  1543. mutex_exit(&buf_pool->LRU_list_mutex);
  1544. }
  1545. /********************************************************************//**
  1546. Moves a page to the start of the buffer pool LRU list if it is too old.
  1547. This high-level function can be used to prevent an important page from
  1548. slipping out of the buffer pool. */
  1549. static
  1550. void
  1551. buf_page_make_young_if_needed(
  1552. /*==========================*/
  1553. buf_page_t* bpage) /*!< in/out: buffer block of a
  1554. file page */
  1555. {
  1556. #ifdef UNIV_DEBUG
  1557. buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
  1558. ut_ad(!buf_pool_mutex_own(buf_pool));
  1559. #endif /* UNIV_DEBUG */
  1560. ut_a(buf_page_in_file(bpage));
  1561. if (buf_page_peek_if_too_old(bpage)) {
  1562. buf_page_make_young(bpage);
  1563. }
  1564. }
  1565. /********************************************************************//**
  1566. Resets the check_index_page_at_flush field of a page if found in the buffer
  1567. pool. */
  1568. UNIV_INTERN
  1569. void
  1570. buf_reset_check_index_page_at_flush(
  1571. /*================================*/
  1572. ulint space, /*!< in: space id */
  1573. ulint offset) /*!< in: page number */
  1574. {
  1575. buf_block_t* block;
  1576. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  1577. //buf_pool_mutex_enter(buf_pool);
  1578. rw_lock_s_lock(&buf_pool->page_hash_latch);
  1579. block = (buf_block_t*) buf_page_hash_get(buf_pool, space, offset);
  1580. if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
  1581. ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
  1582. block->check_index_page_at_flush = FALSE;
  1583. }
  1584. //buf_pool_mutex_exit(buf_pool);
  1585. rw_lock_s_unlock(&buf_pool->page_hash_latch);
  1586. }
  1587. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  1588. /********************************************************************//**
  1589. Sets file_page_was_freed TRUE if the page is found in the buffer pool.
  1590. This function should be called when we free a file page and want the
  1591. debug version to check that it is not accessed any more unless
  1592. reallocated.
  1593. @return control block if found in page hash table, otherwise NULL */
  1594. UNIV_INTERN
  1595. buf_page_t*
  1596. buf_page_set_file_page_was_freed(
  1597. /*=============================*/
  1598. ulint space, /*!< in: space id */
  1599. ulint offset) /*!< in: page number */
  1600. {
  1601. buf_page_t* bpage;
  1602. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  1603. //buf_pool_mutex_enter(buf_pool);
  1604. rw_lock_s_lock(&buf_pool->page_hash_latch);
  1605. bpage = buf_page_hash_get(buf_pool, space, offset);
  1606. if (bpage) {
  1607. ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
  1608. /* bpage->file_page_was_freed can already hold
  1609. when this code is invoked from dict_drop_index_tree() */
  1610. bpage->file_page_was_freed = TRUE;
  1611. }
  1612. //buf_pool_mutex_exit(buf_pool);
  1613. rw_lock_s_unlock(&buf_pool->page_hash_latch);
  1614. return(bpage);
  1615. }
  1616. /********************************************************************//**
  1617. Sets file_page_was_freed FALSE if the page is found in the buffer pool.
  1618. This function should be called when we free a file page and want the
  1619. debug version to check that it is not accessed any more unless
  1620. reallocated.
  1621. @return control block if found in page hash table, otherwise NULL */
  1622. UNIV_INTERN
  1623. buf_page_t*
  1624. buf_page_reset_file_page_was_freed(
  1625. /*===============================*/
  1626. ulint space, /*!< in: space id */
  1627. ulint offset) /*!< in: page number */
  1628. {
  1629. buf_page_t* bpage;
  1630. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  1631. //buf_pool_mutex_enter(buf_pool);
  1632. rw_lock_s_lock(&buf_pool->page_hash_latch);
  1633. bpage = buf_page_hash_get(buf_pool, space, offset);
  1634. if (bpage) {
  1635. ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
  1636. bpage->file_page_was_freed = FALSE;
  1637. }
  1638. //buf_pool_mutex_exit(buf_pool);
  1639. rw_lock_s_unlock(&buf_pool->page_hash_latch);
  1640. return(bpage);
  1641. }
  1642. #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
/********************************************************************//**
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch.  Mutual exclusion has to
be implemented at a higher level.  In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
If the page is not in the page hash, it is read in with buf_read_page()
and the lookup is repeated.  The returned page is buffer-fixed, and the
function waits until a pending read (BUF_IO_READ) of it has finished.
@return pointer to the block; NULL if the page has no compressed copy
(zip.data == NULL) or if it is flagged is_corrupt while
srv_pass_corrupt_table <= 1 */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size */
	ulint		offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		must_read;
	trx_t*		trx = NULL;
	ulint		sec;
	ulint		ms;
	ib_uint64_t	start_time;
	ib_uint64_t	finish_time;
	buf_pool_t*	buf_pool = buf_pool_get(space, offset);

	/* The transaction handle is only fetched when
	innobase_get_slow_log() is set; it is used below for the
	read-wait timing. */
	if (UNIV_UNLIKELY(innobase_get_slow_log())) {
		trx = innobase_get_trx();
	}

	buf_pool->stat.n_page_gets++;

	for (;;) {
		//buf_pool_mutex_enter(buf_pool);
		/* The retry paths further down jump back into the
		loop at this label. */
lookup:
		rw_lock_s_lock(&buf_pool->page_hash_latch);
		bpage = buf_page_hash_get(buf_pool, space, offset);
		if (bpage) {
			ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));
			/* Leave the loop with page_hash_latch still
			S-latched. */
			break;
		}

		/* Page not in buf_pool: needs to be read from file */

		//buf_pool_mutex_exit(buf_pool);
		rw_lock_s_unlock(&buf_pool->page_hash_latch);

		buf_read_page(space, zip_size, offset, trx);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 5771 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
	}

	if (UNIV_UNLIKELY(!bpage->zip.data)) {
		/* There is no compressed page. */
err_exit:
		//buf_pool_mutex_exit(buf_pool);
		rw_lock_s_unlock(&buf_pool->page_hash_latch);
		return(NULL);
	}

	if (UNIV_UNLIKELY(bpage->is_corrupt && srv_pass_corrupt_table <= 1)) {

		rw_lock_s_unlock(&buf_pool->page_hash_latch);

		return(NULL);
	}

	/* Take the page's mutex before dropping the hash latch. */
	block_mutex = buf_page_get_mutex_enter(bpage);

	rw_lock_s_unlock(&buf_pool->page_hash_latch);

	ut_ad(!buf_pool_watch_is_sentinel(buf_pool, bpage));

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
	case BUF_BLOCK_ZIP_FREE:
		/* Unexpected states: release the mutex, if one was
		obtained, and fall out of the switch to ut_error. */
		if (block_mutex)
			mutex_exit(block_mutex);
		break;
	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		ut_a(block_mutex == &buf_pool->zip_mutex);
		bpage->buf_fix_count++;
		goto got_block;
	case BUF_BLOCK_FILE_PAGE:
		ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);

		/* release mutex to obey to latch-order */
		mutex_exit(block_mutex);

		/* get LRU_list_mutex for buf_LRU_free_block() */
		mutex_enter(&buf_pool->LRU_list_mutex);
		mutex_enter(block_mutex);

		/* The block mutex was dropped for a moment, so
		re-check that bpage still describes the wanted page. */
		if (UNIV_UNLIKELY(bpage->space != space
				  || bpage->offset != offset
				  || !bpage->in_LRU_list
				  || !bpage->zip.data)) {
			/* someone should interrupt, retry */
			mutex_exit(&buf_pool->LRU_list_mutex);
			mutex_exit(block_mutex);
			goto lookup;
		}

		/* Discard the uncompressed page frame if possible. */
		if (buf_LRU_free_block(bpage, FALSE, TRUE)) {
			/* Freed: look the page up again. */
			mutex_exit(&buf_pool->LRU_list_mutex);
			mutex_exit(block_mutex);
			goto lookup;
		}

		mutex_exit(&buf_pool->LRU_list_mutex);

		buf_block_buf_fix_inc((buf_block_t*) bpage,
				      __FILE__, __LINE__);
		goto got_block;
	}

	ut_error;
	/* NOTE(review): err_exit unlocks page_hash_latch, which has
	already been released above; this jump presumably exists only
	to silence the compiler, assuming ut_error does not return --
	confirm. */
	goto err_exit;

got_block:
	/* block_mutex is held here; sample the I/O state before
	releasing it. */
	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;

	//buf_pool_mutex_exit(buf_pool);

	buf_page_set_accessed(bpage);

	mutex_exit(block_mutex);

	buf_page_make_young_if_needed(bpage);

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	ut_a(!bpage->file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(bpage->buf_fix_count > 0);
	ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (must_read) {
		/* Let us wait until the read operation
		completes */

		/* start_time == 0 means "do not account the wait". */
		if (UNIV_UNLIKELY(trx && trx->take_stats))
		{
			ut_usectime(&sec, &ms);
			start_time = (ib_uint64_t)sec * 1000000 + ms;
		} else {
			start_time = 0;
		}
		/* Poll the I/O fix state, sleeping WAIT_FOR_READ
		between checks, until it is no longer BUF_IO_READ. */
		for (;;) {
			enum buf_io_fix	io_fix;

			mutex_enter(block_mutex);
			io_fix = buf_page_get_io_fix(bpage);
			mutex_exit(block_mutex);

			if (io_fix == BUF_IO_READ) {

				os_thread_sleep(WAIT_FOR_READ);
			} else {
				break;
			}
		}
		/* start_time is non-zero only on the branch above
		where trx was checked to be non-NULL. */
		if (UNIV_UNLIKELY(start_time != 0))
		{
			ut_usectime(&sec, &ms);
			finish_time = (ib_uint64_t)sec * 1000000 + ms;
			trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
		}
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_page_get_space(bpage),
			    buf_page_get_page_no(bpage)) == 0);
#endif
	return(bpage);
}
  1795. /********************************************************************//**
  1796. Initialize some fields of a control block. */
  1797. UNIV_INLINE
  1798. void
  1799. buf_block_init_low(
  1800. /*===============*/
  1801. buf_block_t* block) /*!< in: block to init */
  1802. {
  1803. block->check_index_page_at_flush = FALSE;
  1804. block->index = NULL;
  1805. block->n_hash_helps = 0;
  1806. block->n_fields = 1;
  1807. block->n_bytes = 0;
  1808. block->left_side = TRUE;
  1809. }
  1810. #endif /* !UNIV_HOTBACKUP */
  1811. /********************************************************************//**
  1812. Decompress a block.
  1813. @return TRUE if successful */
  1814. UNIV_INTERN
  1815. ibool
  1816. buf_zip_decompress(
  1817. /*===============*/
  1818. buf_block_t* block, /*!< in/out: block */
  1819. ibool check) /*!< in: TRUE=verify the page checksum */
  1820. {
  1821. const byte* frame = block->page.zip.data;
  1822. ulint stamp_checksum = mach_read_from_4(
  1823. frame + FIL_PAGE_SPACE_OR_CHKSUM);
  1824. ut_ad(buf_block_get_zip_size(block));
  1825. ut_a(buf_block_get_space(block) != 0);
  1826. if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
  1827. ulint calc_checksum = page_zip_calc_checksum(
  1828. frame, page_zip_get_size(&block->page.zip));
  1829. if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
  1830. ut_print_timestamp(stderr);
  1831. fprintf(stderr,
  1832. " InnoDB: compressed page checksum mismatch"
  1833. " (space %u page %u): %lu != %lu\n",
  1834. block->page.space, block->page.offset,
  1835. stamp_checksum, calc_checksum);
  1836. return(FALSE);
  1837. }
  1838. }
  1839. switch (fil_page_get_type(frame)) {
  1840. case FIL_PAGE_INDEX:
  1841. if (page_zip_decompress(&block->page.zip,
  1842. block->frame, TRUE)) {
  1843. return(TRUE);
  1844. }
  1845. fprintf(stderr,
  1846. "InnoDB: unable to decompress space %lu page %lu\n",
  1847. (ulong) block->page.space,
  1848. (ulong) block->page.offset);
  1849. return(FALSE);
  1850. case FIL_PAGE_TYPE_ALLOCATED:
  1851. case FIL_PAGE_INODE:
  1852. case FIL_PAGE_IBUF_BITMAP:
  1853. case FIL_PAGE_TYPE_FSP_HDR:
  1854. case FIL_PAGE_TYPE_XDES:
  1855. case FIL_PAGE_TYPE_ZBLOB:
  1856. case FIL_PAGE_TYPE_ZBLOB2:
  1857. /* Copy to uncompressed storage. */
  1858. memcpy(block->frame, frame,
  1859. buf_block_get_zip_size(block));
  1860. return(TRUE);
  1861. }
  1862. ut_print_timestamp(stderr);
  1863. fprintf(stderr,
  1864. " InnoDB: unknown compressed page"
  1865. " type %lu\n",
  1866. fil_page_get_type(frame));
  1867. return(FALSE);
  1868. }
  1869. #ifndef UNIV_HOTBACKUP
  1870. /*******************************************************************//**
  1871. Gets the block to whose frame the pointer is pointing to if found
  1872. in this buffer pool instance.
  1873. @return pointer to block */
  1874. UNIV_INTERN
  1875. buf_block_t*
  1876. buf_block_align_instance(
  1877. /*=====================*/
  1878. buf_pool_t* buf_pool, /*!< in: buffer in which the block
  1879. resides */
  1880. const byte* ptr) /*!< in: pointer to a frame */
  1881. {
  1882. buf_chunk_t* chunk;
  1883. ulint i;
  1884. /* TODO: protect buf_pool->chunks with a mutex (it will
  1885. currently remain constant after buf_pool_init()) */
  1886. for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
  1887. ulint offs;
  1888. if (UNIV_UNLIKELY(ptr < chunk->blocks->frame)) {
  1889. continue;
  1890. }
  1891. /* else */
  1892. offs = ptr - chunk->blocks->frame;
  1893. offs >>= UNIV_PAGE_SIZE_SHIFT;
  1894. if (UNIV_LIKELY(offs < chunk->size)) {
  1895. buf_block_t* block = &chunk->blocks[offs];
  1896. /* The function buf_chunk_init() invokes
  1897. buf_block_init() so that block[n].frame ==
  1898. block->frame + n * UNIV_PAGE_SIZE. Check it. */
  1899. ut_ad(block->frame == page_align(ptr));
  1900. #ifdef UNIV_DEBUG
  1901. /* A thread that updates these fields must
  1902. hold buf_pool->mutex and block->mutex. Acquire
  1903. only the latter. */
  1904. mutex_enter(&block->mutex);
  1905. switch (buf_block_get_state(block)) {
  1906. case BUF_BLOCK_ZIP_FREE:
  1907. case BUF_BLOCK_ZIP_PAGE:
  1908. case BUF_BLOCK_ZIP_DIRTY:
  1909. /* These types should only be used in
  1910. the compressed buffer pool, whose
  1911. memory is allocated from
  1912. buf_pool->chunks, in UNIV_PAGE_SIZE
  1913. blocks flagged as BUF_BLOCK_MEMORY. */
  1914. ut_error;
  1915. break;
  1916. case BUF_BLOCK_NOT_USED:
  1917. case BUF_BLOCK_READY_FOR_USE:
  1918. case BUF_BLOCK_MEMORY:
  1919. /* Some data structures contain
  1920. "guess" pointers to file pages. The
  1921. file pages may have been freed and
  1922. reused. Do not complain. */
  1923. break;
  1924. case BUF_BLOCK_REMOVE_HASH:
  1925. /* buf_LRU_block_remove_hashed_page()
  1926. will overwrite the FIL_PAGE_OFFSET and
  1927. FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
  1928. 0xff and set the state to
  1929. BUF_BLOCK_REMOVE_HASH. */
  1930. ut_ad(page_get_space_id(page_align(ptr))
  1931. == 0xffffffff);
  1932. ut_ad(page_get_page_no(page_align(ptr))
  1933. == 0xffffffff);
  1934. break;
  1935. case BUF_BLOCK_FILE_PAGE:
  1936. ut_ad(block->page.space
  1937. == page_get_space_id(page_align(ptr)));
  1938. ut_ad(block->page.offset
  1939. == page_get_page_no(page_align(ptr)));
  1940. break;
  1941. }
  1942. mutex_exit(&block->mutex);
  1943. #endif /* UNIV_DEBUG */
  1944. return(block);
  1945. }
  1946. }
  1947. return(NULL);
  1948. }
  1949. /*******************************************************************//**
  1950. Gets the block to whose frame the pointer is pointing to.
  1951. @return pointer to block, never NULL */
  1952. UNIV_INTERN
  1953. buf_block_t*
  1954. buf_block_align(
  1955. /*============*/
  1956. const byte* ptr) /*!< in: pointer to a frame */
  1957. {
  1958. ulint i;
  1959. for (i = 0; i < srv_buf_pool_instances; i++) {
  1960. buf_block_t* block;
  1961. block = buf_block_align_instance(
  1962. buf_pool_from_array(i), ptr);
  1963. if (block) {
  1964. return(block);
  1965. }
  1966. }
  1967. /* The block should always be found. */
  1968. ut_error;
  1969. return(NULL);
  1970. }
  1971. /********************************************************************//**
  1972. Find out if a pointer belongs to a buf_block_t. It can be a pointer to
  1973. the buf_block_t itself or a member of it. This functions checks one of
  1974. the buffer pool instances.
  1975. @return TRUE if ptr belongs to a buf_block_t struct */
  1976. static
  1977. ibool
  1978. buf_pointer_is_block_field_instance(
  1979. /*================================*/
  1980. buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  1981. const void* ptr) /*!< in: pointer not dereferenced */
  1982. {
  1983. const buf_chunk_t* chunk = buf_pool->chunks;
  1984. const buf_chunk_t* const echunk = chunk + buf_pool->n_chunks;
  1985. /* TODO: protect buf_pool->chunks with a mutex (it will
  1986. currently remain constant after buf_pool_init()) */
  1987. while (chunk < echunk) {
  1988. if (ptr >= (void *)chunk->blocks
  1989. && ptr < (void *)(chunk->blocks + chunk->size)) {
  1990. return(TRUE);
  1991. }
  1992. chunk++;
  1993. }
  1994. return(FALSE);
  1995. }
  1996. /********************************************************************//**
  1997. Find out if a pointer belongs to a buf_block_t. It can be a pointer to
  1998. the buf_block_t itself or a member of it
  1999. @return TRUE if ptr belongs to a buf_block_t struct */
  2000. UNIV_INTERN
  2001. ibool
  2002. buf_pointer_is_block_field(
  2003. /*=======================*/
  2004. const void* ptr) /*!< in: pointer not dereferenced */
  2005. {
  2006. ulint i;
  2007. for (i = 0; i < srv_buf_pool_instances; i++) {
  2008. ibool found;
  2009. found = buf_pointer_is_block_field_instance(
  2010. buf_pool_from_array(i), ptr);
  2011. if (found) {
  2012. return(TRUE);
  2013. }
  2014. }
  2015. return(FALSE);
  2016. }
  2017. /********************************************************************//**
  2018. Find out if a buffer block was created by buf_chunk_init().
  2019. @return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
  2020. static
  2021. ibool
  2022. buf_block_is_uncompressed(
  2023. /*======================*/
  2024. buf_pool_t* buf_pool, /*!< in: buffer pool instance */
  2025. const buf_block_t* block) /*!< in: pointer to block,
  2026. not dereferenced */
  2027. {
  2028. //ut_ad(buf_pool_mutex_own(buf_pool));
  2029. if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
  2030. /* The pointer should be aligned. */
  2031. return(FALSE);
  2032. }
  2033. return(buf_pointer_is_block_field_instance(buf_pool, (void *)block));
  2034. }
  2035. /********************************************************************//**
  2036. This is the general function used to get access to a database page.
  2037. @return pointer to the block or NULL */
  2038. UNIV_INTERN
  2039. buf_block_t*
  2040. buf_page_get_gen(
  2041. /*=============*/
  2042. ulint space, /*!< in: space id */
  2043. ulint zip_size,/*!< in: compressed page size in bytes
  2044. or 0 for uncompressed pages */
  2045. ulint offset, /*!< in: page number */
  2046. ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
  2047. buf_block_t* guess, /*!< in: guessed block or NULL */
  2048. ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
  2049. BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or
  2050. BUF_GET_IF_IN_POOL_OR_WATCH */
  2051. const char* file, /*!< in: file name */
  2052. ulint line, /*!< in: line where called */
  2053. mtr_t* mtr) /*!< in: mini-transaction */
  2054. {
  2055. buf_block_t* block;
  2056. ulint fold;
  2057. unsigned access_time;
  2058. ulint fix_type;
  2059. ibool must_read;
  2060. ulint retries = 0;
  2061. mutex_t* block_mutex = NULL;
  2062. trx_t* trx = NULL;
  2063. ulint sec;
  2064. ulint ms;
  2065. ib_uint64_t start_time;
  2066. ib_uint64_t finish_time;
  2067. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  2068. ut_ad(mtr);
  2069. ut_ad(mtr->state == MTR_ACTIVE);
  2070. ut_ad((rw_latch == RW_S_LATCH)
  2071. || (rw_latch == RW_X_LATCH)
  2072. || (rw_latch == RW_NO_LATCH));
  2073. #ifdef UNIV_DEBUG
  2074. switch (mode) {
  2075. case BUF_GET_NO_LATCH:
  2076. ut_ad(rw_latch == RW_NO_LATCH);
  2077. break;
  2078. case BUF_GET:
  2079. case BUF_GET_IF_IN_POOL:
  2080. case BUF_PEEK_IF_IN_POOL:
  2081. case BUF_GET_IF_IN_POOL_OR_WATCH:
  2082. case BUF_GET_POSSIBLY_FREED:
  2083. break;
  2084. default:
  2085. ut_error;
  2086. }
  2087. #endif /* UNIV_DEBUG */
  2088. ut_ad(zip_size == fil_space_get_zip_size(space));
  2089. ut_ad(ut_is_2pow(zip_size));
  2090. #ifndef UNIV_LOG_DEBUG
  2091. ut_ad(!ibuf_inside(mtr)
  2092. || ibuf_page_low(space, zip_size, offset,
  2093. FALSE, file, line, NULL));
  2094. #endif
  2095. if (UNIV_UNLIKELY(innobase_get_slow_log())) {
  2096. trx = innobase_get_trx();
  2097. }
  2098. buf_pool->stat.n_page_gets++;
  2099. fold = buf_page_address_fold(space, offset);
  2100. loop:
  2101. block = guess;
  2102. //buf_pool_mutex_enter(buf_pool);
  2103. if (block) {
  2104. block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
  2105. /* If the guess is a compressed page descriptor that
  2106. has been allocated by buf_page_alloc_descriptor(),
  2107. it may have been freed by buf_relocate(). */
  2108. if (!block_mutex) {
  2109. block = guess = NULL;
  2110. } else if (!buf_block_is_uncompressed(buf_pool, block)
  2111. || offset != block->page.offset
  2112. || space != block->page.space
  2113. || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
  2114. mutex_exit(block_mutex);
  2115. block = guess = NULL;
  2116. } else {
  2117. ut_ad(!block->page.in_zip_hash);
  2118. ut_ad(block->page.in_page_hash);
  2119. }
  2120. }
  2121. if (block == NULL) {
  2122. rw_lock_s_lock(&buf_pool->page_hash_latch);
  2123. block = (buf_block_t*) buf_page_hash_get_low(
  2124. buf_pool, space, offset, fold);
  2125. if (block) {
  2126. block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
  2127. ut_a(block_mutex);
  2128. }
  2129. rw_lock_s_unlock(&buf_pool->page_hash_latch);
  2130. }
  2131. loop2:
  2132. if (block && buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
  2133. mutex_exit(block_mutex);
  2134. block = NULL;
  2135. }
  2136. if (block == NULL) {
  2137. /* Page not in buf_pool: needs to be read from file */
  2138. if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
  2139. block = (buf_block_t*) buf_pool_watch_set(
  2140. space, offset, fold);
  2141. if (UNIV_LIKELY_NULL(block)) {
  2142. block_mutex = buf_page_get_mutex((buf_page_t*)block);
  2143. ut_a(block_mutex);
  2144. ut_ad(mutex_own(block_mutex));
  2145. goto got_block;
  2146. }
  2147. }
  2148. //buf_pool_mutex_exit(buf_pool);
  2149. if (mode == BUF_GET_IF_IN_POOL
  2150. || mode == BUF_PEEK_IF_IN_POOL
  2151. || mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
  2152. return(NULL);
  2153. }
  2154. if (buf_read_page(space, zip_size, offset, trx)) {
  2155. buf_read_ahead_random(space, zip_size, offset,
  2156. ibuf_inside(mtr), trx);
  2157. retries = 0;
  2158. } else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
  2159. ++retries;
  2160. DBUG_EXECUTE_IF(
  2161. "innodb_page_corruption_retries",
  2162. retries = BUF_PAGE_READ_MAX_RETRIES;
  2163. );
  2164. } else {
  2165. fprintf(stderr, "InnoDB: Error: Unable"
  2166. " to read tablespace %lu page no"
  2167. " %lu into the buffer pool after"
  2168. " %lu attempts\n"
  2169. "InnoDB: The most probable cause"
  2170. " of this error may be that the"
  2171. " table has been corrupted.\n"
  2172. "InnoDB: You can try to fix this"
  2173. " problem by using"
  2174. " innodb_force_recovery.\n"
  2175. "InnoDB: Please see reference manual"
  2176. " for more details.\n"
  2177. "InnoDB: Aborting...\n",
  2178. space, offset,
  2179. BUF_PAGE_READ_MAX_RETRIES);
  2180. ut_error;
  2181. }
  2182. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  2183. ut_a(++buf_dbg_counter % 5771 || buf_validate());
  2184. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2185. goto loop;
  2186. }
  2187. got_block:
  2188. ut_ad(page_zip_get_size(&block->page.zip) == zip_size);
  2189. must_read = buf_block_get_io_fix(block) == BUF_IO_READ;
  2190. if (must_read && (mode == BUF_GET_IF_IN_POOL
  2191. || mode == BUF_PEEK_IF_IN_POOL)) {
  2192. /* The page is being read to buffer pool,
  2193. but we cannot wait around for the read to
  2194. complete. */
  2195. null_exit:
  2196. //buf_pool_mutex_exit(buf_pool);
  2197. mutex_exit(block_mutex);
  2198. return(NULL);
  2199. }
  2200. if (UNIV_UNLIKELY(block->page.is_corrupt &&
  2201. srv_pass_corrupt_table <= 1)) {
  2202. mutex_exit(block_mutex);
  2203. return(NULL);
  2204. }
  2205. switch (buf_block_get_state(block)) {
  2206. buf_page_t* bpage;
  2207. ibool success;
  2208. case BUF_BLOCK_FILE_PAGE:
  2209. if (block_mutex == &buf_pool->zip_mutex) {
  2210. /* it is wrong mutex... */
  2211. mutex_exit(block_mutex);
  2212. goto loop;
  2213. }
  2214. break;
  2215. case BUF_BLOCK_ZIP_PAGE:
  2216. case BUF_BLOCK_ZIP_DIRTY:
  2217. ut_ad(block_mutex == &buf_pool->zip_mutex);
  2218. if (mode == BUF_PEEK_IF_IN_POOL) {
  2219. /* This mode is only used for dropping an
  2220. adaptive hash index. There cannot be an
  2221. adaptive hash index for a compressed-only
  2222. page, so do not bother decompressing the page. */
  2223. goto null_exit;
  2224. }
  2225. bpage = &block->page;
  2226. /* Protect bpage->buf_fix_count. */
  2227. //mutex_enter(&buf_pool->zip_mutex);
  2228. if (bpage->buf_fix_count
  2229. || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
  2230. /* This condition often occurs when the buffer
  2231. is not buffer-fixed, but I/O-fixed by
  2232. buf_page_init_for_read(). */
  2233. //mutex_exit(&buf_pool->zip_mutex);
  2234. wait_until_unfixed:
  2235. /* The block is buffer-fixed or I/O-fixed.
  2236. Try again later. */
  2237. //buf_pool_mutex_exit(buf_pool);
  2238. mutex_exit(block_mutex);
  2239. os_thread_sleep(WAIT_FOR_READ);
  2240. goto loop;
  2241. }
  2242. /* Buffer-fix the block so that it cannot be evicted
  2243. or relocated while we are attempting to allocate an
  2244. uncompressed page. */
  2245. bpage->buf_fix_count++;
  2246. /* Allocate an uncompressed page. */
  2247. //buf_pool_mutex_exit(buf_pool);
  2248. //mutex_exit(&buf_pool->zip_mutex);
  2249. mutex_exit(block_mutex);
  2250. block = buf_LRU_get_free_block(buf_pool);
  2251. ut_a(block);
  2252. block_mutex = &block->mutex;
  2253. mutex_enter(&buf_pool->LRU_list_mutex);
  2254. rw_lock_x_lock(&buf_pool->page_hash_latch);
  2255. mutex_enter(&block->mutex);
  2256. mutex_enter(&buf_pool->zip_mutex);
  2257. /* Buffer-fixing prevents the page_hash from changing. */
  2258. ut_ad(bpage == buf_page_hash_get_low(buf_pool,
  2259. space, offset, fold));
  2260. if (UNIV_UNLIKELY
  2261. (--bpage->buf_fix_count
  2262. || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {
  2263. mutex_exit(&buf_pool->zip_mutex);
  2264. /* The block was buffer-fixed or I/O-fixed while
  2265. buf_pool->mutex was not held by this thread.
  2266. Free the block that was allocated and retry.
  2267. This should be extremely unlikely, for example,
  2268. if buf_page_get_zip() was invoked. */
  2269. buf_LRU_block_free_non_file_page(block, TRUE);
  2270. //mutex_exit(&block->mutex);
  2271. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  2272. mutex_exit(&buf_pool->LRU_list_mutex);
  2273. goto wait_until_unfixed;
  2274. }
  2275. /* Move the compressed page from bpage to block,
  2276. and uncompress it. */
  2277. buf_relocate(bpage, &block->page);
  2278. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  2279. buf_block_init_low(block);
  2280. block->lock_hash_val = lock_rec_hash(space, offset);
  2281. UNIV_MEM_DESC(&block->page.zip.data,
  2282. page_zip_get_size(&block->page.zip), block);
  2283. if (buf_page_get_state(&block->page)
  2284. == BUF_BLOCK_ZIP_PAGE) {
  2285. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  2286. UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
  2287. &block->page);
  2288. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2289. ut_ad(!block->page.in_flush_list);
  2290. } else {
  2291. /* Relocate buf_pool->flush_list. */
  2292. buf_flush_relocate_on_flush_list(bpage,
  2293. &block->page);
  2294. }
  2295. /* Buffer-fix, I/O-fix, and X-latch the block
  2296. for the duration of the decompression.
  2297. Also add the block to the unzip_LRU list. */
  2298. block->page.state = BUF_BLOCK_FILE_PAGE;
  2299. /* Insert at the front of unzip_LRU list */
  2300. buf_unzip_LRU_add_block(block, FALSE);
  2301. mutex_exit(&buf_pool->LRU_list_mutex);
  2302. block->page.buf_fix_count = 1;
  2303. buf_block_set_io_fix(block, BUF_IO_READ);
  2304. rw_lock_x_lock_inline(&block->lock, 0, file, line);
  2305. UNIV_MEM_INVALID(bpage, sizeof *bpage);
  2306. access_time = buf_page_is_accessed(&block->page);
  2307. mutex_exit(block_mutex);
  2308. mutex_exit(&buf_pool->zip_mutex);
  2309. buf_pool_mutex_enter(buf_pool);
  2310. buf_pool->n_pend_unzip++;
  2311. buf_pool_mutex_exit(buf_pool);
  2312. buf_page_free_descriptor(bpage);
  2313. /* Decompress the page while not holding
  2314. buf_pool->mutex or block->mutex. */
  2315. success = buf_zip_decompress(block, srv_use_checksums);
  2316. ut_a(success);
  2317. if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
  2318. if (access_time) {
  2319. #ifdef UNIV_IBUF_COUNT_DEBUG
  2320. ut_a(ibuf_count_get(space, offset) == 0);
  2321. #endif /* UNIV_IBUF_COUNT_DEBUG */
  2322. } else {
  2323. ibuf_merge_or_delete_for_page(
  2324. block, space, offset, zip_size, TRUE);
  2325. }
  2326. }
  2327. /* Unfix and unlatch the block. */
  2328. //buf_pool_mutex_enter(buf_pool);
  2329. block_mutex = &block->mutex;
  2330. mutex_enter(block_mutex);
  2331. block->page.buf_fix_count--;
  2332. buf_block_set_io_fix(block, BUF_IO_NONE);
  2333. buf_pool_mutex_enter(buf_pool);
  2334. buf_pool->n_pend_unzip--;
  2335. buf_pool_mutex_exit(buf_pool);
  2336. rw_lock_x_unlock(&block->lock);
  2337. break;
  2338. case BUF_BLOCK_ZIP_FREE:
  2339. case BUF_BLOCK_NOT_USED:
  2340. case BUF_BLOCK_READY_FOR_USE:
  2341. case BUF_BLOCK_MEMORY:
  2342. case BUF_BLOCK_REMOVE_HASH:
  2343. ut_error;
  2344. break;
  2345. }
  2346. ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2347. //mutex_enter(&block->mutex);
  2348. #if UNIV_WORD_SIZE == 4
  2349. /* On 32-bit systems, there is no padding in buf_page_t. On
  2350. other systems, Valgrind could complain about uninitialized pad
  2351. bytes. */
  2352. UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
  2353. #endif
  2354. #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
  2355. if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH)
  2356. && ibuf_debug) {
  2357. /* Try to evict the block from the buffer pool, to use the
  2358. insert buffer (change buffer) as much as possible. */
  2359. ulint page_no = buf_block_get_page_no(block);
  2360. if (buf_LRU_free_block(&block->page, TRUE, FALSE)) {
  2361. mutex_exit(block_mutex);
  2362. if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) {
  2363. /* Set the watch, as it would have
  2364. been set if the page were not in the
  2365. buffer pool in the first place. */
  2366. block = (buf_block_t*) buf_pool_watch_set(
  2367. space, offset, fold);
  2368. if (UNIV_LIKELY_NULL(block)) {
  2369. block_mutex = buf_page_get_mutex((buf_page_t*)block);
  2370. ut_a(block_mutex);
  2371. ut_ad(mutex_own(block_mutex));
  2372. /* The page entered the buffer
  2373. pool for some reason. Try to
  2374. evict it again. */
  2375. goto got_block;
  2376. }
  2377. }
  2378. //buf_pool_mutex_exit(buf_pool);
  2379. fprintf(stderr,
  2380. "innodb_change_buffering_debug evict %u %u\n",
  2381. (unsigned) space, (unsigned) offset);
  2382. return(NULL);
  2383. } else if (UNIV_UNLIKELY(buf_block_get_state(block)
  2384. != BUF_BLOCK_FILE_PAGE
  2385. || (buf_block_get_page_no(block) != page_no)
  2386. || (buf_block_get_space(block) != space))) {
  2387. /* buf_LRU_free_block temporarily releases the
  2388. block mutex, and now block points to something
  2389. else. */
  2390. mutex_exit(block_mutex);
  2391. block = NULL;
  2392. goto loop2;
  2393. } else if (buf_flush_page_try(buf_pool, block)) {
  2394. fprintf(stderr,
  2395. "innodb_change_buffering_debug flush %u %u\n",
  2396. (unsigned) space, (unsigned) offset);
  2397. guess = block;
  2398. goto loop;
  2399. }
  2400. /* Failed to evict the page; change it directly */
  2401. }
  2402. #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
  2403. buf_block_buf_fix_inc(block, file, line);
  2404. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  2405. ut_a(mode == BUF_GET_POSSIBLY_FREED
  2406. || !block->page.file_page_was_freed);
  2407. #endif
  2408. /* Check if this is the first access to the page */
  2409. access_time = buf_page_is_accessed(&block->page);
  2410. buf_page_set_accessed(&block->page);
  2411. mutex_exit(&block->mutex);
  2412. if (mode != BUF_PEEK_IF_IN_POOL) {
  2413. buf_page_make_young_if_needed(&block->page);
  2414. }
  2415. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  2416. ut_a(++buf_dbg_counter % 5771 || buf_validate());
  2417. ut_a(block->page.buf_fix_count > 0);
  2418. ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2419. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2420. switch (rw_latch) {
  2421. case RW_NO_LATCH:
  2422. if (must_read) {
  2423. /* Let us wait until the read operation
  2424. completes */
  2425. if (UNIV_UNLIKELY(trx && trx->take_stats))
  2426. {
  2427. ut_usectime(&sec, &ms);
  2428. start_time = (ib_uint64_t)sec * 1000000 + ms;
  2429. } else {
  2430. start_time = 0;
  2431. }
  2432. for (;;) {
  2433. enum buf_io_fix io_fix;
  2434. mutex_enter(&block->mutex);
  2435. io_fix = buf_block_get_io_fix(block);
  2436. mutex_exit(&block->mutex);
  2437. if (io_fix == BUF_IO_READ) {
  2438. /* wait by temporarily taking an s-latch */
  2439. rw_lock_s_lock(&(block->lock));
  2440. rw_lock_s_unlock(&(block->lock));
  2441. } else {
  2442. break;
  2443. }
  2444. }
  2445. if (UNIV_UNLIKELY(start_time != 0))
  2446. {
  2447. ut_usectime(&sec, &ms);
  2448. finish_time = (ib_uint64_t)sec * 1000000 + ms;
  2449. trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
  2450. }
  2451. }
  2452. fix_type = MTR_MEMO_BUF_FIX;
  2453. break;
  2454. case RW_S_LATCH:
  2455. rw_lock_s_lock_inline(&(block->lock), 0, file, line);
  2456. fix_type = MTR_MEMO_PAGE_S_FIX;
  2457. break;
  2458. default:
  2459. ut_ad(rw_latch == RW_X_LATCH);
  2460. rw_lock_x_lock_inline(&(block->lock), 0, file, line);
  2461. fix_type = MTR_MEMO_PAGE_X_FIX;
  2462. break;
  2463. }
  2464. mtr_memo_push(mtr, block, fix_type);
  2465. if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
  2466. /* In the case of a first access, try to apply linear
  2467. read-ahead */
  2468. buf_read_ahead_linear(space, zip_size, offset,
  2469. ibuf_inside(mtr), trx);
  2470. }
  2471. #ifdef UNIV_IBUF_COUNT_DEBUG
  2472. ut_a(ibuf_count_get(buf_block_get_space(block),
  2473. buf_block_get_page_no(block)) == 0);
  2474. #endif
  2475. if (UNIV_UNLIKELY(trx && trx->take_stats)) {
  2476. _increment_page_get_statistics(block, trx);
  2477. }
  2478. return(block);
  2479. }
  2480. /********************************************************************//**
  2481. This is the general function used to get optimistic access to a database
  2482. page.
        The block is buffer-fixed under block->mutex and then latched with a
        no-wait S- or X-latch request. The attempt succeeds only if the block is
        in state BUF_BLOCK_FILE_PAGE, the latch is granted immediately, and
        block->modify_clock still equals the modify_clock passed in. On failure
        the buffer-fix (and the latch, if it was taken) is released again.
        Note that the page is marked accessed and
        buf_page_make_young_if_needed() is called before the latch is
        attempted, i.e. also on attempts that end up returning FALSE.
  2483. @return TRUE if success */
  2484. UNIV_INTERN
  2485. ibool
  2486. buf_page_optimistic_get(
  2487. /*====================*/
  2488. ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
  2489. buf_block_t* block, /*!< in: guessed buffer block */
  2490. ib_uint64_t modify_clock,/*!< in: modify clock value previously
  2491. read by the caller; must still equal block->modify_clock */
  2492. const char* file, /*!< in: file name */
  2493. ulint line, /*!< in: line where called */
  2494. mtr_t* mtr) /*!< in: mini-transaction */
  2495. {
  2496. buf_pool_t* buf_pool;
  2497. unsigned access_time;
  2498. ibool success;
  2499. ulint fix_type;
  2500. trx_t* trx = NULL;
  2501. ut_ad(block);
  2502. ut_ad(mtr);
  2503. ut_ad(mtr->state == MTR_ACTIVE);
  2504. ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
        /* The state check, the buffer-fix and the access-time update are
        all done while holding block->mutex. */
  2505. mutex_enter(&block->mutex);
  2506. if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {
  2507. mutex_exit(&block->mutex);
  2508. return(FALSE);
  2509. }
  2510. buf_block_buf_fix_inc(block, file, line);
        /* Read the previous access time before marking the page accessed;
        a zero value is treated below as "first access". */
  2511. access_time = buf_page_is_accessed(&block->page);
  2512. buf_page_set_accessed(&block->page);
  2513. mutex_exit(&block->mutex);
  2514. buf_page_make_young_if_needed(&block->page);
  2515. ut_ad(!ibuf_inside(mtr)
  2516. || ibuf_page(buf_block_get_space(block),
  2517. buf_block_get_zip_size(block),
  2518. buf_block_get_page_no(block), NULL));
        /* Request the latch without waiting. */
  2519. if (rw_latch == RW_S_LATCH) {
  2520. success = rw_lock_s_lock_nowait(&(block->lock),
  2521. file, line);
  2522. fix_type = MTR_MEMO_PAGE_S_FIX;
  2523. } else {
  2524. success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
  2525. file, line);
  2526. fix_type = MTR_MEMO_PAGE_X_FIX;
  2527. }
  2528. if (UNIV_UNLIKELY(!success)) {
        /* The latch was not granted: undo the buffer-fix. */
  2529. mutex_enter(&block->mutex);
  2530. buf_block_buf_fix_dec(block);
  2531. mutex_exit(&block->mutex);
  2532. return(FALSE);
  2533. }
  2534. if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
        /* The caller's modify_clock no longer matches the block:
        release the latch just taken and the buffer-fix. */
  2535. buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
  2536. if (rw_latch == RW_S_LATCH) {
  2537. rw_lock_s_unlock(&(block->lock));
  2538. } else {
  2539. rw_lock_x_unlock(&(block->lock));
  2540. }
  2541. mutex_enter(&block->mutex);
  2542. buf_block_buf_fix_dec(block);
  2543. mutex_exit(&block->mutex);
  2544. return(FALSE);
  2545. }
        /* Success: record the latch in the mini-transaction memo. */
  2546. mtr_memo_push(mtr, block, fix_type);
  2547. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  2548. ut_a(++buf_dbg_counter % 5771 || buf_validate());
  2549. ut_a(block->page.buf_fix_count > 0);
  2550. ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2551. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2552. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  2553. ut_a(block->page.file_page_was_freed == FALSE);
  2554. #endif
        /* trx is looked up only when innobase_get_slow_log() is nonzero;
        otherwise it stays NULL, NULL is passed to the read-ahead call and
        no per-transaction statistics are recorded below. */
  2555. if (UNIV_UNLIKELY(innobase_get_slow_log())) {
  2556. trx = innobase_get_trx();
  2557. }
  2558. if (!access_time) {
  2559. /* In the case of a first access, try to apply linear
  2560. read-ahead */
  2561. buf_read_ahead_linear(buf_block_get_space(block),
  2562. buf_block_get_zip_size(block),
  2563. buf_block_get_page_no(block),
  2564. ibuf_inside(mtr), trx);
  2565. }
  2566. #ifdef UNIV_IBUF_COUNT_DEBUG
  2567. ut_a(ibuf_count_get(buf_block_get_space(block),
  2568. buf_block_get_page_no(block)) == 0);
  2569. #endif
  2570. buf_pool = buf_pool_from_block(block);
        /* NOTE(review): this counter is incremented without any mutex
        being acquired in this function. */
  2571. buf_pool->stat.n_page_gets++;
  2572. if (UNIV_UNLIKELY(trx && trx->take_stats)) {
  2573. _increment_page_get_statistics(block, trx);
  2574. }
  2575. return(TRUE);
  2576. }
  2577. /********************************************************************//**
  2578. This is used to get access to a known database page, when no waiting can be
  2579. done. For example, if a search in an adaptive hash index leads us to this
  2580. frame.
        The block is buffer-fixed under block->mutex and the latch is requested
        without waiting; if the latch is not granted, the buffer-fix is released
        and FALSE is returned. FALSE is also returned, without buffer-fixing, if
        the block is in state BUF_BLOCK_REMOVE_HASH.
  2581. @return TRUE if success */
  2582. UNIV_INTERN
  2583. ibool
  2584. buf_page_get_known_nowait(
  2585. /*======================*/
  2586. ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
  2587. buf_block_t* block, /*!< in: the known page */
  2588. ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
  2589. const char* file, /*!< in: file name */
  2590. ulint line, /*!< in: line where called */
  2591. mtr_t* mtr) /*!< in: mini-transaction */
  2592. {
  2593. buf_pool_t* buf_pool;
  2594. ibool success;
  2595. ulint fix_type;
  2596. trx_t* trx = NULL;
  2597. ut_ad(mtr);
  2598. ut_ad(mtr->state == MTR_ACTIVE);
  2599. ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
        /* The state check and the buffer-fix are done under block->mutex. */
  2600. mutex_enter(&block->mutex);
  2601. if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
  2602. /* Another thread is just freeing the block from the LRU list
  2603. of the buffer pool: do not try to access this page; this
  2604. attempt to access the page can only come through the hash
  2605. index because when the buffer block state is ..._REMOVE_HASH,
  2606. we have already removed it from the page address hash table
  2607. of the buffer pool. */
  2608. mutex_exit(&block->mutex);
  2609. return(FALSE);
  2610. }
        /* Any state other than BUF_BLOCK_FILE_PAGE at this point is treated
        as a fatal error, in all builds (ut_a). */
  2611. ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2612. buf_block_buf_fix_inc(block, file, line);
  2613. buf_page_set_accessed(&block->page);
  2614. mutex_exit(&block->mutex);
  2615. buf_pool = buf_pool_from_block(block);
        /* With BUF_KEEP_OLD, buf_page_make_young_if_needed() is not called. */
  2616. if (mode == BUF_MAKE_YOUNG) {
  2617. buf_page_make_young_if_needed(&block->page);
  2618. }
  2619. ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
        /* Request the latch without waiting. */
  2620. if (rw_latch == RW_S_LATCH) {
  2621. success = rw_lock_s_lock_nowait(&(block->lock),
  2622. file, line);
  2623. fix_type = MTR_MEMO_PAGE_S_FIX;
  2624. } else {
  2625. success = rw_lock_x_lock_func_nowait_inline(&(block->lock),
  2626. file, line);
  2627. fix_type = MTR_MEMO_PAGE_X_FIX;
  2628. }
  2629. if (!success) {
        /* The latch was not granted: undo the buffer-fix. */
  2630. mutex_enter(&block->mutex);
  2631. buf_block_buf_fix_dec(block);
  2632. mutex_exit(&block->mutex);
  2633. return(FALSE);
  2634. }
        /* Success: record the latch in the mini-transaction memo. */
  2635. mtr_memo_push(mtr, block, fix_type);
  2636. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  2637. ut_a(++buf_dbg_counter % 5771 || buf_validate());
  2638. ut_a(block->page.buf_fix_count > 0);
  2639. ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2640. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2641. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  2642. ut_a(mode == BUF_KEEP_OLD || !block->page.file_page_was_freed);
  2643. #endif
  2644. #ifdef UNIV_IBUF_COUNT_DEBUG
  2645. ut_a((mode == BUF_KEEP_OLD)
  2646. || (ibuf_count_get(buf_block_get_space(block),
  2647. buf_block_get_page_no(block)) == 0));
  2648. #endif
        /* NOTE(review): this counter is incremented without any mutex
        being acquired in this function. */
  2649. buf_pool->stat.n_page_gets++;
        /* Per-transaction statistics are recorded only when
        innobase_get_slow_log() is nonzero and the transaction returned by
        innobase_get_trx() has take_stats set. */
  2650. if (UNIV_UNLIKELY(innobase_get_slow_log())) {
  2651. trx = innobase_get_trx();
  2652. if (trx != NULL && trx->take_stats) {
  2653. _increment_page_get_statistics(block, trx);
  2654. }
  2655. }
  2656. return(TRUE);
  2657. }
  2658. /*******************************************************************//**
  2659. Given a tablespace id and page number tries to get that page. If the
  2660. page is not in the buffer pool it is not loaded and NULL is returned.
  2661. Suitable for using when holding the kernel mutex.
        NULL is also returned if the block found is not in state
        BUF_BLOCK_FILE_PAGE, or if neither an S-latch nor an X-latch on the
        block can be obtained without waiting. On success the block is
        buffer-fixed and the latch taken is pushed to the mtr memo.
  2662. @return pointer to a page or NULL */
  2663. UNIV_INTERN
  2664. const buf_block_t*
  2665. buf_page_try_get_func(
  2666. /*==================*/
  2667. ulint space_id,/*!< in: tablespace id */
  2668. ulint page_no,/*!< in: page number */
  2669. const char* file, /*!< in: file name */
  2670. ulint line, /*!< in: line where called */
  2671. mtr_t* mtr) /*!< in: mini-transaction */
  2672. {
  2673. buf_block_t* block;
  2674. ibool success;
  2675. ulint fix_type;
  2676. buf_pool_t* buf_pool = buf_pool_get(space_id, page_no);
  2677. ut_ad(mtr);
  2678. ut_ad(mtr->state == MTR_ACTIVE);
  2679. //buf_pool_mutex_enter(buf_pool);
        /* The page hash lookup is done under an S-latch on page_hash_latch. */
  2680. rw_lock_s_lock(&buf_pool->page_hash_latch);
  2681. block = buf_block_hash_get(buf_pool, space_id, page_no);
  2682. if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
  2683. //buf_pool_mutex_exit(buf_pool);
  2684. rw_lock_s_unlock(&buf_pool->page_hash_latch);
  2685. return(NULL);
  2686. }
  2687. ut_ad(!buf_pool_watch_is_sentinel(buf_pool, &block->page));
        /* block->mutex is acquired before page_hash_latch is released, so
        there is no window in which neither is held. */
  2688. mutex_enter(&block->mutex);
  2689. //buf_pool_mutex_exit(buf_pool);
  2690. rw_lock_s_unlock(&buf_pool->page_hash_latch);
  2691. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  2692. ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2693. ut_a(buf_block_get_space(block) == space_id);
  2694. ut_a(buf_block_get_page_no(block) == page_no);
  2695. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2696. buf_block_buf_fix_inc(block, file, line);
  2697. mutex_exit(&block->mutex);
        /* Try the S-latch first; fix_type is switched to X below if the
        fallback X-latch request is made. */
  2698. fix_type = MTR_MEMO_PAGE_S_FIX;
  2699. success = rw_lock_s_lock_nowait(&block->lock, file, line);
  2700. if (!success) {
  2701. /* Let us try to get an X-latch. If the current thread
  2702. is holding an X-latch on the page, we cannot get an
  2703. S-latch. */
  2704. fix_type = MTR_MEMO_PAGE_X_FIX;
  2705. success = rw_lock_x_lock_func_nowait_inline(&block->lock,
  2706. file, line);
  2707. }
  2708. if (!success) {
        /* Neither latch was granted: undo the buffer-fix. */
  2709. mutex_enter(&block->mutex);
  2710. buf_block_buf_fix_dec(block);
  2711. mutex_exit(&block->mutex);
  2712. return(NULL);
  2713. }
  2714. mtr_memo_push(mtr, block, fix_type);
  2715. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  2716. ut_a(++buf_dbg_counter % 5771 || buf_validate());
  2717. ut_a(block->page.buf_fix_count > 0);
  2718. ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2719. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2720. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  2721. ut_a(block->page.file_page_was_freed == FALSE);
  2722. #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
  2723. buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
        /* NOTE(review): this counter is incremented without any mutex
        being held at this point in the function. */
  2724. buf_pool->stat.n_page_gets++;
  2725. #ifdef UNIV_IBUF_COUNT_DEBUG
  2726. ut_a(ibuf_count_get(buf_block_get_space(block),
  2727. buf_block_get_page_no(block)) == 0);
  2728. #endif
  2729. return(block);
  2730. }
  2731. /********************************************************************//**
  2732. Initialize some fields of a control block.
        Resets the flush type to BUF_FLUSH_LRU, the I/O fix to BUF_IO_NONE, and
        the buffer-fix count, freed-page clock, access time and both
        modification fields to zero; invalidates the hash link and clears
        is_corrupt. Fields not assigned here (for example state, space, offset
        and zip) are left untouched. */
  2733. UNIV_INLINE
  2734. void
  2735. buf_page_init_low(
  2736. /*==============*/
  2737. buf_page_t* bpage) /*!< in: block to init */
  2738. {
  2739. bpage->flush_type = BUF_FLUSH_LRU;
  2740. bpage->io_fix = BUF_IO_NONE;
  2741. bpage->buf_fix_count = 0;
  2742. bpage->freed_page_clock = 0;
  2743. bpage->access_time = 0;
  2744. bpage->newest_modification = 0;
  2745. bpage->oldest_modification = 0;
  2746. HASH_INVALIDATE(bpage, hash);
  2747. bpage->is_corrupt = FALSE;
        /* file_page_was_freed is only touched in builds that define one of
        the macros tested below. */
  2748. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  2749. bpage->file_page_was_freed = FALSE;
  2750. #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
  2751. }
  2752. /********************************************************************//**
  2753. Inits a page to the buffer buf_pool.
        Sets the block to the file-page state for (space, offset), resets its
        control fields and inserts it into buf_pool->page_hash. If the hash
        already holds a watch sentinel for this page, the sentinel's buffer-fix
        count is added to the block and the sentinel is removed; any other
        existing entry for the page is reported and treated as a fatal error.
        The debug assertions require the caller to hold block->mutex and, under
        UNIV_SYNC_DEBUG, an X-latch on buf_pool->page_hash_latch. */
  2754. static __attribute__((nonnull))
  2755. void
  2756. buf_page_init(
  2757. /*==========*/
  2758. buf_pool_t* buf_pool,/*!< in/out: buffer pool */
  2759. ulint space, /*!< in: space id */
  2760. ulint offset, /*!< in: offset of the page within space
  2761. in units of a page */
  2762. ulint fold, /*!< in: buf_page_address_fold(space,offset) */
  2763. ulint zip_size,/*!< in: compressed page size, or 0 */
  2764. buf_block_t* block) /*!< in/out: block to init */
  2765. {
  2766. buf_page_t* hash_page;
  2767. ut_ad(buf_pool == buf_pool_get(space, offset));
  2768. //ut_ad(buf_pool_mutex_own(buf_pool));
  2769. #ifdef UNIV_SYNC_DEBUG
  2770. ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX));
  2771. #endif
  2772. ut_ad(mutex_own(&(block->mutex)));
  2773. ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
  2774. /* Set the state of the block */
  2775. buf_block_set_file_page(block, space, offset);
  2776. #ifdef UNIV_DEBUG_VALGRIND
  2777. if (!space) {
  2778. /* Silence valid Valgrind warnings about uninitialized
  2779. data being written to data files. There are some unused
  2780. bytes on some pages that InnoDB does not initialize. */
  2781. UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
  2782. }
  2783. #endif /* UNIV_DEBUG_VALGRIND */
  2784. buf_block_init_low(block);
  2785. block->lock_hash_val = lock_rec_hash(space, offset);
  2786. buf_page_init_low(&block->page);
  2787. /* Insert into the hash table of file pages */
  2788. hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
  2789. if (UNIV_LIKELY(!hash_page)) {
        /* Expected case: no entry for this page yet; nothing to do
        before the insert below. */
  2790. } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) {
  2791. /* Preserve the reference count. */
  2792. ulint buf_fix_count;
        /* The sentinel's buf_fix_count is read, and the sentinel
        removed, while holding buf_pool->zip_mutex. */
  2793. mutex_enter(&buf_pool->zip_mutex);
  2794. buf_fix_count = hash_page->buf_fix_count;
  2795. ut_a(buf_fix_count > 0);
  2796. block->page.buf_fix_count += buf_fix_count;
  2797. buf_pool_watch_remove(buf_pool, fold, hash_page);
  2798. mutex_exit(&buf_pool->zip_mutex);
  2799. } else {
        /* An entry that is not a watch sentinel already exists for
        this page: report it and abort via ut_error. */
  2800. fprintf(stderr,
  2801. "InnoDB: Error: page %lu %lu already found"
  2802. " in the hash table: %p, %p\n",
  2803. (ulong) space,
  2804. (ulong) offset,
  2805. (const void*) hash_page, (const void*) block);
  2806. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
        /* The block mutex and page_hash_latch are released before the
        diagnostic print/validate calls below. */
  2807. mutex_exit(&block->mutex);
  2808. //buf_pool_mutex_exit(buf_pool);
  2809. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  2810. buf_print();
  2811. buf_LRU_print();
  2812. buf_validate();
  2813. buf_LRU_validate();
  2814. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  2815. ut_error;
  2816. }
  2817. ut_ad(!block->page.in_zip_hash);
  2818. ut_ad(!block->page.in_page_hash);
  2819. ut_d(block->page.in_page_hash = TRUE);
  2820. HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
  2821. fold, &block->page);
        /* The compressed page size is recorded only when zip_size is
        nonzero. */
  2822. if (zip_size) {
  2823. page_zip_set_size(&block->page.zip, zip_size);
  2824. }
  2825. }
  2826. /********************************************************************//**
  2827. Function which inits a page for read to the buffer buf_pool. If the page is
  2828. (1) already in buf_pool, or
  2829. (2) if we specify to read only ibuf pages and the page is not an ibuf page, or
  2830. (3) if the space is deleted or being deleted,
  2831. then this function does nothing.
  2832. Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
  2833. on the buffer frame. The io-handler must take care that the flag is cleared
  2834. and the lock released later.
        In the compressed-only branch (no uncompressed block allocated) only the
        io_fix flag is set; no frame lock is taken there. On success
        buf_pool->n_pend_reads is incremented. In case (3) *err is set to
        DB_TABLESPACE_DELETED; in every other case it is DB_SUCCESS.
  2835. @return pointer to the block or NULL */
  2836. UNIV_INTERN
  2837. buf_page_t*
  2838. buf_page_init_for_read(
  2839. /*===================*/
  2840. ulint* err, /*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
  2841. ulint mode, /*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
  2842. ulint space, /*!< in: space id */
  2843. ulint zip_size,/*!< in: compressed page size, or 0 */
  2844. ibool unzip, /*!< in: TRUE=request uncompressed page */
  2845. ib_int64_t tablespace_version,
  2846. /*!< in: prevents reading from a wrong
  2847. version of the tablespace in case we have done
  2848. DISCARD + IMPORT */
  2849. ulint offset) /*!< in: page number */
  2850. {
  2851. buf_block_t* block;
  2852. buf_page_t* bpage = NULL;
  2853. buf_page_t* watch_page;
  2854. mtr_t mtr;
  2855. ulint fold;
  2856. ibool lru = FALSE;
  2857. void* data;
  2858. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  2859. ut_ad(buf_pool);
  2860. *err = DB_SUCCESS;
  2861. if (mode == BUF_READ_IBUF_PAGES_ONLY) {
  2862. /* It is a read-ahead within an ibuf routine */
  2863. ut_ad(!ibuf_bitmap_page(zip_size, offset));
        /* mtr is started only in this mode; the early return below and
        func_exit both commit it under the same mode test. */
  2864. ibuf_mtr_start(&mtr);
  2865. if (!recv_no_ibuf_operations
  2866. && !ibuf_page(space, zip_size, offset, &mtr)) {
  2867. ibuf_mtr_commit(&mtr);
  2868. return(NULL);
  2869. }
  2870. } else {
  2871. ut_ad(mode == BUF_READ_ANY_PAGE);
  2872. }
        /* A compressed-only read (nonzero zip_size, no unzip requested,
        recovery not on) uses no uncompressed block; otherwise a free block
        is obtained here, before any of the latches below are taken. */
  2873. if (zip_size && UNIV_LIKELY(!unzip)
  2874. && UNIV_LIKELY(!recv_recovery_is_on())) {
  2875. block = NULL;
  2876. } else {
  2877. block = buf_LRU_get_free_block(buf_pool);
  2878. ut_ad(block);
  2879. ut_ad(buf_pool_from_block(block) == buf_pool);
  2880. }
  2881. fold = buf_page_address_fold(space, offset);
  2882. //buf_pool_mutex_enter(buf_pool);
        /* Acquisition order used in this function: LRU_list_mutex, then
        page_hash_latch (X), then block->mutex or zip_mutex. */
  2883. mutex_enter(&buf_pool->LRU_list_mutex);
  2884. rw_lock_x_lock(&buf_pool->page_hash_latch);
  2885. watch_page = buf_page_hash_get_low(buf_pool, space, offset, fold);
  2886. if (watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page)) {
  2887. /* The page is already in the buffer pool. */
  2888. watch_page = NULL;
  2889. err_exit:
        /* Common failure exit: release the latches, give back the
        free block if one was allocated, and return NULL. */
  2890. if (block) {
  2891. mutex_enter(&block->mutex);
  2892. mutex_exit(&buf_pool->LRU_list_mutex);
  2893. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  2894. buf_LRU_block_free_non_file_page(block, FALSE);
  2895. mutex_exit(&block->mutex);
  2896. }
  2897. else {
  2898. mutex_exit(&buf_pool->LRU_list_mutex);
  2899. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  2900. }
  2901. bpage = NULL;
  2902. goto func_exit;
  2903. }
  2904. if (fil_tablespace_deleted_or_being_deleted_in_mem(
  2905. space, tablespace_version)) {
  2906. /* The page belongs to a space which has been
  2907. deleted or is being deleted. */
  2908. *err = DB_TABLESPACE_DELETED;
  2909. goto err_exit;
  2910. }
  2911. if (block) {
        /* Uncompressed block available: initialize it as the file
        page and insert it into page_hash and the LRU list. */
  2912. bpage = &block->page;
  2913. mutex_enter(&block->mutex);
  2914. ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
  2915. buf_page_init(buf_pool, space, offset, fold, zip_size, block);
  2916. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  2917. /* The block must be put to the LRU list, to the old blocks */
  2918. buf_LRU_add_block(bpage, TRUE/* to old blocks */);
  2919. /* We set a pass-type x-lock on the frame because then
  2920. the same thread which called for the read operation
  2921. (and is running now at this point of code) can wait
  2922. for the read to complete by waiting for the x-lock on
  2923. the frame; if the x-lock were recursive, the same
  2924. thread would illegally get the x-lock before the page
  2925. read is completed. The x-lock is cleared by the
  2926. io-handler thread. */
  2927. rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
  2928. buf_page_set_io_fix(bpage, BUF_IO_READ);
  2929. if (UNIV_UNLIKELY(zip_size)) {
  2930. /* buf_pool->mutex may be released and
  2931. reacquired by buf_buddy_alloc(). Thus, we
  2932. must release block->mutex in order not to
  2933. break the latching order in the reacquisition
  2934. of buf_pool->mutex. We also must defer this
  2935. operation until after the block descriptor has
  2936. been added to buf_pool->LRU and
  2937. buf_pool->page_hash. */
        /* NOTE(review): the comment above speaks of buf_pool->mutex,
        but the mutexes held at this point are LRU_list_mutex and
        block->mutex; confirm which one buf_buddy_alloc() may
        release. */
  2938. mutex_exit(&block->mutex);
  2939. data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
  2940. mutex_enter(&block->mutex);
  2941. block->page.zip.data = data;
  2942. /* To maintain the invariant
  2943. block->in_unzip_LRU_list
  2944. == buf_page_belongs_to_unzip_LRU(&block->page)
  2945. we have to add this block to unzip_LRU
  2946. after block->page.zip.data is set. */
  2947. ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
  2948. buf_unzip_LRU_add_block(block, TRUE);
  2949. }
  2950. mutex_exit(&buf_pool->LRU_list_mutex);
  2951. mutex_exit(&block->mutex);
  2952. } else {
  2953. /* The compressed page must be allocated before the
  2954. control block (bpage), in order to avoid the
  2955. invocation of buf_buddy_relocate_block() on
  2956. uninitialized data. */
  2957. data = buf_buddy_alloc(buf_pool, zip_size, &lru, TRUE);
  2958. /* If buf_buddy_alloc() allocated storage from the LRU list,
  2959. it released and reacquired buf_pool->mutex. Thus, we must
  2960. check the page_hash again, as it may have been modified. */
  2961. if (UNIV_UNLIKELY(lru)) {
  2962. watch_page = buf_page_hash_get_low(
  2963. buf_pool, space, offset, fold);
  2964. if (watch_page
  2965. && !buf_pool_watch_is_sentinel(buf_pool,
  2966. watch_page)) {
  2967. /* The block was added by some other thread. */
  2968. watch_page = NULL;
  2969. buf_buddy_free(buf_pool, data, zip_size, TRUE);
  2970. mutex_exit(&buf_pool->LRU_list_mutex);
  2971. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  2972. bpage = NULL;
  2973. goto func_exit;
  2974. }
  2975. }
        /* Build a compressed-only page descriptor around the
        allocated compressed frame. */
  2976. bpage = buf_page_alloc_descriptor();
  2977. /* Initialize the buf_pool pointer. */
  2978. bpage->buf_pool_index = buf_pool_index(buf_pool);
  2979. page_zip_des_init(&bpage->zip);
  2980. page_zip_set_size(&bpage->zip, zip_size);
  2981. bpage->zip.data = data;
  2982. mutex_enter(&buf_pool->zip_mutex);
  2983. UNIV_MEM_DESC(bpage->zip.data,
  2984. page_zip_get_size(&bpage->zip), bpage);
  2985. buf_page_init_low(bpage);
  2986. bpage->state = BUF_BLOCK_ZIP_PAGE;
  2987. bpage->space = space;
  2988. bpage->offset = offset;
  2989. #ifdef UNIV_DEBUG
  2990. bpage->in_page_hash = FALSE;
  2991. bpage->in_zip_hash = FALSE;
  2992. bpage->in_flush_list = FALSE;
  2993. bpage->in_free_list = FALSE;
  2994. #endif /* UNIV_DEBUG */
  2995. bpage->in_LRU_list = FALSE;
  2996. ut_d(bpage->in_page_hash = TRUE);
        /* watch_page can only be non-NULL here if it is a watch
        sentinel: both lookups above reset it to NULL otherwise. */
  2997. if (UNIV_LIKELY_NULL(watch_page)) {
  2998. /* Preserve the reference count. */
  2999. ulint buf_fix_count = watch_page->buf_fix_count;
  3000. ut_a(buf_fix_count > 0);
  3001. bpage->buf_fix_count += buf_fix_count;
  3002. ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page));
  3003. buf_pool_watch_remove(buf_pool, fold, watch_page);
  3004. }
  3005. HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold,
  3006. bpage);
  3007. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  3008. /* The block must be put to the LRU list, to the old blocks
  3009. The zip_size is already set into the page zip */
  3010. buf_LRU_add_block(bpage, TRUE/* to old blocks */);
  3011. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  3012. buf_LRU_insert_zip_clean(bpage);
  3013. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  3014. mutex_exit(&buf_pool->LRU_list_mutex);
  3015. buf_page_set_io_fix(bpage, BUF_IO_READ);
  3016. mutex_exit(&buf_pool->zip_mutex);
  3017. }
        /* Count the pending read under the buf_pool mutex. The failure
        paths jump directly to func_exit and skip this. */
  3018. buf_pool_mutex_enter(buf_pool);
  3019. buf_pool->n_pend_reads++;
  3020. buf_pool_mutex_exit(buf_pool);
  3021. func_exit:
  3022. //buf_pool_mutex_exit(buf_pool);
  3023. if (mode == BUF_READ_IBUF_PAGES_ONLY) {
  3024. ibuf_mtr_commit(&mtr);
  3025. }
  3026. ut_ad(!bpage || buf_page_in_file(bpage));
  3027. return(bpage);
  3028. }
  3029. /********************************************************************//**
  3030. Initializes a page to the buffer buf_pool. The page is usually not read
  3031. from a file even if it cannot be found in the buffer buf_pool. This is one
  3032. of the functions which perform to a block a state transition NOT_USED =>
  3033. FILE_PAGE (the other is buf_page_get_gen).
  Outline of what the body does:
  - a free block is taken with buf_LRU_get_free_block() before any latch
    is acquired;
  - if the page hash already holds a file page for (space, offset) that is
    not a watch sentinel, the free block is handed back with
    buf_block_free() and the existing page is returned through
    buf_page_get_with_no_latch();
  - otherwise the free block is initialized with buf_page_init(), added to
    the LRU list, buffer-fixed, pushed to the mtr memo as MTR_MEMO_BUF_FIX,
    and selected header fields of its frame are reset.
  3034. @return pointer to the block, page bufferfixed */
  3035. UNIV_INTERN
  3036. buf_block_t*
  3037. buf_page_create(
  3038. /*============*/
  3039. ulint space, /*!< in: space id */
  3040. ulint offset, /*!< in: offset of the page within space in units of
  3041. a page */
  3042. ulint zip_size,/*!< in: compressed page size, or 0 */
  3043. mtr_t* mtr) /*!< in: mini-transaction handle */
  3044. {
  3045. buf_frame_t* frame;
  3046. buf_block_t* block;
  3047. ulint fold;
  3048. buf_block_t* free_block = NULL;
  3049. buf_pool_t* buf_pool = buf_pool_get(space, offset);
  3050. ut_ad(mtr);
  3051. ut_ad(mtr->state == MTR_ACTIVE);
  3052. ut_ad(space || !zip_size);
  /* The free block is obtained before LRU_list_mutex and
  page_hash_latch are taken below. */
  3053. free_block = buf_LRU_get_free_block(buf_pool);
  3054. fold = buf_page_address_fold(space, offset);
  3055. //buf_pool_mutex_enter(buf_pool);
  3056. mutex_enter(&buf_pool->LRU_list_mutex);
  3057. rw_lock_x_lock(&buf_pool->page_hash_latch);
  3058. block = (buf_block_t*) buf_page_hash_get_low(
  3059. buf_pool, space, offset, fold);
  3060. if (block
  3061. && buf_page_in_file(&block->page)
  3062. && !buf_pool_watch_is_sentinel(buf_pool, &block->page)) {
  3063. #ifdef UNIV_IBUF_COUNT_DEBUG
  3064. ut_a(ibuf_count_get(space, offset) == 0);
  3065. #endif
  3066. #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
  3067. block->page.file_page_was_freed = FALSE;
  3068. #endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
  3069. /* Page can be found in buf_pool */
  3070. //buf_pool_mutex_exit(buf_pool);
  3071. mutex_exit(&buf_pool->LRU_list_mutex);
  3072. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  /* The preallocated block is not needed: give it back and
  return the page that is already resident. */
  3073. buf_block_free(free_block);
  3074. return(buf_page_get_with_no_latch(space, zip_size,
  3075. offset, mtr));
  3076. }
  3077. /* If we get here, the page was not in buf_pool: init it there */
  3078. #ifdef UNIV_DEBUG
  3079. if (buf_debug_prints) {
  3080. fprintf(stderr, "Creating space %lu page %lu to buffer\n",
  3081. (ulong) space, (ulong) offset);
  3082. }
  3083. #endif /* UNIV_DEBUG */
  3084. block = free_block;
  3085. mutex_enter(&block->mutex);
  3086. buf_page_init(buf_pool, space, offset, fold, zip_size,block);
  /* page_hash_latch is dropped here; LRU_list_mutex and
  block->mutex remain held. */
  3087. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  3088. /* The block must be put to the LRU list */
  3089. buf_LRU_add_block(&block->page, FALSE);
  3090. buf_block_buf_fix_inc(block, __FILE__, __LINE__);
  3091. buf_pool->stat.n_pages_created++;
  3092. if (zip_size) {
  3093. void* data;
  3094. ibool lru;
  /* NOTE(review): the two comments below still speak of
  buf_pool->mutex, while this function holds
  buf_pool->LRU_list_mutex at this point - confirm which mutex
  buf_buddy_alloc() may release and reacquire. */
  3095. /* Prevent race conditions during buf_buddy_alloc(),
  3096. which may release and reacquire buf_pool->mutex,
  3097. by IO-fixing and X-latching the block. */
  3098. buf_page_set_io_fix(&block->page, BUF_IO_READ);
  3099. rw_lock_x_lock(&block->lock);
  3100. mutex_exit(&block->mutex);
  3101. /* buf_pool->mutex may be released and reacquired by
  3102. buf_buddy_alloc(). Thus, we must release block->mutex
  3103. in order not to break the latching order in
  3104. the reacquisition of buf_pool->mutex. We also must
  3105. defer this operation until after the block descriptor
  3106. has been added to buf_pool->LRU and buf_pool->page_hash. */
  3107. data = buf_buddy_alloc(buf_pool, zip_size, &lru, FALSE);
  3108. mutex_enter(&block->mutex);
  3109. block->page.zip.data = data;
  3110. /* To maintain the invariant
  3111. block->in_unzip_LRU_list
  3112. == buf_page_belongs_to_unzip_LRU(&block->page)
  3113. we have to add this block to unzip_LRU after
  3114. block->page.zip.data is set. */
  3115. ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
  3116. buf_unzip_LRU_add_block(block, FALSE);
  /* Undo the temporary IO-fix and X-latch taken above. */
  3117. buf_page_set_io_fix(&block->page, BUF_IO_NONE);
  3118. rw_lock_x_unlock(&block->lock);
  3119. }
  3120. mutex_exit(&buf_pool->LRU_list_mutex);
  /* Record the buffer-fix taken above in the mini-transaction memo. */
  3121. mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
  3122. buf_page_set_accessed(&block->page);
  3123. mutex_exit(&block->mutex);
  3124. /* Delete possible entries for the page from the insert buffer:
  3125. such can exist if the page belonged to an index which was dropped */
  3126. ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);
  3127. /* Flush pages from the end of the LRU list if necessary */
  3128. buf_flush_free_margin(buf_pool, FALSE);
  /* Fill the 4-byte FIL_PAGE_PREV and FIL_PAGE_NEXT fields with 0xff
  bytes and stamp the page type as FIL_PAGE_TYPE_ALLOCATED. */
  3129. frame = block->frame;
  3130. memset(frame + FIL_PAGE_PREV, 0xff, 4);
  3131. memset(frame + FIL_PAGE_NEXT, 0xff, 4);
  3132. mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);
  3133. /* Reset to zero the file flush lsn field in the page; if the first
  3134. page of an ibdata file is 'created' in this function into the buffer
  3135. pool then we lose the original contents of the file flush lsn stamp.
  3136. Then InnoDB could in a crash recovery print a big, false, corruption
  3137. warning if the stamp contains an lsn bigger than the ib_logfile lsn. */
  3138. memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);
  3139. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  /* buf_validate() is only evaluated when the incremented counter is
  a multiple of 5771. */
  3140. ut_a(++buf_dbg_counter % 5771 || buf_validate());
  3141. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  3142. #ifdef UNIV_IBUF_COUNT_DEBUG
  3143. ut_a(ibuf_count_get(buf_block_get_space(block),
  3144. buf_block_get_page_no(block)) == 0);
  3145. #endif
  3146. return(block);
  3147. }
  3148. /********************************************************************//**
  3149. Mark a table with the specified space pointed by bpage->space corrupted.
  3150. Also remove the bpage from LRU list.
  The page must be IO-fixed for read and not buffer-fixed (both are debug
  assertions below). The IO-fix is cleared, the X-latch of an uncompressed
  block is released, and buf_pool->n_pend_reads is decremented regardless
  of the outcome.
  3151. @return TRUE if successful, i.e. dict_set_corrupted_by_space() returned
  nonzero and the page was freed with buf_LRU_free_one_page(); FALSE
  otherwise */
  3152. static
  3153. ibool
  3154. buf_mark_space_corrupt(
  3155. /*===================*/
  3156. buf_page_t* bpage) /*!< in: pointer to the block in question */
  3157. {
  3158. buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
  3159. const ibool uncompressed = (buf_page_get_state(bpage)
  3160. == BUF_BLOCK_FILE_PAGE);
  /* The space id is copied out before the page may be freed below. */
  3161. ulint space = bpage->space;
  3162. ibool ret = TRUE;
  3163. /* First unfix and release lock on the bpage */
  3164. //buf_pool_mutex_enter(buf_pool);
  3165. mutex_enter(&buf_pool->LRU_list_mutex);
  3166. rw_lock_x_lock(&buf_pool->page_hash_latch);
  3167. mutex_enter(buf_page_get_mutex(bpage));
  3168. ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
  3169. ut_ad(bpage->buf_fix_count == 0);
  3170. /* Set BUF_IO_NONE before we remove the block from LRU list */
  3171. buf_page_set_io_fix(bpage, BUF_IO_NONE);
  3172. if (uncompressed) {
  3173. rw_lock_x_unlock_gen(
  3174. &((buf_block_t*) bpage)->lock,
  3175. BUF_IO_READ);
  3176. }
  3177. /* Find the table with specified space id, and mark it corrupted */
  3178. if (dict_set_corrupted_by_space(space)) {
  3179. buf_LRU_free_one_page(bpage);
  3180. } else {
  3181. ret = FALSE;
  3182. }
  /* The pending-read counter is adjusted under the buf_pool mutex. */
  3183. buf_pool_mutex_enter(buf_pool);
  3184. ut_ad(buf_pool->n_pend_reads > 0);
  3185. buf_pool->n_pend_reads--;
  3186. buf_pool_mutex_exit(buf_pool);
  /* NOTE(review): bpage may have gone through buf_LRU_free_one_page()
  above; this assumes buf_page_get_mutex(bpage) still yields the mutex
  that was entered earlier - confirm. */
  3187. mutex_exit(buf_page_get_mutex(bpage));
  3188. //buf_pool_mutex_exit(buf_pool);
  3189. mutex_exit(&buf_pool->LRU_list_mutex);
  3190. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  3191. return(ret);
  3192. }
  3193. /********************************************************************//**
  3194. Completes an asynchronous read or write request of a file page to or from
  3195. the buffer pool.
  For a read: the page is decompressed if needed, its stored space id and
  page number are compared with the control block, the checksum is checked
  (unless the page is already flagged corrupt and srv_pass_corrupt_table is
  set), redo is applied when recovery is on, and buffered insert-buffer
  operations are merged for uncompressed pages.
  For a write: buf_flush_write_complete() is called and the S-latch of an
  uncompressed block is released.
  In both cases the IO-fix is reset to BUF_IO_NONE and the matching
  statistics counter is bumped.
  3196. @return TRUE if successful; FALSE only when a corrupt page was read and
  buf_mark_space_corrupt() succeeded for it */
  3197. UNIV_INTERN
  3198. ibool
  3199. buf_page_io_complete(
  3200. /*=================*/
  3201. buf_page_t* bpage) /*!< in: pointer to the block in question */
  3202. {
  3203. enum buf_io_fix io_type;
  3204. buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
  3205. const ibool uncompressed = (buf_page_get_state(bpage)
  3206. == BUF_BLOCK_FILE_PAGE);
  3207. ibool have_LRU_mutex = FALSE;
  3208. mutex_t* block_mutex;
  3209. ut_a(buf_page_in_file(bpage));
  3210. /* We do not need protect io_fix here by mutex to read
  3211. it because this is the only function where we can change the value
  3212. from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code
  3213. ensures that this is the only thread that handles the i/o for this
  3214. block. */
  3215. io_type = buf_page_get_io_fix_unlocked(bpage);
  3216. ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);
  3217. if (io_type == BUF_IO_READ) {
  3218. ulint read_page_no;
  3219. ulint read_space_id;
  3220. byte* frame;
  /* Choose the frame to verify: the compressed data when the page
  has a zip size (decompressing first if an uncompressed frame
  exists), otherwise the uncompressed frame. */
  3221. if (buf_page_get_zip_size(bpage)) {
  3222. frame = bpage->zip.data;
  3223. buf_pool->n_pend_unzip++;
  3224. if (uncompressed
  3225. && !buf_zip_decompress((buf_block_t*) bpage,
  3226. FALSE)) {
  3227. buf_pool->n_pend_unzip--;
  3228. goto corrupt;
  3229. }
  3230. buf_pool->n_pend_unzip--;
  3231. } else {
  3232. ut_a(uncompressed);
  3233. frame = ((buf_block_t*) bpage)->frame;
  3234. }
  3235. /* If this page is not uninitialized and not in the
  3236. doublewrite buffer, then the page number and space id
  3237. should be the same as in block. */
  3238. read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
  3239. read_space_id = mach_read_from_4(
  3240. frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
  /* The checks in this if/else chain only print diagnostics; none
  of them aborts the read. */
  3241. if ((bpage->space == TRX_SYS_SPACE
  3242. || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE))
  3243. && trx_doublewrite_page_inside(bpage->offset)) {
  3244. ut_print_timestamp(stderr);
  3245. fprintf(stderr,
  3246. " InnoDB: Error: reading page %lu\n"
  3247. "InnoDB: which is in the"
  3248. " doublewrite buffer!\n",
  3249. (ulong) bpage->offset);
  3250. } else if (!read_space_id && !read_page_no) {
  3251. /* This is likely an uninitialized page. */
  3252. } else if ((bpage->space
  3253. && bpage->space != read_space_id)
  3254. || bpage->offset != read_page_no) {
  3255. /* We did not compare space_id to read_space_id
  3256. if bpage->space == 0, because the field on the
  3257. page may contain garbage in MySQL < 4.1.1,
  3258. which only supported bpage->space == 0. */
  3259. ut_print_timestamp(stderr);
  3260. fprintf(stderr,
  3261. " InnoDB: Error: space id and page n:o"
  3262. " stored in the page\n"
  3263. "InnoDB: read in are %lu:%lu,"
  3264. " should be %lu:%lu!\n",
  3265. (ulong) read_space_id, (ulong) read_page_no,
  3266. (ulong) bpage->space,
  3267. (ulong) bpage->offset);
  3268. }
  /* The checksum check is skipped only when the page is already
  flagged corrupt and srv_pass_corrupt_table is set. The "corrupt"
  label inside this block is also the target of the failed
  decompression jump above. */
  3269. if (UNIV_LIKELY(!bpage->is_corrupt ||
  3270. !srv_pass_corrupt_table)) {
  3271. /* From version 3.23.38 up we store the page checksum
  3272. to the 4 first bytes of the page end lsn field */
  3273. if (buf_page_is_corrupted(TRUE, frame,
  3274. buf_page_get_zip_size(bpage))) {
  3275. corrupt:
  3276. fprintf(stderr,
  3277. "InnoDB: Database page corruption on disk"
  3278. " or a failed\n"
  3279. "InnoDB: file read of page %lu.\n"
  3280. "InnoDB: You may have to recover"
  3281. " from a backup.\n",
  3282. (ulong) bpage->offset);
  3283. buf_page_print(frame, buf_page_get_zip_size(bpage),
  3284. BUF_PAGE_PRINT_NO_CRASH);
  /* The same message is printed again after the page dump. */
  3285. fprintf(stderr,
  3286. "InnoDB: Database page corruption on disk"
  3287. " or a failed\n"
  3288. "InnoDB: file read of page %lu.\n"
  3289. "InnoDB: You may have to recover"
  3290. " from a backup.\n",
  3291. (ulong) bpage->offset);
  3292. fputs("InnoDB: It is also possible that"
  3293. " your operating\n"
  3294. "InnoDB: system has corrupted its"
  3295. " own file cache\n"
  3296. "InnoDB: and rebooting your computer"
  3297. " removes the\n"
  3298. "InnoDB: error.\n"
  3299. "InnoDB: If the corrupt page is an index page\n"
  3300. "InnoDB: you can also try to"
  3301. " fix the corruption\n"
  3302. "InnoDB: by dumping, dropping,"
  3303. " and reimporting\n"
  3304. "InnoDB: the corrupt table."
  3305. " You can use CHECK\n"
  3306. "InnoDB: TABLE to scan your"
  3307. " table for corruption.\n"
  3308. "InnoDB: See also "
  3309. REFMAN "forcing-innodb-recovery.html\n"
  3310. "InnoDB: about forcing recovery.\n", stderr);
  /* With srv_pass_corrupt_table set and a non-system space
  below SRV_LOG_SPACE_FIRST_ID, the space, its tables and the
  page are flagged corrupt and processing continues. Otherwise,
  unless srv_force_recovery allows ignoring corruption, either
  the space is marked corrupt and FALSE is returned, or the
  server stops with ut_error. */
  3311. if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space)
  3312. && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
  3313. trx_t* trx;
  3314. fprintf(stderr,
  3315. "InnoDB: space %u will be treated as corrupt.\n",
  3316. bpage->space);
  3317. fil_space_set_corrupt(bpage->space);
  3318. trx = innobase_get_trx();
  3319. if (trx && trx->dict_operation_lock_mode == RW_X_LATCH) {
  3320. dict_table_set_corrupt_by_space(bpage->space, FALSE);
  3321. } else {
  3322. dict_table_set_corrupt_by_space(bpage->space, TRUE);
  3323. }
  3324. bpage->is_corrupt = TRUE;
  3325. } else
  3326. if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
  3327. /* If page space id is larger than TRX_SYS_SPACE
  3328. (0), we will attempt to mark the corresponding
  3329. table as corrupted instead of crashing server */
  3330. if (bpage->space > TRX_SYS_SPACE
  3331. && buf_mark_space_corrupt(bpage)) {
  3332. return(FALSE);
  3333. } else {
  3334. fputs("InnoDB: Ending processing"
  3335. " because of"
  3336. " a corrupt database page.\n",
  3337. stderr);
  3338. ut_error;
  3339. }
  3340. }
  3341. }
  3342. } /**/
  3343. if (recv_recovery_is_on()) {
  3344. /* Pages must be uncompressed for crash recovery. */
  3345. ut_a(uncompressed);
  3346. recv_recover_page(TRUE, (buf_block_t*) bpage);
  3347. }
  3348. if (uncompressed && !recv_no_ibuf_operations) {
  3349. buf_block_t* block;
  3350. ibool update_ibuf_bitmap;
  /* For a page flagged corrupt (with srv_pass_corrupt_table set)
  no block is passed and the bitmap update is disabled. */
  3351. if (UNIV_UNLIKELY(bpage->is_corrupt &&
  3352. srv_pass_corrupt_table)) {
  3353. block = NULL;
  3354. update_ibuf_bitmap = FALSE;
  3355. } else {
  3356. block = (buf_block_t *) bpage;
  3357. update_ibuf_bitmap = TRUE;
  3358. }
  3359. ibuf_merge_or_delete_for_page(
  3360. block, bpage->space,
  3361. bpage->offset, buf_page_get_zip_size(bpage),
  3362. update_ibuf_bitmap);
  3363. }
  3364. }
  /* Decide whether LRU_list_mutex must be taken ahead of the block
  mutex. The state and flush type are read here without the block mutex,
  so the same condition is re-evaluated under the block mutex below and
  the acquisition is retried with LRU_list_mutex if the guess was
  wrong. */
  3365. if (io_type == BUF_IO_WRITE
  3366. && (
  3367. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  3368. /* to keep consistency at buf_LRU_insert_zip_clean() */
  3369. buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY ||
  3370. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  3371. buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)) {
  3372. have_LRU_mutex = TRUE; /* optimistic */
  3373. }
  3374. retry_mutex:
  3375. if (have_LRU_mutex)
  3376. mutex_enter(&buf_pool->LRU_list_mutex);
  3377. block_mutex = buf_page_get_mutex_enter(bpage);
  3378. ut_a(block_mutex);
  3379. if (UNIV_UNLIKELY(io_type == BUF_IO_WRITE
  3380. && (
  3381. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  3382. buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY
  3383. ||
  3384. #endif
  3385. buf_page_get_flush_type(bpage) == BUF_FLUSH_LRU)
  3386. && !have_LRU_mutex)) {
  3387. mutex_exit(block_mutex);
  3388. have_LRU_mutex = TRUE;
  3389. goto retry_mutex;
  3390. }
  3391. buf_pool_mutex_enter(buf_pool);
  3392. #ifdef UNIV_IBUF_COUNT_DEBUG
  3393. if (io_type == BUF_IO_WRITE || uncompressed) {
  3394. /* For BUF_IO_READ of compressed-only blocks, the
  3395. buffered operations will be merged by buf_page_get_gen()
  3396. after the block has been uncompressed. */
  3397. ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
  3398. }
  3399. #endif
  3400. /* Because this thread which does the unlocking is not the same that
  3401. did the locking, we use a pass value != 0 in unlock, which simply
  3402. removes the newest lock debug record, without checking the thread
  3403. id. */
  3404. buf_page_set_io_fix(bpage, BUF_IO_NONE);
  3405. switch (io_type) {
  3406. case BUF_IO_READ:
  3407. /* NOTE that the call to ibuf may have moved the ownership of
  3408. the x-latch to this OS thread: do not let this confuse you in
  3409. debugging! */
  /* LRU_list_mutex is never taken on the read path. */
  3410. ut_a(!have_LRU_mutex);
  3411. ut_ad(buf_pool->n_pend_reads > 0);
  3412. buf_pool->n_pend_reads--;
  3413. buf_pool->stat.n_pages_read++;
  3414. if (uncompressed) {
  3415. rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
  3416. BUF_IO_READ);
  3417. }
  3418. break;
  3419. case BUF_IO_WRITE:
  3420. /* Write means a flush operation: call the completion
  3421. routine in the flush system */
  3422. buf_flush_write_complete(bpage);
  3423. if (have_LRU_mutex)
  3424. mutex_exit(&buf_pool->LRU_list_mutex);
  3425. if (uncompressed) {
  3426. rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
  3427. BUF_IO_WRITE);
  3428. }
  3429. buf_pool->stat.n_pages_written++;
  3430. break;
  3431. default:
  3432. ut_error;
  3433. }
  3434. #ifdef UNIV_DEBUG
  3435. if (buf_debug_prints) {
  3436. fprintf(stderr, "Has %s page space %lu page no %lu\n",
  3437. io_type == BUF_IO_READ ? "read" : "written",
  3438. (ulong) buf_page_get_space(bpage),
  3439. (ulong) buf_page_get_page_no(bpage));
  3440. }
  3441. #endif /* UNIV_DEBUG */
  3442. buf_pool_mutex_exit(buf_pool);
  3443. mutex_exit(block_mutex);
  3444. return(TRUE);
  3445. }
  3446. /********************************************************************//**
  3447. */
  3448. UNIV_INTERN
  3449. buf_block_t*
  3450. buf_page_from_array(
  3451. /*================*/
  3452. buf_pool_t* buf_pool,
  3453. ulint n_block)
  3454. {
  3455. ulint n_chunks, offset;
  3456. buf_chunk_t* chunk;
  3457. ut_a(n_block < buf_pool->curr_size);
  3458. chunk = buf_pool->chunks;
  3459. offset = n_block;
  3460. for (n_chunks = buf_pool->n_chunks; n_chunks--; chunk++) {
  3461. if (offset < chunk->size) {
  3462. return(&chunk->blocks[offset]);
  3463. }
  3464. offset -= chunk->size;
  3465. }
  3466. ut_error;
  3467. return(NULL);
  3468. }
  3469. /*********************************************************************//**
  3470. Asserts that all file pages in the buffer are in a replaceable state.
  3471. @return TRUE */
  3472. static
  3473. ibool
  3474. buf_all_freed_instance(
  3475. /*===================*/
  3476. buf_pool_t* buf_pool) /*!< in: buffer pool instancce */
  3477. {
  3478. ulint i;
  3479. buf_chunk_t* chunk;
  3480. ut_ad(buf_pool);
  3481. //buf_pool_mutex_enter(buf_pool);
  3482. mutex_enter(&buf_pool->LRU_list_mutex);
  3483. rw_lock_x_lock(&buf_pool->page_hash_latch);
  3484. chunk = buf_pool->chunks;
  3485. for (i = buf_pool->n_chunks; i--; chunk++) {
  3486. const buf_block_t* block = buf_chunk_not_freed(chunk);
  3487. if (UNIV_LIKELY_NULL(block)) {
  3488. fprintf(stderr,
  3489. "Page %lu %lu still fixed or dirty\n",
  3490. (ulong) block->page.space,
  3491. (ulong) block->page.offset);
  3492. ut_error;
  3493. }
  3494. }
  3495. //buf_pool_mutex_exit(buf_pool);
  3496. mutex_exit(&buf_pool->LRU_list_mutex);
  3497. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  3498. return(TRUE);
  3499. }
  3500. /*********************************************************************//**
  3501. Invalidates file pages in one buffer pool instance */
  3502. static
  3503. void
  3504. buf_pool_invalidate_instance(
  3505. /*=========================*/
  3506. buf_pool_t* buf_pool) /*!< in: buffer pool instance */
  3507. {
  3508. ibool freed;
  3509. enum buf_flush i;
  3510. buf_pool_mutex_enter(buf_pool);
  3511. for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
  3512. /* As this function is called during startup and
  3513. during redo application phase during recovery, InnoDB
  3514. is single threaded (apart from IO helper threads) at
  3515. this stage. No new write batch can be in intialization
  3516. stage at this point. */
  3517. ut_ad(buf_pool->init_flush[i] == FALSE);
  3518. /* However, it is possible that a write batch that has
  3519. been posted earlier is still not complete. For buffer
  3520. pool invalidation to proceed we must ensure there is NO
  3521. write activity happening. */
  3522. if (buf_pool->n_flush[i] > 0) {
  3523. buf_pool_mutex_exit(buf_pool);
  3524. buf_flush_wait_batch_end(buf_pool, i);
  3525. buf_pool_mutex_enter(buf_pool);
  3526. }
  3527. }
  3528. buf_pool_mutex_exit(buf_pool);
  3529. ut_ad(buf_all_freed_instance(buf_pool));
  3530. freed = TRUE;
  3531. while (freed) {
  3532. freed = buf_LRU_search_and_free_block(buf_pool, 100);
  3533. }
  3534. //buf_pool_mutex_enter(buf_pool);
  3535. mutex_enter(&buf_pool->LRU_list_mutex);
  3536. ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
  3537. ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);
  3538. buf_pool->freed_page_clock = 0;
  3539. buf_pool->LRU_old = NULL;
  3540. buf_pool->LRU_old_len = 0;
  3541. buf_pool->LRU_flush_ended = 0;
  3542. memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
  3543. buf_refresh_io_stats(buf_pool);
  3544. //buf_pool_mutex_exit(buf_pool);
  3545. mutex_exit(&buf_pool->LRU_list_mutex);
  3546. }
  3547. /*********************************************************************//**
  3548. Invalidates the file pages in the buffer pool when an archive recovery is
  3549. completed. All the file pages buffered must be in a replaceable state when
  3550. this function is called: not latched and not modified. */
  3551. UNIV_INTERN
  3552. void
  3553. buf_pool_invalidate(void)
  3554. /*=====================*/
  3555. {
  3556. ulint i;
  3557. for (i = 0; i < srv_buf_pool_instances; i++) {
  3558. buf_pool_invalidate_instance(buf_pool_from_array(i));
  3559. }
  3560. }
  3561. #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  3562. /*********************************************************************//**
  3563. Validates data in one buffer pool instance
  The check walks, in this order, every block of every chunk, the
  zip_clean list and the flush_list, while holding LRU_list_mutex and the
  page hash latch in exclusive mode. It counts the pages it sees and
  asserts that file pages are found in the page hash, that IO-fixed blocks
  hold the expected rw-lock, and that the counts match the lengths of the
  LRU and flush lists. Finally buf_LRU_validate() and buf_flush_validate()
  are run after the latches have been released.
  3564. @return TRUE */
  3565. static
  3566. ibool
  3567. buf_pool_validate_instance(
  3568. /*=======================*/
  3569. buf_pool_t* buf_pool) /*!< in: buffer pool instance */
  3570. {
  3571. buf_page_t* b;
  3572. buf_chunk_t* chunk;
  3573. ulint i;
  /* Counters filled in by the scans below. The three per-flush-type
  counters are only compared in code that is currently commented out
  near the end of the function. */
  3574. ulint n_single_flush = 0;
  3575. ulint n_lru_flush = 0;
  3576. ulint n_list_flush = 0;
  3577. ulint n_lru = 0;
  3578. ulint n_flush = 0;
  3579. ulint n_free = 0;
  3580. ulint n_zip = 0;
  3581. ut_ad(buf_pool);
  3582. //buf_pool_mutex_enter(buf_pool);
  3583. mutex_enter(&buf_pool->LRU_list_mutex);
  3584. rw_lock_x_lock(&buf_pool->page_hash_latch);
  3585. /* for keep the new latch order, it cannot validate correctly... */
  3586. chunk = buf_pool->chunks;
  3587. /* Check the uncompressed blocks. */
  3588. for (i = buf_pool->n_chunks; i--; chunk++) {
  3589. ulint j;
  3590. buf_block_t* block = chunk->blocks;
  3591. for (j = chunk->size; j--; block++) {
  /* Each block is inspected under its own mutex. */
  3592. mutex_enter(&block->mutex);
  3593. switch (buf_block_get_state(block)) {
  3594. case BUF_BLOCK_ZIP_FREE:
  3595. case BUF_BLOCK_ZIP_PAGE:
  3596. case BUF_BLOCK_ZIP_DIRTY:
  3597. /* These should only occur on
  3598. zip_clean, zip_free[], or flush_list. */
  3599. ut_error;
  3600. break;
  3601. case BUF_BLOCK_FILE_PAGE:
  3602. ut_a(buf_page_hash_get(buf_pool,
  3603. buf_block_get_space(
  3604. block),
  3605. buf_block_get_page_no(
  3606. block))
  3607. == &block->page);
  3608. #ifdef UNIV_IBUF_COUNT_DEBUG
  3609. ut_a(buf_page_get_io_fix(&block->page)
  3610. == BUF_IO_READ
  3611. || !ibuf_count_get(buf_block_get_space(
  3612. block),
  3613. buf_block_get_page_no(
  3614. block)));
  3615. #endif
  3616. switch (buf_page_get_io_fix(&block->page)) {
  3617. case BUF_IO_NONE:
  3618. break;
  3619. case BUF_IO_WRITE:
  3620. switch (buf_page_get_flush_type(
  3621. &block->page)) {
  3622. case BUF_FLUSH_LRU:
  3623. n_lru_flush++;
  3624. ut_a(rw_lock_is_locked(
  3625. &block->lock,
  3626. RW_LOCK_SHARED));
  3627. break;
  3628. case BUF_FLUSH_LIST:
  3629. n_list_flush++;
  3630. break;
  3631. case BUF_FLUSH_SINGLE_PAGE:
  3632. n_single_flush++;
  3633. break;
  3634. default:
  3635. ut_error;
  3636. }
  3637. break;
  3638. case BUF_IO_READ:
  3639. ut_a(rw_lock_is_locked(&block->lock,
  3640. RW_LOCK_EX));
  3641. break;
  3642. case BUF_IO_PIN:
  3643. break;
  3644. }
  /* Every file page found in a chunk is counted as an LRU
  member. */
  3645. n_lru++;
  3646. break;
  3647. case BUF_BLOCK_NOT_USED:
  3648. n_free++;
  3649. break;
  3650. case BUF_BLOCK_READY_FOR_USE:
  3651. case BUF_BLOCK_MEMORY:
  3652. case BUF_BLOCK_REMOVE_HASH:
  3653. /* do nothing */
  3654. break;
  3655. }
  3656. mutex_exit(&block->mutex);
  3657. }
  3658. }
  3659. mutex_enter(&buf_pool->zip_mutex);
  3660. /* Check clean compressed-only blocks. */
  3661. for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
  3662. b = UT_LIST_GET_NEXT(zip_list, b)) {
  3663. ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
  3664. switch (buf_page_get_io_fix(b)) {
  3665. case BUF_IO_NONE:
  3666. case BUF_IO_PIN:
  3667. /* All clean blocks should be I/O-unfixed. */
  3668. break;
  3669. case BUF_IO_READ:
  3670. /* In buf_LRU_free_block(), we temporarily set
  3671. b->io_fix = BUF_IO_READ for a newly allocated
  3672. control block in order to prevent
  3673. buf_page_get_gen() from decompressing the block. */
  3674. break;
  3675. default:
  3676. ut_error;
  3677. break;
  3678. }
  3679. /* It is OK to read oldest_modification here because
  3680. we have acquired buf_pool->zip_mutex above which acts
  3681. as the 'block->mutex' for these bpages. */
  3682. ut_a(!b->oldest_modification);
  3683. ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
  3684. n_lru++;
  3685. n_zip++;
  3686. }
  3687. /* Check dirty blocks. */
  3688. buf_flush_list_mutex_enter(buf_pool);
  3689. for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
  3690. b = UT_LIST_GET_NEXT(flush_list, b)) {
  3691. ut_ad(b->in_flush_list);
  3692. ut_a(b->oldest_modification);
  3693. n_flush++;
  3694. switch (buf_page_get_state(b)) {
  3695. case BUF_BLOCK_ZIP_DIRTY:
  /* Compressed-only dirty pages were not seen in the chunk
  scan, so they are added to the LRU and zip counts here. */
  3696. n_lru++;
  3697. n_zip++;
  3698. switch (buf_page_get_io_fix(b)) {
  3699. case BUF_IO_NONE:
  3700. case BUF_IO_READ:
  3701. case BUF_IO_PIN:
  3702. break;
  3703. case BUF_IO_WRITE:
  3704. switch (buf_page_get_flush_type(b)) {
  3705. case BUF_FLUSH_LRU:
  3706. n_lru_flush++;
  3707. break;
  3708. case BUF_FLUSH_LIST:
  3709. n_list_flush++;
  3710. break;
  3711. case BUF_FLUSH_SINGLE_PAGE:
  3712. n_single_flush++;
  3713. break;
  3714. default:
  3715. ut_error;
  3716. }
  3717. break;
  3718. }
  3719. break;
  3720. case BUF_BLOCK_FILE_PAGE:
  3721. /* uncompressed page */
  3722. break;
  3723. case BUF_BLOCK_ZIP_FREE:
  3724. case BUF_BLOCK_ZIP_PAGE:
  3725. case BUF_BLOCK_NOT_USED:
  3726. case BUF_BLOCK_READY_FOR_USE:
  3727. case BUF_BLOCK_MEMORY:
  3728. case BUF_BLOCK_REMOVE_HASH:
  3729. ut_error;
  3730. break;
  3731. }
  3732. ut_a(buf_page_hash_get(buf_pool, b->space, b->offset) == b);
  3733. }
  3734. ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);
  3735. buf_flush_list_mutex_exit(buf_pool);
  3736. mutex_exit(&buf_pool->zip_mutex);
  /* Compressed-only pages (n_zip) are counted in n_lru but are not
  chunk blocks, hence the allowance on the right-hand side. */
  3737. if (n_lru + n_free > buf_pool->curr_size + n_zip) {
  3738. fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
  3739. (ulong) n_lru, (ulong) n_free,
  3740. (ulong) buf_pool->curr_size, (ulong) n_zip);
  3741. ut_error;
  3742. }
  3743. ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
  3744. /* because of latching order with block->mutex, we cannot get needed mutexes before that */
  3745. /*
  3746. if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
  3747. fprintf(stderr, "Free list len %lu, free blocks %lu\n",
  3748. (ulong) UT_LIST_GET_LEN(buf_pool->free),
  3749. (ulong) n_free);
  3750. ut_error;
  3751. }
  3752. ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
  3753. ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
  3754. ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
  3755. */
  3756. //buf_pool_mutex_exit(buf_pool);
  3757. mutex_exit(&buf_pool->LRU_list_mutex);
  3758. rw_lock_x_unlock(&buf_pool->page_hash_latch);
  /* The list-level validators run only after the latches above have
  been released. */
  3759. ut_a(buf_LRU_validate());
  3760. ut_a(buf_flush_validate(buf_pool));
  3761. return(TRUE);
  3762. }
  3763. /*********************************************************************//**
  3764. Validates the buffer buf_pool data structure.
  3765. @return TRUE */
  3766. UNIV_INTERN
  3767. ibool
  3768. buf_validate(void)
  3769. /*==============*/
  3770. {
  3771. ulint i;
  3772. for (i = 0; i < srv_buf_pool_instances; i++) {
  3773. buf_pool_t* buf_pool;
  3774. buf_pool = buf_pool_from_array(i);
  3775. buf_pool_validate_instance(buf_pool);
  3776. }
  3777. return(TRUE);
  3778. }
  3779. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  3780. #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  3781. /*********************************************************************//**
  3782. Prints info of the buffer buf_pool data structure for one instance.
  The output goes to stderr and consists of a summary of list lengths and
  counters, followed by one line per index id found in the block frames
  with the number of blocks carrying that id. The instance is validated
  with buf_pool_validate_instance() before returning. */
  3783. static
  3784. void
  3785. buf_print_instance(
  3786. /*===============*/
  3787. buf_pool_t* buf_pool) /*!< in: buffer pool instance */
  3788. {
  3789. index_id_t* index_ids;
  3790. ulint* counts;
  3791. ulint size;
  3792. ulint i;
  3793. ulint j;
  3794. index_id_t id;
  3795. ulint n_found;
  3796. buf_chunk_t* chunk;
  3797. dict_index_t* index;
  3798. ut_ad(buf_pool);
  /* Two parallel arrays with curr_size slots each: the scan below adds
  at most one new index id per block visited. Presumably curr_size
  equals the total number of blocks over all chunks - confirm. */
  3799. size = buf_pool->curr_size;
  3800. index_ids = mem_alloc(size * sizeof *index_ids);
  3801. counts = mem_alloc(sizeof(ulint) * size);
  3802. //buf_pool_mutex_enter(buf_pool);
  3803. mutex_enter(&buf_pool->LRU_list_mutex);
  3804. mutex_enter(&buf_pool->free_list_mutex);
  3805. buf_flush_list_mutex_enter(buf_pool);
  3806. fprintf(stderr,
  3807. "buf_pool size %lu\n"
  3808. "database pages %lu\n"
  3809. "free pages %lu\n"
  3810. "modified database pages %lu\n"
  3811. "n pending decompressions %lu\n"
  3812. "n pending reads %lu\n"
  3813. "n pending flush LRU %lu list %lu single page %lu\n"
  3814. "pages made young %lu, not young %lu\n"
  3815. "pages read %lu, created %lu, written %lu\n",
  3816. (ulong) size,
  3817. (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
  3818. (ulong) UT_LIST_GET_LEN(buf_pool->free),
  3819. (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
  3820. (ulong) buf_pool->n_pend_unzip,
  3821. (ulong) buf_pool->n_pend_reads,
  3822. (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
  3823. (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
  3824. (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
  3825. (ulong) buf_pool->stat.n_pages_made_young,
  3826. (ulong) buf_pool->stat.n_pages_not_made_young,
  3827. (ulong) buf_pool->stat.n_pages_read,
  3828. (ulong) buf_pool->stat.n_pages_created,
  3829. (ulong) buf_pool->stat.n_pages_written);
  3830. buf_flush_list_mutex_exit(buf_pool);
  3831. /* Count the number of blocks belonging to each index in the buffer */
  3832. n_found = 0;
  3833. chunk = buf_pool->chunks;
  3834. for (i = buf_pool->n_chunks; i--; chunk++) {
  3835. buf_block_t* block = chunk->blocks;
  3836. ulint n_blocks = chunk->size;
  3837. for (; n_blocks--; block++) {
  /* The frame is read without taking the block mutex and
  without looking at the block state; the message printed below
  accordingly calls the count "about". */
  3838. const buf_frame_t* frame = block->frame;
  3839. if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {
  3840. id = btr_page_get_index_id(frame);
  3841. /* Look for the id in the index_ids array */
  3842. j = 0;
  3843. while (j < n_found) {
  3844. if (index_ids[j] == id) {
  3845. counts[j]++;
  3846. break;
  3847. }
  3848. j++;
  3849. }
  /* j == n_found means the linear search found no match:
  append a new entry with a count of one. */
  3850. if (j == n_found) {
  3851. n_found++;
  3852. index_ids[j] = id;
  3853. counts[j] = 1;
  3854. }
  3855. }
  3856. }
  3857. }
  3858. //buf_pool_mutex_exit(buf_pool);
  3859. mutex_exit(&buf_pool->LRU_list_mutex);
  3860. mutex_exit(&buf_pool->free_list_mutex);
  /* The per-index lines are printed after the mutexes are released. */
  3861. for (i = 0; i < n_found; i++) {
  3862. index = dict_index_get_if_in_cache(index_ids[i]);
  3863. fprintf(stderr,
  3864. "Block count for index %llu in buffer is about %lu",
  3865. (ullint) index_ids[i],
  3866. (ulong) counts[i]);
  3867. if (index) {
  3868. putc(' ', stderr);
  3869. dict_index_name_print(stderr, NULL, index);
  3870. }
  3871. putc('\n', stderr);
  3872. }
  3873. mem_free(index_ids);
  3874. mem_free(counts);
  3875. ut_a(buf_pool_validate_instance(buf_pool));
  3876. }
  3877. /*********************************************************************//**
  3878. Prints info of the buffer buf_pool data structure. */
  3879. UNIV_INTERN
  3880. void
  3881. buf_print(void)
  3882. /*===========*/
  3883. {
  3884. ulint i;
  3885. for (i = 0; i < srv_buf_pool_instances; i++) {
  3886. buf_pool_t* buf_pool;
  3887. buf_pool = buf_pool_from_array(i);
  3888. buf_print_instance(buf_pool);
  3889. }
  3890. }
  3891. #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
  3892. #ifdef UNIV_DEBUG
  3893. /*********************************************************************//**
  3894. Returns the number of latched pages in the buffer pool.
  3895. @return number of latched pages */
  3896. UNIV_INTERN
  3897. ulint
  3898. buf_get_latched_pages_number_instance(
  3899. /*==================================*/
  3900. buf_pool_t* buf_pool) /*!< in: buffer pool instance */
  3901. {
  3902. buf_page_t* b;
  3903. ulint i;
  3904. buf_chunk_t* chunk;
  3905. ulint fixed_pages_number = 0;
  3906. //buf_pool_mutex_enter(buf_pool);
  3907. chunk = buf_pool->chunks;
  3908. for (i = buf_pool->n_chunks; i--; chunk++) {
  3909. buf_block_t* block;
  3910. ulint j;
  3911. block = chunk->blocks;
  3912. for (j = chunk->size; j--; block++) {
  3913. if (buf_block_get_state(block)
  3914. != BUF_BLOCK_FILE_PAGE) {
  3915. continue;
  3916. }
  3917. mutex_enter(&block->mutex);
  3918. if (block->page.buf_fix_count != 0
  3919. || buf_page_get_io_fix(&block->page)
  3920. != BUF_IO_NONE) {
  3921. fixed_pages_number++;
  3922. }
  3923. mutex_exit(&block->mutex);
  3924. }
  3925. }
  3926. mutex_enter(&buf_pool->zip_mutex);
  3927. /* Traverse the lists of clean and dirty compressed-only blocks. */
  3928. for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
  3929. b = UT_LIST_GET_NEXT(zip_list, b)) {
  3930. ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
  3931. ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);
  3932. if (b->buf_fix_count != 0
  3933. || buf_page_get_io_fix(b) != BUF_IO_NONE) {
  3934. fixed_pages_number++;
  3935. }
  3936. }
  3937. buf_flush_list_mutex_enter(buf_pool);
  3938. for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
  3939. b = UT_LIST_GET_NEXT(flush_list, b)) {
  3940. ut_ad(b->in_flush_list);
  3941. switch (buf_page_get_state(b)) {
  3942. case BUF_BLOCK_ZIP_DIRTY:
  3943. if (b->buf_fix_count != 0
  3944. || buf_page_get_io_fix(b) != BUF_IO_NONE) {
  3945. fixed_pages_number++;
  3946. }
  3947. break;
  3948. case BUF_BLOCK_FILE_PAGE:
  3949. /* uncompressed page */
  3950. break;
  3951. case BUF_BLOCK_ZIP_FREE:
  3952. case BUF_BLOCK_ZIP_PAGE:
  3953. case BUF_BLOCK_NOT_USED:
  3954. case BUF_BLOCK_READY_FOR_USE:
  3955. case BUF_BLOCK_MEMORY:
  3956. case BUF_BLOCK_REMOVE_HASH:
  3957. ut_error;
  3958. break;
  3959. }
  3960. }
  3961. buf_flush_list_mutex_exit(buf_pool);
  3962. mutex_exit(&buf_pool->zip_mutex);
  3963. //buf_pool_mutex_exit(buf_pool);
  3964. return(fixed_pages_number);
  3965. }
  3966. /*********************************************************************//**
  3967. Returns the number of latched pages in all the buffer pools.
  3968. @return number of latched pages */
  3969. UNIV_INTERN
  3970. ulint
  3971. buf_get_latched_pages_number(void)
  3972. /*==============================*/
  3973. {
  3974. ulint i;
  3975. ulint total_latched_pages = 0;
  3976. for (i = 0; i < srv_buf_pool_instances; i++) {
  3977. buf_pool_t* buf_pool;
  3978. buf_pool = buf_pool_from_array(i);
  3979. total_latched_pages += buf_get_latched_pages_number_instance(
  3980. buf_pool);
  3981. }
  3982. return(total_latched_pages);
  3983. }
  3984. #endif /* UNIV_DEBUG */
  3985. /*********************************************************************//**
  3986. Returns the number of pending buf pool ios.
  3987. @return number of pending I/O operations */
  3988. UNIV_INTERN
  3989. ulint
  3990. buf_get_n_pending_ios(void)
  3991. /*=======================*/
  3992. {
  3993. ulint i;
  3994. ulint pend_ios = 0;
  3995. for (i = 0; i < srv_buf_pool_instances; i++) {
  3996. buf_pool_t* buf_pool;
  3997. buf_pool = buf_pool_from_array(i);
  3998. pend_ios +=
  3999. buf_pool->n_pend_reads
  4000. + buf_pool->n_flush[BUF_FLUSH_LRU]
  4001. + buf_pool->n_flush[BUF_FLUSH_LIST]
  4002. + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
  4003. }
  4004. return(pend_ios);
  4005. }
  4006. /*********************************************************************//**
  4007. Returns the ratio in percents of modified pages in the buffer pool /
  4008. database pages in the buffer pool.
  4009. @return modified page percentage ratio */
  4010. UNIV_INTERN
  4011. ulint
  4012. buf_get_modified_ratio_pct(void)
  4013. /*============================*/
  4014. {
  4015. ulint ratio;
  4016. ulint lru_len = 0;
  4017. ulint free_len = 0;
  4018. ulint flush_list_len = 0;
  4019. buf_get_total_list_len(&lru_len, &free_len, &flush_list_len);
  4020. ratio = (100 * flush_list_len) / (1 + lru_len + free_len);
  4021. /* 1 + is there to avoid division by zero */
  4022. return(ratio);
  4023. }
  4024. /*******************************************************************//**
  4025. Aggregates a pool stats information with the total buffer pool stats */
  4026. static
  4027. void
  4028. buf_stats_aggregate_pool_info(
  4029. /*==========================*/
  4030. buf_pool_info_t* total_info, /*!< in/out: the buffer pool
  4031. info to store aggregated
  4032. result */
  4033. const buf_pool_info_t* pool_info) /*!< in: individual buffer pool
  4034. stats info */
  4035. {
  4036. ut_a(total_info && pool_info);
  4037. /* Nothing to copy if total_info is the same as pool_info */
  4038. if (total_info == pool_info) {
  4039. return;
  4040. }
  4041. total_info->pool_size += pool_info->pool_size;
  4042. total_info->pool_size_bytes += pool_info->pool_size_bytes;
  4043. total_info->lru_len += pool_info->lru_len;
  4044. total_info->old_lru_len += pool_info->old_lru_len;
  4045. total_info->free_list_len += pool_info->free_list_len;
  4046. total_info->flush_list_len += pool_info->flush_list_len;
  4047. total_info->n_pend_unzip += pool_info->n_pend_unzip;
  4048. total_info->n_pend_reads += pool_info->n_pend_reads;
  4049. total_info->n_pending_flush_lru += pool_info->n_pending_flush_lru;
  4050. total_info->n_pending_flush_list += pool_info->n_pending_flush_list;
  4051. total_info->n_pending_flush_single_page +=
  4052. pool_info->n_pending_flush_single_page;
  4053. total_info->n_pages_made_young += pool_info->n_pages_made_young;
  4054. total_info->n_pages_not_made_young += pool_info->n_pages_not_made_young;
  4055. total_info->n_pages_read += pool_info->n_pages_read;
  4056. total_info->n_pages_created += pool_info->n_pages_created;
  4057. total_info->n_pages_written += pool_info->n_pages_written;
  4058. total_info->n_page_gets += pool_info->n_page_gets;
  4059. total_info->n_ra_pages_read_rnd += pool_info->n_ra_pages_read_rnd;
  4060. total_info->n_ra_pages_read += pool_info->n_ra_pages_read;
  4061. total_info->n_ra_pages_evicted += pool_info->n_ra_pages_evicted;
  4062. total_info->page_made_young_rate += pool_info->page_made_young_rate;
  4063. total_info->page_not_made_young_rate +=
  4064. pool_info->page_not_made_young_rate;
  4065. total_info->pages_read_rate += pool_info->pages_read_rate;
  4066. total_info->pages_created_rate += pool_info->pages_created_rate;
  4067. total_info->pages_written_rate += pool_info->pages_written_rate;
  4068. total_info->n_page_get_delta += pool_info->n_page_get_delta;
  4069. total_info->page_read_delta += pool_info->page_read_delta;
  4070. total_info->young_making_delta += pool_info->young_making_delta;
  4071. total_info->not_young_making_delta += pool_info->not_young_making_delta;
  4072. total_info->pages_readahead_rnd_rate += pool_info->pages_readahead_rnd_rate;
  4073. total_info->pages_readahead_rate += pool_info->pages_readahead_rate;
  4074. total_info->pages_evicted_rate += pool_info->pages_evicted_rate;
  4075. total_info->unzip_lru_len += pool_info->unzip_lru_len;
  4076. total_info->io_sum += pool_info->io_sum;
  4077. total_info->io_cur += pool_info->io_cur;
  4078. total_info->unzip_sum += pool_info->unzip_sum;
  4079. total_info->unzip_cur += pool_info->unzip_cur;
  4080. }
  4081. /*******************************************************************//**
  4082. Collect buffer pool stats information for a buffer pool. Also
  4083. record aggregated stats if there are more than one buffer pool
  4084. in the server */
  4085. UNIV_INTERN
  4086. void
  4087. buf_stats_get_pool_info(
  4088. /*====================*/
  4089. buf_pool_t* buf_pool, /*!< in: buffer pool */
  4090. ulint pool_id, /*!< in: buffer pool ID */
  4091. buf_pool_info_t* all_pool_info) /*!< in/out: buffer pool info
  4092. to fill */
  4093. {
  4094. buf_pool_info_t* pool_info;
  4095. time_t current_time;
  4096. double time_elapsed;
  4097. /* Find appropriate pool_info to store stats for this buffer pool */
  4098. pool_info = &all_pool_info[pool_id];
  4099. mutex_enter(&buf_pool->LRU_list_mutex);
  4100. mutex_enter(&buf_pool->free_list_mutex);
  4101. buf_pool_mutex_enter(buf_pool);
  4102. buf_flush_list_mutex_enter(buf_pool);
  4103. pool_info->pool_unique_id = pool_id;
  4104. pool_info->pool_size = buf_pool->curr_size;
  4105. pool_info->pool_size_bytes = buf_pool->curr_pool_size;
  4106. pool_info->lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
  4107. pool_info->old_lru_len = buf_pool->LRU_old_len;
  4108. pool_info->free_list_len = UT_LIST_GET_LEN(buf_pool->free);
  4109. pool_info->flush_list_len = UT_LIST_GET_LEN(buf_pool->flush_list);
  4110. pool_info->n_pend_unzip = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
  4111. pool_info->n_pend_reads = buf_pool->n_pend_reads;
  4112. pool_info->n_pending_flush_lru =
  4113. (buf_pool->n_flush[BUF_FLUSH_LRU]
  4114. + buf_pool->init_flush[BUF_FLUSH_LRU]);
  4115. pool_info->n_pending_flush_list =
  4116. (buf_pool->n_flush[BUF_FLUSH_LIST]
  4117. + buf_pool->init_flush[BUF_FLUSH_LIST]);
  4118. pool_info->n_pending_flush_single_page =
  4119. buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
  4120. buf_flush_list_mutex_exit(buf_pool);
  4121. current_time = time(NULL);
  4122. time_elapsed = 0.001 + difftime(current_time,
  4123. buf_pool->last_printout_time);
  4124. pool_info->n_pages_made_young = buf_pool->stat.n_pages_made_young;
  4125. pool_info->n_pages_not_made_young =
  4126. buf_pool->stat.n_pages_not_made_young;
  4127. pool_info->n_pages_read = buf_pool->stat.n_pages_read;
  4128. pool_info->n_pages_created = buf_pool->stat.n_pages_created;
  4129. pool_info->n_pages_written = buf_pool->stat.n_pages_written;
  4130. pool_info->n_page_gets = buf_pool->stat.n_page_gets;
  4131. pool_info->n_ra_pages_read_rnd = buf_pool->stat.n_ra_pages_read_rnd;
  4132. pool_info->n_ra_pages_read = buf_pool->stat.n_ra_pages_read;
  4133. pool_info->n_ra_pages_evicted = buf_pool->stat.n_ra_pages_evicted;
  4134. pool_info->page_made_young_rate =
  4135. (buf_pool->stat.n_pages_made_young
  4136. - buf_pool->old_stat.n_pages_made_young) / time_elapsed;
  4137. pool_info->page_not_made_young_rate =
  4138. (buf_pool->stat.n_pages_not_made_young
  4139. - buf_pool->old_stat.n_pages_not_made_young) / time_elapsed;
  4140. pool_info->pages_read_rate =
  4141. (buf_pool->stat.n_pages_read
  4142. - buf_pool->old_stat.n_pages_read) / time_elapsed;
  4143. pool_info->pages_created_rate =
  4144. (buf_pool->stat.n_pages_created
  4145. - buf_pool->old_stat.n_pages_created) / time_elapsed;
  4146. pool_info->pages_written_rate =
  4147. (buf_pool->stat.n_pages_written
  4148. - buf_pool->old_stat.n_pages_written) / time_elapsed;
  4149. pool_info->n_page_get_delta = buf_pool->stat.n_page_gets
  4150. - buf_pool->old_stat.n_page_gets;
  4151. if (pool_info->n_page_get_delta) {
  4152. pool_info->page_read_delta = buf_pool->stat.n_pages_read
  4153. - buf_pool->old_stat.n_pages_read;
  4154. pool_info->young_making_delta =
  4155. buf_pool->stat.n_pages_made_young
  4156. - buf_pool->old_stat.n_pages_made_young;
  4157. pool_info->not_young_making_delta =
  4158. buf_pool->stat.n_pages_not_made_young
  4159. - buf_pool->old_stat.n_pages_not_made_young;
  4160. }
  4161. pool_info->pages_readahead_rnd_rate =
  4162. (buf_pool->stat.n_ra_pages_read_rnd
  4163. - buf_pool->old_stat.n_ra_pages_read_rnd) / time_elapsed;
  4164. pool_info->pages_readahead_rate =
  4165. (buf_pool->stat.n_ra_pages_read
  4166. - buf_pool->old_stat.n_ra_pages_read) / time_elapsed;
  4167. pool_info->pages_evicted_rate =
  4168. (buf_pool->stat.n_ra_pages_evicted
  4169. - buf_pool->old_stat.n_ra_pages_evicted) / time_elapsed;
  4170. pool_info->unzip_lru_len = UT_LIST_GET_LEN(buf_pool->unzip_LRU);
  4171. pool_info->io_sum = buf_LRU_stat_sum.io;
  4172. pool_info->io_cur = buf_LRU_stat_cur.io;
  4173. pool_info->unzip_sum = buf_LRU_stat_sum.unzip;
  4174. pool_info->unzip_cur = buf_LRU_stat_cur.unzip;
  4175. buf_refresh_io_stats(buf_pool);
  4176. mutex_exit(&buf_pool->LRU_list_mutex);
  4177. mutex_exit(&buf_pool->free_list_mutex);
  4178. buf_pool_mutex_exit(buf_pool);
  4179. }
  4180. /*********************************************************************//**
  4181. Prints info of the buffer i/o. */
  4182. UNIV_INTERN
  4183. void
  4184. buf_print_io_instance(
  4185. /*==================*/
  4186. buf_pool_info_t*pool_info, /*!< in: buffer pool info */
  4187. FILE* file) /*!< in/out: buffer where to print */
  4188. {
  4189. ut_ad(pool_info);
  4190. fprintf(file,
  4191. "Buffer pool size %lu\n"
  4192. "Buffer pool size, bytes %lu\n"
  4193. "Free buffers %lu\n"
  4194. "Database pages %lu\n"
  4195. "Old database pages %lu\n"
  4196. "Modified db pages %lu\n"
  4197. "Pending reads %lu\n"
  4198. "Pending writes: LRU %lu, flush list %lu, single page %lu\n",
  4199. pool_info->pool_size,
  4200. pool_info->pool_size_bytes,
  4201. pool_info->free_list_len,
  4202. pool_info->lru_len,
  4203. pool_info->old_lru_len,
  4204. pool_info->flush_list_len,
  4205. pool_info->n_pend_reads,
  4206. pool_info->n_pending_flush_lru,
  4207. pool_info->n_pending_flush_list,
  4208. pool_info->n_pending_flush_single_page);
  4209. fprintf(file,
  4210. "Pages made young %lu, not young %lu\n"
  4211. "%.2f youngs/s, %.2f non-youngs/s\n"
  4212. "Pages read %lu, created %lu, written %lu\n"
  4213. "%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
  4214. pool_info->n_pages_made_young,
  4215. pool_info->n_pages_not_made_young,
  4216. pool_info->page_made_young_rate,
  4217. pool_info->page_not_made_young_rate,
  4218. pool_info->n_pages_read,
  4219. pool_info->n_pages_created,
  4220. pool_info->n_pages_written,
  4221. pool_info->pages_read_rate,
  4222. pool_info->pages_created_rate,
  4223. pool_info->pages_written_rate);
  4224. if (pool_info->n_page_get_delta) {
  4225. fprintf(file,
  4226. "Buffer pool hit rate %lu / 1000,"
  4227. " young-making rate %lu / 1000 not %lu / 1000\n",
  4228. (ulong) (1000 - (1000 * pool_info->page_read_delta
  4229. / pool_info->n_page_get_delta)),
  4230. (ulong) (1000 * pool_info->young_making_delta
  4231. / pool_info->n_page_get_delta),
  4232. (ulong) (1000 * pool_info->not_young_making_delta
  4233. / pool_info->n_page_get_delta));
  4234. } else {
  4235. fputs("No buffer pool page gets since the last printout\n",
  4236. file);
  4237. }
  4238. /* Statistics about read ahead algorithm */
  4239. fprintf(file, "Pages read ahead %.2f/s,"
  4240. " evicted without access %.2f/s,"
  4241. " Random read ahead %.2f/s\n",
  4242. pool_info->pages_readahead_rate,
  4243. pool_info->pages_evicted_rate,
  4244. pool_info->pages_readahead_rnd_rate);
  4245. /* Print some values to help us with visualizing what is
  4246. happening with LRU eviction. */
  4247. fprintf(file,
  4248. "LRU len: %lu, unzip_LRU len: %lu\n"
  4249. "I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
  4250. pool_info->lru_len, pool_info->unzip_lru_len,
  4251. pool_info->io_sum, pool_info->io_cur,
  4252. pool_info->unzip_sum, pool_info->unzip_cur);
  4253. }
  4254. /*********************************************************************//**
  4255. Prints info of the buffer i/o. */
  4256. UNIV_INTERN
  4257. void
  4258. buf_print_io(
  4259. /*=========*/
  4260. FILE* file) /*!< in/out: buffer where to print */
  4261. {
  4262. ulint i;
  4263. buf_pool_info_t* pool_info;
  4264. buf_pool_info_t* pool_info_total;
  4265. /* If srv_buf_pool_instances is greater than 1, allocate
  4266. one extra buf_pool_info_t, the last one stores
  4267. aggregated/total values from all pools */
  4268. if (srv_buf_pool_instances > 1) {
  4269. pool_info = (buf_pool_info_t*) mem_zalloc((
  4270. srv_buf_pool_instances + 1) * sizeof *pool_info);
  4271. pool_info_total = &pool_info[srv_buf_pool_instances];
  4272. } else {
  4273. ut_a(srv_buf_pool_instances == 1);
  4274. pool_info_total = pool_info = (buf_pool_info_t*) mem_zalloc(
  4275. sizeof *pool_info)
  4276. }
  4277. for (i = 0; i < srv_buf_pool_instances; i++) {
  4278. buf_pool_t* buf_pool;
  4279. buf_pool = buf_pool_from_array(i);
  4280. /* Fetch individual buffer pool info and calculate
  4281. aggregated stats along the way */
  4282. buf_stats_get_pool_info(buf_pool, i, pool_info);
  4283. /* If we have more than one buffer pool, store
  4284. the aggregated stats */
  4285. if (srv_buf_pool_instances > 1) {
  4286. buf_stats_aggregate_pool_info(pool_info_total,
  4287. &pool_info[i]);
  4288. }
  4289. }
  4290. /* Print the aggreate buffer pool info */
  4291. buf_print_io_instance(pool_info_total, file);
  4292. /* If there are more than one buffer pool, print each individual pool
  4293. info */
  4294. if (srv_buf_pool_instances > 1) {
  4295. fputs("----------------------\n"
  4296. "INDIVIDUAL BUFFER POOL INFO\n"
  4297. "----------------------\n", file);
  4298. for (i = 0; i < srv_buf_pool_instances; i++) {
  4299. fprintf(file, "---BUFFER POOL %lu\n", i);
  4300. buf_print_io_instance(&pool_info[i], file);
  4301. }
  4302. }
  4303. mem_free(pool_info);
  4304. }
  4305. /**********************************************************************//**
  4306. Refreshes the statistics used to print per-second averages. */
  4307. UNIV_INTERN
  4308. void
  4309. buf_refresh_io_stats(
  4310. /*=================*/
  4311. buf_pool_t* buf_pool) /*!< in: buffer pool instance */
  4312. {
  4313. buf_pool->last_printout_time = ut_time();
  4314. buf_pool->old_stat = buf_pool->stat;
  4315. }
  4316. /**********************************************************************//**
  4317. Refreshes the statistics used to print per-second averages. */
  4318. UNIV_INTERN
  4319. void
  4320. buf_refresh_io_stats_all(void)
  4321. /*==========================*/
  4322. {
  4323. ulint i;
  4324. for (i = 0; i < srv_buf_pool_instances; i++) {
  4325. buf_pool_t* buf_pool;
  4326. buf_pool = buf_pool_from_array(i);
  4327. buf_refresh_io_stats(buf_pool);
  4328. }
  4329. }
  4330. /**********************************************************************//**
  4331. Check if all pages in all buffer pools are in a replacable state.
  4332. @return FALSE if not */
  4333. UNIV_INTERN
  4334. ibool
  4335. buf_all_freed(void)
  4336. /*===============*/
  4337. {
  4338. ulint i;
  4339. for (i = 0; i < srv_buf_pool_instances; i++) {
  4340. buf_pool_t* buf_pool;
  4341. buf_pool = buf_pool_from_array(i);
  4342. if (!buf_all_freed_instance(buf_pool)) {
  4343. return(FALSE);
  4344. }
  4345. }
  4346. return(TRUE);
  4347. }
  4348. /*********************************************************************//**
  4349. Checks that there currently are no pending i/o-operations for the buffer
  4350. pool.
  4351. @return number of pending i/o */
  4352. UNIV_INTERN
  4353. ulint
  4354. buf_pool_check_num_pending_io(void)
  4355. /*===============================*/
  4356. {
  4357. ulint i;
  4358. ulint pending_io = 0;
  4359. buf_pool_mutex_enter_all();
  4360. for (i = 0; i < srv_buf_pool_instances; i++) {
  4361. const buf_pool_t* buf_pool;
  4362. buf_pool = buf_pool_from_array(i);
  4363. pending_io += buf_pool->n_pend_reads
  4364. + buf_pool->n_flush[BUF_FLUSH_LRU]
  4365. + buf_pool->n_flush[BUF_FLUSH_LIST]
  4366. + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE];
  4367. }
  4368. buf_pool_mutex_exit_all();
  4369. return(pending_io);
  4370. }
  4371. #if 0
  4372. Code currently not used
  4373. /*********************************************************************//**
  4374. Gets the current length of the free list of buffer blocks.
  4375. @return length of the free list */
  4376. UNIV_INTERN
  4377. ulint
  4378. buf_get_free_list_len(void)
  4379. /*=======================*/
  4380. {
  4381. ulint len;
  4382. //buf_pool_mutex_enter(buf_pool);
  4383. mutex_enter(&buf_pool->free_list_mutex);
  4384. len = UT_LIST_GET_LEN(buf_pool->free);
  4385. //buf_pool_mutex_exit(buf_pool);
  4386. mutex_exit(&buf_pool->free_list_mutex);
  4387. return(len);
  4388. }
  4389. #endif
  4390. #else /* !UNIV_HOTBACKUP */
  4391. /********************************************************************//**
  4392. Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
  4393. UNIV_INTERN
  4394. void
  4395. buf_page_init_for_backup_restore(
  4396. /*=============================*/
  4397. ulint space, /*!< in: space id */
  4398. ulint offset, /*!< in: offset of the page within space
  4399. in units of a page */
  4400. ulint zip_size,/*!< in: compressed page size in bytes
  4401. or 0 for uncompressed pages */
  4402. buf_block_t* block) /*!< in: block to init */
  4403. {
  4404. block->page.state = BUF_BLOCK_FILE_PAGE;
  4405. block->page.space = space;
  4406. block->page.offset = offset;
  4407. page_zip_des_init(&block->page.zip);
  4408. /* We assume that block->page.data has been allocated
  4409. with zip_size == UNIV_PAGE_SIZE. */
  4410. ut_ad(zip_size <= UNIV_PAGE_SIZE);
  4411. ut_ad(ut_is_2pow(zip_size));
  4412. page_zip_set_size(&block->page.zip, zip_size);
  4413. if (zip_size) {
  4414. block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
  4415. }
  4416. }
  4417. #endif /* !UNIV_HOTBACKUP */