/*****************************************************************************

Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.

Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
briefly in the InnoDB documentation. The contributions by Google are
incorporated with their permission, and subject to the conditions contained in
the file COPYING.Google.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA

*****************************************************************************/

/**************************************************//**
@file buf/buf0buf.c
The database buffer buf_pool

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#include "buf0buf.h"

#ifdef UNIV_NONINL
#include "buf0buf.ic"
#endif

#include "mem0mem.h"
#include "btr0btr.h"
#include "fil0fil.h"
#ifndef UNIV_HOTBACKUP
#include "buf0buddy.h"
#include "lock0lock.h"
#include "btr0sea.h"
#include "ibuf0ibuf.h"
#include "trx0undo.h"
#include "log0log.h"
#endif /* !UNIV_HOTBACKUP */
#include "srv0srv.h"
#include "dict0dict.h"
#include "log0recv.h"
#include "page0zip.h"
#include "trx0trx.h"
#include "srv0start.h"
#include "que0que.h"
#include "read0read.h"
#include "row0row.h"
#include "ha_prototypes.h"

/* prototypes for new functions added to ha_innodb.cc */
trx_t* innobase_get_trx();
inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx)
{
        ulint           block_hash;
        ulint           block_hash_byte;
        byte            block_hash_offset;

        ut_ad(block);

        if (!innobase_get_slow_log() || !trx || !trx->take_stats)
                return;

        if (!trx->distinct_page_access_hash) {
                trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE);
                memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
        }

        block_hash = ut_hash_ulint((block->page.space << 20) + block->page.space +
                                        block->page.offset, DPAH_SIZE << 3);
        block_hash_byte = block_hash >> 3;
        block_hash_offset = (byte) block_hash & 0x07;
        if (block_hash_byte >= DPAH_SIZE)
                fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset);
        if (block_hash_offset > 7)
                fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %d !!!\n", block_hash_byte, block_hash_offset);
        if ((trx->distinct_page_access_hash[block_hash_byte] & ((byte) 0x01 << block_hash_offset)) == 0)
                trx->distinct_page_access++;
        trx->distinct_page_access_hash[block_hash_byte] |= (byte) 0x01 << block_hash_offset;
        return;
}
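/* The function above is in effect a Bloom-filter-style distinct counter:
one bit per hash bucket in a DPAH_SIZE-byte bitmap, so each page is counted
at most once per transaction (modulo hash collisions). A minimal standalone
sketch of the same technique follows; the names are hypothetical and the
code is illustrative only, not part of this file. */
#if 0
/* Sets the bit for key in a bitmap of n_bytes bytes;
returns 1 if the bit was previously unset, 0 on repeats. */
static int
bitmap_test_and_set(byte* map, ulint n_bytes, ulint key)
{
        ulint   bit  = key % (n_bytes << 3);    /* bucket in [0, n_bytes * 8) */
        ulint   idx  = bit >> 3;                /* byte index into the map */
        byte    mask = (byte) (0x01 << (bit & 0x07));
        int     was_unset = !(map[idx] & mask);

        map[idx] |= mask;
        return(was_unset);
}
#endif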
/*
                IMPLEMENTATION OF THE BUFFER POOL
                =================================

Performance improvement:
------------------------
Thread scheduling in NT may be so slow that the OS wait mechanism should
not be used even in waiting for disk reads to complete.
Rather, we should put waiting query threads to the queue of
waiting jobs, and let the OS thread do something useful while the i/o
is processed. In this way we could remove most OS thread switches in
an i/o-intensive benchmark like TPC-C.

A possibility is to put a user space thread library between the database
and NT. User space thread libraries might be very fast.

SQL Server 7.0 can be configured to use 'fibers' which are lightweight
threads in NT. These should be studied.

                Buffer frames and blocks
                ------------------------
Following the terminology of Gray and Reuter, we call the memory
blocks where file pages are loaded buffer frames. For each buffer
frame there is a control block, or shortly, a block, in the buffer
control array. The control info which does not need to be stored
in the file along with the file page, resides in the control block.

                Buffer pool struct
                ------------------
The buffer buf_pool contains a single mutex which protects all the
control data structures of the buf_pool. The content of a buffer frame is
protected by a separate read-write lock in its control block, though.
These locks can be locked and unlocked without owning the buf_pool mutex.
The OS events in the buf_pool struct can be waited for without owning the
buf_pool mutex.

The buf_pool mutex is a hot-spot in main memory, causing a lot of
memory bus traffic on multiprocessor systems when processors
alternately access the mutex. On our Pentium, the mutex is accessed
maybe every 10 microseconds. We gave up the solution of having a mutex
for each control block, for instance, because it seemed to be
complicated.

A solution to reduce mutex contention of the buf_pool mutex is to
create a separate mutex for the page hash table. On Pentium,
accessing the hash table takes 2 microseconds, about half
of the total buf_pool mutex hold time.

                Control blocks
                --------------

The control block contains, for instance, the bufferfix count
which is incremented when a thread wants a file page to be fixed
in a buffer frame. The bufferfix operation does not lock the
contents of the frame, however. For this purpose, the control
block contains a read-write lock.

The buffer frames have to be aligned so that the start memory
address of a frame is divisible by the universal page size, which
is a power of two.

We intend to make the buffer buf_pool size on-line reconfigurable,
that is, the buf_pool size can be changed without closing the database.
Then the database administrator may adjust it to be bigger
at night, for example. The control block array must
contain enough control blocks for the maximum buffer buf_pool size
which is used in the particular database.
If the buf_pool size is cut, we exploit the virtual memory mechanism of
the OS, and just refrain from using frames at high addresses. Then the OS
can swap them to disk.

The control blocks containing file pages are put to a hash table
according to the file address of the page.
We could speed up the access to an individual page by using
"pointer swizzling": we could replace the page references on
non-leaf index pages by direct pointers to the page, if it exists
in the buf_pool. We could make a separate hash table where we could
chain all the page references in non-leaf pages residing in the buf_pool,
using the page reference as the hash key,
and at the time of reading of a page update the pointers accordingly.
Drawbacks of this solution are added complexity and,
possibly, extra space required on non-leaf pages for memory pointers.
A simpler solution is just to speed up the hash table mechanism
in the database, using tables whose size is a power of 2.

                Lists of blocks
                ---------------

There are several lists of control blocks.

The free list (buf_pool->free) contains blocks which are currently not
used.

The common LRU list contains all the blocks holding a file page
except those for which the bufferfix count is non-zero.
The pages are in the LRU list roughly in the order of the last
access to the page, so that the oldest pages are at the end of the
list. We also keep a pointer to near the end of the LRU list,
which we can use when we want to artificially age a page in the
buf_pool. This is used if we know that some page is not needed
again for some time: we insert the block right after the pointer,
causing it to be replaced sooner than would normally be the case.
Currently this aging mechanism is used for the read-ahead of pages,
and it can also be used when there is a scan of a full
table which cannot fit in memory. Putting the pages near the end
of the LRU list, we make sure that most of the buf_pool stays in the
main memory, undisturbed.

The unzip_LRU list contains a subset of the common LRU list. The
blocks on the unzip_LRU list hold a compressed file page and the
corresponding uncompressed page frame. A block is in unzip_LRU if and
only if the predicate buf_page_belongs_to_unzip_LRU(&block->page)
holds. The blocks in unzip_LRU will be in the same order as they are in
the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.

The chain of modified blocks (buf_pool->flush_list) contains the blocks
holding file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.

The chain of unmodified compressed blocks (buf_pool->zip_clean)
contains the control blocks (buf_page_t) of those compressed pages
that are not in buf_pool->flush_list and for which no uncompressed
page has been allocated in the buffer pool. The control blocks for
uncompressed pages are accessible via buf_block_t objects that are
reachable via buf_pool->chunks[].

The chains of free memory blocks (buf_pool->zip_free[]) are used by
the buddy allocator (buf0buddy.c) to keep track of currently unused
memory blocks of size sizeof(buf_page_t)..UNIV_PAGE_SIZE / 2. These
blocks are inside the UNIV_PAGE_SIZE-sized memory blocks of type
BUF_BLOCK_MEMORY that the buddy allocator requests from the buffer
pool. The buddy allocator is solely used for allocating control
blocks for compressed pages (buf_page_t) and compressed page frames.

                Loading a file page
                -------------------

First, a victim block for replacement has to be found in the
buf_pool. It is taken from the free list or searched for from the
end of the LRU-list. An exclusive lock is reserved for the frame,
the io_fix field is set in the block fixing the block in buf_pool,
and the io-operation for loading the page is queued. The io-handler thread
releases the X-lock on the frame and resets the io_fix field
when the io operation completes.

A thread may request the above operation using the function
buf_page_get(). It may then continue to request a lock on the frame.
The lock is granted when the io-handler releases the x-lock.

                Read-ahead
                ----------

The read-ahead mechanism is intended to be intelligent and
isolated from the semantically higher levels of the database
index management. From the higher level we only need the
information whether a file page has a natural successor or
predecessor page. On the leaf level of a B-tree index,
these are the next and previous pages in the natural
order of the pages.

Let us first explain the read-ahead mechanism when the leaves
of a B-tree are scanned in an ascending or descending order.
When a page is referenced in the buf_pool for the first time,
the buffer manager checks if it is at the border of a so-called
linear read-ahead area. The tablespace is divided into these
areas of size 64 blocks, for example. So if the page is at the
border of such an area, the read-ahead mechanism checks if
all the other blocks in the area have been accessed in an
ascending or descending order. If this is the case, the system
looks at the natural successor or predecessor of the page,
checks if that is at the border of another area, and in this case
issues read-requests for all the pages in that area. Maybe
we could relax the condition that all the pages in the area
have to be accessed: if data is deleted from a table, there may
appear holes of unused pages in the area.

A different read-ahead mechanism is used when there appears
to be a random access pattern to a file.
If a new page is referenced in the buf_pool, and several pages
of its random access area (for instance, 32 consecutive pages
in a tablespace) have recently been referenced, we may predict
that the whole area may be needed in the near future, and issue
the read requests for the whole area.
*/
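/* A minimal sketch of the linear read-ahead border test described in the
comment above, assuming a hypothetical 64-page area. The real trigger
logic lives in buf0rea.c and is more involved (it also verifies the access
order of the other pages in the area); this is illustrative only. */
#if 0
/* Returns nonzero if page_no is the first or the last page of its
64-page read-ahead area; only border pages can trigger linear
read-ahead of the adjacent area. */
static ulint
buf_read_ahead_is_border_sketch(ulint page_no)
{
        ulint   area_start = page_no - (page_no % 64);

        return(page_no == area_start || page_no == area_start + 63);
}
#endif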
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
static const int WAIT_FOR_READ = 5000;
/** Number of attempts made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;

/** The buffer buf_pool of the database */
UNIV_INTERN buf_pool_t* buf_pool = NULL;

/** mutex protecting the buffer pool struct and control blocks, except the
read-write lock in them */
UNIV_INTERN mutex_t             buf_pool_mutex;
UNIV_INTERN mutex_t             LRU_list_mutex;
UNIV_INTERN mutex_t             flush_list_mutex;
UNIV_INTERN rw_lock_t           page_hash_latch;
UNIV_INTERN mutex_t             free_list_mutex;
UNIV_INTERN mutex_t             zip_free_mutex;
UNIV_INTERN mutex_t             zip_hash_mutex;
/** mutex protecting the control blocks of compressed-only pages
(of type buf_page_t, not buf_block_t) */
UNIV_INTERN mutex_t             buf_pool_zip_mutex;

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
static ulint    buf_dbg_counter = 0; /*!< This is used to insert validation
                                        operations in execution in the
                                        debug version */
/** Flag to forbid the release of the buffer pool mutex.
Protected by buf_pool_mutex. */
UNIV_INTERN ulint               buf_pool_mutex_exit_forbidden = 0;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/** If this is set TRUE, the program prints info whenever
read-ahead or flush occurs */
UNIV_INTERN ibool               buf_debug_prints = FALSE;
#endif /* UNIV_DEBUG */

/* Buffer pool shared memory segment information */
typedef struct buf_shm_info_struct      buf_shm_info_t;

struct buf_shm_info_struct {
        char    head_str[8];
        ulint   binary_id;
        ibool   is_new;         /* during initialization */
        ibool   clean;          /* cleanly shut down and freed */
        ibool   reusable;       /* reusable */
        ulint   buf_pool_size;  /* backup value */
        ulint   page_size;      /* backup value */
        ulint   frame_offset;   /* offset of the first frame based on chunk->mem */
        ulint   zip_hash_offset;
        ulint   zip_hash_n;
        ulint   checksum;
        buf_pool_t      buf_pool_backup;
        buf_chunk_t     chunk_backup;
        ib_uint64_t     dummy;
};

#define BUF_SHM_INFO_HEAD "XTRA_SHM"
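/* Layout of the shared memory segment as set up by buf_chunk_init()
below; a sketch inferred from that code, for orientation only:

        chunk->mem
        +------------------------+  offset 0
        | buf_shm_info_t header  |  (this struct)
        +------------------------+  sizeof(buf_shm_info_t)
        | block descriptors      |  chunk->blocks[]
        +------------------------+  frame_offset (UNIV_PAGE_SIZE aligned)
        | buffer frames          |  chunk->size * UNIV_PAGE_SIZE
        +------------------------+  zip_hash_offset
        | zip_hash table         |  zip_hash_mem_size bytes
        +------------------------+  chunk->mem_size
*/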
#endif /* !UNIV_HOTBACKUP */

/********************************************************************//**
Calculates a page checksum which is stored to the page when it is written
to a file. Note that we must be careful to calculate the same value on
32-bit and 64-bit architectures.
@return checksum */
UNIV_INTERN
ulint
buf_calc_page_new_checksum(
/*=======================*/
        const byte*     page)   /*!< in: buffer page */
{
        ulint checksum;

        /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x
        ..._ARCH_LOG_NO, are written outside the buffer pool to the first
        pages of data files, we have to skip them in the page checksum
        calculation.
        We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the
        checksum is stored, and also the last 8 bytes of page because
        there we store the old formula checksum. */

        checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
                                  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
                + ut_fold_binary(page + FIL_PAGE_DATA,
                                 UNIV_PAGE_SIZE - FIL_PAGE_DATA
                                 - FIL_PAGE_END_LSN_OLD_CHKSUM);
        checksum = checksum & 0xFFFFFFFFUL;

        return(checksum);
}
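/* The fold above covers two byte ranges of the page; a sketch derived
from the code above (symbolic offsets, not to scale):

        0 .. FIL_PAGE_OFFSET                    skipped (FIL_PAGE_SPACE_OR_CHKSUM,
                                                where the checksum itself is stored)
        FIL_PAGE_OFFSET .. FIL_PAGE_FILE_FLUSH_LSN      folded
        FIL_PAGE_FILE_FLUSH_LSN .. FIL_PAGE_DATA        skipped (written outside
                                                        the buffer pool)
        FIL_PAGE_DATA .. UNIV_PAGE_SIZE - 8             folded
        UNIV_PAGE_SIZE - 8 .. UNIV_PAGE_SIZE            skipped (old formula
                                                        checksum / low lsn bytes)
*/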
UNIV_INTERN
ulint
buf_calc_page_new_checksum_32(
/*==========================*/
        const byte*     page)   /*!< in: buffer page */
{
        ulint checksum;

        checksum = ut_fold_binary(page + FIL_PAGE_OFFSET,
                                  FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET)
                + ut_fold_binary(page + FIL_PAGE_DATA,
                                 FIL_PAGE_DATA_ALIGN_32 - FIL_PAGE_DATA)
                + ut_fold_binary_32(page + FIL_PAGE_DATA_ALIGN_32,
                                    UNIV_PAGE_SIZE - FIL_PAGE_DATA_ALIGN_32
                                    - FIL_PAGE_END_LSN_OLD_CHKSUM);
        checksum = checksum & 0xFFFFFFFFUL;

        return(checksum);
}

/********************************************************************//**
In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only
looked at the first few bytes of the page. This calculates that old
checksum.
NOTE: we must first store the new formula checksum to
FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum
because this takes that field as an input!
@return checksum */
UNIV_INTERN
ulint
buf_calc_page_old_checksum(
/*=======================*/
        const byte*     page)   /*!< in: buffer page */
{
        ulint checksum;

        checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);

        checksum = checksum & 0xFFFFFFFFUL;

        return(checksum);
}
/********************************************************************//**
Checks if a page is corrupt.
@return TRUE if corrupted */
UNIV_INTERN
ibool
buf_page_is_corrupted(
/*==================*/
        const byte*     read_buf,       /*!< in: a database page */
        ulint           zip_size)       /*!< in: size of compressed page;
                                        0 for uncompressed pages */
{
        ulint           checksum_field;
        ulint           old_checksum_field;

        if (UNIV_LIKELY(!zip_size)
            && memcmp(read_buf + FIL_PAGE_LSN + 4,
                      read_buf + UNIV_PAGE_SIZE
                      - FIL_PAGE_END_LSN_OLD_CHKSUM + 4, 4)) {

                /* Stored log sequence numbers at the start and the end
                of page do not match */

                return(TRUE);
        }

#ifndef UNIV_HOTBACKUP
        if (recv_lsn_checks_on) {
                ib_uint64_t     current_lsn;

                if (log_peek_lsn(&current_lsn)
                    && current_lsn < mach_read_ull(read_buf + FIL_PAGE_LSN)) {
                        ut_print_timestamp(stderr);

                        fprintf(stderr,
                                " InnoDB: Error: page %lu log sequence number"
                                " %llu\n"
                                "InnoDB: is in the future! Current system "
                                "log sequence number %llu.\n"
                                "InnoDB: Your database may be corrupt or "
                                "you may have copied the InnoDB\n"
                                "InnoDB: tablespace but not the InnoDB "
                                "log files. See\n"
                                "InnoDB: " REFMAN "forcing-innodb-recovery.html\n"
                                "InnoDB: for more information.\n",
                                (ulong) mach_read_from_4(read_buf
                                                         + FIL_PAGE_OFFSET),
                                mach_read_ull(read_buf + FIL_PAGE_LSN),
                                current_lsn);
                }
        }
#endif

        /* If checksum validation is enabled, make an additional check before
        returning TRUE to ensure that the checksum is not equal to
        BUF_NO_CHECKSUM_MAGIC, which might be stored by InnoDB with checksums
        disabled. Otherwise, skip the checksum calculation and return FALSE */

        if (UNIV_LIKELY(srv_use_checksums)) {
                checksum_field = mach_read_from_4(read_buf
                                                  + FIL_PAGE_SPACE_OR_CHKSUM);

                if (UNIV_UNLIKELY(zip_size)) {
                        return(checksum_field != BUF_NO_CHECKSUM_MAGIC
                               && checksum_field
                               != page_zip_calc_checksum(read_buf, zip_size));
                }

                old_checksum_field = mach_read_from_4(
                        read_buf + UNIV_PAGE_SIZE
                        - FIL_PAGE_END_LSN_OLD_CHKSUM);

                /* There are 2 valid formulas for old_checksum_field:

                1. Very old versions of InnoDB only stored an 8 byte lsn at
                the start and the end of the page.

                2. Newer InnoDB versions store the old formula checksum
                there. */

                if (old_checksum_field != mach_read_from_4(read_buf
                                                           + FIL_PAGE_LSN)
                    && old_checksum_field != BUF_NO_CHECKSUM_MAGIC
                    && old_checksum_field
                    != buf_calc_page_old_checksum(read_buf)) {

                        return(TRUE);
                }

                /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
                (always equal to 0) to FIL_PAGE_SPACE_OR_CHKSUM */

                if (!srv_fast_checksum
                    && checksum_field != 0
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC
                    && checksum_field
                    != buf_calc_page_new_checksum(read_buf)) {

                        return(TRUE);
                }

                if (srv_fast_checksum
                    && checksum_field != 0
                    && checksum_field != BUF_NO_CHECKSUM_MAGIC
                    && checksum_field
                    != buf_calc_page_new_checksum_32(read_buf)
                    && checksum_field
                    != buf_calc_page_new_checksum(read_buf)) {

                        return(TRUE);
                }
        }

        return(FALSE);
}
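/* A minimal usage sketch (a hypothetical caller fragment, not part of
this file): after a read completes into read_buf, a caller could reject
corrupt pages like this. buf_page_io_complete() performs essentially this
check on every completed read. */
#if 0
        if (buf_page_is_corrupted(read_buf, zip_size)) {
                buf_page_print(read_buf, zip_size);
                /* reject the page: retry the read or abort */
        }
#endif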
/********************************************************************//**
Prints a page to stderr. */
UNIV_INTERN
void
buf_page_print(
/*===========*/
        const byte*     read_buf,       /*!< in: a database page */
        ulint           zip_size)       /*!< in: compressed page size, or
                                        0 for uncompressed pages */
{
#ifndef UNIV_HOTBACKUP
        dict_index_t*   index;
#endif /* !UNIV_HOTBACKUP */
        ulint           checksum;
        ulint           checksum_32;
        ulint           old_checksum;
        ulint           size    = zip_size;

        if (!size) {
                size = UNIV_PAGE_SIZE;
        }

        ut_print_timestamp(stderr);
        fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
                (ulong) size);
        ut_print_buf(stderr, read_buf, size);
        fputs("\nInnoDB: End of page dump\n", stderr);

        if (zip_size) {
                /* Print compressed page. */

                switch (fil_page_get_type(read_buf)) {
                case FIL_PAGE_TYPE_ZBLOB:
                case FIL_PAGE_TYPE_ZBLOB2:
                        checksum = srv_use_checksums
                                ? page_zip_calc_checksum(read_buf, zip_size)
                                : BUF_NO_CHECKSUM_MAGIC;
                        ut_print_timestamp(stderr);
                        fprintf(stderr,
                                " InnoDB: Compressed BLOB page"
                                " checksum %lu, stored %lu\n"
                                "InnoDB: Page lsn %lu %lu\n"
                                "InnoDB: Page number (if stored"
                                " to page already) %lu,\n"
                                "InnoDB: space id (if stored"
                                " to page already) %lu\n",
                                (ulong) checksum,
                                (ulong) mach_read_from_4(
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
                                (ulong) mach_read_from_4(
                                        read_buf + FIL_PAGE_LSN),
                                (ulong) mach_read_from_4(
                                        read_buf + (FIL_PAGE_LSN + 4)),
                                (ulong) mach_read_from_4(
                                        read_buf + FIL_PAGE_OFFSET),
                                (ulong) mach_read_from_4(
                                        read_buf
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
                        return;
                default:
                        ut_print_timestamp(stderr);
                        fprintf(stderr,
                                " InnoDB: unknown page type %lu,"
                                " assuming FIL_PAGE_INDEX\n",
                                fil_page_get_type(read_buf));
                        /* fall through */
                case FIL_PAGE_INDEX:
                        checksum = srv_use_checksums
                                ? page_zip_calc_checksum(read_buf, zip_size)
                                : BUF_NO_CHECKSUM_MAGIC;

                        ut_print_timestamp(stderr);
                        fprintf(stderr,
                                " InnoDB: Compressed page checksum %lu,"
                                " stored %lu\n"
                                "InnoDB: Page lsn %lu %lu\n"
                                "InnoDB: Page number (if stored"
                                " to page already) %lu,\n"
                                "InnoDB: space id (if stored"
                                " to page already) %lu\n",
                                (ulong) checksum,
                                (ulong) mach_read_from_4(
                                        read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
                                (ulong) mach_read_from_4(
                                        read_buf + FIL_PAGE_LSN),
                                (ulong) mach_read_from_4(
                                        read_buf + (FIL_PAGE_LSN + 4)),
                                (ulong) mach_read_from_4(
                                        read_buf + FIL_PAGE_OFFSET),
                                (ulong) mach_read_from_4(
                                        read_buf
                                        + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));
                        return;
                case FIL_PAGE_TYPE_XDES:
                        /* This is an uncompressed page. */
                        break;
                }
        }

        checksum = srv_use_checksums
                ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;
        checksum_32 = srv_use_checksums
                ? buf_calc_page_new_checksum_32(read_buf) : BUF_NO_CHECKSUM_MAGIC;
        old_checksum = srv_use_checksums
                ? buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC;

        ut_print_timestamp(stderr);
        fprintf(stderr,
                " InnoDB: Page checksum %lu (32bit_calc: %lu), prior-to-4.0.14-form"
                " checksum %lu\n"
                "InnoDB: stored checksum %lu, prior-to-4.0.14-form"
                " stored checksum %lu\n"
                "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn"
                " at page end %lu\n"
                "InnoDB: Page number (if stored to page already) %lu,\n"
                "InnoDB: space id (if created with >= MySQL-4.1.1"
                " and stored already) %lu\n",
                (ulong) checksum, (ulong) checksum_32, (ulong) old_checksum,
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM),
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM),
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN),
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4),
                (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE
                                         - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
                (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET),
                (ulong) mach_read_from_4(read_buf
                                         + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID));

#ifndef UNIV_HOTBACKUP
        if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE)
            == TRX_UNDO_INSERT) {
                fprintf(stderr,
                        "InnoDB: Page may be an insert undo log page\n");
        } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR
                                    + TRX_UNDO_PAGE_TYPE)
                   == TRX_UNDO_UPDATE) {
                fprintf(stderr,
                        "InnoDB: Page may be an update undo log page\n");
        }
#endif /* !UNIV_HOTBACKUP */

        switch (fil_page_get_type(read_buf)) {
        case FIL_PAGE_INDEX:
                fprintf(stderr,
                        "InnoDB: Page may be an index page where"
                        " index id is %lu %lu\n",
                        (ulong) ut_dulint_get_high(
                                btr_page_get_index_id(read_buf)),
                        (ulong) ut_dulint_get_low(
                                btr_page_get_index_id(read_buf)));
#ifndef UNIV_HOTBACKUP
                index = dict_index_find_on_id_low(
                        btr_page_get_index_id(read_buf));
                if (index) {
                        fputs("InnoDB: (", stderr);
                        dict_index_name_print(stderr, NULL, index);
                        fputs(")\n", stderr);
                }
#endif /* !UNIV_HOTBACKUP */
                break;
        case FIL_PAGE_INODE:
                fputs("InnoDB: Page may be an 'inode' page\n", stderr);
                break;
        case FIL_PAGE_IBUF_FREE_LIST:
                fputs("InnoDB: Page may be an insert buffer free list page\n",
                      stderr);
                break;
        case FIL_PAGE_TYPE_ALLOCATED:
                fputs("InnoDB: Page may be a freshly allocated page\n",
                      stderr);
                break;
        case FIL_PAGE_IBUF_BITMAP:
                fputs("InnoDB: Page may be an insert buffer bitmap page\n",
                      stderr);
                break;
        case FIL_PAGE_TYPE_SYS:
                fputs("InnoDB: Page may be a system page\n",
                      stderr);
                break;
        case FIL_PAGE_TYPE_TRX_SYS:
                fputs("InnoDB: Page may be a transaction system page\n",
                      stderr);
                break;
        case FIL_PAGE_TYPE_FSP_HDR:
                fputs("InnoDB: Page may be a file space header page\n",
                      stderr);
                break;
        case FIL_PAGE_TYPE_XDES:
                fputs("InnoDB: Page may be an extent descriptor page\n",
                      stderr);
                break;
        case FIL_PAGE_TYPE_BLOB:
                fputs("InnoDB: Page may be a BLOB page\n",
                      stderr);
                break;
        case FIL_PAGE_TYPE_ZBLOB:
        case FIL_PAGE_TYPE_ZBLOB2:
                fputs("InnoDB: Page may be a compressed BLOB page\n",
                      stderr);
                break;
        }
}

#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Initializes a buffer control block when the buf_pool is created. */
static
void
buf_block_init(
/*===========*/
        buf_block_t*    block,  /*!< in: pointer to control block */
        byte*           frame)  /*!< in: pointer to buffer frame */
{
        UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE, block);

        block->frame = frame;

        block->page.state = BUF_BLOCK_NOT_USED;
        block->page.buf_fix_count = 0;
        block->page.io_fix = BUF_IO_NONE;

        block->modify_clock = 0;

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
        block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */

        block->check_index_page_at_flush = FALSE;
        block->index = NULL;

#ifdef UNIV_DEBUG
        block->page.in_page_hash = FALSE;
        block->page.in_zip_hash = FALSE;
        block->page.in_flush_list = FALSE;
        block->page.in_free_list = FALSE;
#endif /* UNIV_DEBUG */
        block->page.in_LRU_list = FALSE;
        block->in_unzip_LRU_list = FALSE;
#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
        block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
        page_zip_des_init(&block->page.zip);

        mutex_create(&block->mutex, SYNC_BUF_BLOCK);

        rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
        ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
        rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}

static
void
buf_block_reuse(
/*============*/
        buf_block_t*    block,
        ptrdiff_t       frame_offset)
{
        /* block_init */
        block->frame += frame_offset;

        UNIV_MEM_DESC(block->frame, UNIV_PAGE_SIZE, block);

        block->index = NULL;

#ifdef UNIV_DEBUG
        /* recreate later */
        block->page.in_page_hash = FALSE;
        block->page.in_zip_hash = FALSE;
#endif /* UNIV_DEBUG */

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
        block->n_pointers = 0;
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */

        if (block->page.zip.data)
                block->page.zip.data += frame_offset;

        block->is_hashed = FALSE;

        mutex_create(&block->mutex, SYNC_BUF_BLOCK);

        rw_lock_create(&block->lock, SYNC_LEVEL_VARYING);
        ut_ad(rw_lock_validate(&(block->lock)));

#ifdef UNIV_SYNC_DEBUG
        rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
}
/********************************************************************//**
Allocates a chunk of buffer frames.
@return chunk, or NULL on failure */
static
buf_chunk_t*
buf_chunk_init(
/*===========*/
        buf_chunk_t*    chunk,          /*!< out: chunk of buffers */
        ulint           mem_size)       /*!< in: requested size in bytes */
{
        buf_block_t*    block;
        byte*           frame;
        ulint           zip_hash_n = 0;
        ulint           zip_hash_mem_size = 0;
        hash_table_t*   zip_hash_tmp = NULL;
        ulint           i;
        ulint           size_target;
        buf_shm_info_t* shm_info = NULL;

        /* Round down to a multiple of page size,
        although it already should be. */
        mem_size = ut_2pow_round(mem_size, UNIV_PAGE_SIZE);
        size_target = (mem_size / UNIV_PAGE_SIZE) - 1;

        srv_buffer_pool_shm_is_reused = FALSE;

        if (srv_buffer_pool_shm_key) {
                /* zip_hash size */
                zip_hash_n = (mem_size / UNIV_PAGE_SIZE) * 2;
                zip_hash_mem_size = ut_2pow_round(hash_create_needed(zip_hash_n)
                                                  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
        }

        /* Reserve space for the block descriptors. */
        mem_size += ut_2pow_round((mem_size / UNIV_PAGE_SIZE) * (sizeof *block)
                                  + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
        if (srv_buffer_pool_shm_key) {
                mem_size += ut_2pow_round(sizeof(buf_shm_info_t)
                                          + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE);
                mem_size += zip_hash_mem_size;
        }

        chunk->mem_size = mem_size;

        if (srv_buffer_pool_shm_key) {
                ulint   binary_id;
                ibool   is_new;

                ut_a(buf_pool->n_chunks == 1);

                fprintf(stderr,
                        "InnoDB: Warning: The innodb_buffer_pool_shm_key option has been specified.\n"
                        "InnoDB: Do not change the following between restarts of the server while this option is being used:\n"
                        "InnoDB: * the mysqld executable between restarts of the server.\n"
                        "InnoDB: * the value of innodb_buffer_pool_size.\n"
                        "InnoDB: * the value of innodb_page_size.\n"
                        "InnoDB: * datafiles created by InnoDB during this session.\n"
                        "InnoDB: Otherwise, data corruption in datafiles may result.\n");

                /* FIXME: This is still a vague id */
                binary_id = (ulint) ((byte*)mtr_commit - (byte*)btr_root_get)
                          + (ulint) ((byte*)os_get_os_version - (byte*)buf_calc_page_new_checksum)
                          + (ulint) ((byte*)page_dir_find_owner_slot - (byte*)dfield_data_is_binary_equal)
                          + (ulint) ((byte*)que_graph_publish - (byte*)dict_casedn_str)
                          + (ulint) ((byte*)read_view_oldest_copy_or_open_new - (byte*)fil_space_get_version)
                          + (ulint) ((byte*)rec_get_n_extern_new - (byte*)fsp_get_size_low)
                          + (ulint) ((byte*)row_get_trx_id_offset - (byte*)ha_create_func)
                          + (ulint) ((byte*)srv_set_io_thread_op_info - (byte*)thd_is_replication_slave_thread)
                          + (ulint) ((byte*)mutex_create_func - (byte*)ibuf_inside)
                          + (ulint) ((byte*)trx_set_detailed_error - (byte*)lock_check_trx_id_sanity)
                          + (ulint) ((byte*)ut_time - (byte*)mem_heap_strdup);

                chunk->mem = os_shm_alloc(&chunk->mem_size, srv_buffer_pool_shm_key, &is_new);

                if (UNIV_UNLIKELY(chunk->mem == NULL)) {
                        return(NULL);
                }
init_again:
#ifdef UNIV_SET_MEM_TO_ZERO
                if (is_new) {
                        memset(chunk->mem, '\0', chunk->mem_size);
                }
#endif
                /* for ut_fold_binary_32(), these values should be 32-bit aligned */
                ut_a(sizeof(buf_shm_info_t) % 4 == 0);
                ut_a((ulint)chunk->mem % 4 == 0);
                ut_a(chunk->mem_size % 4 == 0);

                shm_info = chunk->mem;

                zip_hash_tmp = (hash_table_t*)((byte*)chunk->mem + chunk->mem_size - zip_hash_mem_size);

                if (is_new) {
                        strncpy(shm_info->head_str, BUF_SHM_INFO_HEAD, 8);
                        shm_info->binary_id = binary_id;
                        shm_info->is_new = TRUE;        /* changed to FALSE when the initialization is finished */
                        shm_info->clean = FALSE;        /* changed to TRUE when the segment is freed */
                        shm_info->reusable = FALSE;     /* changed to TRUE when validation is finished */
                        shm_info->buf_pool_size = srv_buf_pool_size;
                        shm_info->page_size = srv_page_size;
                        shm_info->zip_hash_offset = chunk->mem_size - zip_hash_mem_size;
                        shm_info->zip_hash_n = zip_hash_n;
                } else {
                        ulint   checksum;

                        if (strncmp(shm_info->head_str, BUF_SHM_INFO_HEAD, 8)) {
                                fprintf(stderr,
                                        "InnoDB: Error: The shared memory segment does not seem to be for a buffer pool.\n");
                                return(NULL);
                        }
                        if (shm_info->binary_id != binary_id) {
                                fprintf(stderr,
                                        "InnoDB: Error: The shared memory segment does not seem to be for this binary.\n");
                                return(NULL);
                        }
                        if (shm_info->is_new) {
                                fprintf(stderr,
                                        "InnoDB: Error: The shared memory was not initialized yet.\n");
                                return(NULL);
                        }
                        if (shm_info->buf_pool_size != srv_buf_pool_size) {
                                fprintf(stderr,
                                        "InnoDB: Error: srv_buf_pool_size is different (shm=%lu current=%lu).\n",
                                        shm_info->buf_pool_size, srv_buf_pool_size);
                                return(NULL);
                        }
                        if (shm_info->page_size != srv_page_size) {
                                fprintf(stderr,
                                        "InnoDB: Error: srv_page_size is different (shm=%lu current=%lu).\n",
                                        shm_info->page_size, srv_page_size);
                                return(NULL);
                        }
                        if (!shm_info->reusable) {
                                fprintf(stderr,
                                        "InnoDB: Warning: The shared memory has unrecoverable contents.\n"
                                        "InnoDB: The shared memory segment is initialized.\n");
                                is_new = TRUE;
                                goto init_again;
                        }
                        if (!shm_info->clean) {
                                fprintf(stderr,
                                        "InnoDB: Warning: The shared memory was not shut down cleanly.\n"
                                        "InnoDB: The shared memory segment is initialized.\n");
                                is_new = TRUE;
                                goto init_again;
                        }

                        ut_a(shm_info->zip_hash_offset == chunk->mem_size - zip_hash_mem_size);
                        ut_a(shm_info->zip_hash_n == zip_hash_n);

                        /* check the checksum */
                        if (srv_buffer_pool_shm_checksum) {
                                checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
                                                             chunk->mem_size - sizeof(buf_shm_info_t));
                        } else {
                                checksum = BUF_NO_CHECKSUM_MAGIC;
                        }

                        if (shm_info->checksum != BUF_NO_CHECKSUM_MAGIC
                            && shm_info->checksum != checksum) {
                                fprintf(stderr,
                                        "InnoDB: Error: The checksum of the shared memory does not match. "
                                        "(stored=%lu calculated=%lu)\n",
                                        shm_info->checksum, checksum);
                                return(NULL);
                        }

                        /* flag that the segment is in use */
                        shm_info->clean = FALSE;        /* changed to TRUE when the segment is freed */
                }

                /* init zip_hash contents */
                if (is_new) {
                        hash_create_init(zip_hash_tmp, zip_hash_n);
                } else {
                        /* adjusting the offset is done later */
                        hash_create_reuse(zip_hash_tmp);

                        srv_buffer_pool_shm_is_reused = TRUE;
                }
        } else {
                chunk->mem = os_mem_alloc_large(&chunk->mem_size);

                if (UNIV_UNLIKELY(chunk->mem == NULL)) {

                        return(NULL);
                }
        }

        /* Allocate the block descriptors from
        the start of the memory block. */
        if (srv_buffer_pool_shm_key) {
                chunk->blocks = (buf_block_t*)((byte*)chunk->mem + sizeof(buf_shm_info_t));
        } else {
                chunk->blocks = chunk->mem;
        }

        /* Align a pointer to the first frame. Note that when
        os_large_page_size is smaller than UNIV_PAGE_SIZE,
        we may allocate one fewer block than requested. When
        it is bigger, we may allocate more blocks than requested. */

        frame = ut_align(chunk->mem, UNIV_PAGE_SIZE);

        if (srv_buffer_pool_shm_key) {
                /* reserve zip_hash space and always subtract 1 for reproducibility */
                chunk->size = (chunk->mem_size - zip_hash_mem_size) / UNIV_PAGE_SIZE - 1;
        } else {
                chunk->size = chunk->mem_size / UNIV_PAGE_SIZE
                        - (frame != chunk->mem);
        }

        /* Subtract the space needed for block descriptors. */
        {
                ulint   size = chunk->size;

                while (frame < (byte*) (chunk->blocks + size)) {
                        frame += UNIV_PAGE_SIZE;
                        size--;
                }

                chunk->size = size;
        }

        if (chunk->size > size_target) {
                chunk->size = size_target;
        }

        if (shm_info && !(shm_info->is_new)) {
                /* convert the shared memory segment for reuse */
                ptrdiff_t       phys_offset;
                ptrdiff_t       logi_offset;
                ptrdiff_t       blocks_offset;
                void*           previous_frame_address;

                if (chunk->size < shm_info->chunk_backup.size) {
                        fprintf(stderr,
                                "InnoDB: Error: The buffer pool became smaller because of the allocated address.\n"
                                "InnoDB: Retrying may avoid this situation.\n");
                        shm_info->clean = TRUE; /* release the flag for retrying */
                        return(NULL);
                }

                chunk->size = shm_info->chunk_backup.size;
                phys_offset = frame - ((byte*)chunk->mem + shm_info->frame_offset);
                logi_offset = frame - chunk->blocks[0].frame;
                previous_frame_address = chunk->blocks[0].frame;
                blocks_offset = (byte*)chunk->blocks - (byte*)shm_info->chunk_backup.blocks;

                if (phys_offset || logi_offset || blocks_offset) {
                        fprintf(stderr,
                                "InnoDB: Buffer pool in the shared memory segment should be converted.\n"
                                "InnoDB: Previous frames in address      : %p\n"
                                "InnoDB: Previous frames were located    : %p\n"
                                "InnoDB: Current frames should be located: %p\n"
                                "InnoDB: Physical offset                 : %ld (%#lx)\n"
                                "InnoDB: Logical offset (frames)         : %ld (%#lx)\n"
                                "InnoDB: Logical offset (blocks)         : %ld (%#lx)\n",
                                (byte*)chunk->mem + shm_info->frame_offset,
                                chunk->blocks[0].frame, frame,
                                (long) phys_offset, (long) phys_offset,
                                (long) logi_offset, (long) logi_offset,
                                (long) blocks_offset, (long) blocks_offset);
                } else {
                        fprintf(stderr,
                                "InnoDB: Buffer pool in the shared memory segment can be used as it is.\n");
                }

                if (phys_offset) {
                        fprintf(stderr,
                                "InnoDB: Aligning physical offset...");

                        memmove(frame, (byte*)chunk->mem + shm_info->frame_offset,
                                chunk->size * UNIV_PAGE_SIZE);

                        fprintf(stderr,
                                " Done.\n");
                }

                /* buf_block_t */
                block = chunk->blocks;
                for (i = chunk->size; i--; ) {
                        buf_block_reuse(block, logi_offset);
                        block++;
                }

                if (logi_offset || blocks_offset) {
                        fprintf(stderr,
                                "InnoDB: Aligning logical offset...");

                        /* buf_pool_t buf_pool_backup */
                        UT_LIST_OFFSET(flush_list, buf_page_t, shm_info->buf_pool_backup.flush_list,
                                       previous_frame_address, logi_offset, blocks_offset);
                        UT_LIST_OFFSET(free, buf_page_t, shm_info->buf_pool_backup.free,
                                       previous_frame_address, logi_offset, blocks_offset);
                        UT_LIST_OFFSET(LRU, buf_page_t, shm_info->buf_pool_backup.LRU,
                                       previous_frame_address, logi_offset, blocks_offset);
                        if (shm_info->buf_pool_backup.LRU_old)
                                shm_info->buf_pool_backup.LRU_old =
                                        (buf_page_t*)((byte*)(shm_info->buf_pool_backup.LRU_old)
                                        + (((void*)shm_info->buf_pool_backup.LRU_old > previous_frame_address)
                                           ? logi_offset : blocks_offset));

                        UT_LIST_OFFSET(unzip_LRU, buf_block_t, shm_info->buf_pool_backup.unzip_LRU,
                                       previous_frame_address, logi_offset, blocks_offset);

                        UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_clean,
                                       previous_frame_address, logi_offset, blocks_offset);
                        for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
                                UT_LIST_OFFSET(zip_list, buf_page_t, shm_info->buf_pool_backup.zip_free[i],
                                               previous_frame_address, logi_offset, blocks_offset);
                        }

                        HASH_OFFSET(zip_hash_tmp, buf_page_t, hash,
                                    previous_frame_address, logi_offset, blocks_offset);

                        fprintf(stderr,
                                " Done.\n");
                }
        } else {
                /* Init block structs and assign frames for them. Then we
                assign the frames to the first blocks (we already mapped the
                memory above). */

                block = chunk->blocks;

                for (i = chunk->size; i--; ) {

                        buf_block_init(block, frame);

#ifdef HAVE_valgrind
                        /* Wipe contents of frame to eliminate a Purify warning */
                        memset(block->frame, '\0', UNIV_PAGE_SIZE);
#endif
                        /* Add the block to the free list */
                        mutex_enter(&free_list_mutex);
                        UT_LIST_ADD_LAST(free, buf_pool->free, (&block->page));

                        ut_d(block->page.in_free_list = TRUE);
                        mutex_exit(&free_list_mutex);

                        block++;
                        frame += UNIV_PAGE_SIZE;
                }
        }

        if (shm_info) {
                shm_info->frame_offset = chunk->blocks[0].frame - (byte*)chunk->mem;
        }

        return(chunk);
}
#ifdef UNIV_DEBUG
/*********************************************************************//**
Finds a block in the given buffer chunk that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
static
buf_block_t*
buf_chunk_contains_zip(
/*===================*/
        buf_chunk_t*    chunk,  /*!< in: chunk being checked */
        const void*     data)   /*!< in: pointer to compressed page */
{
        buf_block_t*    block;
        ulint           i;

        ut_ad(buf_pool);
        //ut_ad(buf_pool_mutex_own());

        block = chunk->blocks;

        for (i = chunk->size; i--; block++) {
                if (block->page.zip.data == data) {

                        return(block);
                }
        }

        return(NULL);
}

/*********************************************************************//**
Finds a block in the buffer pool that points to a
given compressed page.
@return buffer block pointing to the compressed page, or NULL */
UNIV_INTERN
buf_block_t*
buf_pool_contains_zip(
/*==================*/
        const void*     data)   /*!< in: pointer to compressed page */
{
        ulint           n;
        buf_chunk_t*    chunk = buf_pool->chunks;

        for (n = buf_pool->n_chunks; n--; chunk++) {
                buf_block_t* block = buf_chunk_contains_zip(chunk, data);

                if (block) {
                        return(block);
                }
        }

        return(NULL);
}
#endif /* UNIV_DEBUG */

/*********************************************************************//**
Checks that all file pages in the buffer chunk are in a replaceable state.
@return address of a non-free block, or NULL if all freed */
static
const buf_block_t*
buf_chunk_not_freed(
/*================*/
        buf_chunk_t*    chunk)  /*!< in: chunk being checked */
{
        buf_block_t*    block;
        ulint           i;

        ut_ad(buf_pool);
        //ut_ad(buf_pool_mutex_own()); /*optimistic...*/

        block = chunk->blocks;

        for (i = chunk->size; i--; block++) {
                ibool   ready;

                switch (buf_block_get_state(block)) {
                case BUF_BLOCK_ZIP_FREE:
                case BUF_BLOCK_ZIP_PAGE:
                case BUF_BLOCK_ZIP_DIRTY:
                        /* The uncompressed buffer pool should never
                        contain compressed block descriptors. */
                        ut_error;
                        break;
                case BUF_BLOCK_NOT_USED:
                case BUF_BLOCK_READY_FOR_USE:
                case BUF_BLOCK_MEMORY:
                case BUF_BLOCK_REMOVE_HASH:
                        /* Skip blocks that are not being used for
                        file pages. */
                        break;
                case BUF_BLOCK_FILE_PAGE:
                        mutex_enter(&block->mutex);
                        ready = buf_flush_ready_for_replace(&block->page);
                        mutex_exit(&block->mutex);

                        if (block->page.is_corrupt) {
                                /* a corrupt page may remain; it can be skipped */
                                break;
                        }

                        if (!ready) {

                                return(block);
                        }

                        break;
                }
        }

        return(NULL);
}

/*********************************************************************//**
Checks that all blocks in the buffer chunk are in BUF_BLOCK_NOT_USED state.
@return TRUE if all freed */
static
ibool
buf_chunk_all_free(
/*===============*/
        const buf_chunk_t*      chunk)  /*!< in: chunk being checked */
{
        const buf_block_t*      block;
        ulint                   i;

        ut_ad(buf_pool);
        ut_ad(buf_pool_mutex_own()); /* but we need all mutexes here */

        block = chunk->blocks;

        for (i = chunk->size; i--; block++) {

                if (buf_block_get_state(block) != BUF_BLOCK_NOT_USED) {

                        return(FALSE);
                }
        }

        return(TRUE);
}

/********************************************************************//**
Frees a chunk of buffer frames. */
static
void
buf_chunk_free(
/*===========*/
        buf_chunk_t*    chunk)  /*!< out: chunk of buffers */
{
        buf_block_t*            block;
        const buf_block_t*      block_end;

        ut_ad(buf_pool_mutex_own()); /* but we need all mutexes here */

        block_end = chunk->blocks + chunk->size;

        for (block = chunk->blocks; block < block_end; block++) {
                ut_a(buf_block_get_state(block) == BUF_BLOCK_NOT_USED);
                ut_a(!block->page.zip.data);

                ut_ad(!block->page.in_LRU_list);
                ut_ad(!block->in_unzip_LRU_list);
                ut_ad(!block->page.in_flush_list);

                /* Remove the block from the free list. */
                mutex_enter(&free_list_mutex);
                ut_ad(block->page.in_free_list);
                UT_LIST_REMOVE(free, buf_pool->free, (&block->page));
                mutex_exit(&free_list_mutex);

                /* Free the latches. */
                mutex_free(&block->mutex);
                rw_lock_free(&block->lock);
#ifdef UNIV_SYNC_DEBUG
                rw_lock_free(&block->debug_latch);
#endif /* UNIV_SYNC_DEBUG */
                UNIV_MEM_UNDESC(block);
        }

        ut_a(!srv_buffer_pool_shm_key);

        os_mem_free_large(chunk->mem, chunk->mem_size);
}
/********************************************************************//**
Creates the buffer pool.
@return own: buf_pool object, NULL if not enough memory or error */
UNIV_INTERN
buf_pool_t*
buf_pool_init(void)
/*===============*/
{
	buf_chunk_t*	chunk;
	ulint		i;

	buf_pool = mem_zalloc(sizeof(buf_pool_t));

	/* 1. Initialize general fields
	------------------------------- */
	mutex_create(&buf_pool_mutex, SYNC_BUF_POOL);
	mutex_create(&LRU_list_mutex, SYNC_BUF_LRU_LIST);
	mutex_create(&flush_list_mutex, SYNC_BUF_FLUSH_LIST);
	rw_lock_create(&page_hash_latch, SYNC_BUF_PAGE_HASH);
	mutex_create(&free_list_mutex, SYNC_BUF_FREE_LIST);
	mutex_create(&zip_free_mutex, SYNC_BUF_ZIP_FREE);
	mutex_create(&zip_hash_mutex, SYNC_BUF_ZIP_HASH);

	mutex_create(&buf_pool_zip_mutex, SYNC_BUF_BLOCK);

	mutex_enter(&LRU_list_mutex);
	rw_lock_x_lock(&page_hash_latch);
	buf_pool_mutex_enter();

	buf_pool->n_chunks = 1;
	buf_pool->chunks = chunk = mem_alloc(sizeof *chunk);

	UT_LIST_INIT(buf_pool->free);

	if (!buf_chunk_init(chunk, srv_buf_pool_size)) {
		mem_free(chunk);
		mem_free(buf_pool);
		buf_pool = NULL;
		return(NULL);
	}

	srv_buf_pool_old_size = srv_buf_pool_size;
	buf_pool->curr_size = chunk->size;
	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;

	buf_pool->page_hash = hash_create(2 * buf_pool->curr_size);
	/* zip_hash is allocated in shared memory when
	srv_buffer_pool_shm_key is enabled. */
	if (!srv_buffer_pool_shm_key) {
		buf_pool->zip_hash = hash_create(2 * buf_pool->curr_size);
	}

	buf_pool->last_printout_time = time(NULL);

	/* 2. Initialize flushing fields
	-------------------------------- */

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {
		buf_pool->no_flush[i] = os_event_create(NULL);
	}

	/* 3. Initialize LRU fields
	--------------------------- */
	/* All fields are initialized by mem_zalloc(). */

	if (srv_buffer_pool_shm_key) {
		buf_shm_info_t*	shm_info;

		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));
		shm_info = chunk->mem;

		buf_pool->zip_hash = (hash_table_t*)((byte*)chunk->mem + shm_info->zip_hash_offset);

		if (shm_info->is_new) {
			shm_info->is_new = FALSE; /* initialization was finished */
		} else {
			buf_block_t*	block = chunk->blocks;
			buf_page_t*	b;

			/* The lists in shm_info->buf_pool_backup were
			already converted by buf_chunk_init(), so a
			plain copy is sufficient here. */
			buf_pool->flush_list = shm_info->buf_pool_backup.flush_list;
			buf_pool->freed_page_clock = shm_info->buf_pool_backup.freed_page_clock;
			buf_pool->free = shm_info->buf_pool_backup.free;
			buf_pool->LRU = shm_info->buf_pool_backup.LRU;
			buf_pool->LRU_old = shm_info->buf_pool_backup.LRU_old;
			buf_pool->LRU_old_len = shm_info->buf_pool_backup.LRU_old_len;
			buf_pool->unzip_LRU = shm_info->buf_pool_backup.unzip_LRU;
			buf_pool->zip_clean = shm_info->buf_pool_backup.zip_clean;

			for (i = 0; i < BUF_BUDDY_SIZES_MAX; i++) {
				buf_pool->zip_free[i] = shm_info->buf_pool_backup.zip_free[i];
			}

			for (i = 0; i < chunk->size; i++, block++) {
				if (buf_block_get_state(block)
				    == BUF_BLOCK_FILE_PAGE) {
					ut_d(block->page.in_page_hash = TRUE);
					HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
						    buf_page_address_fold(
							    block->page.space,
							    block->page.offset),
						    &block->page);
				}
			}

			for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
			     b = UT_LIST_GET_NEXT(zip_list, b)) {
				ut_ad(!b->in_flush_list);
				ut_ad(b->in_LRU_list);

				ut_d(b->in_page_hash = TRUE);
				HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
					    buf_page_address_fold(b->space, b->offset), b);
			}

			for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
			     b = UT_LIST_GET_NEXT(flush_list, b)) {
				ut_ad(b->in_flush_list);
				ut_ad(b->in_LRU_list);

				switch (buf_page_get_state(b)) {
				case BUF_BLOCK_ZIP_DIRTY:
					ut_d(b->in_page_hash = TRUE);
					HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
						    buf_page_address_fold(b->space,
									  b->offset), b);
					break;
				case BUF_BLOCK_FILE_PAGE:
					/* uncompressed page */
					break;
				case BUF_BLOCK_ZIP_FREE:
				case BUF_BLOCK_ZIP_PAGE:
				case BUF_BLOCK_NOT_USED:
				case BUF_BLOCK_READY_FOR_USE:
				case BUF_BLOCK_MEMORY:
				case BUF_BLOCK_REMOVE_HASH:
					ut_error;
					break;
				}
			}
		}
	}

	mutex_exit(&LRU_list_mutex);
	rw_lock_x_unlock(&page_hash_latch);
	buf_pool_mutex_exit();

	btr_search_sys_create(buf_pool->curr_size
			      * UNIV_PAGE_SIZE / sizeof(void*) / 64);

	/* 4. Initialize the buddy allocator fields */
	/* All fields are initialized by mem_zalloc(). */

	return(buf_pool);
}
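
/* Note: a minimal usage sketch (assumed; the actual caller sits in the
server startup code, not in this file):

	if (buf_pool_init() == NULL) {
		fprintf(stderr,
			"InnoDB: Fatal error: cannot allocate"
			" the buffer pool\n");
		exit(1);
	}

buf_pool_init() must be called exactly once, before any other buf_*
function, and the NULL return must be checked because chunk allocation
can fail. */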
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
UNIV_INTERN
void
buf_pool_free(void)
/*===============*/
{
	buf_chunk_t*	chunk;
	buf_chunk_t*	chunks;

	if (srv_buffer_pool_shm_key) {
		buf_shm_info_t*	shm_info;

		ut_a(buf_pool->n_chunks == 1);

		chunk = buf_pool->chunks;
		shm_info = chunk->mem;
		ut_a((byte*)chunk->blocks == (byte*)chunk->mem + sizeof(buf_shm_info_t));

		/* Validate that the shared memory segment does not
		contain unrecoverable contents.  (Currently, this
		validation is not needed.) */
		shm_info->reusable = TRUE;

		memcpy(&(shm_info->buf_pool_backup), buf_pool, sizeof(buf_pool_t));
		memcpy(&(shm_info->chunk_backup), chunk, sizeof(buf_chunk_t));

		if (srv_fast_shutdown < 2) {
			if (srv_buffer_pool_shm_checksum) {
				shm_info->checksum = ut_fold_binary_32((byte*)chunk->mem + sizeof(buf_shm_info_t),
								       chunk->mem_size - sizeof(buf_shm_info_t));
			} else {
				shm_info->checksum = BUF_NO_CHECKSUM_MAGIC;
			}
			shm_info->clean = TRUE;
		}

		os_shm_free(chunk->mem, chunk->mem_size);
	} else {
		chunks = buf_pool->chunks;
		chunk = chunks + buf_pool->n_chunks;

		while (--chunk >= chunks) {
			/* Bypass the checks of buf_chunk_free(), since they
			would fail at shutdown. */
			os_mem_free_large(chunk->mem, chunk->mem_size);
		}
	}

	mem_free(buf_pool->chunks);
	hash_table_free(buf_pool->page_hash);
	if (!srv_buffer_pool_shm_key) {
		hash_table_free(buf_pool->zip_hash);
	}
	mem_free(buf_pool);
	buf_pool = NULL;
}
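
/* Note: a sketch (assumed, mirroring the fields written above; the
actual decision is made at the next startup) of how a restart could
decide whether the shared memory segment is reusable:

	reusable = shm_info->clean
		&& (shm_info->checksum == BUF_NO_CHECKSUM_MAGIC
		    || shm_info->checksum == ut_fold_binary_32(
			    (byte*)chunk->mem + sizeof(buf_shm_info_t),
			    chunk->mem_size - sizeof(buf_shm_info_t)));
*/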
/********************************************************************//**
Drops the adaptive hash index. To prevent a livelock, this function
is only to be called while holding btr_search_latch and while
btr_search_enabled == FALSE. */
UNIV_INTERN
void
buf_pool_drop_hash_index(void)
/*==========================*/
{
	ibool		released_search_latch;

#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
	ut_ad(!btr_search_enabled);

	do {
		buf_chunk_t*	chunks	= buf_pool->chunks;
		buf_chunk_t*	chunk	= chunks + buf_pool->n_chunks;

		released_search_latch = FALSE;

		while (--chunk >= chunks) {
			buf_block_t*	block	= chunk->blocks;
			ulint		i	= chunk->size;

			for (; i--; block++) {
				/* block->is_hashed cannot be modified
				when we have an x-latch on btr_search_latch;
				see the comment in buf0buf.h */

				if (buf_block_get_state(block)
				    != BUF_BLOCK_FILE_PAGE
				    || !block->is_hashed) {
					continue;
				}

				/* To follow the latching order, we
				have to release btr_search_latch
				before acquiring block->latch. */
				rw_lock_x_unlock(&btr_search_latch);
				/* When we release the search latch,
				we must rescan all blocks, because
				some may become hashed again. */
				released_search_latch = TRUE;

				rw_lock_x_lock(&block->lock);

				/* This should be guaranteed by the
				callers, which will be holding
				btr_search_enabled_mutex. */
				ut_ad(!btr_search_enabled);

				/* Because we did not buffer-fix the
				block by calling buf_block_get_gen(),
				it is possible that the block has been
				allocated for some other use after
				btr_search_latch was released above.
				We do not care which file page the
				block is mapped to. All we want to do
				is to drop any hash entries referring
				to the page. */

				/* It is possible that
				block->page.state != BUF_BLOCK_FILE_PAGE.
				Even that does not matter, because
				btr_search_drop_page_hash_index() will
				check block->is_hashed before doing
				anything. block->is_hashed can only
				be set on uncompressed file pages. */

				btr_search_drop_page_hash_index(block);

				rw_lock_x_unlock(&block->lock);

				rw_lock_x_lock(&btr_search_latch);

				ut_ad(!btr_search_enabled);
			}
		}
	} while (released_search_latch);
}
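
/* Note: the loop above is the usual latching-order workaround in this
file: btr_search_latch ranks above block->lock, so the function keeps
releasing btr_search_latch, x-latching one block, dropping its hash
entries, and then restarting the scan, until one full pass completes
without ever having released the search latch. */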
/********************************************************************//**
Relocate a buffer control block. Relocates the block on the LRU list
and in buf_pool->page_hash. Does not relocate bpage->list.
The caller must take care of relocating bpage->list. */
UNIV_INTERN
void
buf_relocate(
/*=========*/
	buf_page_t*	bpage,	/*!< in/out: control block being relocated;
				buf_page_get_state(bpage) must be
				BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */
	buf_page_t*	dpage)	/*!< in/out: destination control block */
{
	buf_page_t*	b;
	ulint		fold;

	//ut_ad(buf_pool_mutex_own());
	ut_ad(mutex_own(&LRU_list_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
#endif
	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
	ut_a(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
	ut_a(bpage->buf_fix_count == 0);
	ut_ad(bpage->in_LRU_list);
	ut_ad(!bpage->in_zip_hash);
	ut_ad(bpage->in_page_hash);
	ut_ad(bpage == buf_page_hash_get(bpage->space, bpage->offset));
#ifdef UNIV_DEBUG
	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_FILE_PAGE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
	case BUF_BLOCK_ZIP_DIRTY:
	case BUF_BLOCK_ZIP_PAGE:
		break;
	}
#endif /* UNIV_DEBUG */

	memcpy(dpage, bpage, sizeof *dpage);

	bpage->in_LRU_list = FALSE;
	ut_d(bpage->in_page_hash = FALSE);

	/* relocate buf_pool->LRU */
	b = UT_LIST_GET_PREV(LRU, bpage);
	UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);

	if (b) {
		UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, b, dpage);
	} else {
		UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, dpage);
	}

	if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
		buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
		/* buf_pool->LRU_old must be the first item in the LRU list
		whose "old" flag is set. */
		ut_a(buf_pool->LRU_old->old);
		ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
		     || !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
		ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
		     || UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
	} else {
		/* Check that the "old" flag is consistent in
		the block and its neighbours. */
		buf_page_set_old(dpage, buf_page_is_old(dpage));
#endif /* UNIV_LRU_DEBUG */
	}

	ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU,
			      ut_ad(ut_list_node_313->in_LRU_list)));

	/* relocate buf_pool->page_hash */
	fold = buf_page_address_fold(bpage->space, bpage->offset);
	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
}
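
/* Note: buf_relocate() deliberately leaves bpage->list alone; callers
pair it with a list relocation of their own.  A sketch of the pattern
(the same one used by buf_page_get_gen() later in this file):

	buf_relocate(bpage, &block->page);
	if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) {
		UT_LIST_REMOVE(zip_list, buf_pool->zip_clean, &block->page);
	} else {
		buf_flush_relocate_on_flush_list(bpage, &block->page);
	}
*/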
/********************************************************************//**
Shrinks the buffer pool. */
static
void
buf_pool_shrink(
/*============*/
	ulint	chunk_size)	/*!< in: number of pages to remove */
{
	buf_chunk_t*	chunks;
	buf_chunk_t*	chunk;
	ulint		max_size;
	ulint		max_free_size;
	buf_chunk_t*	max_chunk;
	buf_chunk_t*	max_free_chunk;

	ut_ad(!buf_pool_mutex_own());

try_again:
	btr_search_disable(); /* Empty the adaptive hash index again */
	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);

	if (srv_buffer_pool_shm_key) {
		/* Cannot support shrink */
		goto func_done;
	}

shrink_again:
	if (buf_pool->n_chunks <= 1) {
		/* Cannot shrink if there is only one chunk */
		goto func_done;
	}

	/* Search for the largest free chunk
	not larger than the size difference */
	chunks = buf_pool->chunks;
	chunk = chunks + buf_pool->n_chunks;
	max_size = max_free_size = 0;
	max_chunk = max_free_chunk = NULL;

	while (--chunk >= chunks) {
		if (chunk->size <= chunk_size
		    && chunk->size > max_free_size) {
			if (chunk->size > max_size) {
				max_size = chunk->size;
				max_chunk = chunk;
			}

			if (buf_chunk_all_free(chunk)) {
				max_free_size = chunk->size;
				max_free_chunk = chunk;
			}
		}
	}

	if (!max_free_size) {
		ulint		dirty	= 0;
		ulint		nonfree	= 0;
		buf_block_t*	block;
		buf_block_t*	bend;

		/* Cannot shrink: try again later
		(do not assign srv_buf_pool_old_size) */
		if (!max_chunk) {
			goto func_exit;
		}

		block = max_chunk->blocks;
		bend = block + max_chunk->size;

		/* Move the blocks of chunk to the end of the
		LRU list and try to flush them. */
		for (; block < bend; block++) {
			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_NOT_USED:
				continue;
			case BUF_BLOCK_FILE_PAGE:
				break;
			default:
				nonfree++;
				continue;
			}

			mutex_enter(&block->mutex);
			/* The following calls will temporarily
			release block->mutex and buf_pool_mutex.
			Therefore, we have to always retry,
			even if !dirty && !nonfree. */

			if (!buf_flush_ready_for_replace(&block->page)) {
				buf_LRU_make_block_old(&block->page);
				dirty++;
			} else if (buf_LRU_free_block(&block->page, TRUE, FALSE)
				   != BUF_LRU_FREED) {
				nonfree++;
			}

			mutex_exit(&block->mutex);
		}

		//buf_pool_mutex_exit();
		mutex_exit(&LRU_list_mutex);

		/* Request for a flush of the chunk if it helps.
		Do not flush if there are non-free blocks, since
		flushing will not make the chunk freeable. */
		if (nonfree) {
			/* Avoid busy-waiting. */
			os_thread_sleep(100000);
		} else if (dirty
			   && buf_flush_batch(BUF_FLUSH_LRU, dirty, 0)
			   == ULINT_UNDEFINED) {

			buf_flush_wait_batch_end(BUF_FLUSH_LRU);
		}

		goto try_again;
	}

	max_size = max_free_size;
	max_chunk = max_free_chunk;

	srv_buf_pool_old_size = srv_buf_pool_size;

	/* Rewrite buf_pool->chunks.  Copy everything but max_chunk. */
	chunks = mem_alloc((buf_pool->n_chunks - 1) * sizeof *chunks);
	memcpy(chunks, buf_pool->chunks,
	       (max_chunk - buf_pool->chunks) * sizeof *chunks);
	memcpy(chunks + (max_chunk - buf_pool->chunks),
	       max_chunk + 1,
	       (buf_pool->chunks + buf_pool->n_chunks
		- (max_chunk + 1)) * sizeof *chunks);
	ut_a(buf_pool->curr_size > max_chunk->size);
	buf_pool->curr_size -= max_chunk->size;
	srv_buf_pool_curr_size = buf_pool->curr_size * UNIV_PAGE_SIZE;
	chunk_size -= max_chunk->size;
	buf_chunk_free(max_chunk);
	mem_free(buf_pool->chunks);
	buf_pool->chunks = chunks;
	buf_pool->n_chunks--;

	/* Allow a slack of one megabyte. */
	if (chunk_size > 1048576 / UNIV_PAGE_SIZE) {

		goto shrink_again;
	}

func_done:
	srv_buf_pool_old_size = srv_buf_pool_size;
func_exit:
	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
	btr_search_enable();
}
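
/* Note: the shrink loop above makes progress in one of three ways: it
frees a wholly-free chunk (reducing chunk_size toward the 1 MB slack),
it ages and flushes candidate blocks and retries, or it sleeps briefly
when non-free blocks make the chunk unfreeable for now.  With
srv_buffer_pool_shm_key set, shrinking is simply refused. */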
/********************************************************************//**
Rebuild buf_pool->page_hash. */
static
void
buf_pool_page_hash_rebuild(void)
/*============================*/
{
	ulint		i;
	ulint		n_chunks;
	buf_chunk_t*	chunk;
	hash_table_t*	page_hash;
	hash_table_t*	zip_hash;
	buf_page_t*	b;

	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);
	rw_lock_x_lock(&page_hash_latch);
	mutex_enter(&flush_list_mutex);

	/* Free, create, and populate the hash table. */
	hash_table_free(buf_pool->page_hash);
	buf_pool->page_hash = page_hash = hash_create(2 * buf_pool->curr_size);
	zip_hash = hash_create(2 * buf_pool->curr_size);

	HASH_MIGRATE(buf_pool->zip_hash, zip_hash, buf_page_t, hash,
		     BUF_POOL_ZIP_FOLD_BPAGE);

	hash_table_free(buf_pool->zip_hash);
	buf_pool->zip_hash = zip_hash;

	/* Insert the uncompressed file pages to buf_pool->page_hash. */

	chunk = buf_pool->chunks;
	n_chunks = buf_pool->n_chunks;

	for (i = 0; i < n_chunks; i++, chunk++) {
		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = 0; j < chunk->size; j++, block++) {
			if (buf_block_get_state(block)
			    == BUF_BLOCK_FILE_PAGE) {
				ut_ad(!block->page.in_zip_hash);
				ut_ad(block->page.in_page_hash);

				HASH_INSERT(buf_page_t, hash, page_hash,
					    buf_page_address_fold(
						    block->page.space,
						    block->page.offset),
					    &block->page);
			}
		}
	}

	/* Insert the compressed-only pages to buf_pool->page_hash.
	All such blocks are either in buf_pool->zip_clean or
	in buf_pool->flush_list. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(zip_list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_ad(!b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		HASH_INSERT(buf_page_t, hash, page_hash,
			    buf_page_address_fold(b->space, b->offset), b);
	}

	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(flush_list, b)) {
		ut_ad(b->in_flush_list);
		ut_ad(b->in_LRU_list);
		ut_ad(b->in_page_hash);
		ut_ad(!b->in_zip_hash);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			HASH_INSERT(buf_page_t, hash, page_hash,
				    buf_page_address_fold(b->space,
							  b->offset), b);
			break;
		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;
		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
	rw_lock_x_unlock(&page_hash_latch);
	mutex_exit(&flush_list_mutex);
}
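
/* Note: both tables are recreated with 2 * buf_pool->curr_size cells,
the same sizing used in buf_pool_init(), so after a resize the expected
hash chain length stays below one entry per cell. */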
/********************************************************************//**
Resizes the buffer pool. */
UNIV_INTERN
void
buf_pool_resize(void)
/*=================*/
{
	if (srv_buffer_pool_shm_key) {
		/* Cannot support resize */
		return;
	}

	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);

	if (srv_buf_pool_old_size == srv_buf_pool_size) {

		//buf_pool_mutex_exit();
		mutex_exit(&LRU_list_mutex);
		return;
	}

	if (srv_buf_pool_curr_size + 1048576 > srv_buf_pool_size) {

		//buf_pool_mutex_exit();
		mutex_exit(&LRU_list_mutex);

		/* Disable adaptive hash indexes and empty the index
		in order to free up memory in the buffer pool chunks. */
		buf_pool_shrink((srv_buf_pool_curr_size - srv_buf_pool_size)
				/ UNIV_PAGE_SIZE);
	} else if (srv_buf_pool_curr_size + 1048576 < srv_buf_pool_size) {

		/* Enlarge the buffer pool by at least one megabyte */

		ulint		mem_size
			= srv_buf_pool_size - srv_buf_pool_curr_size;
		buf_chunk_t*	chunks;
		buf_chunk_t*	chunk;

		chunks = mem_alloc((buf_pool->n_chunks + 1) * sizeof *chunks);

		memcpy(chunks, buf_pool->chunks, buf_pool->n_chunks
		       * sizeof *chunks);

		chunk = &chunks[buf_pool->n_chunks];

		if (!buf_chunk_init(chunk, mem_size)) {
			mem_free(chunks);
		} else {
			buf_pool->curr_size += chunk->size;
			srv_buf_pool_curr_size = buf_pool->curr_size
				* UNIV_PAGE_SIZE;
			mem_free(buf_pool->chunks);
			buf_pool->chunks = chunks;
			buf_pool->n_chunks++;
		}

		srv_buf_pool_old_size = srv_buf_pool_size;
		//buf_pool_mutex_exit();
		mutex_exit(&LRU_list_mutex);
	}

	buf_pool_page_hash_rebuild();
}
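
/* Note: a sketch of the intended flow (assumed; the trigger sits in
the server layer): the server updates srv_buf_pool_size and then calls
buf_pool_resize().  The function works in whole chunks, ignores
differences smaller than the 1 MB slack, and always finishes with
buf_pool_page_hash_rebuild() so page_hash and zip_hash match the new
chunk array. */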
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool. */
UNIV_INTERN
void
buf_page_make_young(
/*================*/
	buf_page_t*	bpage)	/*!< in: buffer block of a file page */
{
	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);

	ut_a(buf_page_in_file(bpage));

	buf_LRU_make_block_young(bpage);

	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
}

/********************************************************************//**
Sets the time of the first access of a page and moves a page to the
start of the buffer pool LRU list if it is too old. This high-level
function can be used to prevent an important page from slipping
out of the buffer pool. */
static
void
buf_page_set_accessed_make_young(
/*=============================*/
	buf_page_t*	bpage,		/*!< in/out: buffer block of a
					file page */
	unsigned	access_time)	/*!< in: bpage->access_time
					read under mutex protection,
					or 0 if unknown */
{
	ut_ad(!buf_pool_mutex_own());
	ut_a(buf_page_in_file(bpage));

	if (buf_page_peek_if_too_old(bpage)) {
		//buf_pool_mutex_enter();
		mutex_enter(&LRU_list_mutex);
		buf_LRU_make_block_young(bpage);
		//buf_pool_mutex_exit();
		mutex_exit(&LRU_list_mutex);
	} else if (!access_time) {
		ulint		time_ms = ut_time_ms();
		mutex_t*	block_mutex = buf_page_get_mutex_enter(bpage);
		//buf_pool_mutex_enter();
		if (block_mutex) {
			buf_page_set_accessed(bpage, time_ms);
			mutex_exit(block_mutex);
		}
		//buf_pool_mutex_exit();
	}
}

/********************************************************************//**
Resets the check_index_page_at_flush field of a page if found in the buffer
pool. */
UNIV_INTERN
void
buf_reset_check_index_page_at_flush(
/*================================*/
	ulint	space,	/*!< in: space id */
	ulint	offset)	/*!< in: page number */
{
	buf_block_t*	block;

	//buf_pool_mutex_enter();
	rw_lock_s_lock(&page_hash_latch);

	block = (buf_block_t*) buf_page_hash_get(space, offset);

	if (block && buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE) {
		block->check_index_page_at_flush = FALSE;
	}

	//buf_pool_mutex_exit();
	rw_lock_s_unlock(&page_hash_latch);
}

/********************************************************************//**
Returns the current state of is_hashed of a page. FALSE if the page is
not in the pool. NOTE that this operation does not fix the page in the
pool if it is found there.
@return TRUE if page hash index is built in search system */
UNIV_INTERN
ibool
buf_page_peek_if_search_hashed(
/*===========================*/
	ulint	space,	/*!< in: space id */
	ulint	offset)	/*!< in: page number */
{
	buf_block_t*	block;
	ibool		is_hashed;

	//buf_pool_mutex_enter();
	rw_lock_s_lock(&page_hash_latch);

	block = (buf_block_t*) buf_page_hash_get(space, offset);

	if (!block || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {
		is_hashed = FALSE;
	} else {
		is_hashed = block->is_hashed;
	}

	//buf_pool_mutex_exit();
	rw_lock_s_unlock(&page_hash_latch);

	return(is_hashed);
}

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
UNIV_INTERN
buf_page_t*
buf_page_set_file_page_was_freed(
/*=============================*/
	ulint	space,	/*!< in: space id */
	ulint	offset)	/*!< in: page number */
{
	buf_page_t*	bpage;

	//buf_pool_mutex_enter();
	rw_lock_s_lock(&page_hash_latch);

	bpage = buf_page_hash_get(space, offset);

	if (bpage) {
		/* bpage->file_page_was_freed can already be TRUE
		when this code is invoked from dict_drop_index_tree() */
		bpage->file_page_was_freed = TRUE;
	}

	//buf_pool_mutex_exit();
	rw_lock_s_unlock(&page_hash_latch);

	return(bpage);
}

/********************************************************************//**
Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@return control block if found in page hash table, otherwise NULL */
UNIV_INTERN
buf_page_t*
buf_page_reset_file_page_was_freed(
/*===============================*/
	ulint	space,	/*!< in: space id */
	ulint	offset)	/*!< in: page number */
{
	buf_page_t*	bpage;

	//buf_pool_mutex_enter();
	rw_lock_s_lock(&page_hash_latch);

	bpage = buf_page_hash_get(space, offset);
	if (bpage) {
		bpage->file_page_was_freed = FALSE;
	}

	//buf_pool_mutex_exit();
	rw_lock_s_unlock(&page_hash_latch);

	return(bpage);
}
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */

/********************************************************************//**
Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@return pointer to the block */
UNIV_INTERN
buf_page_t*
buf_page_get_zip(
/*=============*/
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size */
	ulint		offset)	/*!< in: page number */
{
	buf_page_t*	bpage;
	mutex_t*	block_mutex;
	ibool		must_read;
	unsigned	access_time;
	trx_t*		trx = NULL;
	ulint		sec;
	ulint		ms;
	ib_uint64_t	start_time;
	ib_uint64_t	finish_time;

#ifndef UNIV_LOG_DEBUG
	ut_ad(!ibuf_inside());
#endif
	if (innobase_get_slow_log()) {
		trx = innobase_get_trx();
	}
	buf_pool->stat.n_page_gets++;

	for (;;) {
		//buf_pool_mutex_enter();
lookup:
		rw_lock_s_lock(&page_hash_latch);
		bpage = buf_page_hash_get(space, offset);
		if (bpage) {
			break;
		}

		/* Page not in buf_pool: needs to be read from file */

		//buf_pool_mutex_exit();
		rw_lock_s_unlock(&page_hash_latch);

		buf_read_page(space, zip_size, offset, trx);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
	}

	if (UNIV_UNLIKELY(!bpage->zip.data)) {
		/* There is no compressed page. */
err_exit:
		//buf_pool_mutex_exit();
		rw_lock_s_unlock(&page_hash_latch);
		return(NULL);
	}

	if (srv_pass_corrupt_table) {
		if (bpage->is_corrupt) {
			rw_lock_s_unlock(&page_hash_latch);
			return(NULL);
		}
	}
	ut_a(!(bpage->is_corrupt));

	block_mutex = buf_page_get_mutex_enter(bpage);

	rw_lock_s_unlock(&page_hash_latch);

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
	case BUF_BLOCK_ZIP_FREE:
		if (block_mutex) {
			mutex_exit(block_mutex);
		}
		break;
	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		ut_a(block_mutex == &buf_pool_zip_mutex);
		bpage->buf_fix_count++;
		goto got_block;
	case BUF_BLOCK_FILE_PAGE:
		ut_a(block_mutex == &((buf_block_t*) bpage)->mutex);

		/* Discard the uncompressed page frame if possible. */
		if (buf_LRU_free_block(bpage, FALSE, FALSE) == BUF_LRU_FREED) {
			mutex_exit(block_mutex);
			goto lookup;
		}

		buf_block_buf_fix_inc((buf_block_t*) bpage,
				      __FILE__, __LINE__);
		goto got_block;
	}

	ut_error;
	goto err_exit;

got_block:
	must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
	access_time = buf_page_is_accessed(bpage);

	//buf_pool_mutex_exit();

	mutex_exit(block_mutex);

	buf_page_set_accessed_make_young(bpage, access_time);

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	ut_a(!bpage->file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(bpage->buf_fix_count > 0);
	ut_a(buf_page_in_file(bpage));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	if (must_read) {
		/* Let us wait until the read operation
		completes */

		if (innobase_get_slow_log() && trx && trx->take_stats) {
			ut_usectime(&sec, &ms);
			start_time = (ib_uint64_t)sec * 1000000 + ms;
		} else {
			start_time = 0;
		}
		for (;;) {
			enum buf_io_fix	io_fix;

			mutex_enter(block_mutex);
			io_fix = buf_page_get_io_fix(bpage);
			mutex_exit(block_mutex);

			if (io_fix == BUF_IO_READ) {

				os_thread_sleep(WAIT_FOR_READ);
			} else {
				break;
			}
		}
		if (innobase_get_slow_log() && trx && trx->take_stats && start_time) {
			ut_usectime(&sec, &ms);
			finish_time = (ib_uint64_t)sec * 1000000 + ms;
			trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
		}
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_page_get_space(bpage),
			    buf_page_get_page_no(bpage)) == 0);
#endif

	return(bpage);
}
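
/* Note: a minimal caller sketch (assumed, not from this file); the
block comment above requires every page obtained here to be released
with buf_page_release_zip():

	buf_page_t*	bpage = buf_page_get_zip(space, zip_size, offset);

	if (bpage != NULL) {
		... read from bpage->zip.data ...
		buf_page_release_zip(bpage);
	}

A NULL return covers both "no compressed page exists" and the
srv_pass_corrupt_table case. */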
/********************************************************************//**
Initialize some fields of a control block. */
UNIV_INLINE
void
buf_block_init_low(
/*===============*/
	buf_block_t*	block)	/*!< in: block to init */
{
	block->check_index_page_at_flush = FALSE;
	block->index		= NULL;

	block->n_hash_helps	= 0;
	block->is_hashed	= FALSE;
	block->n_fields		= 1;
	block->n_bytes		= 0;
	block->left_side	= TRUE;
}
#endif /* !UNIV_HOTBACKUP */

/********************************************************************//**
Decompress a block.
@return TRUE if successful */
UNIV_INTERN
ibool
buf_zip_decompress(
/*===============*/
	buf_block_t*	block,	/*!< in/out: block */
	ibool		check)	/*!< in: TRUE=verify the page checksum */
{
	const byte*	frame		= block->page.zip.data;
	ulint		stamp_checksum	= mach_read_from_4(
		frame + FIL_PAGE_SPACE_OR_CHKSUM);

	ut_ad(buf_block_get_zip_size(block));
	ut_a(buf_block_get_space(block) != 0);

	if (UNIV_LIKELY(check && stamp_checksum != BUF_NO_CHECKSUM_MAGIC)) {
		ulint	calc_checksum	= page_zip_calc_checksum(
			frame, page_zip_get_size(&block->page.zip));

		if (UNIV_UNLIKELY(stamp_checksum != calc_checksum)) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: compressed page checksum mismatch"
				" (space %u page %u): %lu != %lu\n",
				block->page.space, block->page.offset,
				stamp_checksum, calc_checksum);
			return(FALSE);
		}
	}

	switch (fil_page_get_type(frame)) {
	case FIL_PAGE_INDEX:
		if (page_zip_decompress(&block->page.zip,
					block->frame, TRUE)) {
			return(TRUE);
		}

		fprintf(stderr,
			"InnoDB: unable to decompress space %lu page %lu\n",
			(ulong) block->page.space,
			(ulong) block->page.offset);
		return(FALSE);

	case FIL_PAGE_TYPE_ALLOCATED:
	case FIL_PAGE_INODE:
	case FIL_PAGE_IBUF_BITMAP:
	case FIL_PAGE_TYPE_FSP_HDR:
	case FIL_PAGE_TYPE_XDES:
	case FIL_PAGE_TYPE_ZBLOB:
	case FIL_PAGE_TYPE_ZBLOB2:
		/* Copy to uncompressed storage. */
		memcpy(block->frame, frame,
		       buf_block_get_zip_size(block));
		return(TRUE);
	}

	ut_print_timestamp(stderr);
	fprintf(stderr,
		" InnoDB: unknown compressed page"
		" type %lu\n",
		fil_page_get_type(frame));
	return(FALSE);
}
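
/* Note: only FIL_PAGE_INDEX pages are actually compressed; the other
page types accepted above are stored verbatim in the compressed frame,
which is why a plain memcpy() of zip_size bytes suffices for them and
why a stored stamp of BUF_NO_CHECKSUM_MAGIC skips verification. */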
#ifndef UNIV_HOTBACKUP
/*******************************************************************//**
Gets the block to whose frame the pointer is pointing.
@return pointer to block, never NULL */
UNIV_INTERN
buf_block_t*
buf_block_align(
/*============*/
	const byte*	ptr)	/*!< in: pointer to a frame */
{
	buf_chunk_t*	chunk;
	ulint		i;

	/* TODO: protect buf_pool->chunks with a mutex (it will
	currently remain constant after buf_pool_init()) */
	for (chunk = buf_pool->chunks, i = buf_pool->n_chunks; i--; chunk++) {
		lint	offs = ptr - chunk->blocks->frame;

		if (UNIV_UNLIKELY(offs < 0)) {

			continue;
		}

		offs >>= UNIV_PAGE_SIZE_SHIFT;

		if (UNIV_LIKELY((ulint) offs < chunk->size)) {
			buf_block_t*	block = &chunk->blocks[offs];

			/* The function buf_chunk_init() invokes
			buf_block_init() so that block[n].frame ==
			block->frame + n * UNIV_PAGE_SIZE.  Check it. */
			ut_ad(block->frame == page_align(ptr));
#ifdef UNIV_DEBUG
			/* A thread that updates these fields must
			hold buf_pool_mutex and block->mutex.  Acquire
			only the latter. */
			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These types should only be used in
				the compressed buffer pool, whose
				memory is allocated from
				buf_pool->chunks, in UNIV_PAGE_SIZE
				blocks flagged as BUF_BLOCK_MEMORY. */
				ut_error;
				break;
			case BUF_BLOCK_NOT_USED:
			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
				/* Some data structures contain
				"guess" pointers to file pages.  The
				file pages may have been freed and
				reused.  Do not complain. */
				break;
			case BUF_BLOCK_REMOVE_HASH:
				/* buf_LRU_block_remove_hashed_page()
				will overwrite the FIL_PAGE_OFFSET and
				FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID with
				0xff and set the state to
				BUF_BLOCK_REMOVE_HASH. */
				ut_ad(page_get_space_id(page_align(ptr))
				      == 0xffffffff);
				ut_ad(page_get_page_no(page_align(ptr))
				      == 0xffffffff);
				break;
			case BUF_BLOCK_FILE_PAGE:
				ut_ad(block->page.space
				      == page_get_space_id(page_align(ptr)));
				ut_ad(block->page.offset
				      == page_get_page_no(page_align(ptr)));
				break;
			}

			mutex_exit(&block->mutex);
#endif /* UNIV_DEBUG */

			return(block);
		}
	}

	/* The block should always be found. */
	ut_error;
	return(NULL);
}
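
/* Note: the pointer arithmetic above inverts the layout guaranteed by
buf_chunk_init(), namely

	chunk->blocks[i].frame == chunk->blocks->frame + i * UNIV_PAGE_SIZE

so (ptr - chunk->blocks->frame) >> UNIV_PAGE_SIZE_SHIFT recovers the
block index i whenever ptr points into that chunk's frames. */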
/********************************************************************//**
Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it.
@return TRUE if ptr belongs to a buf_block_t struct */
UNIV_INTERN
ibool
buf_pointer_is_block_field(
/*=======================*/
	const void*	ptr)	/*!< in: pointer not
				dereferenced */
{
	const buf_chunk_t*		chunk	= buf_pool->chunks;
	const buf_chunk_t* const	echunk	= chunk + buf_pool->n_chunks;

	/* TODO: protect buf_pool->chunks with a mutex (it will
	currently remain constant after buf_pool_init()) */
	while (chunk < echunk) {
		if (ptr >= (void *)chunk->blocks
		    && ptr < (void *)(chunk->blocks + chunk->size)) {

			return(TRUE);
		}

		chunk++;
	}

	return(FALSE);
}

/********************************************************************//**
Find out if a buffer block was created by buf_chunk_init().
@return TRUE if "block" has been added to buf_pool->free by buf_chunk_init() */
static
ibool
buf_block_is_uncompressed(
/*======================*/
	const buf_block_t*	block)	/*!< in: pointer to block,
					not dereferenced */
{
	//ut_ad(buf_pool_mutex_own());

	if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) {
		/* The pointer should be aligned. */
		return(FALSE);
	}

	return(buf_pointer_is_block_field((void *)block));
}

/********************************************************************//**
This is the general function used to get access to a database page.
@return pointer to the block or NULL */
UNIV_INTERN
buf_block_t*
buf_page_get_gen(
/*=============*/
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	ulint		offset,	/*!< in: page number */
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
	buf_block_t*	guess,	/*!< in: guessed block or NULL */
	ulint		mode,	/*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
				BUF_GET_NO_LATCH */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_block_t*	block;
	unsigned	access_time;
	ulint		fix_type;
	ibool		must_read;
	ulint		retries = 0;
	mutex_t*	block_mutex = 0;
	trx_t*		trx = NULL;
	ulint		sec;
	ulint		ms;
	ib_uint64_t	start_time;
	ib_uint64_t	finish_time;

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	ut_ad((rw_latch == RW_S_LATCH)
	      || (rw_latch == RW_X_LATCH)
	      || (rw_latch == RW_NO_LATCH));
	ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH));
	ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL)
	      || (mode == BUF_GET_NO_LATCH));
	ut_ad(zip_size == fil_space_get_zip_size(space));
	ut_ad(ut_is_2pow(zip_size));
#ifndef UNIV_LOG_DEBUG
	ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset, NULL));
#endif
	if (innobase_get_slow_log()) {
		trx = innobase_get_trx();
	}
	buf_pool->stat.n_page_gets++;
loop:
	block = guess;
	//buf_pool_mutex_enter();

	if (block) {
		block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);

		/* If the guess is a compressed page descriptor that
		has been allocated by buf_buddy_alloc(), it may have
		been invalidated by buf_buddy_relocate().  In that
		case, block could point to something that happens to
		contain the expected bits in block->page.  Similarly,
		the guess may be pointing to a buffer pool chunk that
		has been released when resizing the buffer pool. */

		if (!block_mutex) {
			block = guess = NULL;
		} else if (!buf_block_is_uncompressed(block)
			   || offset != block->page.offset
			   || space != block->page.space
			   || buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE) {

			mutex_exit(block_mutex);

			block = guess = NULL;
		} else {
			ut_ad(!block->page.in_zip_hash);
			ut_ad(block->page.in_page_hash);
		}
	}

	if (block == NULL) {
		rw_lock_s_lock(&page_hash_latch);
		block = (buf_block_t*) buf_page_hash_get(space, offset);
		if (block) {
			block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
			ut_a(block_mutex);
		}
		rw_lock_s_unlock(&page_hash_latch);
	}

loop2:
	if (block == NULL) {
		/* Page not in buf_pool: needs to be read from file */

		//buf_pool_mutex_exit();

		if (mode == BUF_GET_IF_IN_POOL) {

			return(NULL);
		}

		if (buf_read_page(space, zip_size, offset, trx)) {
			retries = 0;
		} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
			++retries;
		} else {
			fprintf(stderr, "InnoDB: Error: Unable"
				" to read tablespace %lu page no"
				" %lu into the buffer pool after"
				" %lu attempts\n"
				"InnoDB: The most probable cause"
				" of this error may be that the"
				" table has been corrupted.\n"
				"InnoDB: You can try to fix this"
				" problem by using"
				" innodb_force_recovery.\n"
				"InnoDB: Please see reference manual"
				" for more details.\n"
				"InnoDB: Aborting...\n",
				space, offset,
				BUF_PAGE_READ_MAX_RETRIES);

			ut_error;
		}

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		ut_a(++buf_dbg_counter % 37 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		goto loop;
	}

	ut_ad(page_zip_get_size(&block->page.zip) == zip_size);

	must_read = buf_block_get_io_fix(block) == BUF_IO_READ;

	if (must_read && mode == BUF_GET_IF_IN_POOL) {
		/* The page is only being read to buffer */
		//buf_pool_mutex_exit();
		mutex_exit(block_mutex);

		return(NULL);
	}

	if (srv_pass_corrupt_table) {
		if (block->page.is_corrupt) {
			mutex_exit(block_mutex);
			return(NULL);
		}
	}
	ut_a(!(block->page.is_corrupt));

	switch (buf_block_get_state(block)) {
		buf_page_t*	bpage;
		ibool		success;

	case BUF_BLOCK_FILE_PAGE:
		if (block_mutex == &buf_pool_zip_mutex) {
			/* We are holding the wrong mutex: the block
			state must have changed while the mutex was
			being acquired.  Retry. */
			mutex_exit(block_mutex);
			goto loop;
		}
		break;

	case BUF_BLOCK_ZIP_PAGE:
	case BUF_BLOCK_ZIP_DIRTY:
		ut_ad(block_mutex == &buf_pool_zip_mutex);
		bpage = &block->page;
		/* Protect bpage->buf_fix_count. */
		/* Already protected here. */
		//mutex_enter(&buf_pool_zip_mutex);

		if (bpage->buf_fix_count
		    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
			/* This condition often occurs when the buffer
			is not buffer-fixed, but I/O-fixed by
			buf_page_init_for_read(). */
			//mutex_exit(&buf_pool_zip_mutex);
wait_until_unfixed:
			/* The block is buffer-fixed or I/O-fixed.
			Try again later. */
			//buf_pool_mutex_exit();
			mutex_exit(block_mutex);
			os_thread_sleep(WAIT_FOR_READ);

			goto loop;
		}

		/* Allocate an uncompressed page. */
		//buf_pool_mutex_exit();
		//mutex_exit(&buf_pool_zip_mutex);
		mutex_exit(block_mutex);

		block = buf_LRU_get_free_block();
		ut_a(block);
		block_mutex = &block->mutex;

		//buf_pool_mutex_enter();
		mutex_enter(&LRU_list_mutex);
		rw_lock_x_lock(&page_hash_latch);
		mutex_enter(block_mutex);

		{
			buf_page_t*	hash_bpage
				= buf_page_hash_get(space, offset);

			if (UNIV_UNLIKELY(bpage != hash_bpage)) {
				/* The buf_pool->page_hash was modified
				while buf_pool_mutex was released.
				Free the block that was allocated. */

				buf_LRU_block_free_non_file_page(block, TRUE);
				mutex_exit(block_mutex);
				block = (buf_block_t*) hash_bpage;
				if (block) {
					block_mutex = buf_page_get_mutex_enter((buf_page_t*)block);
					ut_a(block_mutex);
				}
				rw_lock_x_unlock(&page_hash_latch);
				mutex_exit(&LRU_list_mutex);
				goto loop2;
			}
		}

		mutex_enter(&buf_pool_zip_mutex);

		if (UNIV_UNLIKELY
		    (bpage->buf_fix_count
		     || buf_page_get_io_fix(bpage) != BUF_IO_NONE)) {

			mutex_exit(&buf_pool_zip_mutex);
			/* The block was buffer-fixed or I/O-fixed
			while buf_pool_mutex was not held by this thread.
			Free the block that was allocated and try again.
			This should be extremely unlikely. */

			buf_LRU_block_free_non_file_page(block, TRUE);
			//mutex_exit(&block->mutex);

			rw_lock_x_unlock(&page_hash_latch);
			mutex_exit(&LRU_list_mutex);
			goto wait_until_unfixed;
		}

		/* Move the compressed page from bpage to block,
		and uncompress it. */

		mutex_enter(&flush_list_mutex);

		buf_relocate(bpage, &block->page);

		rw_lock_x_unlock(&page_hash_latch);

		buf_block_init_low(block);
		block->lock_hash_val = lock_rec_hash(space, offset);

		UNIV_MEM_DESC(&block->page.zip.data,
			      page_zip_get_size(&block->page.zip), block);

		if (buf_page_get_state(&block->page)
		    == BUF_BLOCK_ZIP_PAGE) {
			UT_LIST_REMOVE(zip_list, buf_pool->zip_clean,
				       &block->page);
			ut_ad(!block->page.in_flush_list);
		} else {
			/* Relocate buf_pool->flush_list. */
			buf_flush_relocate_on_flush_list(bpage,
							 &block->page);
		}
		mutex_exit(&flush_list_mutex);

		/* Buffer-fix, I/O-fix, and X-latch the block
		for the duration of the decompression.
		Also add the block to the unzip_LRU list. */
		block->page.state = BUF_BLOCK_FILE_PAGE;

		/* Insert at the front of unzip_LRU list */
		buf_unzip_LRU_add_block(block, FALSE);

		mutex_exit(&LRU_list_mutex);

		block->page.buf_fix_count = 1;
		buf_block_set_io_fix(block, BUF_IO_READ);
		rw_lock_x_lock_func(&block->lock, 0, file, line);

		UNIV_MEM_INVALID(bpage, sizeof *bpage);

		mutex_exit(block_mutex);
		mutex_exit(&buf_pool_zip_mutex);

		mutex_enter(&buf_pool_mutex);
		buf_pool->n_pend_unzip++;
		mutex_exit(&buf_pool_mutex);

		buf_buddy_free(bpage, sizeof *bpage, FALSE);

		//buf_pool_mutex_exit();

		/* Decompress the page and apply buffered operations
		while not holding buf_pool_mutex or block->mutex. */
		success = buf_zip_decompress(block, srv_use_checksums);
		ut_a(success);

		if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
			ibuf_merge_or_delete_for_page(block, space, offset,
						      zip_size, TRUE);
		}

		/* Unfix and unlatch the block. */
		//buf_pool_mutex_enter();
		block_mutex = &block->mutex;
		mutex_enter(block_mutex);
		block->page.buf_fix_count--;
		buf_block_set_io_fix(block, BUF_IO_NONE);

		mutex_enter(&buf_pool_mutex);
		buf_pool->n_pend_unzip--;
		mutex_exit(&buf_pool_mutex);

		rw_lock_x_unlock(&block->lock);
		break;

	case BUF_BLOCK_ZIP_FREE:
	case BUF_BLOCK_NOT_USED:
	case BUF_BLOCK_READY_FOR_USE:
	case BUF_BLOCK_MEMORY:
	case BUF_BLOCK_REMOVE_HASH:
		ut_error;
		break;
	}

	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);

	//mutex_enter(&block->mutex);
#if UNIV_WORD_SIZE == 4
	/* On 32-bit systems, there is no padding in buf_page_t.  On
	other systems, Valgrind could complain about uninitialized pad
	bytes. */
	UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page);
#endif
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
	if (mode == BUF_GET_IF_IN_POOL && ibuf_debug) {
		/* Try to evict the block from the buffer pool, to use the
		insert buffer as much as possible. */

		if (buf_LRU_free_block(&block->page, TRUE, FALSE) == BUF_LRU_FREED) {
			buf_pool_mutex_exit();
			mutex_exit(&block->mutex);
			fprintf(stderr,
				"innodb_change_buffering_debug evict %u %u\n",
				(unsigned) space, (unsigned) offset);
			return(NULL);
		} else if (buf_flush_page_try(block)) {
			fprintf(stderr,
				"innodb_change_buffering_debug flush %u %u\n",
				(unsigned) space, (unsigned) offset);
			guess = block;
			goto loop;
		}

		/* Failed to evict the page; change it directly */
	}
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	//mutex_exit(&block->mutex);

	/* Check if this is the first access to the page */

	access_time = buf_page_is_accessed(&block->page);

	//buf_pool_mutex_exit();
	mutex_exit(block_mutex);

	buf_page_set_accessed_make_young(&block->page, access_time);

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	ut_a(!block->page.file_page_was_freed);
#endif

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	switch (rw_latch) {
	case RW_NO_LATCH:
		if (must_read) {
			/* Let us wait until the read operation
			completes */

			if (innobase_get_slow_log() && trx && trx->take_stats) {
				ut_usectime(&sec, &ms);
				start_time = (ib_uint64_t)sec * 1000000 + ms;
			} else {
				start_time = 0;
			}
			for (;;) {
				enum buf_io_fix	io_fix;

				mutex_enter(&block->mutex);
				io_fix = buf_block_get_io_fix(block);
				mutex_exit(&block->mutex);

				if (io_fix == BUF_IO_READ) {

					os_thread_sleep(WAIT_FOR_READ);
				} else {
					break;
				}
			}
			if (innobase_get_slow_log() && trx && trx->take_stats && start_time) {
				ut_usectime(&sec, &ms);
				finish_time = (ib_uint64_t)sec * 1000000 + ms;
				trx->io_reads_wait_timer += (ulint)(finish_time - start_time);
			}
		}

		fix_type = MTR_MEMO_BUF_FIX;
		break;

	case RW_S_LATCH:
		rw_lock_s_lock_func(&(block->lock), 0, file, line);

		fix_type = MTR_MEMO_PAGE_S_FIX;
		break;

	default:
		ut_ad(rw_latch == RW_X_LATCH);
		rw_lock_x_lock_func(&(block->lock), 0, file, line);

		fix_type = MTR_MEMO_PAGE_X_FIX;
		break;
	}

	mtr_memo_push(mtr, block, fix_type);

	if (!access_time) {
		/* In the case of a first access, try to apply linear
		read-ahead */

		buf_read_ahead_linear(space, zip_size, offset, trx);
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	if (innobase_get_slow_log()) {
		_increment_page_get_statistics(block, trx);
	}

	return(block);
}
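
/* Note: a minimal caller sketch (assumed; callers normally go through
the buf_page_get() macro, which supplies __FILE__ and __LINE__):

	mtr_t	mtr;

	mtr_start(&mtr);
	block = buf_page_get_gen(space, zip_size, offset, RW_S_LATCH,
				 NULL, BUF_GET, __FILE__, __LINE__, &mtr);
	... read buf_block_get_frame(block) ...
	mtr_commit(&mtr);

The block stays buffer-fixed and latched until mtr_commit() releases
what mtr_memo_push() recorded above. */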
/********************************************************************//**
This is the general function used to get optimistic access to a database
page.
@return TRUE if success */
UNIV_INTERN
ibool
buf_page_optimistic_get(
/*====================*/
	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
	buf_block_t*	block,	/*!< in: guessed buffer block */
	ib_uint64_t	modify_clock,/*!< in: modify clock value if mode is
				..._GUESS_ON_CLOCK */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	unsigned	access_time;
	ibool		success;
	ulint		fix_type;
	trx_t*		trx = NULL;

	ut_ad(block);
	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));

	mutex_enter(&block->mutex);

	if (UNIV_UNLIKELY(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE)) {

		mutex_exit(&block->mutex);

		return(FALSE);
	}

	buf_block_buf_fix_inc(block, file, line);

	mutex_exit(&block->mutex);

	/* Check if this is the first access to the page.
	We do a dirty read on purpose, to avoid mutex contention.
	This field is only used for heuristic purposes; it does not
	affect correctness. */

	access_time = buf_page_is_accessed(&block->page);
	buf_page_set_accessed_make_young(&block->page, access_time);

	ut_ad(!ibuf_inside()
	      || ibuf_page(buf_block_get_space(block),
			   buf_block_get_zip_size(block),
			   buf_block_get_page_no(block), NULL));

	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_nowait(&(block->lock),
						file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (UNIV_UNLIKELY(!success)) {
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

		if (rw_latch == RW_S_LATCH) {
			rw_lock_s_unlock(&(block->lock));
		} else {
			rw_lock_x_unlock(&(block->lock));
		}

		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	ut_a(block->page.file_page_was_freed == FALSE);
#endif
	if (innobase_get_slow_log()) {
		trx = innobase_get_trx();
	}

	if (UNIV_UNLIKELY(!access_time)) {
		/* In the case of a first access, try to apply linear
		read-ahead */

		buf_read_ahead_linear(buf_block_get_space(block),
				      buf_block_get_zip_size(block),
				      buf_block_get_page_no(block), trx);
	}

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	buf_pool->stat.n_page_gets++;

	if (innobase_get_slow_log()) {
		_increment_page_get_statistics(block, trx);
	}

	return(TRUE);
}
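
/* Note: the optimistic protocol above is buffer-fix, try-latch, then
compare block->modify_clock against the caller's saved value; if the
clock moved, the page was modified (or evicted and reused) since the
caller last saw it, so the fix and latch are rolled back and FALSE is
returned.  Callers fall back to buf_page_get_gen() in that case. */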
  2688. /********************************************************************//**
  2689. This is used to get access to a known database page, when no waiting can be
  2690. done. For example, if a search in an adaptive hash index leads us to this
  2691. frame.
  2692. @return TRUE if success */
  2693. UNIV_INTERN
  2694. ibool
  2695. buf_page_get_known_nowait(
  2696. /*======================*/
  2697. ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
  2698. buf_block_t* block, /*!< in: the known page */
  2699. ulint mode, /*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
  2700. const char* file, /*!< in: file name */
  2701. ulint line, /*!< in: line where called */
  2702. mtr_t* mtr) /*!< in: mini-transaction */
  2703. {
  2704. ibool success;
  2705. ulint fix_type;
  2706. trx_t* trx = NULL;
  2707. ut_ad(mtr);
  2708. ut_ad(mtr->state == MTR_ACTIVE);
  2709. ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
  2710. mutex_enter(&block->mutex);
  2711. if (buf_block_get_state(block) == BUF_BLOCK_REMOVE_HASH) {
  2712. /* Another thread is just freeing the block from the LRU list
  2713. of the buffer pool: do not try to access this page; this
  2714. attempt to access the page can only come through the hash
  2715. index because when the buffer block state is ..._REMOVE_HASH,
  2716. we have already removed it from the page address hash table
  2717. of the buffer pool. */
  2718. mutex_exit(&block->mutex);
  2719. return(FALSE);
  2720. }
  2721. ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
  2722. buf_block_buf_fix_inc(block, file, line);
  2723. mutex_exit(&block->mutex);
  2724. if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
  2725. //buf_pool_mutex_enter();
  2726. mutex_enter(&LRU_list_mutex);
  2727. buf_LRU_make_block_young(&block->page);
  2728. //buf_pool_mutex_exit();
  2729. mutex_exit(&LRU_list_mutex);
  2730. } else if (!buf_page_is_accessed(&block->page)) {
  2731. /* Above, we do a dirty read on purpose, to avoid
  2732. mutex contention. The field buf_page_t::access_time
  2733. is only used for heuristic purposes. Writes to the
  2734. field must be protected by mutex, however. */
  2735. ulint time_ms = ut_time_ms();
  2736. //buf_pool_mutex_enter();
  2737. mutex_enter(&block->mutex);
  2738. buf_page_set_accessed(&block->page, time_ms);
  2739. //buf_pool_mutex_exit();
  2740. mutex_exit(&block->mutex);
  2741. }
  2742. ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
	if (rw_latch == RW_S_LATCH) {
		success = rw_lock_s_lock_nowait(&(block->lock),
						file, line);
		fix_type = MTR_MEMO_PAGE_S_FIX;
	} else {
		success = rw_lock_x_lock_func_nowait(&(block->lock),
						     file, line);
		fix_type = MTR_MEMO_PAGE_X_FIX;
	}

	if (!success) {
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(FALSE);
	}

	mtr_memo_push(mtr, block, fix_type);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	ut_a(block->page.file_page_was_freed == FALSE);
#endif
#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a((mode == BUF_KEEP_OLD)
	     || (ibuf_count_get(buf_block_get_space(block),
				buf_block_get_page_no(block)) == 0));
#endif
	buf_pool->stat.n_page_gets++;

	if (innobase_get_slow_log()) {
		trx = innobase_get_trx();
		_increment_page_get_statistics(block, trx);
	}

	return(TRUE);
}
/*******************************************************************//**
Given a tablespace id and page number, tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for use when holding the kernel mutex.
@return pointer to a page or NULL */
UNIV_INTERN
const buf_block_t*
buf_page_try_get_func(
/*==================*/
	ulint		space_id,/*!< in: tablespace id */
	ulint		page_no,/*!< in: page number */
	const char*	file,	/*!< in: file name */
	ulint		line,	/*!< in: line where called */
	mtr_t*		mtr)	/*!< in: mini-transaction */
{
	buf_block_t*	block;
	ibool		success;
	ulint		fix_type;

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);

	//buf_pool_mutex_enter();
	rw_lock_s_lock(&page_hash_latch);
	block = buf_block_hash_get(space_id, page_no);

	if (!block) {
		//buf_pool_mutex_exit();
		rw_lock_s_unlock(&page_hash_latch);
		return(NULL);
	}

	mutex_enter(&block->mutex);
	//buf_pool_mutex_exit();
	rw_lock_s_unlock(&page_hash_latch);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
	ut_a(buf_block_get_space(block) == space_id);
	ut_a(buf_block_get_page_no(block) == page_no);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

	buf_block_buf_fix_inc(block, file, line);
	mutex_exit(&block->mutex);

	fix_type = MTR_MEMO_PAGE_S_FIX;
	success = rw_lock_s_lock_nowait(&block->lock, file, line);

	if (!success) {
		/* Let us try to get an X-latch. If the current thread
		is holding an X-latch on the page, we cannot get an
		S-latch. */

		fix_type = MTR_MEMO_PAGE_X_FIX;
		success = rw_lock_x_lock_func_nowait(&block->lock,
						     file, line);
	}

	if (!success) {
		mutex_enter(&block->mutex);
		buf_block_buf_fix_dec(block);
		mutex_exit(&block->mutex);

		return(NULL);
	}

	mtr_memo_push(mtr, block, fix_type);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 5771 || buf_validate());
	ut_a(block->page.buf_fix_count > 0);
	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);

	buf_pool->stat.n_page_gets++;

#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif

	return(block);
}
/********************************************************************//**
Initialize some fields of a control block. */
UNIV_INLINE
void
buf_page_init_low(
/*==============*/
	buf_page_t*	bpage)	/*!< in: block to init */
{
	bpage->flush_type = BUF_FLUSH_LRU;
	bpage->io_fix = BUF_IO_NONE;
	bpage->buf_fix_count = 0;
	bpage->freed_page_clock = 0;
	bpage->access_time = 0;
	bpage->newest_modification = 0;
	bpage->oldest_modification = 0;
	HASH_INVALIDATE(bpage, hash);
	bpage->is_corrupt = FALSE;
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
	bpage->file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
}
/********************************************************************//**
Inits a page in the buffer pool. */
static
void
buf_page_init(
/*==========*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	buf_block_t*	block)	/*!< in: block to init */
{
	buf_page_t*	hash_page;

	//ut_ad(buf_pool_mutex_own());
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX));
#endif
	ut_ad(mutex_own(&(block->mutex)));
	ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);

	/* Set the state of the block */
	buf_block_set_file_page(block, space, offset);

#ifdef UNIV_DEBUG_VALGRIND
	if (!space) {
		/* Silence valid Valgrind warnings about uninitialized
		data being written to data files. There are some unused
		bytes on some pages that InnoDB does not initialize. */
		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
	}
#endif /* UNIV_DEBUG_VALGRIND */

	buf_block_init_low(block);

	block->lock_hash_val = lock_rec_hash(space, offset);

	/* Insert into the hash table of file pages */

	hash_page = buf_page_hash_get(space, offset);
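	/* A block for this (space, offset) must not already be cached;
	finding one means the page hash is corrupted, so we dump
	diagnostics and abort below. */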
	if (UNIV_LIKELY_NULL(hash_page)) {
		fprintf(stderr,
			"InnoDB: Error: page %lu %lu already found"
			" in the hash table: %p, %p\n",
			(ulong) space,
			(ulong) offset,
			(const void*) hash_page, (const void*) block);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
		mutex_exit(&block->mutex);
		//buf_pool_mutex_exit();
		rw_lock_x_unlock(&page_hash_latch);
		buf_print();
		buf_LRU_print();
		buf_validate();
		buf_LRU_validate();
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
		ut_error;
	}

	buf_page_init_low(&block->page);

	ut_ad(!block->page.in_zip_hash);
	ut_ad(!block->page.in_page_hash);
	ut_d(block->page.in_page_hash = TRUE);
	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
		    buf_page_address_fold(space, offset), &block->page);
}
/********************************************************************//**
Inits a page for reading into the buffer pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@return pointer to the block or NULL */
UNIV_INTERN
buf_page_t*
buf_page_init_for_read(
/*===================*/
	ulint*		err,	/*!< out: DB_SUCCESS or DB_TABLESPACE_DELETED */
	ulint		mode,	/*!< in: BUF_READ_IBUF_PAGES_ONLY, ... */
	ulint		space,	/*!< in: space id */
	ulint		zip_size,/*!< in: compressed page size, or 0 */
	ibool		unzip,	/*!< in: TRUE=request uncompressed page */
	ib_int64_t	tablespace_version,/*!< in: prevents reading from a wrong
				version of the tablespace in case we have done
				DISCARD + IMPORT */
	ulint		offset)	/*!< in: page number */
{
	buf_block_t*	block;
	buf_page_t*	bpage;
	mtr_t		mtr;
	ibool		lru	= FALSE;
	void*		data;

	ut_ad(buf_pool);

	*err = DB_SUCCESS;

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {
		/* It is a read-ahead within an ibuf routine */

		ut_ad(!ibuf_bitmap_page(zip_size, offset));
		ut_ad(ibuf_inside());

		mtr_start(&mtr);

		if (!recv_no_ibuf_operations
		    && !ibuf_page(space, zip_size, offset, &mtr)) {

			mtr_commit(&mtr);

			return(NULL);
		}
	} else {
		ut_ad(mode == BUF_READ_ANY_PAGE);
	}
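	/* For a compressed page that is not to be decompressed now
	(and not during recovery), no uncompressed frame is needed:
	only a compressed-only control block is allocated below. */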
	if (zip_size && UNIV_LIKELY(!unzip)
	    && UNIV_LIKELY(!recv_recovery_is_on())) {
		block = NULL;
	} else {
		block = buf_LRU_get_free_block();
		ut_ad(block);
	}

	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);
	rw_lock_x_lock(&page_hash_latch);

	if (buf_page_hash_get(space, offset)) {
		/* The page is already in the buffer pool. */
err_exit:
		if (block) {
			mutex_enter(&block->mutex);
			mutex_exit(&LRU_list_mutex);
			rw_lock_x_unlock(&page_hash_latch);
			buf_LRU_block_free_non_file_page(block, FALSE);
			mutex_exit(&block->mutex);
		}
		else {
			mutex_exit(&LRU_list_mutex);
			rw_lock_x_unlock(&page_hash_latch);
		}

		bpage = NULL;
		goto func_exit;
	}

	if (fil_tablespace_deleted_or_being_deleted_in_mem(
		    space, tablespace_version)) {
		/* The page belongs to a space which has been
		deleted or is being deleted. */
		*err = DB_TABLESPACE_DELETED;

		goto err_exit;
	}

	if (block) {
		bpage = &block->page;
		mutex_enter(&block->mutex);
		buf_page_init(space, offset, block);

		rw_lock_x_unlock(&page_hash_latch);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);

		/* We set a pass-type x-lock on the frame because then
		the same thread which called for the read operation
		(and is running now at this point in the code) can wait
		for the read to complete by waiting for the x-lock on
		the frame; if the x-lock were recursive, the same
		thread would illegally get the x-lock before the page
		read is completed. The x-lock is cleared by the
		io-handler thread. */

		rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
		buf_page_set_io_fix(bpage, BUF_IO_READ);

		if (UNIV_UNLIKELY(zip_size)) {
			page_zip_set_size(&block->page.zip, zip_size);

			/* buf_pool_mutex may be released and
			reacquired by buf_buddy_alloc(). Thus, we
			must release block->mutex in order not to
			break the latching order in the reacquisition
			of buf_pool_mutex. We also must defer this
			operation until after the block descriptor has
			been added to buf_pool->LRU and
			buf_pool->page_hash. */
			mutex_exit(&block->mutex);
			data = buf_buddy_alloc(zip_size, &lru, FALSE);
			mutex_enter(&block->mutex);
			block->page.zip.data = data;

			/* To maintain the invariant
			block->in_unzip_LRU_list
			== buf_page_belongs_to_unzip_LRU(&block->page)
			we have to add this block to unzip_LRU
			after block->page.zip.data is set. */
			ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
			buf_unzip_LRU_add_block(block, TRUE);
		}

		mutex_exit(&LRU_list_mutex);
		mutex_exit(&block->mutex);
	} else {
		/* Defer buf_buddy_alloc() until after the block has
		been found not to exist. The buf_buddy_alloc() and
		buf_buddy_free() calls may be expensive because of
		buf_buddy_relocate(). */

		/* The compressed page must be allocated before the
		control block (bpage), in order to avoid the
		invocation of buf_buddy_relocate_block() on
		uninitialized data. */
		data = buf_buddy_alloc(zip_size, &lru, TRUE);
		bpage = buf_buddy_alloc(sizeof *bpage, &lru, TRUE);

		/* If buf_buddy_alloc() allocated storage from the LRU list,
		it released and reacquired buf_pool_mutex. Thus, we must
		check the page_hash again, as it may have been modified. */
		if (UNIV_UNLIKELY(lru)
		    && UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {

			/* The block was added by some other thread. */
			buf_buddy_free(bpage, sizeof *bpage, TRUE);
			buf_buddy_free(data, zip_size, TRUE);

			mutex_exit(&LRU_list_mutex);
			rw_lock_x_unlock(&page_hash_latch);

			bpage = NULL;
			goto func_exit;
		}

		page_zip_des_init(&bpage->zip);
		page_zip_set_size(&bpage->zip, zip_size);
		bpage->zip.data = data;

		mutex_enter(&buf_pool_zip_mutex);
		UNIV_MEM_DESC(bpage->zip.data,
			      page_zip_get_size(&bpage->zip), bpage);
		buf_page_init_low(bpage);
		bpage->state	= BUF_BLOCK_ZIP_PAGE;
		bpage->space	= space;
		bpage->offset	= offset;
		bpage->space_was_being_deleted = FALSE;

#ifdef UNIV_DEBUG
		bpage->in_page_hash = FALSE;
		bpage->in_zip_hash = FALSE;
		bpage->in_flush_list = FALSE;
		bpage->in_free_list = FALSE;
#endif /* UNIV_DEBUG */
		bpage->in_LRU_list = FALSE;

		ut_d(bpage->in_page_hash = TRUE);
		HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
			    buf_page_address_fold(space, offset), bpage);

		rw_lock_x_unlock(&page_hash_latch);

		/* The block must be put to the LRU list, to the old blocks */
		buf_LRU_add_block(bpage, TRUE/* to old blocks */);
		mutex_enter(&flush_list_mutex);
		buf_LRU_insert_zip_clean(bpage);
		mutex_exit(&flush_list_mutex);

		mutex_exit(&LRU_list_mutex);

		buf_page_set_io_fix(bpage, BUF_IO_READ);

		mutex_exit(&buf_pool_zip_mutex);
	}

	mutex_enter(&buf_pool_mutex);
	buf_pool->n_pend_reads++;
	mutex_exit(&buf_pool_mutex);
func_exit:
	//buf_pool_mutex_exit();

	if (mode == BUF_READ_IBUF_PAGES_ONLY) {

		mtr_commit(&mtr);
	}

	ut_ad(!bpage || buf_page_in_file(bpage));
	return(bpage);
}
/********************************************************************//**
Initializes a page in the buffer pool. The page is usually not read
from a file even if it cannot be found in the buffer pool. This is one
of the functions which perform a state transition NOT_USED => FILE_PAGE
on a block (the other is buf_page_get_gen).
@return pointer to the block, page bufferfixed */
UNIV_INTERN
buf_block_t*
buf_page_create(
/*============*/
	ulint	space,	/*!< in: space id */
	ulint	offset,	/*!< in: offset of the page within space in units of
			a page */
	ulint	zip_size,/*!< in: compressed page size, or 0 */
	mtr_t*	mtr)	/*!< in: mini-transaction handle */
{
	buf_frame_t*	frame;
	buf_block_t*	block;
	buf_block_t*	free_block	= NULL;
	ulint		time_ms		= ut_time_ms();

	ut_ad(mtr);
	ut_ad(mtr->state == MTR_ACTIVE);
	ut_ad(space || !zip_size);

	free_block = buf_LRU_get_free_block();

	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);
	rw_lock_x_lock(&page_hash_latch);

	block = (buf_block_t*) buf_page_hash_get(space, offset);

	if (block && buf_page_in_file(&block->page)) {
#ifdef UNIV_IBUF_COUNT_DEBUG
		ut_a(ibuf_count_get(space, offset) == 0);
#endif
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
		block->page.file_page_was_freed = FALSE;
#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */

		/* Page can be found in buf_pool */
		//buf_pool_mutex_exit();
		mutex_exit(&LRU_list_mutex);
		rw_lock_x_unlock(&page_hash_latch);

		buf_block_free(free_block);

		return(buf_page_get_with_no_latch(space, zip_size,
						  offset, mtr));
	}

	/* If we get here, the page was not in buf_pool: init it there */

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Creating space %lu page %lu to buffer\n",
			(ulong) space, (ulong) offset);
	}
#endif /* UNIV_DEBUG */

	block = free_block;

	mutex_enter(&block->mutex);

	buf_page_init(space, offset, block);

	rw_lock_x_unlock(&page_hash_latch);

	/* The block must be put to the LRU list */
	buf_LRU_add_block(&block->page, FALSE);

	buf_block_buf_fix_inc(block, __FILE__, __LINE__);
	buf_pool->stat.n_pages_created++;

	if (zip_size) {
		void*	data;
		ibool	lru;

		/* Prevent race conditions during buf_buddy_alloc(),
		which may release and reacquire buf_pool_mutex,
		by IO-fixing and X-latching the block. */

		buf_page_set_io_fix(&block->page, BUF_IO_READ);
		rw_lock_x_lock(&block->lock);

		page_zip_set_size(&block->page.zip, zip_size);
		mutex_exit(&block->mutex);
		/* buf_pool_mutex may be released and reacquired by
		buf_buddy_alloc(). Thus, we must release block->mutex
		in order not to break the latching order in
		the reacquisition of buf_pool_mutex. We also must
		defer this operation until after the block descriptor
		has been added to buf_pool->LRU and buf_pool->page_hash. */
		data = buf_buddy_alloc(zip_size, &lru, FALSE);
		mutex_enter(&block->mutex);
		block->page.zip.data = data;

		/* To maintain the invariant
		block->in_unzip_LRU_list
		== buf_page_belongs_to_unzip_LRU(&block->page)
		we have to add this block to unzip_LRU after
		block->page.zip.data is set. */
		ut_ad(buf_page_belongs_to_unzip_LRU(&block->page));
		buf_unzip_LRU_add_block(block, FALSE);

		buf_page_set_io_fix(&block->page, BUF_IO_NONE);
		rw_lock_x_unlock(&block->lock);
	}

	buf_page_set_accessed(&block->page, time_ms);

	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);

	mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);

	mutex_exit(&block->mutex);

	/* Delete possible entries for the page from the insert buffer:
	such can exist if the page belonged to an index which was dropped */

	ibuf_merge_or_delete_for_page(NULL, space, offset, zip_size, TRUE);

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin(FALSE);

	frame = block->frame;

	memset(frame + FIL_PAGE_PREV, 0xff, 4);
	memset(frame + FIL_PAGE_NEXT, 0xff, 4);
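	/* 0xFFFFFFFF is FIL_NULL: the new page has no predecessor or
	successor in any page list yet. */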
	mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED);

	/* Reset to zero the file flush lsn field in the page; if the first
	page of an ibdata file is 'created' in this function into the buffer
	pool then we lose the original contents of the file flush lsn stamp.
	Then InnoDB could, during crash recovery, print a big, false,
	corruption warning if the stamp contains an lsn bigger than the
	ib_logfile lsn. */

	memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8);

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
	ut_a(++buf_dbg_counter % 357 || buf_validate());
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_IBUF_COUNT_DEBUG
	ut_a(ibuf_count_get(buf_block_get_space(block),
			    buf_block_get_page_no(block)) == 0);
#endif
	return(block);
}
/********************************************************************//**
Completes an asynchronous read or write request of a file page to or from
the buffer pool. */
UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
	buf_page_t*	bpage,	/*!< in: pointer to the block in question */
	trx_t*		trx)
{
	enum buf_io_fix	io_type;
	const ibool	uncompressed = (buf_page_get_state(bpage)
					== BUF_BLOCK_FILE_PAGE);
	mutex_t*	block_mutex;

	ut_a(buf_page_in_file(bpage));

	/* We do not need to protect io_fix here with a mutex in order to
	read it, because this is the only function where we can change the
	value from BUF_IO_READ or BUF_IO_WRITE to some other value, and our
	code ensures that this is the only thread that handles the i/o for
	this block. */

	io_type = buf_page_get_io_fix(bpage);
	ut_ad(io_type == BUF_IO_READ || io_type == BUF_IO_WRITE);

	if (io_type == BUF_IO_READ) {
		ulint	read_page_no;
		ulint	read_space_id;
		byte*	frame;

		if (buf_page_get_zip_size(bpage)) {
			frame = bpage->zip.data;
			buf_pool->n_pend_unzip++;
			if (uncompressed
			    && !buf_zip_decompress((buf_block_t*) bpage,
						   FALSE)) {

				buf_pool->n_pend_unzip--;
				goto corrupt;
			}
			buf_pool->n_pend_unzip--;
		} else {
			ut_a(uncompressed);
			frame = ((buf_block_t*) bpage)->frame;
		}

		/* If this page is neither uninitialized nor in the
		doublewrite buffer, then the page number and space id
		should be the same as in the block. */
		read_page_no = mach_read_from_4(frame + FIL_PAGE_OFFSET);
		read_space_id = mach_read_from_4(
			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);

		if ((bpage->space == TRX_SYS_SPACE
		     || (srv_doublewrite_file && bpage->space == TRX_DOUBLEWRITE_SPACE))
		    && trx_doublewrite_page_inside(bpage->offset)) {

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Error: reading page %lu\n"
				"InnoDB: which is in the"
				" doublewrite buffer!\n",
				(ulong) bpage->offset);
		} else if (!read_space_id && !read_page_no) {
			/* This is likely an uninitialized page. */
		} else if ((bpage->space
			    && bpage->space != read_space_id)
			   || bpage->offset != read_page_no) {
			/* We did not compare space_id to read_space_id
			if bpage->space == 0, because the field on the
			page may contain garbage in MySQL < 4.1.1,
			which only supported bpage->space == 0. */

			ut_print_timestamp(stderr);
			fprintf(stderr,
				" InnoDB: Error: space id and page n:o"
				" stored in the page\n"
				"InnoDB: read in are %lu:%lu,"
				" should be %lu:%lu!\n",
				(ulong) read_space_id, (ulong) read_page_no,
				(ulong) bpage->space,
				(ulong) bpage->offset);
		}

		if (!srv_pass_corrupt_table || !bpage->is_corrupt) {
			/* From version 3.23.38 up we store the page checksum
			in the first 4 bytes of the page end lsn field */

			if (buf_page_is_corrupted(frame,
						  buf_page_get_zip_size(bpage))) {
corrupt:
				fprintf(stderr,
					"InnoDB: Database page corruption on disk"
					" or a failed\n"
					"InnoDB: file read of page %lu.\n"
					"InnoDB: You may have to recover"
					" from a backup.\n",
					(ulong) bpage->offset);
				buf_page_print(frame, buf_page_get_zip_size(bpage));
				fprintf(stderr,
					"InnoDB: Database page corruption on disk"
					" or a failed\n"
					"InnoDB: file read of page %lu.\n"
					"InnoDB: You may have to recover"
					" from a backup.\n",
					(ulong) bpage->offset);
				fputs("InnoDB: It is also possible that"
				      " your operating\n"
				      "InnoDB: system has corrupted its"
				      " own file cache\n"
				      "InnoDB: and rebooting your computer"
				      " removes the\n"
				      "InnoDB: error.\n"
				      "InnoDB: If the corrupt page is an index page\n"
				      "InnoDB: you can also try to"
				      " fix the corruption\n"
				      "InnoDB: by dumping, dropping,"
				      " and reimporting\n"
				      "InnoDB: the corrupt table."
				      " You can use CHECK\n"
				      "InnoDB: TABLE to scan your"
				      " table for corruption.\n"
				      "InnoDB: See also "
				      REFMAN "forcing-innodb-recovery.html\n"
				      "InnoDB: about forcing recovery.\n", stderr);

				if (srv_pass_corrupt_table && !trx_sys_sys_space(bpage->space)
				    && bpage->space < SRV_LOG_SPACE_FIRST_ID) {
					fprintf(stderr,
						"InnoDB: space %u will be treated as corrupt.\n",
						bpage->space);
					fil_space_set_corrupt(bpage->space);
					if (trx && trx->dict_operation_lock_mode == 0) {
						dict_table_set_corrupt_by_space(bpage->space, TRUE);
					} else {
						dict_table_set_corrupt_by_space(bpage->space, FALSE);
					}
					bpage->is_corrupt = TRUE;
				} else
				if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
					fputs("InnoDB: Ending processing because of"
					      " a corrupt database page.\n",
					      stderr);
					exit(1);
				}
			}
		} /**/

		if (recv_recovery_is_on()) {
			/* Pages must be uncompressed for crash recovery. */
			ut_a(uncompressed);
			recv_recover_page(TRUE, (buf_block_t*) bpage);
		}

		if (uncompressed && !recv_no_ibuf_operations) {
			ibuf_merge_or_delete_for_page(
				/* Delete possible entries, if bpage is_corrupt */
				(srv_pass_corrupt_table && bpage->is_corrupt) ? NULL :
				(buf_block_t*) bpage, bpage->space,
				bpage->offset, buf_page_get_zip_size(bpage),
				(srv_pass_corrupt_table && bpage->is_corrupt) ? FALSE :
				TRUE);
		}
	}
	//enum buf_flush	flush_type;

	//buf_pool_mutex_enter();
	if (io_type == BUF_IO_WRITE) {
		//flush_type = buf_page_get_flush_type(bpage);
		/* to keep consistency at buf_LRU_insert_zip_clean() */
		//if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
		mutex_enter(&LRU_list_mutex);
		//}
	}
	block_mutex = buf_page_get_mutex_enter(bpage);
	ut_a(block_mutex);
	mutex_enter(&buf_pool_mutex);

#ifdef UNIV_IBUF_COUNT_DEBUG
	if (io_type == BUF_IO_WRITE || uncompressed) {
		/* For BUF_IO_READ of compressed-only blocks, the
		buffered operations will be merged by buf_page_get_gen()
		after the block has been uncompressed. */
		ut_a(ibuf_count_get(bpage->space, bpage->offset) == 0);
	}
#endif
	/* Because the thread that does the unlocking is not the same one
	that did the locking, we use a pass value != 0 in unlock, which
	simply removes the newest lock debug record, without checking the
	thread id. */

	buf_page_set_io_fix(bpage, BUF_IO_NONE);

	switch (io_type) {
	case BUF_IO_READ:
		/* NOTE that the call to ibuf may have moved the ownership of
		the x-latch to this OS thread: do not let this confuse you in
		debugging! */

		ut_ad(buf_pool->n_pend_reads > 0);
		buf_pool->n_pend_reads--;
		buf_pool->stat.n_pages_read++;

		if (uncompressed) {
			rw_lock_x_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_READ);
		}

		break;

	case BUF_IO_WRITE:
		/* Write means a flush operation: call the completion
		routine in the flush system */

		buf_flush_write_complete(bpage);

		/* to keep consistency at buf_LRU_insert_zip_clean() */
		//if (flush_type == BUF_FLUSH_LRU) { /* optimistic! */
		mutex_exit(&LRU_list_mutex);
		//}
		if (uncompressed) {
			rw_lock_s_unlock_gen(&((buf_block_t*) bpage)->lock,
					     BUF_IO_WRITE);
		}

		buf_pool->stat.n_pages_written++;

		break;

	default:
		ut_error;
	}

#ifdef UNIV_DEBUG
	if (buf_debug_prints) {
		fprintf(stderr, "Has %s page space %lu page no %lu\n",
			io_type == BUF_IO_READ ? "read" : "written",
			(ulong) buf_page_get_space(bpage),
			(ulong) buf_page_get_page_no(bpage));
	}
#endif /* UNIV_DEBUG */

	mutex_exit(&buf_pool_mutex);
	mutex_exit(block_mutex);
	//buf_pool_mutex_exit();
}
/*********************************************************************//**
Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
UNIV_INTERN
void
buf_pool_invalidate(void)
/*=====================*/
{
	ibool		freed;
	enum buf_flush	i;

	buf_pool_mutex_enter();

	for (i = BUF_FLUSH_LRU; i < BUF_FLUSH_N_TYPES; i++) {

		/* As this function is called during startup and
		during the redo application phase of recovery, InnoDB
		is single threaded (apart from IO helper threads) at
		this stage. No new write batch can be in initialization
		stage at this point. */
		ut_ad(buf_pool->init_flush[i] == FALSE);

		/* However, it is possible that a write batch that has
		been posted earlier is still not complete. For buffer
		pool invalidation to proceed we must ensure there is NO
		write activity happening. */
		if (buf_pool->n_flush[i] > 0) {
			buf_pool_mutex_exit();
			buf_flush_wait_batch_end(i);
			buf_pool_mutex_enter();
		}
	}

	buf_pool_mutex_exit();

	ut_ad(buf_all_freed());

	freed = TRUE;

	while (freed) {
		freed = buf_LRU_search_and_free_block(100);
	}

	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);

	ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0);
	ut_ad(UT_LIST_GET_LEN(buf_pool->unzip_LRU) == 0);

	buf_pool->freed_page_clock = 0;
	buf_pool->LRU_old = NULL;
	buf_pool->LRU_old_len = 0;
	buf_pool->LRU_flush_ended = 0;

	memset(&buf_pool->stat, 0x00, sizeof(buf_pool->stat));
	buf_refresh_io_stats();

	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
Validates the buffer buf_pool data structure.
@return TRUE */
UNIV_INTERN
ibool
buf_validate(void)
/*==============*/
{
	buf_page_t*	b;
	buf_chunk_t*	chunk;
	ulint		i;
	ulint		n_single_flush	= 0;
	ulint		n_lru_flush	= 0;
	ulint		n_list_flush	= 0;
	ulint		n_lru		= 0;
	ulint		n_flush		= 0;
	ulint		n_free		= 0;
	ulint		n_zip		= 0;

	ut_ad(buf_pool);
	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);
	rw_lock_x_lock(&page_hash_latch);
	/* Because of the new latch order, this function can no longer
	validate everything; see the checks disabled near the end. */

	chunk = buf_pool->chunks;

	/* Check the uncompressed blocks. */

	for (i = buf_pool->n_chunks; i--; chunk++) {

		ulint		j;
		buf_block_t*	block = chunk->blocks;

		for (j = chunk->size; j--; block++) {

			mutex_enter(&block->mutex);

			switch (buf_block_get_state(block)) {
			case BUF_BLOCK_ZIP_FREE:
			case BUF_BLOCK_ZIP_PAGE:
			case BUF_BLOCK_ZIP_DIRTY:
				/* These should only occur on
				zip_clean, zip_free[], or flush_list. */
				ut_error;
				break;

			case BUF_BLOCK_FILE_PAGE:
				ut_a(buf_page_hash_get(buf_block_get_space(
							       block),
						       buf_block_get_page_no(
							       block))
				     == &block->page);

#ifdef UNIV_IBUF_COUNT_DEBUG
				ut_a(buf_page_get_io_fix(&block->page)
				     == BUF_IO_READ
				     || !ibuf_count_get(buf_block_get_space(
								block),
							buf_block_get_page_no(
								block)));
#endif
				switch (buf_page_get_io_fix(&block->page)) {
				case BUF_IO_NONE:
					break;

				case BUF_IO_WRITE:
					switch (buf_page_get_flush_type(
							&block->page)) {
					case BUF_FLUSH_LRU:
						n_lru_flush++;
						ut_a(rw_lock_is_locked(
							     &block->lock,
							     RW_LOCK_SHARED));
						break;
					case BUF_FLUSH_LIST:
						n_list_flush++;
						break;
					case BUF_FLUSH_SINGLE_PAGE:
						n_single_flush++;
						break;
					default:
						ut_error;
					}

					break;

				case BUF_IO_READ:

					ut_a(rw_lock_is_locked(&block->lock,
							       RW_LOCK_EX));
					break;
				}

				n_lru++;

				if (block->page.oldest_modification > 0) {
					n_flush++;
				}

				break;

			case BUF_BLOCK_NOT_USED:
				n_free++;
				break;

			case BUF_BLOCK_READY_FOR_USE:
			case BUF_BLOCK_MEMORY:
			case BUF_BLOCK_REMOVE_HASH:
				/* do nothing */
				break;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Check clean compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(zip_list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		switch (buf_page_get_io_fix(b)) {
		case BUF_IO_NONE:
			/* All clean blocks should be I/O-unfixed. */
			break;
		case BUF_IO_READ:
			/* In buf_LRU_free_block(), we temporarily set
			b->io_fix = BUF_IO_READ for a newly allocated
			control block in order to prevent
			buf_page_get_gen() from decompressing the block. */
			break;
		default:
			ut_error;
			break;
		}
		ut_a(!b->oldest_modification);
		ut_a(buf_page_hash_get(b->space, b->offset) == b);

		n_lru++;
		n_zip++;
	}

	/* Check dirty compressed-only blocks. */

	mutex_enter(&flush_list_mutex);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(flush_list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			ut_a(b->oldest_modification);
			n_lru++;
			n_flush++;
			n_zip++;
			switch (buf_page_get_io_fix(b)) {
			case BUF_IO_NONE:
			case BUF_IO_READ:
				break;

			case BUF_IO_WRITE:
				switch (buf_page_get_flush_type(b)) {
				case BUF_FLUSH_LRU:
					n_lru_flush++;
					break;
				case BUF_FLUSH_LIST:
					n_list_flush++;
					break;
				case BUF_FLUSH_SINGLE_PAGE:
					n_single_flush++;
					break;
				default:
					ut_error;
				}
				break;
			}
			break;

		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;

		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
		ut_a(buf_page_hash_get(b->space, b->offset) == b);
	}
	mutex_exit(&flush_list_mutex);

	mutex_exit(&buf_pool_zip_mutex);
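	/* Every uncompressed frame is counted in n_lru or n_free (or is
	in a transient state); compressed-only pages appear in n_lru
	without occupying a frame, hence the n_zip allowance below. */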
	if (n_lru + n_free > buf_pool->curr_size + n_zip) {
		fprintf(stderr, "n LRU %lu, n free %lu, pool %lu zip %lu\n",
			(ulong) n_lru, (ulong) n_free,
			(ulong) buf_pool->curr_size, (ulong) n_zip);
		ut_error;
	}

	ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru);
	/* Because of the latching order with respect to block->mutex,
	we cannot acquire free_list_mutex before it; the free-list
	length check is therefore disabled: */
	/*
	if (UT_LIST_GET_LEN(buf_pool->free) != n_free) {
		fprintf(stderr, "Free list len %lu, free blocks %lu\n",
			(ulong) UT_LIST_GET_LEN(buf_pool->free),
			(ulong) n_free);
		ut_error;
	}
	*/
	/* Likewise, the latching order with respect to block->mutex
	prevents taking flush_list_mutex here, so these checks are
	disabled as well: */
	/*
	ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush);

	ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush);
	ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush);
	*/
	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
	rw_lock_x_unlock(&page_hash_latch);

	ut_a(buf_LRU_validate());
	ut_a(buf_flush_validate());

	return(TRUE);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  3693. #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  3694. #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  3695. /*********************************************************************//**
  3696. Prints info of the buffer buf_pool data structure. */
  3697. UNIV_INTERN
  3698. void
  3699. buf_print(void)
  3700. /*===========*/
  3701. {
  3702. dulint* index_ids;
  3703. ulint* counts;
  3704. ulint size;
  3705. ulint i;
  3706. ulint j;
  3707. dulint id;
  3708. ulint n_found;
  3709. buf_chunk_t* chunk;
  3710. dict_index_t* index;
  3711. ut_ad(buf_pool);
  3712. size = buf_pool->curr_size;
  3713. index_ids = mem_alloc(sizeof(dulint) * size);
  3714. counts = mem_alloc(sizeof(ulint) * size);
  3715. //buf_pool_mutex_enter();
  3716. mutex_enter(&LRU_list_mutex);
  3717. mutex_enter(&free_list_mutex);
  3718. mutex_enter(&flush_list_mutex);
  3719. fprintf(stderr,
  3720. "buf_pool size %lu\n"
  3721. "database pages %lu\n"
  3722. "free pages %lu\n"
  3723. "modified database pages %lu\n"
  3724. "n pending decompressions %lu\n"
  3725. "n pending reads %lu\n"
  3726. "n pending flush LRU %lu list %lu single page %lu\n"
  3727. "pages made young %lu, not young %lu\n"
  3728. "pages read %lu, created %lu, written %lu\n",
  3729. (ulong) size,
  3730. (ulong) UT_LIST_GET_LEN(buf_pool->LRU),
  3731. (ulong) UT_LIST_GET_LEN(buf_pool->free),
  3732. (ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
  3733. (ulong) buf_pool->n_pend_unzip,
  3734. (ulong) buf_pool->n_pend_reads,
  3735. (ulong) buf_pool->n_flush[BUF_FLUSH_LRU],
  3736. (ulong) buf_pool->n_flush[BUF_FLUSH_LIST],
  3737. (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE],
  3738. (ulong) buf_pool->stat.n_pages_made_young,
  3739. (ulong) buf_pool->stat.n_pages_not_made_young,
  3740. (ulong) buf_pool->stat.n_pages_read,
  3741. (ulong) buf_pool->stat.n_pages_created,
  3742. (ulong) buf_pool->stat.n_pages_written);
  3743. /* Count the number of blocks belonging to each index in the buffer */
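	/* index_ids[] and counts[] together act as a simple array-backed
	map from index id to page count; a linear scan is adequate for
	this debug-only report. */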
	n_found = 0;

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block		= chunk->blocks;
		ulint		n_blocks	= chunk->size;

		for (; n_blocks--; block++) {
			const buf_frame_t* frame = block->frame;

			if (fil_page_get_type(frame) == FIL_PAGE_INDEX) {

				id = btr_page_get_index_id(frame);

				/* Look for the id in the index_ids array */
				j = 0;

				while (j < n_found) {

					if (ut_dulint_cmp(index_ids[j],
							  id) == 0) {
						counts[j]++;

						break;
					}
					j++;
				}

				if (j == n_found) {
					n_found++;
					index_ids[j] = id;
					counts[j] = 1;
				}
			}
		}
	}

	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
	mutex_exit(&free_list_mutex);
	mutex_exit(&flush_list_mutex);

	for (i = 0; i < n_found; i++) {
		index = dict_index_get_if_in_cache(index_ids[i]);

		fprintf(stderr,
			"Block count for index %lu in buffer is about %lu",
			(ulong) ut_dulint_get_low(index_ids[i]),
			(ulong) counts[i]);

		if (index) {
			putc(' ', stderr);
			dict_index_name_print(stderr, NULL, index);
		}

		putc('\n', stderr);
	}

	mem_free(index_ids);
	mem_free(counts);

	ut_a(buf_validate());
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */

#ifdef UNIV_DEBUG
/*********************************************************************//**
Returns the number of latched pages in the buffer pool.
@return number of latched pages */
UNIV_INTERN
ulint
buf_get_latched_pages_number(void)
/*==============================*/
{
	buf_chunk_t*	chunk;
	buf_page_t*	b;
	ulint		i;
	ulint		fixed_pages_number = 0;

	//buf_pool_mutex_enter();

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {
		buf_block_t*	block;
		ulint		j;

		block = chunk->blocks;

		for (j = chunk->size; j--; block++) {
			if (buf_block_get_state(block)
			    != BUF_BLOCK_FILE_PAGE) {

				continue;
			}

			mutex_enter(&block->mutex);

			if (block->page.buf_fix_count != 0
			    || buf_page_get_io_fix(&block->page)
			    != BUF_IO_NONE) {
				fixed_pages_number++;
			}

			mutex_exit(&block->mutex);
		}
	}

	mutex_enter(&buf_pool_zip_mutex);

	/* Traverse the lists of clean and dirty compressed-only blocks. */

	for (b = UT_LIST_GET_FIRST(buf_pool->zip_clean); b;
	     b = UT_LIST_GET_NEXT(zip_list, b)) {
		ut_a(buf_page_get_state(b) == BUF_BLOCK_ZIP_PAGE);
		ut_a(buf_page_get_io_fix(b) != BUF_IO_WRITE);

		if (b->buf_fix_count != 0
		    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
			fixed_pages_number++;
		}
	}

	mutex_enter(&flush_list_mutex);
	for (b = UT_LIST_GET_FIRST(buf_pool->flush_list); b;
	     b = UT_LIST_GET_NEXT(flush_list, b)) {
		ut_ad(b->in_flush_list);

		switch (buf_page_get_state(b)) {
		case BUF_BLOCK_ZIP_DIRTY:
			if (b->buf_fix_count != 0
			    || buf_page_get_io_fix(b) != BUF_IO_NONE) {
				fixed_pages_number++;
			}
			break;

		case BUF_BLOCK_FILE_PAGE:
			/* uncompressed page */
			break;

		case BUF_BLOCK_ZIP_FREE:
		case BUF_BLOCK_ZIP_PAGE:
		case BUF_BLOCK_NOT_USED:
		case BUF_BLOCK_READY_FOR_USE:
		case BUF_BLOCK_MEMORY:
		case BUF_BLOCK_REMOVE_HASH:
			ut_error;
			break;
		}
	}

	mutex_exit(&flush_list_mutex);
	mutex_exit(&buf_pool_zip_mutex);
	//buf_pool_mutex_exit();

	return(fixed_pages_number);
}
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Returns the number of pending buffer pool I/O operations.
@return number of pending I/O operations */
UNIV_INTERN
ulint
buf_get_n_pending_ios(void)
/*=======================*/
{
	return(buf_pool->n_pend_reads
	       + buf_pool->n_flush[BUF_FLUSH_LRU]
	       + buf_pool->n_flush[BUF_FLUSH_LIST]
	       + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
}
/*********************************************************************//**
Returns the percentage of modified (dirty) pages in the buffer pool
relative to all database pages in the buffer pool.
@return modified page percentage ratio */
UNIV_INTERN
ulint
buf_get_modified_ratio_pct(void)
/*============================*/
{
	ulint	ratio;

	//buf_pool_mutex_enter(); /* optimistic */

	ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list))
		/ (1 + UT_LIST_GET_LEN(buf_pool->LRU)
		   + UT_LIST_GET_LEN(buf_pool->free));

	/* 1 + is there to avoid division by zero */
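	/* Example: 300 pages on the flush list with 900 LRU pages and
	100 free pages yields 100 * 300 / (1 + 900 + 100) = 29 percent. */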
	//buf_pool_mutex_exit(); /* optimistic */

	return(ratio);
}
/*********************************************************************//**
Prints info of the buffer i/o. */
UNIV_INTERN
void
buf_print_io(
/*=========*/
	FILE*	file)	/*!< in/out: buffer where to print */
{
	time_t	current_time;
	double	time_elapsed;
	ulint	n_gets_diff;

	ut_ad(buf_pool);

	//buf_pool_mutex_enter();
	mutex_enter(&LRU_list_mutex);
	mutex_enter(&free_list_mutex);
	mutex_enter(&buf_pool_mutex);
	mutex_enter(&flush_list_mutex);

	fprintf(file,
		"Buffer pool size %lu\n"
		"Buffer pool size, bytes %lu\n"
		"Free buffers %lu\n"
		"Database pages %lu\n"
		"Old database pages %lu\n"
		"Modified db pages %lu\n"
		"Pending reads %lu\n"
		"Pending writes: LRU %lu, flush list %lu, single page %lu\n",
		(ulong) buf_pool->curr_size,
		(ulong) buf_pool->curr_size * UNIV_PAGE_SIZE,
		(ulong) UT_LIST_GET_LEN(buf_pool->free),
		(ulong) UT_LIST_GET_LEN(buf_pool->LRU),
		(ulong) buf_pool->LRU_old_len,
		(ulong) UT_LIST_GET_LEN(buf_pool->flush_list),
		(ulong) buf_pool->n_pend_reads,
		(ulong) buf_pool->n_flush[BUF_FLUSH_LRU]
		+ buf_pool->init_flush[BUF_FLUSH_LRU],
		(ulong) buf_pool->n_flush[BUF_FLUSH_LIST]
		+ buf_pool->init_flush[BUF_FLUSH_LIST],
		(ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);

	current_time = time(NULL);
	time_elapsed = 0.001 + difftime(current_time,
					buf_pool->last_printout_time);

	fprintf(file,
		"Pages made young %lu, not young %lu\n"
		"%.2f youngs/s, %.2f non-youngs/s\n"
		"Pages read %lu, created %lu, written %lu\n"
		"%.2f reads/s, %.2f creates/s, %.2f writes/s\n",
		(ulong) buf_pool->stat.n_pages_made_young,
		(ulong) buf_pool->stat.n_pages_not_made_young,
		(buf_pool->stat.n_pages_made_young
		 - buf_pool->old_stat.n_pages_made_young)
		/ time_elapsed,
		(buf_pool->stat.n_pages_not_made_young
		 - buf_pool->old_stat.n_pages_not_made_young)
		/ time_elapsed,
		(ulong) buf_pool->stat.n_pages_read,
		(ulong) buf_pool->stat.n_pages_created,
		(ulong) buf_pool->stat.n_pages_written,
		(buf_pool->stat.n_pages_read
		 - buf_pool->old_stat.n_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_pages_created
		 - buf_pool->old_stat.n_pages_created)
		/ time_elapsed,
		(buf_pool->stat.n_pages_written
		 - buf_pool->old_stat.n_pages_written)
		/ time_elapsed);

	n_gets_diff = buf_pool->stat.n_page_gets - buf_pool->old_stat.n_page_gets;
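	/* The hit rate below is reported per mille since the last
	printout: 1000 - 1000 * (pages read from disk) / (page gets).
	For example, 50 reads out of 10000 gets prints as 995 / 1000. */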
	if (n_gets_diff) {
		fprintf(file,
			"Buffer pool hit rate %lu / 1000,"
			" young-making rate %lu / 1000 not %lu / 1000\n",
			(ulong)
			(1000 - ((1000 * (buf_pool->stat.n_pages_read
					  - buf_pool->old_stat.n_pages_read))
				 / (buf_pool->stat.n_page_gets
				    - buf_pool->old_stat.n_page_gets))),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_made_young
				 - buf_pool->old_stat.n_pages_made_young)
			 / n_gets_diff),
			(ulong)
			(1000 * (buf_pool->stat.n_pages_not_made_young
				 - buf_pool->old_stat.n_pages_not_made_young)
			 / n_gets_diff));
	} else {
		fputs("No buffer pool page gets since the last printout\n",
		      file);
	}

	/* Statistics about read ahead algorithm */
	fprintf(file, "Pages read ahead %.2f/s,"
		" evicted without access %.2f/s\n",
		(buf_pool->stat.n_ra_pages_read
		 - buf_pool->old_stat.n_ra_pages_read)
		/ time_elapsed,
		(buf_pool->stat.n_ra_pages_evicted
		 - buf_pool->old_stat.n_ra_pages_evicted)
		/ time_elapsed);

	/* Print some values to help us with visualizing what is
	happening with LRU eviction. */
	fprintf(file,
		"LRU len: %lu, unzip_LRU len: %lu\n"
		"I/O sum[%lu]:cur[%lu], unzip sum[%lu]:cur[%lu]\n",
		UT_LIST_GET_LEN(buf_pool->LRU),
		UT_LIST_GET_LEN(buf_pool->unzip_LRU),
		buf_LRU_stat_sum.io, buf_LRU_stat_cur.io,
		buf_LRU_stat_sum.unzip, buf_LRU_stat_cur.unzip);

	buf_refresh_io_stats();
	//buf_pool_mutex_exit();
	mutex_exit(&LRU_list_mutex);
	mutex_exit(&free_list_mutex);
	mutex_exit(&buf_pool_mutex);
	mutex_exit(&flush_list_mutex);
}
/**********************************************************************//**
Refreshes the statistics used to print per-second averages. */
UNIV_INTERN
void
buf_refresh_io_stats(void)
/*======================*/
{
	buf_pool->last_printout_time = time(NULL);
	buf_pool->old_stat = buf_pool->stat;
}

/*********************************************************************//**
Asserts that all file pages in the buffer are in a replaceable state.
@return TRUE */
UNIV_INTERN
ibool
buf_all_freed(void)
/*===============*/
{
	buf_chunk_t*	chunk;
	ulint		i;

	ut_ad(buf_pool);

	//buf_pool_mutex_enter(); /* optimistic */

	chunk = buf_pool->chunks;

	for (i = buf_pool->n_chunks; i--; chunk++) {

		const buf_block_t* block = buf_chunk_not_freed(chunk);

		if (UNIV_LIKELY_NULL(block)) {
			fprintf(stderr,
				"Page %lu %lu still fixed or dirty\n",
				(ulong) block->page.space,
				(ulong) block->page.offset);
			ut_error;
		}
	}

	//buf_pool_mutex_exit(); /* optimistic */

	return(TRUE);
}
/*********************************************************************//**
Checks that there currently are no pending i/o-operations for the buffer
pool.
@return TRUE if there is no pending i/o */
UNIV_INTERN
ibool
buf_pool_check_no_pending_io(void)
/*==============================*/
{
	ibool	ret;

	//buf_pool_mutex_enter();
	mutex_enter(&buf_pool_mutex);

	if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU]
	    + buf_pool->n_flush[BUF_FLUSH_LIST]
	    + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) {
		ret = FALSE;
	} else {
		ret = TRUE;
	}

	//buf_pool_mutex_exit();
	mutex_exit(&buf_pool_mutex);

	return(ret);
}
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return length of the free list */
UNIV_INTERN
ulint
buf_get_free_list_len(void)
/*=======================*/
{
	ulint	len;

	//buf_pool_mutex_enter();
	mutex_enter(&free_list_mutex);

	len = UT_LIST_GET_LEN(buf_pool->free);

	//buf_pool_mutex_exit();
	mutex_exit(&free_list_mutex);

	return(len);
}
#else /* !UNIV_HOTBACKUP */
/********************************************************************//**
Inits a page in the buffer pool, for use in ibbackup --restore. */
UNIV_INTERN
void
buf_page_init_for_backup_restore(
/*=============================*/
	ulint		space,	/*!< in: space id */
	ulint		offset,	/*!< in: offset of the page within space
				in units of a page */
	ulint		zip_size,/*!< in: compressed page size in bytes
				or 0 for uncompressed pages */
	buf_block_t*	block)	/*!< in: block to init */
{
	block->page.state	= BUF_BLOCK_FILE_PAGE;
	block->page.space	= space;
	block->page.offset	= offset;

	page_zip_des_init(&block->page.zip);

	/* We assume that block->page.data has been allocated
	with zip_size == UNIV_PAGE_SIZE. */
	ut_ad(zip_size <= UNIV_PAGE_SIZE);
	ut_ad(ut_is_2pow(zip_size));

	page_zip_set_size(&block->page.zip, zip_size);
	if (zip_size) {
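		/* The compressed copy is laid out immediately after the
		uncompressed frame within the same allocation. */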
		block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
	}
}
#endif /* !UNIV_HOTBACKUP */