You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2578 lines
65 KiB

17 years ago
17 years ago
17 years ago
16 years ago
14 years ago
16 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
14 years ago
16 years ago
17 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
16 years ago
14 years ago
16 years ago
16 years ago
14 years ago
14 years ago
16 years ago
14 years ago
14 years ago
14 years ago
14 years ago
14 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
16 years ago
14 years ago
14 years ago
  1. /*****************************************************************************
  2. Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
  3. This program is free software; you can redistribute it and/or modify it under
  4. the terms of the GNU General Public License as published by the Free Software
  5. Foundation; version 2 of the License.
  6. This program is distributed in the hope that it will be useful, but WITHOUT
  7. ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  8. FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  9. You should have received a copy of the GNU General Public License along with
  10. this program; if not, write to the Free Software Foundation, Inc.,
  11. 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
  12. *****************************************************************************/
  13. /**************************************************//**
  14. @file trx/trx0trx.cc
  15. The transaction
  16. Created 3/26/1996 Heikki Tuuri
  17. *******************************************************/
  18. #include "btr0types.h"
  19. #include "trx0trx.h"
  20. #ifdef UNIV_NONINL
  21. #include "trx0trx.ic"
  22. #endif
  23. #include <mysql/service_wsrep.h>
  24. #include "trx0undo.h"
  25. #include "trx0rseg.h"
  26. #include "log0log.h"
  27. #include "que0que.h"
  28. #include "lock0lock.h"
  29. #include "trx0roll.h"
  30. #include "usr0sess.h"
  31. #include "read0read.h"
  32. #include "srv0srv.h"
  33. #include "srv0start.h"
  34. #include "btr0sea.h"
  35. #include "os0proc.h"
  36. #include "trx0xa.h"
  37. #include "trx0rec.h"
  38. #include "trx0purge.h"
  39. #include "ha_prototypes.h"
  40. #include "srv0mon.h"
  41. #include "ut0vec.h"
  42. #include<set>
/** Ordered set of table ids. In this file it is used to collect each
table id referenced by an undo log exactly once. */
typedef std::set<table_id_t> table_id_set;
/** Dummy session used currently in MySQL interface. It is assigned to
trx->sess of every transaction object allocated in this file. */
UNIV_INTERN sess_t* trx_dummy_sess = NULL;
#ifdef UNIV_PFS_MUTEX
/* Key to register the per-transaction mutex (trx_t::mutex) with
performance schema */
UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
/* Key to register the per-transaction undo mutex (trx_t::undo_mutex)
with performance schema */
UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
#endif /* UNIV_PFS_MUTEX */
  53. /*************************************************************//**
  54. Set detailed error message for the transaction. */
  55. UNIV_INTERN
  56. void
  57. trx_set_detailed_error(
  58. /*===================*/
  59. trx_t* trx, /*!< in: transaction struct */
  60. const char* msg) /*!< in: detailed error message */
  61. {
  62. ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
  63. }
  64. /*************************************************************//**
  65. Set detailed error message for the transaction from a file. Note that the
  66. file is rewinded before reading from it. */
  67. UNIV_INTERN
  68. void
  69. trx_set_detailed_error_from_file(
  70. /*=============================*/
  71. trx_t* trx, /*!< in: transaction struct */
  72. FILE* file) /*!< in: file to read message from */
  73. {
  74. os_file_read_string(file, trx->detailed_error,
  75. sizeof(trx->detailed_error));
  76. }
  77. /*************************************************************//**
  78. Callback function for trx_find_descriptor() to compare trx IDs. */
  79. UNIV_INTERN
  80. int
  81. trx_descr_cmp(
  82. /*==========*/
  83. const void *a, /*!< in: pointer to first comparison argument */
  84. const void *b) /*!< in: pointer to second comparison argument */
  85. {
  86. const trx_id_t* da = (const trx_id_t*) a;
  87. const trx_id_t* db = (const trx_id_t*) b;
  88. if (*da < *db) {
  89. return -1;
  90. } else if (*da > *db) {
  91. return 1;
  92. }
  93. return 0;
  94. }
  95. /*************************************************************//**
  96. Reserve a slot for a given trx in the global descriptors array. */
  97. UNIV_INLINE
  98. void
  99. trx_reserve_descriptor(
  100. /*===================*/
  101. const trx_t* trx) /*!< in: trx pointer */
  102. {
  103. ulint n_used;
  104. ulint n_max;
  105. trx_id_t* descr;
  106. ut_ad(mutex_own(&trx_sys->mutex) || srv_is_being_started);
  107. ut_ad(srv_is_being_started ||
  108. !trx_find_descriptor(trx_sys->descriptors,
  109. trx_sys->descr_n_used,
  110. trx->id));
  111. n_used = trx_sys->descr_n_used + 1;
  112. n_max = trx_sys->descr_n_max;
  113. if (UNIV_UNLIKELY(n_used > n_max)) {
  114. n_max = n_max * 2;
  115. trx_sys->descriptors = static_cast<trx_id_t*>(
  116. ut_realloc(trx_sys->descriptors,
  117. n_max * sizeof(trx_id_t)));
  118. trx_sys->descr_n_max = n_max;
  119. srv_descriptors_memory = n_max * sizeof(trx_id_t);
  120. }
  121. descr = trx_sys->descriptors + n_used - 1;
  122. if (UNIV_UNLIKELY(n_used > 1 && trx->id < descr[-1])) {
  123. /* Find the slot where it should be inserted. We could use a
  124. binary search, but in reality linear search should be faster,
  125. because the slot we are looking for is near the array end. */
  126. trx_id_t* tdescr;
  127. for (tdescr = descr - 1;
  128. tdescr >= trx_sys->descriptors && *tdescr > trx->id;
  129. tdescr--) {
  130. }
  131. tdescr++;
  132. ut_memmove(tdescr + 1, tdescr, (descr - tdescr) *
  133. sizeof(trx_id_t));
  134. descr = tdescr;
  135. }
  136. *descr = trx->id;
  137. trx_sys->descr_n_used = n_used;
  138. }
  139. /*************************************************************//**
  140. Release a slot for a given trx in the global descriptors array. */
  141. UNIV_INTERN
  142. void
  143. trx_release_descriptor(
  144. /*===================*/
  145. trx_t* trx) /*!< in: trx pointer */
  146. {
  147. ulint size;
  148. trx_id_t* descr;
  149. ut_ad(mutex_own(&trx_sys->mutex));
  150. if (UNIV_LIKELY(trx->in_trx_serial_list)) {
  151. UT_LIST_REMOVE(trx_serial_list, trx_sys->trx_serial_list,
  152. trx);
  153. trx->in_trx_serial_list = false;
  154. }
  155. descr = trx_find_descriptor(trx_sys->descriptors,
  156. trx_sys->descr_n_used,
  157. trx->id);
  158. if (UNIV_UNLIKELY(descr == NULL)) {
  159. return;
  160. }
  161. size = (trx_sys->descriptors + trx_sys->descr_n_used - 1 - descr) *
  162. sizeof(trx_id_t);
  163. if (UNIV_LIKELY(size > 0)) {
  164. ut_memmove(descr, descr + 1, size);
  165. }
  166. trx_sys->descr_n_used--;
  167. }
  168. /****************************************************************//**
  169. Creates and initializes a transaction object. It must be explicitly
  170. started with trx_start_if_not_started() before using it. The default
  171. isolation level is TRX_ISO_REPEATABLE_READ.
  172. @return transaction instance, should never be NULL */
  173. static
  174. trx_t*
  175. trx_create(void)
  176. /*============*/
  177. {
  178. trx_t* trx;
  179. mem_heap_t* heap;
  180. ib_alloc_t* heap_alloc;
  181. trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
  182. mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
  183. trx->magic_n = TRX_MAGIC_N;
  184. trx->active_commit_ordered = 0;
  185. trx->state = TRX_STATE_NOT_STARTED;
  186. trx->isolation_level = TRX_ISO_REPEATABLE_READ;
  187. trx->no = TRX_ID_MAX;
  188. trx->in_trx_serial_list = false;
  189. trx->support_xa = TRUE;
  190. trx->fake_changes = FALSE;
  191. trx->check_foreigns = TRUE;
  192. trx->check_unique_secondary = TRUE;
  193. trx->dict_operation = TRX_DICT_OP_NONE;
  194. trx->idle_start = 0;
  195. trx->last_stmt_start = 0;
  196. mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
  197. trx->error_state = DB_SUCCESS;
  198. trx->lock.que_state = TRX_QUE_RUNNING;
  199. trx->lock.lock_heap = mem_heap_create_typed(
  200. 256, MEM_HEAP_FOR_LOCK_HEAP);
  201. trx->search_latch_timeout = BTR_SEA_TIMEOUT;
  202. trx->io_reads = 0;
  203. trx->io_read = 0;
  204. trx->io_reads_wait_timer = 0;
  205. trx->lock_que_wait_timer = 0;
  206. trx->innodb_que_wait_timer = 0;
  207. trx->distinct_page_access = 0;
  208. trx->distinct_page_access_hash = NULL;
  209. trx->take_stats = FALSE;
  210. trx->xid.formatID = -1;
  211. trx->op_info = "";
  212. trx->api_trx = false;
  213. trx->api_auto_commit = false;
  214. trx->read_write = true;
  215. heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
  216. heap_alloc = ib_heap_allocator_create(heap);
  217. /* Remember to free the vector explicitly in trx_free(). */
  218. trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
  219. /* Remember to free the vector explicitly in trx_free(). */
  220. heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
  221. heap_alloc = ib_heap_allocator_create(heap);
  222. trx->lock.table_locks = ib_vector_create(
  223. heap_alloc, sizeof(void**), 32);
  224. #ifdef WITH_WSREP
  225. trx->wsrep_event = NULL;
  226. #endif /* WITH_WSREP */
  227. return(trx);
  228. }
  229. /********************************************************************//**
  230. Creates a transaction object for background operations by the master thread.
  231. @return own: transaction object */
  232. UNIV_INTERN
  233. trx_t*
  234. trx_allocate_for_background(void)
  235. /*=============================*/
  236. {
  237. trx_t* trx;
  238. trx = trx_create();
  239. trx->sess = trx_dummy_sess;
  240. return(trx);
  241. }
  242. /********************************************************************//**
  243. Creates a transaction object for MySQL.
  244. @return own: transaction object */
  245. UNIV_INTERN
  246. trx_t*
  247. trx_allocate_for_mysql(void)
  248. /*========================*/
  249. {
  250. trx_t* trx;
  251. trx = trx_allocate_for_background();
  252. mutex_enter(&trx_sys->mutex);
  253. ut_d(trx->in_mysql_trx_list = TRUE);
  254. UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
  255. mutex_exit(&trx_sys->mutex);
  256. if (UNIV_UNLIKELY(trx->take_stats)) {
  257. trx->distinct_page_access_hash
  258. = static_cast<byte *>(mem_alloc(DPAH_SIZE));
  259. memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
  260. }
  261. return(trx);
  262. }
  263. /********************************************************************//**
  264. Frees a transaction object without releasing the corresponding descriptor.
  265. Should be used by callers that already own trx_sys->mutex. */
  266. static
  267. void
  268. trx_free_low(
  269. /*=========*/
  270. trx_t* trx) /*!< in, own: trx object */
  271. {
  272. ut_a(trx->magic_n == TRX_MAGIC_N);
  273. ut_ad(!trx->in_ro_trx_list);
  274. ut_ad(!trx->in_rw_trx_list);
  275. ut_ad(!trx->in_mysql_trx_list);
  276. mutex_free(&trx->undo_mutex);
  277. if (trx->undo_no_arr != NULL) {
  278. trx_undo_arr_free(trx->undo_no_arr);
  279. }
  280. ut_a(trx->lock.wait_lock == NULL);
  281. ut_a(trx->lock.wait_thr == NULL);
  282. ut_a(!trx->has_search_latch);
  283. #ifdef UNIV_SYNC_DEBUG
  284. ut_ad(!btr_search_own_any());
  285. #endif
  286. ut_a(trx->dict_operation_lock_mode == 0);
  287. if (trx->lock.lock_heap) {
  288. mem_heap_free(trx->lock.lock_heap);
  289. }
  290. ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
  291. ut_a(ib_vector_is_empty(trx->autoinc_locks));
  292. /* We allocated a dedicated heap for the vector. */
  293. ib_vector_free(trx->autoinc_locks);
  294. if (trx->lock.table_locks != NULL) {
  295. /* We allocated a dedicated heap for the vector. */
  296. ib_vector_free(trx->lock.table_locks);
  297. }
  298. mutex_free(&trx->mutex);
  299. read_view_free(trx->prebuilt_view);
  300. mem_free(trx);
  301. }
  302. /********************************************************************//**
  303. Frees a transaction object. */
  304. static
  305. void
  306. trx_free(
  307. /*=========*/
  308. trx_t* trx) /*!< in, own: trx object */
  309. {
  310. mutex_enter(&trx_sys->mutex);
  311. trx_release_descriptor(trx);
  312. mutex_exit(&trx_sys->mutex);
  313. trx_free_low(trx);
  314. }
  315. /********************************************************************//**
  316. Frees a transaction object of a background operation of the master thread. */
  317. UNIV_INTERN
  318. void
  319. trx_free_for_background(
  320. /*====================*/
  321. trx_t* trx) /*!< in, own: trx object */
  322. {
  323. if (trx->distinct_page_access_hash)
  324. {
  325. mem_free(trx->distinct_page_access_hash);
  326. trx->distinct_page_access_hash= NULL;
  327. }
  328. if (trx->declared_to_be_inside_innodb) {
  329. ib_logf(IB_LOG_LEVEL_ERROR,
  330. "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
  331. "to be processing inside InnoDB", trx, trx->id);
  332. trx_print(stderr, trx, 600);
  333. putc('\n', stderr);
  334. /* This is an error but not a fatal error. We must keep
  335. the counters like srv_conc_n_threads accurate. */
  336. srv_conc_force_exit_innodb(trx);
  337. }
  338. if (trx->n_mysql_tables_in_use != 0
  339. || trx->mysql_n_tables_locked != 0) {
  340. ib_logf(IB_LOG_LEVEL_ERROR,
  341. "MySQL is freeing a thd though "
  342. "trx->n_mysql_tables_in_use is %lu and "
  343. "trx->mysql_n_tables_locked is %lu.",
  344. (ulong) trx->n_mysql_tables_in_use,
  345. (ulong) trx->mysql_n_tables_locked);
  346. trx_print(stderr, trx, 600);
  347. ut_print_buf(stderr, trx, sizeof(trx_t));
  348. putc('\n', stderr);
  349. }
  350. ut_a(trx->state == TRX_STATE_NOT_STARTED);
  351. ut_a(trx->insert_undo == NULL);
  352. ut_a(trx->update_undo == NULL);
  353. ut_a(trx->read_view == NULL);
  354. trx_free(trx);
  355. }
  356. /********************************************************************//**
  357. At shutdown, frees a transaction object that is in the PREPARED state. */
  358. UNIV_INTERN
  359. void
  360. trx_free_prepared(
  361. /*==============*/
  362. trx_t* trx) /*!< in, own: trx object */
  363. {
  364. ut_ad(mutex_own(&trx_sys->mutex));
  365. ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
  366. ut_a(trx->magic_n == TRX_MAGIC_N);
  367. trx_undo_free_prepared(trx);
  368. assert_trx_in_rw_list(trx);
  369. ut_a(!trx->read_only);
  370. UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
  371. ut_d(trx->in_rw_trx_list = FALSE);
  372. trx_release_descriptor(trx);
  373. /* Undo trx_resurrect_table_locks(). */
  374. UT_LIST_INIT(trx->lock.trx_locks);
  375. trx_free_low(trx);
  376. ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
  377. }
  378. /********************************************************************//**
  379. Frees a transaction object for MySQL. */
  380. UNIV_INTERN
  381. void
  382. trx_free_for_mysql(
  383. /*===============*/
  384. trx_t* trx) /*!< in, own: trx object */
  385. {
  386. if (trx->distinct_page_access_hash)
  387. {
  388. mem_free(trx->distinct_page_access_hash);
  389. trx->distinct_page_access_hash= NULL;
  390. }
  391. mutex_enter(&trx_sys->mutex);
  392. ut_ad(trx->in_mysql_trx_list);
  393. ut_d(trx->in_mysql_trx_list = FALSE);
  394. UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
  395. ut_ad(trx_sys_validate_trx_list());
  396. mutex_exit(&trx_sys->mutex);
  397. trx_free_for_background(trx);
  398. }
  399. /****************************************************************//**
  400. Inserts the trx handle in the trx system trx list in the right position.
  401. The list is sorted on the trx id so that the biggest id is at the list
  402. start. This function is used at the database startup to insert incomplete
  403. transactions to the list. */
  404. static
  405. void
  406. trx_list_rw_insert_ordered(
  407. /*=======================*/
  408. trx_t* trx) /*!< in: trx handle */
  409. {
  410. trx_t* trx2;
  411. ut_ad(!trx->read_only);
  412. ut_d(trx->start_file = __FILE__);
  413. ut_d(trx->start_line = __LINE__);
  414. ut_a(srv_is_being_started);
  415. ut_ad(!trx->in_ro_trx_list);
  416. ut_ad(!trx->in_rw_trx_list);
  417. ut_ad(trx->state != TRX_STATE_NOT_STARTED);
  418. ut_ad(trx->is_recovered);
  419. for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
  420. trx2 != NULL;
  421. trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
  422. assert_trx_in_rw_list(trx2);
  423. if (trx->id >= trx2->id) {
  424. ut_ad(trx->id > trx2->id);
  425. break;
  426. }
  427. }
  428. if (trx2 != NULL) {
  429. trx2 = UT_LIST_GET_PREV(trx_list, trx2);
  430. if (trx2 == NULL) {
  431. UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
  432. } else {
  433. UT_LIST_INSERT_AFTER(
  434. trx_list, trx_sys->rw_trx_list, trx2, trx);
  435. }
  436. } else {
  437. UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
  438. }
  439. #ifdef UNIV_DEBUG
  440. if (trx->id > trx_sys->rw_max_trx_id) {
  441. trx_sys->rw_max_trx_id = trx->id;
  442. }
  443. #endif /* UNIV_DEBUG */
  444. ut_ad(!trx->in_rw_trx_list);
  445. ut_d(trx->in_rw_trx_list = TRUE);
  446. }
  447. /****************************************************************//**
  448. Resurrect the table locks for a resurrected transaction. */
  449. static
  450. void
  451. trx_resurrect_table_locks(
  452. /*======================*/
  453. trx_t* trx, /*!< in/out: transaction */
  454. const trx_undo_t* undo) /*!< in: undo log */
  455. {
  456. mtr_t mtr;
  457. page_t* undo_page;
  458. trx_undo_rec_t* undo_rec;
  459. table_id_set tables;
  460. ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
  461. if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
  462. || undo->empty) {
  463. return;
  464. }
  465. mtr_start(&mtr);
  466. /* trx_rseg_mem_create() may have acquired an X-latch on this
  467. page, so we cannot acquire an S-latch. */
  468. undo_page = trx_undo_page_get(
  469. undo->space, undo->zip_size, undo->top_page_no, &mtr);
  470. undo_rec = undo_page + undo->top_offset;
  471. do {
  472. ulint type;
  473. ulint cmpl_info;
  474. bool updated_extern;
  475. undo_no_t undo_no;
  476. table_id_t table_id;
  477. page_t* undo_rec_page = page_align(undo_rec);
  478. if (undo_rec_page != undo_page) {
  479. if (!mtr_memo_release(&mtr,
  480. buf_block_align(undo_page),
  481. MTR_MEMO_PAGE_X_FIX)) {
  482. /* The page of the previous undo_rec
  483. should have been latched by
  484. trx_undo_page_get() or
  485. trx_undo_get_prev_rec(). */
  486. ut_ad(0);
  487. }
  488. undo_page = undo_rec_page;
  489. }
  490. trx_undo_rec_get_pars(
  491. undo_rec, &type, &cmpl_info,
  492. &updated_extern, &undo_no, &table_id);
  493. tables.insert(table_id);
  494. undo_rec = trx_undo_get_prev_rec(
  495. undo_rec, undo->hdr_page_no,
  496. undo->hdr_offset, false, &mtr);
  497. } while (undo_rec);
  498. mtr_commit(&mtr);
  499. for (table_id_set::const_iterator i = tables.begin();
  500. i != tables.end(); i++) {
  501. if (dict_table_t* table = dict_table_open_on_id(
  502. *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
  503. if (table->ibd_file_missing
  504. || dict_table_is_temporary(table)) {
  505. mutex_enter(&dict_sys->mutex);
  506. dict_table_close(table, TRUE, FALSE);
  507. dict_table_remove_from_cache(table);
  508. mutex_exit(&dict_sys->mutex);
  509. continue;
  510. }
  511. lock_table_ix_resurrect(table, trx);
  512. DBUG_PRINT("ib_trx",
  513. ("resurrect" TRX_ID_FMT
  514. " table '%s' IX lock from %s undo",
  515. trx->id, table->name,
  516. undo == trx->insert_undo
  517. ? "insert" : "update"));
  518. dict_table_close(table, FALSE, FALSE);
  519. }
  520. }
  521. }
  522. /****************************************************************//**
  523. Resurrect the transactions that were doing inserts the time of the
  524. crash, they need to be undone.
  525. @return trx_t instance */
  526. static
  527. trx_t*
  528. trx_resurrect_insert(
  529. /*=================*/
  530. trx_undo_t* undo, /*!< in: entry to UNDO */
  531. trx_rseg_t* rseg) /*!< in: rollback segment */
  532. {
  533. trx_t* trx;
  534. trx = trx_allocate_for_background();
  535. trx->rseg = rseg;
  536. trx->xid = undo->xid;
  537. trx->id = undo->trx_id;
  538. trx->insert_undo = undo;
  539. trx->is_recovered = TRUE;
  540. /* This is single-threaded startup code, we do not need the
  541. protection of trx->mutex or trx_sys->mutex here. */
  542. if (undo->state != TRX_UNDO_ACTIVE) {
  543. /* Prepared transactions are left in the prepared state
  544. waiting for a commit or abort decision from MySQL */
  545. if (undo->state == TRX_UNDO_PREPARED) {
  546. fprintf(stderr,
  547. "InnoDB: Transaction " TRX_ID_FMT " was in the"
  548. " XA prepared state.\n", trx->id);
  549. if (srv_force_recovery == 0) {
  550. trx->state = TRX_STATE_PREPARED;
  551. trx_sys->n_prepared_trx++;
  552. trx_sys->n_prepared_recovered_trx++;
  553. } else {
  554. fprintf(stderr,
  555. "InnoDB: Since innodb_force_recovery"
  556. " > 0, we will rollback it anyway.\n");
  557. trx->state = TRX_STATE_ACTIVE;
  558. }
  559. } else {
  560. trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
  561. }
  562. /* We give a dummy value for the trx no; this should have no
  563. relevance since purge is not interested in committed
  564. transaction numbers, unless they are in the history
  565. list, in which case it looks the number from the disk based
  566. undo log structure */
  567. trx->no = trx->id;
  568. } else {
  569. trx->state = TRX_STATE_ACTIVE;
  570. /* A running transaction always has the number
  571. field inited to TRX_ID_MAX */
  572. trx->no = TRX_ID_MAX;
  573. }
  574. /* trx_start_low() is not called with resurrect, so need to initialize
  575. start time here.*/
  576. if (trx->state == TRX_STATE_ACTIVE
  577. || trx->state == TRX_STATE_PREPARED) {
  578. trx->start_time = ut_time();
  579. }
  580. if (undo->dict_operation) {
  581. trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
  582. trx->table_id = undo->table_id;
  583. }
  584. if (!undo->empty) {
  585. trx->undo_no = undo->top_undo_no + 1;
  586. }
  587. return(trx);
  588. }
  589. /****************************************************************//**
  590. Prepared transactions are left in the prepared state waiting for a
  591. commit or abort decision from MySQL */
  592. static
  593. void
  594. trx_resurrect_update_in_prepared_state(
  595. /*===================================*/
  596. trx_t* trx, /*!< in,out: transaction */
  597. const trx_undo_t* undo) /*!< in: update UNDO record */
  598. {
  599. /* This is single-threaded startup code, we do not need the
  600. protection of trx->mutex or trx_sys->mutex here. */
  601. if (undo->state == TRX_UNDO_PREPARED) {
  602. fprintf(stderr,
  603. "InnoDB: Transaction " TRX_ID_FMT
  604. " was in the XA prepared state.\n", trx->id);
  605. if (srv_force_recovery == 0) {
  606. if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
  607. trx_sys->n_prepared_trx++;
  608. trx_sys->n_prepared_recovered_trx++;
  609. } else {
  610. ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
  611. }
  612. trx->state = TRX_STATE_PREPARED;
  613. } else {
  614. fprintf(stderr,
  615. "InnoDB: Since innodb_force_recovery"
  616. " > 0, we will rollback it anyway.\n");
  617. trx->state = TRX_STATE_ACTIVE;
  618. }
  619. } else {
  620. trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
  621. }
  622. }
  623. /****************************************************************//**
  624. Resurrect the transactions that were doing updates the time of the
  625. crash, they need to be undone. */
  626. static
  627. void
  628. trx_resurrect_update(
  629. /*=================*/
  630. trx_t* trx, /*!< in/out: transaction */
  631. trx_undo_t* undo, /*!< in/out: update UNDO record */
  632. trx_rseg_t* rseg) /*!< in/out: rollback segment */
  633. {
  634. trx->rseg = rseg;
  635. trx->xid = undo->xid;
  636. trx->id = undo->trx_id;
  637. trx->update_undo = undo;
  638. trx->is_recovered = TRUE;
  639. /* This is single-threaded startup code, we do not need the
  640. protection of trx->mutex or trx_sys->mutex here. */
  641. if (undo->state != TRX_UNDO_ACTIVE) {
  642. trx_resurrect_update_in_prepared_state(trx, undo);
  643. /* We give a dummy value for the trx number */
  644. trx->no = trx->id;
  645. } else {
  646. trx->state = TRX_STATE_ACTIVE;
  647. /* A running transaction always has the number field inited to
  648. TRX_ID_MAX */
  649. trx->no = TRX_ID_MAX;
  650. }
  651. /* trx_start_low() is not called with resurrect, so need to initialize
  652. start time here.*/
  653. if (trx->state == TRX_STATE_ACTIVE
  654. || trx->state == TRX_STATE_PREPARED) {
  655. trx->start_time = ut_time();
  656. }
  657. if (undo->dict_operation) {
  658. trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
  659. trx->table_id = undo->table_id;
  660. }
  661. if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
  662. trx->undo_no = undo->top_undo_no + 1;
  663. }
  664. }
  665. /****************************************************************//**
  666. Creates trx objects for transactions and initializes the trx list of
  667. trx_sys at database start. Rollback segment and undo log lists must
  668. already exist when this function is called, because the lists of
  669. transactions to be rolled back or cleaned up are built based on the
  670. undo log lists. */
  671. UNIV_INTERN
  672. void
  673. trx_lists_init_at_db_start(void)
  674. /*============================*/
  675. {
  676. ulint i;
  677. ut_a(srv_is_being_started);
  678. UT_LIST_INIT(trx_sys->ro_trx_list);
  679. UT_LIST_INIT(trx_sys->rw_trx_list);
  680. UT_LIST_INIT(trx_sys->trx_serial_list);
  681. /* Look from the rollback segments if there exist undo logs for
  682. transactions */
  683. for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
  684. trx_undo_t* undo;
  685. trx_rseg_t* rseg;
  686. rseg = trx_sys->rseg_array[i];
  687. if (rseg == NULL) {
  688. continue;
  689. }
  690. /* Resurrect transactions that were doing inserts. */
  691. for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
  692. undo != NULL;
  693. undo = UT_LIST_GET_NEXT(undo_list, undo)) {
  694. trx_t* trx;
  695. trx = trx_resurrect_insert(undo, rseg);
  696. if (trx->state == TRX_STATE_ACTIVE ||
  697. trx->state == TRX_STATE_PREPARED) {
  698. trx_reserve_descriptor(trx);
  699. }
  700. trx_list_rw_insert_ordered(trx);
  701. trx_resurrect_table_locks(trx, undo);
  702. }
  703. /* Ressurrect transactions that were doing updates. */
  704. for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
  705. undo != NULL;
  706. undo = UT_LIST_GET_NEXT(undo_list, undo)) {
  707. trx_t* trx;
  708. ibool trx_created;
  709. /* Check the trx_sys->rw_trx_list first. */
  710. mutex_enter(&trx_sys->mutex);
  711. trx = trx_get_rw_trx_by_id(undo->trx_id);
  712. mutex_exit(&trx_sys->mutex);
  713. if (trx == NULL) {
  714. trx = trx_allocate_for_background();
  715. trx_created = TRUE;
  716. } else {
  717. trx_created = FALSE;
  718. }
  719. trx_resurrect_update(trx, undo, rseg);
  720. if (trx_created) {
  721. if (trx->state == TRX_STATE_ACTIVE ||
  722. trx->state == TRX_STATE_PREPARED) {
  723. trx_reserve_descriptor(trx);
  724. }
  725. trx_list_rw_insert_ordered(trx);
  726. }
  727. trx_resurrect_table_locks(trx, undo);
  728. }
  729. }
  730. }
  731. /******************************************************************//**
  732. Assigns a rollback segment to a transaction in a round-robin fashion.
  733. @return assigned rollback segment instance */
  734. static
  735. trx_rseg_t*
  736. trx_assign_rseg_low(
  737. /*================*/
  738. ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
  739. ulint n_tablespaces) /*!< in: number of rollback tablespaces */
  740. {
  741. ulint i;
  742. trx_rseg_t* rseg;
  743. static ulint latest_rseg = 0;
  744. if (srv_read_only_mode) {
  745. ut_a(max_undo_logs == ULONG_UNDEFINED);
  746. return(NULL);
  747. }
  748. /* This breaks true round robin but that should be OK. */
  749. ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
  750. i = latest_rseg++;
  751. i %= max_undo_logs;
  752. /* Note: The assumption here is that there can't be any gaps in
  753. the array. Once we implement more flexible rollback segment
  754. management this may not hold. The assertion checks for that case. */
  755. if (trx_sys->rseg_array[0] == NULL) {
  756. return(NULL);
  757. }
  758. /* Skip the system tablespace if we have more than one tablespace
  759. defined for rollback segments. We want all UNDO records to be in
  760. the non-system tablespaces. */
  761. do {
  762. rseg = trx_sys->rseg_array[i];
  763. ut_a(rseg == NULL || i == rseg->id);
  764. i = (rseg == NULL) ? 0 : i + 1;
  765. } while (rseg == NULL
  766. || (rseg->space == 0
  767. && n_tablespaces > 0
  768. && trx_sys->rseg_array[1] != NULL));
  769. return(rseg);
  770. }
  771. /****************************************************************//**
  772. Assign a read-only transaction a rollback-segment, if it is attempting
  773. to write to a TEMPORARY table. */
  774. UNIV_INTERN
  775. void
  776. trx_assign_rseg(
  777. /*============*/
  778. trx_t* trx) /*!< A read-only transaction that
  779. needs to be assigned a RBS. */
  780. {
  781. ut_a(trx->rseg == 0);
  782. ut_a(trx->read_only);
  783. ut_a(!srv_read_only_mode);
  784. ut_a(!trx_is_autocommit_non_locking(trx));
  785. trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
  786. }
  787. /****************************************************************//**
  788. Starts a transaction. */
  789. static
  790. void
  791. trx_start_low(
  792. /*==========*/
  793. trx_t* trx) /*!< in: transaction */
  794. {
  795. ut_ad(trx->rseg == NULL);
  796. ut_ad(trx->start_file != 0);
  797. ut_ad(trx->start_line != 0);
  798. ut_ad(!trx->is_recovered);
  799. ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
  800. ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
  801. /* Check whether it is an AUTOCOMMIT SELECT */
  802. trx->auto_commit = (trx->api_trx && trx->api_auto_commit)
  803. || thd_trx_is_auto_commit(trx->mysql_thd);
  804. trx->read_only =
  805. (trx->api_trx && !trx->read_write)
  806. || (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
  807. || srv_read_only_mode;
  808. if (!trx->auto_commit) {
  809. ++trx->will_lock;
  810. } else if (trx->will_lock == 0) {
  811. trx->read_only = TRUE;
  812. }
  813. if (!trx->read_only) {
  814. trx->rseg = trx_assign_rseg_low(
  815. srv_undo_logs, srv_undo_tablespaces);
  816. }
  817. #ifdef WITH_WSREP
  818. memset(&trx->xid, 0, sizeof(trx->xid));
  819. trx->xid.formatID = -1;
  820. #endif /* WITH_WSREP */
  821. /* The initial value for trx->no: TRX_ID_MAX is used in
  822. read_view_open_now: */
  823. trx->no = TRX_ID_MAX;
  824. ut_a(ib_vector_is_empty(trx->autoinc_locks));
  825. ut_a(ib_vector_is_empty(trx->lock.table_locks));
  826. mutex_enter(&trx_sys->mutex);
  827. /* If this transaction came from trx_allocate_for_mysql(),
  828. trx->in_mysql_trx_list would hold. In that case, the trx->state
  829. change must be protected by the trx_sys->mutex, so that
  830. lock_print_info_all_transactions() will have a consistent view. */
  831. trx->state = TRX_STATE_ACTIVE;
  832. trx->id = trx_sys_get_new_trx_id();
  833. ut_ad(!trx->in_rw_trx_list);
  834. ut_ad(!trx->in_ro_trx_list);
  835. if (trx->read_only) {
  836. /* Note: The trx_sys_t::ro_trx_list doesn't really need to
  837. be ordered, we should exploit this using a list type that
  838. doesn't need a list wide lock to increase concurrency. */
  839. if (!trx_is_autocommit_non_locking(trx)) {
  840. UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
  841. ut_d(trx->in_ro_trx_list = TRUE);
  842. }
  843. } else {
  844. ut_ad(trx->rseg != NULL
  845. || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
  846. ut_ad(!trx_is_autocommit_non_locking(trx));
  847. UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
  848. ut_d(trx->in_rw_trx_list = TRUE);
  849. #ifdef UNIV_DEBUG
  850. if (trx->id > trx_sys->rw_max_trx_id) {
  851. trx_sys->rw_max_trx_id = trx->id;
  852. }
  853. #endif /* UNIV_DEBUG */
  854. trx_reserve_descriptor(trx);
  855. }
  856. ut_ad(trx_sys_validate_trx_list());
  857. mutex_exit(&trx_sys->mutex);
  858. trx->start_time = ut_time();
  859. MONITOR_INC(MONITOR_TRX_ACTIVE);
  860. }
  861. /****************************************************************//**
  862. Set the transaction serialisation number. */
  863. static
  864. void
  865. trx_serialisation_number_get(
  866. /*=========================*/
  867. trx_t* trx) /*!< in: transaction */
  868. {
  869. trx_rseg_t* rseg;
  870. rseg = trx->rseg;
  871. ut_ad(mutex_own(&rseg->mutex));
  872. mutex_enter(&trx_sys->mutex);
  873. trx->no = trx_sys_get_new_trx_id();
  874. if (UNIV_LIKELY(!trx->in_trx_serial_list)) {
  875. UT_LIST_ADD_LAST(trx_serial_list, trx_sys->trx_serial_list,
  876. trx);
  877. trx->in_trx_serial_list = true;
  878. }
  879. /* If the rollack segment is not empty then the
  880. new trx_t::no can't be less than any trx_t::no
  881. already in the rollback segment. User threads only
  882. produce events when a rollback segment is empty. */
  883. if (rseg->last_page_no == FIL_NULL) {
  884. void* ptr;
  885. rseg_queue_t rseg_queue;
  886. rseg_queue.rseg = rseg;
  887. rseg_queue.trx_no = trx->no;
  888. mutex_enter(&purge_sys->bh_mutex);
  889. /* This is to reduce the pressure on the trx_sys_t::mutex
  890. though in reality it should make very little (read no)
  891. difference because this code path is only taken when the
  892. rbs is empty. */
  893. mutex_exit(&trx_sys->mutex);
  894. ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
  895. ut_a(ptr);
  896. mutex_exit(&purge_sys->bh_mutex);
  897. } else {
  898. mutex_exit(&trx_sys->mutex);
  899. }
  900. }
  901. /****************************************************************//**
  902. Assign the transaction its history serialisation number and write the
  903. update UNDO log record to the assigned rollback segment. */
  904. static __attribute__((nonnull))
  905. void
  906. trx_write_serialisation_history(
  907. /*============================*/
  908. trx_t* trx, /*!< in/out: transaction */
  909. mtr_t* mtr) /*!< in/out: mini-transaction */
  910. {
  911. #ifdef WITH_WSREP
  912. trx_sysf_t* sys_header;
  913. #endif /* WITH_WSREP */
  914. trx_rseg_t* rseg;
  915. rseg = trx->rseg;
  916. /* Change the undo log segment states from TRX_UNDO_ACTIVE
  917. to some other state: these modifications to the file data
  918. structure define the transaction as committed in the file
  919. based domain, at the serialization point of the log sequence
  920. number lsn obtained below. */
  921. if (trx->update_undo != NULL) {
  922. page_t* undo_hdr_page;
  923. trx_undo_t* undo = trx->update_undo;
  924. /* We have to hold the rseg mutex because update
  925. log headers have to be put to the history list in the
  926. (serialisation) order of the UNDO trx number. This is
  927. required for the purge in-memory data structures too. */
  928. mutex_enter(&rseg->mutex);
  929. /* Assign the transaction serialisation number and also
  930. update the purge min binary heap if this is the first
  931. UNDO log being written to the assigned rollback segment. */
  932. trx_serialisation_number_get(trx);
  933. /* It is not necessary to obtain trx->undo_mutex here
  934. because only a single OS thread is allowed to do the
  935. transaction commit for this transaction. */
  936. undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
  937. trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
  938. } else {
  939. mutex_enter(&rseg->mutex);
  940. }
  941. if (trx->insert_undo != NULL) {
  942. trx_undo_set_state_at_finish(trx->insert_undo, mtr);
  943. }
  944. mutex_exit(&rseg->mutex);
  945. MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
  946. #ifdef WITH_WSREP
  947. sys_header = trx_sysf_get(mtr);
  948. /* Update latest MySQL wsrep XID in trx sys header. */
  949. if (wsrep_is_wsrep_xid(&trx->xid))
  950. {
  951. trx_sys_update_wsrep_checkpoint(&trx->xid, sys_header, mtr);
  952. }
  953. #endif /* WITH_WSREP */
  954. /* Update the latest MySQL binlog name and offset info
  955. in trx sys header if MySQL binlogging is on or the database
  956. server is a MySQL replication slave */
  957. if (trx->mysql_log_file_name
  958. && trx->mysql_log_file_name[0] != '\0') {
  959. trx_sys_update_mysql_binlog_offset(
  960. trx->mysql_log_file_name,
  961. trx->mysql_log_offset,
  962. TRX_SYS_MYSQL_LOG_INFO,
  963. #ifdef WITH_WSREP
  964. sys_header,
  965. #endif /* WITH_WSREP */
  966. mtr);
  967. trx->mysql_log_file_name = NULL;
  968. }
  969. }
  970. /********************************************************************
  971. Finalize a transaction containing updates for a FTS table. */
  972. static __attribute__((nonnull))
  973. void
  974. trx_finalize_for_fts_table(
  975. /*=======================*/
  976. fts_trx_table_t* ftt) /* in: FTS trx table */
  977. {
  978. fts_t* fts = ftt->table->fts;
  979. fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
  980. mutex_enter(&fts->bg_threads_mutex);
  981. if (fts->fts_status & BG_THREAD_STOP) {
  982. /* The table is about to be dropped, no use
  983. adding anything to its work queue. */
  984. mutex_exit(&fts->bg_threads_mutex);
  985. } else {
  986. mem_heap_t* heap;
  987. mutex_exit(&fts->bg_threads_mutex);
  988. ut_a(fts->add_wq);
  989. heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
  990. ib_wqueue_add(fts->add_wq, doc_ids, heap);
  991. /* fts_trx_table_t no longer owns the list. */
  992. ftt->added_doc_ids = NULL;
  993. }
  994. }
  995. /******************************************************************//**
  996. Finalize a transaction containing updates to FTS tables. */
  997. static __attribute__((nonnull))
  998. void
  999. trx_finalize_for_fts(
  1000. /*=================*/
  1001. trx_t* trx, /*!< in/out: transaction */
  1002. bool is_commit) /*!< in: true if the transaction was
  1003. committed, false if it was rolled back. */
  1004. {
  1005. if (is_commit) {
  1006. const ib_rbt_node_t* node;
  1007. ib_rbt_t* tables;
  1008. fts_savepoint_t* savepoint;
  1009. savepoint = static_cast<fts_savepoint_t*>(
  1010. ib_vector_last(trx->fts_trx->savepoints));
  1011. tables = savepoint->tables;
  1012. for (node = rbt_first(tables);
  1013. node;
  1014. node = rbt_next(tables, node)) {
  1015. fts_trx_table_t** ftt;
  1016. ftt = rbt_value(fts_trx_table_t*, node);
  1017. if ((*ftt)->added_doc_ids) {
  1018. trx_finalize_for_fts_table(*ftt);
  1019. }
  1020. }
  1021. }
  1022. fts_trx_free(trx->fts_trx);
  1023. trx->fts_trx = NULL;
  1024. }
  1025. /**********************************************************************//**
  1026. If required, flushes the log to disk based on the value of
  1027. innodb_flush_log_at_trx_commit. */
  1028. static
  1029. void
  1030. trx_flush_log_if_needed_low(
  1031. /*========================*/
  1032. lsn_t lsn, /*!< in: lsn up to which logs are to be
  1033. flushed. */
  1034. trx_t* trx) /*!< in: transaction */
  1035. {
  1036. ulint flush_log_at_trx_commit;
  1037. flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit
  1038. ? thd_flush_log_at_trx_commit(NULL)
  1039. : thd_flush_log_at_trx_commit(trx->mysql_thd);
  1040. switch (flush_log_at_trx_commit) {
  1041. case 0:
  1042. /* Do nothing */
  1043. break;
  1044. case 1:
  1045. case 3:
  1046. /* Write the log and optionally flush it to disk */
  1047. log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
  1048. srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
  1049. break;
  1050. case 2:
  1051. /* Write the log but do not flush it to disk */
  1052. log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
  1053. break;
  1054. default:
  1055. ut_error;
  1056. }
  1057. }
  1058. /**********************************************************************//**
  1059. If required, flushes the log to disk based on the value of
  1060. innodb_flush_log_at_trx_commit. */
  1061. static __attribute__((nonnull))
  1062. void
  1063. trx_flush_log_if_needed(
  1064. /*====================*/
  1065. lsn_t lsn, /*!< in: lsn up to which logs are to be
  1066. flushed. */
  1067. trx_t* trx) /*!< in/out: transaction */
  1068. {
  1069. trx->op_info = "flushing log";
  1070. trx_flush_log_if_needed_low(lsn, trx);
  1071. trx->op_info = "";
  1072. }
  1073. /****************************************************************//**
  1074. Commits a transaction in memory. */
  1075. static __attribute__((nonnull))
  1076. void
  1077. trx_commit_in_memory(
  1078. /*=================*/
  1079. trx_t* trx, /*!< in/out: transaction */
  1080. lsn_t lsn) /*!< in: log sequence number of the mini-transaction
  1081. commit of trx_write_serialisation_history(), or 0
  1082. if the transaction did not modify anything */
  1083. {
  1084. trx->must_flush_log_later = FALSE;
  1085. if (trx_is_autocommit_non_locking(trx)) {
  1086. ut_ad(trx->read_only);
  1087. ut_a(!trx->is_recovered);
  1088. ut_ad(trx->rseg == NULL);
  1089. ut_ad(!trx->in_ro_trx_list);
  1090. ut_ad(!trx->in_rw_trx_list);
  1091. /* Note: We are asserting without holding the lock mutex. But
  1092. that is OK because this transaction is not waiting and cannot
  1093. be rolled back and no new locks can (or should not) be added
  1094. becuase it is flagged as a non-locking read-only transaction. */
  1095. ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
  1096. /* This state change is not protected by any mutex, therefore
  1097. there is an inherent race here around state transition during
  1098. printouts. We ignore this race for the sake of efficiency.
  1099. However, the trx_sys_t::mutex will protect the trx_t instance
  1100. and it cannot be removed from the mysql_trx_list and freed
  1101. without first acquiring the trx_sys_t::mutex. */
  1102. ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
  1103. trx->state = TRX_STATE_NOT_STARTED;
  1104. read_view_remove(trx->global_read_view, false);
  1105. MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
  1106. } else {
  1107. lock_trx_release_locks(trx);
  1108. /* Remove the transaction from the list of active
  1109. transactions now that it no longer holds any user locks. */
  1110. ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
  1111. mutex_enter(&trx_sys->mutex);
  1112. assert_trx_in_list(trx);
  1113. if (trx->read_only) {
  1114. UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
  1115. ut_d(trx->in_ro_trx_list = FALSE);
  1116. MONITOR_INC(MONITOR_TRX_RO_COMMIT);
  1117. } else {
  1118. UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
  1119. ut_d(trx->in_rw_trx_list = FALSE);
  1120. ut_ad(trx_sys->descr_n_used <=
  1121. UT_LIST_GET_LEN(trx_sys->rw_trx_list));
  1122. MONITOR_INC(MONITOR_TRX_RW_COMMIT);
  1123. }
  1124. /* If this transaction came from trx_allocate_for_mysql(),
  1125. trx->in_mysql_trx_list would hold. In that case, the
  1126. trx->state change must be protected by trx_sys->mutex, so that
  1127. lock_print_info_all_transactions() will have a consistent
  1128. view. */
  1129. trx->state = TRX_STATE_NOT_STARTED;
  1130. /* We already own the trx_sys_t::mutex, by doing it here we
  1131. avoid a potential context switch later. */
  1132. read_view_remove(trx->global_read_view, true);
  1133. ut_ad(trx_sys_validate_trx_list());
  1134. mutex_exit(&trx_sys->mutex);
  1135. }
  1136. if (trx->global_read_view != NULL) {
  1137. trx->global_read_view = NULL;
  1138. }
  1139. trx->read_view = NULL;
  1140. if (lsn) {
  1141. ulint flush_log_at_trx_commit;
  1142. if (trx->insert_undo != NULL) {
  1143. trx_undo_insert_cleanup(trx);
  1144. }
  1145. if (srv_use_global_flush_log_at_trx_commit) {
  1146. flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
  1147. } else {
  1148. flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
  1149. }
  1150. /* NOTE that we could possibly make a group commit more
  1151. efficient here: call os_thread_yield here to allow also other
  1152. trxs to come to commit! */
  1153. /*-------------------------------------*/
  1154. /* Depending on the my.cnf options, we may now write the log
  1155. buffer to the log files, making the transaction durable if
  1156. the OS does not crash. We may also flush the log files to
  1157. disk, making the transaction durable also at an OS crash or a
  1158. power outage.
  1159. The idea in InnoDB's group commit is that a group of
  1160. transactions gather behind a trx doing a physical disk write
  1161. to log files, and when that physical write has been completed,
  1162. one of those transactions does a write which commits the whole
  1163. group. Note that this group commit will only bring benefit if
  1164. there are > 2 users in the database. Then at least 2 users can
  1165. gather behind one doing the physical log write to disk.
  1166. If we are calling trx_commit() under prepare_commit_mutex, we
  1167. will delay possible log write and flush to a separate function
  1168. trx_commit_complete_for_mysql(), which is only called when the
  1169. thread has released the mutex. This is to make the
  1170. group commit algorithm to work. Otherwise, the prepare_commit
  1171. mutex would serialize all commits and prevent a group of
  1172. transactions from gathering. */
  1173. if (trx->flush_log_later) {
  1174. /* Do nothing yet */
  1175. trx->must_flush_log_later = TRUE;
  1176. } else if (flush_log_at_trx_commit == 0
  1177. || thd_requested_durability(trx->mysql_thd)
  1178. == HA_IGNORE_DURABILITY) {
  1179. /* Do nothing */
  1180. } else {
  1181. trx_flush_log_if_needed(lsn, trx);
  1182. }
  1183. trx->commit_lsn = lsn;
  1184. }
  1185. /* undo_no is non-zero if we're doing the final commit. */
  1186. bool not_rollback = trx->undo_no != 0;
  1187. /* Free all savepoints, starting from the first. */
  1188. trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
  1189. trx_roll_savepoints_free(trx, savep);
  1190. trx->rseg = NULL;
  1191. trx->undo_no = 0;
  1192. trx->last_sql_stat_start.least_undo_no = 0;
  1193. trx->ddl = false;
  1194. #ifdef UNIV_DEBUG
  1195. ut_ad(trx->start_file != 0);
  1196. ut_ad(trx->start_line != 0);
  1197. trx->start_file = 0;
  1198. trx->start_line = 0;
  1199. #endif /* UNIV_DEBUG */
  1200. trx->will_lock = 0;
  1201. trx->read_only = FALSE;
  1202. trx->auto_commit = FALSE;
  1203. if (trx->fts_trx) {
  1204. trx_finalize_for_fts(trx, not_rollback);
  1205. }
  1206. ut_ad(trx->lock.wait_thr == NULL);
  1207. ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
  1208. ut_ad(!trx->in_ro_trx_list);
  1209. ut_ad(!trx->in_rw_trx_list);
  1210. #ifdef WITH_WSREP
  1211. if (wsrep_on(trx->mysql_thd)) {
  1212. trx->lock.was_chosen_as_deadlock_victim = FALSE;
  1213. }
  1214. #endif
  1215. trx->dict_operation = TRX_DICT_OP_NONE;
  1216. trx->error_state = DB_SUCCESS;
  1217. /* trx->in_mysql_trx_list would hold between
  1218. trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
  1219. hold for recovered transactions or system transactions. */
  1220. }
  1221. /****************************************************************//**
  1222. Commits a transaction and a mini-transaction. */
  1223. UNIV_INTERN
  1224. void
  1225. trx_commit_low(
  1226. /*===========*/
  1227. trx_t* trx, /*!< in/out: transaction */
  1228. mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
  1229. or NULL if trx made no modifications */
  1230. {
  1231. lsn_t lsn;
  1232. assert_trx_nonlocking_or_in_list(trx);
  1233. ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
  1234. ut_ad(!mtr || mtr->state == MTR_ACTIVE);
  1235. ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
  1236. /* undo_no is non-zero if we're doing the final commit. */
  1237. if (trx->fts_trx && trx->undo_no != 0) {
  1238. dberr_t error;
  1239. ut_a(!trx_is_autocommit_non_locking(trx));
  1240. error = fts_commit(trx);
  1241. /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
  1242. instead of dying. This is a possible scenario if there
  1243. is a crash between insert to DELETED table committing
  1244. and transaction committing. The fix would be able to
  1245. return error from this function */
  1246. if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
  1247. /* FTS-FIXME: once we can return values from this
  1248. function, we should do so and signal an error
  1249. instead of just dying. */
  1250. ut_error;
  1251. }
  1252. }
  1253. if (mtr) {
  1254. trx_write_serialisation_history(trx, mtr);
  1255. /* The following call commits the mini-transaction, making the
  1256. whole transaction committed in the file-based world, at this
  1257. log sequence number. The transaction becomes 'durable' when
  1258. we write the log to disk, but in the logical sense the commit
  1259. in the file-based data structures (undo logs etc.) happens
  1260. here.
  1261. NOTE that transaction numbers, which are assigned only to
  1262. transactions with an update undo log, do not necessarily come
  1263. in exactly the same order as commit lsn's, if the transactions
  1264. have different rollback segments. To get exactly the same
  1265. order we should hold the kernel mutex up to this point,
  1266. adding to the contention of the kernel mutex. However, if
  1267. a transaction T2 is able to see modifications made by
  1268. a transaction T1, T2 will always get a bigger transaction
  1269. number and a bigger commit lsn than T1. */
  1270. /*--------------*/
  1271. mtr_commit(mtr);
  1272. /*--------------*/
  1273. lsn = mtr->end_lsn;
  1274. } else {
  1275. lsn = 0;
  1276. }
  1277. trx_commit_in_memory(trx, lsn);
  1278. }
  1279. /****************************************************************//**
  1280. Commits a transaction. */
  1281. UNIV_INTERN
  1282. void
  1283. trx_commit(
  1284. /*=======*/
  1285. trx_t* trx) /*!< in/out: transaction */
  1286. {
  1287. mtr_t local_mtr;
  1288. mtr_t* mtr;
  1289. if (trx->insert_undo || trx->update_undo) {
  1290. mtr = &local_mtr;
  1291. mtr_start(mtr);
  1292. } else {
  1293. mtr = NULL;
  1294. }
  1295. trx_commit_low(trx, mtr);
  1296. }
  1297. /****************************************************************//**
  1298. Cleans up a transaction at database startup. The cleanup is needed if
  1299. the transaction already got to the middle of a commit when the database
  1300. crashed, and we cannot roll it back. */
  1301. UNIV_INTERN
  1302. void
  1303. trx_cleanup_at_db_startup(
  1304. /*======================*/
  1305. trx_t* trx) /*!< in: transaction */
  1306. {
  1307. ut_ad(trx->is_recovered);
  1308. if (trx->insert_undo != NULL) {
  1309. trx_undo_insert_cleanup(trx);
  1310. }
  1311. trx->rseg = NULL;
  1312. trx->undo_no = 0;
  1313. trx->last_sql_stat_start.least_undo_no = 0;
  1314. mutex_enter(&trx_sys->mutex);
  1315. ut_a(!trx->read_only);
  1316. UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
  1317. ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
  1318. assert_trx_in_rw_list(trx);
  1319. ut_d(trx->in_rw_trx_list = FALSE);
  1320. trx->state = TRX_STATE_NOT_STARTED;
  1321. trx_release_descriptor(trx);
  1322. mutex_exit(&trx_sys->mutex);
  1323. /* Change the transaction state without mutex protection, now
  1324. that it no longer is in the trx_list. Recovered transactions
  1325. are never placed in the mysql_trx_list. */
  1326. ut_ad(trx->is_recovered);
  1327. ut_ad(!trx->in_ro_trx_list);
  1328. ut_ad(!trx->in_rw_trx_list);
  1329. ut_ad(!trx->in_mysql_trx_list);
  1330. }
  1331. /********************************************************************//**
  1332. Assigns a read view for a consistent read query. All the consistent reads
  1333. within the same transaction will get the same read view, which is created
  1334. when this function is first called for a new started transaction.
  1335. @return consistent read view */
  1336. UNIV_INTERN
  1337. read_view_t*
  1338. trx_assign_read_view(
  1339. /*=================*/
  1340. trx_t* trx) /*!< in: active transaction */
  1341. {
  1342. ut_ad(trx->state == TRX_STATE_ACTIVE);
  1343. if (trx->read_view != NULL) {
  1344. return(trx->read_view);
  1345. }
  1346. trx->read_view = read_view_open_now(trx->id, trx->prebuilt_view);
  1347. trx->global_read_view = trx->read_view;
  1348. return(trx->read_view);
  1349. }
  1350. /****************************************************************//**
  1351. Prepares a transaction for commit/rollback. */
  1352. UNIV_INTERN
  1353. void
  1354. trx_commit_or_rollback_prepare(
  1355. /*===========================*/
  1356. trx_t* trx) /*!< in/out: transaction */
  1357. {
  1358. /* We are reading trx->state without holding trx_sys->mutex
  1359. here, because the commit or rollback should be invoked for a
  1360. running (or recovered prepared) transaction that is associated
  1361. with the current thread. */
  1362. switch (trx->state) {
  1363. case TRX_STATE_NOT_STARTED:
  1364. #ifdef WITH_WSREP
  1365. ut_d(trx->start_file = __FILE__);
  1366. ut_d(trx->start_line = __LINE__);
  1367. #endif /* WITH_WSREP */
  1368. trx_start_low(trx);
  1369. /* fall through */
  1370. case TRX_STATE_ACTIVE:
  1371. case TRX_STATE_PREPARED:
  1372. /* If the trx is in a lock wait state, moves the waiting
  1373. query thread to the suspended state */
  1374. if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
  1375. ulint sec;
  1376. ulint ms;
  1377. ib_uint64_t now;
  1378. ut_a(trx->lock.wait_thr != NULL);
  1379. trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
  1380. trx->lock.wait_thr = NULL;
  1381. if (UNIV_UNLIKELY(trx->take_stats)) {
  1382. ut_usectime(&sec, &ms);
  1383. now = (ib_uint64_t)sec * 1000000 + ms;
  1384. trx->lock_que_wait_timer
  1385. += (ulint)
  1386. (now - trx->lock_que_wait_ustarted);
  1387. }
  1388. trx->lock.que_state = TRX_QUE_RUNNING;
  1389. }
  1390. ut_a(trx->lock.n_active_thrs == 1);
  1391. return;
  1392. case TRX_STATE_COMMITTED_IN_MEMORY:
  1393. break;
  1394. }
  1395. ut_error;
  1396. }
  1397. /*********************************************************************//**
  1398. Creates a commit command node struct.
  1399. @return own: commit node struct */
  1400. UNIV_INTERN
  1401. commit_node_t*
  1402. trx_commit_node_create(
  1403. /*===================*/
  1404. mem_heap_t* heap) /*!< in: mem heap where created */
  1405. {
  1406. commit_node_t* node;
  1407. node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
  1408. node->common.type = QUE_NODE_COMMIT;
  1409. node->state = COMMIT_NODE_SEND;
  1410. return(node);
  1411. }
  1412. /***********************************************************//**
  1413. Performs an execution step for a commit type node in a query graph.
  1414. @return query thread to run next, or NULL */
  1415. UNIV_INTERN
  1416. que_thr_t*
  1417. trx_commit_step(
  1418. /*============*/
  1419. que_thr_t* thr) /*!< in: query thread */
  1420. {
  1421. commit_node_t* node;
  1422. node = static_cast<commit_node_t*>(thr->run_node);
  1423. ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
  1424. if (thr->prev_node == que_node_get_parent(node)) {
  1425. node->state = COMMIT_NODE_SEND;
  1426. }
  1427. if (node->state == COMMIT_NODE_SEND) {
  1428. trx_t* trx;
  1429. node->state = COMMIT_NODE_WAIT;
  1430. trx = thr_get_trx(thr);
  1431. ut_a(trx->lock.wait_thr == NULL);
  1432. ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
  1433. trx_commit_or_rollback_prepare(trx);
  1434. trx->lock.que_state = TRX_QUE_COMMITTING;
  1435. trx_commit(trx);
  1436. ut_ad(trx->lock.wait_thr == NULL);
  1437. trx->lock.que_state = TRX_QUE_RUNNING;
  1438. thr = NULL;
  1439. } else {
  1440. ut_ad(node->state == COMMIT_NODE_WAIT);
  1441. node->state = COMMIT_NODE_SEND;
  1442. thr->run_node = que_node_get_parent(node);
  1443. }
  1444. return(thr);
  1445. }
  1446. /**********************************************************************//**
  1447. Does the transaction commit for MySQL.
  1448. @return DB_SUCCESS or error number */
  1449. UNIV_INTERN
  1450. dberr_t
  1451. trx_commit_for_mysql(
  1452. /*=================*/
  1453. trx_t* trx) /*!< in/out: transaction */
  1454. {
  1455. /* Because we do not do the commit by sending an Innobase
  1456. sig to the transaction, we must here make sure that trx has been
  1457. started. */
  1458. ut_a(trx);
  1459. switch (trx->state) {
  1460. case TRX_STATE_NOT_STARTED:
  1461. /* Update the info whether we should skip XA steps that eat
  1462. CPU time.
  1463. For the duration of the transaction trx->support_xa is
  1464. not reread from thd so any changes in the value take
  1465. effect in the next transaction. This is to avoid a
  1466. scenario where some undo log records generated by a
  1467. transaction contain XA information and other undo log
  1468. records, generated by the same transaction do not. */
  1469. trx->support_xa = thd_supports_xa(trx->mysql_thd);
  1470. ut_d(trx->start_file = __FILE__);
  1471. ut_d(trx->start_line = __LINE__);
  1472. trx_start_low(trx);
  1473. /* fall through */
  1474. case TRX_STATE_ACTIVE:
  1475. case TRX_STATE_PREPARED:
  1476. trx->op_info = "committing";
  1477. trx_commit(trx);
  1478. MONITOR_DEC(MONITOR_TRX_ACTIVE);
  1479. trx->op_info = "";
  1480. return(DB_SUCCESS);
  1481. case TRX_STATE_COMMITTED_IN_MEMORY:
  1482. break;
  1483. }
  1484. ut_error;
  1485. return(DB_CORRUPTION);
  1486. }
  1487. /**********************************************************************//**
  1488. If required, flushes the log to disk if we called trx_commit_for_mysql()
  1489. with trx->flush_log_later == TRUE. */
  1490. UNIV_INTERN
  1491. void
  1492. trx_commit_complete_for_mysql(
  1493. /*==========================*/
  1494. trx_t* trx) /*!< in/out: transaction */
  1495. {
  1496. ut_a(trx);
  1497. if (!trx->must_flush_log_later
  1498. || thd_requested_durability(trx->mysql_thd)
  1499. == HA_IGNORE_DURABILITY) {
  1500. return;
  1501. }
  1502. ulint flush_log_at_trx_commit;
  1503. flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit
  1504. ? thd_flush_log_at_trx_commit(NULL)
  1505. : thd_flush_log_at_trx_commit(trx->mysql_thd);
  1506. if (flush_log_at_trx_commit == 1 && trx->active_commit_ordered) {
  1507. return;
  1508. }
  1509. trx_flush_log_if_needed(trx->commit_lsn, trx);
  1510. trx->must_flush_log_later = FALSE;
  1511. }
  1512. /**********************************************************************//**
  1513. Marks the latest SQL statement ended. */
  1514. UNIV_INTERN
  1515. void
  1516. trx_mark_sql_stat_end(
  1517. /*==================*/
  1518. trx_t* trx) /*!< in: trx handle */
  1519. {
  1520. ut_a(trx);
  1521. switch (trx->state) {
  1522. case TRX_STATE_PREPARED:
  1523. case TRX_STATE_COMMITTED_IN_MEMORY:
  1524. break;
  1525. case TRX_STATE_NOT_STARTED:
  1526. trx->undo_no = 0;
  1527. /* fall through */
  1528. case TRX_STATE_ACTIVE:
  1529. trx->last_sql_stat_start.least_undo_no = trx->undo_no;
  1530. if (trx->fts_trx) {
  1531. fts_savepoint_laststmt_refresh(trx);
  1532. }
  1533. return;
  1534. }
  1535. ut_error;
  1536. }
  1537. /**********************************************************************//**
  1538. Prints info about a transaction.
  1539. Caller must hold trx_sys->mutex. */
  1540. UNIV_INTERN
  1541. void
  1542. trx_print_low(
  1543. /*==========*/
  1544. FILE* f,
  1545. /*!< in: output stream */
  1546. const trx_t* trx,
  1547. /*!< in: transaction */
  1548. ulint max_query_len,
  1549. /*!< in: max query length to print,
  1550. or 0 to use the default max length */
  1551. ulint n_rec_locks,
  1552. /*!< in: lock_number_of_rows_locked(&trx->lock) */
  1553. ulint n_trx_locks,
  1554. /*!< in: length of trx->lock.trx_locks */
  1555. ulint heap_size)
  1556. /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
  1557. {
  1558. ibool newline;
  1559. const char* op_info;
  1560. ut_ad(mutex_own(&trx_sys->mutex));
  1561. fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
  1562. /* trx->state cannot change from or to NOT_STARTED while we
  1563. are holding the trx_sys->mutex. It may change from ACTIVE to
  1564. PREPARED or COMMITTED. */
  1565. switch (trx->state) {
  1566. case TRX_STATE_NOT_STARTED:
  1567. fputs(", not started", f);
  1568. goto state_ok;
  1569. case TRX_STATE_ACTIVE:
  1570. fprintf(f, ", ACTIVE %lu sec",
  1571. (ulong) difftime(time(NULL), trx->start_time));
  1572. goto state_ok;
  1573. case TRX_STATE_PREPARED:
  1574. fprintf(f, ", ACTIVE (PREPARED) %lu sec",
  1575. (ulong) difftime(time(NULL), trx->start_time));
  1576. goto state_ok;
  1577. case TRX_STATE_COMMITTED_IN_MEMORY:
  1578. fputs(", COMMITTED IN MEMORY", f);
  1579. goto state_ok;
  1580. }
  1581. fprintf(f, ", state %lu", (ulong) trx->state);
  1582. ut_ad(0);
  1583. state_ok:
  1584. /* prevent a race condition */
  1585. op_info = trx->op_info;
  1586. if (*op_info) {
  1587. putc(' ', f);
  1588. fputs(op_info, f);
  1589. }
  1590. if (trx->is_recovered) {
  1591. fputs(" recovered trx", f);
  1592. }
  1593. if (trx->declared_to_be_inside_innodb) {
  1594. fprintf(f, ", thread declared inside InnoDB %lu",
  1595. (ulong) trx->n_tickets_to_enter_innodb);
  1596. }
  1597. putc('\n', f);
  1598. if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
  1599. fprintf(f, "mysql tables in use %lu, locked %lu\n",
  1600. (ulong) trx->n_mysql_tables_in_use,
  1601. (ulong) trx->mysql_n_tables_locked);
  1602. }
  1603. newline = TRUE;
  1604. /* trx->lock.que_state of an ACTIVE transaction may change
  1605. while we are not holding trx->mutex. We perform a dirty read
  1606. for performance reasons. */
  1607. switch (trx->lock.que_state) {
  1608. case TRX_QUE_RUNNING:
  1609. newline = FALSE; break;
  1610. case TRX_QUE_LOCK_WAIT:
  1611. fputs("LOCK WAIT ", f); break;
  1612. case TRX_QUE_ROLLING_BACK:
  1613. fputs("ROLLING BACK ", f); break;
  1614. case TRX_QUE_COMMITTING:
  1615. fputs("COMMITTING ", f); break;
  1616. default:
  1617. fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
  1618. }
  1619. if (n_trx_locks > 0 || heap_size > 400) {
  1620. newline = TRUE;
  1621. fprintf(f, "%lu lock struct(s), heap size %lu,"
  1622. " %lu row lock(s)",
  1623. (ulong) n_trx_locks,
  1624. (ulong) heap_size,
  1625. (ulong) n_rec_locks);
  1626. }
  1627. if (trx->has_search_latch) {
  1628. newline = TRUE;
  1629. fputs(", holds adaptive hash latch", f);
  1630. }
  1631. if (trx->undo_no != 0) {
  1632. newline = TRUE;
  1633. fprintf(f, ", undo log entries "TRX_ID_FMT, trx->undo_no);
  1634. }
  1635. if (newline) {
  1636. putc('\n', f);
  1637. }
  1638. if (trx->mysql_thd != NULL) {
  1639. innobase_mysql_print_thd(
  1640. f, trx->mysql_thd, static_cast<uint>(max_query_len));
  1641. }
  1642. }
  1643. /**********************************************************************//**
  1644. Prints info about a transaction.
  1645. The caller must hold lock_sys->mutex and trx_sys->mutex.
  1646. When possible, use trx_print() instead. */
  1647. UNIV_INTERN
  1648. void
  1649. trx_print_latched(
  1650. /*==============*/
  1651. FILE* f, /*!< in: output stream */
  1652. const trx_t* trx, /*!< in: transaction */
  1653. ulint max_query_len) /*!< in: max query length to print,
  1654. or 0 to use the default max length */
  1655. {
  1656. ut_ad(lock_mutex_own());
  1657. ut_ad(mutex_own(&trx_sys->mutex));
  1658. trx_print_low(f, trx, max_query_len,
  1659. lock_number_of_rows_locked(&trx->lock),
  1660. UT_LIST_GET_LEN(trx->lock.trx_locks),
  1661. mem_heap_get_size(trx->lock.lock_heap));
  1662. }
  1663. /**********************************************************************//**
  1664. Prints info about a transaction.
  1665. Acquires and releases lock_sys->mutex and trx_sys->mutex. */
  1666. UNIV_INTERN
  1667. void
  1668. trx_print(
  1669. /*======*/
  1670. FILE* f, /*!< in: output stream */
  1671. const trx_t* trx, /*!< in: transaction */
  1672. ulint max_query_len) /*!< in: max query length to print,
  1673. or 0 to use the default max length */
  1674. {
  1675. ulint n_rec_locks;
  1676. ulint n_trx_locks;
  1677. ulint heap_size;
  1678. lock_mutex_enter();
  1679. n_rec_locks = lock_number_of_rows_locked(&trx->lock);
  1680. n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
  1681. heap_size = mem_heap_get_size(trx->lock.lock_heap);
  1682. lock_mutex_exit();
  1683. mutex_enter(&trx_sys->mutex);
  1684. trx_print_low(f, trx, max_query_len,
  1685. n_rec_locks, n_trx_locks, heap_size);
  1686. mutex_exit(&trx_sys->mutex);
  1687. }
  1688. #ifdef UNIV_DEBUG
  1689. /**********************************************************************//**
  1690. Asserts that a transaction has been started.
  1691. The caller must hold trx_sys->mutex.
  1692. @return TRUE if started */
  1693. UNIV_INTERN
  1694. ibool
  1695. trx_assert_started(
  1696. /*===============*/
  1697. const trx_t* trx) /*!< in: transaction */
  1698. {
  1699. ut_ad(mutex_own(&trx_sys->mutex));
  1700. /* Non-locking autocommits should not hold any locks and this
  1701. function is only called from the locking code. */
  1702. assert_trx_in_list(trx);
  1703. /* trx->state can change from or to NOT_STARTED while we are holding
  1704. trx_sys->mutex for non-locking autocommit selects but not for other
  1705. types of transactions. It may change from ACTIVE to PREPARED. Unless
  1706. we are holding lock_sys->mutex, it may also change to COMMITTED. */
  1707. switch (trx->state) {
  1708. case TRX_STATE_PREPARED:
  1709. return(TRUE);
  1710. case TRX_STATE_ACTIVE:
  1711. case TRX_STATE_COMMITTED_IN_MEMORY:
  1712. return(TRUE);
  1713. case TRX_STATE_NOT_STARTED:
  1714. break;
  1715. }
  1716. ut_error;
  1717. return(FALSE);
  1718. }
  1719. #endif /* UNIV_DEBUG */
  1720. /*******************************************************************//**
  1721. Compares the "weight" (or size) of two transactions. Transactions that
  1722. have edited non-transactional tables are considered heavier than ones
  1723. that have not.
  1724. @return TRUE if weight(a) >= weight(b) */
  1725. UNIV_INTERN
  1726. ibool
  1727. trx_weight_ge(
  1728. /*==========*/
  1729. const trx_t* a, /*!< in: the first transaction to be compared */
  1730. const trx_t* b) /*!< in: the second transaction to be compared */
  1731. {
  1732. ibool a_notrans_edit;
  1733. ibool b_notrans_edit;
  1734. /* If mysql_thd is NULL for a transaction we assume that it has
  1735. not edited non-transactional tables. */
  1736. a_notrans_edit = a->mysql_thd != NULL
  1737. && thd_has_edited_nontrans_tables(a->mysql_thd);
  1738. b_notrans_edit = b->mysql_thd != NULL
  1739. && thd_has_edited_nontrans_tables(b->mysql_thd);
  1740. if (a_notrans_edit != b_notrans_edit) {
  1741. return(a_notrans_edit);
  1742. }
  1743. /* Either both had edited non-transactional tables or both had
  1744. not, we fall back to comparing the number of altered/locked
  1745. rows. */
  1746. #if 0
  1747. fprintf(stderr,
  1748. "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
  1749. __func__,
  1750. a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
  1751. b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
  1752. #endif
  1753. return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
  1754. }
  1755. /****************************************************************//**
  1756. Prepares a transaction. */
  1757. static
  1758. void
  1759. trx_prepare(
  1760. /*========*/
  1761. trx_t* trx) /*!< in/out: transaction */
  1762. {
  1763. trx_rseg_t* rseg;
  1764. lsn_t lsn;
  1765. mtr_t mtr;
  1766. rseg = trx->rseg;
  1767. /* Only fresh user transactions can be prepared.
  1768. Recovered transactions cannot. */
  1769. ut_a(!trx->is_recovered);
  1770. if (trx->insert_undo != NULL || trx->update_undo != NULL) {
  1771. mtr_start(&mtr);
  1772. /* Change the undo log segment states from TRX_UNDO_ACTIVE
  1773. to TRX_UNDO_PREPARED: these modifications to the file data
  1774. structure define the transaction as prepared in the
  1775. file-based world, at the serialization point of lsn. */
  1776. mutex_enter(&rseg->mutex);
  1777. if (trx->insert_undo != NULL) {
  1778. /* It is not necessary to obtain trx->undo_mutex here
  1779. because only a single OS thread is allowed to do the
  1780. transaction prepare for this transaction. */
  1781. trx_undo_set_state_at_prepare(trx, trx->insert_undo,
  1782. &mtr);
  1783. }
  1784. if (trx->update_undo) {
  1785. trx_undo_set_state_at_prepare(
  1786. trx, trx->update_undo, &mtr);
  1787. }
  1788. mutex_exit(&rseg->mutex);
  1789. /*--------------*/
  1790. mtr_commit(&mtr); /* This mtr commit makes the
  1791. transaction prepared in the file-based
  1792. world */
  1793. /*--------------*/
  1794. lsn = mtr.end_lsn;
  1795. ut_ad(lsn);
  1796. } else {
  1797. lsn = 0;
  1798. }
  1799. /*--------------------------------------*/
  1800. ut_a(trx->state == TRX_STATE_ACTIVE);
  1801. mutex_enter(&trx_sys->mutex);
  1802. trx->state = TRX_STATE_PREPARED;
  1803. trx_sys->n_prepared_trx++;
  1804. mutex_exit(&trx_sys->mutex);
  1805. /*--------------------------------------*/
  1806. if (lsn) {
  1807. /* Depending on the my.cnf options, we may now write the log
  1808. buffer to the log files, making the prepared state of the
  1809. transaction durable if the OS does not crash. We may also
  1810. flush the log files to disk, making the prepared state of the
  1811. transaction durable also at an OS crash or a power outage.
  1812. The idea in InnoDB's group prepare is that a group of
  1813. transactions gather behind a trx doing a physical disk write
  1814. to log files, and when that physical write has been completed,
  1815. one of those transactions does a write which prepares the whole
  1816. group. Note that this group prepare will only bring benefit if
  1817. there are > 2 users in the database. Then at least 2 users can
  1818. gather behind one doing the physical log write to disk.
  1819. TODO: find out if MySQL holds some mutex when calling this.
  1820. That would spoil our group prepare algorithm. */
  1821. trx_flush_log_if_needed(lsn, trx);
  1822. }
  1823. }
  1824. /**********************************************************************//**
  1825. Does the transaction prepare for MySQL. */
  1826. UNIV_INTERN
  1827. void
  1828. trx_prepare_for_mysql(
  1829. /*==================*/
  1830. trx_t* trx) /*!< in/out: trx handle */
  1831. {
  1832. trx_start_if_not_started_xa(trx);
  1833. trx->op_info = "preparing";
  1834. trx_prepare(trx);
  1835. trx->op_info = "";
  1836. }
  1837. /**********************************************************************//**
  1838. This function is used to find number of prepared transactions and
  1839. their transaction objects for a recovery.
  1840. @return number of prepared transactions stored in xid_list */
  1841. UNIV_INTERN
  1842. int
  1843. trx_recover_for_mysql(
  1844. /*==================*/
  1845. XID* xid_list, /*!< in/out: prepared transactions */
  1846. ulint len) /*!< in: number of slots in xid_list */
  1847. {
  1848. const trx_t* trx;
  1849. ulint count = 0;
  1850. ut_ad(xid_list);
  1851. ut_ad(len);
  1852. /* We should set those transactions which are in the prepared state
  1853. to the xid_list */
  1854. mutex_enter(&trx_sys->mutex);
  1855. for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
  1856. trx != NULL;
  1857. trx = UT_LIST_GET_NEXT(trx_list, trx)) {
  1858. assert_trx_in_rw_list(trx);
  1859. /* The state of a read-write transaction cannot change
  1860. from or to NOT_STARTED while we are holding the
  1861. trx_sys->mutex. It may change to PREPARED, but not if
  1862. trx->is_recovered. It may also change to COMMITTED. */
  1863. if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
  1864. xid_list[count] = trx->xid;
  1865. if (count == 0) {
  1866. ut_print_timestamp(stderr);
  1867. fprintf(stderr,
  1868. " InnoDB: Starting recovery for"
  1869. " XA transactions...\n");
  1870. }
  1871. ut_print_timestamp(stderr);
  1872. fprintf(stderr,
  1873. " InnoDB: Transaction " TRX_ID_FMT " in"
  1874. " prepared state after recovery\n",
  1875. trx->id);
  1876. ut_print_timestamp(stderr);
  1877. fprintf(stderr,
  1878. " InnoDB: Transaction contains changes"
  1879. " to "TRX_ID_FMT" rows\n",
  1880. trx->undo_no);
  1881. count++;
  1882. if (count == len) {
  1883. break;
  1884. }
  1885. }
  1886. }
  1887. mutex_exit(&trx_sys->mutex);
  1888. if (count > 0){
  1889. ut_print_timestamp(stderr);
  1890. fprintf(stderr,
  1891. " InnoDB: %d transactions in prepared state"
  1892. " after recovery\n",
  1893. int (count));
  1894. }
  1895. return(int (count));
  1896. }
  1897. /*******************************************************************//**
  1898. This function is used to find one X/Open XA distributed transaction
  1899. which is in the prepared state
  1900. @return trx on match, the trx->xid will be invalidated;
  1901. note that the trx may have been committed, unless the caller is
  1902. holding lock_sys->mutex */
  1903. static __attribute__((nonnull, warn_unused_result))
  1904. trx_t*
  1905. trx_get_trx_by_xid_low(
  1906. /*===================*/
  1907. const XID* xid) /*!< in: X/Open XA transaction
  1908. identifier */
  1909. {
  1910. trx_t* trx;
  1911. ut_ad(mutex_own(&trx_sys->mutex));
  1912. for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
  1913. trx != NULL;
  1914. trx = UT_LIST_GET_NEXT(trx_list, trx)) {
  1915. assert_trx_in_rw_list(trx);
  1916. /* Compare two X/Open XA transaction id's: their
  1917. length should be the same and binary comparison
  1918. of gtrid_length+bqual_length bytes should be
  1919. the same */
  1920. if (trx->is_recovered
  1921. && trx_state_eq(trx, TRX_STATE_PREPARED)
  1922. && xid->gtrid_length == trx->xid.gtrid_length
  1923. && xid->bqual_length == trx->xid.bqual_length
  1924. && memcmp(xid->data, trx->xid.data,
  1925. xid->gtrid_length + xid->bqual_length) == 0) {
  1926. /* Invalidate the XID, so that subsequent calls
  1927. will not find it. */
  1928. memset(&trx->xid, 0, sizeof(trx->xid));
  1929. trx->xid.formatID = -1;
  1930. break;
  1931. }
  1932. }
  1933. return(trx);
  1934. }
  1935. /*******************************************************************//**
  1936. This function is used to find one X/Open XA distributed transaction
  1937. which is in the prepared state
  1938. @return trx or NULL; on match, the trx->xid will be invalidated;
  1939. note that the trx may have been committed, unless the caller is
  1940. holding lock_sys->mutex */
  1941. UNIV_INTERN
  1942. trx_t*
  1943. trx_get_trx_by_xid(
  1944. /*===============*/
  1945. const XID* xid) /*!< in: X/Open XA transaction identifier */
  1946. {
  1947. trx_t* trx;
  1948. if (xid == NULL) {
  1949. return(NULL);
  1950. }
  1951. mutex_enter(&trx_sys->mutex);
  1952. /* Recovered/Resurrected transactions are always only on the
  1953. trx_sys_t::rw_trx_list. */
  1954. trx = trx_get_trx_by_xid_low(xid);
  1955. mutex_exit(&trx_sys->mutex);
  1956. return(trx);
  1957. }
  1958. /*************************************************************//**
  1959. Starts the transaction if it is not yet started. */
  1960. UNIV_INTERN
  1961. void
  1962. trx_start_if_not_started_xa_low(
  1963. /*============================*/
  1964. trx_t* trx) /*!< in: transaction */
  1965. {
  1966. switch (trx->state) {
  1967. case TRX_STATE_NOT_STARTED:
  1968. /* Update the info whether we should skip XA steps
  1969. that eat CPU time.
  1970. For the duration of the transaction trx->support_xa is
  1971. not reread from thd so any changes in the value take
  1972. effect in the next transaction. This is to avoid a
  1973. scenario where some undo generated by a transaction,
  1974. has XA stuff, and other undo, generated by the same
  1975. transaction, doesn't. */
  1976. trx->support_xa = thd_supports_xa(trx->mysql_thd);
  1977. trx_start_low(trx);
  1978. /* fall through */
  1979. case TRX_STATE_ACTIVE:
  1980. return;
  1981. case TRX_STATE_PREPARED:
  1982. case TRX_STATE_COMMITTED_IN_MEMORY:
  1983. break;
  1984. }
  1985. ut_error;
  1986. }
  1987. /*************************************************************//**
  1988. Starts the transaction if it is not yet started. */
  1989. UNIV_INTERN
  1990. void
  1991. trx_start_if_not_started_low(
  1992. /*=========================*/
  1993. trx_t* trx) /*!< in: transaction */
  1994. {
  1995. switch (trx->state) {
  1996. case TRX_STATE_NOT_STARTED:
  1997. #ifdef WITH_WSREP
  1998. ut_d(trx->start_file = __FILE__);
  1999. ut_d(trx->start_line = __LINE__);
  2000. #endif /* WITH_WSREP */
  2001. trx_start_low(trx);
  2002. /* fall through */
  2003. case TRX_STATE_ACTIVE:
  2004. return;
  2005. case TRX_STATE_PREPARED:
  2006. case TRX_STATE_COMMITTED_IN_MEMORY:
  2007. break;
  2008. }
  2009. ut_error;
  2010. }
  2011. /*************************************************************//**
  2012. Starts the transaction for a DDL operation. */
  2013. UNIV_INTERN
  2014. void
  2015. trx_start_for_ddl_low(
  2016. /*==================*/
  2017. trx_t* trx, /*!< in/out: transaction */
  2018. trx_dict_op_t op) /*!< in: dictionary operation type */
  2019. {
  2020. switch (trx->state) {
  2021. case TRX_STATE_NOT_STARTED:
  2022. /* Flag this transaction as a dictionary operation, so that
  2023. the data dictionary will be locked in crash recovery. */
  2024. trx_set_dict_operation(trx, op);
  2025. /* Ensure it is not flagged as an auto-commit-non-locking
  2026. transation. */
  2027. trx->will_lock = 1;
  2028. trx->ddl = true;
  2029. #ifdef WITH_WSREP
  2030. ut_d(trx->start_file = __FILE__);
  2031. ut_d(trx->start_line = __LINE__);
  2032. #endif /* WITH_WSREP */
  2033. trx_start_low(trx);
  2034. return;
  2035. case TRX_STATE_ACTIVE:
  2036. /* We have this start if not started idiom, therefore we
  2037. can't add stronger checks here. */
  2038. trx->ddl = true;
  2039. ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
  2040. ut_ad(trx->will_lock > 0);
  2041. return;
  2042. case TRX_STATE_PREPARED:
  2043. case TRX_STATE_COMMITTED_IN_MEMORY:
  2044. break;
  2045. }
  2046. ut_error;
  2047. }