You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1326 lines
30 KiB

20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
20 years ago
  1. /******************************************************
  2. Transaction rollback
  3. (c) 1996 Innobase Oy
  4. Created 3/26/1996 Heikki Tuuri
  5. *******************************************************/
  6. #include "trx0roll.h"
  7. #ifdef UNIV_NONINL
  8. #include "trx0roll.ic"
  9. #endif
  10. #include "fsp0fsp.h"
  11. #include "mach0data.h"
  12. #include "trx0rseg.h"
  13. #include "trx0trx.h"
  14. #include "trx0undo.h"
  15. #include "trx0rec.h"
  16. #include "que0que.h"
  17. #include "usr0sess.h"
  18. #include "srv0que.h"
  19. #include "srv0start.h"
  20. #include "row0undo.h"
  21. #include "row0mysql.h"
  22. #include "lock0lock.h"
  23. #include "pars0pars.h"
  24. /* This many pages must be undone before a truncate is tried within rollback */
  25. #define TRX_ROLL_TRUNC_THRESHOLD 1
  26. /* In crash recovery, the current trx to be rolled back */
  27. static trx_t* trx_roll_crash_recv_trx = NULL;
  28. /* In crash recovery we set this to the undo n:o of the current trx to be
  29. rolled back. Then we can print how many % the rollback has progressed. */
  30. static ib_int64_t trx_roll_max_undo_no;
  31. /* Auxiliary variable which tells the previous progress % we printed */
  32. static ulint trx_roll_progress_printed_pct;
  33. /***********************************************************************
  34. Rollback a transaction used in MySQL. */
  35. UNIV_INTERN
  36. int
  37. trx_general_rollback_for_mysql(
  38. /*===========================*/
  39. /* out: error code or DB_SUCCESS */
  40. trx_t* trx, /* in: transaction handle */
  41. ibool partial,/* in: TRUE if partial rollback requested */
  42. trx_savept_t* savept) /* in: pointer to savepoint undo number, if
  43. partial rollback requested */
  44. {
  45. #ifndef UNIV_HOTBACKUP
  46. mem_heap_t* heap;
  47. que_thr_t* thr;
  48. roll_node_t* roll_node;
  49. /* Tell Innobase server that there might be work for
  50. utility threads: */
  51. srv_active_wake_master_thread();
  52. trx_start_if_not_started(trx);
  53. heap = mem_heap_create(512);
  54. roll_node = roll_node_create(heap);
  55. roll_node->partial = partial;
  56. if (partial) {
  57. roll_node->savept = *savept;
  58. }
  59. trx->error_state = DB_SUCCESS;
  60. thr = pars_complete_graph_for_exec(roll_node, trx, heap);
  61. ut_a(thr == que_fork_start_command(que_node_get_parent(thr)));
  62. que_run_threads(thr);
  63. mutex_enter(&kernel_mutex);
  64. while (trx->que_state != TRX_QUE_RUNNING) {
  65. mutex_exit(&kernel_mutex);
  66. os_thread_sleep(100000);
  67. mutex_enter(&kernel_mutex);
  68. }
  69. mutex_exit(&kernel_mutex);
  70. mem_heap_free(heap);
  71. ut_a(trx->error_state == DB_SUCCESS);
  72. /* Tell Innobase server that there might be work for
  73. utility threads: */
  74. srv_active_wake_master_thread();
  75. return((int) trx->error_state);
  76. #else /* UNIV_HOTBACKUP */
  77. /* This function depends on MySQL code that is not included in
  78. InnoDB Hot Backup builds. Besides, this function should never
  79. be called in InnoDB Hot Backup. */
  80. ut_error;
  81. return(DB_FAIL);
  82. #endif /* UNIV_HOTBACKUP */
  83. }
  84. /***********************************************************************
  85. Rollback a transaction used in MySQL. */
  86. UNIV_INTERN
  87. int
  88. trx_rollback_for_mysql(
  89. /*===================*/
  90. /* out: error code or DB_SUCCESS */
  91. trx_t* trx) /* in: transaction handle */
  92. {
  93. int err;
  94. if (trx->conc_state == TRX_NOT_STARTED) {
  95. return(DB_SUCCESS);
  96. }
  97. trx->op_info = "rollback";
  98. /* If we are doing the XA recovery of prepared transactions, then
  99. the transaction object does not have an InnoDB session object, and we
  100. set a dummy session that we use for all MySQL transactions. */
  101. err = trx_general_rollback_for_mysql(trx, FALSE, NULL);
  102. trx->op_info = "";
  103. return(err);
  104. }
  105. /***********************************************************************
  106. Rollback the latest SQL statement for MySQL. */
  107. UNIV_INTERN
  108. int
  109. trx_rollback_last_sql_stat_for_mysql(
  110. /*=================================*/
  111. /* out: error code or DB_SUCCESS */
  112. trx_t* trx) /* in: transaction handle */
  113. {
  114. int err;
  115. if (trx->conc_state == TRX_NOT_STARTED) {
  116. return(DB_SUCCESS);
  117. }
  118. trx->op_info = "rollback of SQL statement";
  119. err = trx_general_rollback_for_mysql(trx, TRUE,
  120. &(trx->last_sql_stat_start));
  121. /* The following call should not be needed, but we play safe: */
  122. trx_mark_sql_stat_end(trx);
  123. trx->op_info = "";
  124. return(err);
  125. }
  126. /***********************************************************************
  127. Frees savepoint structs. */
  128. UNIV_INTERN
  129. void
  130. trx_roll_savepoints_free(
  131. /*=====================*/
  132. trx_t* trx, /* in: transaction handle */
  133. trx_named_savept_t* savep) /* in: free all savepoints > this one;
  134. if this is NULL, free all savepoints
  135. of trx */
  136. {
  137. trx_named_savept_t* next_savep;
  138. if (savep == NULL) {
  139. savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
  140. } else {
  141. savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
  142. }
  143. while (savep != NULL) {
  144. next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
  145. UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
  146. mem_free(savep->name);
  147. mem_free(savep);
  148. savep = next_savep;
  149. }
  150. }
  151. /***********************************************************************
  152. Rolls back a transaction back to a named savepoint. Modifications after the
  153. savepoint are undone but InnoDB does NOT release the corresponding locks
  154. which are stored in memory. If a lock is 'implicit', that is, a new inserted
  155. row holds a lock where the lock information is carried by the trx id stored in
  156. the row, these locks are naturally released in the rollback. Savepoints which
  157. were set after this savepoint are deleted. */
  158. UNIV_INTERN
  159. ulint
  160. trx_rollback_to_savepoint_for_mysql(
  161. /*================================*/
  162. /* out: if no savepoint
  163. of the name found then
  164. DB_NO_SAVEPOINT,
  165. otherwise DB_SUCCESS */
  166. trx_t* trx, /* in: transaction handle */
  167. const char* savepoint_name, /* in: savepoint name */
  168. ib_int64_t* mysql_binlog_cache_pos) /* out: the MySQL binlog cache
  169. position corresponding to this
  170. savepoint; MySQL needs this
  171. information to remove the
  172. binlog entries of the queries
  173. executed after the savepoint */
  174. {
  175. trx_named_savept_t* savep;
  176. ulint err;
  177. savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
  178. while (savep != NULL) {
  179. if (0 == ut_strcmp(savep->name, savepoint_name)) {
  180. /* Found */
  181. break;
  182. }
  183. savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
  184. }
  185. if (savep == NULL) {
  186. return(DB_NO_SAVEPOINT);
  187. }
  188. if (trx->conc_state == TRX_NOT_STARTED) {
  189. ut_print_timestamp(stderr);
  190. fputs(" InnoDB: Error: transaction has a savepoint ", stderr);
  191. ut_print_name(stderr, trx, FALSE, savep->name);
  192. fputs(" though it is not started\n", stderr);
  193. return(DB_ERROR);
  194. }
  195. /* We can now free all savepoints strictly later than this one */
  196. trx_roll_savepoints_free(trx, savep);
  197. *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
  198. trx->op_info = "rollback to a savepoint";
  199. err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept));
  200. /* Store the current undo_no of the transaction so that we know where
  201. to roll back if we have to roll back the next SQL statement: */
  202. trx_mark_sql_stat_end(trx);
  203. trx->op_info = "";
  204. return(err);
  205. }
  206. /***********************************************************************
  207. Creates a named savepoint. If the transaction is not yet started, starts it.
  208. If there is already a savepoint of the same name, this call erases that old
  209. savepoint and replaces it with a new. Savepoints are deleted in a transaction
  210. commit or rollback. */
  211. UNIV_INTERN
  212. ulint
  213. trx_savepoint_for_mysql(
  214. /*====================*/
  215. /* out: always DB_SUCCESS */
  216. trx_t* trx, /* in: transaction handle */
  217. const char* savepoint_name, /* in: savepoint name */
  218. ib_int64_t binlog_cache_pos) /* in: MySQL binlog cache
  219. position corresponding to this
  220. connection at the time of the
  221. savepoint */
  222. {
  223. trx_named_savept_t* savep;
  224. ut_a(trx);
  225. ut_a(savepoint_name);
  226. trx_start_if_not_started(trx);
  227. savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
  228. while (savep != NULL) {
  229. if (0 == ut_strcmp(savep->name, savepoint_name)) {
  230. /* Found */
  231. break;
  232. }
  233. savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
  234. }
  235. if (savep) {
  236. /* There is a savepoint with the same name: free that */
  237. UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
  238. mem_free(savep->name);
  239. mem_free(savep);
  240. }
  241. /* Create a new savepoint and add it as the last in the list */
  242. savep = mem_alloc(sizeof(trx_named_savept_t));
  243. savep->name = mem_strdup(savepoint_name);
  244. savep->savept = trx_savept_take(trx);
  245. savep->mysql_binlog_cache_pos = binlog_cache_pos;
  246. UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep);
  247. return(DB_SUCCESS);
  248. }
  249. /***********************************************************************
  250. Releases a named savepoint. Savepoints which
  251. were set after this savepoint are deleted. */
  252. UNIV_INTERN
  253. ulint
  254. trx_release_savepoint_for_mysql(
  255. /*============================*/
  256. /* out: if no savepoint
  257. of the name found then
  258. DB_NO_SAVEPOINT,
  259. otherwise DB_SUCCESS */
  260. trx_t* trx, /* in: transaction handle */
  261. const char* savepoint_name) /* in: savepoint name */
  262. {
  263. trx_named_savept_t* savep;
  264. savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
  265. while (savep != NULL) {
  266. if (0 == ut_strcmp(savep->name, savepoint_name)) {
  267. /* Found */
  268. break;
  269. }
  270. savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
  271. }
  272. if (savep == NULL) {
  273. return(DB_NO_SAVEPOINT);
  274. }
  275. /* We can now free all savepoints strictly later than this one */
  276. trx_roll_savepoints_free(trx, savep);
  277. /* Now we can free this savepoint too */
  278. UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep);
  279. mem_free(savep->name);
  280. mem_free(savep);
  281. return(DB_SUCCESS);
  282. }
  283. /***********************************************************************
  284. Returns a transaction savepoint taken at this point in time. */
  285. UNIV_INTERN
  286. trx_savept_t
  287. trx_savept_take(
  288. /*============*/
  289. /* out: savepoint */
  290. trx_t* trx) /* in: transaction */
  291. {
  292. trx_savept_t savept;
  293. savept.least_undo_no = trx->undo_no;
  294. return(savept);
  295. }
  296. /***********************************************************************
  297. Roll back an active transaction. */
  298. static
  299. void
  300. trx_rollback_active(
  301. /*================*/
  302. trx_t* trx) /* in/out: transaction */
  303. {
  304. mem_heap_t* heap;
  305. que_fork_t* fork;
  306. que_thr_t* thr;
  307. roll_node_t* roll_node;
  308. dict_table_t* table;
  309. ib_int64_t rows_to_undo;
  310. const char* unit = "";
  311. ibool dictionary_locked = FALSE;
  312. heap = mem_heap_create(512);
  313. fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
  314. fork->trx = trx;
  315. thr = que_thr_create(fork, heap);
  316. roll_node = roll_node_create(heap);
  317. thr->child = roll_node;
  318. roll_node->common.parent = thr;
  319. mutex_enter(&kernel_mutex);
  320. trx->graph = fork;
  321. ut_a(thr == que_fork_start_command(fork));
  322. trx_roll_crash_recv_trx = trx;
  323. trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no);
  324. trx_roll_progress_printed_pct = 0;
  325. rows_to_undo = trx_roll_max_undo_no;
  326. if (rows_to_undo > 1000000000) {
  327. rows_to_undo = rows_to_undo / 1000000;
  328. unit = "M";
  329. }
  330. ut_print_timestamp(stderr);
  331. fprintf(stderr,
  332. " InnoDB: Rolling back trx with id " TRX_ID_FMT ", %lu%s"
  333. " rows to undo\n",
  334. TRX_ID_PREP_PRINTF(trx->id),
  335. (ulong) rows_to_undo, unit);
  336. mutex_exit(&kernel_mutex);
  337. trx->mysql_thread_id = os_thread_get_curr_id();
  338. trx->mysql_process_no = os_proc_get_number();
  339. if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) {
  340. row_mysql_lock_data_dictionary(trx);
  341. dictionary_locked = TRUE;
  342. }
  343. que_run_threads(thr);
  344. mutex_enter(&kernel_mutex);
  345. while (trx->que_state != TRX_QUE_RUNNING) {
  346. mutex_exit(&kernel_mutex);
  347. fprintf(stderr,
  348. "InnoDB: Waiting for rollback of trx id %lu to end\n",
  349. (ulong) ut_dulint_get_low(trx->id));
  350. os_thread_sleep(100000);
  351. mutex_enter(&kernel_mutex);
  352. }
  353. mutex_exit(&kernel_mutex);
  354. if (trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
  355. && !ut_dulint_is_zero(trx->table_id)) {
  356. /* If the transaction was for a dictionary operation, we
  357. drop the relevant table, if it still exists */
  358. fprintf(stderr,
  359. "InnoDB: Dropping table with id %lu %lu"
  360. " in recovery if it exists\n",
  361. (ulong) ut_dulint_get_high(trx->table_id),
  362. (ulong) ut_dulint_get_low(trx->table_id));
  363. table = dict_table_get_on_id_low(trx->table_id);
  364. if (table) {
  365. ulint err;
  366. fputs("InnoDB: Table found: dropping table ", stderr);
  367. ut_print_name(stderr, trx, TRUE, table->name);
  368. fputs(" in recovery\n", stderr);
  369. err = row_drop_table_for_mysql(table->name, trx, TRUE);
  370. ut_a(err == (int) DB_SUCCESS);
  371. }
  372. }
  373. if (dictionary_locked) {
  374. row_mysql_unlock_data_dictionary(trx);
  375. }
  376. fprintf(stderr, "\nInnoDB: Rolling back of trx id " TRX_ID_FMT
  377. " completed\n",
  378. TRX_ID_PREP_PRINTF(trx->id));
  379. mem_heap_free(heap);
  380. trx_roll_crash_recv_trx = NULL;
  381. }
  382. /***********************************************************************
  383. Rollback or clean up any incomplete transactions which were
  384. encountered in crash recovery. If the transaction already was
  385. committed, then we clean up a possible insert undo log. If the
  386. transaction was not yet committed, then we roll it back.
  387. Note: this is done in a background thread. */
  388. UNIV_INTERN
  389. os_thread_ret_t
  390. trx_rollback_or_clean_all_recovered(
  391. /*================================*/
  392. /* out: a dummy parameter */
  393. void* arg __attribute__((unused)))
  394. /* in: a dummy parameter required by
  395. os_thread_create */
  396. {
  397. trx_t* trx;
  398. mutex_enter(&kernel_mutex);
  399. if (UT_LIST_GET_FIRST(trx_sys->trx_list)) {
  400. fprintf(stderr,
  401. "InnoDB: Starting in background the rollback"
  402. " of uncommitted transactions\n");
  403. } else {
  404. goto leave_function;
  405. }
  406. mutex_exit(&kernel_mutex);
  407. loop:
  408. mutex_enter(&kernel_mutex);
  409. for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list); trx;
  410. trx = UT_LIST_GET_NEXT(trx_list, trx)) {
  411. if (!trx->is_recovered) {
  412. continue;
  413. }
  414. switch (trx->conc_state) {
  415. case TRX_NOT_STARTED:
  416. case TRX_PREPARED:
  417. continue;
  418. case TRX_COMMITTED_IN_MEMORY:
  419. mutex_exit(&kernel_mutex);
  420. fprintf(stderr,
  421. "InnoDB: Cleaning up trx with id "
  422. TRX_ID_FMT "\n",
  423. TRX_ID_PREP_PRINTF(trx->id));
  424. trx_cleanup_at_db_startup(trx);
  425. goto loop;
  426. case TRX_ACTIVE:
  427. mutex_exit(&kernel_mutex);
  428. trx_rollback_active(trx);
  429. goto loop;
  430. }
  431. }
  432. ut_print_timestamp(stderr);
  433. fprintf(stderr,
  434. " InnoDB: Rollback of non-prepared transactions completed\n");
  435. leave_function:
  436. mutex_exit(&kernel_mutex);
  437. /* We count the number of threads in os_thread_exit(). A created
  438. thread should always use that to exit and not use return() to exit. */
  439. os_thread_exit(NULL);
  440. OS_THREAD_DUMMY_RETURN;
  441. }
  442. /***********************************************************************
  443. Creates an undo number array. */
  444. UNIV_INTERN
  445. trx_undo_arr_t*
  446. trx_undo_arr_create(void)
  447. /*=====================*/
  448. {
  449. trx_undo_arr_t* arr;
  450. mem_heap_t* heap;
  451. ulint i;
  452. heap = mem_heap_create(1024);
  453. arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t));
  454. arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t)
  455. * UNIV_MAX_PARALLELISM);
  456. arr->n_cells = UNIV_MAX_PARALLELISM;
  457. arr->n_used = 0;
  458. arr->heap = heap;
  459. for (i = 0; i < UNIV_MAX_PARALLELISM; i++) {
  460. (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE;
  461. }
  462. return(arr);
  463. }
  464. /***********************************************************************
  465. Frees an undo number array. */
  466. UNIV_INTERN
  467. void
  468. trx_undo_arr_free(
  469. /*==============*/
  470. trx_undo_arr_t* arr) /* in: undo number array */
  471. {
  472. ut_ad(arr->n_used == 0);
  473. mem_heap_free(arr->heap);
  474. }
  475. /***********************************************************************
  476. Stores info of an undo log record to the array if it is not stored yet. */
  477. static
  478. ibool
  479. trx_undo_arr_store_info(
  480. /*====================*/
  481. /* out: FALSE if the record already existed in the
  482. array */
  483. trx_t* trx, /* in: transaction */
  484. dulint undo_no)/* in: undo number */
  485. {
  486. trx_undo_inf_t* cell;
  487. trx_undo_inf_t* stored_here;
  488. trx_undo_arr_t* arr;
  489. ulint n_used;
  490. ulint n;
  491. ulint i;
  492. n = 0;
  493. arr = trx->undo_no_arr;
  494. n_used = arr->n_used;
  495. stored_here = NULL;
  496. for (i = 0;; i++) {
  497. cell = trx_undo_arr_get_nth_info(arr, i);
  498. if (!cell->in_use) {
  499. if (!stored_here) {
  500. /* Not in use, we may store here */
  501. cell->undo_no = undo_no;
  502. cell->in_use = TRUE;
  503. arr->n_used++;
  504. stored_here = cell;
  505. }
  506. } else {
  507. n++;
  508. if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
  509. if (stored_here) {
  510. stored_here->in_use = FALSE;
  511. ut_ad(arr->n_used > 0);
  512. arr->n_used--;
  513. }
  514. ut_ad(arr->n_used == n_used);
  515. return(FALSE);
  516. }
  517. }
  518. if (n == n_used && stored_here) {
  519. ut_ad(arr->n_used == 1 + n_used);
  520. return(TRUE);
  521. }
  522. }
  523. }
  524. /***********************************************************************
  525. Removes an undo number from the array. */
  526. static
  527. void
  528. trx_undo_arr_remove_info(
  529. /*=====================*/
  530. trx_undo_arr_t* arr, /* in: undo number array */
  531. dulint undo_no)/* in: undo number */
  532. {
  533. trx_undo_inf_t* cell;
  534. ulint n_used;
  535. ulint n;
  536. ulint i;
  537. n_used = arr->n_used;
  538. n = 0;
  539. for (i = 0;; i++) {
  540. cell = trx_undo_arr_get_nth_info(arr, i);
  541. if (cell->in_use
  542. && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) {
  543. cell->in_use = FALSE;
  544. ut_ad(arr->n_used > 0);
  545. arr->n_used--;
  546. return;
  547. }
  548. }
  549. }
  550. /***********************************************************************
  551. Gets the biggest undo number in an array. */
  552. static
  553. dulint
  554. trx_undo_arr_get_biggest(
  555. /*=====================*/
  556. /* out: biggest value, ut_dulint_zero if
  557. the array is empty */
  558. trx_undo_arr_t* arr) /* in: undo number array */
  559. {
  560. trx_undo_inf_t* cell;
  561. ulint n_used;
  562. dulint biggest;
  563. ulint n;
  564. ulint i;
  565. n = 0;
  566. n_used = arr->n_used;
  567. biggest = ut_dulint_zero;
  568. for (i = 0;; i++) {
  569. cell = trx_undo_arr_get_nth_info(arr, i);
  570. if (cell->in_use) {
  571. n++;
  572. if (ut_dulint_cmp(cell->undo_no, biggest) > 0) {
  573. biggest = cell->undo_no;
  574. }
  575. }
  576. if (n == n_used) {
  577. return(biggest);
  578. }
  579. }
  580. }
  581. /***************************************************************************
  582. Tries truncate the undo logs. */
  583. UNIV_INTERN
  584. void
  585. trx_roll_try_truncate(
  586. /*==================*/
  587. trx_t* trx) /* in: transaction */
  588. {
  589. trx_undo_arr_t* arr;
  590. dulint limit;
  591. dulint biggest;
  592. ut_ad(mutex_own(&(trx->undo_mutex)));
  593. ut_ad(mutex_own(&((trx->rseg)->mutex)));
  594. trx->pages_undone = 0;
  595. arr = trx->undo_no_arr;
  596. limit = trx->undo_no;
  597. if (arr->n_used > 0) {
  598. biggest = trx_undo_arr_get_biggest(arr);
  599. if (ut_dulint_cmp(biggest, limit) >= 0) {
  600. limit = ut_dulint_add(biggest, 1);
  601. }
  602. }
  603. if (trx->insert_undo) {
  604. trx_undo_truncate_end(trx, trx->insert_undo, limit);
  605. }
  606. if (trx->update_undo) {
  607. trx_undo_truncate_end(trx, trx->update_undo, limit);
  608. }
  609. }
  610. /***************************************************************************
  611. Pops the topmost undo log record in a single undo log and updates the info
  612. about the topmost record in the undo log memory struct. */
  613. static
  614. trx_undo_rec_t*
  615. trx_roll_pop_top_rec(
  616. /*=================*/
  617. /* out: undo log record, the page s-latched */
  618. trx_t* trx, /* in: transaction */
  619. trx_undo_t* undo, /* in: undo log */
  620. mtr_t* mtr) /* in: mtr */
  621. {
  622. page_t* undo_page;
  623. ulint offset;
  624. trx_undo_rec_t* prev_rec;
  625. page_t* prev_rec_page;
  626. ut_ad(mutex_own(&(trx->undo_mutex)));
  627. undo_page = trx_undo_page_get_s_latched(undo->space, undo->zip_size,
  628. undo->top_page_no, mtr);
  629. offset = undo->top_offset;
  630. /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n",
  631. os_thread_get_curr_id(), ut_dulint_get_low(trx->id),
  632. ut_dulint_get_low(undo->top_undo_no)); */
  633. prev_rec = trx_undo_get_prev_rec(undo_page + offset,
  634. undo->hdr_page_no, undo->hdr_offset,
  635. mtr);
  636. if (prev_rec == NULL) {
  637. undo->empty = TRUE;
  638. } else {
  639. prev_rec_page = page_align(prev_rec);
  640. if (prev_rec_page != undo_page) {
  641. trx->pages_undone++;
  642. }
  643. undo->top_page_no = page_get_page_no(prev_rec_page);
  644. undo->top_offset = prev_rec - prev_rec_page;
  645. undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
  646. }
  647. return(undo_page + offset);
  648. }
  649. /************************************************************************
  650. Pops the topmost record when the two undo logs of a transaction are seen
  651. as a single stack of records ordered by their undo numbers. Inserts the
  652. undo number of the popped undo record to the array of currently processed
  653. undo numbers in the transaction. When the query thread finishes processing
  654. of this undo record, it must be released with trx_undo_rec_release. */
  655. UNIV_INTERN
  656. trx_undo_rec_t*
  657. trx_roll_pop_top_rec_of_trx(
  658. /*========================*/
  659. /* out: undo log record copied to heap, NULL
  660. if none left, or if the undo number of the
  661. top record would be less than the limit */
  662. trx_t* trx, /* in: transaction */
  663. dulint limit, /* in: least undo number we need */
  664. dulint* roll_ptr,/* out: roll pointer to undo record */
  665. mem_heap_t* heap) /* in: memory heap where copied */
  666. {
  667. trx_undo_t* undo;
  668. trx_undo_t* ins_undo;
  669. trx_undo_t* upd_undo;
  670. trx_undo_rec_t* undo_rec;
  671. trx_undo_rec_t* undo_rec_copy;
  672. dulint undo_no;
  673. ibool is_insert;
  674. trx_rseg_t* rseg;
  675. ulint progress_pct;
  676. mtr_t mtr;
  677. rseg = trx->rseg;
  678. try_again:
  679. mutex_enter(&(trx->undo_mutex));
  680. if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
  681. mutex_enter(&(rseg->mutex));
  682. trx_roll_try_truncate(trx);
  683. mutex_exit(&(rseg->mutex));
  684. }
  685. ins_undo = trx->insert_undo;
  686. upd_undo = trx->update_undo;
  687. if (!ins_undo || ins_undo->empty) {
  688. undo = upd_undo;
  689. } else if (!upd_undo || upd_undo->empty) {
  690. undo = ins_undo;
  691. } else if (ut_dulint_cmp(upd_undo->top_undo_no,
  692. ins_undo->top_undo_no) > 0) {
  693. undo = upd_undo;
  694. } else {
  695. undo = ins_undo;
  696. }
  697. if (!undo || undo->empty
  698. || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) {
  699. if ((trx->undo_no_arr)->n_used == 0) {
  700. /* Rollback is ending */
  701. mutex_enter(&(rseg->mutex));
  702. trx_roll_try_truncate(trx);
  703. mutex_exit(&(rseg->mutex));
  704. }
  705. mutex_exit(&(trx->undo_mutex));
  706. return(NULL);
  707. }
  708. if (undo == ins_undo) {
  709. is_insert = TRUE;
  710. } else {
  711. is_insert = FALSE;
  712. }
  713. *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id,
  714. undo->top_page_no,
  715. undo->top_offset);
  716. mtr_start(&mtr);
  717. undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
  718. undo_no = trx_undo_rec_get_undo_no(undo_rec);
  719. ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0);
  720. /* We print rollback progress info if we are in a crash recovery
  721. and the transaction has at least 1000 row operations to undo. */
  722. if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) {
  723. progress_pct = 100 - (ulint)
  724. ((ut_conv_dulint_to_longlong(undo_no) * 100)
  725. / trx_roll_max_undo_no);
  726. if (progress_pct != trx_roll_progress_printed_pct) {
  727. if (trx_roll_progress_printed_pct == 0) {
  728. fprintf(stderr,
  729. "\nInnoDB: Progress in percents:"
  730. " %lu", (ulong) progress_pct);
  731. } else {
  732. fprintf(stderr,
  733. " %lu", (ulong) progress_pct);
  734. }
  735. fflush(stderr);
  736. trx_roll_progress_printed_pct = progress_pct;
  737. }
  738. }
  739. trx->undo_no = undo_no;
  740. if (!trx_undo_arr_store_info(trx, undo_no)) {
  741. /* A query thread is already processing this undo log record */
  742. mutex_exit(&(trx->undo_mutex));
  743. mtr_commit(&mtr);
  744. goto try_again;
  745. }
  746. undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
  747. mutex_exit(&(trx->undo_mutex));
  748. mtr_commit(&mtr);
  749. return(undo_rec_copy);
  750. }
  751. /************************************************************************
  752. Reserves an undo log record for a query thread to undo. This should be
  753. called if the query thread gets the undo log record not using the pop
  754. function above. */
  755. UNIV_INTERN
  756. ibool
  757. trx_undo_rec_reserve(
  758. /*=================*/
  759. /* out: TRUE if succeeded */
  760. trx_t* trx, /* in: transaction */
  761. dulint undo_no)/* in: undo number of the record */
  762. {
  763. ibool ret;
  764. mutex_enter(&(trx->undo_mutex));
  765. ret = trx_undo_arr_store_info(trx, undo_no);
  766. mutex_exit(&(trx->undo_mutex));
  767. return(ret);
  768. }
  769. /***********************************************************************
  770. Releases a reserved undo record. */
  771. UNIV_INTERN
  772. void
  773. trx_undo_rec_release(
  774. /*=================*/
  775. trx_t* trx, /* in: transaction */
  776. dulint undo_no)/* in: undo number */
  777. {
  778. trx_undo_arr_t* arr;
  779. mutex_enter(&(trx->undo_mutex));
  780. arr = trx->undo_no_arr;
  781. trx_undo_arr_remove_info(arr, undo_no);
  782. mutex_exit(&(trx->undo_mutex));
  783. }
  784. /*************************************************************************
  785. Starts a rollback operation. */
  786. UNIV_INTERN
  787. void
  788. trx_rollback(
  789. /*=========*/
  790. trx_t* trx, /* in: transaction */
  791. trx_sig_t* sig, /* in: signal starting the rollback */
  792. que_thr_t** next_thr)/* in/out: next query thread to run;
  793. if the value which is passed in is
  794. a pointer to a NULL pointer, then the
  795. calling function can start running
  796. a new query thread; if the passed value is
  797. NULL, the parameter is ignored */
  798. {
  799. que_t* roll_graph;
  800. que_thr_t* thr;
  801. /* que_thr_t* thr2; */
  802. ut_ad(mutex_own(&kernel_mutex));
  803. ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0));
  804. /* Initialize the rollback field in the transaction */
  805. if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
  806. trx->roll_limit = ut_dulint_zero;
  807. } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
  808. trx->roll_limit = (sig->savept).least_undo_no;
  809. } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
  810. trx->roll_limit = trx->last_sql_stat_start.least_undo_no;
  811. } else {
  812. ut_error;
  813. }
  814. ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0);
  815. trx->pages_undone = 0;
  816. if (trx->undo_no_arr == NULL) {
  817. trx->undo_no_arr = trx_undo_arr_create();
  818. }
  819. /* Build a 'query' graph which will perform the undo operations */
  820. roll_graph = trx_roll_graph_build(trx);
  821. trx->graph = roll_graph;
  822. trx->que_state = TRX_QUE_ROLLING_BACK;
  823. thr = que_fork_start_command(roll_graph);
  824. ut_ad(thr);
  825. /* thr2 = que_fork_start_command(roll_graph);
  826. ut_ad(thr2); */
  827. if (next_thr && (*next_thr == NULL)) {
  828. *next_thr = thr;
  829. /* srv_que_task_enqueue_low(thr2); */
  830. } else {
  831. srv_que_task_enqueue_low(thr);
  832. /* srv_que_task_enqueue_low(thr2); */
  833. }
  834. }
  835. /********************************************************************
  836. Builds an undo 'query' graph for a transaction. The actual rollback is
  837. performed by executing this query graph like a query subprocedure call.
  838. The reply about the completion of the rollback will be sent by this
  839. graph. */
  840. UNIV_INTERN
  841. que_t*
  842. trx_roll_graph_build(
  843. /*=================*/
  844. /* out, own: the query graph */
  845. trx_t* trx) /* in: trx handle */
  846. {
  847. mem_heap_t* heap;
  848. que_fork_t* fork;
  849. que_thr_t* thr;
  850. /* que_thr_t* thr2; */
  851. ut_ad(mutex_own(&kernel_mutex));
  852. heap = mem_heap_create(512);
  853. fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
  854. fork->trx = trx;
  855. thr = que_thr_create(fork, heap);
  856. /* thr2 = que_thr_create(fork, heap); */
  857. thr->child = row_undo_node_create(trx, thr, heap);
  858. /* thr2->child = row_undo_node_create(trx, thr2, heap); */
  859. return(fork);
  860. }
  861. /*************************************************************************
  862. Finishes error processing after the necessary partial rollback has been
  863. done. */
  864. static
  865. void
  866. trx_finish_error_processing(
  867. /*========================*/
  868. trx_t* trx) /* in: transaction */
  869. {
  870. trx_sig_t* sig;
  871. trx_sig_t* next_sig;
  872. ut_ad(mutex_own(&kernel_mutex));
  873. sig = UT_LIST_GET_FIRST(trx->signals);
  874. while (sig != NULL) {
  875. next_sig = UT_LIST_GET_NEXT(signals, sig);
  876. if (sig->type == TRX_SIG_ERROR_OCCURRED) {
  877. trx_sig_remove(trx, sig);
  878. }
  879. sig = next_sig;
  880. }
  881. trx->que_state = TRX_QUE_RUNNING;
  882. }
  883. /*************************************************************************
  884. Finishes a partial rollback operation. */
  885. static
  886. void
  887. trx_finish_partial_rollback_off_kernel(
  888. /*===================================*/
  889. trx_t* trx, /* in: transaction */
  890. que_thr_t** next_thr)/* in/out: next query thread to run;
  891. if the value which is passed in is a pointer
  892. to a NULL pointer, then the calling function
  893. can start running a new query thread; if this
  894. parameter is NULL, it is ignored */
  895. {
  896. trx_sig_t* sig;
  897. ut_ad(mutex_own(&kernel_mutex));
  898. sig = UT_LIST_GET_FIRST(trx->signals);
  899. /* Remove the signal from the signal queue and send reply message
  900. to it */
  901. trx_sig_reply(sig, next_thr);
  902. trx_sig_remove(trx, sig);
  903. trx->que_state = TRX_QUE_RUNNING;
  904. }
  905. /********************************************************************
  906. Finishes a transaction rollback. */
  907. UNIV_INTERN
  908. void
  909. trx_finish_rollback_off_kernel(
  910. /*===========================*/
  911. que_t* graph, /* in: undo graph which can now be freed */
  912. trx_t* trx, /* in: transaction */
  913. que_thr_t** next_thr)/* in/out: next query thread to run;
  914. if the value which is passed in is
  915. a pointer to a NULL pointer, then the
  916. calling function can start running
  917. a new query thread; if this parameter is
  918. NULL, it is ignored */
  919. {
  920. trx_sig_t* sig;
  921. trx_sig_t* next_sig;
  922. ut_ad(mutex_own(&kernel_mutex));
  923. ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0);
  924. /* Free the memory reserved by the undo graph */
  925. que_graph_free(graph);
  926. sig = UT_LIST_GET_FIRST(trx->signals);
  927. if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) {
  928. trx_finish_partial_rollback_off_kernel(trx, next_thr);
  929. return;
  930. } else if (sig->type == TRX_SIG_ERROR_OCCURRED) {
  931. trx_finish_error_processing(trx);
  932. return;
  933. }
  934. #ifdef UNIV_DEBUG
  935. if (lock_print_waits) {
  936. fprintf(stderr, "Trx %lu rollback finished\n",
  937. (ulong) ut_dulint_get_low(trx->id));
  938. }
  939. #endif /* UNIV_DEBUG */
  940. trx_commit_off_kernel(trx);
  941. /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and
  942. send reply messages to them */
  943. trx->que_state = TRX_QUE_RUNNING;
  944. while (sig != NULL) {
  945. next_sig = UT_LIST_GET_NEXT(signals, sig);
  946. if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
  947. trx_sig_reply(sig, next_thr);
  948. trx_sig_remove(trx, sig);
  949. }
  950. sig = next_sig;
  951. }
  952. }
  953. /*************************************************************************
  954. Creates a rollback command node struct. */
  955. UNIV_INTERN
  956. roll_node_t*
  957. roll_node_create(
  958. /*=============*/
  959. /* out, own: rollback node struct */
  960. mem_heap_t* heap) /* in: mem heap where created */
  961. {
  962. roll_node_t* node;
  963. node = mem_heap_alloc(heap, sizeof(roll_node_t));
  964. node->common.type = QUE_NODE_ROLLBACK;
  965. node->state = ROLL_NODE_SEND;
  966. node->partial = FALSE;
  967. return(node);
  968. }
  969. /***************************************************************
  970. Performs an execution step for a rollback command node in a query graph. */
  971. UNIV_INTERN
  972. que_thr_t*
  973. trx_rollback_step(
  974. /*==============*/
  975. /* out: query thread to run next, or NULL */
  976. que_thr_t* thr) /* in: query thread */
  977. {
  978. roll_node_t* node;
  979. ulint sig_no;
  980. trx_savept_t* savept;
  981. node = thr->run_node;
  982. ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
  983. if (thr->prev_node == que_node_get_parent(node)) {
  984. node->state = ROLL_NODE_SEND;
  985. }
  986. if (node->state == ROLL_NODE_SEND) {
  987. mutex_enter(&kernel_mutex);
  988. node->state = ROLL_NODE_WAIT;
  989. if (node->partial) {
  990. sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT;
  991. savept = &(node->savept);
  992. } else {
  993. sig_no = TRX_SIG_TOTAL_ROLLBACK;
  994. savept = NULL;
  995. }
  996. /* Send a rollback signal to the transaction */
  997. trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr,
  998. savept, NULL);
  999. thr->state = QUE_THR_SIG_REPLY_WAIT;
  1000. mutex_exit(&kernel_mutex);
  1001. return(NULL);
  1002. }
  1003. ut_ad(node->state == ROLL_NODE_WAIT);
  1004. thr->run_node = que_node_get_parent(node);
  1005. return(thr);
  1006. }