From 5d3c3b49276a5fd4a42e29fc63aba78026d1af0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 29 Jan 2018 14:48:53 +0200 Subject: [PATCH] MDEV-15090 Reduce the overhead of writing undo log records Inside InnoDB, each mini-transaction that generates any redo log records will acquire log_sys->mutex during mtr_t::commit() in order to copy the records into the global log_sys->buf for writing into the redo log file. For single-row transactions, this incurs quite a bit of overhead. Previously, we would use two mini-transactions for writing a record into a freshly updated undo log page: one for assigning the undo log, and another for writing the undo log record. Use a single mini-transaction for both steps. (Only if the undo record does not fit in that page will we have to commit and restart the mini-transaction.) trx_undo_assign(): Assign undo log for a persistent transaction, or return the already assigned one. trx_undo_assign_low(): Assign undo log for an operation on a persistent or temporary table. trx_undo_create(), trx_undo_reuse_cached(): Remove redundant parameters. Merge the logic from trx_undo_mark_as_dict_operation(). --- storage/innobase/include/trx0undo.h | 15 ++- storage/innobase/row/row0import.cc | 8 +- storage/innobase/row/row0trunc.cc | 8 +- storage/innobase/trx/trx0rec.cc | 40 +++----- storage/innobase/trx/trx0undo.cc | 138 ++++++++++++++++------------ 5 files changed, 121 insertions(+), 88 deletions(-) diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index cdb1cfaf478..ee5a70b6efd 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -252,18 +252,31 @@ ulint trx_undo_lists_init( /*================*/ trx_rseg_t* rseg); /*!< in: rollback segment memory object */ +/** Assign an undo log for a persistent transaction. +A new undo log is created or a cached undo log reused. 
+@param[in,out] trx transaction +@param[in,out] mtr mini-transaction +@retval DB_SUCCESS on success +@retval DB_TOO_MANY_CONCURRENT_TRXS +@retval DB_OUT_OF_FILE_SPACE +@retval DB_READ_ONLY +@retval DB_OUT_OF_MEMORY */ +dberr_t +trx_undo_assign(trx_t* trx, mtr_t* mtr) + MY_ATTRIBUTE((nonnull, warn_unused_result)); /** Assign an undo log for a transaction. A new undo log is created or a cached undo log reused. @param[in,out] trx transaction @param[in] rseg rollback segment @param[out] undo the undo log +@param[in,out] mtr mini-transaction @retval DB_SUCCESS on success @retval DB_TOO_MANY_CONCURRENT_TRXS @retval DB_OUT_OF_FILE_SPACE @retval DB_READ_ONLY @retval DB_OUT_OF_MEMORY */ dberr_t -trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) +trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, mtr_t*mtr) MY_ATTRIBUTE((nonnull, warn_unused_result)); /******************************************************************//** Sets the state of the undo log segment at a transaction finish. diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index e61e9cb589c..d3dc11c7da1 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -3400,8 +3400,12 @@ row_import_for_mysql( mutex_enter(&trx->undo_mutex); /* TODO: Do not write any undo log for the IMPORT cleanup. */ - err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, - &trx->rsegs.m_redo.undo); + { + mtr_t mtr; + mtr.start(); + err = trx_undo_assign(trx, &mtr); + mtr.commit(); + } mutex_exit(&trx->undo_mutex); diff --git a/storage/innobase/row/row0trunc.cc b/storage/innobase/row/row0trunc.cc index a2928d7af23..d8bd86244e6 100644 --- a/storage/innobase/row/row0trunc.cc +++ b/storage/innobase/row/row0trunc.cc @@ -1819,10 +1819,12 @@ row_truncate_table_for_mysql( /* Step-6: Truncate operation can be rolled back in case of error till some point. Associate rollback segment to record undo log. 
*/ - if (!dict_table_is_temporary(table)) { + if (!table->is_temporary()) { mutex_enter(&trx->undo_mutex); - err = trx_undo_assign_undo(trx, trx->rsegs.m_redo.rseg, - &trx->rsegs.m_redo.undo); + mtr_t mtr; + mtr.start(); + err = trx_undo_assign(trx, &mtr); + mtr.commit(); mutex_exit(&trx->undo_mutex); DBUG_EXECUTE_IF("ib_err_trunc_assigning_undo_log", diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc index c3f7d23f3ba..536dda75eec 100644 --- a/storage/innobase/trx/trx0rec.cc +++ b/storage/innobase/trx/trx0rec.cc @@ -1901,17 +1901,12 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table) ut_ad(trx->id); ut_ad(!table->is_temporary()); - trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; - trx_undo_t** pundo = &trx->rsegs.m_redo.undo; + mtr_t mtr; + mtr.start(); mutex_enter(&trx->undo_mutex); - dberr_t err = *pundo - ? DB_SUCCESS - : trx_undo_assign_undo(trx, rseg, pundo); - ut_ad((err == DB_SUCCESS) == (*pundo != NULL)); - if (trx_undo_t* undo = *pundo) { - mtr_t mtr; - mtr.start(trx); - + dberr_t err = trx_undo_assign(trx, &mtr); + ut_ad((err == DB_SUCCESS) == (trx->rsegs.m_redo.undo != NULL)); + if (trx_undo_t* undo = trx->rsegs.m_redo.undo) { buf_block_t* block = buf_page_get_gen( page_id_t(undo->space, undo->last_page_no), univ_page_size, RW_X_LATCH, @@ -1934,12 +1929,13 @@ trx_undo_report_rename(trx_t* trx, const dict_table_t* table) undo->top_undo_no = trx->undo_no++; undo->guess_block = block; - trx->undo_rseg_space = rseg->space; + trx->undo_rseg_space + = trx->rsegs.m_redo.rseg->space; err = DB_SUCCESS; break; } else { mtr.commit(); - mtr.start(trx); + mtr.start(); block = trx_undo_add_page(trx, undo, &mtr); if (!block) { err = DB_OUT_OF_FILE_SPACE; @@ -2006,7 +2002,7 @@ trx_undo_report_row_operation( mtr.start(); trx_undo_t** pundo; trx_rseg_t* rseg; - const bool is_temp = dict_table_is_temporary(index->table); + const bool is_temp = index->table->is_temporary(); if (is_temp) { mtr.set_log_mode(MTR_LOG_NO_REDO); @@ -2021,9 
+2017,9 @@ trx_undo_report_row_operation( } mutex_enter(&trx->undo_mutex); - dberr_t err = *pundo ? DB_SUCCESS : trx_undo_assign_undo( - trx, rseg, pundo); - trx_undo_t* undo = *pundo; + dberr_t err = *pundo + ? DB_SUCCESS : trx_undo_assign_low(trx, rseg, pundo, &mtr); + trx_undo_t* undo = *pundo; ut_ad((err == DB_SUCCESS) == (undo != NULL)); if (undo == NULL) { @@ -2051,12 +2047,6 @@ trx_undo_report_row_operation( cmpl_info, clust_entry, &mtr); if (UNIV_UNLIKELY(offset == 0)) { - /* The record did not fit on the page. We erase the - end segment of the undo log page and write a log - record of it: this is to ensure that in the debug - version the replicate page constructed using the log - records stays identical to the original page */ - if (!trx_undo_erase_page_end(undo_page)) { /* The record did not fit on an empty undo page. Discard the freshly allocated @@ -2071,8 +2061,8 @@ trx_undo_report_row_operation( first, because it may be holding lower-level latches, such as SYNC_FSP and SYNC_FSP_PAGE. 
*/ - mtr_commit(&mtr); - mtr.start(trx); + mtr.commit(); + mtr.start(); if (is_temp) { mtr.set_log_mode(MTR_LOG_NO_REDO); } @@ -2132,7 +2122,7 @@ trx_undo_report_row_operation( /* We have to extend the undo log by one page */ ut_ad(++loop_count < 2); - mtr.start(trx); + mtr.start(); if (is_temp) { mtr.set_log_mode(MTR_LOG_NO_REDO); diff --git a/storage/innobase/trx/trx0undo.cc b/storage/innobase/trx/trx0undo.cc index cc2664239d3..2646d2c8bd6 100644 --- a/storage/innobase/trx/trx0undo.cc +++ b/storage/innobase/trx/trx0undo.cc @@ -1299,9 +1299,6 @@ trx_undo_create( /*============*/ trx_t* trx, /*!< in: transaction */ trx_rseg_t* rseg, /*!< in: rollback segment memory copy */ - trx_id_t trx_id, /*!< in: id of the trx for which the undo log - is created */ - const XID* xid, /*!< in: X/Open transaction identification*/ trx_undo_t** undo, /*!< out: the new undo log object, undefined * if did not succeed */ mtr_t* mtr) /*!< in: mtr */ @@ -1332,17 +1329,36 @@ trx_undo_create( page_no = page_get_page_no(undo_page); - offset = trx_undo_header_create(undo_page, trx_id, mtr); + offset = trx_undo_header_create(undo_page, trx->id, mtr); trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); - *undo = trx_undo_mem_create(rseg, id, trx_id, xid, page_no, offset); + *undo = trx_undo_mem_create(rseg, id, trx->id, trx->xid, + page_no, offset); if (*undo == NULL) { - err = DB_OUT_OF_MEMORY; + return DB_OUT_OF_MEMORY; + } else if (rseg != trx->rsegs.m_redo.rseg) { + return DB_SUCCESS; } - return(err); + switch (trx_get_dict_operation(trx)) { + case TRX_DICT_OP_NONE: + break; + case TRX_DICT_OP_INDEX: + /* Do not discard the table on recovery. 
*/ + trx->table_id = 0; + /* fall through */ + case TRX_DICT_OP_TABLE: + (*undo)->table_id = trx->table_id; + (*undo)->dict_operation = TRUE; + mlog_write_ulint(undo_page + offset + TRX_UNDO_DICT_TRANS, + TRUE, MLOG_1BYTE, mtr); + mlog_write_ull(undo_page + offset + TRX_UNDO_TABLE_ID, + trx->table_id, mtr); + } + + return DB_SUCCESS; } /*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/ @@ -1356,9 +1372,6 @@ trx_undo_reuse_cached( /*==================*/ trx_t* trx, /*!< in: transaction */ trx_rseg_t* rseg, /*!< in: rollback segment memory object */ - trx_id_t trx_id, /*!< in: id of the trx for which the undo log - is used */ - const XID* xid, /*!< in: X/Open XA transaction identification */ mtr_t* mtr) /*!< in: mtr */ { trx_undo_t* undo; @@ -1380,50 +1393,72 @@ trx_undo_reuse_cached( undo_page = trx_undo_page_get( page_id_t(undo->space, undo->hdr_page_no), mtr); - offset = trx_undo_header_create(undo_page, trx_id, mtr); + offset = trx_undo_header_create(undo_page, trx->id, mtr); trx_undo_header_add_space_for_xid(undo_page, undo_page + offset, mtr); - trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset); - return(undo); -} - -/**********************************************************************//** -Marks an undo log header as a header of a data dictionary operation -transaction. */ -static -void -trx_undo_mark_as_dict_operation( -/*============================*/ - trx_t* trx, /*!< in: dict op transaction */ - trx_undo_t* undo, /*!< in: assigned undo log */ - mtr_t* mtr) /*!< in: mtr */ -{ - page_t* hdr_page; + trx_undo_mem_init_for_reuse(undo, trx->id, trx->xid, offset); - hdr_page = trx_undo_page_get( - page_id_t(undo->space, undo->hdr_page_no), mtr); + if (rseg != trx->rsegs.m_redo.rseg) { + return undo; + } switch (trx_get_dict_operation(trx)) { case TRX_DICT_OP_NONE: - ut_error; + return undo; case TRX_DICT_OP_INDEX: /* Do not discard the table on recovery. 
*/ - undo->table_id = 0; - break; + trx->table_id = 0; + /* fall through */ case TRX_DICT_OP_TABLE: undo->table_id = trx->table_id; - break; + undo->dict_operation = TRUE; + mlog_write_ulint(undo_page + offset + TRX_UNDO_DICT_TRANS, + TRUE, MLOG_1BYTE, mtr); + mlog_write_ull(undo_page + offset + TRX_UNDO_TABLE_ID, + trx->table_id, mtr); } - mlog_write_ulint(hdr_page + undo->hdr_offset - + TRX_UNDO_DICT_TRANS, - TRUE, MLOG_1BYTE, mtr); + return(undo); +} - mlog_write_ull(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, - undo->table_id, mtr); +/** Assign an undo log for a persistent transaction. +A new undo log is created or a cached undo log reused. +@param[in,out] trx transaction +@param[in,out] mtr mini-transaction +@retval DB_SUCCESS on success +@retval DB_TOO_MANY_CONCURRENT_TRXS +@retval DB_OUT_OF_FILE_SPACE +@retval DB_READ_ONLY +@retval DB_OUT_OF_MEMORY */ +dberr_t +trx_undo_assign(trx_t* trx, mtr_t* mtr) +{ + dberr_t err = DB_SUCCESS; + + ut_ad(mutex_own(&trx->undo_mutex)); + ut_ad(mtr->get_log_mode() == MTR_LOG_ALL); - undo->dict_operation = TRUE; + if (trx->rsegs.m_redo.undo) { + return DB_SUCCESS; + } + + trx_rseg_t* rseg = trx->rsegs.m_redo.rseg; + + mutex_enter(&rseg->mutex); + if (!(trx->rsegs.m_redo.undo= trx_undo_reuse_cached(trx, rseg, mtr))) { + err = trx_undo_create(trx, rseg, &trx->rsegs.m_redo.undo, mtr); + if (err != DB_SUCCESS) { + goto func_exit; + } + } + + UT_LIST_ADD_FIRST(rseg->undo_list, trx->rsegs.m_redo.undo); + +func_exit: + mutex_exit(&rseg->mutex); + + return err; } /** Assign an undo log for a transaction. @@ -1431,16 +1466,16 @@ A new undo log is created or a cached undo log reused. 
@param[in,out] trx transaction @param[in] rseg rollback segment @param[out] undo the undo log +@param[in,out] mtr mini-transaction @retval DB_SUCCESS on success @retval DB_TOO_MANY_CONCURRENT_TRXS @retval DB_OUT_OF_FILE_SPACE @retval DB_READ_ONLY @retval DB_OUT_OF_MEMORY */ dberr_t -trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) +trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo, mtr_t*mtr) { const bool is_temp = rseg == trx->rsegs.m_noredo.rseg; - mtr_t mtr; dberr_t err = DB_SUCCESS; ut_ad(mutex_own(&trx->undo_mutex)); @@ -1449,12 +1484,9 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) ut_ad(undo == (is_temp ? &trx->rsegs.m_noredo.undo : &trx->rsegs.m_redo.undo)); - - mtr.start(trx); - - if (is_temp) { - mtr.set_log_mode(MTR_LOG_NO_REDO); - } + ut_ad(!*undo); + ut_ad(mtr->get_log_mode() + == (is_temp ? MTR_LOG_NO_REDO : MTR_LOG_ALL)); mutex_enter(&rseg->mutex); @@ -1464,10 +1496,8 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) goto func_exit; ); - *undo = trx_undo_reuse_cached(trx, rseg, trx->id, trx->xid, &mtr); - if (*undo == NULL) { - err = trx_undo_create(trx, rseg, trx->id, trx->xid, - undo, &mtr); + if (!(*undo= trx_undo_reuse_cached(trx, rseg, mtr))) { + err = trx_undo_create(trx, rseg, undo, mtr); if (err != DB_SUCCESS) { goto func_exit; } @@ -1475,14 +1505,8 @@ trx_undo_assign_undo(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo) UT_LIST_ADD_FIRST(rseg->undo_list, *undo); - if (!is_temp && trx_get_dict_operation(trx) != TRX_DICT_OP_NONE) { - trx_undo_mark_as_dict_operation(trx, *undo, &mtr); - } - func_exit: mutex_exit(&rseg->mutex); - mtr.commit(); - return(err); }