Browse Source

applied percona patches to InnoDB as of 5.5.15

pull/47/merge
Sergei Golubchik 14 years ago
parent
commit
d4d7a8fa62
  1. 23
      btr/btr0cur.c
  2. 10
      buf/buf0buf.c
  3. 2
      buf/buf0flu.c
  4. 8
      data/data0data.c
  5. 15
      dict/dict0crea.c
  6. 60
      dict/dict0dict.c
  7. 86
      dict/dict0load.c
  8. 1
      dict/dict0mem.c
  9. 20
      fil/fil0fil.c
  10. 26
      handler/ha_innodb.cc
  11. 4
      handler/ha_innodb.h
  12. 314
      handler/handler0alter.cc
  13. 3
      include/buf0buf.h
  14. 2
      include/db0err.h
  15. 13
      include/dict0dict.h
  16. 26
      include/dict0dict.ic
  17. 7
      include/dict0load.h
  18. 42
      include/dict0mem.h
  19. 3
      include/fil0fil.h
  20. 7
      include/lock0lock.h
  21. 6
      include/lock0lock.ic
  22. 2
      include/mtr0mtr.ic
  23. 13
      include/os0file.h
  24. 3
      include/os0file.ic
  25. 19
      include/page0page.h
  26. 23
      include/page0page.ic
  27. 14
      include/rem0rec.h
  28. 41
      include/rem0rec.ic
  29. 20
      include/rem0types.h
  30. 13
      include/row0ext.h
  31. 9
      include/row0ext.ic
  32. 30
      include/row0row.h
  33. 36
      include/row0row.ic
  34. 4
      include/univ.i
  35. 151
      lock/lock0lock.c
  36. 8
      log/log0log.c
  37. 4
      log/log0recv.c
  38. 13
      mtr/mtr0mtr.c
  39. 44
      os/os0file.c
  40. 22
      page/page0page.c
  41. 2
      page/page0zip.c
  42. 109
      percona-suite/percona_mysqldump_innodb_optimize_keys.result
  43. 62
      percona-suite/percona_mysqldump_innodb_optimize_keys.test
  44. 88
      percona-suite/percona_query_cache_with_comments.inc.backup
  45. 66
      percona-suite/percona_query_response_time-replication.result
  46. 61
      percona-suite/percona_query_response_time-replication.test
  47. 306
      percona-suite/percona_query_response_time-stored.result
  48. 85
      percona-suite/percona_query_response_time-stored.test
  49. 377
      percona-suite/percona_query_response_time.result
  50. 71
      percona-suite/percona_query_response_time.test
  51. 1
      percona-suite/percona_query_response_time_flush.inc
  52. 8
      percona-suite/percona_query_response_time_show.inc
  53. 19
      percona-suite/percona_query_response_time_sleep.inc
  54. 2
      rem/rem0rec.c
  55. 19
      row/row0ext.c
  56. 13
      row/row0mysql.c
  57. 48
      row/row0row.c
  58. 11
      row/row0sel.c
  59. 4
      row/row0upd.c
  60. 18
      row/row0vers.c
  61. 4
      srv/srv0srv.c
  62. 63
      trx/trx0rec.c
  63. 3
      trx/trx0undo.c
  64. 2
      ut/ut0ut.c

23
btr/btr0cur.c

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@ -2067,6 +2067,9 @@ btr_cur_optimistic_update(
heap = mem_heap_create(1024);
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
#ifdef UNIV_BLOB_NULL_DEBUG
ut_a(!rec_offs_any_null_extern(rec, offsets));
#endif /* UNIV_BLOB_NULL_DEBUG */
#ifdef UNIV_DEBUG
if (btr_cur_print_record_ops && thr) {
@ -3347,9 +3350,14 @@ btr_estimate_n_rows_in_range_on_level(
mtr_start(&mtr);
/* fetch the page */
block = buf_page_get(space, zip_size, page_no, RW_S_LATCH,
&mtr);
/* Fetch the page. Because we are not holding the
index->lock, the tree may have changed and we may be
attempting to read a page that is no longer part of
the B-tree. We pass BUF_GET_POSSIBLY_FREED in order to
silence a debug assertion about this. */
block = buf_page_get_gen(space, zip_size, page_no, RW_S_LATCH,
NULL, BUF_GET_POSSIBLY_FREED,
__FILE__, __LINE__, &mtr);
page = buf_block_get_frame(block);
@ -3368,6 +3376,13 @@ btr_estimate_n_rows_in_range_on_level(
goto inexact;
}
/* It is possible but highly unlikely that the page was
originally written by an old version of InnoDB that did
not initialize FIL_PAGE_TYPE on other than B-tree pages.
For example, this could be an almost-empty BLOB page
that happens to contain the magic values in the fields
that we checked above. */
n_pages_read++;
if (page_no != slot1->page_no) {

10
buf/buf0buf.c

@ -2984,6 +2984,7 @@ buf_page_get_gen(
case BUF_GET_IF_IN_POOL:
case BUF_PEEK_IF_IN_POOL:
case BUF_GET_IF_IN_POOL_OR_WATCH:
case BUF_GET_POSSIBLY_FREED:
break;
default:
ut_error;
@ -3359,7 +3360,10 @@ wait_until_unfixed:
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
buf_block_buf_fix_inc(block, file, line);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(mode == BUF_GET_POSSIBLY_FREED
|| !block->page.file_page_was_freed);
#endif
//mutex_exit(&block->mutex);
/* Check if this is the first access to the page */
@ -3373,10 +3377,6 @@ wait_until_unfixed:
buf_page_set_accessed_make_young(&block->page, access_time);
}
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!block->page.file_page_was_freed);
#endif
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
ut_a(block->page.buf_fix_count > 0);

2
buf/buf0flu.c

@ -867,7 +867,7 @@ corrupted_page:
flush:
/* Now flush the doublewrite buffer data to disk */
fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE);
fil_flush(srv_doublewrite_file ? TRX_DOUBLEWRITE_SPACE : TRX_SYS_SPACE, FALSE);
/* We know that the writes have been flushed to disk now
and in recovery we will find them in the doublewrite buffer

8
data/data0data.c

@ -585,7 +585,8 @@ dtuple_convert_big_rec(
if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
/* up to MySQL 5.1: store a 768-byte prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
local_len = BTR_EXTERN_FIELD_REF_SIZE
+ DICT_ANTELOPE_MAX_INDEX_COL_LEN;
} else {
/* new-format table: do not store any BLOB prefix locally */
local_len = BTR_EXTERN_FIELD_REF_SIZE;
@ -757,7 +758,10 @@ dtuple_convert_back_big_rec(
local_len -= BTR_EXTERN_FIELD_REF_SIZE;
ut_ad(local_len <= DICT_MAX_INDEX_COL_LEN);
/* Only in REDUNDANT and COMPACT format, we store
up to DICT_ANTELOPE_MAX_INDEX_COL_LEN (768) bytes
locally */
ut_ad(local_len <= DICT_ANTELOPE_MAX_INDEX_COL_LEN);
dfield_set_data(dfield,
(char*) b->data - local_len,

15
dict/dict0crea.c

@ -730,9 +730,9 @@ dict_create_index_tree_step(
/* printf("Created a new index tree in space %lu root page %lu\n",
index->space, index->page_no); */
page_rec_write_index_page_no(btr_pcur_get_rec(&pcur),
DICT_SYS_INDEXES_PAGE_NO_FIELD,
node->page_no, &mtr);
page_rec_write_field(btr_pcur_get_rec(&pcur),
DICT_SYS_INDEXES_PAGE_NO_FIELD,
node->page_no, &mtr);
btr_pcur_close(&pcur);
mtr_commit(&mtr);
@ -802,9 +802,8 @@ dict_drop_index_tree(
root_page_no); */
btr_free_root(space, zip_size, root_page_no, mtr);
page_rec_write_index_page_no(rec,
DICT_SYS_INDEXES_PAGE_NO_FIELD,
FIL_NULL, mtr);
page_rec_write_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
FIL_NULL, mtr);
}
/*******************************************************************//**
@ -907,8 +906,8 @@ create:
in SYS_INDEXES, so that the database will not get into an
inconsistent state in case it crashes between the mtr_commit()
below and the following mtr_commit() call. */
page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
FIL_NULL, mtr);
page_rec_write_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
FIL_NULL, mtr);
/* We will need to commit the mini-transaction in order to avoid
deadlocks in the btr_create() call, because otherwise we would

60
dict/dict0dict.c

@ -1415,36 +1415,63 @@ dict_index_too_big_for_undo(
ulint fixed_size
= dict_col_get_fixed_size(col,
dict_table_is_comp(table));
ulint max_prefix
= col->max_prefix;
if (fixed_size) {
/* Fixed-size columns are stored locally. */
max_size = fixed_size;
} else if (max_size <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
/* Short columns are stored locally. */
} else if (!col->ord_part) {
} else if (!col->ord_part
|| (col->max_prefix
< (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table))) {
/* See if col->ord_part would be set
because of new_index. */
because of new_index. Also check if the new
index could have longer prefix on columns
that already had ord_part set */
ulint j;
for (j = 0; j < new_index->n_uniq; j++) {
if (dict_index_get_nth_col(
new_index, j) == col) {
const dict_field_t* field
= dict_index_get_nth_field(
new_index, j);
if (field->prefix_len
> col->max_prefix) {
max_prefix =
field->prefix_len;
}
goto is_ord_part;
}
}
if (col->ord_part) {
goto is_ord_part;
}
/* This is not an ordering column in any index.
Thus, it can be stored completely externally. */
max_size = BTR_EXTERN_FIELD_REF_SIZE;
} else {
ulint max_field_len;
is_ord_part:
max_field_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
/* This is an ordering column in some index.
A long enough prefix must be written to the
undo log. See trx_undo_page_fetch_ext(). */
max_size = ut_min(max_size, max_field_len);
/* We only store the needed prefix length in undo log */
if (max_prefix) {
ut_ad(dict_table_get_format(table)
>= DICT_TF_FORMAT_ZIP);
if (max_size > REC_MAX_INDEX_COL_LEN) {
max_size = REC_MAX_INDEX_COL_LEN;
max_size = ut_min(max_prefix, max_size);
}
max_size += BTR_EXTERN_FIELD_REF_SIZE;
@ -1698,15 +1725,16 @@ too_big:
/* In dtuple_convert_big_rec(), variable-length columns
that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
may be chosen for external storage. If the column appears
in an ordering column of an index, a longer prefix of
REC_MAX_INDEX_COL_LEN will be copied to the undo log
by trx_undo_page_report_modify() and
in an ordering column of an index, a longer prefix determined
by dict_max_field_len_store_undo() will be copied to the undo
log by trx_undo_page_report_modify() and
trx_undo_page_fetch_ext(). It suffices to check the
capacity of the undo log whenever new_index includes
a column prefix on a column that may be stored externally. */
if (field->prefix_len /* prefix index */
&& !col->ord_part /* not yet ordering column */
&& (!col->ord_part /* not yet ordering column */
|| field->prefix_len > col->max_prefix)
&& !dict_col_get_fixed_size(col, TRUE) /* variable-length */
&& dict_col_get_max_size(col)
> BTR_EXTERN_FIELD_REF_SIZE * 2 /* long enough */) {
@ -1723,11 +1751,17 @@ too_big:
}
undo_size_ok:
/* Flag the ordering columns */
/* Flag the ordering columns and also set column max_prefix */
for (i = 0; i < n_ord; i++) {
const dict_field_t* field
= dict_index_get_nth_field(new_index, i);
dict_index_get_nth_field(new_index, i)->col->ord_part = 1;
field->col->ord_part = 1;
if (field->prefix_len > field->col->max_prefix) {
field->col->max_prefix = field->prefix_len;
}
}
/* Add the new index as the last index for the table */
@ -1935,14 +1969,14 @@ dict_index_add_col(
variable-length fields, so that the extern flag can be embedded in
the length word. */
if (field->fixed_len > DICT_MAX_INDEX_COL_LEN) {
if (field->fixed_len > DICT_MAX_FIXED_COL_LEN) {
field->fixed_len = 0;
}
#if DICT_MAX_INDEX_COL_LEN != 768
#if DICT_MAX_FIXED_COL_LEN != 768
/* The comparison limit above must be constant. If it were
changed, the disk format of some fixed-length columns would
change, which would be a disaster. */
# error "DICT_MAX_INDEX_COL_LEN != 768"
# error "DICT_MAX_FIXED_COL_LEN != 768"
#endif
if (!(col->prtype & DATA_NOT_NULL)) {

86
dict/dict0load.c

@ -432,7 +432,7 @@ dict_process_sys_fields_rec(
mach_write_to_8(last_index_id, last_id);
err_msg = dict_load_field_low(buf, NULL, sys_field,
pos, last_index_id, heap, rec);
pos, last_index_id, heap, rec, NULL, 0);
*index_id = mach_read_from_8(buf);
@ -1066,6 +1066,9 @@ dict_load_columns(
/** Error message for a delete-marked record in dict_load_field_low() */
static const char* dict_load_field_del = "delete-marked record in SYS_FIELDS";
static const char* dict_load_field_too_big = "column prefix exceeds maximum"
" limit";
/********************************************************************//**
Loads an index field definition from a SYS_FIELDS record to
dict_index_t.
@ -1087,7 +1090,12 @@ dict_load_field_low(
byte* last_index_id, /*!< in: last index id */
mem_heap_t* heap, /*!< in/out: memory heap
for temporary storage */
const rec_t* rec) /*!< in: SYS_FIELDS record */
const rec_t* rec, /*!< in: SYS_FIELDS record */
char* addition_err_str,/*!< out: additional error message
that requires information to be
filled, or NULL */
ulint err_str_len) /*!< in: length of addition_err_str
in bytes */
{
const byte* field;
ulint len;
@ -1167,6 +1175,19 @@ err_len:
goto err_len;
}
if (prefix_len > REC_VERSION_56_MAX_INDEX_COL_LEN) {
if (addition_err_str) {
ut_snprintf(addition_err_str, err_str_len,
"index field '%s' has a prefix length"
" of %lu bytes",
mem_heap_strdupl(
heap, (const char*) field, len),
(ulong) prefix_len);
}
return(dict_load_field_too_big);
}
if (index) {
dict_mem_index_add_field(
index, mem_heap_strdupl(heap, (const char*) field, len),
@ -1226,14 +1247,16 @@ dict_load_fields(
btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE,
BTR_SEARCH_LEAF, &pcur, &mtr);
for (i = 0; i < index->n_fields; i++) {
const char* err_msg;
const char* err_msg;
char addition_err_str[1024];
rec = btr_pcur_get_rec(&pcur);
ut_a(btr_pcur_is_on_user_rec(&pcur));
err_msg = dict_load_field_low(buf, index, NULL, NULL, NULL,
heap, rec);
heap, rec, addition_err_str,
sizeof(addition_err_str));
if (err_msg == dict_load_field_del) {
/* There could be delete marked records in
@ -1242,7 +1265,24 @@ dict_load_fields(
goto next_rec;
} else if (err_msg) {
fprintf(stderr, "InnoDB: %s\n", err_msg);
if (err_msg == dict_load_field_too_big) {
fprintf(stderr, "InnoDB: Error: load index"
" '%s' failed.\n"
"InnoDB: %s,\n"
"InnoDB: which exceeds the"
" maximum limit of %lu bytes.\n"
"InnoDB: Please use server that"
" supports long index prefix\n"
"InnoDB: or turn on"
" innodb_force_recovery to load"
" the table\n",
index->name, addition_err_str,
(ulong) (REC_VERSION_56_MAX_INDEX_COL_LEN));
} else {
fprintf(stderr, "InnoDB: %s\n", err_msg);
}
error = DB_CORRUPTION;
goto func_exit;
}
@ -1518,7 +1558,26 @@ corrupted:
of the database server */
dict_mem_index_free(index);
} else {
dict_load_fields(index, heap);
error = dict_load_fields(index, heap);
if (error != DB_SUCCESS) {
fprintf(stderr, "InnoDB: Error: load index '%s'"
" for table '%s' failed\n",
index->name, table->name);
/* If the force recovery flag is set, and
if the failed index is not the primary index, we
will continue and open other indexes */
if (srv_force_recovery
&& !dict_index_is_clust(index)) {
error = DB_SUCCESS;
goto next_rec;
} else {
goto func_exit;
}
}
error = dict_index_add_to_cache(table, index,
index->page, FALSE);
/* The data dictionary tables should never contain
@ -1843,9 +1902,18 @@ err_exit:
} else {
table->fk_max_recusive_level = 0;
}
} else if (!srv_force_recovery) {
dict_table_remove_from_cache(table);
table = NULL;
} else {
dict_index_t* index;
/* Make sure that at least the clustered index was loaded.
Otherwise refuse to load the table */
index = dict_table_get_first_index(table);
if (!srv_force_recovery || !index
|| !dict_index_is_clust(index)) {
dict_table_remove_from_cache(table);
table = NULL;
}
}
#if 0
if (err != DB_SUCCESS && table != NULL) {

1
dict/dict0mem.c

@ -234,6 +234,7 @@ dict_mem_fill_column_struct(
column->ind = (unsigned int) col_pos;
column->ord_part = 0;
column->max_prefix = 0;
column->mtype = (unsigned int) mtype;
column->prtype = (unsigned int) prtype;
column->len = (unsigned int) col_len;

20
fil/fil0fil.c

@ -866,7 +866,8 @@ fil_node_close_file(
ut_a(node->open);
ut_a(node->n_pending == 0 || node->space->is_being_deleted);
ut_a(node->n_pending_flushes == 0);
ut_a(node->modification_counter == node->flush_counter);
ut_a(node->modification_counter == node->flush_counter
|| srv_fast_shutdown == 2);
ret = os_file_close(node->handle);
ut_a(ret);
@ -2628,7 +2629,7 @@ retry:
os_thread_sleep(20000);
fil_flush(id);
fil_flush(id, TRUE);
goto retry;
@ -2842,7 +2843,7 @@ error_exit2:
goto error_exit;
}
ret = os_file_flush(file);
ret = os_file_flush(file, TRUE);
if (!ret) {
fputs("InnoDB: Error: file flush of tablespace ", stderr);
@ -3028,7 +3029,7 @@ fil_reset_too_high_lsns(
}
}
success = os_file_flush(file);
success = os_file_flush(file, TRUE);
if (!success) {
goto func_exit;
@ -3050,7 +3051,7 @@ fil_reset_too_high_lsns(
goto func_exit;
}
success = os_file_flush(file);
success = os_file_flush(file, TRUE);
func_exit:
os_file_close(file);
ut_free(buf2);
@ -4838,7 +4839,7 @@ fil_extend_space_to_desired_size(
mutex_exit(&fil_system->mutex);
mutex_exit(&fil_system->file_extend_mutex);
fil_flush(space_id);
fil_flush(space_id, TRUE);
return(success);
}
@ -5550,8 +5551,9 @@ UNIV_INTERN
void
fil_flush(
/*======*/
ulint space_id) /*!< in: file space id (this can be a group of
ulint space_id, /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
ibool metadata)
{
fil_space_t* space;
fil_node_t* node;
@ -5622,7 +5624,7 @@ retry:
/* fprintf(stderr, "Flushing to file %s\n",
node->name); */
os_file_flush(file);
os_file_flush(file, metadata);
mutex_enter(&fil_system->mutex);
@ -5705,7 +5707,7 @@ fil_flush_file_spaces(
a non-existing space id. */
for (i = 0; i < n_space_ids; i++) {
fil_flush(space_ids[i]);
fil_flush(space_ids[i], TRUE);
}
mem_free(space_ids);

26
handler/ha_innodb.cc

@ -196,6 +196,7 @@ static my_bool innobase_stats_on_metadata = TRUE;
static my_bool innobase_use_sys_stats_table = FALSE;
static my_bool innobase_buffer_pool_shm_checksum = TRUE;
static uint innobase_buffer_pool_shm_key = 0;
static my_bool innobase_large_prefix = FALSE;
static char* internal_innobase_data_file_path = NULL;
@ -1056,7 +1057,7 @@ int
convert_error_code_to_mysql(
/*========================*/
int error, /*!< in: InnoDB error code */
ulint flags, /*!< in: InnoDB table flags, or 0 */
ulint flags, /*!< in: InnoDB table flags, or 0 */
THD* thd) /*!< in: user thread handle or NULL */
{
switch (error) {
@ -1160,6 +1161,11 @@ convert_error_code_to_mysql(
& DICT_TF_COMPACT) / 2);
return(HA_ERR_TO_BIG_ROW);
case DB_TOO_BIG_INDEX_COL:
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags));
return(HA_ERR_INDEX_COL_TOO_LONG);
case DB_NO_SAVEPOINT:
return(HA_ERR_NO_SAVEPOINT);
@ -3040,8 +3046,10 @@ innobase_alter_table_flags(
uint flags)
{
return(HA_INPLACE_ADD_INDEX_NO_READ_WRITE
| HA_INPLACE_ADD_INDEX_NO_WRITE
| HA_INPLACE_DROP_INDEX_NO_READ_WRITE
| HA_INPLACE_ADD_UNIQUE_INDEX_NO_READ_WRITE
| HA_INPLACE_ADD_UNIQUE_INDEX_NO_WRITE
| HA_INPLACE_DROP_UNIQUE_INDEX_NO_READ_WRITE
| HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE);
}
@ -4403,7 +4411,11 @@ UNIV_INTERN
uint
ha_innobase::max_supported_key_part_length() const
{
return(DICT_MAX_INDEX_COL_LEN - 1);
/* A table format specific index column length check will be performed
at ha_innobase::add_index() and row_create_index_for_mysql() */
return(innobase_large_prefix
? REC_VERSION_56_MAX_INDEX_COL_LEN
: REC_ANTELOPE_MAX_INDEX_COL_LEN - 1);
}
/******************************************************************//**
@ -7533,8 +7545,8 @@ ha_innobase::create(
if (i != (uint) primary_key_no) {
if ((error = create_index(trx, form, flags, norm_name,
i))) {
if ((error = create_index(trx, form, flags,
norm_name, i))) {
goto cleanup;
}
}
@ -11714,6 +11726,11 @@ static MYSQL_SYSVAR_STR(flush_method, innobase_file_flush_method,
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
"With which method to flush data.", NULL, NULL, NULL);
static MYSQL_SYSVAR_BOOL(large_prefix, innobase_large_prefix,
PLUGIN_VAR_NOCMDARG,
"Support large index prefix length of REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes.",
NULL, NULL, FALSE);
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Force InnoDB to not use next-key locking, to use only row-level locking.",
@ -12162,6 +12179,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(use_global_flush_log_at_trx_commit),
MYSQL_SYSVAR(flush_method),
MYSQL_SYSVAR(force_recovery),
MYSQL_SYSVAR(large_prefix),
MYSQL_SYSVAR(locks_unsafe_for_binlog),
MYSQL_SYSVAR(lock_wait_timeout),
#ifdef UNIV_LOG_ARCHIVE

4
handler/ha_innodb.h

@ -217,7 +217,9 @@ class ha_innobase: public handler
bool primary_key_is_clustered();
int cmp_ref(const uchar *ref1, const uchar *ref2);
/** Fast index creation (smart ALTER TABLE) @see handler0alter.cc @{ */
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys);
int add_index(TABLE *table_arg, KEY *key_info, uint num_of_keys,
handler_add_index **add);
int final_add_index(handler_add_index *add, bool commit);
int prepare_drop_index(TABLE *table_arg, uint *key_num,
uint num_of_keys);
int final_drop_index(TABLE *table_arg);

314
handler/handler0alter.cc

@ -539,7 +539,7 @@ innobase_create_key_def(
if (!new_primary && (key_info->flags & HA_NOSAME)
&& (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG))
&& row_table_got_default_clust_index(table)) {
uint key_part = key_info->key_parts;
uint key_part = key_info->key_parts;
new_primary = TRUE;
@ -594,6 +594,27 @@ innobase_create_key_def(
DBUG_RETURN(indexdefs);
}
/*******************************************************************//**
Check each index column size, make sure they do not exceed the max limit
@return HA_ERR_INDEX_COL_TOO_LONG if index column size exceeds limit */
static
int
innobase_check_column_length(
/*=========================*/
const dict_table_t*table, /*!< in: table definition */
const KEY* key_info) /*!< in: Indexes to be created */
{
ulint max_col_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
for (ulint key_part = 0; key_part < key_info->key_parts; key_part++) {
if (key_info->key_part[key_part].length > max_col_len) {
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0), max_col_len);
return(HA_ERR_INDEX_COL_TOO_LONG);
}
}
return(0);
}
/*******************************************************************//**
Create a temporary tablename using query id, thread id, and id
@return temporary tablename */
@ -619,6 +640,18 @@ innobase_create_temporary_tablename(
return(name);
}
/** Context handed back by ha_innobase::add_index() through its "add"
output parameter: the generic handler_add_index state plus the InnoDB
table in which the indexes are being created. */
class ha_innobase_add_index : public handler_add_index
{
public:
	/** Pass the MySQL table, key definitions and key count on to
	handler_add_index and remember the InnoDB table being indexed. */
	ha_innobase_add_index(
		TABLE*		table,
		KEY*		key_info,
		uint		num_of_keys,
		dict_table_t*	indexed_table_arg)
		:
		handler_add_index(table, key_info, num_of_keys),
		indexed_table(indexed_table_arg)
	{
	}

	/** Nothing to release here; this destructor does not free
	indexed_table. */
	~ha_innobase_add_index()
	{
	}

	/** table where the indexes are being created */
	dict_table_t*	indexed_table;
};
/*******************************************************************//**
Create indexes.
@return 0 or error number */
@ -626,12 +659,15 @@ UNIV_INTERN
int
ha_innobase::add_index(
/*===================*/
TABLE* table, /*!< in: Table where indexes are created */
KEY* key_info, /*!< in: Indexes to be created */
uint num_of_keys) /*!< in: Number of indexes to be created */
TABLE* table, /*!< in: Table where indexes
are created */
KEY* key_info, /*!< in: Indexes
to be created */
uint num_of_keys, /*!< in: Number of indexes
to be created */
handler_add_index** add) /*!< out: context */
{
dict_index_t** index; /*!< Index to be created */
dict_table_t* innodb_table; /*!< InnoDB table in dictionary */
dict_table_t* indexed_table; /*!< Table where indexes are created */
merge_index_def_t* index_defs; /*!< Index definitions */
mem_heap_t* heap; /*!< Heap for index definitions */
@ -647,6 +683,8 @@ ha_innobase::add_index(
ut_a(key_info);
ut_a(num_of_keys);
*add = NULL;
if (srv_created_new_raw || srv_force_recovery) {
DBUG_RETURN(HA_ERR_WRONG_COMMAND);
}
@ -662,20 +700,32 @@ ha_innobase::add_index(
DBUG_RETURN(-1);
}
innodb_table = indexed_table
= dict_table_get(prebuilt->table->name, FALSE);
indexed_table = dict_table_get(prebuilt->table->name, FALSE);
if (UNIV_UNLIKELY(!innodb_table)) {
if (UNIV_UNLIKELY(!indexed_table)) {
DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
}
ut_a(indexed_table == prebuilt->table);
/* Check that index keys are sensible */
error = innobase_check_index_keys(key_info, num_of_keys, innodb_table);
error = innobase_check_index_keys(key_info, num_of_keys, prebuilt->table);
if (UNIV_UNLIKELY(error)) {
DBUG_RETURN(error);
}
/* Check each index's column length to make sure they do not
exceed limit */
for (ulint i = 0; i < num_of_keys; i++) {
error = innobase_check_column_length(prebuilt->table,
&key_info[i]);
if (error) {
DBUG_RETURN(error);
}
}
heap = mem_heap_create(1024);
trx_start_if_not_started(prebuilt->trx);
@ -691,7 +741,7 @@ ha_innobase::add_index(
num_of_idx = num_of_keys;
index_defs = innobase_create_key_def(
trx, innodb_table, heap, key_info, num_of_idx);
trx, prebuilt->table, heap, key_info, num_of_idx);
new_primary = DICT_CLUSTERED & index_defs[0].ind_type;
@ -705,7 +755,7 @@ ha_innobase::add_index(
trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
/* Acquire a lock on the table before creating any indexes. */
error = row_merge_lock_table(prebuilt->trx, innodb_table,
error = row_merge_lock_table(prebuilt->trx, prebuilt->table,
new_primary ? LOCK_X : LOCK_S);
if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
@ -719,7 +769,7 @@ ha_innobase::add_index(
row_mysql_lock_data_dictionary(trx);
dict_locked = TRUE;
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
/* If a new primary key is defined for the table we need
to drop the original table and rebuild all indexes. */
@ -727,15 +777,15 @@ ha_innobase::add_index(
if (UNIV_UNLIKELY(new_primary)) {
/* This transaction should be the only one
operating on the table. */
ut_a(innodb_table->n_mysql_handles_opened == 1);
ut_a(prebuilt->table->n_mysql_handles_opened == 1);
char* new_table_name = innobase_create_temporary_tablename(
heap, '1', innodb_table->name);
heap, '1', prebuilt->table->name);
/* Clone the table. */
trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
indexed_table = row_merge_create_temporary_table(
new_table_name, index_defs, innodb_table, trx);
new_table_name, index_defs, prebuilt->table, trx);
if (!indexed_table) {
@ -749,11 +799,12 @@ ha_innobase::add_index(
break;
default:
error = convert_error_code_to_mysql(
trx->error_state, innodb_table->flags,
trx->error_state,
prebuilt->table->flags,
user_thd);
}
ut_d(dict_table_check_for_dup_indexes(innodb_table,
ut_d(dict_table_check_for_dup_indexes(prebuilt->table,
FALSE));
mem_heap_free(heap);
trx_general_rollback_for_mysql(trx, NULL);
@ -768,17 +819,15 @@ ha_innobase::add_index(
/* Create the indexes in SYS_INDEXES and load into dictionary. */
for (ulint i = 0; i < num_of_idx; i++) {
for (num_created = 0; num_created < num_of_idx; num_created++) {
index[i] = row_merge_create_index(trx, indexed_table,
&index_defs[i]);
index[num_created] = row_merge_create_index(
trx, indexed_table, &index_defs[num_created]);
if (!index[i]) {
if (!index[num_created]) {
error = trx->error_state;
goto error_handling;
}
num_created++;
}
ut_ad(error == DB_SUCCESS);
@ -800,7 +849,7 @@ ha_innobase::add_index(
if (UNIV_UNLIKELY(new_primary)) {
/* A primary key is to be built. Acquire an exclusive
table lock also on the table that is being created. */
ut_ad(indexed_table != innodb_table);
ut_ad(indexed_table != prebuilt->table);
error = row_merge_lock_table(prebuilt->trx, indexed_table,
LOCK_X);
@ -814,7 +863,7 @@ ha_innobase::add_index(
/* Read the clustered index of the table and build indexes
based on this information using temporary files and merge sort. */
error = row_merge_build_indexes(prebuilt->trx,
innodb_table, indexed_table,
prebuilt->table, indexed_table,
index, num_of_idx, table);
error_handling:
@ -822,63 +871,15 @@ error_handling:
dictionary which were defined. */
switch (error) {
const char* old_name;
char* tmp_name;
case DB_SUCCESS:
ut_a(!dict_locked);
row_mysql_lock_data_dictionary(trx);
dict_locked = TRUE;
ut_d(mutex_enter(&dict_sys->mutex));
ut_d(dict_table_check_for_dup_indexes(prebuilt->table, TRUE));
if (!new_primary) {
error = row_merge_rename_indexes(trx, indexed_table);
if (error != DB_SUCCESS) {
row_merge_drop_indexes(trx, indexed_table,
index, num_created);
}
goto convert_error;
}
/* If a new primary key was defined for the table and
there was no error at this point, we can now rename
the old table as a temporary table, rename the new
temporary table as the old table and drop the old table. */
old_name = innodb_table->name;
tmp_name = innobase_create_temporary_tablename(heap, '2',
old_name);
error = row_merge_rename_tables(innodb_table, indexed_table,
tmp_name, trx);
if (error != DB_SUCCESS) {
row_merge_drop_table(trx, indexed_table);
switch (error) {
case DB_TABLESPACE_ALREADY_EXISTS:
case DB_DUPLICATE_KEY:
innobase_convert_tablename(tmp_name);
my_error(HA_ERR_TABLE_EXIST, MYF(0), tmp_name);
error = HA_ERR_TABLE_EXIST;
break;
default:
goto convert_error;
}
break;
}
trx_commit_for_mysql(prebuilt->trx);
row_prebuilt_free(prebuilt, TRUE);
prebuilt = row_create_prebuilt(indexed_table);
indexed_table->n_mysql_handles_opened++;
error = row_merge_drop_table(trx, innodb_table);
innodb_table = indexed_table;
goto convert_error;
ut_d(mutex_exit(&dict_sys->mutex));
*add = new ha_innobase_add_index(table, key_info, num_of_keys,
indexed_table);
break;
case DB_TOO_BIG_RECORD:
my_error(HA_ERR_TO_BIG_ROW, MYF(0));
@ -894,7 +895,7 @@ error:
trx->error_state = DB_SUCCESS;
if (new_primary) {
if (indexed_table != innodb_table) {
if (indexed_table != prebuilt->table) {
row_merge_drop_table(trx, indexed_table);
}
} else {
@ -906,38 +907,161 @@ error:
row_merge_drop_indexes(trx, indexed_table,
index, num_created);
}
convert_error:
if (error == DB_SUCCESS) {
/* Build index is successful. We will need to
rebuild index translation table. Reset the
index entry count in the translation table
to zero, so that translation table will be rebuilt */
share->idx_trans_tbl.index_count = 0;
}
error = convert_error_code_to_mysql(error,
innodb_table->flags,
user_thd);
}
mem_heap_free(heap);
trx_commit_for_mysql(trx);
if (prebuilt->trx) {
trx_commit_for_mysql(prebuilt->trx);
}
if (dict_locked) {
ut_d(dict_table_check_for_dup_indexes(innodb_table, FALSE));
row_mysql_unlock_data_dictionary(trx);
}
trx_free_for_mysql(trx);
mem_heap_free(heap);
/* There might be work for utility threads.*/
srv_active_wake_master_thread();
DBUG_RETURN(error);
DBUG_RETURN(convert_error_code_to_mysql(error, prebuilt->table->flags,
user_thd));
}
/*******************************************************************//**
Finalize or undo add_index().

Runs in a separate background transaction that is flagged as a dictionary
operation (TRX_DICT_OP_INDEX) and holds the data dictionary latch
exclusively for the duration of the call.

If add_index() copied the table (add->indexed_table != prebuilt->table),
commit renames the copy over the original and drops the original, while
rollback (or a failed rename) drops the copy. Otherwise commit renames
the temporary secondary indexes, and rollback (or a failed rename) drops
every index whose name starts with TEMP_INDEX_PREFIX.

The context object passed in add_arg is deleted before returning.
@return 0 or error number */
UNIV_INTERN
int
ha_innobase::final_add_index(
/*=========================*/
	handler_add_index*	add_arg,/*!< in: context from add_index() */
	bool			commit)	/*!< in: true=commit, false=rollback */
{
	ha_innobase_add_index*	add;
	trx_t*			trx;
	int			err	= 0;

	DBUG_ENTER("ha_innobase::final_add_index");

	ut_ad(add_arg);
	add = static_cast<class ha_innobase_add_index*>(add_arg);

	/* Create a background transaction for the operations on
	the data dictionary tables. */
	trx = innobase_trx_allocate(user_thd);
	trx_start_if_not_started(trx);

	/* Flag this transaction as a dictionary operation, so that
	the data dictionary will be locked in crash recovery. */
	trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);

	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
	or lock waits can happen in it during an index create operation. */
	row_mysql_lock_data_dictionary(trx);

	if (add->indexed_table != prebuilt->table) {
		ulint	error;
		int	drop_err;

		/* We copied the table (new_primary). */

		if (commit) {
			mem_heap_t*	heap;
			char*		tmp_name;

			heap = mem_heap_create(1024);

			/* A new primary key was defined for the table
			and there was no error at this point. We can
			now rename the old table as a temporary table,
			rename the new temporary table as the old
			table and drop the old table. */
			tmp_name = innobase_create_temporary_tablename(
				heap, '2', prebuilt->table->name);

			error = row_merge_rename_tables(
				prebuilt->table, add->indexed_table,
				tmp_name, trx);

			switch (error) {
			case DB_TABLESPACE_ALREADY_EXISTS:
			case DB_DUPLICATE_KEY:
				innobase_convert_tablename(tmp_name);
				my_error(HA_ERR_TABLE_EXIST, MYF(0), tmp_name);
				err = HA_ERR_TABLE_EXIST;
				break;
			default:
				err = convert_error_code_to_mysql(
					error, prebuilt->table->flags,
					user_thd);
				break;
			}

			mem_heap_free(heap);
		}

		if (!commit || err) {
			/* Rollback, or the rename failed: discard the copy. */
			error = row_merge_drop_table(trx, add->indexed_table);
			trx_commit_for_mysql(prebuilt->trx);
		} else {
			/* The rename succeeded: drop the old table and
			switch the handle over to the new one. */
			dict_table_t*	old_table = prebuilt->table;
			trx_commit_for_mysql(prebuilt->trx);
			row_prebuilt_free(prebuilt, TRUE);
			error = row_merge_drop_table(trx, old_table);
			add->indexed_table->n_mysql_handles_opened++;
			prebuilt = row_create_prebuilt(add->indexed_table);
		}

		/* Report the outcome of the drop only if nothing failed
		earlier. Otherwise a failed rename followed by a successful
		drop would be reported to the caller as success. */
		drop_err = convert_error_code_to_mysql(
			error, prebuilt->table->flags, user_thd);

		if (!err) {
			err = drop_err;
		}
	} else {
		/* We created secondary indexes (!new_primary). */

		if (commit) {
			err = convert_error_code_to_mysql(
				row_merge_rename_indexes(trx, prebuilt->table),
				prebuilt->table->flags, user_thd);
		}

		if (!commit || err) {
			dict_index_t*	index;
			dict_index_t*	next_index;

			/* Fetch the successor before a possible drop,
			because the current index may be removed. */
			for (index = dict_table_get_first_index(
				     prebuilt->table);
			     index; index = next_index) {

				next_index = dict_table_get_next_index(index);

				if (*index->name == TEMP_INDEX_PREFIX) {
					row_merge_drop_index(
						index, prebuilt->table, trx);
				}
			}
		}
	}

	/* If index is successfully built, we will need to rebuild index
	translation table. Set valid index entry count in the translation
	table to zero. */
	if (err == 0 && commit) {
		share->idx_trans_tbl.index_count = 0;
	}

	trx_commit_for_mysql(trx);
	if (prebuilt->trx) {
		trx_commit_for_mysql(prebuilt->trx);
	}

	ut_d(dict_table_check_for_dup_indexes(prebuilt->table, FALSE));
	row_mysql_unlock_data_dictionary(trx);

	trx_free_for_mysql(trx);

	/* There might be work for utility threads.*/
	srv_active_wake_master_thread();

	delete add;
	DBUG_RETURN(err);
}
/*******************************************************************//**

3
include/buf0buf.h

@ -53,6 +53,9 @@ Created 11/5/1995 Heikki Tuuri
/*!< Get the page only if it's in the
buffer pool, if not then set a watch
on the page. */
#define BUF_GET_POSSIBLY_FREED 16
/*!< Like BUF_GET, but do not mind
if the file page has been freed. */
/* @} */
/** @name Modes for buf_page_get_known_nowait */
/* @{ */

2
include/db0err.h

@ -110,6 +110,8 @@ enum db_err {
DB_PARENT_NO_INDEX, /* the parent table does not
have an index that contains the
foreign keys as its prefix columns */
DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum
limit */
/* The following are partial failure codes */
DB_FAIL = 1000,

13
include/dict0dict.h

@ -136,6 +136,19 @@ dict_col_copy_type(
/*===============*/
const dict_col_t* col, /*!< in: column */
dtype_t* type); /*!< out: data type */
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix
needs to be stored in the undo log.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
ulint
dict_max_field_len_store_undo(
/*==========================*/
dict_table_t* table, /*!< in: table */
const dict_col_t* col); /*!< in: column which index prefix
is based on */
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**

26
include/dict0dict.ic

@ -924,4 +924,30 @@ dict_table_get_on_id_low(
return(table);
}
/**********************************************************************//**
Compute how many bytes of a column prefix are stored in the undo log.
Tables whose format is older than DICT_TF_FORMAT_ZIP store no prefix.
For newer formats the column's recorded max_prefix is used when it is
nonzero, otherwise the format-wide maximum applies.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
ulint
dict_max_field_len_store_undo(
/*==========================*/
	dict_table_t*		table,	/*!< in: table */
	const dict_col_t*	col)	/*!< in: column which index prefix
					is based on */
{
	if (dict_table_get_format(table) < DICT_TF_FORMAT_ZIP) {
		/* Old formats: nothing is stored in the undo log. */
		return(0);
	}

	if (col->max_prefix) {
		return(col->max_prefix);
	}

	return(DICT_MAX_FIELD_LEN_BY_FORMAT(table));
}
#endif /* !UNIV_HOTBACKUP */

7
include/dict0load.h

@ -156,7 +156,12 @@ dict_load_field_low(
byte* last_index_id, /*!< in: last index id */
mem_heap_t* heap, /*!< in/out: memory heap
for temporary storage */
const rec_t* rec); /*!< in: SYS_FIELDS record */
const rec_t* rec, /*!< in: SYS_FIELDS record */
char* addition_err_str,/*!< out: additional error message
that requires information to be
filled, or NULL */
ulint err_str_len); /*!< in: length of addition_err_str
in bytes */
/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads

42
include/dict0mem.h

@ -302,32 +302,58 @@ struct dict_col_struct{
unsigned ord_part:1; /*!< nonzero if this column
appears in the ordering fields
of an index */
unsigned max_prefix:12; /*!< maximum index prefix length on
this column. Our current max limit is
3072 for Barracuda table */
};
/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed column length (or indexed prefix length).
/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and
is the maximum indexed column length (or indexed prefix length) in
ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format,
any fixed-length field that is longer than this will be encoded as
a variable-length field.
It is set to 3*256, so that one can create a column prefix index on
256 characters of a TEXT or VARCHAR column also in the UTF-8
charset. In that charset, a character may take at most 3 bytes. This
constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN
#define DICT_ANTELOPE_MAX_INDEX_COL_LEN REC_ANTELOPE_MAX_INDEX_COL_LEN
/** Find out the maximum indexed column length by table format.
For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT (formats below
DICT_TF_FORMAT_ZIP), the maximum field length is
REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767) bytes. For the newer Barracuda
format, the length can be up to REC_VERSION_56_MAX_INDEX_COL_LEN (3072)
bytes.
The argument is the table object; it is passed to dict_table_get_format(). */
#define DICT_MAX_FIELD_LEN_BY_FORMAT(table)				\
		((dict_table_get_format(table) < DICT_TF_FORMAT_ZIP)	\
			? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)		\
			: REC_VERSION_56_MAX_INDEX_COL_LEN)

/** Same limit, derived from table flags instead of a table object: the
format is extracted with DICT_TF_FORMAT_MASK and DICT_TF_FORMAT_SHIFT.
The argument is parenthesized so that a compound expression such as
(a | b) is masked as a whole rather than being split up by operator
precedence. */
#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags)			\
		(((((flags) & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT)\
			< DICT_TF_FORMAT_ZIP)				\
			? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)		\
			: REC_VERSION_56_MAX_INDEX_COL_LEN)
/** Defines the maximum fixed length column size */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
/** Data structure for a field in an index */
struct dict_field_struct{
dict_col_t* col; /*!< pointer to the table column */
const char* name; /*!< name of the column */
unsigned prefix_len:10; /*!< 0 or the length of the column
unsigned prefix_len:12; /*!< 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
must be smaller than
DICT_MAX_INDEX_COL_LEN; NOTE that
in the UTF-8 charset, MySQL sets this
to 3 * the prefix len in UTF-8 chars */
DICT_MAX_FIELD_LEN_BY_FORMAT;
NOTE that in the UTF-8 charset, MySQL
sets this to (mbmaxlen * the prefix len)
in UTF-8 chars */
unsigned fixed_len:10; /*!< 0 or the fixed length of the
column if smaller than
DICT_MAX_INDEX_COL_LEN */
DICT_ANTELOPE_MAX_INDEX_COL_LEN */
};
/** Data structure for an index. Most fields will be

3
include/fil0fil.h

@ -671,8 +671,9 @@ UNIV_INTERN
void
fil_flush(
/*======*/
ulint space_id); /*!< in: file space id (this can be a group of
ulint space_id, /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
ibool metadata);
/**********************************************************************//**
Flushes to disk writes in file spaces of the given type possibly cached by
the OS. */

7
include/lock0lock.h

@ -73,9 +73,10 @@ UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
const rec_t* rec, /*!< in: user record */
const dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
__attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */

6
include/lock0lock.ic

@ -75,9 +75,9 @@ UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
const rec_t* rec, /*!< in: user record */
const dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
trx_id_t trx_id;

2
include/mtr0mtr.ic

@ -37,6 +37,8 @@ mtr_start(
/*======*/
mtr_t* mtr) /*!< out: mini-transaction */
{
UNIV_MEM_INVALID(mtr, sizeof *mtr);
dyn_array_create(&(mtr->memo));
dyn_array_create(&(mtr->log));

13
include/os0file.h

@ -290,8 +290,7 @@ The wrapper functions have the prefix of "innodb_". */
__FILE__, __LINE__)
# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
pfs_os_file_read_func(file, buf, offset, offset_high, n, trx, \
__FILE__, __LINE__)
os_file_read_func(file, buf, offset, offset_high, n, trx)
# define os_file_read_no_error_handling(file, buf, offset, \
offset_high, n) \
@ -303,8 +302,8 @@ The wrapper functions have the prefix of "innodb_". */
pfs_os_file_write_func(name, file, buf, offset, offset_high, \
n, __FILE__, __LINE__)
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
# define os_file_flush(file, metadata) \
pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
# define os_file_rename(key, oldpath, newpath) \
pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
@ -343,7 +342,7 @@ to original un-instrumented file I/O APIs */
# define os_file_write(name, file, buf, offset, offset_high, n) \
os_file_write_func(name, file, buf, offset, offset_high, n)
# define os_file_flush(file) os_file_flush_func(file)
# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
# define os_file_rename(key, oldpath, newpath) \
os_file_rename_func(oldpath, newpath)
@ -794,6 +793,7 @@ ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
ibool metadata,
const char* src_file,/*!< in: file name where func invoked */
ulint src_line);/*!< in: line where the func invoked */
@ -873,7 +873,8 @@ UNIV_INTERN
ibool
os_file_flush_func(
/*===============*/
os_file_t file); /*!< in, own: handle to a file */
os_file_t file, /*!< in, own: handle to a file */
ibool metadata);
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may

3
include/os0file.ic

@ -372,6 +372,7 @@ ibool
pfs_os_file_flush_func(
/*===================*/
os_file_t file, /*!< in, own: handle to a file */
ibool metadata,
const char* src_file,/*!< in: file name where func invoked */
ulint src_line)/*!< in: line where the func invoked */
{
@ -381,7 +382,7 @@ pfs_os_file_flush_func(
register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
src_file, src_line);
result = os_file_flush_func(file);
result = os_file_flush_func(file, metadata);
register_pfs_file_io_end(locker, 0);

19
include/page0page.h

@ -618,18 +618,19 @@ rec_t*
page_rec_find_owner_rec(
/*====================*/
rec_t* rec); /*!< in: the physical record */
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
This is a low-level operation which is used in a database index creation
to update the page number of a created B-tree to a data dictionary
record. */
UNIV_INTERN
Write a 32-bit field in a data dictionary record. */
UNIV_INLINE
void
page_rec_write_index_page_no(
/*=========================*/
rec_t* rec, /*!< in: record to update */
page_rec_write_field(
/*=================*/
rec_t* rec, /*!< in/out: record to update */
ulint i, /*!< in: index of the field to update */
ulint page_no,/*!< in: value to write */
mtr_t* mtr); /*!< in: mtr */
ulint val, /*!< in: value to write */
mtr_t* mtr) /*!< in/out: mini-transaction */
__attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
of record heap.

23
include/page0page.ic

@ -962,6 +962,29 @@ page_get_free_space_of_empty(
- 2 * PAGE_DIR_SLOT_SIZE));
}
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
Overwrite one 4-byte field of a data dictionary record, logging the
write in the given mini-transaction. The field is located with
rec_get_nth_field_old(). */
UNIV_INLINE
void
page_rec_write_field(
/*=================*/
	rec_t*	rec,	/*!< in/out: record to update */
	ulint	i,	/*!< in: index of the field to update */
	ulint	val,	/*!< in: value to write */
	mtr_t*	mtr)	/*!< in/out: mini-transaction */
{
	ulint	field_len;
	byte*	field = rec_get_nth_field_old(rec, i, &field_len);

	/* Only 32-bit fields may be written through this function. */
	ut_ad(field_len == 4);

	mlog_write_ulint(field, val, MLOG_4BYTES, mtr);
}
#endif /* !UNIV_HOTBACKUP */
/************************************************************//**
Each user record on a page, and also the deleted user records in the heap
takes its size plus the fraction of the dir cell size /

14
include/rem0rec.h

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -480,6 +480,18 @@ ulint
rec_offs_any_extern(
/*================*/
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
#ifdef UNIV_BLOB_NULL_DEBUG
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
UNIV_INLINE
const byte*
rec_offs_any_null_extern(
/*=====================*/
const rec_t* rec, /*!< in: record */
const ulint* offsets) /*!< in: rec_get_offsets(rec) */
__attribute__((nonnull, warn_unused_result));
#endif /* UNIV_BLOB_NULL_DEBUG */
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */

41
include/rem0rec.ic

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -26,6 +26,7 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
#include "dict0dict.h"
#include "btr0types.h"
/* Compact flag ORed to the extra size returned by rec_get_offsets() */
#define REC_OFFS_COMPACT ((ulint) 1 << 31)
@ -1087,6 +1088,44 @@ rec_offs_any_extern(
return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
}
#ifdef UNIV_BLOB_NULL_DEBUG
/******************************************************//**
Scan a record for externally stored fields whose trailing BLOB reference
(the last BTR_EXTERN_FIELD_REF_SIZE bytes of the field) is all zero.
@return first field containing a null BLOB pointer, or NULL if none found */
UNIV_INLINE
const byte*
rec_offs_any_null_extern(
/*=====================*/
	const rec_t*	rec,		/*!< in: record */
	const ulint*	offsets)	/*!< in: rec_get_offsets(rec) */
{
	ulint	n_fields;
	ulint	i;

	ut_ad(rec_offs_validate(rec, NULL, offsets));

	/* Nothing to scan when no field is stored externally. */
	if (!rec_offs_any_extern(offsets)) {
		return(NULL);
	}

	n_fields = rec_offs_n_fields(offsets);

	for (i = 0; i < n_fields; i++) {
		const byte*	field;
		const byte*	ref;
		ulint		len;

		if (!rec_offs_nth_extern(offsets, i)) {
			continue;
		}

		field = rec_get_nth_field(rec, offsets, i, &len);
		ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);

		/* The reference occupies the tail of the field. */
		ref = field + len - BTR_EXTERN_FIELD_REF_SIZE;

		if (memcmp(ref, field_ref_zero,
			   BTR_EXTERN_FIELD_REF_SIZE) == 0) {
			return(field);
		}
	}

	return(NULL);
}
#endif /* UNIV_BLOB_NULL_DEBUG */
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */

20
include/rem0types.h

@ -34,13 +34,21 @@ typedef byte rec_t;
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed column length (or indexed prefix length). It is set to 3*256,
so that one can create a column prefix index on 256 characters of a
TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
a character may take at most 3 bytes.
/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
indexed field length (or indexed prefix length) for indexes on tables of
ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
Before UTF-8 encodings with mbmaxlen = 4 were supported, a UTF-8 character
took at most 3 bytes. The limit was therefore set to 3*256, so that one
can create a column prefix index on 256 characters of a TEXT or VARCHAR
column also in the UTF-8 charset.
This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
#define REC_MAX_INDEX_COL_LEN 768
#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768
/** Maximum indexed field length for table format DICT_TF_FORMAT_ZIP and
beyond.
This (3072) is the maximum index row length allowed, so we cannot create index
prefix column longer than that. */
#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072
#endif

13
include/row0ext.h

@ -30,6 +30,7 @@ Created September 2006 Marko Makela
#include "row0types.h"
#include "data0types.h"
#include "mem0mem.h"
#include "dict0types.h"
/********************************************************************//**
Creates a cache of column prefixes of externally stored columns.
@ -43,13 +44,13 @@ row_ext_create(
in the InnoDB table object, as reported by
dict_col_get_no(); NOT relative to the records
in the clustered index */
ulint flags, /*!< in: table->flags */
const dtuple_t* tuple, /*!< in: data tuple containing the field
references of the externally stored
columns; must be indexed by col_no;
the clustered index record must be
covered by a lock or a page latch
to prevent deletion (rollback or purge). */
ulint zip_size,/*!< compressed page size in bytes, or 0 */
mem_heap_t* heap); /*!< in: heap where created */
/********************************************************************//**
@ -63,7 +64,8 @@ row_ext_lookup_ith(
const row_ext_t* ext, /*!< in/out: column prefix cache */
ulint i, /*!< in: index of ext->ext[] */
ulint* len); /*!< out: length of prefix, in bytes,
at most REC_MAX_INDEX_COL_LEN */
at most the length determined by
DICT_MAX_FIELD_LEN_BY_FORMAT() */
/********************************************************************//**
Looks up a column prefix of an externally stored column.
@return column prefix, or NULL if the column is not stored externally,
@ -78,13 +80,18 @@ row_ext_lookup(
dict_col_get_no(); NOT relative to the
records in the clustered index */
ulint* len); /*!< out: length of prefix, in bytes,
at most REC_MAX_INDEX_COL_LEN */
at most the length determined by
DICT_MAX_FIELD_LEN_BY_FORMAT() */
/** Prefixes of externally stored columns.
This is a cache of column prefixes: buf holds one slot per externally
stored column, and row_ext_lookup_ith() addresses slot i at
buf + i * max_len with its length in len[i]. */
struct row_ext_struct{
ulint n_ext; /*!< number of externally stored columns */
const ulint* ext; /*!< col_no's of externally stored columns */
byte* buf; /*!< backing store of the column prefix cache;
slot i starts at offset i * max_len */
ulint max_len;/*!< maximum prefix length, it could be
REC_ANTELOPE_MAX_INDEX_COL_LEN or
REC_VERSION_56_MAX_INDEX_COL_LEN depending
on row format; also the slot stride in buf */
ulint len[1]; /*!< prefix lengths, each at most max_len;
0 if not cached (the BLOB could not be
fetched). NOTE(review): declared with one
element; presumably allocated with n_ext
entries -- confirm in row_ext_create() */
};

9
include/row0ext.ic

@ -37,7 +37,7 @@ row_ext_lookup_ith(
const row_ext_t* ext, /*!< in/out: column prefix cache */
ulint i, /*!< in: index of ext->ext[] */
ulint* len) /*!< out: length of prefix, in bytes,
at most REC_MAX_INDEX_COL_LEN */
at most ext->max_len */
{
ut_ad(ext);
ut_ad(len);
@ -45,11 +45,14 @@ row_ext_lookup_ith(
*len = ext->len[i];
ut_ad(*len <= ext->max_len);
ut_ad(ext->max_len > 0);
if (UNIV_UNLIKELY(*len == 0)) {
/* The BLOB could not be fetched to the cache. */
return(field_ref_zero);
} else {
return(ext->buf + i * REC_MAX_INDEX_COL_LEN);
return(ext->buf + i * ext->max_len);
}
}
@ -67,7 +70,7 @@ row_ext_lookup(
dict_col_get_no(); NOT relative to the
records in the clustered index */
ulint* len) /*!< out: length of prefix, in bytes,
at most REC_MAX_INDEX_COL_LEN */
at most ext->max_len */
{
ulint i;

30
include/row0row.h

@ -41,13 +41,24 @@ Created 4/20/1996 Heikki Tuuri
Gets the offset of the trx id field, in bytes relative to the origin of
a clustered index record.
@return offset of DATA_TRX_ID */
UNIV_INTERN
UNIV_INLINE
ulint
row_get_trx_id_offset(
/*==================*/
const rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
row_get_trx_id_offset_func(
/*=======================*/
#ifdef UNIV_DEBUG
const rec_t* rec, /*!< in: record */
#endif /* UNIV_DEBUG */
const dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
__attribute__((nonnull, warn_unused_result));
#ifdef UNIV_DEBUG
# define row_get_trx_id_offset(rec, index, offsets) \
row_get_trx_id_offset_func(rec, index, offsets)
#else /* UNIV_DEBUG */
# define row_get_trx_id_offset(rec, index, offsets) \
row_get_trx_id_offset_func(index, offsets)
#endif /* UNIV_DEBUG */
/*********************************************************************//**
Reads the trx id field from a clustered index record.
@return value of the field */
@ -55,9 +66,10 @@ UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
const rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
__attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Reads the roll pointer field from a clustered index record.
@return value of the field */

36
include/row0row.ic

@ -27,6 +27,36 @@ Created 4/20/1996 Heikki Tuuri
#include "rem0rec.h"
#include "trx0undo.h"
/*********************************************************************//**
Locate the DATA_TRX_ID system column within a clustered index record.
The record itself is only needed for the debug-build validation of the
offsets array, so the parameter exists only under UNIV_DEBUG.
@return offset of DATA_TRX_ID, in bytes relative to the record origin */
UNIV_INLINE
ulint
row_get_trx_id_offset_func(
/*=======================*/
#ifdef UNIV_DEBUG
	const rec_t*		rec,	/*!< in: record */
#endif /* UNIV_DEBUG */
	const dict_index_t*	index,	/*!< in: clustered index */
	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
{
	ulint	trx_id_pos;
	ulint	trx_id_offs;
	ulint	trx_id_len;

	ut_ad(dict_index_is_clust(index));
	ut_ad(rec_offs_validate(rec, index, offsets));

	/* Position of the system column among the index fields. */
	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);

	trx_id_offs = rec_get_nth_field_offs(offsets, trx_id_pos,
					     &trx_id_len);

	ut_ad(trx_id_len == DATA_TRX_ID_LEN);

	return(trx_id_offs);
}
/*********************************************************************//**
Reads the trx id field from a clustered index record.
@return value of the field */
@ -34,9 +64,9 @@ UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
const rec_t* rec, /*!< in: record */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
const rec_t* rec, /*!< in: record */
const dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;

4
include/univ.i

@ -51,7 +51,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 1
#define INNODB_VERSION_BUGFIX 7
#define INNODB_VERSION_BUGFIX 8
#ifndef PERCONA_INNODB_VERSION
#define PERCONA_INNODB_VERSION 20.1
@ -192,6 +192,8 @@ command. Not tested on Windows. */
debugging without UNIV_DEBUG */
#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column
debugging without UNIV_DEBUG */
#define UNIV_BLOB_NULL_DEBUG /* Enable deep off-page
column debugging */
#define UNIV_DEBUG /* Enable ut_ad() assertions
and disable UNIV_INLINE */
#define UNIV_DEBUG_LOCK_VALIDATE /* Enable

151
lock/lock0lock.c

@ -359,10 +359,8 @@ static
ibool
lock_rec_validate_page(
/*===================*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no);/*!< in: page number */
const buf_block_t* block) /*!< in: buffer block */
__attribute__((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/* The lock system */
@ -1101,10 +1099,10 @@ lock_rec_reset_nth_bit(
Gets the first or next record lock on a page.
@return next lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_next_on_page(
/*======================*/
lock_t* lock) /*!< in: a record lock */
const lock_t*
lock_rec_get_next_on_page_const(
/*============================*/
const lock_t* lock) /*!< in: a record lock */
{
ulint space;
ulint page_no;
@ -1133,6 +1131,18 @@ lock_rec_get_next_on_page(
return(lock);
}
/*********************************************************************//**
Non-const variant of lock_rec_get_next_on_page_const(): forwards to it
and casts the const qualifier away from the result.
@return next lock, NULL if none exists */
UNIV_INLINE
lock_t*
lock_rec_get_next_on_page(
/*======================*/
	lock_t*	lock)	/*!< in: a record lock */
{
	const lock_t*	next_lock = lock_rec_get_next_on_page_const(lock);

	return((lock_t*) next_lock);
}
/*********************************************************************//**
Gets the first record lock on a page, where the page is identified by its
file address.
@ -2655,9 +2665,7 @@ lock_move_reorganize_page(
mem_heap_free(heap);
#ifdef UNIV_DEBUG_LOCK_VALIDATE
ut_ad(lock_rec_validate_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block)));
ut_ad(lock_rec_validate_page(block));
#endif
}
@ -2745,12 +2753,8 @@ lock_move_rec_list_end(
lock_mutex_exit_kernel();
#ifdef UNIV_DEBUG_LOCK_VALIDATE
ut_ad(lock_rec_validate_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block)));
ut_ad(lock_rec_validate_page(buf_block_get_space(new_block),
buf_block_get_zip_size(block),
buf_block_get_page_no(new_block)));
ut_ad(lock_rec_validate_page(block));
ut_ad(lock_rec_validate_page(new_block));
#endif
}
@ -2858,9 +2862,7 @@ lock_move_rec_list_start(
lock_mutex_exit_kernel();
#ifdef UNIV_DEBUG_LOCK_VALIDATE
ut_ad(lock_rec_validate_page(buf_block_get_space(block),
buf_block_get_zip_size(block),
buf_block_get_page_no(block)));
ut_ad(lock_rec_validate_page(block));
#endif
}
@ -3850,17 +3852,18 @@ Checks if other transactions have an incompatible mode lock request in
the lock queue.
@return lock or NULL */
UNIV_INLINE
lock_t*
const lock_t*
lock_table_other_has_incompatible(
/*==============================*/
trx_t* trx, /*!< in: transaction, or NULL if all
transactions should be included */
ulint wait, /*!< in: LOCK_WAIT if also waiting locks are
taken into account, or 0 if not */
dict_table_t* table, /*!< in: table */
enum lock_mode mode) /*!< in: lock mode */
const trx_t* trx, /*!< in: transaction, or NULL if all
transactions should be included */
ulint wait, /*!< in: LOCK_WAIT if also
waiting locks are taken into
account, or 0 if not */
const dict_table_t* table, /*!< in: table */
enum lock_mode mode) /*!< in: lock mode */
{
lock_t* lock;
const lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
@ -3951,10 +3954,10 @@ static
ibool
lock_table_has_to_wait_in_queue(
/*============================*/
lock_t* wait_lock) /*!< in: waiting table lock */
const lock_t* wait_lock) /*!< in: waiting table lock */
{
dict_table_t* table;
lock_t* lock;
const dict_table_t* table;
const lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
ut_ad(lock_get_wait(wait_lock));
@ -4696,9 +4699,9 @@ static
ibool
lock_table_queue_validate(
/*======================*/
dict_table_t* table) /*!< in: table */
const dict_table_t* table) /*!< in: table */
{
lock_t* lock;
const lock_t* lock;
ut_ad(mutex_own(&kernel_mutex));
@ -4734,7 +4737,7 @@ lock_rec_queue_validate(
/*====================*/
const buf_block_t* block, /*!< in: buffer block containing rec */
const rec_t* rec, /*!< in: record to look at */
dict_index_t* index, /*!< in: index, or NULL if not known */
const dict_index_t* index, /*!< in: index, or NULL if not known */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
trx_t* impl_trx;
@ -4883,46 +4886,37 @@ static
ibool
lock_rec_validate_page(
/*===================*/
ulint space, /*!< in: space id */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint page_no)/*!< in: page number */
const buf_block_t* block) /*!< in: buffer block */
{
dict_index_t* index;
buf_block_t* block;
const page_t* page;
lock_t* lock;
const lock_t* lock;
const rec_t* rec;
ulint nth_lock = 0;
ulint nth_bit = 0;
ulint i;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
ulint* offsets = offsets_;
rec_offs_init(offsets_);
ut_ad(!mutex_own(&kernel_mutex));
mtr_start(&mtr);
ut_ad(zip_size != ULINT_UNDEFINED);
block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, &mtr);
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
page = block->frame;
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
lock_mutex_enter_kernel();
loop:
lock = lock_rec_get_first_on_page_addr(space, page_no);
lock = lock_rec_get_first_on_page_addr(buf_block_get_space(block),
buf_block_get_page_no(block));
if (!lock) {
goto function_exit;
}
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!block->page.file_page_was_freed);
#endif
for (i = 0; i < nth_lock; i++) {
lock = lock_rec_get_next_on_page(lock);
lock = lock_rec_get_next_on_page_const(lock);
if (!lock) {
goto function_exit;
@ -4945,15 +4939,14 @@ loop:
if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
index = lock->index;
rec = page_find_rec_with_heap_no(page, i);
rec = page_find_rec_with_heap_no(block->frame, i);
ut_a(rec);
offsets = rec_get_offsets(rec, index, offsets,
offsets = rec_get_offsets(rec, lock->index, offsets,
ULINT_UNDEFINED, &heap);
#if 0
fprintf(stderr,
"Validating %lu %lu\n",
(ulong) space, (ulong) page_no);
"Validating %u %u\n",
block->page.space, block->page.offset);
#endif
lock_mutex_exit_kernel();
@ -4962,7 +4955,8 @@ loop:
check WILL break the latching order and may
cause a deadlock of threads. */
lock_rec_queue_validate(block, rec, index, offsets);
lock_rec_queue_validate(block, rec, lock->index,
offsets);
lock_mutex_enter_kernel();
@ -4980,8 +4974,6 @@ loop:
function_exit:
lock_mutex_exit_kernel();
mtr_commit(&mtr);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
}
@ -4996,11 +4988,8 @@ ibool
lock_validate(void)
/*===============*/
{
lock_t* lock;
trx_t* trx;
ib_uint64_t limit;
ulint space;
ulint page_no;
const lock_t* lock;
const trx_t* trx;
ulint i;
lock_mutex_enter_kernel();
@ -5025,9 +5014,14 @@ lock_validate(void)
for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
limit = 0;
ulint space;
ulint page_no;
ib_uint64_t limit = 0;
for (;;) {
mtr_t mtr;
buf_block_t* block;
lock = HASH_GET_FIRST(lock_sys->rec_hash, i);
while (lock) {
@ -5053,13 +5047,26 @@ lock_validate(void)
lock_mutex_exit_kernel();
lock_rec_validate_page(space,
fil_space_get_zip_size(space),
page_no);
/* The lock and the block that it is referring
to may be freed at this point. We pass
BUF_GET_POSSIBLY_FREED to skip a debug check.
If the lock exists in lock_rec_validate_page()
we assert !block->page.file_page_was_freed. */
lock_mutex_enter_kernel();
mtr_start(&mtr);
block = buf_page_get_gen(
space, fil_space_get_zip_size(space),
page_no, RW_X_LATCH, NULL,
BUF_GET_POSSIBLY_FREED,
__FILE__, __LINE__, &mtr);
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
ut_ad(lock_rec_validate_page(block));
mtr_commit(&mtr);
limit++;
limit = ut_ull_create(space, page_no + 1);
lock_mutex_enter_kernel();
}
}

8
log/log0log.c

@ -1133,7 +1133,7 @@ log_io_complete(
&& srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
fil_flush(group->space_id);
fil_flush(group->space_id, FALSE);
}
#ifdef UNIV_DEBUG
@ -1156,7 +1156,7 @@ log_io_complete(
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
&& thd_flush_log_at_trx_commit(NULL) != 2) {
fil_flush(group->space_id);
fil_flush(group->space_id, FALSE);
}
mutex_enter(&(log_sys->mutex));
@ -1547,7 +1547,7 @@ loop:
group = UT_LIST_GET_FIRST(log_sys->log_groups);
fil_flush(group->space_id);
fil_flush(group->space_id, FALSE);
log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
}
@ -2644,7 +2644,7 @@ log_io_complete_archive(void)
mutex_exit(&(log_sys->mutex));
fil_flush(group->archive_space_id);
fil_flush(group->archive_space_id, TRUE);
mutex_enter(&(log_sys->mutex));

4
log/log0recv.c

@ -3643,7 +3643,7 @@ recv_reset_log_files_for_backup(
exit(1);
}
os_file_flush(log_file);
os_file_flush(log_file, TRUE);
os_file_close(log_file);
}
@ -3667,7 +3667,7 @@ recv_reset_log_files_for_backup(
os_file_write(name, log_file, buf, 0, 0,
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
os_file_flush(log_file);
os_file_flush(log_file, TRUE);
os_file_close(log_file);
ut_free(buf);

13
mtr/mtr0mtr.c

@ -265,9 +265,20 @@ mtr_commit(
mtr_memo_pop_all(mtr);
#endif /* !UNIV_HOTBACKUP */
ut_d(mtr->state = MTR_COMMITTED);
dyn_array_free(&(mtr->memo));
dyn_array_free(&(mtr->log));
#ifdef UNIV_DEBUG_VALGRIND
/* Declare everything uninitialized except
mtr->start_lsn, mtr->end_lsn and mtr->state. */
{
ib_uint64_t start_lsn = mtr->start_lsn;
ib_uint64_t end_lsn = mtr->end_lsn;
UNIV_MEM_INVALID(mtr, sizeof *mtr);
mtr->start_lsn = start_lsn;
mtr->end_lsn = end_lsn;
}
#endif /* UNIV_DEBUG_VALGRIND */
ut_d(mtr->state = MTR_COMMITTED);
}
#ifndef UNIV_HOTBACKUP

44
os/os0file.c

@ -2017,7 +2017,7 @@ os_file_set_size(
ut_free(buf2);
ret = os_file_flush(file);
ret = os_file_flush(file, TRUE);
if (ret) {
return(TRUE);
@ -2055,7 +2055,8 @@ static
int
os_file_fsync(
/*==========*/
os_file_t file) /*!< in: handle to a file */
os_file_t file, /*!< in: handle to a file */
ibool metadata)
{
int ret;
int failures;
@ -2064,7 +2065,16 @@ os_file_fsync(
failures = 0;
do {
#if defined(HAVE_FDATASYNC) && HAVE_DECL_FDATASYNC
if (metadata) {
ret = fsync(file);
} else {
ret = fdatasync(file);
}
#else
(void) metadata;
ret = fsync(file);
#endif
os_n_fsyncs++;
@ -2104,7 +2114,8 @@ UNIV_INTERN
ibool
os_file_flush_func(
/*===============*/
os_file_t file) /*!< in, own: handle to a file */
os_file_t file, /*!< in, own: handle to a file */
ibool metadata)
{
#ifdef __WIN__
BOOL ret;
@ -2154,18 +2165,18 @@ os_file_flush_func(
/* If we are not on an operating system that supports this,
then fall back to a plain fsync. */
ret = os_file_fsync(file);
ret = os_file_fsync(file, metadata);
} else {
ret = fcntl(file, F_FULLFSYNC, NULL);
if (ret) {
/* If we are not on a file system that supports this,
then fall back to a plain fsync. */
ret = os_file_fsync(file);
ret = os_file_fsync(file, metadata);
}
}
#else
ret = os_file_fsync(file);
ret = os_file_fsync(file, metadata);
#endif
if (ret == 0) {
@ -2411,7 +2422,7 @@ os_file_pwrite(
the OS crashes, a database page is only partially
physically written to disk. */
ut_a(TRUE == os_file_flush(file));
ut_a(TRUE == os_file_flush(file, TRUE));
}
# endif /* UNIV_DO_FLUSH */
@ -2463,7 +2474,7 @@ os_file_pwrite(
the OS crashes, a database page is only partially
physically written to disk. */
ut_a(TRUE == os_file_flush(file));
ut_a(TRUE == os_file_flush(file, TRUE));
}
# endif /* UNIV_DO_FLUSH */
@ -2836,7 +2847,7 @@ retry:
# ifdef UNIV_DO_FLUSH
if (!os_do_not_call_flush_at_each_write) {
ut_a(TRUE == os_file_flush(file));
ut_a(TRUE == os_file_flush(file, TRUE));
}
# endif /* UNIV_DO_FLUSH */
@ -4141,7 +4152,13 @@ os_aio_func(
Windows async i/o, Windows does not allow us to use
ordinary synchronous os_file_read etc. on the same file,
therefore we have built a special mechanism for synchronous
wait in the Windows case. */
wait in the Windows case.
Also note that the Performance Schema instrumentation has
been performed by current os_aio_func()'s wrapper function
pfs_os_aio_func(). So we would no longer need to call
Performance Schema instrumented os_file_read() and
os_file_write(). Instead, we should use os_file_read_func()
and os_file_write_func() */
if (type == OS_FILE_READ) {
return(os_file_read_trx(file, buf, offset,
@ -4150,7 +4167,8 @@ os_aio_func(
ut_a(type == OS_FILE_WRITE);
return(os_file_write(name, file, buf, offset, offset_high, n));
return(os_file_write_func(name, file, buf, offset,
offset_high, n));
}
try_again:
@ -4398,7 +4416,7 @@ os_aio_windows_handle(
#ifdef UNIV_DO_FLUSH
if (slot->type == OS_FILE_WRITE
&& !os_do_not_call_flush_at_each_write) {
if (!os_file_flush(slot->file)) {
if (!os_file_flush(slot->file, TRUE)) {
ut_error;
}
}
@ -4701,7 +4719,7 @@ found:
#ifdef UNIV_DO_FLUSH
if (slot->type == OS_FILE_WRITE
&& !os_do_not_call_flush_at_each_write)
&& !os_file_flush(slot->file) {
&& !os_file_flush(slot->file, TRUE) {
ut_error;
}
#endif /* UNIV_DO_FLUSH */

22
page/page0page.c

@ -1253,28 +1253,6 @@ page_move_rec_list_start(
return(TRUE);
}
/***********************************************************************//**
Low-level helper used during database index creation: stores the page
number of a newly created B-tree into field i of a data dictionary record.
The value is written with mlog_write_ulint() as a 4-byte quantity using
the mini-transaction supplied by the caller. */
UNIV_INTERN
void
page_rec_write_index_page_no(
/*=========================*/
	rec_t*	rec,	/*!< in: record to update */
	ulint	i,	/*!< in: index of the field to update */
	ulint	page_no,/*!< in: value to write */
	mtr_t*	mtr)	/*!< in: mtr */
{
	ulint	field_len;
	byte*	field = rec_get_nth_field_old(rec, i, &field_len);

	/* Debug check: the field being overwritten must be 4 bytes long,
	matching the MLOG_4BYTES write below. */
	ut_ad(field_len == 4);

	mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
}
#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**

2
page/page0zip.c

@ -464,7 +464,7 @@ page_zip_fields_encode(
if (fixed_sum && UNIV_UNLIKELY
(fixed_sum + field->fixed_len
> DICT_MAX_INDEX_COL_LEN)) {
> DICT_MAX_FIXED_COL_LEN)) {
/* Write out the length of the
preceding non-nullable fields,
to avoid exceeding the maximum

109
percona-suite/percona_mysqldump_innodb_optimize_keys.result

@ -1,109 +0,0 @@
#
# Test the --innodb-optimize-keys option.
#
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT, KEY(b)) ENGINE=MyISAM;
######################################
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
DROP TABLE IF EXISTS `t1`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `t1` (
`a` int(11) NOT NULL,
`b` int(11) DEFAULT NULL,
PRIMARY KEY (`a`),
KEY `b` (`b`)
) ENGINE=MyISAM DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
LOCK TABLES `t1` WRITE;
/*!40000 ALTER TABLE `t1` DISABLE KEYS */;
/*!40000 ALTER TABLE `t1` ENABLE KEYS */;
UNLOCK TABLES;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
######################################
DROP TABLE t1;
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t2 VALUES (0), (1), (2);
CREATE TABLE t1 (
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
a INT, b VARCHAR(255), c DECIMAL(10,3),
KEY (b),
UNIQUE KEY uniq(c,a),
FOREIGN KEY (a) REFERENCES t2(a) ON DELETE CASCADE
) ENGINE=InnoDB;
INSERT INTO t1(a,b,c) VALUES (0, "0", 0.0), (1, "1", 1.1), (2, "2", 2.2);
######################################
/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
/*!40101 SET NAMES utf8 */;
/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
/*!40103 SET TIME_ZONE='+00:00' */;
/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
DROP TABLE IF EXISTS `t1`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `t1` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`a` int(11) DEFAULT NULL,
`b` varchar(255) DEFAULT NULL,
`c` decimal(10,3) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
LOCK TABLES `t1` WRITE;
/*!40000 ALTER TABLE `t1` DISABLE KEYS */;
INSERT INTO `t1` VALUES (1,0,'0',0.000),(2,1,'1',1.100),(3,2,'2',2.200);
ALTER TABLE `t1` ADD UNIQUE KEY `uniq` (`c`,`a`), ADD KEY `b` (`b`), ADD KEY `a` (`a`), ADD CONSTRAINT `t1_ibfk_1` FOREIGN KEY (`a`) REFERENCES `t2` (`a`) ON DELETE CASCADE;
/*!40000 ALTER TABLE `t1` ENABLE KEYS */;
UNLOCK TABLES;
DROP TABLE IF EXISTS `t2`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `t2` (
`a` int(11) NOT NULL,
PRIMARY KEY (`a`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
/*!40101 SET character_set_client = @saved_cs_client */;
LOCK TABLES `t2` WRITE;
/*!40000 ALTER TABLE `t2` DISABLE KEYS */;
INSERT INTO `t2` VALUES (0),(1),(2);
/*!40000 ALTER TABLE `t2` ENABLE KEYS */;
UNLOCK TABLES;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
######################################
DROP TABLE t1, t2;

62
percona-suite/percona_mysqldump_innodb_optimize_keys.test

@ -1,62 +0,0 @@
# Test of the mysqldump --innodb-optimize-keys option: dump a MyISAM table
# and a pair of InnoDB tables, print each dump, and re-import the second one.
#
# Embedded server doesn't support external clients
--source include/not_embedded.inc
# Fast index creation is only available in InnoDB plugin
--source include/have_innodb.inc
# Save the initial number of concurrent sessions
--source include/count_sessions.inc
--echo #
--echo # Test the --innodb-optimize-keys option.
--echo #
# Temporary file that receives the mysqldump output
--let $file=$MYSQLTEST_VARDIR/tmp/t1.sql
# First test that the option has no effect on non-InnoDB tables
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT, KEY(b)) ENGINE=MyISAM;
# Dump t1 and print the dump between two marker lines
--exec $MYSQL_DUMP --skip-comments --innodb-optimize-keys test t1 >$file
--echo ######################################
--cat_file $file
--echo ######################################
# Clean up the dump file and the table before the InnoDB case
--remove_file $file
DROP TABLE t1;
# Check that for InnoDB tables secondary and foreign keys are created
# after the data is dumped
CREATE TABLE t2 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t2 VALUES (0), (1), (2);
# t1 has a plain secondary key, a unique key and a foreign key to t2
CREATE TABLE t1 (
id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
a INT, b VARCHAR(255), c DECIMAL(10,3),
KEY (b),
UNIQUE KEY uniq(c,a),
FOREIGN KEY (a) REFERENCES t2(a) ON DELETE CASCADE
) ENGINE=InnoDB;
INSERT INTO t1(a,b,c) VALUES (0, "0", 0.0), (1, "1", 1.1), (2, "2", 2.2);
# Dump both tables and print the dump between two marker lines
--exec $MYSQL_DUMP --skip-comments --innodb-optimize-keys test t1 t2 >$file
--echo ######################################
--cat_file $file
--echo ######################################
# Check that the resulting dump can be imported back
--exec $MYSQL test < $file
--remove_file $file
DROP TABLE t1, t2;
# Wait until the number of concurrent sessions is back to its initial value
--source include/wait_until_count_sessions.inc

88
percona-suite/percona_query_cache_with_comments.inc.backup

@ -0,0 +1,88 @@
# Runs the query "select * from t1" in many textual variants (comments and
# whitespace before, inside and after the statement). Each variant is stored
# in $query and then handed to percona_query_cache_with_comments_eval.inc.
--source include/percona_query_cache_with_comments_clear.inc
# Variants with a comment in front of the statement
# NOTE(review): only this first variant also runs eval directly before
# sourcing the eval include - confirm whether that is intentional
let $query=/* with comment first */select * from t1;
eval $query;
--source include/percona_query_cache_with_comments_eval.inc
let $query=# with comment first
select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=-- with comment first
select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
# Same leading comments, now containing a double-quoted word
let $query=/* with comment first and "quote" */select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=# with comment first and "quote"
select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=-- with comment first and "quote"
select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
# Leading comments preceded by whitespace (the query text starts on the
# line after the let)
let $query=
/* with comment and whitespaces first */select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=
# with comment and whitespaces first
select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=
-- with comment and whitespaces first
select * from t1;
--source include/percona_query_cache_with_comments_eval.inc
# Variants with a comment inside the statement; $internal supplies the
# text between the slashes
let $internal=* internal comment *;
let $query=select * /$internal/ from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select */$internal/ from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select */$internal/from t1;
--source include/percona_query_cache_with_comments_eval.inc
# Same internal-comment variants with a double-quoted word in the comment
let $internal=* internal comment with "quote" *;
let $query=select * /$internal/ from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select */$internal/ from t1;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select */$internal/from t1;
--source include/percona_query_cache_with_comments_eval.inc
# Variants with trailing whitespace or a newline after the statement
let $query=select * from t1
;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select * from t1 ;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select * from t1 ;
--source include/percona_query_cache_with_comments_eval.inc
# Variants with a comment after the statement
let $query=select * from t1
/* comment in the end */;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select * from t1
/* comment in the end */
;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select * from t1 #comment in the end;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select * from t1 #comment in the end
;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select * from t1 -- comment in the end;
--source include/percona_query_cache_with_comments_eval.inc
let $query=select * from t1 -- comment in the end
;
--source include/percona_query_cache_with_comments_eval.inc

66
percona-suite/percona_query_response_time-replication.result

@ -0,0 +1,66 @@
include/master-slave.inc
[connection master]
DROP TABLE IF EXISTS t;
CREATE TABLE t(id INT);
SELECT * from t;
id
SELECT * from t;
id
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
Warnings:
Warning 1292 Truncated incorrect query_response_time_range_base value: '1'
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 10
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=ON;
INSERT INTO t SELECT SLEEP(0.4);
Warnings:
Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
0
INSERT INTO t SELECT SLEEP(0.4);
Warnings:
Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
0
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
2
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
3
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
FLUSH QUERY_RESPONSE_TIME;
INSERT INTO t SELECT SLEEP(0.4);
Warnings:
Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
0
INSERT INTO t SELECT SLEEP(0.4);
Warnings:
Note 1592 Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave.
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
0
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
2
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count)
3
DROP TABLE IF EXISTS t;
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=OFF;
STOP SLAVE;
include/wait_for_slave_to_stop.inc

61
percona-suite/percona_query_response_time-replication.test

@ -0,0 +1,61 @@
# Checks the INFORMATION_SCHEMA.QUERY_RESPONSE_TIME counters in a
# master/slave setup with statement-based binary logging. Statistics are
# switched on over the slave connection only, statements are executed on
# the master, and the counters are read on both connections.
--source include/have_response_time_distribution.inc
--source include/master-slave.inc
--source include/have_binlog_format_statement.inc
--source include/have_debug.inc
# Suppress the unsafe-statement message caused by INSERT ... SELECT SLEEP()
# and keep the suppression call itself out of the recorded output
--disable_query_log
call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT. Statement is unsafe because it uses a system function that may return a different value on the slave. Statement:");
--enable_query_log
# Create the test table on the master and replicate it to the slave
connection master;
-- disable_warnings
DROP TABLE IF EXISTS t;
-- enable_warnings
CREATE TABLE t(id INT);
SELECT * from t;
sync_slave_with_master;
connection slave;
SELECT * from t;
# A base of 1 is not accepted as is: the recorded result shows a truncation
# warning and the variable reading 2
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
# First pass: range base 10
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
# Reset the collected statistics, then enable collection (slave connection)
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=ON;
# Run two slow inserts on the master; the recorded result expects the
# counter sum read on the master connection to stay 0
connection master;
INSERT INTO t SELECT SLEEP(0.4);
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
INSERT INTO t SELECT SLEEP(0.4);
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
sync_slave_with_master;
# On the slave the recorded result expects 2 and then 3
# (presumably the first SELECT is itself counted - TODO confirm)
connection slave;
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
# Second pass: same scenario with range base 2
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
connection master;
INSERT INTO t SELECT SLEEP(0.4);
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
INSERT INTO t SELECT SLEEP(0.4);
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
sync_slave_with_master;
connection slave;
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SELECT SUM(INFORMATION_SCHEMA.QUERY_RESPONSE_TIME.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
# Cleanup: drop the table on the master and replicate the drop
connection master;
DROP TABLE IF EXISTS t;
sync_slave_with_master;
# Put the range base back to 10, turn statistics off and stop replication
connection slave;
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=OFF;
STOP SLAVE;
-- source include/wait_for_slave_to_stop.inc

306
percona-suite/percona_query_response_time-stored.result

@ -0,0 +1,306 @@
SET GLOBAL debug="d,query_exec_time_debug";
CREATE FUNCTION test_f()
RETURNS CHAR(30) DETERMINISTIC
BEGIN
SET SESSION debug="+d,query_exec_time_1.1";
RETURN 'Hello, world!';
END/
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
Warnings:
Warning 1292 Truncated incorrect query_response_time_range_base value: '1'
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
FLUSH QUERY_RESPONSE_TIME;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
44
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000003
0.000007
0.000015
0.000030
0.000061
0.000122
0.000244
0.000488
0.000976
0.001953
0.003906
0.007812
0.015625
0.031250
0.062500
0.125000
0.250000
0.500000
1.000000
2.000000
4.000000
8.000000
16.000000
32.000000
64.000000
128.000000
256.000000
512.000000
1024.000000
2048.000000
4096.000000
8192.000000
16384.000000
32768.000000
65536.000000
131072.000000
262144.000000
524288.000000
1048576.00000
2097152.00000
4194304.00000
8388608.00000
TOO LONG
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
test_f()
Hello, world!
SELECT test_f();
test_f()
Hello, world!
SELECT test_f();
test_f()
Hello, world!
SELECT test_f();
test_f()
Hello, world!
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
4 4 4 1 44
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
44
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000003
0.000007
0.000015
0.000030
0.000061
0.000122
0.000244
0.000488
0.000976
0.001953
0.003906
0.007812
0.015625
0.031250
0.062500
0.125000
0.250000
0.500000
1.000000
2.000000
4.000000
8.000000
16.000000
32.000000
64.000000
128.000000
256.000000
512.000000
1024.000000
2048.000000
4096.000000
8192.000000
16384.000000
32768.000000
65536.000000
131072.000000
262144.000000
524288.000000
1048576.00000
2097152.00000
4194304.00000
8388608.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 10
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
test_f()
Hello, world!
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
1 1 1 1 14
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
14
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000010
0.000100
0.001000
0.010000
0.100000
1.000000
10.000000
100.000000
1000.000000
10000.000000
100000.000000
1000000.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 10
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 7
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
test_f()
Hello, world!
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
1 1 1 1 17
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
17
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000008
0.000059
0.000416
0.002915
0.020408
0.142857
1.000000
7.000000
49.000000
343.000000
2401.000000
16807.000000
117649.000000
823543.000000
5764801.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 7
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 156
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
test_f()
Hello, world!
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
1 1 1 1 7
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
7
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000041
0.006410
1.000000
156.000000
24336.000000
3796416.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 156
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 1000
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
test_f()
Hello, world!
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
1 1 1 1 6
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
6
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.001000
1.000000
1000.000000
1000000.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 1000
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
Warnings:
Warning 1292 Truncated incorrect query_response_time_range_base value: '1001'
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 1000
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
DROP FUNCTION test_f;
SET GLOBAL debug=default;

85
percona-suite/percona_query_response_time-stored.test

@ -0,0 +1,85 @@
--source include/have_response_time_distribution.inc
--source include/have_debug.inc
SET GLOBAL debug="d,query_exec_time_debug";
delimiter /;
CREATE FUNCTION test_f()
RETURNS CHAR(30) DETERMINISTIC
BEGIN
SET SESSION debug="+d,query_exec_time_1.1";
RETURN 'Hello, world!';
END/
delimiter ;/
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
source include/percona_query_response_time_show.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
SELECT test_f();
SELECT test_f();
SELECT test_f();
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SELECT test_f();
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
DROP FUNCTION test_f;
SET GLOBAL debug=default;

377
percona-suite/percona_query_response_time.result

@ -0,0 +1,377 @@
SET GLOBAL debug="d,query_exec_time_debug";
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
Warnings:
Warning 1292 Truncated incorrect query_response_time_range_base value: '1'
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
FLUSH QUERY_RESPONSE_TIME;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
44
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000003
0.000007
0.000015
0.000030
0.000061
0.000122
0.000244
0.000488
0.000976
0.001953
0.003906
0.007812
0.015625
0.031250
0.062500
0.125000
0.250000
0.500000
1.000000
2.000000
4.000000
8.000000
16.000000
32.000000
64.000000
128.000000
256.000000
512.000000
1024.000000
2048.000000
4096.000000
8192.000000
16384.000000
32768.000000
65536.000000
131072.000000
262144.000000
524288.000000
1048576.00000
2097152.00000
4194304.00000
8388608.00000
TOO LONG
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SET SESSION debug="+d,query_exec_time_0.31";
SET SESSION debug="+d,query_exec_time_0.32";
SET SESSION debug="+d,query_exec_time_0.33";
SET SESSION debug="+d,query_exec_time_0.34";
SET SESSION debug="+d,query_exec_time_0.35";
SET SESSION debug="+d,query_exec_time_0.36";
SET SESSION debug="+d,query_exec_time_0.37";
SET SESSION debug="+d,query_exec_time_0.38";
SET SESSION debug="+d,query_exec_time_0.39";
SET SESSION debug="+d,query_exec_time_0.4";
SET SESSION debug="+d,query_exec_time_1.1";
SET SESSION debug="+d,query_exec_time_1.2";
SET SESSION debug="+d,query_exec_time_1.3";
SET SESSION debug="+d,query_exec_time_1.5";
SET SESSION debug="+d,query_exec_time_1.4";
SET SESSION debug="+d,query_exec_time_0.5";
SET SESSION debug="+d,query_exec_time_2.1";
SET SESSION debug="+d,query_exec_time_2.3";
SET SESSION debug="+d,query_exec_time_2.5";
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
10 19 15 4 44
1 19 15 4 44
5 19 15 4 44
3 19 15 4 44
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
44
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000003
0.000007
0.000015
0.000030
0.000061
0.000122
0.000244
0.000488
0.000976
0.001953
0.003906
0.007812
0.015625
0.031250
0.062500
0.125000
0.250000
0.500000
1.000000
2.000000
4.000000
8.000000
16.000000
32.000000
64.000000
128.000000
256.000000
512.000000
1024.000000
2048.000000
4096.000000
8192.000000
16384.000000
32768.000000
65536.000000
131072.000000
262144.000000
524288.000000
1048576.00000
2097152.00000
4194304.00000
8388608.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 2
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 10
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SET SESSION debug="+d,query_exec_time_0.31";
SET SESSION debug="+d,query_exec_time_0.32";
SET SESSION debug="+d,query_exec_time_0.33";
SET SESSION debug="+d,query_exec_time_0.34";
SET SESSION debug="+d,query_exec_time_0.35";
SET SESSION debug="+d,query_exec_time_0.36";
SET SESSION debug="+d,query_exec_time_0.37";
SET SESSION debug="+d,query_exec_time_0.38";
SET SESSION debug="+d,query_exec_time_0.39";
SET SESSION debug="+d,query_exec_time_0.4";
SET SESSION debug="+d,query_exec_time_1.1";
SET SESSION debug="+d,query_exec_time_1.2";
SET SESSION debug="+d,query_exec_time_1.3";
SET SESSION debug="+d,query_exec_time_1.5";
SET SESSION debug="+d,query_exec_time_1.4";
SET SESSION debug="+d,query_exec_time_0.5";
SET SESSION debug="+d,query_exec_time_2.1";
SET SESSION debug="+d,query_exec_time_2.3";
SET SESSION debug="+d,query_exec_time_2.5";
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
11 19 17 2 14
8 19 17 2 14
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
14
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000010
0.000100
0.001000
0.010000
0.100000
1.000000
10.000000
100.000000
1000.000000
10000.000000
100000.000000
1000000.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 10
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 7
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SET SESSION debug="+d,query_exec_time_0.31";
SET SESSION debug="+d,query_exec_time_0.32";
SET SESSION debug="+d,query_exec_time_0.33";
SET SESSION debug="+d,query_exec_time_0.34";
SET SESSION debug="+d,query_exec_time_0.35";
SET SESSION debug="+d,query_exec_time_0.36";
SET SESSION debug="+d,query_exec_time_0.37";
SET SESSION debug="+d,query_exec_time_0.38";
SET SESSION debug="+d,query_exec_time_0.39";
SET SESSION debug="+d,query_exec_time_0.4";
SET SESSION debug="+d,query_exec_time_1.1";
SET SESSION debug="+d,query_exec_time_1.2";
SET SESSION debug="+d,query_exec_time_1.3";
SET SESSION debug="+d,query_exec_time_1.5";
SET SESSION debug="+d,query_exec_time_1.4";
SET SESSION debug="+d,query_exec_time_0.5";
SET SESSION debug="+d,query_exec_time_2.1";
SET SESSION debug="+d,query_exec_time_2.3";
SET SESSION debug="+d,query_exec_time_2.5";
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
11 19 17 2 17
8 19 17 2 17
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
17
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.000008
0.000059
0.000416
0.002915
0.020408
0.142857
1.000000
7.000000
49.000000
343.000000
2401.000000
16807.000000
117649.000000
823543.000000
5764801.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 7
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 156
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SET SESSION debug="+d,query_exec_time_0.31";
SET SESSION debug="+d,query_exec_time_0.32";
SET SESSION debug="+d,query_exec_time_0.33";
SET SESSION debug="+d,query_exec_time_0.34";
SET SESSION debug="+d,query_exec_time_0.35";
SET SESSION debug="+d,query_exec_time_0.36";
SET SESSION debug="+d,query_exec_time_0.37";
SET SESSION debug="+d,query_exec_time_0.38";
SET SESSION debug="+d,query_exec_time_0.39";
SET SESSION debug="+d,query_exec_time_0.4";
SET SESSION debug="+d,query_exec_time_1.1";
SET SESSION debug="+d,query_exec_time_1.2";
SET SESSION debug="+d,query_exec_time_1.3";
SET SESSION debug="+d,query_exec_time_1.5";
SET SESSION debug="+d,query_exec_time_1.4";
SET SESSION debug="+d,query_exec_time_0.5";
SET SESSION debug="+d,query_exec_time_2.1";
SET SESSION debug="+d,query_exec_time_2.3";
SET SESSION debug="+d,query_exec_time_2.5";
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
11 19 17 2 7
8 19 17 2 7
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
7
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000041
0.006410
1.000000
156.000000
24336.000000
3796416.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 156
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 1000
FLUSH QUERY_RESPONSE_TIME;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
SET SESSION debug="+d,query_exec_time_0.31";
SET SESSION debug="+d,query_exec_time_0.32";
SET SESSION debug="+d,query_exec_time_0.33";
SET SESSION debug="+d,query_exec_time_0.34";
SET SESSION debug="+d,query_exec_time_0.35";
SET SESSION debug="+d,query_exec_time_0.36";
SET SESSION debug="+d,query_exec_time_0.37";
SET SESSION debug="+d,query_exec_time_0.38";
SET SESSION debug="+d,query_exec_time_0.39";
SET SESSION debug="+d,query_exec_time_0.4";
SET SESSION debug="+d,query_exec_time_1.1";
SET SESSION debug="+d,query_exec_time_1.2";
SET SESSION debug="+d,query_exec_time_1.3";
SET SESSION debug="+d,query_exec_time_1.5";
SET SESSION debug="+d,query_exec_time_1.4";
SET SESSION debug="+d,query_exec_time_0.5";
SET SESSION debug="+d,query_exec_time_2.1";
SET SESSION debug="+d,query_exec_time_2.3";
SET SESSION debug="+d,query_exec_time_2.5";
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
count query_count query_total not_zero_region_count region_count
11 19 17 2 6
8 19 17 2 6
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
region_count
6
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
time
0.000001
0.001000
1.000000
1000.000000
1000000.00000
TOO LONG
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 1000
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
Warnings:
Warning 1292 Truncated incorrect query_response_time_range_base value: '1001'
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
Variable_name Value
query_response_time_range_base 1000
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
SET GLOBAL debug=default;

71
percona-suite/percona_query_response_time.test

@ -0,0 +1,71 @@
--source include/have_response_time_distribution.inc
--source include/have_debug.inc
SET GLOBAL debug="d,query_exec_time_debug";
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 2;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
source include/percona_query_response_time_show.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
source include/percona_query_response_time_sleep.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 10;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
source include/percona_query_response_time_sleep.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 7;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
source include/percona_query_response_time_sleep.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 156;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
source include/percona_query_response_time_sleep.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1000;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
source include/percona_query_response_time_flush.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=1;
source include/percona_query_response_time_sleep.inc;
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
source include/percona_query_response_time_show.inc;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE = 1001;
SHOW GLOBAL VARIABLES where Variable_name like 'QUERY_RESPONSE_TIME_RANGE_BASE';
SET GLOBAL QUERY_RESPONSE_TIME_STATS=0;
SET GLOBAL QUERY_RESPONSE_TIME_RANGE_BASE =10;
SET GLOBAL debug=default;

1
percona-suite/percona_query_response_time_flush.inc

@ -0,0 +1 @@
FLUSH QUERY_RESPONSE_TIME;

8
percona-suite/percona_query_response_time_show.inc

@ -0,0 +1,8 @@
SELECT d.count,
(SELECT SUM(a.count) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as a WHERE a.count != 0) as query_count,
(SELECT SUM((b.total * 1000000) DIV 1000000) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as b WHERE b.count != 0) as query_total,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as c WHERE c.count != 0) as not_zero_region_count,
(SELECT COUNT(*) FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME) as region_count
FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME as d WHERE d.count > 0;
SELECT COUNT(*) as region_count FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;
SELECT time FROM INFORMATION_SCHEMA.QUERY_RESPONSE_TIME;

19
percona-suite/percona_query_response_time_sleep.inc

@ -0,0 +1,19 @@
SET SESSION debug="+d,query_exec_time_0.31";
SET SESSION debug="+d,query_exec_time_0.32";
SET SESSION debug="+d,query_exec_time_0.33";
SET SESSION debug="+d,query_exec_time_0.34";
SET SESSION debug="+d,query_exec_time_0.35";
SET SESSION debug="+d,query_exec_time_0.36";
SET SESSION debug="+d,query_exec_time_0.37";
SET SESSION debug="+d,query_exec_time_0.38";
SET SESSION debug="+d,query_exec_time_0.39";
SET SESSION debug="+d,query_exec_time_0.4";
SET SESSION debug="+d,query_exec_time_1.1";
SET SESSION debug="+d,query_exec_time_1.2";
SET SESSION debug="+d,query_exec_time_1.3";
SET SESSION debug="+d,query_exec_time_1.5";
SET SESSION debug="+d,query_exec_time_1.4";
SET SESSION debug="+d,query_exec_time_0.5";
SET SESSION debug="+d,query_exec_time_2.1";
SET SESSION debug="+d,query_exec_time_2.3";
SET SESSION debug="+d,query_exec_time_2.5";

2
rem/rem0rec.c

@ -1174,7 +1174,7 @@ rec_convert_dtuple_to_rec_comp(
} else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
ut_ad(len <= REC_MAX_INDEX_COL_LEN
ut_ad(len <= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
*lens-- = (byte) (len >> 8) | 0xc0;
*lens-- = (byte) len;

19
row/row0ext.c

@ -44,8 +44,9 @@ row_ext_cache_fill(
{
const byte* field = dfield_get_data(dfield);
ulint f_len = dfield_get_len(dfield);
byte* buf = ext->buf + i * REC_MAX_INDEX_COL_LEN;
byte* buf = ext->buf + i * ext->max_len;
ut_ad(ext->max_len > 0);
ut_ad(i < ext->n_ext);
ut_ad(dfield_is_ext(dfield));
ut_a(f_len >= BTR_EXTERN_FIELD_REF_SIZE);
@ -56,14 +57,14 @@ row_ext_cache_fill(
/* The BLOB pointer is not set: we cannot fetch it */
ext->len[i] = 0;
} else {
/* Fetch at most REC_MAX_INDEX_COL_LEN of the column.
/* Fetch at most ext->max_len of the column.
The column should be non-empty. However,
trx_rollback_or_clean_all_recovered() may try to
access a half-deleted BLOB if the server previously
crashed during the execution of
btr_free_externally_stored_field(). */
ext->len[i] = btr_copy_externally_stored_field_prefix(
buf, REC_MAX_INDEX_COL_LEN, zip_size, field, f_len);
buf, ext->max_len, zip_size, field, f_len);
}
}
@ -79,16 +80,18 @@ row_ext_create(
in the InnoDB table object, as reported by
dict_col_get_no(); NOT relative to the records
in the clustered index */
ulint flags, /*!< in: table->flags */
const dtuple_t* tuple, /*!< in: data tuple containing the field
references of the externally stored
columns; must be indexed by col_no;
the clustered index record must be
covered by a lock or a page latch
to prevent deletion (rollback or purge). */
ulint zip_size,/*!< compressed page size in bytes, or 0 */
mem_heap_t* heap) /*!< in: heap where created */
{
ulint i;
ulint zip_size = dict_table_flags_to_zip_size(flags);
row_ext_t* ret = mem_heap_alloc(heap, (sizeof *ret)
+ (n_ext - 1) * sizeof ret->len);
@ -97,10 +100,12 @@ row_ext_create(
ret->n_ext = n_ext;
ret->ext = ext;
ret->buf = mem_heap_alloc(heap, n_ext * REC_MAX_INDEX_COL_LEN);
ret->max_len = DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags);
ret->buf = mem_heap_alloc(heap, n_ext * ret->max_len);
#ifdef UNIV_DEBUG
memset(ret->buf, 0xaa, n_ext * REC_MAX_INDEX_COL_LEN);
UNIV_MEM_ALLOC(ret->buf, n_ext * REC_MAX_INDEX_COL_LEN);
memset(ret->buf, 0xaa, n_ext * ret->max_len);
UNIV_MEM_ALLOC(ret->buf, n_ext * ret->max_len);
#endif
/* Fetch the BLOB prefixes */

13
row/row0mysql.c

@ -2008,6 +2008,7 @@ row_create_index_for_mysql(
ulint i;
ulint len;
char* table_name;
dict_table_t* table;
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
@ -2021,6 +2022,8 @@ row_create_index_for_mysql(
que_run_threads()) and thus index->table_name is not available. */
table_name = mem_strdup(index->table_name);
table = dict_table_get_low(table_name);
trx_start_if_not_started(trx);
/* Check that the same column does not appear twice in the index.
@ -2053,7 +2056,7 @@ row_create_index_for_mysql(
}
/* Check also that prefix_len and actual length
< DICT_MAX_INDEX_COL_LEN */
is less than that from DICT_MAX_FIELD_LEN_BY_FORMAT() */
len = dict_index_get_nth_field(index, i)->prefix_len;
@ -2061,8 +2064,9 @@ row_create_index_for_mysql(
len = ut_max(len, field_lengths[i]);
}
if (len >= DICT_MAX_INDEX_COL_LEN) {
err = DB_TOO_BIG_RECORD;
/* Column or prefix length exceeds maximum column length */
if (len > (ulint) DICT_MAX_FIELD_LEN_BY_FORMAT(table)) {
err = DB_TOO_BIG_INDEX_COL;
goto error_handling;
}
@ -2087,6 +2091,7 @@ row_create_index_for_mysql(
que_graph_free((que_t*) que_node_get_parent(thr));
error_handling:
if (err != DB_SUCCESS) {
/* We have special error handling here */
@ -3027,7 +3032,7 @@ row_truncate_table_for_mysql(
rec = btr_pcur_get_rec(&pcur);
if (root_page_no != FIL_NULL) {
page_rec_write_index_page_no(
page_rec_write_field(
rec, DICT_SYS_INDEXES_PAGE_NO_FIELD,
root_page_no, &mtr);
/* We will need to commit and restart the

48
row/row0row.c

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -47,35 +47,6 @@ Created 4/20/1996 Heikki Tuuri
#include "read0read.h"
#include "ut0mem.h"
/*********************************************************************//**
Gets the offset of trx id field, in bytes relative to the origin of
a clustered index record.
@return offset of DATA_TRX_ID */
UNIV_INTERN
ulint
row_get_trx_id_offset(
/*==================*/
const rec_t* rec __attribute__((unused)),
/*!< in: record */
dict_index_t* index, /*!< in: clustered index */
const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint pos;
ulint offset;
ulint len;
ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
offset = rec_get_nth_field_offs(offsets, pos, &len);
ut_ad(len == DATA_TRX_ID_LEN);
return(offset);
}
/*****************************************************************//**
When an insert or purge to a table is performed, this function builds
the entry to be inserted into or purged from an index on the table.
@ -151,8 +122,6 @@ row_build_index_entry(
} else if (dfield_is_ext(dfield)) {
ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
len -= BTR_EXTERN_FIELD_REF_SIZE;
ut_a(ind_field->prefix_len <= len
|| dict_index_is_clust(index));
}
len = dtype_get_at_most_n_mbchars(
@ -231,6 +200,14 @@ row_build(
ut_ad(rec_offs_validate(rec, index, offsets));
}
#if 0 && defined UNIV_BLOB_NULL_DEBUG
/* This one can fail in trx_rollback_active() if
the server crashed during an insert before the
btr_store_big_rec_extern_fields() did mtr_commit()
all BLOB pointers to the clustered index record. */
ut_a(!rec_offs_any_null_extern(rec, offsets));
#endif /* 0 && UNIV_BLOB_NULL_DEBUG */
if (type != ROW_COPY_POINTERS) {
/* Take a copy of rec to heap */
buf = mem_heap_alloc(heap, rec_offs_size(offsets));
@ -301,8 +278,7 @@ row_build(
ut_ad(dict_table_get_format(index->table)
< DICT_TF_FORMAT_ZIP);
} else if (j) {
*ext = row_ext_create(j, ext_cols, row,
dict_table_zip_size(index->table),
*ext = row_ext_create(j, ext_cols, index->table->flags, row,
heap);
} else {
*ext = NULL;
@ -431,6 +407,10 @@ row_rec_to_index_entry(
rec = rec_copy(buf, rec, offsets);
/* Avoid a debug assertion in rec_offs_validate(). */
rec_offs_make_valid(rec, index, offsets);
#ifdef UNIV_BLOB_NULL_DEBUG
} else {
ut_a(!rec_offs_any_null_extern(rec, offsets));
#endif /* UNIV_BLOB_NULL_DEBUG */
}
entry = row_rec_to_index_entry_low(rec, index, offsets, n_ext, heap);

11
row/row0sel.c

@ -99,10 +99,12 @@ row_sel_sec_rec_is_for_blob(
ulint clust_len, /*!< in: length of clust_field */
const byte* sec_field, /*!< in: column in secondary index */
ulint sec_len, /*!< in: length of sec_field */
ulint zip_size) /*!< in: compressed page size, or 0 */
dict_table_t* table) /*!< in: table */
{
ulint len;
byte buf[DICT_MAX_INDEX_COL_LEN];
byte buf[REC_VERSION_56_MAX_INDEX_COL_LEN];
ulint zip_size = dict_table_flags_to_zip_size(table->flags);
ulint max_prefix_len = DICT_MAX_FIELD_LEN_BY_FORMAT(table);
ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE);
@ -116,7 +118,7 @@ row_sel_sec_rec_is_for_blob(
return(FALSE);
}
len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
len = btr_copy_externally_stored_field_prefix(buf, max_prefix_len,
zip_size,
clust_field, clust_len);
@ -222,8 +224,7 @@ row_sel_sec_rec_is_for_clust_rec(
col->mbminmaxlen,
clust_field, clust_len,
sec_field, sec_len,
dict_table_zip_size(
clust_index->table))) {
clust_index->table)) {
goto inequal;
}

4
row/row0upd.c

@ -1229,8 +1229,8 @@ row_upd_replace(
}
if (n_ext_cols) {
*ext = row_ext_create(n_ext_cols, ext_cols, row,
dict_table_zip_size(table), heap);
*ext = row_ext_create(n_ext_cols, ext_cols, table->flags, row,
heap);
} else {
*ext = NULL;
}

18
row/row0vers.c

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1997, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -550,6 +550,11 @@ row_vers_build_for_consistent_read(
/* The view already sees this version: we can
copy it to in_heap and return */
#ifdef UNIV_BLOB_NULL_DEBUG
ut_a(!rec_offs_any_null_extern(
version, *offsets));
#endif /* UNIV_BLOB_NULL_DEBUG */
buf = mem_heap_alloc(in_heap,
rec_offs_size(*offsets));
*old_vers = rec_copy(buf, version, *offsets);
@ -583,6 +588,10 @@ row_vers_build_for_consistent_read(
*offsets = rec_get_offsets(prev_version, index, *offsets,
ULINT_UNDEFINED, offset_heap);
#ifdef UNIV_BLOB_NULL_DEBUG
ut_a(!rec_offs_any_null_extern(prev_version, *offsets));
#endif /* UNIV_BLOB_NULL_DEBUG */
trx_id = row_get_rec_trx_id(prev_version, index, *offsets);
if (read_view_sees_trx_id(view, trx_id)) {
@ -682,6 +691,10 @@ row_vers_build_for_semi_consistent_read(
/* We found a version that belongs to a
committed transaction: return it. */
#ifdef UNIV_BLOB_NULL_DEBUG
ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_BLOB_NULL_DEBUG */
if (rec == version) {
*old_vers = rec;
err = DB_SUCCESS;
@ -739,6 +752,9 @@ row_vers_build_for_semi_consistent_read(
version = prev_version;
*offsets = rec_get_offsets(version, index, *offsets,
ULINT_UNDEFINED, offset_heap);
#ifdef UNIV_BLOB_NULL_DEBUG
ut_a(!rec_offs_any_null_extern(version, *offsets));
#endif /* UNIV_BLOB_NULL_DEBUG */
}/* for (;;) */
if (heap) {

4
srv/srv0srv.c

@ -1406,7 +1406,7 @@ retry:
trx->op_info = "waiting in InnoDB queue";
thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
os_event_wait(slot->event);
thd_wait_end(trx->mysql_thd);
@ -1815,7 +1815,7 @@ srv_suspend_mysql_thread(
/* Suspend this thread and wait for the event. */
thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_TABLE_LOCK);
thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK);
os_event_wait(event);
thd_wait_end(trx->mysql_thd);

63
trx/trx0rec.c

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -351,10 +351,10 @@ trx_undo_rec_get_col_val(
ut_ad(*orig_len >= BTR_EXTERN_FIELD_REF_SIZE);
ut_ad(*len > *orig_len);
/* @see dtuple_convert_big_rec() */
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE * 2);
ut_ad(*len >= BTR_EXTERN_FIELD_REF_SIZE);
/* we do not have access to index->table here
ut_ad(dict_table_get_format(index->table) >= DICT_TF_FORMAT_ZIP
|| *len >= REC_MAX_INDEX_COL_LEN
|| *len >= col->max_prefix
+ BTR_EXTERN_FIELD_REF_SIZE);
*/
@ -456,9 +456,10 @@ static
byte*
trx_undo_page_fetch_ext(
/*====================*/
byte* ext_buf, /*!< in: a buffer of
REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE */
byte* ext_buf, /*!< in: buffer to hold the prefix
data and BLOB pointer */
ulint prefix_len, /*!< in: prefix size to store
in the undo log */
ulint zip_size, /*!< compressed page size in bytes,
or 0 for uncompressed BLOB */
const byte* field, /*!< in: an externally stored column */
@ -467,7 +468,7 @@ trx_undo_page_fetch_ext(
{
/* Fetch the BLOB. */
ulint ext_len = btr_copy_externally_stored_field_prefix(
ext_buf, REC_MAX_INDEX_COL_LEN, zip_size, field, *len);
ext_buf, prefix_len, zip_size, field, *len);
/* BLOBs should always be nonempty. */
ut_a(ext_len);
/* Append the BLOB pointer to the prefix. */
@ -488,10 +489,11 @@ trx_undo_page_report_modify_ext(
byte* ptr, /*!< in: undo log position,
at least 15 bytes must be available */
byte* ext_buf, /*!< in: a buffer of
REC_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE,
DICT_MAX_FIELD_LEN_BY_FORMAT() size,
or NULL when should not fetch
a longer prefix */
ulint prefix_len, /*!< prefix size to store in the
undo log */
ulint zip_size, /*!< compressed page size in bytes,
or 0 for uncompressed BLOB */
const byte** field, /*!< in/out: the locally stored part of
@ -499,6 +501,8 @@ trx_undo_page_report_modify_ext(
ulint* len) /*!< in/out: length of field, in bytes */
{
if (ext_buf) {
ut_a(prefix_len > 0);
/* If an ordering column is externally stored, we will
have to store a longer prefix of the field. In this
case, write to the log a marker followed by the
@ -507,7 +511,7 @@ trx_undo_page_report_modify_ext(
ptr += mach_write_compressed(ptr, *len);
*field = trx_undo_page_fetch_ext(ext_buf, zip_size,
*field = trx_undo_page_fetch_ext(ext_buf, prefix_len, zip_size,
*field, len);
ptr += mach_write_compressed(ptr, *len);
@ -553,7 +557,7 @@ trx_undo_page_report_modify(
ulint i;
trx_id_t trx_id;
ibool ignore_prefix = FALSE;
byte ext_buf[REC_MAX_INDEX_COL_LEN
byte ext_buf[REC_VERSION_56_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE];
ut_a(dict_index_is_clust(index));
@ -706,13 +710,21 @@ trx_undo_page_report_modify(
}
if (rec_offs_nth_extern(offsets, pos)) {
const dict_col_t* col
= dict_index_get_nth_col(index, pos);
ulint prefix_len
= dict_max_field_len_store_undo(
table, col);
ut_ad(prefix_len + BTR_EXTERN_FIELD_REF_SIZE
<= sizeof ext_buf);
ptr = trx_undo_page_report_modify_ext(
ptr,
dict_index_get_nth_col(index, pos)
->ord_part
col->ord_part
&& !ignore_prefix
&& flen < REC_MAX_INDEX_COL_LEN
? ext_buf : NULL,
&& flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
? ext_buf : NULL, prefix_len,
dict_table_zip_size(table),
&field, &flen);
@ -791,11 +803,20 @@ trx_undo_page_report_modify(
&flen);
if (rec_offs_nth_extern(offsets, pos)) {
const dict_col_t* col =
dict_index_get_nth_col(
index, pos);
ulint prefix_len =
dict_max_field_len_store_undo(
table, col);
ut_a(prefix_len < sizeof ext_buf);
ptr = trx_undo_page_report_modify_ext(
ptr,
flen < REC_MAX_INDEX_COL_LEN
flen < REC_ANTELOPE_MAX_INDEX_COL_LEN
&& !ignore_prefix
? ext_buf : NULL,
? ext_buf : NULL, prefix_len,
dict_table_zip_size(table),
&field, &flen);
} else {
@ -1095,11 +1116,11 @@ trx_undo_rec_get_partial_row(
undo log record. */
if (!ignore_prefix && col->ord_part) {
ut_a(dfield_get_len(dfield)
>= 2 * BTR_EXTERN_FIELD_REF_SIZE);
>= BTR_EXTERN_FIELD_REF_SIZE);
ut_a(dict_table_get_format(index->table)
>= DICT_TF_FORMAT_ZIP
|| dfield_get_len(dfield)
>= REC_MAX_INDEX_COL_LEN
>= REC_ANTELOPE_MAX_INDEX_COL_LEN
+ BTR_EXTERN_FIELD_REF_SIZE);
}
}
@ -1584,6 +1605,10 @@ trx_undo_prev_version_build(
return(DB_ERROR);
}
# ifdef UNIV_BLOB_NULL_DEBUG
ut_a(!rec_offs_any_null_extern(rec, offsets));
# endif /* UNIV_BLOB_NULL_DEBUG */
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint n_ext;

3
trx/trx0undo.c

@ -1985,8 +1985,6 @@ trx_undo_free_prepared(
/*===================*/
trx_t* trx) /*!< in/out: PREPARED transaction */
{
mutex_enter(&trx->rseg->mutex);
ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
if (trx->update_undo) {
@ -2001,6 +1999,5 @@ trx_undo_free_prepared(
trx->insert_undo);
trx_undo_mem_free(trx->insert_undo);
}
mutex_exit(&trx->rseg->mutex);
}
#endif /* !UNIV_HOTBACKUP */

2
ut/ut0ut.c

@ -662,6 +662,8 @@ ut_strerr(
return("Table is being used");
case DB_TOO_BIG_RECORD:
return("Record too big");
case DB_TOO_BIG_INDEX_COL:
return("Index columns size too big");
case DB_LOCK_WAIT_TIMEOUT:
return("Lock wait timeout");
case DB_NO_REFERENCED_ROW:

Loading…
Cancel
Save