You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

148 lines
5.0 KiB

BUG#40337 Fsyncing master and relay log to disk after every event is too slow NOTE: Backporting the patch to next-mr. The fix proposed in BUG#35542 and BUG#31665 introduces a performance issue when fsyncing the master.info, relay-log.info and relay-log.bin* after #th events. Although such a solution has been proposed to reduce the probability of corrupted files due to a slave crash, the performance penalty introduced by it has made the approach impractical for highly intensive workloads. In a nutshell, the option --sync-relay-log proposed in BUG#35542 and BUG#31665 simultaneously fsyncs master.info, relay-log.info and relay-log.bin*, and this is the main source of performance issues. This patch introduces new options that give more control to the user on what should be fsynced and how often: 1) (--sync-master-info, integer) which syncs the master.info after #th events; 2) (--sync-relay-log, integer) which syncs the relay-log.bin* after #th events; 3) (--sync-relay-log-info, integer) which syncs the relay-log.info after #th transactions. To provide both performance and increased reliability, we recommend the following setup: 1) --sync-master-info = 0, eventually the operating system will fsync it; 2) --sync-relay-log = 0, eventually the operating system will fsync it; 3) --sync-relay-log-info = 1, fsyncs it after every transaction. Notice that the previous setup does not reduce the probability of corrupted master.info and relay-log.bin*. To overcome the issue, this patch also introduces a recovery mechanism that right after restart throws away the relay-log.bin* retrieved from a master and updates the master.info based on the relay-log.info: 4) (--relay-log-recovery, boolean) which enables a recovery mechanism that throws away relay-log.bin* after a crash. However, it can only recover the incorrect binlog file and position in master.info; if other information (host, port, password, etc.) is corrupted or incorrect, then this recovery mechanism will fail to work.
16 years ago
########################################################################################
# This test verifies the options --sync-relay-log-info and --relay-log-recovery by
# crashing the slave in two different situations:
#  (case-1) - Corrupt the relay log with changes which were not processed by
#  the SQL Thread and crash the slave.
#  (case-2) - Corrupt the master.info with wrong coordinates and crash the slave.
#
#  Case 1:
#    1 - Stops the SQL Thread.
#    2 - Inserts new records into the master.
#    3 - Corrupts the relay-log.bin* which most likely has such changes.
#    4 - Crashes the slave.
#    5 - Verifies that the slave is in sync with the master, which means that the
#    information loss was circumvented by the recovery process.
#
#  Case 2:
#    1 - Stops the SQL/IO Threads.
#    2 - Inserts new records into the master.
#    3 - Corrupts the master.info with wrong coordinates.
#    4 - Crashes the slave.
#    5 - Verifies that the slave is in sync with the master, which means that the
#    information loss was circumvented by the recovery process.
########################################################################################
  24. ########################################################################################
  25. # Configuring the environment
  26. ########################################################################################
  27. --echo =====Configuring the enviroment=======;
  28. --source include/master-slave.inc
  29. --source include/not_embedded.inc
  30. --source include/not_valgrind.inc
  31. --source include/have_debug.inc
  32. --source include/have_innodb.inc
  33. call mtr.add_suppression('Attempting backtrace');
  34. call mtr.add_suppression("Recovery from master pos .* and file master-bin.000001");
  35. CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
  36. insert into t1(a) values(1);
  37. insert into t1(a) values(2);
  38. insert into t1(a) values(3);
  39. ########################################################################################
  40. # Case 1: Corrupt a relay-log.bin*
  41. ########################################################################################
  42. --echo =====Inserting data on the master but without the SQL Thread being running=======;
  43. sync_slave_with_master;
  44. connection slave;
  45. let $MYSQLD_SLAVE_DATADIR= `select @@datadir`;
  46. --replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
  47. --copy_file $MYSQLD_SLAVE_DATADIR/master.info $MYSQLD_SLAVE_DATADIR/master.backup
  48. stop slave SQL_THREAD;
  49. source include/wait_for_slave_sql_to_stop.inc;
  50. connection master;
  51. insert into t1(a) values(4);
  52. insert into t1(a) values(5);
  53. insert into t1(a) values(6);
  54. --echo =====Removing relay log files and crashing/recoverying the slave=======;
  55. connection slave;
  56. stop slave IO_THREAD;
  57. source include/wait_for_slave_io_to_stop.inc;
  58. let $file= query_get_value("SHOW SLAVE STATUS", Relay_Log_File, 1);
  59. --replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
  60. --exec echo "failure" > $MYSQLD_SLAVE_DATADIR/$file
  61. --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
  62. SET SESSION debug="d,crash_before_rotate_relaylog";
  63. --error 2013
  64. FLUSH LOGS;
  65. --enable_reconnect
  66. --source include/wait_until_connected_again.inc
  67. --echo =====Dumping and comparing tables=======;
  68. start slave;
  69. source include/wait_for_slave_to_start.inc;
  70. connection master;
  71. sync_slave_with_master;
  72. let $diff_table_1=master:test.t1;
  73. let $diff_table_2=slave:test.t1;
  74. source include/diff_tables.inc;
  75. ########################################################################################
  76. # Case 2: Corrupt a master.info
  77. ########################################################################################
  78. --echo =====Corrupting the master.info=======;
  79. connection slave;
  80. stop slave;
  81. source include/wait_for_slave_to_stop.inc;
  82. connection master;
  83. FLUSH LOGS;
  84. insert into t1(a) values(7);
  85. insert into t1(a) values(8);
  86. insert into t1(a) values(9);
  87. connection slave;
  88. --replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
  89. --exec cat $MYSQLD_SLAVE_DATADIR/master.backup > $MYSQLD_SLAVE_DATADIR/master.info
  90. let MYSQLD_SLAVE_DATADIR=`select @@datadir`;
  91. --perl
  92. use strict;
  93. use warnings;
  94. my $src= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.backup";
  95. my $dst= "$ENV{'MYSQLD_SLAVE_DATADIR'}/master.info";
  96. open(FILE, "<", $src) or die;
  97. my @content= <FILE>;
  98. close FILE;
  99. open(FILE, ">", $dst) or die;
  100. binmode FILE;
  101. print FILE @content;
  102. close FILE;
  103. EOF
  104. --exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
  105. SET SESSION debug="d,crash_before_rotate_relaylog";
  106. --error 2013
  107. FLUSH LOGS;
  108. --enable_reconnect
  109. --source include/wait_until_connected_again.inc
  110. --echo =====Dumping and comparing tables=======;
  111. start slave;
  112. source include/wait_for_slave_to_start.inc;
  113. connection master;
  114. sync_slave_with_master;
  115. let $diff_table_1=master:test.t1;
  116. let $diff_table_2=slave:test.t1;
  117. source include/diff_tables.inc;
  118. ########################################################################################
  119. # Clean up
  120. ########################################################################################
  121. --echo =====Clean up=======;
  122. connection master;
  123. drop table t1;