diff --git a/lib/PerconaTest.pm b/lib/PerconaTest.pm
index 5d41d08f..40175e6e 100644
--- a/lib/PerconaTest.pm
+++ b/lib/PerconaTest.pm
@@ -236,6 +236,31 @@ sub wait_until {
    return 0;
 }
 
+# Wait until every given slave dbh reports zero replication lag, i.e. until
+# SHOW SLAVE STATUS returns a false Seconds_Behind_Master for each of them.
+# Bails out of the test run if a slave reports no lag value at all (stopped
+# slave), and dies if a slave is still lagging when wait_until() gives up.
+sub wait_until_no_lag {
+   my (@dbhs) = @_;
+   foreach my $dbh (@dbhs) {
+      PTDEVDEBUG && _d('Waiting for no slave lag');
+      wait_until(  # slaves aren't lagging
+         sub {
+            my $row = $dbh->selectrow_hashref('SHOW SLAVE STATUS');
+            # Accept either capitalization of the column name.
+            my $lag = exists $row->{Seconds_Behind_Master}
+                    ? $row->{Seconds_Behind_Master}
+                    : $row->{seconds_behind_master};
+            PTDEVDEBUG && _d('Slave lag:', $lag);
+            if ( !defined $lag ) {
+               BAIL_OUT("Slave is stopped: " . Dumper($row));
+            }
+            return $lag ? 0 : 1;
+         }
+      ) or die "Slaves are still lagging";
+   }
+}
+
 # Wait t seconds for code to return.
 sub wait_for {
    my ( $code, $t ) = @_;
diff --git a/sandbox/test-env b/sandbox/test-env
index 26558d29..54bea934 100755
--- a/sandbox/test-env
+++ b/sandbox/test-env
@@ -354,18 +354,16 @@ case $opt in
       # don't replicate to new sandbox servers. This makes creating new
       # sandbox servers a lot faster. There's no check if this works or
       # not, so... yeah.
-      /tmp/12347/use -e "STOP SLAVE"
-      /tmp/12346/use -e "STOP SLAVE"
+      /tmp/12347/use -e "STOP SLAVE; RESET SLAVE;"
+      /tmp/12346/use -e "STOP SLAVE; RESET SLAVE; RESET MASTER;"
+      /tmp/12345/use -e "RESET MASTER"
 
-      /tmp/12346/use -e "RESET MASTER"
-      /tmp/12345/use -e "RESET MASTER"
-
-      /tmp/12347/use -e "change master to master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12346, master_log_file='mysql-bin.000001', master_log_pos=0"
-      /tmp/12346/use -e "change master to master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12345, master_log_file='mysql-bin.000001', master_log_pos=0"
-
-      /tmp/12347/use -e "START SLAVE"
+      /tmp/12346/use -e "CHANGE MASTER TO master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12345, master_log_file='mysql-bin.000001', master_log_pos=0"
       /tmp/12346/use -e "START SLAVE"
 
+      /tmp/12347/use -e "CHANGE MASTER TO master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12346, master_log_file='mysql-bin.000001', master_log_pos=0"
+      /tmp/12347/use -e "START SLAVE"
+
       exit_status=0
       ;;
    version)
diff --git a/t/pt-table-checksum/basics.t b/t/pt-table-checksum/basics.t
index 0fe30d17..a110c6eb 100644
--- a/t/pt-table-checksum/basics.t
+++ b/t/pt-table-checksum/basics.t
@@ -61,6 +61,7 @@ sub reset_repl_db {
    $master_dbh->do("use $repl_db");
 }
 
+diag(`$trunk/sandbox/test-env reset >/dev/null`);
 $sb->wipe_clean($master_dbh);
 diag(`rm $outfile >/dev/null 2>&1`);
 
diff --git a/t/pt-table-checksum/progress.t b/t/pt-table-checksum/progress.t
index a08070a8..812cf755 100644
--- a/t/pt-table-checksum/progress.t
+++ b/t/pt-table-checksum/progress.t
@@ -42,8 +42,6 @@ else {
    plan tests => 4;
 }
 
-# Must have empty checksums table for these tests.
-$master_dbh->do('drop table if exists percona.checksums');
 
 # The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
 # so we need to specify --lock-wait-timeout=3 else the tool will die.
@@ -58,15 +56,12 @@ my $scripts = "$trunk/t/pt-table-checksum/scripts/";
 # ############################################################################
 # Tool should check all slaves' lag, so slave2, not just slave1.
 # ############################################################################
-wait_until(  # slaves aren't lagging
-   sub {
-      $row = $slave1_dbh->selectrow_hashref('show slave status');
-      return 0 if $row->{Seconds_Behind_Master};
-      $row = $slave2_dbh->selectrow_hashref('show slave status');
-      return 0 if $row->{Seconds_Behind_Master};
-      return 1;
-   }
-) or die "Slaves are still lagging";
+
+# Must have empty checksums table for these tests.
+$master_dbh->do('drop table if exists percona.checksums');
+
+# Must not be lagging.
+PerconaTest::wait_until_no_lag($slave1_dbh, $slave2_dbh);
 
 # This big fancy command waits until it sees the checksum for sakila.city
 # in the repl table on the master, then it stops slave2 for 2 seconds,
@@ -100,6 +95,5 @@ is(
 # Done.
 # #############################################################################
 $sb->wipe_clean($master_dbh);
-diag(`$trunk/sandbox/test-env reset >/dev/null`);
 ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
 exit;
diff --git a/t/pt-table-checksum/replication_filters.t b/t/pt-table-checksum/replication_filters.t
index 4a716720..29600db1 100644
--- a/t/pt-table-checksum/replication_filters.t
+++ b/t/pt-table-checksum/replication_filters.t
@@ -52,22 +52,36 @@ my @args = ($master_dsn, qw(--lock-wait-timeout 3), '--max-load', '');
 my $output;
 my $row;
 
+# You must call this sub if the master 12345 or slave1 12346 is restarted,
+# else a slave might notice that its master went away and enter the "trying
+# to reconnect" state, and then replication will break as the tests continue.
+sub reset_slaves {
+   $slave1_dbh->do('STOP SLAVE');
+   $slave2_dbh->do('STOP SLAVE');
+   $slave1_dbh->do('START SLAVE');
+   $slave2_dbh->do('START SLAVE');
+}
+
+# #############################################################################
+# Repl filters on all slaves, at all depths, should be found.
+# #############################################################################
+
 # Add a replication filter to the slaves.
+diag(`/tmp/12347/stop >/dev/null`);
+diag(`/tmp/12346/stop >/dev/null`);
 for my $port ( qw(12346 12347) ) {
-   diag(`/tmp/$port/stop >/dev/null`);
    diag(`cp /tmp/$port/my.sandbox.cnf /tmp/$port/orig.cnf`);
    diag(`echo "replicate-ignore-db=foo" >> /tmp/$port/my.sandbox.cnf`);
    diag(`/tmp/$port/start >/dev/null`);
 }
 $slave1_dbh = $sb->get_dbh_for('slave1');
 $slave2_dbh = $sb->get_dbh_for('slave2');
-$sb->ok() or BAIL_OUT("Sandbox is broken");
 
 my $pos = PerconaTest::get_master_binlog_pos($master_dbh);
 
 $output = output(
    sub { pt_table_checksum::main(@args, qw(-t sakila.country)) },
-   #stderr => 1,
+   stderr => 1,
 );
 
 is(
@@ -92,7 +106,7 @@ like(
 $output = output(
    sub { pt_table_checksum::main(@args, qw(-t sakila.country),
       qw(--no-check-replication-filters)) },
-   #stderr => 1,
+   stderr => 1,
 );
 
 like(
@@ -102,8 +116,9 @@ like(
 );
 
 # Remove the replication filter from the slave.
+diag(`/tmp/12347/stop >/dev/null`);
+diag(`/tmp/12346/stop >/dev/null`);
 for my $port ( qw(12346 12347) ) {
-   diag(`/tmp/$port/stop >/dev/null`);
    diag(`mv /tmp/$port/orig.cnf /tmp/$port/my.sandbox.cnf`);
    diag(`/tmp/$port/start >/dev/null`);
 }
@@ -120,21 +135,14 @@ pt_table_checksum::main(@args, qw(--chunk-time 0 --chunk-size 100),
    '-t', 'mysql.user,sakila.city', qw(--quiet));
 PerconaTest::wait_for_table($slave1_dbh, 'percona.checksums', "db='sakila' and tbl='city' and chunk=6");
 
+# Add a replication filter to the master: ignore db mysql.
 $master_dbh->disconnect();
-$slave1_dbh->disconnect();
-
-# Add a replication filter to the slave: ignore db mysql.
-diag(`/tmp/12346/stop >/dev/null`);
 diag(`/tmp/12345/stop >/dev/null`);
-
 diag(`cp /tmp/12345/my.sandbox.cnf /tmp/12345/orig.cnf`);
 diag(`echo "binlog-ignore-db=mysql" >> /tmp/12345/my.sandbox.cnf`);
-
 diag(`/tmp/12345/start >/dev/null`);
-diag(`/tmp/12346/start >/dev/null`);
+reset_slaves();
 $master_dbh = $sb->get_dbh_for('master');
-$slave1_dbh = $sb->get_dbh_for('slave1');
-$sb->ok() or BAIL_OUT("Sandbox is broken");
 
 # Checksum the tables again in 1 chunk. Since db percona isn't being
 # ignored, deleting old results in the repl table should replicate.
@@ -157,38 +165,30 @@ $master_dbh->do("use percona");
 $master_dbh->do("truncate table percona.checksums");
 wait_until(
    sub {
-      $row = $slave1_dbh->selectall_arrayref("select * from percona.checksums");
+      $row=$slave1_dbh->selectall_arrayref("select * from percona.checksums");
       return !@$row;
    }
 );
 
-$master_dbh->disconnect();
-$slave1_dbh->disconnect();
-
-# Restore original config.
-diag(`/tmp/12346/stop >/dev/null`);
-diag(`/tmp/12345/stop >/dev/null`);
-diag(`cp /tmp/12345/orig.cnf /tmp/12345/my.sandbox.cnf`);
-
 # #############################################################################
 # Test --replicate-database which resulted from this issue.
 # #############################################################################
 
-# Add a binlog-do-db filter so master will only replicate
-# statements when USE mysql is in effect.
+# Restore original config. Then add a binlog-do-db filter so master
+# will only replicate statements when USE mysql is in effect.
+$master_dbh->disconnect();
+diag(`/tmp/12345/stop >/dev/null`);
+diag(`cp /tmp/12345/orig.cnf /tmp/12345/my.sandbox.cnf`);
 diag(`echo "binlog-do-db=mysql" >> /tmp/12345/my.sandbox.cnf`);
 
 diag(`/tmp/12345/start >/dev/null`);
-diag(`/tmp/12346/start >/dev/null`);
-
 $master_dbh = $sb->get_dbh_for('master');
-$slave1_dbh = $sb->get_dbh_for('slave1');
-$sb->ok() or BAIL_OUT("Sandbox is broken");
+reset_slaves();
 
 $output = output(
    sub { pt_table_checksum::main(@args, qw(--no-check-replication-filters),
       qw(-d mysql -t user)) },
-   #stderr => 1,
+   stderr => 1,
 );
 
 # Because we did not use --replicate-database, pt-table-checksum should
@@ -208,7 +208,7 @@ $master_dbh->do("use mysql");
 $master_dbh->do("truncate table percona.checksums");
 wait_until(
    sub {
-      $row = $slave1_dbh->selectall_arrayref("select * from percona.checksums");
+      $row=$slave1_dbh->selectall_arrayref("select * from percona.checksums");
       return !@$row;
    }
 );
@@ -233,22 +233,18 @@ is(
 );
 
 # #############################################################################
-# Restore original config.
+# Check that only the expected dbs are used.
 # #############################################################################
-$master_dbh->disconnect();
-$slave1_dbh->disconnect();
 
-diag(`/tmp/12346/stop >/dev/null`);
+# Restore the original config.
+$master_dbh->disconnect();
 diag(`/tmp/12345/stop >/dev/null`);
 diag(`mv /tmp/12345/orig.cnf /tmp/12345/my.sandbox.cnf`);
 diag(`/tmp/12345/start >/dev/null`);
-diag(`/tmp/12346/start >/dev/null`);
-
-diag(`$trunk/sandbox/test-env reset`);
-
 $master_dbh = $sb->get_dbh_for('master');
-$slave1_dbh = $sb->get_dbh_for('slave1');
-$sb->ok() or BAIL_OUT("Sandbox is broken");
+
+# Reset the slaves and clear the binlogs.
+diag(`$trunk/sandbox/test-env reset`);
 
 pt_table_checksum::main(@args, qw(--quiet));
 
@@ -264,6 +260,7 @@ use sakila/*!*/;
    "USE each table's database (binlog dump)"
 );
 
+# Clear the binlogs.
 diag(`$trunk/sandbox/test-env reset`);
 
 pt_table_checksum::main(@args, qw(--quiet --replicate-database percona));
@@ -276,16 +273,6 @@ is(
    "USE only --replicate-database (binlog dump)"
 );
 
-# #############################################################################
-# Stop and start slaves to avoid sandbox breakage caused by restarting servers.
-# #############################################################################
-#$slave1_dbh = $sb->get_dbh_for('slave1');
-#$slave2_dbh = $sb->get_dbh_for('slave2');
-#$slave1_dbh->do('STOP SLAVE');
-#$slave2_dbh->do('STOP SLAVE');
-#$slave1_dbh->do('START SLAVE');
-#$slave2_dbh->do('START SLAVE');
-
 # #############################################################################
 # Done.
 # #############################################################################