From 129b150fff0449f0960ec1aa3ce5d759d4460424 Mon Sep 17 00:00:00 2001 From: Kenny Gryp Date: Wed, 30 Apr 2014 14:16:05 +0200 Subject: [PATCH 1/6] added basic gtid support to pt-slave-restart --- bin/pt-slave-restart | 74 ++++++++++++++++++++++++++++++++++++++++--- sandbox/start-sandbox | 8 +++++ 2 files changed, 78 insertions(+), 4 deletions(-) diff --git a/bin/pt-slave-restart b/bin/pt-slave-restart index d0ec3327..781614af 100755 --- a/bin/pt-slave-restart +++ b/bin/pt-slave-restart @@ -4766,6 +4766,12 @@ use sigtrap qw(handler finish untrapped normal-signals); use Percona::Toolkit; use constant PTDEBUG => $ENV{PTDEBUG} || 0; +use Data::Dumper; + +local $Data::Dumper::Indent = 1; +local $Data::Dumper::Sortkeys = 1; +local $Data::Dumper::Quotekeys = 0; + $OUTPUT_AUTOFLUSH = 1; my $o; @@ -4976,11 +4982,27 @@ sub watch_server { $start_sql .= " UNTIL RELAY_LOG_FILE = '$file', RELAY_LOG_POS = $pos"; } - my $set_skip = $dbh->prepare("SET GLOBAL SQL_SLAVE_SKIP_COUNTER = " - . $o->get('skip-count')); my $start = $dbh->prepare($start_sql); my $stop = $dbh->prepare('STOP SLAVE'); + # ######################################################################## + # Detect if GTID is enabled. Skipping an event is done differently. + # ######################################################################## + # When MySQL 5.6.5 or higher is used and gtid is enabled, skipping a + # transaction is not possible with SQL_SLAVE_SKIP_COUNTER + my $skip_event; + my $gtid_mode; + + if ( VersionParser->new($dbh) >= '5.6.5' ) { + my $row = $dbh->selectrow_arrayref('SELECT @@GLOBAL.gtid_mode'); + $gtid_mode = $row->[0]; + } else { + $gtid_mode="N/A"; + } + PTDEBUG && _d('GTID is ' . ($gtid_mode eq 'ON' + ? 'enabled' + : 'disabled')); + # ######################################################################## # Lookup tables of things to do when a problem is detected. 
# ######################################################################## @@ -4989,7 +5011,9 @@ sub watch_server { [ qr/Could not parse relay log event entry/ => 'refetch_relay_log' ], [ qr/Incorrect key file for table/ => 'repair_table' ], # This must be the last one. It's a catch-all rule: skip and restart. - [ qr/./ => 'skip' ], + [ qr/./ => ($gtid_mode eq 'ON' + ? 'skip_gtid' + : 'skip') ], ); # ######################################################################## @@ -5012,9 +5036,51 @@ sub watch_server { }, skip => sub { my ( $stat, $dbh ) = @_; - PTDEBUG && _d('Found non-relay-log error'); + my $set_skip = $dbh->prepare("SET GLOBAL SQL_SLAVE_SKIP_COUNTER = " + . $o->get('skip-count')); $set_skip->execute(); }, + skip_gtid => sub { + my ( $stat, $dbh ) = @_; + + # We need the highest transaction in the executed_gtid_set. + # and then we need to increase it by 1 (the one we want to skip) + # Notes: + # - does not work with parallel replication + # - it skips the next transaction from the master_uuid + # (when a slaveB is replicating from slaveA, + # the master_uuid is it's own master, slaveA) + my $gtid_exec = $stat->{executed_gtid_set}; + my $gtid_masteruuid = $stat->{master_uuid}; + + $gtid_exec =~ /$gtid_masteruuid([0-9-:]*)/; + my $gtid_exec_ids = $1; + $gtid_exec_ids =~ s/:[0-9]-/:/g; + + my @gtid_exec_ranges = split(/:/, $gtid_exec_ids); + delete $gtid_exec_ranges[0]; # undef the first value,it's always empty + + # get the highest id by sorting the array, removing the undef value + my @gtid_exec_sorted = sort { $a <=> $b } + grep { defined($_) } @gtid_exec_ranges; + my $gtid_next = $gtid_exec_sorted[-1] + $o->get('skip-count'); + + PTDEBUG && _d("GTID: master_uuid:$gtid_masteruuid,\n" + . "GTID: executed_gtid_set:$gtid_exec,\n" + . "GTID: gtid max for master_uuid:" . $gtid_exec_sorted[-1] . "\n" + . 
"GTID: next gtid:'$gtid_masteruuid:$gtid_next'"); + + # Set the sessions next gtid, write an empty transaction + my $gtid_set_next = $dbh->prepare("SET GTID_NEXT='" + . $gtid_masteruuid . ":" . $gtid_next . "'"); + $gtid_set_next->execute(); + $dbh->begin_work(); + $dbh->commit(); + + # Set the session back to the automatically generated GTID_NEXT. + my $gtid_automatic = $dbh->prepare("SET GTID_NEXT='AUTOMATIC'"); + $gtid_automatic->execute(); + }, repair_table => sub { my ( $stat, $dbh ) = @_; PTDEBUG && _d('Found corrupt table'); diff --git a/sandbox/start-sandbox b/sandbox/start-sandbox index 79ab4c7d..22b4fe81 100755 --- a/sandbox/start-sandbox +++ b/sandbox/start-sandbox @@ -113,6 +113,14 @@ make_sandbox() { echo "query_cache_size=$QUERY_CACHE_SIZE" >> /tmp/$port/my.sandbox.cnf fi + if [ -n "$GTID" ]; then + echo "gtid_mode=on" >> /tmp/$port/my.sandbox.cnf + echo "enforce_gtid_consistency" >> /tmp/$port/my.sandbox.cnf + fi + if [ -n "$REPLICATION_THREADS" ]; then + echo "slave-parallel-workers=$REPLICATION_THREADS" >> /tmp/$port/my.sandbox.cnf + fi + if [ -n "$EXTRA_DEFAULTS_FILE" ]; then cat "$EXTRA_DEFAULTS_FILE" >> /tmp/$port/my.sandbox.cnf fi From b7e0c17b01988d36a9ea4f81525ecf55771019a5 Mon Sep 17 00:00:00 2001 From: Kenny Gryp Date: Wed, 30 Apr 2014 14:19:06 +0200 Subject: [PATCH 2/6] added gtid tests for pt-slave-restart --- t/pt-slave-restart/gtid.t | 119 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 119 insertions(+) create mode 100644 t/pt-slave-restart/gtid.t diff --git a/t/pt-slave-restart/gtid.t b/t/pt-slave-restart/gtid.t new file mode 100644 index 00000000..c02b3709 --- /dev/null +++ b/t/pt-slave-restart/gtid.t @@ -0,0 +1,119 @@ +#!/usr/bin/env perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); 
+use Test::More; + +use PerconaTest; +use Sandbox; +require "$trunk/bin/pt-slave-restart"; + +if ( $sandbox_version lt '5.6' ) { + plan skip_all => 'MySQL Version < 5.6, GTID is not available, skipping tests'; +} + +diag("Stopping/reconfiguring/restarting sandboxes 12345, 12346 and 12347"); + +diag(`$trunk/sandbox/test-env stop >/dev/null`); +diag(`GTID=1 $trunk/sandbox/test-env start >/dev/null`); + +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $master_dbh = $sb->get_dbh_for('master'); +my $slave_dbh = $sb->get_dbh_for('slave1'); +my $slave2_dbh = $sb->get_dbh_for('slave2'); + +if ( !$master_dbh ) { + plan skip_all => 'Cannot connect to sandbox master'; +} +elsif ( !$slave_dbh ) { + plan skip_all => 'Cannot connect to sandbox slave1'; +} +elsif ( !$slave2_dbh ) { + plan skip_all => 'Cannot connect to sandbox slave2'; +} + +$master_dbh->do('DROP DATABASE IF EXISTS test'); +$master_dbh->do('CREATE DATABASE test'); +$master_dbh->do('CREATE TABLE test.t (a INT)'); +$sb->wait_for_slaves; + +# Bust replication +$slave_dbh->do('DROP TABLE test.t'); +$master_dbh->do('INSERT INTO test.t SELECT 1'); +wait_until( + sub { + my $row = $slave_dbh->selectrow_hashref('show slave status'); + return $row->{last_sql_errno}; + } +); + +my $r = $slave_dbh->selectrow_hashref('show slave status'); +like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'It is busted'); + +# Start an instance +diag(`$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12346 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); +my $output = `ps x | grep 'pt-slave-restart \-\-max\-sleep ' | grep -v grep | grep -v pt-slave-restart.t`; +like($output, qr/pt-slave-restart --max/, 'It lives'); + +unlike($output, qr/Table 'test.t' doesn't exist'/, 'It is not busted'); + +ok(-f '/tmp/pt-slave-restart.pid', 'PID file created'); +ok(-f '/tmp/pt-slave-restart.log', 'Log file created'); + +my ($pid) 
= $output =~ /^\s*(\d+)\s+/; +$output = `cat /tmp/pt-slave-restart.pid`; +is($output, $pid, 'PID file has correct PID'); + +diag(`$trunk/bin/pt-slave-restart --stop -q`); +sleep 1; +$output = `ps -eaf | grep pt-slave-restart | grep -v grep`; +unlike($output, qr/pt-slave-restart --max/, 'It is dead'); + +diag(`rm -f /tmp/pt-slave-re*`); +ok(! -f '/tmp/pt-slave-restart.pid', 'PID file removed'); + +# ############################################################################# +# Issue 459: mk-slave-restart --error-text is broken +# ############################################################################# +# Bust replication again. At this point, the master has test.t but +# the slave does not. +$master_dbh->do('DROP TABLE IF EXISTS test.t'); +$master_dbh->do('CREATE TABLE test.t (a INT)'); +sleep 1; +$slave_dbh->do('DROP TABLE test.t'); +$master_dbh->do('INSERT INTO test.t SELECT 1'); +$output = `/tmp/12346/use -e 'show slave status'`; +like( + $output, + qr/Table 'test.t' doesn't exist'/, + 'It is busted again' +); + +# Start an instance +$output = `$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12346 -u msandbox -p msandbox --error-text "doesn't exist" --run-time 1s 2>&1`; +unlike( + $output, + qr/Error does not match/, + '--error-text works (issue 459)' +); + + +# ############################################################################# +# Done. +# ############################################################################# +diag(`rm -f /tmp/pt-slave-re*`); +$sb->wipe_clean($master_dbh); +$sb->wipe_clean($slave_dbh); +diag(`$trunk/sandbox/test-env stop >/dev/null`); +diag(`$trunk/sandbox/test-env start >/dev/null`); + +ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . 
" broke the sandbox"); +done_testing; From bdd969dbdd7ed8dae8e659423e0492c013486875 Mon Sep 17 00:00:00 2001 From: root Date: Wed, 30 Apr 2014 13:57:53 +0000 Subject: [PATCH 3/6] fixed skipping multiple events --- bin/pt-slave-restart | 24 +++++++--- t/pt-slave-restart/gtid.t | 92 ++++++++++++++++++++------------------- 2 files changed, 64 insertions(+), 52 deletions(-) diff --git a/bin/pt-slave-restart b/bin/pt-slave-restart index 781614af..64cbbc9d 100755 --- a/bin/pt-slave-restart +++ b/bin/pt-slave-restart @@ -5063,19 +5063,29 @@ sub watch_server { # get the highest id by sorting the array, removing the undef value my @gtid_exec_sorted = sort { $a <=> $b } grep { defined($_) } @gtid_exec_ranges; - my $gtid_next = $gtid_exec_sorted[-1] + $o->get('skip-count'); + my $gtid_exec_last = $gtid_exec_sorted[-1]; PTDEBUG && _d("GTID: master_uuid:$gtid_masteruuid,\n" . "GTID: executed_gtid_set:$gtid_exec,\n" . "GTID: gtid max for master_uuid:" . $gtid_exec_sorted[-1] . "\n" - . "GTID: next gtid:'$gtid_masteruuid:$gtid_next'"); + . "GTID: last executed gtid:'$gtid_masteruuid:$gtid_exec_last'"); # Set the sessions next gtid, write an empty transaction - my $gtid_set_next = $dbh->prepare("SET GTID_NEXT='" - . $gtid_masteruuid . ":" . $gtid_next . "'"); - $gtid_set_next->execute(); - $dbh->begin_work(); - $dbh->commit(); + my $skipped=0; + until ( $skipped == $o->get('skip-count') ) { + $skipped++; + + my $gtid_next=$gtid_exec_last + $skipped; + + PTDEBUG && _d("GTID: Skipping " . $gtid_masteruuid . ":" . $gtid_next); + + my $gtid_set_next = $dbh->prepare("SET GTID_NEXT='" + . $gtid_masteruuid . ":" . $gtid_next . "'"); + $gtid_set_next->execute(); + $dbh->begin_work(); + $dbh->commit(); + + } # Set the session back to the automatically generated GTID_NEXT. 
my $gtid_automatic = $dbh->prepare("SET GTID_NEXT='AUTOMATIC'"); diff --git a/t/pt-slave-restart/gtid.t b/t/pt-slave-restart/gtid.t index c02b3709..fb67145e 100644 --- a/t/pt-slave-restart/gtid.t +++ b/t/pt-slave-restart/gtid.t @@ -16,7 +16,8 @@ use Sandbox; require "$trunk/bin/pt-slave-restart"; if ( $sandbox_version lt '5.6' ) { - plan skip_all => 'MySQL Version < 5.6, GTID is not available, skipping tests'; + plan skip_all => 'MySQL Version ' . $sandbox_version + . ' < 5.6, GTID is not available, skipping tests'; } diag("Stopping/reconfiguring/restarting sandboxes 12345, 12346 and 12347"); @@ -40,6 +41,9 @@ elsif ( !$slave2_dbh ) { plan skip_all => 'Cannot connect to sandbox slave2'; } +# ############################################################################# +# basic test to see if restart works +# ############################################################################# $master_dbh->do('DROP DATABASE IF EXISTS test'); $master_dbh->do('CREATE DATABASE test'); $master_dbh->do('CREATE TABLE test.t (a INT)'); @@ -56,64 +60,62 @@ wait_until( ); my $r = $slave_dbh->selectrow_hashref('show slave status'); -like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'It is busted'); +like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slave: Replication broke'); # Start an instance diag(`$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12346 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); -my $output = `ps x | grep 'pt-slave-restart \-\-max\-sleep ' | grep -v grep | grep -v pt-slave-restart.t`; -like($output, qr/pt-slave-restart --max/, 'It lives'); +sleep 1; -unlike($output, qr/Table 'test.t' doesn't exist'/, 'It is not busted'); +$r = $slave_dbh->selectrow_hashref('show slave status'); +like($r->{last_errno}, qr/^0$/, 'slave: event is not skipped successfully'); -ok(-f '/tmp/pt-slave-restart.pid', 'PID file created'); -ok(-f '/tmp/pt-slave-restart.log', 'Log file created'); -my ($pid) = 
$output =~ /^\s*(\d+)\s+/; -$output = `cat /tmp/pt-slave-restart.pid`; -is($output, $pid, 'PID file has correct PID'); +diag(`$trunk/bin/pt-slave-restart --stop -q`); +sleep 1; +my $output = `ps -eaf | grep pt-slave-restart | grep -v grep`; +unlike($output, qr/pt-slave-restart --max/, 'slave: stopped pt-slave-restart successfully'); +diag(`rm -f /tmp/pt-slave-re*`); + +# # ############################################################################# +# # test the slave of the master +# # ############################################################################# +$master_dbh->do('DROP DATABASE IF EXISTS test'); +$master_dbh->do('CREATE DATABASE test'); +$master_dbh->do('CREATE TABLE test.t (a INT)'); +$sb->wait_for_slaves; + +# Bust replication +$slave2_dbh->do('DROP TABLE test.t'); +$master_dbh->do('INSERT INTO test.t SELECT 1'); +wait_until( + sub { + my $row = $slave2_dbh->selectrow_hashref('show slave status'); + return $row->{last_sql_errno}; + } +); + +$r = $slave2_dbh->selectrow_hashref('show slave status'); +like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslave: Replication broke'); + +# Start an instance +diag(`$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); +sleep 1; + +$r = $slave2_dbh->selectrow_hashref('show slave status'); +like($r->{last_errno}, qr/^0$/, 'slaveofslave: event is not skipped successfully'); + diag(`$trunk/bin/pt-slave-restart --stop -q`); sleep 1; $output = `ps -eaf | grep pt-slave-restart | grep -v grep`; -unlike($output, qr/pt-slave-restart --max/, 'It is dead'); - +unlike($output, qr/pt-slave-restart --max/, 'slaveofslave: stopped pt-slave-restart successfully'); diag(`rm -f /tmp/pt-slave-re*`); -ok(! 
-f '/tmp/pt-slave-restart.pid', 'PID file removed'); - -# ############################################################################# -# Issue 459: mk-slave-restart --error-text is broken -# ############################################################################# -# Bust replication again. At this point, the master has test.t but -# the slave does not. -$master_dbh->do('DROP TABLE IF EXISTS test.t'); -$master_dbh->do('CREATE TABLE test.t (a INT)'); -sleep 1; -$slave_dbh->do('DROP TABLE test.t'); -$master_dbh->do('INSERT INTO test.t SELECT 1'); -$output = `/tmp/12346/use -e 'show slave status'`; -like( - $output, - qr/Table 'test.t' doesn't exist'/, - 'It is busted again' -); - -# Start an instance -$output = `$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12346 -u msandbox -p msandbox --error-text "doesn't exist" --run-time 1s 2>&1`; -unlike( - $output, - qr/Error does not match/, - '--error-text works (issue 459)' -); - - # ############################################################################# # Done. # ############################################################################# diag(`rm -f /tmp/pt-slave-re*`); -$sb->wipe_clean($master_dbh); -$sb->wipe_clean($slave_dbh); -diag(`$trunk/sandbox/test-env stop >/dev/null`); -diag(`$trunk/sandbox/test-env start >/dev/null`); +# diag(`$trunk/sandbox/test-env stop >/dev/null`); +# diag(`$trunk/sandbox/test-env start >/dev/null`); -ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +#ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . 
" broke the sandbox"); done_testing; From 5a28a60030c6ea842de866b0d067d8ef23e5daa0 Mon Sep 17 00:00:00 2001 From: Kenny Gryp Date: Wed, 30 Apr 2014 16:33:27 +0200 Subject: [PATCH 4/6] - add --skip-gtid-uuid - documentation GTID --- bin/pt-slave-restart | 67 +++++++++++++++++++++++++++++++++------ t/pt-slave-restart/gtid.t | 54 ++++++++++++++++++++++++++++--- 2 files changed, 108 insertions(+), 13 deletions(-) diff --git a/bin/pt-slave-restart b/bin/pt-slave-restart index 64cbbc9d..cd417b0f 100755 --- a/bin/pt-slave-restart +++ b/bin/pt-slave-restart @@ -5050,12 +5050,20 @@ sub watch_server { # - it skips the next transaction from the master_uuid # (when a slaveB is replicating from slaveA, # the master_uuid is it's own master, slaveA) - my $gtid_exec = $stat->{executed_gtid_set}; - my $gtid_masteruuid = $stat->{master_uuid}; + my $gtid_exec = $stat->{executed_gtid_set}; - $gtid_exec =~ /$gtid_masteruuid([0-9-:]*)/; - my $gtid_exec_ids = $1; - $gtid_exec_ids =~ s/:[0-9]-/:/g; + # default behavior is to take the master_uuid from SHOW SLAVE STATUS + # or use --skip-gtid-uuid specified uuid. + my $gtid_uuid; + if ( $o->get('skip-gtid-uuid') eq 'master' ) { + $gtid_uuid = $stat->{master_uuid}; + } else { + $gtid_uuid = $o->get('skip-gtid-uuid'); + } + + $gtid_exec =~ /$gtid_uuid([0-9-:]*)/; + my $gtid_exec_ids = $1; + $gtid_exec_ids =~ s/:[0-9]-/:/g; my @gtid_exec_ranges = split(/:/, $gtid_exec_ids); delete $gtid_exec_ranges[0]; # undef the first value,it's always empty @@ -5065,10 +5073,10 @@ sub watch_server { grep { defined($_) } @gtid_exec_ranges; my $gtid_exec_last = $gtid_exec_sorted[-1]; - PTDEBUG && _d("GTID: master_uuid:$gtid_masteruuid,\n" + PTDEBUG && _d("GTID: master_uuid:$gtid_uuid,\n" . "GTID: executed_gtid_set:$gtid_exec,\n" . "GTID: gtid max for master_uuid:" . $gtid_exec_sorted[-1] . "\n" - . "GTID: last executed gtid:'$gtid_masteruuid:$gtid_exec_last'"); + . 
"GTID: last executed gtid:'$gtid_uuid:$gtid_exec_last'"); # Set the sessions next gtid, write an empty transaction my $skipped=0; @@ -5077,10 +5085,10 @@ sub watch_server { my $gtid_next=$gtid_exec_last + $skipped; - PTDEBUG && _d("GTID: Skipping " . $gtid_masteruuid . ":" . $gtid_next); + PTDEBUG && _d("GTID: Skipping " . $gtid_uuid . ":" . $gtid_next); my $gtid_set_next = $dbh->prepare("SET GTID_NEXT='" - . $gtid_masteruuid . ":" . $gtid_next . "'"); + . $gtid_uuid . ":" . $gtid_next . "'"); $gtid_set_next->execute(); $dbh->begin_work(); $dbh->commit(); @@ -5377,6 +5385,30 @@ sleep time, whichever is less. =back +=head1 GLOBAL TRANSACTION IDS + +pt-slave-restart supports Global Transaction IDs, which has been introduced in +MySQL in 5.6.5. + +It's important to keep in mind that: + +=over + +=item * + +pt-slave-restart will not skip transactions when multiple replication threads +are being used (slave_parallel_workers>0). pt-slave-restart does not know what +the GTID event is of the failed transaction of a specific slave thread. + +=item * + +The default behavior is to skip the next transaction from the slave's master. +Writes can originate on different servers, each with their own unique UUID. + +See L<"--skip-gtid-uuid">. + +=back + =head1 EXIT STATUS An exit status of 0 (sometimes also called a return value or return code) @@ -5631,6 +5663,23 @@ type: int; default: 1 Number of statements to skip when restarting the slave. +=item --skip-gtid-uuid + +type: string; default: master + +When using GTID, an empty transaction should be created in order to skip it. +If writes are coming from different nodes in the replication tree above, it is +not possible to know which event from which UUID to skip. + +By default, the UUID from the slave's master is being used to skip. +(C column). + +Example: Master -> Slave1 -> Slave2. When skipping events from 'Slave2', and +writes originated from 'Master', --skip-gtid-uuid should be specified with the +'Master' it's UUID. 
+ +See L<"GLOBAL TRANSACTION IDS">. + =item --sleep type: int; default: 1 diff --git a/t/pt-slave-restart/gtid.t b/t/pt-slave-restart/gtid.t index fb67145e..7bdbba5b 100644 --- a/t/pt-slave-restart/gtid.t +++ b/t/pt-slave-restart/gtid.t @@ -76,9 +76,9 @@ my $output = `ps -eaf | grep pt-slave-restart | grep -v grep`; unlike($output, qr/pt-slave-restart --max/, 'slave: stopped pt-slave-restart successfully'); diag(`rm -f /tmp/pt-slave-re*`); -# # ############################################################################# -# # test the slave of the master -# # ############################################################################# +# ############################################################################# +# test the slave of the master +# ############################################################################# $master_dbh->do('DROP DATABASE IF EXISTS test'); $master_dbh->do('CREATE DATABASE test'); $master_dbh->do('CREATE TABLE test.t (a INT)'); @@ -94,11 +94,15 @@ wait_until( } ); +# fetch the master uuid, which is the machine we need to skip an event from +$r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid'); +my $uuid = $r->{uuid}; + $r = $slave2_dbh->selectrow_hashref('show slave status'); like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslave: Replication broke'); # Start an instance -diag(`$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); +diag(`$trunk/bin/pt-slave-restart --skip-gtid-uuid=$uuid --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); sleep 1; $r = $slave2_dbh->selectrow_hashref('show slave status'); @@ -110,6 +114,48 @@ sleep 1; $output = `ps -eaf | grep pt-slave-restart | grep -v grep`; unlike($output, qr/pt-slave-restart --max/, 'slaveofslave: stopped pt-slave-restart successfully'); 
diag(`rm -f /tmp/pt-slave-re*`); + + +# ############################################################################# +# test skipping 2 events in a row. +# ############################################################################# +$master_dbh->do('DROP DATABASE IF EXISTS test'); +$master_dbh->do('CREATE DATABASE test'); +$master_dbh->do('CREATE TABLE test.t (a INT)'); +$sb->wait_for_slaves; + +# Bust replication +$slave2_dbh->do('DROP TABLE test.t'); +$master_dbh->do('INSERT INTO test.t SELECT 1'); +$master_dbh->do('INSERT INTO test.t SELECT 1'); +wait_until( + sub { + my $row = $slave2_dbh->selectrow_hashref('show slave status'); + return $row->{last_sql_errno}; + } +); + +# fetch the master uuid, which is the machine we need to skip an event from +$r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid'); +$uuid = $r->{uuid}; + +$r = $slave2_dbh->selectrow_hashref('show slave status'); +like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslaveskip2: Replication broke'); + +# Start an instance +diag(`$trunk/bin/pt-slave-restart --skip-count=2 --skip-gtid-uuid=$uuid --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); +sleep 1; + +$r = $slave2_dbh->selectrow_hashref('show slave status'); +like($r->{last_errno}, qr/^0$/, 'slaveofslaveskip2: event is not skipped successfully'); + + +diag(`$trunk/bin/pt-slave-restart --stop -q`); +sleep 1; +$output = `ps -eaf | grep pt-slave-restart | grep -v grep`; +unlike($output, qr/pt-slave-restart --max/, 'slaveofslaveskip2: stopped pt-slave-restart successfully'); +diag(`rm -f /tmp/pt-slave-re*`); + # ############################################################################# # Done. 
# ############################################################################# From b3c6a0aac2fed1da26ad42119af2d8932e443354 Mon Sep 17 00:00:00 2001 From: Kenny Gryp Date: Wed, 30 Apr 2014 16:53:04 +0200 Subject: [PATCH 5/6] bail out when pt-slave-restart has gtid enabled and multiple replication threads are enabled --- bin/pt-slave-restart | 12 ++++ sandbox/start-sandbox | 2 +- t/pt-slave-restart/gtid.t | 7 +- t/pt-slave-restart/gtid_parallelreplication.t | 64 +++++++++++++++++++ 4 files changed, 80 insertions(+), 5 deletions(-) create mode 100644 t/pt-slave-restart/gtid_parallelreplication.t diff --git a/bin/pt-slave-restart b/bin/pt-slave-restart index cd417b0f..b2c39738 100755 --- a/bin/pt-slave-restart +++ b/bin/pt-slave-restart @@ -5003,6 +5003,18 @@ sub watch_server { ? 'enabled' : 'disabled')); + # If GTID is enabled, slave_parallel_workers should be == 0. + # it's currently not possible to know what GTID event the failed trx is + if ( $gtid_mode eq 'ON') { + my $threads = $dbh->selectrow_hashref('SELECT + @@GLOBAL.slave_parallel_workers AS threads'); + if ( $threads->{threads} > 0 ) { + die("Error: GTID is enabled, and slave_parallel_workers=" + . $threads->{threads} + . ". It is impossible to skip transactions properly.\n"); + } + } + # ######################################################################## # Lookup tables of things to do when a problem is detected. 
# ######################################################################## diff --git a/sandbox/start-sandbox b/sandbox/start-sandbox index 22b4fe81..ea9dd833 100755 --- a/sandbox/start-sandbox +++ b/sandbox/start-sandbox @@ -118,7 +118,7 @@ make_sandbox() { echo "enforce_gtid_consistency" >> /tmp/$port/my.sandbox.cnf fi if [ -n "$REPLICATION_THREADS" ]; then - echo "slave-parallel-workers=$REPLICATION_THREADS" >> /tmp/$port/my.sandbox.cnf + echo "slave_parallel_workers=$REPLICATION_THREADS" >> /tmp/$port/my.sandbox.cnf fi if [ -n "$EXTRA_DEFAULTS_FILE" ]; then diff --git a/t/pt-slave-restart/gtid.t b/t/pt-slave-restart/gtid.t index 7bdbba5b..b801df51 100644 --- a/t/pt-slave-restart/gtid.t +++ b/t/pt-slave-restart/gtid.t @@ -115,7 +115,6 @@ $output = `ps -eaf | grep pt-slave-restart | grep -v grep`; unlike($output, qr/pt-slave-restart --max/, 'slaveofslave: stopped pt-slave-restart successfully'); diag(`rm -f /tmp/pt-slave-re*`); - # ############################################################################# # test skipping 2 events in a row. # ############################################################################# @@ -160,8 +159,8 @@ diag(`rm -f /tmp/pt-slave-re*`); # Done. # ############################################################################# diag(`rm -f /tmp/pt-slave-re*`); -# diag(`$trunk/sandbox/test-env stop >/dev/null`); -# diag(`$trunk/sandbox/test-env start >/dev/null`); +diag(`$trunk/sandbox/test-env stop >/dev/null`); +diag(`$trunk/sandbox/test-env start >/dev/null`); -#ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . 
" broke the sandbox"); done_testing; diff --git a/t/pt-slave-restart/gtid_parallelreplication.t b/t/pt-slave-restart/gtid_parallelreplication.t new file mode 100644 index 00000000..439870f4 --- /dev/null +++ b/t/pt-slave-restart/gtid_parallelreplication.t @@ -0,0 +1,64 @@ +#!/usr/bin/env perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use Test::More; + +use PerconaTest; +use Sandbox; +require "$trunk/bin/pt-slave-restart"; + +if ( $sandbox_version lt '5.6' ) { + plan skip_all => 'MySQL Version ' . $sandbox_version + . ' < 5.6, GTID is not available, skipping tests'; +} + +diag("Stopping/reconfiguring/restarting sandboxes 12345, 12346 and 12347"); + +diag(`$trunk/sandbox/test-env stop >/dev/null`); +diag(`REPLICATION_THREADS=2 GTID=1 $trunk/sandbox/test-env start >/dev/null`); + +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $master_dbh = $sb->get_dbh_for('master'); +my $slave_dbh = $sb->get_dbh_for('slave1'); +my $slave2_dbh = $sb->get_dbh_for('slave2'); + +if ( !$master_dbh ) { + plan skip_all => 'Cannot connect to sandbox master'; +} +elsif ( !$slave_dbh ) { + plan skip_all => 'Cannot connect to sandbox slave1'; +} +elsif ( !$slave2_dbh ) { + plan skip_all => 'Cannot connect to sandbox slave2'; +} + +# ############################################################################# +# pt-slave-restart should exit! 
+# ############################################################################# +# Start an instance +my $output=`$trunk/bin/pt-slave-restart --run-time=1s -h 127.0.0.1 -P 12346 -u msandbox -p msandbox 2>&1`; + +like( + $output, + qr/It is impossible to skip transactions properly./, + "pt-slave-restart exits with multiple replication threads" +); + +# ############################################################################# +# Done. +# ############################################################################# +diag(`rm -f /tmp/pt-slave-re*`); +diag(`$trunk/sandbox/test-env stop >/dev/null`); +diag(`$trunk/sandbox/test-env start >/dev/null`); + +ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +done_testing; From 3b961873135c934b245178cdc1ec9bfe0ab3b938 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 28 May 2014 13:31:10 -0700 Subject: [PATCH 6/6] Change --skip-gtid-uuid to --master-uuid. Make test more reliable. Simplify code and clean up docs. --- bin/pt-slave-restart | 129 +++++++++++++--------------- lib/Sandbox.pm | 1 + sandbox/test-env | 17 ++-- t/pt-slave-restart/gtid.t | 169 ++++++++++++++++++++++--------------- util/checksum-test-dataset | 1 + 5 files changed, 174 insertions(+), 143 deletions(-) diff --git a/bin/pt-slave-restart b/bin/pt-slave-restart index b2c39738..35e9d880 100755 --- a/bin/pt-slave-restart +++ b/bin/pt-slave-restart @@ -4982,8 +4982,8 @@ sub watch_server { $start_sql .= " UNTIL RELAY_LOG_FILE = '$file', RELAY_LOG_POS = $pos"; } - my $start = $dbh->prepare($start_sql); - my $stop = $dbh->prepare('STOP SLAVE'); + my $start = $dbh->prepare($start_sql); + my $stop = $dbh->prepare('STOP SLAVE'); # ######################################################################## # Detect if GTID is enabled. Skipping an event is done differently. 
@@ -4991,27 +4991,25 @@ sub watch_server { # When MySQL 5.6.5 or higher is used and gtid is enabled, skipping a # transaction is not possible with SQL_SLAVE_SKIP_COUNTER my $skip_event; - my $gtid_mode; - + my $have_gtid = 0; if ( VersionParser->new($dbh) >= '5.6.5' ) { - my $row = $dbh->selectrow_arrayref('SELECT @@GLOBAL.gtid_mode'); - $gtid_mode = $row->[0]; - } else { - $gtid_mode="N/A"; + my $row = $dbh->selectrow_arrayref('SELECT @@GLOBAL.gtid_mode'); + PTDEBUG && _d('@@GLOBAL.gtid_mode:', $row->[0]); + if ( $row && $row->[0] eq 'ON' ) { + $have_gtid = 1; + } } - PTDEBUG && _d('GTID is ' . ($gtid_mode eq 'ON' - ? 'enabled' - : 'disabled')); + PTDEBUG && _d('Have GTID:', $have_gtid); # If GTID is enabled, slave_parallel_workers should be == 0. - # it's currently not possible to know what GTID event the failed trx is - if ( $gtid_mode eq 'ON') { - my $threads = $dbh->selectrow_hashref('SELECT - @@GLOBAL.slave_parallel_workers AS threads'); + # It's currently not possible to know what GTID event the failed trx is. + if ( $have_gtid ) { + my $threads = $dbh->selectrow_hashref( + 'SELECT @@GLOBAL.slave_parallel_workers AS threads'); if ( $threads->{threads} > 0 ) { - die("Error: GTID is enabled, and slave_parallel_workers=" - . $threads->{threads} - . ". It is impossible to skip transactions properly.\n"); + die "Cannot skip transactions properly because GTID is enabled " + . "and slave_parallel_workers > 0. See 'GLOBAL TRANSACTION IDS' " + . "in the tool's documentation.\n"; } } @@ -5023,9 +5021,7 @@ sub watch_server { [ qr/Could not parse relay log event entry/ => 'refetch_relay_log' ], [ qr/Incorrect key file for table/ => 'repair_table' ], # This must be the last one. It's a catch-all rule: skip and restart. - [ qr/./ => ($gtid_mode eq 'ON' - ? 'skip_gtid' - : 'skip') ], + [ qr/./ => ($have_gtid ? 
'skip_gtid' : 'skip') ], ); # ######################################################################## @@ -5054,6 +5050,14 @@ sub watch_server { }, skip_gtid => sub { my ( $stat, $dbh ) = @_; + + # Get master_uuid from SHOW SLAVE STATUS if a UUID is not specified + # with --master-uuid. + my $gtid_uuid = $o->get('master-uuid'); + if ( !$gtid_uuid ) { + $gtid_uuid = $stat->{master_uuid}; + die "No master_uuid" unless $gtid_uuid; # shouldn't happen + } # We need the highest transaction in the executed_gtid_set. # and then we need to increase it by 1 (the one we want to skip) @@ -5062,54 +5066,38 @@ sub watch_server { # - it skips the next transaction from the master_uuid # (when a slaveB is replicating from slaveA, # the master_uuid is it's own master, slaveA) - my $gtid_exec = $stat->{executed_gtid_set}; - - # default behavior is to take the master_uuid from SHOW SLAVE STATUS - # or use --skip-gtid-uuid specified uuid. - my $gtid_uuid; - if ( $o->get('skip-gtid-uuid') eq 'master' ) { - $gtid_uuid = $stat->{master_uuid}; - } else { - $gtid_uuid = $o->get('skip-gtid-uuid'); - } - - $gtid_exec =~ /$gtid_uuid([0-9-:]*)/; - my $gtid_exec_ids = $1; - $gtid_exec_ids =~ s/:[0-9]-/:/g; + my ($gtid_exec_ids) = ($stat->{executed_gtid_set} || '') =~ m/\Q$gtid_uuid\E([0-9-:]*)/; + die "No executed GTIDs" unless $gtid_exec_ids; + $gtid_exec_ids =~ s/:[0-9]+-/:/g; # ":11-18" => ":18", keep each interval's upper bound my @gtid_exec_ranges = split(/:/, $gtid_exec_ids); - delete $gtid_exec_ranges[0]; # undef the first value,it's always empty + delete $gtid_exec_ranges[0]; # undef the first value, it's always empty - # get the highest id by sorting the array, removing the undef value + # Get the highest id by sorting the array, removing the undef value. my @gtid_exec_sorted = sort { $a <=> $b } grep { defined($_) } @gtid_exec_ranges; my $gtid_exec_last = $gtid_exec_sorted[-1]; - PTDEBUG && _d("GTID: master_uuid:$gtid_uuid,\n" - . "GTID: executed_gtid_set:$gtid_exec,\n" - . "GTID: gtid max for master_uuid:" . $gtid_exec_sorted[-1] . "\n" - . 
"GTID: last executed gtid:'$gtid_uuid:$gtid_exec_last'"); + PTDEBUG && _d("\n", + "GTID: master_uuid:", $gtid_uuid, "\n", + "GTID: executed_gtid_set:", $gtid_exec_ids, "\n", + "GTID: max for master_uuid:", $gtid_exec_sorted[-1], "\n", + "GTID: last executed gtid:", $gtid_uuid, ":", $gtid_exec_last); # Set the sessions next gtid, write an empty transaction - my $skipped=0; - until ( $skipped == $o->get('skip-count') ) { - $skipped++; - - my $gtid_next=$gtid_exec_last + $skipped; - - PTDEBUG && _d("GTID: Skipping " . $gtid_uuid . ":" . $gtid_next); - - my $gtid_set_next = $dbh->prepare("SET GTID_NEXT='" - . $gtid_uuid . ":" . $gtid_next . "'"); - $gtid_set_next->execute(); + my $skipped = 0; + while ( $skipped++ < $o->get('skip-count') ) { + my $gtid_next = $gtid_exec_last + $skipped; + my $sql = "SET GTID_NEXT='$gtid_uuid:$gtid_next'"; + PTDEBUG && _d($sql); + my $sth = $dbh->prepare($sql); + $sth->execute(); $dbh->begin_work(); $dbh->commit(); - } # Set the session back to the automatically generated GTID_NEXT. - my $gtid_automatic = $dbh->prepare("SET GTID_NEXT='AUTOMATIC'"); - $gtid_automatic->execute(); + $dbh->do("SET GTID_NEXT='AUTOMATIC'"); }, repair_table => sub { my ( $stat, $dbh ) = @_; @@ -5399,25 +5387,23 @@ sleep time, whichever is less. =head1 GLOBAL TRANSACTION IDS -pt-slave-restart supports Global Transaction IDs, which has been introduced in -MySQL in 5.6.5. - -It's important to keep in mind that: +As of Percona Toolkit 2.2.8, pt-slave-restart supports Global Transaction IDs +introduced in MySQL 5.6.5. It's important to keep in mind that: =over =item * pt-slave-restart will not skip transactions when multiple replication threads -are being used (slave_parallel_workers>0). pt-slave-restart does not know what -the GTID event is of the failed transaction of a specific slave thread. +are being used (slave_parallel_workers > 0). pt-slave-restart does not know +what the GTID event is of the failed transaction of a specific slave thread. 
=item * The default behavior is to skip the next transaction from the slave's master. -Writes can originate on different servers, each with their own unique UUID. +Writes can originate on different servers, each with their own UUID. -See L<"--skip-gtid-uuid">. +See L<"--master-uuid">. =back @@ -5675,20 +5661,23 @@ type: int; default: 1 Number of statements to skip when restarting the slave. -=item --skip-gtid-uuid +=item --master-uuid -type: string; default: master +type: string When using GTID, an empty transaction should be created in order to skip it. If writes are coming from different nodes in the replication tree above, it is not possible to know which event from which UUID to skip. -By default, the UUID from the slave's master is being used to skip. -(C column). +By default, transactions from the slave's master (C<'Master_UUID'> from +C<SHOW SLAVE STATUS>) are skipped. -Example: Master -> Slave1 -> Slave2. When skipping events from 'Slave2', and -writes originated from 'Master', --skip-gtid-uuid should be specified with the -'Master' it's UUID. +For example, with + + master1 -> slave1 -> slave2 + +When skipping events on slave2 that were written to master1, you must specify +the UUID of master1, else the tool will use the UUID of slave1 by default. See L<"GLOBAL TRANSACTION IDS">. diff --git a/lib/Sandbox.pm b/lib/Sandbox.pm index 8c3055ee..3ee79ff4 100644 --- a/lib/Sandbox.pm +++ b/lib/Sandbox.pm @@ -379,6 +379,7 @@ sub verify_test_data { # Diff the two sets of checksums: host to master (ref). my @diffs; foreach my $c ( @checksums ) { + next unless $c->{checksum}; if ( $c->{checksum} ne $ref->{$c->{table}}->{checksum} ) { push @diffs, $c->{table}; } diff --git a/sandbox/test-env b/sandbox/test-env index df687d14..ff851068 100755 --- a/sandbox/test-env +++ b/sandbox/test-env @@ -315,13 +315,16 @@ case $opt in fi if [ $? -eq 0 -a "$MYSQL_VERSION" '>' "4.1" ]; then - echo -n "Loading sakila database... 
" - ./load-sakila-db 12345 "${2:-""}" - exit_status=$((exit_status | $?)) - if [ $exit_status -ne 0 ]; then - echo "FAILED" - else - echo "OK" + SAKILA=${SAKILA:-1} + if [ $SAKILA -eq 1 ]; then + echo -n "Loading sakila database... " + ./load-sakila-db 12345 "${2:-""}" + exit_status=$((exit_status | $?)) + if [ $exit_status -ne 0 ]; then + echo "FAILED" + else + echo "OK" + fi fi # Create percona_test db and checksum all the tables. diff --git a/t/pt-slave-restart/gtid.t b/t/pt-slave-restart/gtid.t index b801df51..d467af21 100644 --- a/t/pt-slave-restart/gtid.t +++ b/t/pt-slave-restart/gtid.t @@ -16,69 +16,119 @@ use Sandbox; require "$trunk/bin/pt-slave-restart"; if ( $sandbox_version lt '5.6' ) { - plan skip_all => 'MySQL Version ' . $sandbox_version - . ' < 5.6, GTID is not available, skipping tests'; + plan skip_all => "Requires MySQL 5.6"; } -diag("Stopping/reconfiguring/restarting sandboxes 12345, 12346 and 12347"); - -diag(`$trunk/sandbox/test-env stop >/dev/null`); -diag(`GTID=1 $trunk/sandbox/test-env start >/dev/null`); +diag(`SAKILA=0 GTID=1 $trunk/sandbox/test-env restart`); my $dp = new DSNParser(opts=>$dsn_opts); my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); -my $master_dbh = $sb->get_dbh_for('master'); -my $slave_dbh = $sb->get_dbh_for('slave1'); -my $slave2_dbh = $sb->get_dbh_for('slave2'); +my $master_dbh = $sb->get_dbh_for('master'); +my $slave1_dbh = $sb->get_dbh_for('slave1'); +my $slave2_dbh = $sb->get_dbh_for('slave2'); if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } -elsif ( !$slave_dbh ) { +elsif ( !$slave1_dbh ) { plan skip_all => 'Cannot connect to sandbox slave1'; } elsif ( !$slave2_dbh ) { plan skip_all => 'Cannot connect to sandbox slave2'; } +my $slave1_dsn = $sb->dsn_for("slave1"); +my $slave2_dsn = $sb->dsn_for("slave2"); + +my $pid_file = "/tmp/pt-slave-restart-test-$PID.pid"; +my $log_file = "/tmp/pt-slave-restart-test-$PID.log"; +my $cmd = "$trunk/bin/pt-slave-restart --daemonize 
--run-time 5 --max-sleep .25 --pid $pid_file --log $log_file"; + +sub start { + my ( $extra ) = @_; + stop() or return; + system "$cmd $extra"; + PerconaTest::wait_for_files($pid_file); +} + +sub stop() { + return 1 if !is_running(); + diag(`$trunk/bin/pt-slave-restart --stop -q >/dev/null 2>&1 &`); + wait_until(sub { !-f $pid_file }, 0.3, 2); + diag(`rm -f /tmp/pt-slave-restart-sentinel`); + return is_running() ? 0 : 1; +} + +sub is_running { + chomp(my $running = `ps -eaf | grep -v grep | grep '$cmd'`); + if (!-f $pid_file && !$running) { + return 0; + } elsif (-f $pid_file && !$running) { + diag(`rm -f $pid_file`); + return 0; + } + return 1; +} + +sub wait_repl_broke { + my $dbh = shift; + return wait_until( + sub { + my $row = $dbh->selectrow_hashref('show slave status'); + return $row->{last_sql_errno}; + } + ); +} + +sub wait_repl_ok { + my $dbh = shift; + wait_until( + sub { + my $row = $dbh->selectrow_hashref('show slave status'); + return $row->{last_sql_errno} == 0; + }, + 0.30, + 5, + ); +} + # ############################################################################# -# basic test to see if restart works +# Basic test to see if restart works with GTID. 
# ############################################################################# + $master_dbh->do('DROP DATABASE IF EXISTS test'); $master_dbh->do('CREATE DATABASE test'); $master_dbh->do('CREATE TABLE test.t (a INT)'); $sb->wait_for_slaves; # Bust replication -$slave_dbh->do('DROP TABLE test.t'); +$slave1_dbh->do('DROP TABLE test.t'); $master_dbh->do('INSERT INTO test.t SELECT 1'); -wait_until( - sub { - my $row = $slave_dbh->selectrow_hashref('show slave status'); - return $row->{last_sql_errno}; - } -); +wait_repl_broke($slave1_dbh) or die "Failed to break replication"; -my $r = $slave_dbh->selectrow_hashref('show slave status'); +my $r = $slave1_dbh->selectrow_hashref('show slave status'); like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slave: Replication broke'); -# Start an instance -diag(`$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12346 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); -sleep 1; +# Start pt-slave-restart and wait up to 5s for it to fix replication +# (it should take < 1s but tests can be really slow sometimes). +start("$slave1_dsn") or die "Failed to start pt-slave-restart"; +wait_repl_ok($slave1_dbh); -$r = $slave_dbh->selectrow_hashref('show slave status'); -like($r->{last_errno}, qr/^0$/, 'slave: event is not skipped successfully'); +# Check if replication is fixed. +$r = $slave1_dbh->selectrow_hashref('show slave status'); +like( + $r->{last_errno}, + qr/^0$/, + 'Event is skipped', +) or BAIL_OUT("Replication is broken"); - -diag(`$trunk/bin/pt-slave-restart --stop -q`); -sleep 1; -my $output = `ps -eaf | grep pt-slave-restart | grep -v grep`; -unlike($output, qr/pt-slave-restart --max/, 'slave: stopped pt-slave-restart successfully'); -diag(`rm -f /tmp/pt-slave-re*`); +# Stop pt-slave-restart. 
+stop() or die "Failed to stop pt-slave-restart"; # ############################################################################# -# test the slave of the master +# Test the slave of the master. # ############################################################################# + $master_dbh->do('DROP DATABASE IF EXISTS test'); $master_dbh->do('CREATE DATABASE test'); $master_dbh->do('CREATE TABLE test.t (a INT)'); @@ -87,12 +137,7 @@ $sb->wait_for_slaves; # Bust replication $slave2_dbh->do('DROP TABLE test.t'); $master_dbh->do('INSERT INTO test.t SELECT 1'); -wait_until( - sub { - my $row = $slave2_dbh->selectrow_hashref('show slave status'); - return $row->{last_sql_errno}; - } -); +wait_repl_broke($slave2_dbh) or die "Failed to break replication"; # fetch the master uuid, which is the machine we need to skip an event from $r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid'); @@ -102,22 +147,22 @@ $r = $slave2_dbh->selectrow_hashref('show slave status'); like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslave: Replication broke'); # Start an instance -diag(`$trunk/bin/pt-slave-restart --skip-gtid-uuid=$uuid --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); -sleep 1; +start("--master-uuid=$uuid $slave2_dsn") or die; +wait_repl_ok($slave2_dbh); $r = $slave2_dbh->selectrow_hashref('show slave status'); -like($r->{last_errno}, qr/^0$/, 'slaveofslave: event is not skipped successfully'); +like( + $r->{last_errno}, + qr/^0$/, + 'Skips event from master on slave2' +) or BAIL_OUT("Replication is broken"); - -diag(`$trunk/bin/pt-slave-restart --stop -q`); -sleep 1; -$output = `ps -eaf | grep pt-slave-restart | grep -v grep`; -unlike($output, qr/pt-slave-restart --max/, 'slaveofslave: stopped pt-slave-restart successfully'); -diag(`rm -f /tmp/pt-slave-re*`); +stop() or die "Failed to stop pt-slave-restart"; # 
############################################################################# -# test skipping 2 events in a row. +# Test skipping 2 events in a row. # ############################################################################# + $master_dbh->do('DROP DATABASE IF EXISTS test'); $master_dbh->do('CREATE DATABASE test'); $master_dbh->do('CREATE TABLE test.t (a INT)'); @@ -127,12 +172,7 @@ $sb->wait_for_slaves; $slave2_dbh->do('DROP TABLE test.t'); $master_dbh->do('INSERT INTO test.t SELECT 1'); $master_dbh->do('INSERT INTO test.t SELECT 1'); -wait_until( - sub { - my $row = $slave2_dbh->selectrow_hashref('show slave status'); - return $row->{last_sql_errno}; - } -); +wait_repl_broke($slave2_dbh) or die "Failed to break replication"; # fetch the master uuid, which is the machine we need to skip an event from $r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid'); @@ -142,25 +182,22 @@ $r = $slave2_dbh->selectrow_hashref('show slave status'); like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslaveskip2: Replication broke'); # Start an instance -diag(`$trunk/bin/pt-slave-restart --skip-count=2 --skip-gtid-uuid=$uuid --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`); -sleep 1; +start("--skip-count=2 --master-uuid=$uuid $slave2_dsn") or die; +wait_repl_ok($slave2_dbh); $r = $slave2_dbh->selectrow_hashref('show slave status'); -like($r->{last_errno}, qr/^0$/, 'slaveofslaveskip2: event is not skipped successfully'); +like( + $r->{last_errno}, + qr/^0$/, + 'Skips multiple events' +) or BAIL_OUT("Replication is broken"); - -diag(`$trunk/bin/pt-slave-restart --stop -q`); -sleep 1; -$output = `ps -eaf | grep pt-slave-restart | grep -v grep`; -unlike($output, qr/pt-slave-restart --max/, 'slaveofslaveskip2: stopped pt-slave-restart successfully'); -diag(`rm -f /tmp/pt-slave-re*`); +stop() or die "Failed to stop pt-slave-restart"; # 
############################################################################# # Done. # ############################################################################# -diag(`rm -f /tmp/pt-slave-re*`); -diag(`$trunk/sandbox/test-env stop >/dev/null`); -diag(`$trunk/sandbox/test-env start >/dev/null`); - +diag(`rm -f $pid_file $log_file >/dev/null`); +diag(`$trunk/sandbox/test-env restart`); ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); done_testing; diff --git a/util/checksum-test-dataset b/util/checksum-test-dataset index b6ef25ae..ea738727 100755 --- a/util/checksum-test-dataset +++ b/util/checksum-test-dataset @@ -61,6 +61,7 @@ my $sql = "CHECKSUM TABLES " . join(", ", map { "sakila.$_" } @tables_in_sakila); my @checksums = @{$dbh->selectall_arrayref($sql, {Slice => {} })}; foreach my $c ( @checksums ) { + next unless $c->{Checksum}; $dbh->do("INSERT INTO percona_test.checksums(db_tbl, checksum) VALUES('$c->{Table}', $c->{Checksum})"); }