From e9e8285b5446407208d5ee42d6b43d0576e8a3d2 Mon Sep 17 00:00:00 2001 From: Baron Schwartz Date: Mon, 4 Jun 2012 11:10:41 -0400 Subject: [PATCH] pt-slave-delay doesn't reconnect reliably, this is a start --- bin/pt-slave-delay | 9 +++++++++ t/pt-slave-delay/auto_restart.t | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/bin/pt-slave-delay b/bin/pt-slave-delay index ddbb946d..99947767 100755 --- a/bin/pt-slave-delay +++ b/bin/pt-slave-delay @@ -2117,6 +2117,7 @@ sub main { $now = time(); + # TODO: this is a race condition. See 0xdeadbeef below. if ( !$slave_dbh || !$slave_dbh->ping() ) { # Try 10 times, for about 2 minutes, to reconnect to the slave, # increasing wait time from 3 to 15 seconds. @@ -2151,6 +2152,8 @@ sub main { ); last unless $oktorun; # might have gotten interrupt while waiting } + # 0xdeadbeef (see above): just because we reconnected in the above Retry + # does not mean we have a connection here! $status = $slave_dbh->selectrow_hashref("SHOW SLAVE STATUS"); if ( !$status || ! %$status ) { die "No SLAVE STATUS found"; @@ -2456,6 +2459,12 @@ C with Control-C. Fork to the background and detach from the shell. POSIX operating systems only. +=item --database + +short form: -D; type: string + +The database to use for the connection. + =item --defaults-file short form: -F; type: string diff --git a/t/pt-slave-delay/auto_restart.t b/t/pt-slave-delay/auto_restart.t index 7dc81d63..3e26b22f 100644 --- a/t/pt-slave-delay/auto_restart.t +++ b/t/pt-slave-delay/auto_restart.t @@ -12,6 +12,7 @@ use strict; use warnings FATAL => 'all'; use English qw( -no_match_vars ); use Test::More; +use Data::Dumper; use PerconaTest; use Sandbox; @@ -28,7 +29,7 @@ elsif ( !@{$dbh->selectcol_arrayref('SHOW DATABASES LIKE "sakila"')} ) { plan skip_all => 'sakila db not loaded'; } else { - plan tests => 1; + plan tests => 2; } my $cnf = '/tmp/12346/my.sandbox.cnf'; @@ -47,7 +48,7 @@ my $output; my $pid = fork(); if ( $pid ) { # parent - $output = `$cmd --interval 1 --run-time 4 2>&1`; + $output = `$cmd --interval 1 --run-time 8 2>&1`; like( $output, qr/Lost connection.+?Reconnected to slave.+Setting slave to run/ms, @@ -58,13 +59,39 @@ else { # child sleep 1; diag(`/tmp/12346/stop >/dev/null`); - sleep 1; diag(`/tmp/12346/start >/dev/null`); diag(`/tmp/12346/use -e "set global read_only=1"`); exit; } +# Reap the child. +waitpid ($pid, 0); +# Do it all over again, but this time KILL instead of restart. +$pid = fork(); +if ( $pid ) { + # parent. Note the --database mysql + $output = `$cmd --database mysql --interval 1 --run-time 8 2>&1`; + like( + $output, + qr/Lost connection.+?Reconnected to slave.+Setting slave to run/ms, + "Reconnect to slave" + ); +} +else { + # child. Note that we'll kill the parent's 'mysql' connection + sleep 1; + my $c_dbh = $sb->get_dbh_for('slave1'); + my @cxn = @{$c_dbh->selectall_arrayref('show processlist', {Slice => {}})}; + foreach my $c ( @cxn ) { + # The parent's connection: + # {command => 'Sleep',db => 'mysql',host => 'localhost',id => '5',info => undef,state => '',time => '1',user => 'msandbox'} + if ( ($c->{db} || '') eq 'mysql' && ($c->{user} || '') eq 'msandbox' ) { + $c_dbh->do("KILL $c->{id}"); + } + } + exit; +} # Reap the child. waitpid ($pid, 0);