mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-17 17:27:57 +00:00
Add another test for reconnect, and fix failure/race condition in pt-slave-delay
This commit is contained in:
@@ -2117,44 +2117,43 @@ sub main {
|
|||||||
|
|
||||||
$now = time();
|
$now = time();
|
||||||
|
|
||||||
# TODO: this is a race condition. See 0xdeadbeef below.
|
# If the database connection is gone, we must live on!
|
||||||
if ( !$slave_dbh || !$slave_dbh->ping() ) {
|
# Try 10 times, for about 2 minutes, to reconnect to the slave,
|
||||||
# Try 10 times, for about 2 minutes, to reconnect to the slave,
|
# increasing wait time from 3 to 15 seconds.
|
||||||
# increasing wait time from 3 to 15 seconds.
|
$o->set('ask-pass', 0); # don't ask again
|
||||||
$o->set('ask-pass', 0); # don't ask again
|
my $tries = 10;
|
||||||
my $tries = 10;
|
my $rt = new Retry();
|
||||||
my $rt = new Retry();
|
$rt->retry(
|
||||||
$rt->retry(
|
tries => $tries,
|
||||||
tries => $tries,
|
retry_on_die => 1,
|
||||||
retry_on_die => 1,
|
wait => sub {
|
||||||
wait => sub {
|
my ( %args ) = @_;
|
||||||
my ( %args ) = @_;
|
return unless $oktorun;
|
||||||
return unless $oktorun;
|
my $t = min($args{tryno} * 3, 15);
|
||||||
my $t = min($args{tryno} * 3, 15);
|
info("Lost connection, sleeping $t seconds "
|
||||||
info("Could not reconnect to slave, sleeping $t seconds "
|
. "and trying " . ($tries-$args{tryno}) . " more times")
|
||||||
. "and trying " . ($tries-$args{tryno}) . " more times")
|
if $tries - $args{tryno};
|
||||||
if $tries - $args{tryno};
|
sleep $t;
|
||||||
sleep $t;
|
info("Trying to reconnect");
|
||||||
},
|
eval {
|
||||||
try => sub {
|
|
||||||
return unless $oktorun;
|
|
||||||
info("Lost connection to slave, trying to reconnect");
|
|
||||||
$slave_dbh = get_dbh($dp, $slave_dsn);
|
$slave_dbh = get_dbh($dp, $slave_dsn);
|
||||||
return $slave_dbh;
|
};
|
||||||
},
|
},
|
||||||
on_success => sub {
|
try => sub {
|
||||||
info("Reconnected to slave");
|
return unless $oktorun;
|
||||||
},
|
$status = $slave_dbh->selectrow_hashref("SHOW SLAVE STATUS");
|
||||||
on_failure => sub {
|
return $status;
|
||||||
return unless $oktorun;
|
},
|
||||||
die "Failed to reconnect to slave";
|
on_success => sub {
|
||||||
},
|
info("Reconnected to slave");
|
||||||
);
|
},
|
||||||
last unless $oktorun; # might have gotten interrupt while waiting
|
on_failure => sub {
|
||||||
}
|
return unless $oktorun;
|
||||||
# 0xdeadbeef (see above): just because we reconnected in the above Retry
|
die "Failed to reconnect to slave";
|
||||||
# does not mean we have a connection here!
|
},
|
||||||
$status = $slave_dbh->selectrow_hashref("SHOW SLAVE STATUS");
|
);
|
||||||
|
last unless $oktorun; # might have gotten interrupt while waiting
|
||||||
|
|
||||||
if ( !$status || ! %$status ) {
|
if ( !$status || ! %$status ) {
|
||||||
die "No SLAVE STATUS found";
|
die "No SLAVE STATUS found";
|
||||||
}
|
}
|
||||||
|
@@ -45,10 +45,11 @@ my $output;
|
|||||||
# the child should restart the slave, and the tool should report
|
# the child should restart the slave, and the tool should report
|
||||||
# that it reconnected and did some work, ending with "Setting slave
|
# that it reconnected and did some work, ending with "Setting slave
|
||||||
# to run normally".
|
# to run normally".
|
||||||
|
diag('Running...');
|
||||||
my $pid = fork();
|
my $pid = fork();
|
||||||
if ( $pid ) {
|
if ( $pid ) {
|
||||||
# parent
|
# parent
|
||||||
$output = `$cmd --interval 1 --run-time 8 2>&1`;
|
$output = `$cmd --interval 1 --run-time 4 2>&1`;
|
||||||
like(
|
like(
|
||||||
$output,
|
$output,
|
||||||
qr/Lost connection.+?Reconnected to slave.+Setting slave to run/ms,
|
qr/Lost connection.+?Reconnected to slave.+Setting slave to run/ms,
|
||||||
@@ -71,11 +72,11 @@ waitpid ($pid, 0);
|
|||||||
$pid = fork();
|
$pid = fork();
|
||||||
if ( $pid ) {
|
if ( $pid ) {
|
||||||
# parent. Note the --database mysql
|
# parent. Note the --database mysql
|
||||||
$output = `$cmd --database mysql --interval 1 --run-time 8 2>&1`;
|
$output = `$cmd --database mysql --interval 1 --run-time 4 2>&1`;
|
||||||
like(
|
like(
|
||||||
$output,
|
$output,
|
||||||
qr/Lost connection.+?Reconnected to slave.+Setting slave to run/ms,
|
qr/Lost connection.+?Reconnected to slave.+Setting slave to run/ms,
|
||||||
"Reconnect to slave"
|
"Reconnect to slave when KILL'ed"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
Reference in New Issue
Block a user