Merge pt-kill-reconnect-bug-941469.

This commit is contained in:
Daniel Nichter
2012-07-13 10:57:28 -06:00
7 changed files with 402 additions and 33 deletions

View File

@@ -3675,6 +3675,232 @@ sub _d {
# End QueryRewriter package
# ###########################################################################
# ###########################################################################
# Retry package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/Retry.pm
# t/lib/Retry.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
package Retry;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
sub new {
my ( $class, %args ) = @_;
my $self = {
%args,
};
return bless $self, $class;
}
sub retry {
my ( $self, %args ) = @_;
my @required_args = qw(try fail final_fail);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
};
my ($try, $fail, $final_fail) = @args{@required_args};
my $wait = $args{wait} || sub { sleep 1; };
my $tries = $args{tries} || 3;
my $last_error;
my $tryno = 0;
TRY:
while ( ++$tryno <= $tries ) {
PTDEBUG && _d("Try", $tryno, "of", $tries);
my $result;
eval {
$result = $try->(tryno=>$tryno);
};
if ( $EVAL_ERROR ) {
PTDEBUG && _d("Try code failed:", $EVAL_ERROR);
$last_error = $EVAL_ERROR;
if ( $tryno < $tries ) { # more retries
my $retry = $fail->(tryno=>$tryno, error=>$last_error);
last TRY unless $retry;
PTDEBUG && _d("Calling wait code");
$wait->(tryno=>$tryno);
}
}
else {
PTDEBUG && _d("Try code succeeded");
return $result;
}
}
PTDEBUG && _d('Try code did not succeed');
return $final_fail->(error=>$last_error);
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End Retry package
# ###########################################################################
# ###########################################################################
# Cxn package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/Cxn.pm
# t/lib/Cxn.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
package Cxn;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Scalar::Util qw(blessed);
use constant {
PTDEBUG => $ENV{PTDEBUG} || 0,
PERCONA_TOOLKIT_TEST_USE_DSN_NAMES => $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} || 0,
};
sub new {
my ( $class, %args ) = @_;
my @required_args = qw(DSNParser OptionParser);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
};
my ($dp, $o) = @args{@required_args};
my $dsn_defaults = $dp->parse_options($o);
my $prev_dsn = $args{prev_dsn};
my $dsn = $args{dsn};
if ( !$dsn ) {
$args{dsn_string} ||= 'h=' . ($dsn_defaults->{h} || 'localhost');
$dsn = $dp->parse(
$args{dsn_string}, $prev_dsn, $dsn_defaults);
}
elsif ( $prev_dsn ) {
$dsn = $dp->copy($prev_dsn, $dsn);
}
my $self = {
dsn => $dsn,
dbh => $args{dbh},
dsn_name => $dp->as_string($dsn, [qw(h P S)]),
hostname => '',
set => $args{set},
NAME_lc => $args{NAME_lc},
dbh_set => 0,
OptionParser => $o,
DSNParser => $dp,
};
return bless $self, $class;
}
sub connect {
my ( $self ) = @_;
my $dsn = $self->{dsn};
my $dp = $self->{DSNParser};
my $o = $self->{OptionParser};
my $dbh = $self->{dbh};
if ( !$dbh || !$dbh->ping() ) {
if ( $o->get('ask-pass') && !$self->{asked_for_pass} ) {
$dsn->{p} = OptionParser::prompt_noecho("Enter MySQL password: ");
$self->{asked_for_pass} = 1;
}
$dbh = $dp->get_dbh($dp->get_cxn_params($dsn), { AutoCommit => 1 });
}
PTDEBUG && _d($dbh, 'Connected dbh to', $self->{name});
return $self->set_dbh($dbh);
}
sub set_dbh {
my ($self, $dbh) = @_;
if ( $self->{dbh} && $self->{dbh} == $dbh && $self->{dbh_set} ) {
PTDEBUG && _d($dbh, 'Already set dbh');
return $dbh;
}
PTDEBUG && _d($dbh, 'Setting dbh');
if ( !exists $self->{NAME_lc}
|| (defined $self->{NAME_lc} && $self->{NAME_lc}) ) {
$dbh->{FetchHashKeyName} = 'NAME_lc';
}
my $sql = 'SELECT @@hostname, @@server_id';
PTDEBUG && _d($dbh, $sql);
my ($hostname, $server_id) = $dbh->selectrow_array($sql);
PTDEBUG && _d($dbh, 'hostname:', $hostname, $server_id);
if ( $hostname ) {
$self->{hostname} = $hostname;
}
if ( my $set = $self->{set}) {
$set->($dbh);
}
$self->{dbh} = $dbh;
$self->{dbh_set} = 1;
return $dbh;
}
sub dbh {
my ($self) = @_;
return $self->{dbh};
}
sub dsn {
my ($self) = @_;
return $self->{dsn};
}
sub name {
my ($self) = @_;
return $self->{dsn_name} if PERCONA_TOOLKIT_TEST_USE_DSN_NAMES;
return $self->{hostname} || $self->{dsn_name} || 'unknown host';
}
sub DESTROY {
my ($self) = @_;
if ( $self->{dbh}
&& blessed($self->{dbh})
&& $self->{dbh}->can("disconnect") ) {
PTDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{name});
$self->{dbh}->disconnect();
}
return;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End Cxn package
# ###########################################################################
# ###########################################################################
# This is a combination of modules and programs in one -- a runnable module.
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
@@ -3788,10 +4014,10 @@ sub main {
# ########################################################################
# Make input sub that will either get processlist from MySQL or a file.
# ########################################################################
my $dsn;
my $dbh;
my $kill_sth;
my $get_proclist;
my $cxn;
my $dbh; # $cxn->dbh
my $get_proclist; # callback to SHOW PROCESSLIST
my $kill; # callback to KILL
my $files;
if ( $files = $o->get('test-matching') ) {
PTDEBUG && _d('Getting processlist from files:', @$files);
@@ -3830,14 +4056,83 @@ sub main {
}
else {
PTDEBUG && _d('Getting processlist from MySQL');
$dsn = $dp->parse_options($o);
$dbh = get_cxn($dp, $dsn, 1);
$kill_sth = $o->get('kill-query') ? $dbh->prepare('KILL QUERY ?')
: $dbh->prepare('KILL ?');
$cxn = Cxn->new(
dsn_string => shift @ARGV,
NAME_lc => 0,
DSNParser => $dp,
OptionParser => $o,
);
$dbh = $cxn->connect();
# Make the get_proclist and kill callbacks. Use Retry in case
# the connection to MySQL is lost, then the dbh and the sths
# will need to be re-initialized.
my $retry = Retry->new();
my $proc_sth = $dbh->prepare('SHOW FULL PROCESSLIST');
$get_proclist = sub {
$proc_sth->execute();
return $proc_sth->fetchall_arrayref({});
return $retry->retry(
# Retry for an hour: 1,200 tries x 3 seconds = 3600s/1hr
tries => 1200,
wait => sub { sleep 3; },
try => sub {
$proc_sth->execute();
return $proc_sth->fetchall_arrayref({});
},
fail => sub {
my (%args) = @_;
my $error = $args{error};
# The 1st pattern means that MySQL itself died or was stopped.
# The 2nd pattern means that our cxn was killed (KILL <id>).
if ( $error =~ m/MySQL server has gone away/
|| $error =~ m/Lost connection to MySQL server/ ) {
eval {
$dbh = $cxn->connect();
$proc_sth = $dbh->prepare('SHOW FULL PROCESSLIST');
msg('Reconnected to ' . $cxn->name());
};
return 1 unless $EVAL_ERROR; # try again
}
return 0; # call final_fail
},
final_fail => sub {
my (%args) = @_;
die $args{error};
},
);
};
my $kill_sql = $o->get('kill-query') ? 'KILL QUERY ?' : 'KILL ?';
my $kill_sth = $dbh->prepare($kill_sql);
$kill = sub {
my ($id) = @_;
PTDEBUG && _d('Killing process', $id);
return $retry->retry(
tries => 2,
try => sub {
return $kill_sth->execute($id);
},
fail => sub {
my (%args) = @_;
my $error = $args{error};
# The 1st pattern means that MySQL itself died or was stopped.
# The 2nd pattern means that our cxn was killed (KILL <id>).
if ( $error =~ m/MySQL server has gone away/
|| $error =~ m/Lost connection to MySQL server/ ) {
eval {
$dbh = $cxn->connect();
$kill_sth = $dbh->prepare($kill_sql);
msg('Reconnected to ' . $cxn->name());
};
return 1 unless $EVAL_ERROR; # try again
}
return 0; # call final_fail
},
final_fail => sub {
my (%args) = @_;
die $args{error};
},
);
};
}
@@ -3860,7 +4155,7 @@ sub main {
# Start working.
# ########################################################################
msg("$PROGRAM_NAME starting");
msg($dbh ? "Connected to host " . $dp->as_string($dsn)
msg($dbh ? "Connected to host " . $cxn->name()
: "Test matching files @$files");
# Class-based match criteria.
@@ -4044,7 +4339,7 @@ sub main {
. " seconds before kill");
sleep $o->get('wait-before-kill');
}
eval { $kill_sth->execute($query->{Id}); };
eval { $kill->($query->{Id}) };
if ( $EVAL_ERROR ) {
msg("Error killing $query->{Id}: $EVAL_ERROR");
}
@@ -4083,16 +4378,6 @@ sub main {
# Subroutines.
# ############################################################################
sub get_cxn {
my ( $dp, $dsn, $ac ) = @_;
if ( $o->get('ask-pass') ) {
$dsn->{p} = OptionParser::prompt_noecho("Enter password: ");
}
my $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), {AutoCommit => $ac});
$dbh->{InactiveDestroy} = 1; # Because of forking.
return $dbh;
}
# Forks and detaches from parent to execute the given command;
# does not block parent.
sub exec_cmd {
@@ -4177,7 +4462,7 @@ pt-kill - Kill MySQL queries that match certain criteria.
=head1 SYNOPSIS
Usage: pt-kill [OPTIONS]
Usage: pt-kill [OPTIONS] [DSN]
pt-kill kills MySQL connections. pt-kill connects to MySQL and gets queries
from SHOW PROCESSLIST if no FILE is given. Else, it reads queries from one
@@ -4316,6 +4601,9 @@ L<"--kill"> and L<"--kill-query"> are mutually exclusive.
L<"--daemonize"> and L<"--test-matching"> are mutually exclusive.
This tool accepts additional command-line arguments. Refer to the
L<"SYNOPSIS"> and usage information for details.
=over
=item --ask-pass
@@ -4338,6 +4626,12 @@ type: Array
Read this comma-separated list of config files; if specified, this must be the
first option on the command line.
=item --database
short form: -D; type: string
The database to use for the connection.
=item --daemonize
Fork to the background and detach from the shell. POSIX operating systems

View File

@@ -103,6 +103,7 @@ sub new {
dsn_name => $dp->as_string($dsn, [qw(h P S)]),
hostname => '',
set => $args{set},
NAME_lc => $args{NAME_lc},
dbh_set => 0,
OptionParser => $o,
DSNParser => $dp,
@@ -149,7 +150,10 @@ sub set_dbh {
PTDEBUG && _d($dbh, 'Setting dbh');
# Set stuff for this dbh (i.e. initialize it).
$dbh->{FetchHashKeyName} = 'NAME_lc';
if ( !exists $self->{NAME_lc}
|| (defined $self->{NAME_lc} && $self->{NAME_lc}) ) {
$dbh->{FetchHashKeyName} = 'NAME_lc';
}
# Update the cxn's name. Until we connect, the DSN parts
# h and P are used. Once connected, use @@hostname.

View File

@@ -4,6 +4,7 @@ BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
$ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} = 1;
};
use strict;
@@ -25,7 +26,7 @@ if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 4;
plan tests => 6;
}
my $output;
@@ -52,7 +53,7 @@ my @times = $output =~ m/\(Query (\d+) sec\)/g;
ok(
@times > 2 && @times < 7,
"There were 2 to 5 captures"
) or print STDERR Dumper($output);
) or diag($output);
# This is to catch a bad bug where there wasn't any sleep time when
# --iterations was 0, and another bug when --run-time was not respected.
@@ -64,8 +65,7 @@ $output = `$cmd --busy-time 1s --print --run-time 11s`;
ok(
@times > 7 && @times < 12,
'Approximately 9 or 10 captures with --iterations 0'
) or print STDERR Dumper($output);
) or diag($output);
# ############################################################################
# --verbose
@@ -80,6 +80,32 @@ like(
'--verbose'
);
# #############################################################################
# Reconnect if cxn lost.
# #############################################################################
$master_dbh->do("CREATE DATABASE IF NOT EXISTS pt_kill_test");
system(qq($trunk/util/kill-mysql-process db=pt_kill_test wait=2 &));
$output = output(
sub { pt_kill::main('-F', $cnf, qw(-D pt_kill_test),
qw(--run-time 4 --interval 1 --print --verbose)) },
stderr => 1,
);
like(
$output,
qr/Reconnected/,
"kill-mysql-process says it reconnected"
);
my $n_checks =()= $output =~ m/Checking processlist/g;
is(
$n_checks,
4,
"pt-kill still checked the processlist 4 times"
) or diag($output);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -47,6 +47,7 @@ diag(`rm $out`);
SKIP: {
skip 'Cannot connect to sandbox master', 2 unless $master_dbh;
$master_dbh->do("CREATE DATABASE IF NOT EXISTS pt_kill_zombie_test");
system "/tmp/12345/use -e 'select sleep(2)' >/dev/null 2>&1 &";
@@ -70,7 +71,7 @@ SKIP: {
diag(`rm $out`);
# Don't make zombies (https://bugs.launchpad.net/percona-toolkit/+bug/919819)
system "/tmp/12345/use -e 'select sleep(2)' >/dev/null 2>&1 &";
$master_dbh->do("USE pt_kill_zombie_test");
my $sentinel = "/tmp/pt-kill-test.$PID.stop";
my $pid_file = "/tmp/pt-kill-test.$PID.pid";
@@ -79,8 +80,8 @@ SKIP: {
diag(`rm $pid_file 2>/dev/null`);
diag(`rm $log_file 2>/dev/null`);
`$cmd --daemonize --match-info 'select sleep' --interval 1 --print --execute-command 'echo zombie > $out' --verbose --pid $pid_file --log $log_file --sentinel $sentinel`;
sleep 1;
`$cmd --daemonize --match-db pt_kill_zombie_test --interval 1 --print --execute-command 'echo zombie > $out' --verbose --pid $pid_file --log $log_file --sentinel $sentinel`;
PerconaTest::wait_for_files($pid_file, $log_file, $out);
$output = `grep Executed $log_file`;
like(
$output,

View File

@@ -41,7 +41,7 @@ my $cnf='/tmp/12345/my.sandbox.cnf';
# Shell out to a sleep(10) query and try to capture the query.
# Backticks don't work here.
system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null&");
system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null 2>&1 &");
sleep 0.5;
my $rows = $dbh->selectall_hashref('show processlist', 'id');
my $pid;

View File

@@ -48,7 +48,7 @@ SKIP: {
'Log file created'
);
sleep 3; # --run-time=2; if we sleep 2 we'll get intermittent failures.
wait_until(sub { return !-f '/tmp/pt-kill.pid' });
ok(
!-f '/tmp/pt-kill.pid',
'PID file removed'

44
util/kill-mysql-process Executable file
View File

@@ -0,0 +1,44 @@
#!/usr/bin/env perl
# This script helps test that tools reconnect to MySQL. Its meant to be ran
# in the background like system(qq($trunk/util/kill-mysql-process DB)) where
# DB is the name of special "tracer" database used to isolate the test in
# the process list. So, do something like CREATE DATABASE pt_kill_test, then
# run the test with D=pt_kill_test (presuming pt_kill_test is unique to
# the test). This script will then kill any and all processes that are using
# the pt_kill_test db.
#
# Exits 0 if the tracer db is observed and procs are killed, else exits 1.
use strict;
use warnings FATAL => 'all';
use Time::HiRes qw(sleep time);
if ( !@ARGV || @ARGV < 1 || @ARGV > 3 ) {
print STDERR "Usage: kill-mysql-process OPTION=VALUE\n";
print STDERR "Options: db, wait, runtime, interval\n";
exit 1;
}
my %opt = map { my ($op, $val) = split '=', $_; $op => $val; } @ARGV;
$opt{db} ||= 'tracer_db';
$opt{runtime} ||= 5.0;
$opt{interval} ||= 0.2;
sleep $opt{wait} if $opt{wait};
my $t_start = time;
while ( time - $t_start < $opt{runtime} ) {
my $procs = `/tmp/12345/use -ss -e "show processlist" | grep $opt{db} | cut -f1`;
if ( $procs && $procs =~ /\d/ ) {
foreach my $proc ( split "\n", $procs ) {
chomp $proc;
`/tmp/12345/use -e "KILL $proc"`;
}
exit 0;
}
sleep $opt{interval};
}
exit 1;