Merge pt-kill-reconnect-bug-941469.

This commit is contained in:
Daniel Nichter
2012-07-13 10:57:28 -06:00
7 changed files with 402 additions and 33 deletions

View File

@@ -3675,6 +3675,232 @@ sub _d {
# End QueryRewriter package # End QueryRewriter package
# ########################################################################### # ###########################################################################
# ###########################################################################
# Retry package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/Retry.pm
# t/lib/Retry.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
package Retry;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
sub new {
my ( $class, %args ) = @_;
my $self = {
%args,
};
return bless $self, $class;
}
sub retry {
my ( $self, %args ) = @_;
my @required_args = qw(try fail final_fail);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
};
my ($try, $fail, $final_fail) = @args{@required_args};
my $wait = $args{wait} || sub { sleep 1; };
my $tries = $args{tries} || 3;
my $last_error;
my $tryno = 0;
TRY:
while ( ++$tryno <= $tries ) {
PTDEBUG && _d("Try", $tryno, "of", $tries);
my $result;
eval {
$result = $try->(tryno=>$tryno);
};
if ( $EVAL_ERROR ) {
PTDEBUG && _d("Try code failed:", $EVAL_ERROR);
$last_error = $EVAL_ERROR;
if ( $tryno < $tries ) { # more retries
my $retry = $fail->(tryno=>$tryno, error=>$last_error);
last TRY unless $retry;
PTDEBUG && _d("Calling wait code");
$wait->(tryno=>$tryno);
}
}
else {
PTDEBUG && _d("Try code succeeded");
return $result;
}
}
PTDEBUG && _d('Try code did not succeed');
return $final_fail->(error=>$last_error);
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End Retry package
# ###########################################################################
# ###########################################################################
# Cxn package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/Cxn.pm
# t/lib/Cxn.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
package Cxn;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Scalar::Util qw(blessed);
use constant {
PTDEBUG => $ENV{PTDEBUG} || 0,
PERCONA_TOOLKIT_TEST_USE_DSN_NAMES => $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} || 0,
};
sub new {
my ( $class, %args ) = @_;
my @required_args = qw(DSNParser OptionParser);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
};
my ($dp, $o) = @args{@required_args};
my $dsn_defaults = $dp->parse_options($o);
my $prev_dsn = $args{prev_dsn};
my $dsn = $args{dsn};
if ( !$dsn ) {
$args{dsn_string} ||= 'h=' . ($dsn_defaults->{h} || 'localhost');
$dsn = $dp->parse(
$args{dsn_string}, $prev_dsn, $dsn_defaults);
}
elsif ( $prev_dsn ) {
$dsn = $dp->copy($prev_dsn, $dsn);
}
my $self = {
dsn => $dsn,
dbh => $args{dbh},
dsn_name => $dp->as_string($dsn, [qw(h P S)]),
hostname => '',
set => $args{set},
NAME_lc => $args{NAME_lc},
dbh_set => 0,
OptionParser => $o,
DSNParser => $dp,
};
return bless $self, $class;
}
sub connect {
my ( $self ) = @_;
my $dsn = $self->{dsn};
my $dp = $self->{DSNParser};
my $o = $self->{OptionParser};
my $dbh = $self->{dbh};
if ( !$dbh || !$dbh->ping() ) {
if ( $o->get('ask-pass') && !$self->{asked_for_pass} ) {
$dsn->{p} = OptionParser::prompt_noecho("Enter MySQL password: ");
$self->{asked_for_pass} = 1;
}
$dbh = $dp->get_dbh($dp->get_cxn_params($dsn), { AutoCommit => 1 });
}
PTDEBUG && _d($dbh, 'Connected dbh to', $self->{name});
return $self->set_dbh($dbh);
}
sub set_dbh {
my ($self, $dbh) = @_;
if ( $self->{dbh} && $self->{dbh} == $dbh && $self->{dbh_set} ) {
PTDEBUG && _d($dbh, 'Already set dbh');
return $dbh;
}
PTDEBUG && _d($dbh, 'Setting dbh');
if ( !exists $self->{NAME_lc}
|| (defined $self->{NAME_lc} && $self->{NAME_lc}) ) {
$dbh->{FetchHashKeyName} = 'NAME_lc';
}
my $sql = 'SELECT @@hostname, @@server_id';
PTDEBUG && _d($dbh, $sql);
my ($hostname, $server_id) = $dbh->selectrow_array($sql);
PTDEBUG && _d($dbh, 'hostname:', $hostname, $server_id);
if ( $hostname ) {
$self->{hostname} = $hostname;
}
if ( my $set = $self->{set}) {
$set->($dbh);
}
$self->{dbh} = $dbh;
$self->{dbh_set} = 1;
return $dbh;
}
sub dbh {
my ($self) = @_;
return $self->{dbh};
}
sub dsn {
my ($self) = @_;
return $self->{dsn};
}
sub name {
my ($self) = @_;
return $self->{dsn_name} if PERCONA_TOOLKIT_TEST_USE_DSN_NAMES;
return $self->{hostname} || $self->{dsn_name} || 'unknown host';
}
sub DESTROY {
my ($self) = @_;
if ( $self->{dbh}
&& blessed($self->{dbh})
&& $self->{dbh}->can("disconnect") ) {
PTDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{name});
$self->{dbh}->disconnect();
}
return;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End Cxn package
# ###########################################################################
# ########################################################################### # ###########################################################################
# This is a combination of modules and programs in one -- a runnable module. # This is a combination of modules and programs in one -- a runnable module.
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last # http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
@@ -3788,10 +4014,10 @@ sub main {
# ######################################################################## # ########################################################################
# Make input sub that will either get processlist from MySQL or a file. # Make input sub that will either get processlist from MySQL or a file.
# ######################################################################## # ########################################################################
my $dsn; my $cxn;
my $dbh; my $dbh; # $cxn->dbh
my $kill_sth; my $get_proclist; # callback to SHOW PROCESSLIST
my $get_proclist; my $kill; # callback to KILL
my $files; my $files;
if ( $files = $o->get('test-matching') ) { if ( $files = $o->get('test-matching') ) {
PTDEBUG && _d('Getting processlist from files:', @$files); PTDEBUG && _d('Getting processlist from files:', @$files);
@@ -3830,14 +4056,83 @@ sub main {
} }
else { else {
PTDEBUG && _d('Getting processlist from MySQL'); PTDEBUG && _d('Getting processlist from MySQL');
$dsn = $dp->parse_options($o); $cxn = Cxn->new(
$dbh = get_cxn($dp, $dsn, 1); dsn_string => shift @ARGV,
$kill_sth = $o->get('kill-query') ? $dbh->prepare('KILL QUERY ?') NAME_lc => 0,
: $dbh->prepare('KILL ?'); DSNParser => $dp,
OptionParser => $o,
);
$dbh = $cxn->connect();
# Make the get_proclist and kill callbacks. Use Retry in case
# the connection to MySQL is lost, then the dbh and the sths
# will need to be re-initialized.
my $retry = Retry->new();
my $proc_sth = $dbh->prepare('SHOW FULL PROCESSLIST'); my $proc_sth = $dbh->prepare('SHOW FULL PROCESSLIST');
$get_proclist = sub { $get_proclist = sub {
$proc_sth->execute(); return $retry->retry(
return $proc_sth->fetchall_arrayref({}); # Retry for an hour: 1,200 tries x 3 seconds = 3600s/1hr
tries => 1200,
wait => sub { sleep 3; },
try => sub {
$proc_sth->execute();
return $proc_sth->fetchall_arrayref({});
},
fail => sub {
my (%args) = @_;
my $error = $args{error};
# The 1st pattern means that MySQL itself died or was stopped.
# The 2nd pattern means that our cxn was killed (KILL <id>).
if ( $error =~ m/MySQL server has gone away/
|| $error =~ m/Lost connection to MySQL server/ ) {
eval {
$dbh = $cxn->connect();
$proc_sth = $dbh->prepare('SHOW FULL PROCESSLIST');
msg('Reconnected to ' . $cxn->name());
};
return 1 unless $EVAL_ERROR; # try again
}
return 0; # call final_fail
},
final_fail => sub {
my (%args) = @_;
die $args{error};
},
);
};
my $kill_sql = $o->get('kill-query') ? 'KILL QUERY ?' : 'KILL ?';
my $kill_sth = $dbh->prepare($kill_sql);
$kill = sub {
my ($id) = @_;
PTDEBUG && _d('Killing process', $id);
return $retry->retry(
tries => 2,
try => sub {
return $kill_sth->execute($id);
},
fail => sub {
my (%args) = @_;
my $error = $args{error};
# The 1st pattern means that MySQL itself died or was stopped.
# The 2nd pattern means that our cxn was killed (KILL <id>).
if ( $error =~ m/MySQL server has gone away/
|| $error =~ m/Lost connection to MySQL server/ ) {
eval {
$dbh = $cxn->connect();
$kill_sth = $dbh->prepare($kill_sql);
msg('Reconnected to ' . $cxn->name());
};
return 1 unless $EVAL_ERROR; # try again
}
return 0; # call final_fail
},
final_fail => sub {
my (%args) = @_;
die $args{error};
},
);
}; };
} }
@@ -3860,7 +4155,7 @@ sub main {
# Start working. # Start working.
# ######################################################################## # ########################################################################
msg("$PROGRAM_NAME starting"); msg("$PROGRAM_NAME starting");
msg($dbh ? "Connected to host " . $dp->as_string($dsn) msg($dbh ? "Connected to host " . $cxn->name()
: "Test matching files @$files"); : "Test matching files @$files");
# Class-based match criteria. # Class-based match criteria.
@@ -4044,7 +4339,7 @@ sub main {
. " seconds before kill"); . " seconds before kill");
sleep $o->get('wait-before-kill'); sleep $o->get('wait-before-kill');
} }
eval { $kill_sth->execute($query->{Id}); }; eval { $kill->($query->{Id}) };
if ( $EVAL_ERROR ) { if ( $EVAL_ERROR ) {
msg("Error killing $query->{Id}: $EVAL_ERROR"); msg("Error killing $query->{Id}: $EVAL_ERROR");
} }
@@ -4083,16 +4378,6 @@ sub main {
# Subroutines. # Subroutines.
# ############################################################################ # ############################################################################
sub get_cxn {
my ( $dp, $dsn, $ac ) = @_;
if ( $o->get('ask-pass') ) {
$dsn->{p} = OptionParser::prompt_noecho("Enter password: ");
}
my $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), {AutoCommit => $ac});
$dbh->{InactiveDestroy} = 1; # Because of forking.
return $dbh;
}
# Forks and detaches from parent to execute the given command; # Forks and detaches from parent to execute the given command;
# does not block parent. # does not block parent.
sub exec_cmd { sub exec_cmd {
@@ -4177,7 +4462,7 @@ pt-kill - Kill MySQL queries that match certain criteria.
=head1 SYNOPSIS =head1 SYNOPSIS
Usage: pt-kill [OPTIONS] Usage: pt-kill [OPTIONS] [DSN]
pt-kill kills MySQL connections. pt-kill connects to MySQL and gets queries pt-kill kills MySQL connections. pt-kill connects to MySQL and gets queries
from SHOW PROCESSLIST if no FILE is given. Else, it reads queries from one from SHOW PROCESSLIST if no FILE is given. Else, it reads queries from one
@@ -4316,6 +4601,9 @@ L<"--kill"> and L<"--kill-query"> are mutually exclusive.
L<"--daemonize"> and L<"--test-matching"> are mutually exclusive. L<"--daemonize"> and L<"--test-matching"> are mutually exclusive.
This tool accepts additional command-line arguments. Refer to the
L<"SYNOPSIS"> and usage information for details.
=over =over
=item --ask-pass =item --ask-pass
@@ -4338,6 +4626,12 @@ type: Array
Read this comma-separated list of config files; if specified, this must be the Read this comma-separated list of config files; if specified, this must be the
first option on the command line. first option on the command line.
=item --database
short form: -D; type: string
The database to use for the connection.
=item --daemonize =item --daemonize
Fork to the background and detach from the shell. POSIX operating systems Fork to the background and detach from the shell. POSIX operating systems

View File

@@ -103,6 +103,7 @@ sub new {
dsn_name => $dp->as_string($dsn, [qw(h P S)]), dsn_name => $dp->as_string($dsn, [qw(h P S)]),
hostname => '', hostname => '',
set => $args{set}, set => $args{set},
NAME_lc => $args{NAME_lc},
dbh_set => 0, dbh_set => 0,
OptionParser => $o, OptionParser => $o,
DSNParser => $dp, DSNParser => $dp,
@@ -149,7 +150,10 @@ sub set_dbh {
PTDEBUG && _d($dbh, 'Setting dbh'); PTDEBUG && _d($dbh, 'Setting dbh');
# Set stuff for this dbh (i.e. initialize it). # Set stuff for this dbh (i.e. initialize it).
$dbh->{FetchHashKeyName} = 'NAME_lc'; if ( !exists $self->{NAME_lc}
|| (defined $self->{NAME_lc} && $self->{NAME_lc}) ) {
$dbh->{FetchHashKeyName} = 'NAME_lc';
}
# Update the cxn's name. Until we connect, the DSN parts # Update the cxn's name. Until we connect, the DSN parts
# h and P are used. Once connected, use @@hostname. # h and P are used. Once connected, use @@hostname.

View File

@@ -4,6 +4,7 @@ BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
$ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} = 1;
}; };
use strict; use strict;
@@ -25,7 +26,7 @@ if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master'; plan skip_all => 'Cannot connect to sandbox master';
} }
else { else {
plan tests => 4; plan tests => 6;
} }
my $output; my $output;
@@ -52,7 +53,7 @@ my @times = $output =~ m/\(Query (\d+) sec\)/g;
ok( ok(
@times > 2 && @times < 7, @times > 2 && @times < 7,
"There were 2 to 5 captures" "There were 2 to 5 captures"
) or print STDERR Dumper($output); ) or diag($output);
# This is to catch a bad bug where there wasn't any sleep time when # This is to catch a bad bug where there wasn't any sleep time when
# --iterations was 0, and another bug when --run-time was not respected. # --iterations was 0, and another bug when --run-time was not respected.
@@ -64,8 +65,7 @@ $output = `$cmd --busy-time 1s --print --run-time 11s`;
ok( ok(
@times > 7 && @times < 12, @times > 7 && @times < 12,
'Approximately 9 or 10 captures with --iterations 0' 'Approximately 9 or 10 captures with --iterations 0'
) or print STDERR Dumper($output); ) or diag($output);
# ############################################################################ # ############################################################################
# --verbose # --verbose
@@ -80,6 +80,32 @@ like(
'--verbose' '--verbose'
); );
# #############################################################################
# Reconnect if cxn lost.
# #############################################################################
$master_dbh->do("CREATE DATABASE IF NOT EXISTS pt_kill_test");
system(qq($trunk/util/kill-mysql-process db=pt_kill_test wait=2 &));
$output = output(
sub { pt_kill::main('-F', $cnf, qw(-D pt_kill_test),
qw(--run-time 4 --interval 1 --print --verbose)) },
stderr => 1,
);
like(
$output,
qr/Reconnected/,
"kill-mysql-process says it reconnected"
);
my $n_checks =()= $output =~ m/Checking processlist/g;
is(
$n_checks,
4,
"pt-kill still checked the processlist 4 times"
) or diag($output);
# ############################################################################# # #############################################################################
# Done. # Done.
# ############################################################################# # #############################################################################

View File

@@ -47,6 +47,7 @@ diag(`rm $out`);
SKIP: { SKIP: {
skip 'Cannot connect to sandbox master', 2 unless $master_dbh; skip 'Cannot connect to sandbox master', 2 unless $master_dbh;
$master_dbh->do("CREATE DATABASE IF NOT EXISTS pt_kill_zombie_test");
system "/tmp/12345/use -e 'select sleep(2)' >/dev/null 2>&1 &"; system "/tmp/12345/use -e 'select sleep(2)' >/dev/null 2>&1 &";
@@ -70,7 +71,7 @@ SKIP: {
diag(`rm $out`); diag(`rm $out`);
# Don't make zombies (https://bugs.launchpad.net/percona-toolkit/+bug/919819) # Don't make zombies (https://bugs.launchpad.net/percona-toolkit/+bug/919819)
system "/tmp/12345/use -e 'select sleep(2)' >/dev/null 2>&1 &"; $master_dbh->do("USE pt_kill_zombie_test");
my $sentinel = "/tmp/pt-kill-test.$PID.stop"; my $sentinel = "/tmp/pt-kill-test.$PID.stop";
my $pid_file = "/tmp/pt-kill-test.$PID.pid"; my $pid_file = "/tmp/pt-kill-test.$PID.pid";
@@ -79,8 +80,8 @@ SKIP: {
diag(`rm $pid_file 2>/dev/null`); diag(`rm $pid_file 2>/dev/null`);
diag(`rm $log_file 2>/dev/null`); diag(`rm $log_file 2>/dev/null`);
`$cmd --daemonize --match-info 'select sleep' --interval 1 --print --execute-command 'echo zombie > $out' --verbose --pid $pid_file --log $log_file --sentinel $sentinel`; `$cmd --daemonize --match-db pt_kill_zombie_test --interval 1 --print --execute-command 'echo zombie > $out' --verbose --pid $pid_file --log $log_file --sentinel $sentinel`;
sleep 1; PerconaTest::wait_for_files($pid_file, $log_file, $out);
$output = `grep Executed $log_file`; $output = `grep Executed $log_file`;
like( like(
$output, $output,

View File

@@ -41,7 +41,7 @@ my $cnf='/tmp/12345/my.sandbox.cnf';
# Shell out to a sleep(10) query and try to capture the query. # Shell out to a sleep(10) query and try to capture the query.
# Backticks don't work here. # Backticks don't work here.
system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null&"); system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null 2>&1 &");
sleep 0.5; sleep 0.5;
my $rows = $dbh->selectall_hashref('show processlist', 'id'); my $rows = $dbh->selectall_hashref('show processlist', 'id');
my $pid; my $pid;

View File

@@ -48,7 +48,7 @@ SKIP: {
'Log file created' 'Log file created'
); );
sleep 3; # --run-time=2; if we sleep 2 we'll get intermittent failures. wait_until(sub { return !-f '/tmp/pt-kill.pid' });
ok( ok(
!-f '/tmp/pt-kill.pid', !-f '/tmp/pt-kill.pid',
'PID file removed' 'PID file removed'

44
util/kill-mysql-process Executable file
View File

@@ -0,0 +1,44 @@
#!/usr/bin/env perl
# This script helps test that tools reconnect to MySQL. Its meant to be ran
# in the background like system(qq($trunk/util/kill-mysql-process DB)) where
# DB is the name of special "tracer" database used to isolate the test in
# the process list. So, do something like CREATE DATABASE pt_kill_test, then
# run the test with D=pt_kill_test (presuming pt_kill_test is unique to
# the test). This script will then kill any and all processes that are using
# the pt_kill_test db.
#
# Exits 0 if the tracer db is observed and procs are killed, else exits 1.
use strict;
use warnings FATAL => 'all';
use Time::HiRes qw(sleep time);
if ( !@ARGV || @ARGV < 1 || @ARGV > 3 ) {
print STDERR "Usage: kill-mysql-process OPTION=VALUE\n";
print STDERR "Options: db, wait, runtime, interval\n";
exit 1;
}
my %opt = map { my ($op, $val) = split '=', $_; $op => $val; } @ARGV;
$opt{db} ||= 'tracer_db';
$opt{runtime} ||= 5.0;
$opt{interval} ||= 0.2;
sleep $opt{wait} if $opt{wait};
my $t_start = time;
while ( time - $t_start < $opt{runtime} ) {
my $procs = `/tmp/12345/use -ss -e "show processlist" | grep $opt{db} | cut -f1`;
if ( $procs && $procs =~ /\d/ ) {
foreach my $proc ( split "\n", $procs ) {
chomp $proc;
`/tmp/12345/use -e "KILL $proc"`;
}
exit 0;
}
sleep $opt{interval};
}
exit 1;