Move retry_timeouts.t tests to error_handling.t and remove retry_timeouts.t. Change max_chunk value for checking slave diffs to avoid infinite wait for nonexistent chunks. Use undef instead of 0 for skipped chunks. Make lock wait timeout skipped++ instead of errors++.

This commit is contained in:
Daniel Nichter
2011-10-15 15:45:11 -06:00
parent 23e9b91d78
commit c28461aaef
3 changed files with 62 additions and 120 deletions

View File

@@ -5822,7 +5822,6 @@ sub main {
if ( ($expl->{key} || '') ne $nibble_iter->nibble_index() ) {
MKDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
"$tbl->{db}.$tbl->{tbl} not using chunk index, skipping");
$tbl->{nibble_time} = 0;
return 0; # next boundary
}
@@ -5835,7 +5834,6 @@ sub main {
&& $oversize_chunk ) {
MKDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
"$tbl->{db}.$tbl->{tbl} is too large, skipping");
$tbl->{nibble_time} = 0;
return 0; # next boundary
}
}
@@ -5867,12 +5865,18 @@ sub main {
my $chunk = $nibble_iter->nibble_number();
# Nibble time will be undefined if the chunk was skipped.
if ( $tbl->{nibble_time} == 0 ) {
if ( !defined $tbl->{nibble_time} ) {
MKDEBUG && _d('Skipping chunk', $chunk);
$tbl->{checksum_results}->{skipped}++;
return;
}
# Max chunk number that worked. This may be less than the total
# number of chunks if, for example, chunk 16 of 16 times out, but
# chunk 15 worked. The max chunk is used for checking for diffs
# on the slaves, in the done callback.
$tbl->{max_chunk} = $chunk;
# Fetch the checksum that we just executed from the replicate table.
$fetch_sth->execute(@{$tbl}{qw(db tbl)}, $chunk);
my ($crc, $cnt) = $fetch_sth->fetchrow_array();
@@ -5951,14 +5955,14 @@ sub main {
my (%args) = @_;
my $tbl = $args{tbl};
my $nibble_iter = $args{NibbleIterator};
my $max_chunk = $nibble_iter->nibble_number();
my $max_chunk = $tbl->{max_chunk};
# Don't need to do anything here if we're just --explain'ing.
return if $o->get('explain');
# Wait for all slaves to run all checksum chunks,
# then check for differences.
if ( $o->get('replicate-check') && scalar @$slaves ) {
if ( $max_chunk && $o->get('replicate-check') && scalar @$slaves ) {
MKDEBUG && _d('Checking slave diffs');
my $check_pr;
@@ -6318,8 +6322,7 @@ sub exec_nibble {
if ( $o->get('quiet') < 2 ) {
warn "$error\n";
}
$tbl->{checksum_results}->{errors}++;
return 0; # zero nibble time, skip this nibble
return; # skip this nibble
}
# This die will be caught by the eval inside the TABLE loop.

View File

@@ -23,7 +23,7 @@ if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 2;
plan tests => 6;
}
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
@@ -59,6 +59,57 @@ is(
"Only one warning for MySQL error 1265"
);
# ############################################################################
# Lock wait timeout
# ############################################################################
$master_dbh->do('use sakila');
$master_dbh->do('begin');
$master_dbh->do('select * from city for update');
$output = output(
sub { pt_table_checksum::main(@args, qw(-t sakila.city)) },
stderr => 1,
trf => sub { return PerconaTest::normalize_checksum_results(@_) },
);
like(
$output,
qr/Lock wait timeout exceeded/,
"Catches lock wait timeout"
);
like(
$output,
qr/^0 0 0 1 1 sakila.city/m,
"Skips chunk that times out"
);
# Lock wait timeout for sandbox servers is 3s, so sleep 4 then kill the
# connection holding the lock. That should allow the checksum query to finish.
my ($id) = $master_dbh->selectrow_array('select connection_id()');
system("sleep 4 ; /tmp/12345/use -e 'KILL $id' >/dev/null");
$output = output(
sub { pt_table_checksum::main(@args, qw(-t sakila.city)) },
stderr => 1,
trf => sub { return PerconaTest::normalize_checksum_results(@_) },
);
unlike(
$output,
qr/Lock wait timeout exceeded/,
"Lock wait timeout retried"
);
like(
$output,
qr/^0 0 600 1 0 sakila.city/m,
"Checksum retried after lock wait timeout"
);
# Reconnect to master since we just killed our own connection.
$master_dbh = $sb->get_dbh_for('master');
# #############################################################################
# Done.
# #############################################################################

View File

@@ -1,112 +0,0 @@
#!/usr/bin/env perl
# Tests how pt-table-checksum behaves when the table being checksummed is
# locked by another transaction: first when the lock is released in time
# for the checksum to be written, then when the lock is held long enough
# that the tool reports a lock wait timeout instead.
# All tests are skipped when the sandbox master cannot be reached.
BEGIN {
# Refuse to run unless PERCONA_TOOLKIT_BRANCH names an existing directory,
# then put its lib/ directory first in the module search path.
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use PerconaTest;
use Sandbox;
# $trunk is not declared in this file; presumably it is exported by
# PerconaTest -- TODO confirm.
require "$trunk/bin/pt-table-checksum";
# NOTE(review): $vp is never referenced again in this file.
my $vp = new VersionParser();
# $dsn_opts is likewise not declared here; presumably it also comes from
# PerconaTest -- TODO confirm.
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');
# Without a master connection nothing below can run, so skip everything;
# otherwise declare the three assertions made in this file.
if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 3;
}
my $output;
my $cnf = '/tmp/12345/my.sandbox.cnf';
# Arguments shared by both runs of the tool: checksum only sakila.city and
# use test.checksums as the --replicate table.
my @args = ('-F', $cnf, 'h=127.1', qw(--replicate test.checksums --create-replicate-table -t sakila.city));
$sb->create_dbs($dbh, ['test']);
# Open a transaction on this connection and take row locks on city.
$dbh->do('use sakila');
$dbh->do('begin');
$dbh->do('select * from city for update');
# city table is now locked until we commit. The child proc is going
# to wait 3 seconds for innodb_lock_wait_timeout, then it should try
# again. So if we commit at 4 seconds, the child should succeed and
# the checksum will appear in test.checksums.
# NOTE(review): fork() returns undef on failure, and !$pid is also true for
# undef, so a failed fork would run the child branch in this process.
my $pid = fork();
if ( !$pid ) {
# child: run the tool, then exit so the child never reaches the
# assertions below. The captured output is not used in this run.
my $output = output(
sub { pt_table_checksum::main(@args) },
stderr => 1,
);
exit 0;
}
# parent: hold the lock for 4 seconds, release it, then wait for the child.
sleep 4;
$dbh->do('commit');
waitpid ($pid, 0); # reap child
# The child should have written a checksum row for sakila.city.
my $row = $dbh->selectrow_hashref('select * from test.checksums');
ok(
$row && $row->{db} eq 'sakila' && $row->{tbl} eq 'city',
"Checksum after lock wait timeout"
);
# Repeat the test but this time let the retry fail to see that the
# failure is captured.
my $outfile = '/tmp/mk-table-checksum-output.txt';
# Remove any output file left over from an earlier run.
diag(`rm -rf $outfile >/dev/null`);
# Start from an empty checksum table and take the row locks again.
$dbh->do('truncate table test.checksums');
$dbh->do('begin');
$dbh->do('select * from city for update');
# NOTE(review): as above, a failed fork() is not distinguished from the
# child branch.
$pid = fork();
if ( !$pid ) {
# child: same run as before, but with file => $outfile; presumably that
# makes output() write what it captures to $outfile so the parent can
# read it after the child exits -- confirm against PerconaTest::output.
my $output = output(
sub { pt_table_checksum::main(@args) },
stderr => 1,
file => $outfile,
);
exit 0;
}
# parent: hold the lock for 8 seconds this time before releasing it.
sleep 8;
$dbh->do('commit');
waitpid ($pid, 0); # reap child
# This time no checksum row is expected in test.checksums.
$row = $dbh->selectrow_hashref('select * from test.checksums');
ok(
!defined $row,
"No checksum due to lock wait timeout"
);
# Read back what the child wrote and check that the error was reported.
$output = `cat $outfile`;
like(
$output,
qr/Lock wait timeout exceeded/i,
"Lock wait timeout exceeded error captured"
);
# Clean up the temporary output file.
diag(`rm -rf $outfile >/dev/null`);
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($dbh);
exit;