Use --chunk-size-limit || 1 for checking if a table can be done in one chunk to avoid chunking empty tables. Checksums for empty tables are now written.

This commit is contained in:
Daniel Nichter
2012-05-10 09:55:42 -06:00
parent 5079930b72
commit f4957ba911
5 changed files with 124 additions and 19 deletions

View File

@@ -3727,8 +3727,8 @@ sub next {
while ( $self->{have_rows} || $self->_next_boundaries() ) {
if ( !$self->{have_rows} ) {
$self->{nibbleno}++;
PTDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
join(', ', (@{$self->{lower}}, @{$self->{upper}})));
PTDEBUG && _d('Nibble:', $self->{nibble_sth}->{Statement}, 'params:',
join(', ', (@{$self->{lower} || []}, @{$self->{upper} || []})));
if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
$self->{have_rows} = $callback->(%callback_args);
}
@@ -3870,8 +3870,9 @@ sub can_nibble {
$mysql_index = undef;
}
my $chunk_size_limit = $o->get('chunk-size-limit') || 1;
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
? $row_est <= $chunk_size * $o->get('chunk-size-limit')
? $row_est <= $chunk_size * $chunk_size_limit
: 0;
PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
@@ -4056,12 +4057,10 @@ sub _get_bounds {
if ( !$self->{next_lower} ) {
PTDEBUG && _d('At end of table, or no more boundaries to resume');
$self->{no_more_boundaries} = 1;
$self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
$self->{no_more_boundaries} = 1;
$self->{no_more_boundaries} = 1;
}
return;
@@ -6338,10 +6337,28 @@ sub main {
my $total_rows = 0;
my $total_time = 0;
my $total_rate = 0;
my $limit = $o->get('chunk-size-limit');
my $tn = new TableNibbler(TableParser => $tp, Quoter => $q);
my $retry = new Retry();
# --chunk-size-limit has two purposes. The 1st, as documented, is
# to prevent oversized chunks when the chunk index is not unique.
# The 2nd is to determine if the table can be processed in one chunk
# (WHERE 1=1 instead of nibbling). This creates a problem when
# the user does --chunk-size-limit=0 to disable the 1st, documented
# purpose because, apparently, they're using non-unique indexes and
# they don't care about potentially large chunks. But disabling the
# 1st purpose adversely affects the 2nd purpose because 0 * the chunk size
# will always be zero, so tables will only be single-chunked if EXPLAIN
# says there are 0 rows, but sometimes EXPLAIN says there is 1 row
# even when the table is empty. This wouldn't matter except that nibbling
# an empty table doesn't currently work because there are no boundaries,
# so no checksum is written for the empty table. To fix this and
# preserve the two purposes of this option, usages of the 2nd purpose
# do || 1 so the limit is never 0 and empty tables are single-chunked.
# See: https://bugs.launchpad.net/percona-toolkit/+bug/987393
# This is used for the 1st purpose of --chunk-size-limit:
my $limit = $o->get('chunk-size-limit');
# ########################################################################
# Callbacks for each table's nibble iterator. All checksum work is done
# in these callbacks and the subs that they call.
@@ -6394,9 +6411,16 @@ sub main {
else {
if ( $nibble_iter->one_nibble() ) {
PTDEBUG && _d('Getting table row estimate on replicas');
my $chunk_size_limit = $o->get('chunk-size-limit');
# This is used for the 2nd purpose for --chunk-size-limit;
# see the large comment block above near my $limit = $o->...
my $chunk_size_limit = $o->get('chunk-size-limit') || 1;
my @too_large;
foreach my $slave ( @$slaves ) {
# TODO: This duplicates NibbleIterator::can_nibble();
# probably best to have 1 code path to determine if
# a given table is oversized on a given host.
my ($n_rows) = NibbleIterator::get_row_estimate(
Cxn => $slave,
tbl => $tbl,

View File

@@ -293,12 +293,14 @@ sub next {
# the next nibble.
if ( !$self->{have_rows} ) {
$self->{nibbleno}++;
PTDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
join(', ', (@{$self->{lower}}, @{$self->{upper}})));
PTDEBUG && _d('Nibble:', $self->{nibble_sth}->{Statement}, 'params:',
join(', ', (@{$self->{lower} || []}, @{$self->{upper} || []})));
if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
$self->{have_rows} = $callback->(%callback_args);
}
else {
# XXX This call and others like it are relying on a Perl oddity.
# See https://bugs.launchpad.net/percona-toolkit/+bug/987393
$self->{nibble_sth}->execute(@{$self->{lower}}, @{$self->{upper}});
$self->{have_rows} = $self->{nibble_sth}->rows();
}
@@ -449,9 +451,12 @@ sub can_nibble {
}
# Can all those rows be nibbled in one chunk? If one_nibble is defined,
# then do as it says; else, look at the chunk size limit.
# then do as it says; else, look at the chunk size limit. If the chunk
# size limit is disabled (=0), then use the chunk size because there
# always needs to be a limit to the one-chunk table.
my $chunk_size_limit = $o->get('chunk-size-limit') || 1;
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
? $row_est <= $chunk_size * $o->get('chunk-size-limit')
? $row_est <= $chunk_size * $chunk_size_limit
: 0;
PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
@@ -654,6 +659,7 @@ sub _get_bounds {
# This happens if we resume from the end of the table, or if the
# last chunk for resuming isn't bounded.
PTDEBUG && _d('At end of table, or no more boundaries to resume');
$self->{no_more_boundaries} = 1;
# Get the real last upper boundary, i.e. the last row of the table
# at this moment. If rows are inserted after, we won't see them.
@@ -662,9 +668,6 @@ sub _get_bounds {
# boundary of the table (we already have the first).
$self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
$self->{no_more_boundaries} = 1;
$self->{no_more_boundaries} = 1;
}
return;

View File

@@ -41,7 +41,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 15;
plan tests => 16;
}
my $q = new Quoter();
@@ -252,6 +252,29 @@ ok(
"No more boundaries"
);
# #############################################################################
# Empty table
# https://bugs.launchpad.net/percona-toolkit/+bug/987393
# #############################################################################
$sb->load_file('master', "t/pt-table-checksum/samples/empty-table-bug-987393.sql");
PerconaTest::wait_for_table($dbh, "test.test_full", "id=1");
$ni = make_nibble_iter(
db => 'test',
tbl => 'test_empty',
argv => [qw(--databases test --chunk-size-limit 0)],
);
@rows = ();
for (1..5) {
push @rows, $ni->next();
}
is_deeply(
\@rows,
[],
"Empty table"
);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -38,7 +38,7 @@ elsif ( !$slave_dbh ) {
plan skip_all => 'Cannot connect to sandbox slave1';
}
else {
plan tests => 2;
plan tests => 5;
}
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
@@ -67,16 +67,53 @@ $output = output(
is(
$exit_status,
0,
"Bug 995274: zero exit status"
"Bug 995274 (undef array): zero exit status"
);
cmp_ok(
PerconaTest::count_checksum_results($output, 'rows'),
'>',
1,
"Bug 995274: checksummed rows"
"Bug 995274 (undef array): checksummed rows"
);
# #############################################################################
# https://bugs.launchpad.net/percona-toolkit/+bug/987393
# Empty tables cause "undefined value as an ARRAY" errors
# #############################################################################
$master_dbh->do("DROP DATABASE IF EXISTS percona"); # clear old checksums
$sb->load_file('master', "$sample/empty-table-bug-987393.sql");
PerconaTest::wait_for_table($slave_dbh, "test.test_full", "id=1");
$output = output(
sub { $exit_status = pt_table_checksum::main(
@args, qw(-d test --chunk-size-limit 0)) },
stderr => 1,
);
is(
$exit_status,
0,
"Bug 987393 (empty table): zero exit status"
);
is(
PerconaTest::count_checksum_results($output, 'errors'),
0,
"Bug 987393 (empty table): no errors"
);
my $rows = $master_dbh->selectall_arrayref("SELECT db, tbl, chunk, master_crc, master_cnt FROM percona.checksums ORDER BY db, tbl, chunk");
is_deeply(
$rows,
[
['test', 'test_empty', '1', '0', '0'], # empty
['test', 'test_full', '1', 'ac967054', '1'], # row
],
"Bug 987393 (empty table): checksums"
) or print STDERR Dumper($rows);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -0,0 +1,18 @@
-- Test fixture for bug 987393: recreates the `test` database with one
-- table that has no rows and one table that has a single row.
DROP DATABASE IF EXISTS test;
CREATE DATABASE test;
USE test;
-- No rows are inserted into this table.
CREATE TABLE `test_empty` (
`id` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
-- Receives exactly one row (id=1) from the INSERT below.
CREATE TABLE `test_full` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`d` text NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=2 ;
INSERT INTO `test_full` (`id`, `d`) VALUES
(1, '2');