Merge ~percona-toolkit-dev/percona-toolkit/explain-checksum-chunks.

This commit is contained in:
Daniel Nichter
2012-06-10 13:25:44 -04:00
8 changed files with 582 additions and 193 deletions

View File

@@ -1889,8 +1889,12 @@ sub generate_asc_stmt {
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
if ( $args{asc_first} ) {
@asc_cols = $asc_cols[0];
PTDEBUG && _d('Ascending only first column');
@asc_cols = $asc_cols[0];
}
elsif ( my $n = $args{n_index_cols} ) {
PTDEBUG && _d('Ascending only first', $n, 'columns');
@asc_cols = @asc_cols[0..($n-1)];
}
PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
@@ -4113,6 +4117,7 @@ sub new {
%args,
tbl_struct => $tbl->{tbl_struct},
index => $index,
n_index_cols => $args{n_chunk_index_cols},
cols => \@cols,
asc_only => 1,
);
@@ -4202,6 +4207,7 @@ sub new {
last_ub_sql => $last_ub_sql,
ub_sql => $ub_sql,
nibble_sql => $nibble_sql,
explain_first_lb_sql => "EXPLAIN $first_lb_sql",
explain_ub_sql => "EXPLAIN $ub_sql",
explain_nibble_sql => $explain_nibble_sql,
resume_lb_sql => $resume_lb_sql,
@@ -4313,6 +4319,7 @@ sub nibble_index {
sub statements {
my ($self) = @_;
return {
explain_first_lower_boundary => $self->{explain_first_lb_sth},
nibble => $self->{nibble_sth},
explain_nibble => $self->{explain_nibble_sth},
upper_boundary => $self->{ub_sth},
@@ -4545,6 +4552,7 @@ sub _prepare_sths {
$self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
if ( !$self->{one_nibble} ) {
$self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
$self->{ub_sth} = $dbh->prepare($self->{ub_sql});
$self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
}
@@ -5201,6 +5209,22 @@ sub main {
}
}
# Parse --chunk-index INDEX:N where N is the number of
# left-most columns of INDEX to use.
# https://bugs.launchpad.net/percona-toolkit/+bug/1010232
my ($chunk_index, $n_chunk_index_cols)
= split(':', $o->get('chunk-index') || '');
if ( defined $chunk_index && $chunk_index eq '' ) {
$o->save_error('--chunk-index cannot be an empty string');
}
if ( defined $n_chunk_index_cols
&& (!$n_chunk_index_cols
|| $n_chunk_index_cols =~ m/\D/
|| $n_chunk_index_cols < 1) ) {
$o->save_error('Invalid number of --chunk-index columns: '
. $n_chunk_index_cols);
}
if ( !$o->get('help') ) {
if ( @ARGV ) {
$o->save_error('Specify only one DSN on the command line');
@@ -5832,6 +5856,7 @@ sub main {
my (%args) = @_;
my $tbl = $args{tbl};
my $nibble_iter = $args{NibbleIterator};
my $statements = $nibble_iter->statements();
if ( $o->get('dry-run') ) {
print "Not copying rows because this is a dry run.\n";
@@ -5843,7 +5868,6 @@ sub main {
if ( $o->get('print') ) {
# Print the checksum and next boundary statements.
my $statements = $nibble_iter->statements();
foreach my $sth ( sort keys %$statements ) {
next if $sth =~ m/^explain/;
if ( $statements->{$sth} ) {
@@ -5887,6 +5911,34 @@ sub main {
die $msg;
}
}
else { # chunking the table
if ( $o->get('check-plan') ) {
my $expl = explain_statement(
sth => $statements->{explain_first_lower_boundary},
tbl => $tbl,
vals => [],
);
if ( !$expl->{key}
|| lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
{
die "Cannot determine the key_len of the chunk index "
. "because MySQL chose "
. ($expl->{key} ? "the $expl->{key}" : "no") . " index "
. "instead of the " . $nibble_iter->nibble_index()
. " index for the first lower boundary statement. "
. "See --[no]check-plan in the documentation for more "
. "information.";
}
elsif ( !$expl->{key_len} ) {
die "The key_len of the $expl->{key} index is "
. (defined $expl->{key_len} ? "zero" : "NULL")
. ", but this should not be possible. "
. "See --[no]check-plan in the documentation for more "
. "information.";
}
$tbl->{key_len} = $expl->{key_len};
}
}
return 1; # continue nibbling table
},
@@ -5946,59 +5998,12 @@ sub main {
# Count every chunk, even if it's ultimately skipped, etc.
$tbl->{results}->{n_chunks}++;
# If the table is being chunk (i.e., it's not small enough to be
# consumed by one nibble), then check index usage and chunk size.
if ( !$nibble_iter->one_nibble() ) {
my $expl = explain_statement(
tbl => $tbl,
sth => $sth->{explain_nibble},
vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
# Die unless the nibble is safe.
nibble_is_safe(
%args,
OptionParser => $o,
);
# Ensure that MySQL is using the chunk index.
if ( lc($expl->{key} || '')
ne lc($nibble_iter->nibble_index() || '') ) {
my $msg
= "Aborting copying table $tbl->{name} at chunk "
. $nibble_iter->nibble_number()
. " because it is not safe to chunk. Chunking should "
. "use the "
. ($nibble_iter->nibble_index() || '?')
. " index, but MySQL EXPLAIN reports that "
. ($expl->{key} ? "the $expl->{key}" : "no")
. " index will be used for "
. $sth->{explain_nibble}->{Statement}
. " with values "
. join(", ", map { defined $_ ? $_ : "NULL" }
(@{$boundary->{lower}}, @{$boundary->{upper}}))
. "\n";
die $msg;
}
# Check chunk size limit if the upper boundary and next lower
# boundary are identical.
if ( $limit ) {
my $boundary = $nibble_iter->boundaries();
my $oversize_chunk
= $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
: 0;
if ( $oversize_chunk
&& $nibble_iter->identical_boundaries(
$boundary->{upper}, $boundary->{next_lower}) )
{
my $msg
= "Aborting copying table $tbl->{name} at chunk "
. $nibble_iter->nibble_number()
. " because the chunk is too large: MySQL estimates "
. ($expl->{rows} || 0) . "rows. The current chunk "
. "size limit is " . ($tbl->{chunk_size} * $limit)
. " rows (chunk size=$tbl->{chunk_size}"
. " * chunk size limit=$limit).\n";
die $msg;
}
}
}
# Exec and time the chunk checksum query.
$tbl->{nibble_time} = exec_nibble(
%args,
@@ -6103,7 +6108,8 @@ sub main {
Cxn => $cxn,
tbl => $orig_tbl,
chunk_size => $orig_tbl->{chunk_size},
chunk_index => $o->get('chunk-index'),
chunk_index => $chunk_index,
n_chunk_index_cols => $n_chunk_index_cols,
dml => $dml,
select => $select,
callbacks => $callbacks,
@@ -6301,6 +6307,82 @@ sub main {
# ############################################################################
# Subroutines.
# ############################################################################
# Die unless it is safe to copy the current nibble (chunk) of the table.
#
# Required arguments (each must be a true value):
#   Cxn            - required by the interface; not used by this sub
#   tbl            - table hashref; db, tbl, chunk_size and key_len are read
#   NibbleIterator - iterator whose current boundaries are EXPLAINed
#   OptionParser   - used to read --chunk-size-limit
#
# The nibble query is EXPLAINed via explain_statement() and the key, rows
# and key_len values of the returned hashref are checked:
#   1. when chunking, MySQL must use the chunk index;
#   2. with --chunk-size-limit, a nibble whose upper and next lower
#      boundaries are identical must not be estimated at or above
#      chunk size * limit rows;
#   3. when chunking and $tbl->{key_len} is set, MySQL must not use fewer
#      bytes of the index than that.
#
# These checks are fatal regardless of verbosity or of whether a previous
# nibble already failed: an unsafe nibble must never be reported as safe.
#
# Returns 1 if every check passes; dies with a description otherwise.
sub nibble_is_safe {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl, $nibble_iter, $o) = @args{qw(tbl NibbleIterator OptionParser)};

   # EXPLAIN the nibble query to get its row estimate and index.
   # XXX This call and others like it are relying on a Perl oddity.
   # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
   my $sth      = $nibble_iter->statements();
   my $boundary = $nibble_iter->boundaries();
   my $expl     = explain_statement(
      tbl  => $tbl,
      sth  => $sth->{explain_nibble},
      vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
   );

   my $chunking = !$nibble_iter->one_nibble();
   my $where    = "Error copying rows at chunk "
                . $nibble_iter->nibble_number()
                . " of $tbl->{db}.$tbl->{tbl} because ";

   # Ensure that MySQL is using the chunk index if the table is being chunked.
   if ( $chunking
        && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
      die $where . "MySQL chose "
         . ($expl->{key} ? "the $expl->{key}" : "no")
         . " index instead of the "
         . ($nibble_iter->nibble_index() || '?') . " index.\n";
   }

   # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
   # If single-chunking the table, this has already been checked, so it
   # shouldn't have changed.  If chunking the table with a non-unique key,
   # oversize chunks are possible.
   if ( my $limit = $o->get('chunk-size-limit') ) {
      my $max_rows = $tbl->{chunk_size} * $limit;
      my $est_rows = $expl->{rows} || 0;
      if ( $est_rows >= $max_rows
           && $nibble_iter->identical_boundaries($boundary->{upper},
                                                 $boundary->{next_lower}) ) {
         die $where . "it is oversized.  "
            . "The current chunk size limit is "
            . $max_rows
            . " rows (chunk size=$tbl->{chunk_size}"
            . " * chunk size limit=$limit), but MySQL estimates "
            . "that there are " . $est_rows
            . " rows in the chunk.\n";
      }
   }

   # Ensure that MySQL is still using the entire index.
   # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
   if ( $chunking
        && $tbl->{key_len}
        && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
      die $where . "MySQL used "
         . "only " . ($expl->{key_len} || 0) . " bytes "
         . "of the " . ($expl->{key} || '?') . " index instead of "
         . $tbl->{key_len} . ".  See the --[no]check-plan documentation "
         . "for more information.\n";
   }

   return 1; # safe
}
sub create_new_table{
my (%args) = @_;
my @required_args = qw(orig_tbl Cxn Quoter OptionParser TableParser);
@@ -7302,6 +7384,39 @@ type: time; default: 1
Sleep time between checks for L<"--max-lag">.
=item --[no]check-plan
default: yes
Check query execution plans for safety. By default, this option causes
the tool to run EXPLAIN before running queries that are meant to access
a small amount of data, but which could access many rows if MySQL chooses a bad
execution plan. These include the queries to determine chunk boundaries and the
chunk queries themselves. If it appears that MySQL will use a bad query
execution plan, the tool will skip the chunk of the table.
The tool uses several heuristics to determine whether an execution plan is bad.
The first is whether EXPLAIN reports that MySQL intends to use the desired index
to access the rows. If MySQL chooses a different index, the tool considers the
query unsafe.
The tool also checks how much of the index MySQL reports that it will use for
the query. The EXPLAIN output shows this in the key_len column. The tool
remembers the largest key_len seen, and skips chunks where MySQL reports that it
will use a smaller prefix of the index. This heuristic can be understood as
skipping chunks that have a worse execution plan than other chunks.
The tool prints a warning the first time a chunk is skipped due to
a bad execution plan in each table. Subsequent chunks are skipped silently,
although you can see the count of skipped chunks in the SKIPPED column in
the tool's output.
This option adds some setup work to each table and chunk. Although the work is
not intrusive for MySQL, it results in more round-trips to the server, which
consumes time. Making chunks too small will cause the overhead to become
relatively larger. It is therefore recommended that you not make chunks too
small, because the tool may take a very long time to complete if you do.
=item --[no]check-replication-filters
default: yes
@@ -7338,6 +7453,17 @@ behavior of choosing an index. The tool adds the index to the SQL statements in
a C<FORCE INDEX> clause. Be careful when using this option; a poor choice of
index could cause bad performance.
This option supports a special syntax to select a prefix of the index instead of
the entire index. The syntax is NAME:N, where NAME is the name of the index, and
N is the number of columns you wish to use. This works only for compound
indexes, and is useful in cases where a bug in the MySQL query optimizer
(planner) causes it to scan a large range of rows instead of using the index to
locate starting and ending points precisely. This problem sometimes occurs on
indexes with many columns, such as 4 or more. If this happens, the tool might
print a warning related to the L<"--[no]check-plan"> option. Instructing
the tool to use only the first N columns from the index is a workaround for
the bug in some cases.
=item --chunk-size
type: size; default: 1000

View File

@@ -2290,8 +2290,12 @@ sub generate_asc_stmt {
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
if ( $args{asc_first} ) {
@asc_cols = $asc_cols[0];
PTDEBUG && _d('Ascending only first column');
@asc_cols = $asc_cols[0];
}
elsif ( my $n = $args{n_index_cols} ) {
PTDEBUG && _d('Ascending only first', $n, 'columns');
@asc_cols = @asc_cols[0..($n-1)];
}
PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
@@ -3678,6 +3682,7 @@ sub new {
%args,
tbl_struct => $tbl->{tbl_struct},
index => $index,
n_index_cols => $args{n_chunk_index_cols},
cols => \@cols,
asc_only => 1,
);
@@ -3767,6 +3772,7 @@ sub new {
last_ub_sql => $last_ub_sql,
ub_sql => $ub_sql,
nibble_sql => $nibble_sql,
explain_first_lb_sql => "EXPLAIN $first_lb_sql",
explain_ub_sql => "EXPLAIN $ub_sql",
explain_nibble_sql => $explain_nibble_sql,
resume_lb_sql => $resume_lb_sql,
@@ -3878,6 +3884,7 @@ sub nibble_index {
sub statements {
my ($self) = @_;
return {
explain_first_lower_boundary => $self->{explain_first_lb_sth},
nibble => $self->{nibble_sth},
explain_nibble => $self->{explain_nibble_sth},
upper_boundary => $self->{ub_sth},
@@ -4110,6 +4117,7 @@ sub _prepare_sths {
$self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
if ( !$self->{one_nibble} ) {
$self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
$self->{ub_sth} = $dbh->prepare($self->{ub_sql});
$self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
}
@@ -6162,6 +6170,22 @@ sub main {
}
}
# Parse --chunk-index INDEX:N where N is the number of
# left-most columns of INDEX to use.
# https://bugs.launchpad.net/percona-toolkit/+bug/1010232
my ($chunk_index, $n_chunk_index_cols)
= split(':', $o->get('chunk-index') || '');
if ( defined $chunk_index && $chunk_index eq '' ) {
$o->save_error('--chunk-index cannot be an empty string');
}
if ( defined $n_chunk_index_cols
&& (!$n_chunk_index_cols
|| $n_chunk_index_cols =~ m/\D/
|| $n_chunk_index_cols < 1) ) {
$o->save_error('Invalid number of --chunk-index columns: '
. $n_chunk_index_cols);
}
if ( !$o->get('help') ) {
if ( @ARGV > 1 ) {
$o->save_error("More than one host specified; only one allowed");
@@ -6640,6 +6664,7 @@ sub main {
my (%args) = @_;
my $tbl = $args{tbl};
my $nibble_iter = $args{NibbleIterator};
my $statements = $nibble_iter->statements();
my $oktonibble = 1;
if ( $last_chunk ) { # resuming
@@ -6668,7 +6693,7 @@ sub main {
print "--\n",
"-- $tbl->{db}.$tbl->{tbl}\n",
"--\n\n";
my $statements = $nibble_iter->statements();
foreach my $sth ( sort keys %$statements ) {
next if $sth =~ m/^explain/;
if ( $statements->{$sth} ) {
@@ -6726,6 +6751,34 @@ sub main {
$oktonibble = 0;
}
}
else { # chunking the table
if ( $o->get('check-plan') ) {
my $expl = explain_statement(
sth => $statements->{explain_first_lower_boundary},
tbl => $tbl,
vals => [],
);
if ( !$expl->{key}
|| lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
{
die "Cannot determine the key_len of the chunk index "
. "because MySQL chose "
. ($expl->{key} ? "the $expl->{key}" : "no") . " index "
. "instead of the " . $nibble_iter->nibble_index()
. " index for the first lower boundary statement. "
. "See --[no]check-plan in the documentation for more "
. "information.";
}
elsif ( !$expl->{key_len} ) {
die "The key_len of the $expl->{key} index is "
. (defined $expl->{key_len} ? "zero" : "NULL")
. ", but this should not be possible. "
. "See --[no]check-plan in the documentation for more "
. "information.";
}
$tbl->{key_len} = $expl->{key_len};
}
}
if ( $oktonibble && $o->get('empty-replicate-table') ) {
use_repl_db(
@@ -6779,16 +6832,14 @@ sub main {
ne lc($nibble_iter->nibble_index() || '') ) {
PTDEBUG && _d('Cannot nibble next chunk, aborting table');
if ( $o->get('quiet') < 2 ) {
my $msg
= "Aborting table $tbl->{db}.$tbl->{tbl} at chunk "
warn ts("Aborting table $tbl->{db}.$tbl->{tbl} at chunk "
. ($nibble_iter->nibble_number() + 1)
. " because it is not safe to chunk. Chunking should "
. "use the "
. ($nibble_iter->nibble_index() || '?')
. " index, but MySQL EXPLAIN reports that "
. " index, but MySQL chose "
. ($expl->{key} ? "the $expl->{key}" : "no")
. " index will be used.\n";
warn ts($msg);
. " index.\n");
}
$tbl->{checksum_results}->{errors}++;
return 0; # stop nibbling table
@@ -6833,43 +6884,13 @@ sub main {
return 0; # next boundary
}
# If the table is being chunk (i.e., it's not small enough to be
# consumed by one nibble), then check index usage and chunk size.
# XXX This call and others like it are relying on a Perl oddity.
# See https://bugs.launchpad.net/percona-toolkit/+bug/987393
if ( !$nibble_iter->one_nibble() ) {
my $expl = explain_statement(
tbl => $tbl,
sth => $sth->{explain_nibble},
vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
# Skip this nibble unless it's safe.
return 0 unless nibble_is_safe(
%args,
OptionParser => $o,
);
my $oversize_chunk
= $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
: 0;
# Ensure that MySQL is using the chunk index.
if ( lc($expl->{key} || '')
ne lc($nibble_iter->nibble_index() || '') ) {
PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
"$tbl->{db}.$tbl->{tbl} not using chunk index, skipping");
return 0; # next boundary
}
# Check chunk size limit if the upper boundary and next lower
# boundary are identical.
if ( $limit ) {
my $boundary = $nibble_iter->boundaries();
if ( $nibble_iter->identical_boundaries(
$boundary->{upper}, $boundary->{next_lower})
&& $oversize_chunk ) {
PTDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
"$tbl->{db}.$tbl->{tbl} is too large, skipping");
return 0; # next boundary
}
}
}
# Exec and time the chunk checksum query.
# Exec and time the nibble.
$tbl->{nibble_time} = exec_nibble(
%args,
Retry => $retry,
@@ -6951,7 +6972,7 @@ sub main {
$tbl->{chunk_size} = 1;
# This warning is printed once per table.
if ( !$tbl->{warned_slow} && $o->get('quiet') < 2 ) {
if ( !$tbl->{warned}->{slow}++ && $o->get('quiet') < 2 ) {
warn ts("Checksum queries for table "
. "$tbl->{db}.$tbl->{tbl} are executing very slowly. "
. "--chunk-size has been automatically reduced to 1. "
@@ -6961,7 +6982,6 @@ sub main {
. "selected $cnt rows and took "
. sprintf('%.3f', $tbl->{nibble_time})
. " seconds to execute.\n");
$tbl->{warned_slow} = 1;
}
}
@@ -7096,7 +7116,8 @@ sub main {
Cxn => $master_cxn,
tbl => $tbl,
chunk_size => $tbl->{chunk_size},
chunk_index => $o->get('chunk-index'),
chunk_index => $chunk_index,
n_chunk_index_cols => $n_chunk_index_cols,
dml => $checksum_dml,
select => $checksum_cols,
past_dml => $checksum_dml,
@@ -7183,6 +7204,84 @@ sub ts {
return $msg ? "$ts $msg" : $ts;
}
# Decide whether the current nibble (chunk) is safe to checksum.
#
# Required arguments (each must be a true value):
#   Cxn            - required by the interface; not used by this sub
#   tbl            - table hashref; db, tbl, chunk_size and key_len are
#                    read, and per-table counters under {warned} are bumped
#   NibbleIterator - iterator whose current boundaries are EXPLAINed
#   OptionParser   - used to read --chunk-size-limit and --quiet
#
# The nibble query is EXPLAINed via explain_statement() and the key, rows
# and key_len values of the returned hashref are checked:
#   1. when chunking, MySQL must use the chunk index;
#   2. with --chunk-size-limit, a nibble whose upper and next lower
#      boundaries are identical must not be estimated at or above
#      chunk size * limit rows;
#   3. when chunking and $tbl->{key_len} is set, MySQL must not use fewer
#      bytes of the index than that.
#
# For each kind of failure a warning is printed only the first time it
# happens for the table, and only if --quiet is less than 2.
#
# Returns 1 if the nibble is safe, 0 if it should be skipped.
sub nibble_is_safe {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl, $nibble_iter, $o) = @args{qw(tbl NibbleIterator OptionParser)};

   # EXPLAIN the checksum chunk query to get its row estimate and index.
   # XXX This call and others like it are relying on a Perl oddity.
   # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
   my $sth      = $nibble_iter->statements();
   my $boundary = $nibble_iter->boundaries();
   my $expl     = explain_statement(
      tbl  => $tbl,
      sth  => $sth->{explain_nibble},
      vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
   );

   # Ensure that MySQL is using the chunk index if the table is being chunked.
   if ( !$nibble_iter->one_nibble()
        && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
      if ( !$tbl->{warned}->{not_using_chunk_index}++
           && $o->get('quiet') < 2 ) {
         warn ts("Skipping chunk " . $nibble_iter->nibble_number()
            . " of $tbl->{db}.$tbl->{tbl} because MySQL chose "
            . ($expl->{key} ? "the $expl->{key}" : "no")
            . " index instead of the "
            . ($nibble_iter->nibble_index() || '?') . " index.\n");
      }
      return 0; # not safe
   }

   # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
   # If single-chunking the table, this has already been checked, so it
   # shouldn't have changed.  If chunking the table with a non-unique key,
   # oversize chunks are possible.
   if ( my $limit = $o->get('chunk-size-limit') ) {
      my $max_rows = $tbl->{chunk_size} * $limit;
      my $est_rows = $expl->{rows} || 0;
      if ( $est_rows >= $max_rows
           && $nibble_iter->identical_boundaries($boundary->{upper},
                                                 $boundary->{next_lower}) ) {
         if ( !$tbl->{warned}->{oversize_chunk}++
              && $o->get('quiet') < 2 ) {
            warn ts("Skipping chunk " . $nibble_iter->nibble_number()
               . " of $tbl->{db}.$tbl->{tbl} because it is oversized.  "
               . "The current chunk size limit is "
               . $max_rows
               . " rows (chunk size=$tbl->{chunk_size}"
               . " * chunk size limit=$limit), but MySQL estimates "
               . "that there are " . $est_rows
               . " rows in the chunk.\n");
         }
         return 0; # not safe
      }
   }

   # Ensure that MySQL is still using the entire index.
   # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
   if ( !$nibble_iter->one_nibble()
        && $tbl->{key_len}
        && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
      if ( !$tbl->{warned}->{key_len}++
           && $o->get('quiet') < 2 ) {
         warn ts("Skipping chunk " . $nibble_iter->nibble_number()
            . " of $tbl->{db}.$tbl->{tbl} because MySQL used "
            . "only " . ($expl->{key_len} || 0) . " bytes "
            . "of the " . ($expl->{key} || '?') . " index instead of "
            . $tbl->{key_len} . ".  See the --[no]check-plan documentation "
            . "for more information.\n");
      }
      return 0; # not safe
   }

   return 1; # safe
}
sub exec_nibble {
my (%args) = @_;
my @required_args = qw(Cxn tbl NibbleIterator Retry Quoter OptionParser);
@@ -7249,7 +7348,7 @@ sub exec_nibble {
&& (!$warn_code{$code}->{pattern}
|| $message =~ m/$warn_code{$code}->{pattern}/) )
{
if ( !$tbl->{"warned_code_$code"} ) { # warn once per table
if ( !$tbl->{warned}->{$code}++ ) { # warn once per table
if ( $o->get('quiet') < 2 ) {
warn ts("Checksum query for table $tbl->{db}.$tbl->{tbl} "
. "caused MySQL error $code: "
@@ -7258,7 +7357,6 @@ sub exec_nibble {
: $message)
. "\n");
}
$tbl->{"warned_code_$code"} = 1;
$tbl->{checksum_results}->{errors}++;
}
}
@@ -8122,6 +8220,39 @@ type: time; default: 1; group: Throttle
Sleep time between checks for L<"--max-lag">.
=item --[no]check-plan
default: yes
Check query execution plans for safety. By default, this option causes
pt-table-checksum to run EXPLAIN before running queries that are meant to access
a small amount of data, but which could access many rows if MySQL chooses a bad
execution plan. These include the queries to determine chunk boundaries and the
chunk queries themselves. If it appears that MySQL will use a bad query
execution plan, the tool will skip the chunk of the table.
The tool uses several heuristics to determine whether an execution plan is bad.
The first is whether EXPLAIN reports that MySQL intends to use the desired index
to access the rows. If MySQL chooses a different index, the tool considers the
query unsafe.
The tool also checks how much of the index MySQL reports that it will use for
the query. The EXPLAIN output shows this in the key_len column. The tool
remembers the largest key_len seen, and skips chunks where MySQL reports that it
will use a smaller prefix of the index. This heuristic can be understood as
skipping chunks that have a worse execution plan than other chunks.
The tool prints a warning the first time a chunk is skipped due to
a bad execution plan in each table. Subsequent chunks are skipped silently,
although you can see the count of skipped chunks in the SKIPPED column in
the tool's output.
This option adds some setup work to each table and chunk. Although the work is
not intrusive for MySQL, it results in more round-trips to the server, which
consumes time. Making chunks too small will cause the overhead to become
relatively larger. It is therefore recommended that you not make chunks too
small, because the tool may take a very long time to complete if you do.
=item --[no]check-replication-filters
default: yes; group: Safety
@@ -8171,12 +8302,24 @@ when using this option; a poor choice of index could cause bad performance.
This is probably best to use when you are checksumming only a single table, not
an entire server.
This option supports a special syntax to select a prefix of the index instead of
the entire index. The syntax is NAME:N, where NAME is the name of the index, and
N is the number of columns you wish to use. This works only for compound
indexes, and is useful in cases where a bug in the MySQL query optimizer
(planner) causes it to scan a large range of rows instead of using the index to
locate starting and ending points precisely. This problem sometimes occurs on
indexes with many columns, such as 4 or more. If this happens, the tool might
print a warning related to the L<"--[no]check-plan"> option. Instructing
the tool to use only the first N columns from the index is a workaround for
the bug in some cases.
=item --chunk-size
type: size; default: 1000
Number of rows to select for each checksum query. Allowable suffixes are
k, M, G.
k, M, G. You should not use this option in most cases; prefer L<"--chunk-time">
instead.
This option can override the default behavior, which is to adjust chunk size
dynamically to try to make chunks run in exactly L<"--chunk-time"> seconds.
@@ -8192,6 +8335,9 @@ clause that matches only 1,000 of the values, and that chunk will be at least
10,000 rows large. Such a chunk will probably be skipped because of
L<"--chunk-size-limit">.
Selecting a small chunk size will cause the tool to become much slower, in part
because of the setup work required for L<"--[no]check-plan">.
=item --chunk-size-limit
type: float; default: 2.0; group: Safety

View File

@@ -126,6 +126,7 @@ sub new {
%args,
tbl_struct => $tbl->{tbl_struct},
index => $index,
n_index_cols => $args{n_chunk_index_cols},
cols => \@cols,
asc_only => 1,
);
@@ -235,6 +236,7 @@ sub new {
last_ub_sql => $last_ub_sql,
ub_sql => $ub_sql,
nibble_sql => $nibble_sql,
explain_first_lb_sql => "EXPLAIN $first_lb_sql",
explain_ub_sql => "EXPLAIN $ub_sql",
explain_nibble_sql => $explain_nibble_sql,
resume_lb_sql => $resume_lb_sql,
@@ -357,6 +359,7 @@ sub nibble_index {
sub statements {
my ($self) = @_;
return {
explain_first_lower_boundary => $self->{explain_first_lb_sth},
nibble => $self->{nibble_sth},
explain_nibble => $self->{explain_nibble_sth},
upper_boundary => $self->{ub_sth},
@@ -613,6 +616,7 @@ sub _prepare_sths {
$self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
if ( !$self->{one_nibble} ) {
$self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
$self->{ub_sth} = $dbh->prepare($self->{ub_sql});
$self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
}

View File

@@ -41,6 +41,7 @@ sub new {
# * tbl_struct Hashref returned from TableParser::parse().
# * cols Arrayref of columns to SELECT from the table
# * index Which index to ascend; optional.
# * n_index_cols The number of left-most index columns to use.
# * asc_only Whether to ascend strictly, that is, the WHERE clause for
# the asc_stmt will fetch the next row > the given arguments.
# The option is to fetch the row >=, which could loop
@@ -77,8 +78,12 @@ sub generate_asc_stmt {
# These are the columns we'll ascend.
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
if ( $args{asc_first} ) {
@asc_cols = $asc_cols[0];
PTDEBUG && _d('Ascending only first column');
@asc_cols = $asc_cols[0];
}
elsif ( my $n = $args{n_index_cols} ) {
PTDEBUG && _d('Ascending only first', $n, 'columns');
@asc_cols = @asc_cols[0..($n-1)];
}
PTDEBUG && _d('Will ascend columns', join(', ', @asc_cols));

View File

@@ -9,7 +9,7 @@ BEGIN {
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More tests => 24;
use Test::More tests => 25;
use TableParser;
use TableNibbler;
@@ -297,6 +297,34 @@ is_deeply(
'Alternate index with asc_first on sakila.rental',
);
is_deeply(
$n->generate_asc_stmt(
tbl_struct => $t,
cols => $t->{cols},
index => 'rental_date',
n_index_cols => 2,
),
{
cols => [qw(rental_id rental_date inventory_id customer_id
return_date staff_id last_update)],
index => 'rental_date',
where => '((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))',
slice => [qw(1 1 2)],
scols => [qw(rental_date rental_date inventory_id)],
boundaries => {
'<' =>
'((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?))',
'<=' =>
'((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?))',
'>' =>
'((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?))',
'>=' =>
'((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?))'
},
},
'Use only N left-most columns of the index',
);
is_deeply(
$n->generate_asc_stmt(
tbl_struct => $t,

View File

@@ -25,7 +25,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 12;
plan tests => 14;
}
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
@@ -156,6 +156,50 @@ ok(
"Smarter chunk index selection (bug 978432)"
);
# #############################################################################
# PK but bad explain plan.
# https://bugs.launchpad.net/percona-toolkit/+bug/1010232
# #############################################################################
$sb->load_file('master', "t/pt-table-checksum/samples/bad-plan-bug-1010232.sql");
PerconaTest::wait_for_table($dbh, "bad_plan.t", "(c1,c2,c3,c4)=(1,1,2,100)");
$output = output(sub {
$exit_status = pt_table_checksum::main(
$master_dsn, '--max-load', '',
qw(--lock-wait-timeout 3 --chunk-size 10 -t bad_plan.t)
) },
stderr => 1,
);
is(
$exit_status,
0,
"Bad key_len chunks are not errors"
);
cmp_ok(
PerconaTest::count_checksum_results($output, 'skipped'),
'>',
1,
"Skipped bad key_len chunks"
);
# Use --chunk-index INDEX:N to use only the first N left-most columns of the
# index (here rental_date:2). Can't use bad_plan.t, however, because its rows
# are almost all identical, so using 3 of 4 pk cols creates an infinite loop.
ok(
no_diff(
sub {
pt_table_checksum::main(
$master_dsn, '--max-load', '',
qw(--lock-wait-timeout 3 --chunk-size 5000 -t sakila.rental),
qw(--chunk-index rental_date:2 --explain --explain));
},
"t/pt-table-checksum/samples/n-chunk-index-cols.txt",
),
"--chunk-index index:n"
);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -0,0 +1,17 @@
DROP DATABASE IF EXISTS bad_plan;
CREATE DATABASE bad_plan;
USE bad_plan;
CREATE TABLE t (
`c1` smallint(5) unsigned NOT NULL,
`c2` mediumint(8) unsigned NOT NULL DEFAULT '0',
`c3` smallint(5) unsigned NOT NULL DEFAULT '0',
`c4` smallint(5) unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (`c1`,`c2`,`c3`,`c4`)
) ENGINE=InnoDB;
INSERT INTO t VALUES
(1,1,1,1),(1,1,1,2),(1,1,1,3),(1,1,1,4),(1,1,1,5),(1,1,1,6),(1,1,1,7),(1,1,1,8),(1,1,1,9),(1,1,1,10),(1,1,1,11),(1,1,1,12),(1,1,1,13),(1,1,1,14),(1,1,1,15),(1,1,1,16),(1,1,1,17),(1,1,1,18),(1,1,1,19),(1,1,1,20),(1,1,1,21),(1,1,1,22),(1,1,1,23),(1,1,1,24),(1,1,1,25),(1,1,1,26),(1,1,1,27),(1,1,1,28),(1,1,1,29),(1,1,1,30),(1,1,1,31),(1,1,1,32),(1,1,1,33),(1,1,1,34),(1,1,1,35),(1,1,1,36),(1,1,1,37),(1,1,1,38),(1,1,1,39),(1,1,1,40),(1,1,1,41),(1,1,1,42),(1,1,1,43),(1,1,1,44),(1,1,1,45),(1,1,1,46),(1,1,1,47),(1,1,1,48),(1,1,1,49),(1,1,1,50),(1,1,1,51),(1,1,1,52),(1,1,1,53),(1,1,1,54),(1,1,1,55),(1,1,1,56),(1,1,1,57),(1,1,1,58),(1,1,1,59),(1,1,1,60),(1,1,1,61),(1,1,1,62),(1,1,1,63),(1,1,1,64),(1,1,1,65),(1,1,1,66),(1,1,1,67),(1,1,1,68),(1,1,1,69),(1,1,1,70),(1,1,1,71),(1,1,1,72),(1,1,1,73),(1,1,1,74),(1,1,1,75),(1,1,1,76),(1,1,1,77),(1,1,1,78),(1,1,1,79),(1,1,1,80),(1,1,1,81),(1,1,1,82),(1,1,1,83),(1,1,1,84),(1,1,1,85),(1,1,1,86),(1,1,1,87),(1,1,1,88),(1,1,1,89),(1,1,1,90),(1,1,1,91),(1,1,1,92),(1,1,1,93),(1,1,1,94),(1,1,1,95),(1,1,1,96),(1,1,1,97),(1,1,1,98),(1,1,1,99),(1,1,1,100),
(1,1,2,1),(1,1,2,2),(1,1,2,3),(1,1,2,4),(1,1,2,5),(1,1,2,6),(1,1,2,7),(1,1,2,8),(1,1,2,9),(1,1,2,10),(1,1,2,11),(1,1,2,12),(1,1,2,13),(1,1,2,14),(1,1,2,15),(1,1,2,16),(1,1,2,17),(1,1,2,18),(1,1,2,19),(1,1,2,20),(1,1,2,21),(1,1,2,22),(1,1,2,23),(1,1,2,24),(1,1,2,25),(1,1,2,26),(1,1,2,27),(1,1,2,28),(1,1,2,29),(1,1,2,30),(1,1,2,31),(1,1,2,32),(1,1,2,33),(1,1,2,34),(1,1,2,35),(1,1,2,36),(1,1,2,37),(1,1,2,38),(1,1,2,39),(1,1,2,40),(1,1,2,41),(1,1,2,42),(1,1,2,43),(1,1,2,44),(1,1,2,45),(1,1,2,46),(1,1,2,47),(1,1,2,48),(1,1,2,49),(1,1,2,50),(1,1,2,51),(1,1,2,52),(1,1,2,53),(1,1,2,54),(1,1,2,55),(1,1,2,56),(1,1,2,57),(1,1,2,58),(1,1,2,59),(1,1,2,60),(1,1,2,61),(1,1,2,62),(1,1,2,63),(1,1,2,64),(1,1,2,65),(1,1,2,66),(1,1,2,67),(1,1,2,68),(1,1,2,69),(1,1,2,70),(1,1,2,71),(1,1,2,72),(1,1,2,73),(1,1,2,74),(1,1,2,75),(1,1,2,76),(1,1,2,77),(1,1,2,78),(1,1,2,79),(1,1,2,80),(1,1,2,81),(1,1,2,82),(1,1,2,83),(1,1,2,84),(1,1,2,85),(1,1,2,86),(1,1,2,87),(1,1,2,88),(1,1,2,89),(1,1,2,90),(1,1,2,91),(1,1,2,92),(1,1,2,93),(1,1,2,94),(1,1,2,95),(1,1,2,96),(1,1,2,97),(1,1,2,98),(1,1,2,99),(1,1,2,100);
ANALYZE TABLE bad_plan.t;

View File

@@ -0,0 +1,19 @@
--
-- sakila.rental
--
REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `rental_id`, `rental_date`, `inventory_id`, `customer_id`, `return_date`, `staff_id`, `last_update` + 0, CONCAT(ISNULL(`return_date`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) AND ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` <= ?)) /*checksum chunk*/
REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` < ?) OR (`rental_date` = ? AND `inventory_id` < ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past lower chunk*/
REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` > ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` /*past upper chunk*/
SELECT /*!40001 SQL_NO_CACHE */ `rental_date`, `rental_date`, `inventory_id` FROM `sakila`.`rental` FORCE INDEX(`rental_date`) WHERE ((`rental_date` > ?) OR (`rental_date` = ? AND `inventory_id` >= ?)) ORDER BY `rental_date`, `inventory_id`, `customer_id` LIMIT ?, 2 /*next chunk boundary*/
1 2005-05-24 22:53:30,2005-05-24 22:53:30,367 2005-07-09 01:17:08,2005-07-09 01:17:08,2388
2 2005-07-09 01:19:03,2005-07-09 01:19:03,3438 2005-07-31 17:51:23,2005-07-31 17:51:23,2396
3 2005-07-31 17:53:51,2005-07-31 17:53:51,928 2005-08-22 13:58:23,2005-08-22 13:58:23,2553
4 2005-08-22 13:59:19,2005-08-22 13:59:19,3704 2006-02-14 15:16:03,2006-02-14 15:16:03,4568
5 2005-05-24 22:53:30,2005-05-24 22:53:30,367
6 2006-02-14 15:16:03,2006-02-14 15:16:03,4568