mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-25 05:44:59 +00:00
Implement --chunk-index name:n in pt-osc. Fix pod in pt-table-checksum.
This commit is contained in:
@@ -5127,6 +5127,22 @@ sub main {
|
||||
}
|
||||
}
|
||||
|
||||
# Parse --chunk-index INDEX:N where N is the number of
|
||||
# left-most columns of INDEX to use.
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/1010232
|
||||
my ($chunk_index, $n_chunk_index_cols)
|
||||
= split(':', $o->get('chunk-index') || '');
|
||||
if ( defined $chunk_index && !$chunk_index ) {
|
||||
$o->save_error('--chunk-index cannot be an empty string');
|
||||
}
|
||||
if ( defined $n_chunk_index_cols
|
||||
&& (!$n_chunk_index_cols
|
||||
|| $n_chunk_index_cols =~ m/[^\d]/
|
||||
|| $n_chunk_index_cols < 1) ) {
|
||||
$o->save_error('Invalid number of --chunk-index columns: '
|
||||
. $n_chunk_index_cols);
|
||||
}
|
||||
|
||||
if ( !$o->get('help') ) {
|
||||
if ( @ARGV ) {
|
||||
$o->save_error('Specify only one DSN on the command line');
|
||||
@@ -5741,6 +5757,7 @@ sub main {
|
||||
my (%args) = @_;
|
||||
my $tbl = $args{tbl};
|
||||
my $nibble_iter = $args{NibbleIterator};
|
||||
my $statements = $nibble_iter->statements();
|
||||
|
||||
if ( $o->get('dry-run') ) {
|
||||
print "Not copying rows because this is a dry run.\n";
|
||||
@@ -5752,7 +5769,6 @@ sub main {
|
||||
|
||||
if ( $o->get('print') ) {
|
||||
# Print the checksum and next boundary statements.
|
||||
my $statements = $nibble_iter->statements();
|
||||
foreach my $sth ( sort keys %$statements ) {
|
||||
next if $sth =~ m/^explain/;
|
||||
if ( $statements->{$sth} ) {
|
||||
@@ -5796,6 +5812,30 @@ sub main {
|
||||
die $msg;
|
||||
}
|
||||
}
|
||||
else { # chunking the table
|
||||
if ( $o->get('check-plan') ) {
|
||||
my $expl = explain_statement(
|
||||
sth => $statements->{explain_first_lower_boundary},
|
||||
tbl => $tbl,
|
||||
vals => [],
|
||||
);
|
||||
if ( !$expl->{key}
|
||||
|| lc($expl->{key}) ne lc($nibble_iter->nibble_index())
|
||||
|| !$expl->{key_len} ) {
|
||||
# XXX this message doesn't give good info if key_len is
|
||||
# NULL. We need an elsif() for that, instead of lumping it
|
||||
# into this if().
|
||||
die "Cannot determine the key_len of the chunk index "
|
||||
. "because MySQL chose "
|
||||
. ($expl->{key} ? "the $expl->{key}" : "no") . " index "
|
||||
. "instead of the " . $nibble_iter->nibble_index()
|
||||
. " index for the first lower boundary statement. "
|
||||
. "See --[no]check-plan in the documentation for more "
|
||||
. "information.";
|
||||
}
|
||||
$tbl->{key_len} = $expl->{key_len}
|
||||
}
|
||||
}
|
||||
|
||||
return 1; # continue nibbling table
|
||||
},
|
||||
@@ -5855,58 +5895,11 @@ sub main {
|
||||
# Count every chunk, even if it's ultimately skipped, etc.
|
||||
$tbl->{results}->{n_chunks}++;
|
||||
|
||||
# If the table is being chunked (i.e., it's not small enough to be
|
||||
# consumed by one nibble), then check index usage and chunk size.
|
||||
if ( !$nibble_iter->one_nibble() ) {
|
||||
my $expl = explain_statement(
|
||||
tbl => $tbl,
|
||||
sth => $sth->{explain_nibble},
|
||||
vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
|
||||
);
|
||||
|
||||
# Ensure that MySQL is using the chunk index.
|
||||
if ( lc($expl->{key} || '')
|
||||
ne lc($nibble_iter->nibble_index() || '') ) {
|
||||
my $msg
|
||||
= "Aborting copying table $tbl->{name} at chunk "
|
||||
. $nibble_iter->nibble_number()
|
||||
. " because it is not safe to chunk. Chunking should "
|
||||
. "use the "
|
||||
. ($nibble_iter->nibble_index() || '?')
|
||||
. " index, but MySQL EXPLAIN reports that "
|
||||
. ($expl->{key} ? "the $expl->{key}" : "no")
|
||||
. " index will be used for "
|
||||
. $sth->{explain_nibble}->{Statement}
|
||||
. " with values "
|
||||
. join(", ", map { defined $_ ? $_ : "NULL" }
|
||||
(@{$boundary->{lower}}, @{$boundary->{upper}}))
|
||||
. "\n";
|
||||
die $msg;
|
||||
}
|
||||
|
||||
# Check chunk size limit if the upper boundary and next lower
|
||||
# boundary are identical.
|
||||
if ( $limit ) {
|
||||
my $boundary = $nibble_iter->boundaries();
|
||||
my $oversize_chunk
|
||||
= $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
|
||||
: 0;
|
||||
if ( $oversize_chunk
|
||||
&& $nibble_iter->identical_boundaries(
|
||||
$boundary->{upper}, $boundary->{next_lower}) )
|
||||
{
|
||||
my $msg
|
||||
= "Aborting copying table $tbl->{name} at chunk "
|
||||
. $nibble_iter->nibble_number()
|
||||
. " because the chunk is too large: MySQL estimates "
|
||||
. ($expl->{rows} || 0) . "rows. The current chunk "
|
||||
. "size limit is " . ($tbl->{chunk_size} * $limit)
|
||||
. " rows (chunk size=$tbl->{chunk_size}"
|
||||
. " * chunk size limit=$limit).\n";
|
||||
die $msg;
|
||||
}
|
||||
}
|
||||
}
|
||||
# Die unless the nibble is safe.
|
||||
nibble_is_safe(
|
||||
%args,
|
||||
OptionParser => $o,
|
||||
);
|
||||
|
||||
# Exec and time the chunk checksum query.
|
||||
$tbl->{nibble_time} = exec_nibble(
|
||||
@@ -6009,18 +6002,19 @@ sub main {
|
||||
# This won't (shouldn't) fail because we already verified in
|
||||
# check_orig_table() that we can NibbleIterator::can_nibble().
|
||||
my $nibble_iter = new NibbleIterator(
|
||||
Cxn => $cxn,
|
||||
tbl => $orig_tbl,
|
||||
chunk_size => $orig_tbl->{chunk_size},
|
||||
chunk_index => $o->get('chunk-index'),
|
||||
dml => $dml,
|
||||
select => $select,
|
||||
callbacks => $callbacks,
|
||||
OptionParser => $o,
|
||||
Quoter => $q,
|
||||
TableParser => $tp,
|
||||
TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q),
|
||||
comments => {
|
||||
Cxn => $cxn,
|
||||
tbl => $orig_tbl,
|
||||
chunk_size => $orig_tbl->{chunk_size},
|
||||
chunk_index => $chunk_index,
|
||||
n_chunk_index_cols => $n_chunk_index_cols,
|
||||
dml => $dml,
|
||||
select => $select,
|
||||
callbacks => $callbacks,
|
||||
OptionParser => $o,
|
||||
Quoter => $q,
|
||||
TableParser => $tp,
|
||||
TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q),
|
||||
comments => {
|
||||
bite => "pt-online-schema-change $PID copy table",
|
||||
nibble => "pt-online-schema-change $PID copy nibble",
|
||||
},
|
||||
@@ -6210,6 +6204,82 @@ sub main {
|
||||
# ############################################################################
|
||||
# Subroutines.
|
||||
# ############################################################################
|
||||
|
||||
# Die unless the current nibble (chunk) looks safe to execute, judging by
# the EXPLAIN plan of the nibble statement for the current boundaries.
#
# Required arguments (named, passed as a hash):
#   Cxn            - must be present; it is not otherwise used in this sub.
#   tbl            - hashref; db, tbl, chunk_size and key_len are read, and
#                    $tbl->{warned}->{...} counters are incremented.
#   NibbleIterator - provides statements(), boundaries(), one_nibble(),
#                    nibble_index(), nibble_number(), identical_boundaries().
#   OptionParser   - provides the 'chunk-size-limit' and 'quiet' values.
#
# Three checks are made, each of which can die:
#   1. MySQL uses the chunk index (only when the table is being chunked).
#   2. The chunk is not oversized with respect to --chunk-size-limit.
#   3. MySQL uses at least $tbl->{key_len} bytes of the index (only when
#      the table is being chunked and $tbl->{key_len} is set).
#
# Each check dies only the first time it trips for a given $tbl and only
# while the 'quiet' option is less than 2; otherwise it falls through.
#
# Returns 1 if no check died.
sub nibble_is_safe {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl, $nibble_iter, $o) = @args{qw(tbl NibbleIterator OptionParser)};

   # EXPLAIN the nibble statement to get its row estimate, index, and
   # key_len.
   # XXX This call and others like it are relying on a Perl oddity.
   # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
   my $sth      = $nibble_iter->statements();
   my $boundary = $nibble_iter->boundaries();
   my $expl     = explain_statement(
      tbl  => $tbl,
      sth  => $sth->{explain_nibble},
      vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
   );

   my $chunking = !$nibble_iter->one_nibble();

   # Ensure that MySQL is using the chunk index if the table is being chunked.
   if ( $chunking
        && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
      if ( !$tbl->{warned}->{not_using_chunk_index}++
           && $o->get('quiet') < 2 ) {
         # nibble_index() is guarded because the comparison above allows
         # it to be undef.
         die "Error copying rows at chunk " . $nibble_iter->nibble_number()
            . " of $tbl->{db}.$tbl->{tbl} because MySQL chose "
            . ($expl->{key} ? "the $expl->{key}" : "no")
            . " index instead of the "
            . ($nibble_iter->nibble_index() || '?') . " index.\n";
      }
   }

   # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
   # If single-chunking the table, this has already been checked, so it
   # shouldn't have changed.  If chunking the table with a non-unique key,
   # oversize chunks are possible.
   if ( my $limit = $o->get('chunk-size-limit') ) {
      my $max_rows       = $tbl->{chunk_size} * $limit;
      my $oversize_chunk = ($expl->{rows} || 0) >= $max_rows;
      # Oversized only matters when the upper boundary and the next lower
      # boundary are identical.
      if ( $oversize_chunk
           && $nibble_iter->identical_boundaries($boundary->{upper},
                                                 $boundary->{next_lower}) ) {
         if ( !$tbl->{warned}->{oversize_chunk}++
              && $o->get('quiet') < 2 ) {
            die "Error copying rows at chunk " . $nibble_iter->nibble_number()
               . " of $tbl->{db}.$tbl->{tbl} because it is oversized.  "
               . "The current chunk size limit is "
               . ($tbl->{chunk_size} * $limit)
               . " rows (chunk size=$tbl->{chunk_size}"
               . " * chunk size limit=$limit), but MySQL estimates "
               . "that there are " . ($expl->{rows} || 0)
               . " rows in the chunk.\n";
         }
      }
   }

   # Ensure that MySQL is still using the entire index.
   # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
   if ( $chunking
        && $tbl->{key_len}
        && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
      if ( !$tbl->{warned}->{key_len}++
           && $o->get('quiet') < 2 ) {
         die "Error copying rows at chunk " . $nibble_iter->nibble_number()
            . " of $tbl->{db}.$tbl->{tbl} because MySQL used "
            . "only " . ($expl->{key_len} || 0) . " bytes "
            . "of the " . ($expl->{key} || '?') . " index instead of "
            . $tbl->{key_len} . ".  See the --[no]check-plan documentation "
            . "for more information.\n";
      }
   }

   return 1; # safe
}
|
||||
|
||||
sub create_new_table{
|
||||
my (%args) = @_;
|
||||
my @required_args = qw(orig_tbl Cxn Quoter OptionParser TableParser);
|
||||
@@ -7209,6 +7279,38 @@ type: time; default: 1
|
||||
|
||||
Sleep time between checks for L<"--max-lag">.
|
||||
|
||||
=item --[no]check-plan
|
||||
|
||||
default: yes
|
||||
|
||||
Check query execution plans for safety. By default, this option causes
|
||||
the tool to run EXPLAIN before running queries that are meant to access
|
||||
a small amount of data, but which could access many rows if MySQL chooses a bad
|
||||
execution plan. These include the queries to determine chunk boundaries and the
|
||||
chunk queries themselves. If it appears that MySQL will use a bad query
|
||||
execution plan, the tool will skip the table or the chunk of the table.
|
||||
|
||||
The tool uses several heuristics to determine whether an execution plan is bad.
|
||||
The first is whether EXPLAIN reports that MySQL intends to use the desired index
|
||||
to access the rows. If MySQL chooses a different index, the tool considers the
|
||||
query unsafe.
|
||||
|
||||
The tool also checks how much of the index MySQL reports that it will use for
|
||||
the query. The EXPLAIN output shows this in the key_len column. The tool
|
||||
remembers the largest key_len seen, and skips chunks where MySQL reports that it
|
||||
will use a smaller prefix of the index. This heuristic can be understood as
|
||||
skipping chunks that have a worse execution plan than other chunks.
|
||||
|
||||
The tool prints a warning the first time a chunk is skipped due to a bad execution
|
||||
plan in each table. Subsequent chunks are skipped silently, although you can see
|
||||
the count of skipped chunks in the SKIPPED column in the tool's output.
|
||||
|
||||
This option adds some setup work to each table and chunk. Although the work is
|
||||
not intrusive for MySQL, it results in more round-trips to the server, which
|
||||
consumes time. Making chunks too small will cause the overhead to become
|
||||
relatively larger. It is therefore recommended that you not make chunks too
|
||||
small, because the tool may take a very long time to complete if you do.
|
||||
|
||||
=item --[no]check-replication-filters
|
||||
|
||||
default: yes
|
||||
@@ -7245,6 +7347,17 @@ behavior of choosing an index. The tool adds the index to the SQL statements in
|
||||
a C<FORCE INDEX> clause. Be careful when using this option; a poor choice of
|
||||
index could cause bad performance.
|
||||
|
||||
This option supports a special syntax to select a prefix of the index instead of
|
||||
the entire index. The syntax is NAME:N, where NAME is the name of the index, and
|
||||
N is the number of columns you wish to use. This works only for compound
|
||||
indexes, and is useful in cases where a bug in the MySQL query optimizer
|
||||
(planner) causes it to scan a large range of rows instead of using the index to
|
||||
locate starting and ending points precisely. This problem sometimes occurs on
|
||||
indexes with many columns, such as 4 or more. If this happens, the tool might
|
||||
print a warning related to the L<"--[no]check-plan"> option. Instructing
|
||||
the tool to use only the first N columns from the index is a workaround for
|
||||
the bug in some cases.
|
||||
|
||||
=item --chunk-size
|
||||
|
||||
type: size; default: 1000
|
||||
|
@@ -8184,9 +8184,9 @@ indexes, and is useful in cases where a bug in the MySQL query optimizer
|
||||
(planner) causes it to scan a large range of rows instead of using the index to
|
||||
locate starting and ending points precisely. This problem sometimes occurs on
|
||||
indexes with many columns, such as 4 or more. If this happens, the tool might
|
||||
print a warning related to the L<"--check-plan"> option. Instructing the tool to
|
||||
use only the first N columns from the index is a workaround for the bug in some
|
||||
cases.
|
||||
print a warning related to the L<"--[no]check-plan"> option. Instructing
|
||||
the tool to use only the first N columns from the index is a workaround for
|
||||
the bug in some cases.
|
||||
|
||||
=item --chunk-size
|
||||
|
||||
@@ -8211,7 +8211,7 @@ clause that matches only 1,000 of the values, and that chunk will be at least
|
||||
L<"--chunk-size-limit">.
|
||||
|
||||
Selecting a small chunk size will cause the tool to become much slower, in part
|
||||
because of the setup work required for L<"--[no]-check-plan">.
|
||||
because of the setup work required for L<"--[no]check-plan">.
|
||||
|
||||
=item --chunk-size-limit
|
||||
|
||||
|
Reference in New Issue
Block a user