Implement IndexLength in pt-table-checksum and pt-online-schema-change. Tag the IndexLength queries with "key_len" and make n_index_cols optional--get full key_len if not given.

This commit is contained in:
Daniel Nichter
2012-06-11 12:54:26 -04:00
parent 4f3bdabf7c
commit 3f2295ec46
5 changed files with 377 additions and 42 deletions

View File

@@ -5127,6 +5127,158 @@ sub _d {
# End CleanupTask package # End CleanupTask package
# ########################################################################### # ###########################################################################
# ###########################################################################
# IndexLength package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/IndexLength.pm
# t/lib/IndexLength.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
# IndexLength asks MySQL (via EXPLAIN) for the key_len it reports when
# range-scanning the first N left-most columns of an index.
package IndexLength;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# Constructor.
# Required args:
#   Quoter - object whose quote() method is used to quote column and
#            index names in the generated SQL
# Dies if a required argument is missing.
sub new {
   my ( $class, %args ) = @_;
   my @required_args = qw(Quoter);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my $self = {
      Quoter => $args{Quoter},
   };
   return bless $self, $class;
}

# Returns the list (key_len, key) taken from the EXPLAIN row of a range
# scan on the given index.
# Required args:
#   Cxn   - object whose dbh() method returns the database handle
#   tbl   - hashref with a name and a tbl_struct (tbl_struct->{keys} is
#           looked up by index name and must provide a cols arrayref)
#   index - name of the index
# Optional args:
#   n_index_cols - number of left-most index columns to use; when it is
#                  not given (or false), or is larger than the number of
#                  columns in the index, all index columns are used
# Dies if a required argument is missing, if tbl has no tbl_struct, if the
# index is not in the tbl_struct, or if the table has no row whose index
# columns are all non-NULL (there is then no value to range-scan from).
sub index_length {
   my ($self, %args) = @_;
   my @required_args = qw(Cxn tbl index);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl, $index) = @args{@required_args};

   die "The tbl argument does not have a tbl_struct"
      unless exists $tbl->{tbl_struct};
   die "Index $index does not exist in table $tbl->{name}"
      unless $tbl->{tbl_struct}->{keys}->{$index};

   my $index_cols   = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
   my $n_index_cols = $args{n_index_cols};
   if ( !$n_index_cols || $n_index_cols > @$index_cols ) {
      $n_index_cols = scalar @$index_cols;
   }

   my $vals = $self->_get_first_values(
      %args,
      n_index_cols => $n_index_cols,
   );

   # selectrow_arrayref returns undef when no row matches.  Without this
   # check the failure surfaced later as the unrelated-looking error
   # "I need a vals argument" from _make_range_query.
   if ( !$vals || !@$vals ) {
      die "Cannot determine the key_len of index $index on table "
         . "$tbl->{name} because the table has no row with non-NULL "
         . "values for the index columns";
   }

   my $sql = $self->_make_range_query(
      %args,
      n_index_cols => $n_index_cols,
      vals         => $vals,
   );
   my $sth = $cxn->dbh()->prepare($sql);
   PTDEBUG && _d($sth->{Statement}, 'params:', @$vals);
   $sth->execute(@$vals);
   my $row = $sth->fetchrow_hashref();
   $sth->finish();
   PTDEBUG && _d('Range scan:', Dumper($row));
   return $row->{key_len}, $row->{key};
}

# Selects the first N index columns of the first row (ordered by those
# columns) in which none of them is NULL.
# Required args: Cxn, tbl, index, n_index_cols
# Returns whatever selectrow_arrayref returns: an arrayref of the values,
# or undef when no row matches.
sub _get_first_values {
   my ($self, %args) = @_;
   my @required_args = qw(Cxn tbl index n_index_cols);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl, $index, $n_index_cols) = @args{@required_args};

   my $q = $self->{Quoter};

   my $index_cols  = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
   my @quoted_cols = map { $q->quote($_) }
                     @{$index_cols}[0..($n_index_cols - 1)];

   my $index_columns = join(', ', @quoted_cols);
   my $where         = join(' AND ', map { "$_ IS NOT NULL" } @quoted_cols);

   my $sql = "SELECT /*!40001 SQL_NO_CACHE */ $index_columns "
           . "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") "
           . "WHERE $where"
           . " ORDER BY $index_columns "
           . "LIMIT 1 /*key_len*/";  # only need 1 row
   PTDEBUG && _d($sql);
   my $vals = $cxn->dbh()->selectrow_arrayref($sql);
   return $vals;
}

# Builds the EXPLAIN statement for the range scan: every column but the
# last of the N index columns is compared with "= ?" and the last one
# with ">= ?".
# Required args: tbl, index, n_index_cols, vals
# The vals are not interpolated into the SQL; the statement only contains
# placeholders.  Returns the SQL string.
sub _make_range_query {
   my ($self, %args) = @_;
   my @required_args = qw(tbl index n_index_cols vals);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl, $index, $n_index_cols) = @args{@required_args};

   my $q = $self->{Quoter};

   my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};

   # -1 for the zero-indexed array, -1 again to leave out the last column,
   # which is added separately below.  With a single column the range
   # 0..-1 is empty, so no equality conditions are produced.
   my @where = map { $q->quote($_) . " = ?" }
               @{$index_cols}[0..($n_index_cols - 2)];

   push @where, $q->quote($index_cols->[$n_index_cols - 1]) . " >= ?";

   my $sql = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ * "
           . "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") "
           . "WHERE " . join(' AND ', @where)
           . " /*key_len*/";
   return $sql;
}

# Debug printer: writes its arguments to STDERR on one "# package:line PID"
# prefixed line, showing undef as 'undef' and prefixing continuation lines
# with "# ".
sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End IndexLength package
# ###########################################################################
# ########################################################################### # ###########################################################################
# This is a combination of modules and programs in one -- a runnable module. # This is a combination of modules and programs in one -- a runnable module.
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last # http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
@@ -5908,30 +6060,30 @@ sub main {
} }
else { # chunking the table else { # chunking the table
if ( $o->get('check-plan') ) { if ( $o->get('check-plan') ) {
my $expl = explain_statement( my $idx_len = new IndexLength(Quoter => $q);
sth => $statements->{explain_first_lower_boundary}, my ($key_len, $key) = $idx_len->index_length(
tbl => $tbl, Cxn => $args{Cxn},
vals => [], tbl => $tbl,
index => $nibble_iter->nibble_index(),
n_index_cols => $o->get('chunk-index-columns'),
); );
if ( !$expl->{key} if ( !$key || lc($key) ne lc($nibble_iter->nibble_index()) ) {
|| lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
{
die "Cannot determine the key_len of the chunk index " die "Cannot determine the key_len of the chunk index "
. "because MySQL chose " . "because MySQL chose "
. ($expl->{key} ? "the $expl->{key}" : "no") . " index " . ($key ? "the $key" : "no") . " index "
. "instead of the " . $nibble_iter->nibble_index() . "instead of the " . $nibble_iter->nibble_index()
. " index for the first lower boundary statement. " . " index for the first lower boundary statement. "
. "See --[no]check-plan in the documentation for more " . "See --[no]check-plan in the documentation for more "
. "information."; . "information.";
} }
elsif ( !$expl->{key_len} ) { elsif ( !$key_len ) {
die "The key_len of the $expl->{key} index is " die "The key_len of the $key index is "
. (defined $expl->{key_len} ? "zero" : "NULL") . (defined $key_len ? "zero" : "NULL")
. ", but this should not be possible. " . ", but this should not be possible. "
. "See --[no]check-plan in the documentation for more " . "See --[no]check-plan in the documentation for more "
. "information."; . "information.";
} }
$tbl->{key_len} = $expl->{key_len}; $tbl->{key_len} = $key_len;
} }
} }

View File

@@ -6080,6 +6080,158 @@ sub _d {
# End WeightedAvgRate package # End WeightedAvgRate package
# ########################################################################### # ###########################################################################
# ###########################################################################
# IndexLength package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/IndexLength.pm
# t/lib/IndexLength.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
# IndexLength asks MySQL (via EXPLAIN) for the key_len it reports when
# range-scanning the first N left-most columns of an index.
package IndexLength;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# Constructor.
# Required args:
#   Quoter - object whose quote() method is used to quote column and
#            index names in the generated SQL
# Dies if a required argument is missing.
sub new {
   my ( $class, %args ) = @_;
   my @required_args = qw(Quoter);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my $self = {
      Quoter => $args{Quoter},
   };
   return bless $self, $class;
}

# Returns the list (key_len, key) taken from the EXPLAIN row of a range
# scan on the given index.
# Required args:
#   Cxn   - object whose dbh() method returns the database handle
#   tbl   - hashref with a name and a tbl_struct (tbl_struct->{keys} is
#           looked up by index name and must provide a cols arrayref)
#   index - name of the index
# Optional args:
#   n_index_cols - number of left-most index columns to use; when it is
#                  not given (or false), or is larger than the number of
#                  columns in the index, all index columns are used
# Dies if a required argument is missing, if tbl has no tbl_struct, if the
# index is not in the tbl_struct, or if the table has no row whose index
# columns are all non-NULL (there is then no value to range-scan from).
sub index_length {
   my ($self, %args) = @_;
   my @required_args = qw(Cxn tbl index);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl, $index) = @args{@required_args};

   die "The tbl argument does not have a tbl_struct"
      unless exists $tbl->{tbl_struct};
   die "Index $index does not exist in table $tbl->{name}"
      unless $tbl->{tbl_struct}->{keys}->{$index};

   my $index_cols   = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
   my $n_index_cols = $args{n_index_cols};
   if ( !$n_index_cols || $n_index_cols > @$index_cols ) {
      $n_index_cols = scalar @$index_cols;
   }

   my $vals = $self->_get_first_values(
      %args,
      n_index_cols => $n_index_cols,
   );

   # selectrow_arrayref returns undef when no row matches.  Without this
   # check the failure surfaced later as the unrelated-looking error
   # "I need a vals argument" from _make_range_query.
   if ( !$vals || !@$vals ) {
      die "Cannot determine the key_len of index $index on table "
         . "$tbl->{name} because the table has no row with non-NULL "
         . "values for the index columns";
   }

   my $sql = $self->_make_range_query(
      %args,
      n_index_cols => $n_index_cols,
      vals         => $vals,
   );
   my $sth = $cxn->dbh()->prepare($sql);
   PTDEBUG && _d($sth->{Statement}, 'params:', @$vals);
   $sth->execute(@$vals);
   my $row = $sth->fetchrow_hashref();
   $sth->finish();
   PTDEBUG && _d('Range scan:', Dumper($row));
   return $row->{key_len}, $row->{key};
}

# Selects the first N index columns of the first row (ordered by those
# columns) in which none of them is NULL.
# Required args: Cxn, tbl, index, n_index_cols
# Returns whatever selectrow_arrayref returns: an arrayref of the values,
# or undef when no row matches.
sub _get_first_values {
   my ($self, %args) = @_;
   my @required_args = qw(Cxn tbl index n_index_cols);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl, $index, $n_index_cols) = @args{@required_args};

   my $q = $self->{Quoter};

   my $index_cols  = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
   my @quoted_cols = map { $q->quote($_) }
                     @{$index_cols}[0..($n_index_cols - 1)];

   my $index_columns = join(', ', @quoted_cols);
   my $where         = join(' AND ', map { "$_ IS NOT NULL" } @quoted_cols);

   my $sql = "SELECT /*!40001 SQL_NO_CACHE */ $index_columns "
           . "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") "
           . "WHERE $where"
           . " ORDER BY $index_columns "
           . "LIMIT 1 /*key_len*/";  # only need 1 row
   PTDEBUG && _d($sql);
   my $vals = $cxn->dbh()->selectrow_arrayref($sql);
   return $vals;
}

# Builds the EXPLAIN statement for the range scan: every column but the
# last of the N index columns is compared with "= ?" and the last one
# with ">= ?".
# Required args: tbl, index, n_index_cols, vals
# The vals are not interpolated into the SQL; the statement only contains
# placeholders.  Returns the SQL string.
sub _make_range_query {
   my ($self, %args) = @_;
   my @required_args = qw(tbl index n_index_cols vals);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl, $index, $n_index_cols) = @args{@required_args};

   my $q = $self->{Quoter};

   my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};

   # -1 for the zero-indexed array, -1 again to leave out the last column,
   # which is added separately below.  With a single column the range
   # 0..-1 is empty, so no equality conditions are produced.
   my @where = map { $q->quote($_) . " = ?" }
               @{$index_cols}[0..($n_index_cols - 2)];

   push @where, $q->quote($index_cols->[$n_index_cols - 1]) . " >= ?";

   my $sql = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ * "
           . "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") "
           . "WHERE " . join(' AND ', @where)
           . " /*key_len*/";
   return $sql;
}

# Debug printer: writes its arguments to STDERR on one "# package:line PID"
# prefixed line, showing undef as 'undef' and prefixing continuation lines
# with "# ".
sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End IndexLength package
# ###########################################################################
# ########################################################################### # ###########################################################################
# This is a combination of modules and programs in one -- a runnable module. # This is a combination of modules and programs in one -- a runnable module.
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last # http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
@@ -6748,30 +6900,30 @@ sub main {
} }
else { # chunking the table else { # chunking the table
if ( $o->get('check-plan') ) { if ( $o->get('check-plan') ) {
my $expl = explain_statement( my $idx_len = new IndexLength(Quoter => $q);
sth => $statements->{explain_first_lower_boundary}, my ($key_len, $key) = $idx_len->index_length(
tbl => $tbl, Cxn => $args{Cxn},
vals => [], tbl => $tbl,
index => $nibble_iter->nibble_index(),
n_index_cols => $o->get('chunk-index-columns'),
); );
if ( !$expl->{key} if ( !$key || lc($key) ne lc($nibble_iter->nibble_index()) ) {
|| lc($expl->{key}) ne lc($nibble_iter->nibble_index()) )
{
die "Cannot determine the key_len of the chunk index " die "Cannot determine the key_len of the chunk index "
. "because MySQL chose " . "because MySQL chose "
. ($expl->{key} ? "the $expl->{key}" : "no") . " index " . ($key ? "the $key" : "no") . " index "
. "instead of the " . $nibble_iter->nibble_index() . "instead of the " . $nibble_iter->nibble_index()
. " index for the first lower boundary statement. " . " index for the first lower boundary statement. "
. "See --[no]check-plan in the documentation for more " . "See --[no]check-plan in the documentation for more "
. "information."; . "information.";
} }
elsif ( !$expl->{key_len} ) { elsif ( !$key_len ) {
die "The key_len of the $expl->{key} index is " die "The key_len of the $key index is "
. (defined $expl->{key_len} ? "zero" : "NULL") . (defined $key_len ? "zero" : "NULL")
. ", but this should not be possible. " . ", but this should not be possible. "
. "See --[no]check-plan in the documentation for more " . "See --[no]check-plan in the documentation for more "
. "information."; . "information.";
} }
$tbl->{key_len} = $expl->{key_len}; $tbl->{key_len} = $key_len;
} }
} }

View File

@@ -51,7 +51,7 @@ sub new {
# the first N left-most columns of the index. # the first N left-most columns of the index.
sub index_length { sub index_length {
my ($self, %args) = @_; my ($self, %args) = @_;
my @required_args = qw(Cxn tbl index n_index_cols); my @required_args = qw(Cxn tbl index);
foreach my $arg ( @required_args ) { foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg}; die "I need a $arg argument" unless $args{$arg};
} }
@@ -62,18 +62,32 @@ sub index_length {
die "Index $args{index} does not exist in table $args{tbl}->{name}" die "Index $args{index} does not exist in table $args{tbl}->{name}"
unless $args{tbl}->{tbl_struct}->{keys}->{$args{index}}; unless $args{tbl}->{tbl_struct}->{keys}->{$args{index}};
my $index_struct = $args{tbl}->{tbl_struct}->{keys}->{$args{index}};
my $index_cols = $index_struct->{cols};
my $n_index_cols = $args{n_index_cols};
if ( !$n_index_cols || $n_index_cols > @$index_cols ) {
$n_index_cols = scalar @$index_cols;
}
# Get the first row with non-NULL values. # Get the first row with non-NULL values.
my $vals = $self->_get_first_values(%args); my $vals = $self->_get_first_values(
%args,
n_index_cols => $n_index_cols,
);
# Make an EXPLAIN query to scan the range and execute it. # Make an EXPLAIN query to scan the range and execute it.
my $sql = $self->_make_range_query(%args, vals => $vals); my $sql = $self->_make_range_query(
%args,
n_index_cols => $n_index_cols,
vals => $vals,
);
my $sth = $cxn->dbh()->prepare($sql); my $sth = $cxn->dbh()->prepare($sql);
PTDEBUG && _d($sth->{Statement}, 'params:', @$vals); PTDEBUG && _d($sth->{Statement}, 'params:', @$vals);
$sth->execute(@$vals); $sth->execute(@$vals);
my $row = $sth->fetchrow_hashref(); my $row = $sth->fetchrow_hashref();
$sth->finish(); $sth->finish();
PTDEBUG && _d('Range scan:', Dumper($row)); PTDEBUG && _d('Range scan:', Dumper($row));
return $row->{key_len}; return $row->{key_len}, $row->{key};
} }
sub _get_first_values { sub _get_first_values {
@@ -86,11 +100,9 @@ sub _get_first_values {
my $q = $self->{Quoter}; my $q = $self->{Quoter};
my $index_struct = $tbl->{tbl_struct}->{keys}->{$index};
my $index_cols = $index_struct->{cols};
$n_index_cols = @$index_cols - 1 if $n_index_cols > @$index_cols;
# Select just the index columns. # Select just the index columns.
my $index_struct = $tbl->{tbl_struct}->{keys}->{$index};
my $index_cols = $index_struct->{cols};
my $index_columns = join (', ', my $index_columns = join (', ',
map { $q->quote($_) } @{$index_cols}[0..($n_index_cols - 1)]); map { $q->quote($_) } @{$index_cols}[0..($n_index_cols - 1)]);
@@ -104,7 +116,7 @@ sub _get_first_values {
. "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") " . "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") "
. "WHERE " . join(' AND ', @where) . "WHERE " . join(' AND ', @where)
. " ORDER BY $index_columns " . " ORDER BY $index_columns "
. "LIMIT 1"; # only need 1 row . "LIMIT 1 /*key_len*/"; # only need 1 row
PTDEBUG && _d($sql); PTDEBUG && _d($sql);
my $vals = $cxn->dbh()->selectrow_arrayref($sql); my $vals = $cxn->dbh()->selectrow_arrayref($sql);
return $vals; return $vals;
@@ -122,11 +134,12 @@ sub _make_range_query {
my $index_struct = $tbl->{tbl_struct}->{keys}->{$index}; my $index_struct = $tbl->{tbl_struct}->{keys}->{$index};
my $index_cols = $index_struct->{cols}; my $index_cols = $index_struct->{cols};
$n_index_cols = @$index_cols - 1 if $n_index_cols > @$index_cols;
# All but the last index col = val. # All but the last index col = val.
my @where; my @where;
if ( $n_index_cols > 1 ) { if ( $n_index_cols > 1 ) {
# -1 for zero-index array as usual, then -1 again because
# we don't want the last column; that's added below.
foreach my $n ( 0..($n_index_cols - 2) ) { foreach my $n ( 0..($n_index_cols - 2) ) {
my $col = $index_cols->[$n]; my $col = $index_cols->[$n];
my $val = $vals->[$n]; my $val = $vals->[$n];
@@ -137,12 +150,13 @@ sub _make_range_query {
# The last index col > val. This causes the range scan using just # The last index col > val. This causes the range scan using just
# the N left-most index columns. # the N left-most index columns.
my $col = $index_cols->[$n_index_cols - 1]; my $col = $index_cols->[$n_index_cols - 1];
my $val = $vals->[$n_index_cols - 1]; my $val = $vals->[-1]; # should only be as many vals as cols
push @where, $q->quote($col) . " >= ?"; push @where, $q->quote($col) . " >= ?";
my $sql = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ * " my $sql = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ * "
. "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") " . "FROM $tbl->{name} FORCE INDEX (" . $q->quote($index) . ") "
. "WHERE " . join(' AND ', @where); . "WHERE " . join(' AND ', @where)
. " /*key_len*/";
return $sql; return $sql;
} }

View File

@@ -37,7 +37,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master'; plan skip_all => 'Cannot connect to sandbox master';
} }
else { else {
plan tests => 6; plan tests => 7;
} }
my $output; my $output;
@@ -55,12 +55,12 @@ my $cxn = new Cxn(
sub test_index_len { sub test_index_len {
my (%args) = @_; my (%args) = @_;
my @required_args = qw(name tbl index n_index_cols len); my @required_args = qw(name tbl index len);
foreach my $arg ( @required_args ) { foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg}; die "I need a $arg argument" unless $args{$arg};
} }
my $len = $il->index_length( my ($len, $key) = $il->index_length(
Cxn => $cxn, Cxn => $cxn,
tbl => $args{tbl}, tbl => $args{tbl},
index => $args{index}, index => $args{index},
@@ -114,6 +114,19 @@ test_index_len(
len => 2, len => 2,
); );
# #############################################################################
# Use full index if no n_index_cols
# #############################################################################
# Use sakila.film_actor stuff from previous tests.
test_index_len(
name => "sakila.film_actor all cols = 4 bytes",
tbl => $tbl,
index => "PRIMARY",
len => 4,
);
# ############################################################################# # #############################################################################
# Done. # Done.
# ############################################################################# # #############################################################################

View File

@@ -175,7 +175,7 @@ is(
$exit_status, $exit_status,
0, 0,
"Bad key_len chunks are not errors" "Bad key_len chunks are not errors"
); ) or diag($output);
cmp_ok( cmp_ok(
PerconaTest::count_checksum_results($output, 'skipped'), PerconaTest::count_checksum_results($output, 'skipped'),
@@ -223,14 +223,18 @@ $output = output(
$exit_status = pt_table_checksum::main( $exit_status = pt_table_checksum::main(
$master_dsn, '--max-load', '', $master_dsn, '--max-load', '',
qw(--lock-wait-timeout 3 --chunk-size 1000 -t sakila.film_actor), qw(--lock-wait-timeout 3 --chunk-size 1000 -t sakila.film_actor),
qw(--chunk-index-columns 1), qw(--chunk-index-columns 1 --chunk-size-limit 3),
); );
}, },
stderr => 1, stderr => 1,
); );
is( # Since we're not using the full index, it's basically a non-unique index,
# so there are dupes. The table really has 5462 rows, so we must get
# at least that many, and probably a few more.
cmp_ok(
PerconaTest::count_checksum_results($output, 'rows'), PerconaTest::count_checksum_results($output, 'rows'),
'>=',
5462, 5462,
"Initial key_len reflects --chunk-index-columns" "Initial key_len reflects --chunk-index-columns"
) or diag($output); ) or diag($output);