mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-16 16:23:30 +00:00
Detect infinite loops. Use best non-unique index. Disable chunk size limit if chunk index is unique.
This commit is contained in:
@@ -5249,17 +5249,19 @@ sub main {
|
||||
|
||||
# Check if the chunk is too large. If yes, then return 0 to
|
||||
# skip this chunk and fetch the next boundary.
|
||||
my $is_oversize = is_oversize_chunk(
|
||||
%args,
|
||||
chunk_size => $tbl->{chunk_size},
|
||||
chunk_size_limit => $o->get('chunk-size-limit'),
|
||||
);
|
||||
if ( $is_oversize ) {
|
||||
MKDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
|
||||
"$tbl->{db}.$tbl->{tbl}", 'is too large');
|
||||
$tbl->{checksum_results}->{skipped}++;
|
||||
$tbl->{nibble_time} = 0;
|
||||
return 0; # next boundary
|
||||
if ( $tbl->{chunk_size_limit} ) {
|
||||
my $is_oversize = is_oversize_chunk(
|
||||
%args,
|
||||
chunk_size => $tbl->{chunk_size},
|
||||
limit => $tbl->{chunk_size_limit},
|
||||
);
|
||||
if ( $is_oversize ) {
|
||||
MKDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
|
||||
"$tbl->{db}.$tbl->{tbl}", 'is too large');
|
||||
$tbl->{checksum_results}->{skipped}++;
|
||||
$tbl->{nibble_time} = 0;
|
||||
return 0; # next boundary
|
||||
}
|
||||
}
|
||||
|
||||
# Exec and time the chunk checksum query. If it fails, retry.
|
||||
@@ -5426,6 +5428,16 @@ sub main {
|
||||
TableNibbler => $tn,
|
||||
TableParser => $tp,
|
||||
);
|
||||
|
||||
my $chunk_index = $nibble_iter->nibble_index();
|
||||
if ( $tbl->{tbl_struct}->{keys}->{$chunk_index}->{is_unique} ) {
|
||||
MKDEBUG && _d('Disabling chunk size limit for table because',
|
||||
'chunk index', $chunk_index, 'is unique');
|
||||
$tbl->{chunk_size_limit} = 0;
|
||||
}
|
||||
else {
|
||||
$tbl->{chunk_size_limit} = $o->get('chunk-size-limit');
|
||||
}
|
||||
|
||||
# Finally, checksum the table.
|
||||
# The "1 while" loop is necessary because we're executing REPLACE
|
||||
@@ -5487,21 +5499,22 @@ sub exec_nibble {
|
||||
|
||||
my $lb_quoted = join(',', map { $q->quote_val($_) } @$lb);
|
||||
my $ub_quoted = join(',', map { $q->quote_val($_) } @$ub);
|
||||
my $chunk_idx = $$nibble_iter->nibble_index();
|
||||
|
||||
# Execute the REPLACE...SELECT checksum query.
|
||||
# MKDEBUG && _d($sth->{Statement}, 'params:',
|
||||
# );
|
||||
$sth->execute(
|
||||
# REPLACE INTO repl_table SELECT
|
||||
$tbl->{db}, # db
|
||||
$tbl->{tbl}, # tbl
|
||||
$args{nibbleno}, # chunk
|
||||
$nibble_iter->nibble_index(), # chunk_index
|
||||
$lb_quoted, # lower_boundary
|
||||
$ub_quoted, # upper_boundary
|
||||
$tbl->{db}, # db
|
||||
$tbl->{tbl}, # tbl
|
||||
$args{nibbleno}, # chunk
|
||||
$chunk_idx, # chunk_index
|
||||
$lb_quoted, # lower_boundary
|
||||
$ub_quoted, # upper_boundary
|
||||
# this_cnt, this_crc WHERE
|
||||
@$lb, # upper boundary values
|
||||
@$ub, # lower boundary values
|
||||
@$lb, # upper boundary values
|
||||
@$ub, # lower boundary values
|
||||
);
|
||||
|
||||
# Check if checksum query caused any warnings.
|
||||
@@ -5749,12 +5762,12 @@ sub create_repl_table {
|
||||
# Determine if the chunk is oversize.
|
||||
#
|
||||
# Required Arguments:
|
||||
# * tbl - Standard tbl hashref
|
||||
# * explain_sth - Sth to EXPLAIN the chunking query
|
||||
# * lb - Arrayref with lower boundary values for explain_sth
|
||||
# * ub - Arrayref with upper boundary values for explain_sth
|
||||
# * chunk_size - Chunk size
|
||||
# * chunk_size_limit - Chunk size limit
|
||||
# * tbl - Standard tbl hashref
|
||||
# * explain_sth - Sth to EXPLAIN the chunking query
|
||||
# * lb - Arrayref with lower boundary values for explain_sth
|
||||
# * ub - Arrayref with upper boundary values for explain_sth
|
||||
# * chunk_size - Chunk size
|
||||
# * limit - Chunk size limit
|
||||
#
|
||||
# Returns:
|
||||
# True if EXPLAIN rows is >= chunk-size * chunk-size-limit, else false
|
||||
|
@@ -57,10 +57,7 @@ sub new {
|
||||
my ($dbh, $tbl, $chunk_size, $o, $q) = @args{@required_args};
|
||||
|
||||
# Get an index to nibble by. We'll order rows by the index's columns.
|
||||
my $index = $args{TableParser}->find_best_index(
|
||||
$tbl->{tbl_struct},
|
||||
$args{chunk_index},
|
||||
);
|
||||
my $index = _find_best_index(%args);
|
||||
die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
|
||||
my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
|
||||
|
||||
@@ -290,6 +287,86 @@ sub set_chunk_size {
|
||||
return;
|
||||
}
|
||||
|
||||
# Choose the best index to nibble the table by.
#
# Required Arguments:
#   * tbl         - Standard tbl hashref (with tbl_struct)
#   * TableParser - TableParser object (used to sort the table's indexes)
#   * dbh         - dbh (used by _get_index_cardinality)
#   * Quoter      - Quoter object (used by _get_index_cardinality)
#
# Optional Arguments:
#   * chunk_index - Index the caller wants to use, if any
#
# Returns:
#   Name of the best index, or undef if the table has no usable index.
#
# Preference order: the caller's requested index if it is PRIMARY or
# unique; else PRIMARY or the first unique index; else the non-unique
# index with the highest cardinality (ties broken by column count).
sub _find_best_index {
   my (%args) = @_;
   my @required_args = qw(tbl TableParser dbh Quoter);
   my ($tbl, $tp) = @args{@required_args};

   my $tbl_struct = $tbl->{tbl_struct};
   my $indexes    = $tbl_struct->{keys};

   my $best_index;
   my @possible_indexes;
   if ( my $want_index = $args{chunk_index} ) {
      MKDEBUG && _d('Want to use nibble index', $want_index);
      # Honor the requested index only if it guarantees unique row
      # order; otherwise keep it as a candidate to rank by cardinality.
      if ( $want_index eq 'PRIMARY' || $indexes->{$want_index}->{is_unique} ) {
         $best_index = $want_index;
      }
      else {
         push @possible_indexes, $want_index;
      }
   }
   else {
      # No requested index: take the first PRIMARY/unique index in the
      # TableParser's preference order, collecting non-unique fallbacks.
      foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
         if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
            $best_index = $index;
            last;
         }
         else {
            push @possible_indexes, $index;
         }
      }
   }

   if ( !$best_index && @possible_indexes ) {
      MKDEBUG && _d('No PRIMARY or unique indexes;',
         'will use index with highest cardinality');
      foreach my $index ( @possible_indexes ) {
         $indexes->{$index}->{cardinality} = _get_index_cardinality(
            %args,
            index => $index,
         );
      }
      @possible_indexes = sort {
         # Prefer the index with the highest cardinality.
         # BUG FIX: the comparator used to compare $b with itself
         # ($b <=> $b), so the sort never ordered anything; compare
         # $b to $a (descending) so the highest cardinality sorts first.
         my $cmp
            = $indexes->{$b}->{cardinality} <=> $indexes->{$a}->{cardinality};
         if ( $cmp == 0 ) {
            # Indexes have the same cardinality; prefer the one with
            # more columns.
            $cmp = scalar @{$indexes->{$b}->{cols}}
               <=> scalar @{$indexes->{$a}->{cols}};
         }
         $cmp;
      } @possible_indexes;
      $best_index = $possible_indexes[0];
   }

   MKDEBUG && _d('Best index:', $best_index);
   return $best_index;
}
|
||||
|
||||
# Estimate an index's cardinality from SHOW INDEXES.
#
# Required Arguments:
#   * dbh    - dbh
#   * tbl    - Standard tbl hashref (db and tbl keys are quoted)
#   * index  - Name of the index to inspect
#   * Quoter - Quoter object
#
# Returns:
#   Product of the per-column cardinalities reported by SHOW INDEXES
#   for the named index (1 if none are reported).
sub _get_index_cardinality {
   my (%args) = @_;
   my @required_args = qw(dbh tbl index Quoter);
   my ($dbh, $tbl, $index, $q) = @args{@required_args};

   my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
           . " WHERE Key_name = '$index'";
   MKDEBUG && _d($sql);
   my $rows_by_key = $dbh->selectall_hashref($sql, 'key_name');
   my $cardinality = 1;
   # Multiply the cardinality of each index part; skip parts for which
   # MySQL reports no (or zero) cardinality so they don't zero the result.
   for my $index_row ( values %$rows_by_key ) {
      next unless $index_row->{cardinality};
      $cardinality *= $index_row->{cardinality};
   }
   MKDEBUG && _d('Index', $index, 'cardinality:', $cardinality);
   return $cardinality;
}
|
||||
|
||||
# NOTE(review): unfinished stub -- it unpacks its single argument and
# returns nothing.  No callers are visible in this chunk; confirm whether
# this sub is dead code or still being implemented before relying on it.
sub _can_nibble_index {
   my ($index) = @_;
}
|
||||
|
||||
sub _can_nibble_once {
|
||||
my ($self) = @_;
|
||||
my ($dbh, $tbl, $tp) = @{$self}{qw(dbh tbl TableParser)};
|
||||
@@ -380,6 +457,22 @@ sub _next_boundaries {
|
||||
if ( $boundary && @$boundary ) {
|
||||
$self->{ub} = $boundary->[0]; # this nibble
|
||||
if ( $boundary->[1] ) {
|
||||
if ( $self->_identical_boundaries($boundary) ) {
|
||||
my $tbl = $self->{tbl};
|
||||
my $index = $tbl->{tbl_struct}->{keys}->{$self->{index}};
|
||||
my $n_cols = scalar @{$index->{cols}};
|
||||
my $chunkno = $self->{nibbleno} + 1;
|
||||
die "Possible infinite loop detected! "
|
||||
. "The upper boundary for chunk $chunkno is "
|
||||
. "<" . join(', ', @{$boundary->[0]}) . "> and the lower "
|
||||
. "boundary for chunk " . ($chunkno + 1) . " is also "
|
||||
. "<" . join(', ', @{$boundary->[1]}) . ">. "
|
||||
. "This usually happens when using a non-unique single "
|
||||
. "column index. The current chunk index for table "
|
||||
. "$tbl->{db}.$tbl->{tbl} is $self->{index} which is"
|
||||
. ($index->{is_unique} ? '' : ' not') . " unique and covers "
|
||||
. ($n_cols > 1 ? "$n_cols columns" : "1 column") . ".\n";
|
||||
}
|
||||
$self->{next_lb} = $boundary->[1]; # next nibble
|
||||
}
|
||||
else {
|
||||
@@ -397,6 +490,20 @@ sub _next_boundaries {
|
||||
return 1; # have boundary
|
||||
}
|
||||
|
||||
# Check whether this nibble's upper boundary equals the next nibble's
# lower boundary, which would make nibbling loop forever.
#
# Parameters:
#   $boundaries - Arrayref: [0] upper boundary values for this nibble,
#                 [1] lower boundary values for the next nibble
#
# Returns:
#   1 if every boundary value matches (string-wise), else 0.  Also
#   returns 0 when either boundary is missing.
sub _identical_boundaries {
   my ($self, $boundaries) = @_;
   my ($upper, $lower) = @{$boundaries}[0, 1];
   return 0 unless $upper && $lower;
   foreach my $i ( 0 .. $#{$upper} ) {
      # Any single differing value means the boundaries are distinct.
      return 0 if $lower->[$i] ne $upper->[$i];
   }
   MKDEBUG && _d('Infinite loop detected');
   return 1;
}
|
||||
|
||||
sub DESTROY {
|
||||
my ( $self ) = @_;
|
||||
foreach my $key ( keys %$self ) {
|
||||
|
@@ -38,7 +38,7 @@ if ( !$dbh ) {
|
||||
plan skip_all => 'Cannot connect to sandbox master';
|
||||
}
|
||||
else {
|
||||
plan tests => 21;
|
||||
plan tests => 25;
|
||||
}
|
||||
|
||||
my $q = new Quoter();
|
||||
@@ -469,6 +469,55 @@ is_deeply(
|
||||
"Nibble by 1 row"
|
||||
);
|
||||
|
||||
# ############################################################################
|
||||
# Avoid infinite loops.
|
||||
# ############################################################################
|
||||
$sb->load_file('master', "$in/bad_tables.sql");
|
||||
$dbh->do('analyze table bad_tables.inv');
|
||||
$ni = make_nibble_iter(
|
||||
db => 'bad_tables',
|
||||
tbl => 'inv',
|
||||
argv => [qw(--databases bad_tables --chunk-size 3)],
|
||||
);
|
||||
|
||||
$all_rows = $dbh->selectall_arrayref('select * from bad_tables.inv order by tee_id, on_id');
|
||||
|
||||
is(
|
||||
$ni->nibble_index(),
|
||||
'index_inv_on_tee_id_and_on_id',
|
||||
'Use index with higest cardinality'
|
||||
);
|
||||
|
||||
@rows = ();
|
||||
while (my $row = $ni->next()) {
|
||||
push @rows, $row;
|
||||
}
|
||||
|
||||
is_deeply(
|
||||
\@rows,
|
||||
$all_rows,
|
||||
'Selected all rows from non-unique index'
|
||||
);
|
||||
|
||||
$dbh->do('alter table bad_tables.inv drop index index_inv_on_tee_id_and_on_id');
|
||||
$ni = make_nibble_iter(
|
||||
db => 'bad_tables',
|
||||
tbl => 'inv',
|
||||
argv => [qw(--databases bad_tables --chunk-size 7)],
|
||||
);
|
||||
|
||||
is(
|
||||
$ni->nibble_index(),
|
||||
'index_inv_on_on_id',
|
||||
'Using bad index'
|
||||
);
|
||||
|
||||
throws_ok(
|
||||
sub { for (1..50) { $ni->next() } },
|
||||
qr/infinite loop/,
|
||||
'Detects infinite loop'
|
||||
);
|
||||
|
||||
# #############################################################################
|
||||
# Done.
|
||||
# #############################################################################
|
||||
|
21
t/lib/samples/NibbleIterator/bad_tables.sql
Normal file
21
t/lib/samples/NibbleIterator/bad_tables.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
-- Test fixture for NibbleIterator: a table that is hard to nibble safely.
DROP DATABASE IF EXISTS bad_tables;
CREATE DATABASE bad_tables;
USE bad_tables;

-- This table can cause an infinite nibbling loop.
-- It has no PRIMARY or unique key; both indexes are non-unique, so a
-- chunk's upper boundary can equal the next chunk's lower boundary when
-- many rows share the same key value.
CREATE TABLE `inv` (
  `tee_id` int(11) NOT NULL,
  `on_id` int(11) NOT NULL,
  `updated_at` datetime DEFAULT NULL,
  KEY `index_inv_on_on_id` (`on_id`),
  KEY `index_inv_on_tee_id_and_on_id` (`tee_id`,`on_id`)
);

-- Rows deliberately repeat on_id values heavily so that chunking by the
-- single-column index produces identical boundaries.
INSERT INTO inv (tee_id, on_id) VALUES
(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 7), (1, 8), (1, 9),
(2, 1), (2, 2), (2, 3), (2, 5), (2, 6), (2, 7), (2, 8),
(3, 1), (3, 2), (3, 3), (3, 4),
(4, 3), (4, 4), (4, 5), (4, 6), (4, 7), (4, 8), (4, 9),
(5,1),
(6, 1), (6, 2), (6, 3), (6, 4), (6, 5), (6, 6), (6, 7), (6, 8), (6, 9),
(7, 1), (7, 2), (7, 3), (7, 4), (7, 5), (7, 6), (7, 7), (7, 8), (7, 9);
|
Reference in New Issue
Block a user