Detect infinite loops. Use best non-unique index. Disable chunk size limit if chunk index is unique.

This commit is contained in:
Daniel Nichter
2011-09-23 17:33:23 -06:00
parent 07cb6010a2
commit 9f3e05691c
4 changed files with 220 additions and 30 deletions

View File

@@ -5249,17 +5249,19 @@ sub main {
# Check if the chunk is too large. If yes, then return 0 to
# skip this chunk and fetch the next boundary.
my $is_oversize = is_oversize_chunk(
%args,
chunk_size => $tbl->{chunk_size},
chunk_size_limit => $o->get('chunk-size-limit'),
);
if ( $is_oversize ) {
MKDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
"$tbl->{db}.$tbl->{tbl}", 'is too large');
$tbl->{checksum_results}->{skipped}++;
$tbl->{nibble_time} = 0;
return 0; # next boundary
if ( $tbl->{chunk_size_limit} ) {
my $is_oversize = is_oversize_chunk(
%args,
chunk_size => $tbl->{chunk_size},
limit => $tbl->{chunk_size_limit},
);
if ( $is_oversize ) {
MKDEBUG && _d('Chunk', $args{nibbleno}, 'of table',
"$tbl->{db}.$tbl->{tbl}", 'is too large');
$tbl->{checksum_results}->{skipped}++;
$tbl->{nibble_time} = 0;
return 0; # next boundary
}
}
# Exec and time the chunk checksum query. If it fails, retry.
@@ -5426,6 +5428,16 @@ sub main {
TableNibbler => $tn,
TableParser => $tp,
);
my $chunk_index = $nibble_iter->nibble_index();
if ( $tbl->{tbl_struct}->{keys}->{$chunk_index}->{is_unique} ) {
MKDEBUG && _d('Disabling chunk size limit for table because',
'chunk index', $chunk_index, 'is unique');
$tbl->{chunk_size_limit} = 0;
}
else {
$tbl->{chunk_size_limit} = $o->get('chunk-size-limit');
}
# Finally, checksum the table.
# The "1 while" loop is necessary because we're executing REPLACE
@@ -5487,21 +5499,22 @@ sub exec_nibble {
my $lb_quoted = join(',', map { $q->quote_val($_) } @$lb);
my $ub_quoted = join(',', map { $q->quote_val($_) } @$ub);
my $chunk_idx = $$nibble_iter->nibble_index();
# Execute the REPLACE...SELECT checksum query.
# MKDEBUG && _d($sth->{Statement}, 'params:',
# );
$sth->execute(
# REPLACE INTO repl_table SELECT
$tbl->{db}, # db
$tbl->{tbl}, # tbl
$args{nibbleno}, # chunk
$nibble_iter->nibble_index(), # chunk_index
$lb_quoted, # lower_boundary
$ub_quoted, # upper_boundary
$tbl->{db}, # db
$tbl->{tbl}, # tbl
$args{nibbleno}, # chunk
$chunk_idx, # chunk_index
$lb_quoted, # lower_boundary
$ub_quoted, # upper_boundary
# this_cnt, this_crc WHERE
@$lb, # upper boundary values
@$ub, # lower boundary values
@$lb, # lower boundary values
@$ub, # upper boundary values
);
# Check if checksum query caused any warnings.
@@ -5749,12 +5762,12 @@ sub create_repl_table {
# Determine if the chunk is oversize.
#
# Required Arguments:
# * tbl - Standard tbl hashref
# * explain_sth - Sth to EXPLAIN the chunking query
# * lb - Arrayref with lower boundary values for explain_sth
# * ub - Arrayref with upper boundary values for explain_sth
# * chunk_size - Chunk size
# * chunk_size_limit - Chunk size limit
# * tbl - Standard tbl hashref
# * explain_sth - Sth to EXPLAIN the chunking query
# * lb - Arrayref with lower boundary values for explain_sth
# * ub - Arrayref with upper boundary values for explain_sth
# * chunk_size - Chunk size
# * limit - Chunk size limit
#
# Returns:
# True if EXPLAIN rows is >= chunk-size * chunk-size-limit, else false

View File

@@ -57,10 +57,7 @@ sub new {
my ($dbh, $tbl, $chunk_size, $o, $q) = @args{@required_args};
# Get an index to nibble by. We'll order rows by the index's columns.
my $index = $args{TableParser}->find_best_index(
$tbl->{tbl_struct},
$args{chunk_index},
);
my $index = _find_best_index(%args);
die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
@@ -290,6 +287,86 @@ sub set_chunk_size {
return;
}
# Find the best index to nibble the table by.  Preference order:
#   1. The caller-requested chunk_index, if it is PRIMARY or unique.
#   2. PRIMARY or the first unique index (in TableParser sort order).
#   3. The non-unique index with the highest cardinality; ties are
#      broken in favor of the index covering more columns.
#
# Required Arguments:
#   * tbl         - Standard tbl hashref with tbl_struct
#   * TableParser - TableParser object
#   * dbh         - dbh, used by _get_index_cardinality()
#   * Quoter      - Quoter object, used by _get_index_cardinality()
#
# Optional Arguments:
#   * chunk_index - Name of the index the caller wants to nibble by
#
# Returns:
#   Index name, or undef if the table has no usable index.
sub _find_best_index {
   my (%args) = @_;
   my @required_args = qw(tbl TableParser dbh Quoter);
   my ($tbl, $tp) = @args{@required_args};
   my $tbl_struct = $tbl->{tbl_struct};
   my $indexes    = $tbl_struct->{keys};

   my $best_index;
   my @possible_indexes;
   if ( my $want_index = $args{chunk_index} ) {
      MKDEBUG && _d('Want to use nibble index', $want_index);
      # A requested unique index wins outright; a non-unique one still
      # has to compete on cardinality below.
      if ( $want_index eq 'PRIMARY' || $indexes->{$want_index}->{is_unique} ) {
         $best_index = $want_index;
      }
      else {
         push @possible_indexes, $want_index;
      }
   }
   else {
      foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
         if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
            $best_index = $index;
            last;
         }
         else {
            push @possible_indexes, $index;
         }
      }
   }

   if ( !$best_index && @possible_indexes ) {
      MKDEBUG && _d('No PRIMARY or unique indexes;',
         'will use index with highest cardinality');
      foreach my $index ( @possible_indexes ) {
         $indexes->{$index}->{cardinality} = _get_index_cardinality(
            %args,
            index => $index,
         );
      }
      @possible_indexes = sort {
         # Prefer the index with the highest cardinality (descending).
         # BUG FIX: this previously compared $b's cardinality to itself
         # ($b <=> $b), which is always 0, so cardinality never
         # affected the ordering and only the column-count tiebreak ran.
         my $cmp
            = $indexes->{$b}->{cardinality} <=> $indexes->{$a}->{cardinality};
         if ( $cmp == 0 ) {
            # Indexes have the same cardinality; prefer the one with
            # more columns.
            $cmp = scalar @{$indexes->{$b}->{cols}}
               <=> scalar @{$indexes->{$a}->{cols}};
         }
         $cmp;
      } @possible_indexes;
      $best_index = $possible_indexes[0];
   }

   MKDEBUG && _d('Best index:', $best_index);
   return $best_index;
}
# Estimate the cardinality of the given index from SHOW INDEXES,
# multiplying together the per-row cardinality values that MySQL
# reports for the index.
#
# Required Arguments:
#   * dbh    - dbh
#   * tbl    - Standard tbl hashref (db and tbl keys are used)
#   * index  - Name of the index to examine
#   * Quoter - Quoter object
#
# Returns:
#   Cardinality estimate (>= 1).
sub _get_index_cardinality {
   my (%args) = @_;
   my @required_args = qw(dbh tbl index Quoter);
   my ($dbh, $tbl, $index, $q) = @args{@required_args};
   my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
           . " WHERE Key_name = '$index'";
   MKDEBUG && _d($sql);
   # NOTE(review): keying selectall_hashref on key_name collapses the
   # multiple rows of a multi-column index into one (every row shares
   # the same Key_name), so only one row's cardinality survives the
   # multiplication.  Also presumes the dbh lowercases column names
   # (FetchHashKeyName => 'NAME_lc') -- confirm both against callers.
   my $rows_by_key = $dbh->selectall_hashref($sql, 'key_name');
   my $cardinality = 1;
   for my $idx_row ( values %$rows_by_key ) {
      next unless $idx_row->{cardinality};  # skip undef/zero estimates
      $cardinality *= $idx_row->{cardinality};
   }
   MKDEBUG && _d('Index', $index, 'cardinality:', $cardinality);
   return $cardinality;
}
# Stub: presumably meant to report whether the given index is usable
# for nibbling, but the body is unimplemented -- it only unpacks its
# argument, so its return value is not meaningful.  Callers must not
# rely on it yet.  TODO: implement or remove.
sub _can_nibble_index {
my ($index) = @_;
}
sub _can_nibble_once {
my ($self) = @_;
my ($dbh, $tbl, $tp) = @{$self}{qw(dbh tbl TableParser)};
@@ -380,6 +457,22 @@ sub _next_boundaries {
if ( $boundary && @$boundary ) {
$self->{ub} = $boundary->[0]; # this nibble
if ( $boundary->[1] ) {
if ( $self->_identical_boundaries($boundary) ) {
my $tbl = $self->{tbl};
my $index = $tbl->{tbl_struct}->{keys}->{$self->{index}};
my $n_cols = scalar @{$index->{cols}};
my $chunkno = $self->{nibbleno} + 1;
die "Possible infinite loop detected! "
. "The upper boundary for chunk $chunkno is "
. "<" . join(', ', @{$boundary->[0]}) . "> and the lower "
. "boundary for chunk " . ($chunkno + 1) . " is also "
. "<" . join(', ', @{$boundary->[1]}) . ">. "
. "This usually happens when using a non-unique single "
. "column index. The current chunk index for table "
. "$tbl->{db}.$tbl->{tbl} is $self->{index} which is"
. ($index->{is_unique} ? '' : ' not') . " unique and covers "
. ($n_cols > 1 ? "$n_cols columns" : "1 column") . ".\n";
}
$self->{next_lb} = $boundary->[1]; # next nibble
}
else {
@@ -397,6 +490,20 @@ sub _next_boundaries {
return 1; # have boundary
}
# Return true if the two boundaries are value-for-value identical.
# An identical upper boundary (this nibble) and lower boundary (next
# nibble) means the nibbler would fetch the same rows forever.
#
# Arguments:
#   $boundaries - Arrayref: [0] upper boundary values, [1] lower
#                 boundary values for the next nibble (arrayrefs).
#
# Returns:
#   1 if every value matches, else 0.
sub _identical_boundaries {
   my ($self, $boundaries) = @_;
   my ($upper, $lower) = @{$boundaries}[0, 1];
   # Can't be identical unless both boundaries exist.
   return 0 unless $upper && $lower;
   foreach my $i ( 0 .. $#$upper ) {
      # One diff means the bounds aren't identical.
      return 0 if $lower->[$i] ne $upper->[$i];
   }
   MKDEBUG && _d('Infinite loop detected');
   return 1;
}
sub DESTROY {
my ( $self ) = @_;
foreach my $key ( keys %$self ) {

View File

@@ -38,7 +38,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 21;
plan tests => 25;
}
my $q = new Quoter();
@@ -469,6 +469,55 @@ is_deeply(
"Nibble by 1 row"
);
# ############################################################################
# Avoid infinite loops.
# ############################################################################
$sb->load_file('master', "$in/bad_tables.sql");
$dbh->do('analyze table bad_tables.inv');
$ni = make_nibble_iter(
db => 'bad_tables',
tbl => 'inv',
argv => [qw(--databases bad_tables --chunk-size 3)],
);
$all_rows = $dbh->selectall_arrayref('select * from bad_tables.inv order by tee_id, on_id');
is(
$ni->nibble_index(),
'index_inv_on_tee_id_and_on_id',
'Use index with higest cardinality'
);
@rows = ();
while (my $row = $ni->next()) {
push @rows, $row;
}
is_deeply(
\@rows,
$all_rows,
'Selected all rows from non-unique index'
);
$dbh->do('alter table bad_tables.inv drop index index_inv_on_tee_id_and_on_id');
$ni = make_nibble_iter(
db => 'bad_tables',
tbl => 'inv',
argv => [qw(--databases bad_tables --chunk-size 7)],
);
is(
$ni->nibble_index(),
'index_inv_on_on_id',
'Using bad index'
);
throws_ok(
sub { for (1..50) { $ni->next() } },
qr/infinite loop/,
'Detects infinite loop'
);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -0,0 +1,21 @@
-- Test fixture: tables that are difficult to nibble safely.
DROP DATABASE IF EXISTS bad_tables;
CREATE DATABASE bad_tables;
USE bad_tables;
-- This table can cause an infinite nibbling loop.
-- It has no PRIMARY or unique key; both indexes are non-unique, so
-- chunk boundaries chosen on index_inv_on_on_id alone can repeat
-- (many rows share the same on_id value).
CREATE TABLE `inv` (
`tee_id` int(11) NOT NULL,
`on_id` int(11) NOT NULL,
`updated_at` datetime DEFAULT NULL,
KEY `index_inv_on_on_id` (`on_id`),
KEY `index_inv_on_tee_id_and_on_id` (`tee_id`,`on_id`)
);
-- Rows deliberately duplicate on_id values across tee_id groups so
-- that single-column chunking by on_id yields identical boundaries.
INSERT INTO inv (tee_id, on_id) VALUES
(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 7), (1, 8), (1, 9),
(2, 1), (2, 2), (2, 3), (2, 5), (2, 6), (2, 7), (2, 8),
(3, 1), (3, 2), (3, 3), (3, 4),
(4, 3), (4, 4), (4, 5), (4, 6), (4, 7), (4, 8), (4, 9),
(5,1),
(6, 1), (6, 2), (6, 3), (6, 4), (6, 5), (6, 6), (6, 7), (6, 8), (6, 9),
(7, 1), (7, 2), (7, 3), (7, 4), (7, 5), (7, 6), (7, 7), (7, 8), (7, 9);