From 4e8b00b4cccc9ff0c6ed56bcb461661b7802070a Mon Sep 17 00:00:00 2001
From: Daniel Nichter
Date: Fri, 23 Sep 2011 17:41:49 -0600
Subject: [PATCH] Update NibbleIterator in pt-table-checksum.

---
Review notes (text in this section is not part of the commit):
  * _find_best_index: the cardinality comparator compared $b with $b, so it
    always returned 0 and indexes were never ranked by cardinality.  It now
    compares $b with $a (highest cardinality first).
  * Seven comment lines were added in the second hunk; that hunk's length,
    the new-file start lines of the later hunks, and the diffstat below were
    updated to match.  The post-image blob id on the index line predates
    these changes.
  * Line breaks and indentation were reconstructed from a flattened copy of
    this patch, so apply with whitespace tolerance if context does not match.

 bin/pt-table-checksum | 129 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 122 insertions(+), 7 deletions(-)

diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum
index b8d43e32..36ddea97 100755
--- a/bin/pt-table-checksum
+++ b/bin/pt-table-checksum
@@ -3331,10 +3331,7 @@ sub new {
    }
    my ($dbh, $tbl, $chunk_size, $o, $q) = @args{@required_args};
 
-   my $index = $args{TableParser}->find_best_index(
-      $tbl->{tbl_struct},
-      $args{chunk_index},
-   );
+   my $index = _find_best_index(%args);
    die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
    my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
 
@@ -3537,6 +3534,90 @@ sub set_chunk_size {
    return;
 }
 
+# Pick the index to nibble on.  A PRIMARY or unique index wins outright
+# (chunk_index if given, else the first from sort_indexes()); otherwise
+# use the candidate with the highest cardinality, then the most columns.
+sub _find_best_index {
+   my (%args) = @_;
+   my @required_args = qw(tbl TableParser dbh Quoter);
+   my ($tbl, $tp) = @args{@required_args};
+
+   my $tbl_struct = $tbl->{tbl_struct};
+   my $indexes = $tbl_struct->{keys};
+
+   my $best_index;
+   my @possible_indexes;
+   if ( my $want_index = $args{chunk_index} ) {
+      MKDEBUG && _d('Want to use nibble index', $want_index);
+      if ( $want_index eq 'PRIMARY' || $indexes->{$want_index}->{is_unique} ) {
+         $best_index = $want_index;
+      }
+      else {
+         push @possible_indexes, $want_index;
+      }
+   }
+   else {
+      foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
+         if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
+            $best_index = $index;
+            last;
+         }
+         else {
+            push @possible_indexes, $index;
+         }
+      }
+   }
+
+   if ( !$best_index && @possible_indexes ) {
+      MKDEBUG && _d('No PRIMARY or unique indexes;',
+         'will use index with highest cardinality');
+      foreach my $index ( @possible_indexes ) {
+         $indexes->{$index}->{cardinality} = _get_index_cardinality(
+            %args,
+            index => $index,
+         );
+      }
+      @possible_indexes = sort {
+         # Highest cardinality first.
+         my $cmp
+            = $indexes->{$b}->{cardinality} <=> $indexes->{$a}->{cardinality};
+         if ( $cmp == 0 ) {
+            # Same cardinality: prefer the index with more columns.
+            $cmp = scalar @{$indexes->{$b}->{cols}}
+               <=> scalar @{$indexes->{$a}->{cols}};
+         }
+         $cmp;
+      } @possible_indexes;
+      $best_index = $possible_indexes[0];
+   }
+
+   MKDEBUG && _d('Best index:', $best_index);
+   return $best_index;
+}
+
+# Multiply together the non-zero cardinality values that SHOW INDEXES
+# returns for the index; the result is 1 if there are none.
+sub _get_index_cardinality {
+   my (%args) = @_;
+   my @required_args = qw(dbh tbl index Quoter);
+   my ($dbh, $tbl, $index, $q) = @args{@required_args};
+
+   my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
+           . " WHERE Key_name = '$index'";
+   MKDEBUG && _d($sql);
+   my $cardinality = 1;
+   my $rows = $dbh->selectall_hashref($sql, 'key_name');
+   foreach my $row ( values %$rows ) {
+      $cardinality *= $row->{cardinality} if $row->{cardinality};
+   }
+   MKDEBUG && _d('Index', $index, 'cardinality:', $cardinality);
+   return $cardinality;
+}
+
+sub _can_nibble_index {
+   my ($index) = @_;
+}
+
 sub _can_nibble_once {
    my ($self) = @_;
    my ($dbh, $tbl, $tp) = @{$self}{qw(dbh tbl TableParser)};
@@ -3627,6 +3708,22 @@ sub _next_boundaries {
    if ( $boundary && @$boundary ) {
       $self->{ub} = $boundary->[0]; # this nibble
       if ( $boundary->[1] ) {
+         if ( $self->_identical_boundaries($boundary) ) {
+            my $tbl = $self->{tbl};
+            my $index = $tbl->{tbl_struct}->{keys}->{$self->{index}};
+            my $n_cols = scalar @{$index->{cols}};
+            my $chunkno = $self->{nibbleno} + 1;
+            die "Possible infinite loop detected! "
+               . "The upper boundary for chunk $chunkno is "
+               . "<" . join(', ', @{$boundary->[0]}) . "> and the lower "
+               . "boundary for chunk " . ($chunkno + 1) . " is also "
+               . "<" . join(', ', @{$boundary->[1]}) . ">. "
+               . "This usually happens when using a non-unique single "
+               . "column index. The current chunk index for table "
+               . "$tbl->{db}.$tbl->{tbl} is $self->{index} which is"
+               . ($index->{is_unique} ? '' : ' not') . " unique and covers "
+               . ($n_cols > 1 ? "$n_cols columns" : "1 column") . ".\n";
+         }
          $self->{next_lb} = $boundary->[1]; # next nibble
       }
       else {
@@ -3644,6 +3741,19 @@ sub _next_boundaries {
    return 1; # have boundary
 }
 
+sub _identical_boundaries {
+   my ($self, $boundaries) = @_;
+   my $ub = $boundaries->[0];
+   my $lb = $boundaries->[1];
+   return 0 unless $ub && $lb;
+   my $n_vals = scalar @$ub;
+   for my $i ( 0..($n_vals-1) ) {
+      return 0 if $lb->[$i] ne $ub->[$i];
+   }
+   MKDEBUG && _d('Infinite loop detected');
+   return 1;
+}
+
 sub DESTROY {
    my ( $self ) = @_;
    foreach my $key ( keys %$self ) {
@@ -5428,7 +5538,10 @@ sub main {
          TableNibbler => $tn,
          TableParser => $tp,
       );
-      
+
+      # If the chunk index is unique, then we'll always get the exact number of
+      # rows request (or less for the final chunk sometimes), so we disable the
+      # chunk size limit.
       my $chunk_index = $nibble_iter->nibble_index();
       if ( $tbl->{tbl_struct}->{keys}->{$chunk_index}->{is_unique} ) {
          MKDEBUG && _d('Disabling chunk size limit for table because',
@@ -5436,6 +5549,8 @@ sub main {
          $tbl->{chunk_size_limit} = 0;
       }
       else {
+         MKDEBUG && _d('Enabling chunk size limit for table because',
+            'chunk index', $chunk_index, 'is not unique');
          $tbl->{chunk_size_limit} = $o->get('chunk-size-limit');
       }
 
@@ -5499,7 +5614,7 @@ sub exec_nibble {
    my $lb_quoted = join(',', map { $q->quote_val($_) } @$lb);
    my $ub_quoted = join(',', map { $q->quote_val($_) } @$ub);
 
-   my $chunk_idx = $$nibble_iter->nibble_index();
+   my $chunk_idx = $nibble_iter->nibble_index();
 
    # Execute the REPLACE...SELECT checksum query.
    # MKDEBUG && _d($sth->{Statement}, 'params:',
@@ -5773,7 +5888,7 @@ sub create_repl_table {
 # True if EXPLAIN rows is >= chunk-size * chunk-size-limit, else false
 sub is_oversize_chunk {
    my ( %args ) = @_;
-   my @required_args = qw(tbl explain_sth lb ub chunk_size chunk_size_limit);
+   my @required_args = qw(tbl explain_sth lb ub chunk_size limit);
    foreach my $arg ( @required_args ) {
       die "I need a $arg argument" unless defined $args{$arg};
    }