percona-toolkit/lib/NibbleIterator.pm

# This program is copyright 2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# NibbleIterator package
# ###########################################################################
{
# Package: NibbleIterator
# NibbleIterator nibbles tables.
package NibbleIterator;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# Sub: new
#
# Required Arguments:
#   Cxn          - <Cxn> object
#   tbl          - Standard tbl ref
#   chunk_size   - Number of rows to nibble per chunk
#   OptionParser - <OptionParser> object
#   Quoter       - <Quoter> object
#   TableNibbler - <TableNibbler> object
#   TableParser  - <TableParser> object
#
# Optional Arguments:
#   dml         - Data manipulation statement to precede the SELECT statement
#   select      - Arrayref of table columns to select
#   chunk_index - Index to use for nibbling
#   one_nibble  - Allow one-chunk tables (default yes)
#   resume      - Hashref with lower_boundary and upper_boundary values
#                 to resume nibble from
#   order_by    - Add ORDER BY to nibble SQL (default no)
#
# Returns:
#  NibbleIterator object
sub new {
   my ( $class, %args ) = @_;
   my @required_args = qw(Cxn tbl chunk_size OptionParser Quoter TableNibbler TableParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args};

   # Die unless table can be nibbled, else return row estimate, nibble index,
   # and if table can be nibbled in one chunk.
   my $nibble_params = can_nibble(%args);

   # Text appended to the queries in comments so caller can identify
   # them in processlist, binlog, etc.
   my %comments = (
      bite   => "bite table",
      nibble => "nibble table",
   );
   if ( $args{comments} ) {
      map  { $comments{$_} = $args{comments}->{$_} }
      grep { defined $args{comments}->{$_}         }
      keys %{$args{comments}};
   }

   my $where      = $o->has('where') ? $o->get('where') : '';
   my $tbl_struct = $tbl->{tbl_struct};
   my $ignore_col = $o->has('ignore-columns')
                  ? ($o->get('ignore-columns') || {})
                  : {};
   my $all_cols   = $o->has('columns')
                  ? ($o->get('columns') || $tbl_struct->{cols})
                  : $tbl_struct->{cols};
   my @cols       = grep { !$ignore_col->{$_} } @$all_cols;
   my $self;
   if ( $nibble_params->{one_nibble} ) {
      my $params = _one_nibble(\%args, \@cols, $where, $tbl, \%comments);
      $self = {
         %args,
         one_nibble         => 1,
         limit              => 0,
         nibble_sql         => $params->{nibble_sql},
         explain_nibble_sql => $params->{explain_nibble_sql},
      };
   } else {
      my $params = _nibble_params($nibble_params, $tbl, \%args, \@cols, $chunk_size, $where, \%comments, $q);
      $self = {
         %args,
         index                => $params->{index},
         limit                => $params->{limit},
         first_lb_sql         => $params->{first_lb_sql},
         last_ub_sql          => $params->{last_ub_sql},
         ub_sql               => $params->{ub_sql},
         nibble_sql           => $params->{nibble_sql},
         explain_first_lb_sql => $params->{explain_first_lb_sql},
         explain_ub_sql       => $params->{explain_ub_sql},
         explain_nibble_sql   => $params->{explain_nibble_sql},
         resume_lb_sql        => $params->{resume_lb_sql},
         sql                  => $params->{sql},
      };
   }

   $self->{row_est}    = $nibble_params->{row_est},
   $self->{nibbleno}   = 0;
   $self->{have_rows}  = 0;
   $self->{rowno}      = 0;
   $self->{oktonibble} = 1;
   $self->{pause_file} = $nibble_params->{pause_file};
   $self->{sleep}      = $args{sleep} || 60;

   $self->{nibble_params} = $nibble_params;
   $self->{tbl}           = $tbl;
   $self->{args}          = \%args;
   $self->{cols}          = \@cols;
   $self->{chunk_size}    = $chunk_size;
   $self->{where}         = $where;
   $self->{comments}      = \%comments;

   return bless $self, $class;
}

sub switch_to_nibble {
    my $self = shift;
    my $params = _nibble_params($self->{nibble_params}, $self->{tbl}, $self->{args}, $self->{cols},
                                $self->{chunk_size}, $self->{where}, $self->{comments}, $self->{Quoter});

    $self->{one_nibble}           = 0;
    $self->{index}                = $params->{index};
    $self->{limit}                = $params->{limit};
    $self->{first_lb_sql}         = $params->{first_lb_sql};
    $self->{last_ub_sql}          = $params->{last_ub_sql};
    $self->{ub_sql}               = $params->{ub_sql};
    $self->{nibble_sql}           = $params->{nibble_sql};
    $self->{explain_first_lb_sql} = $params->{explain_first_lb_sql};
    $self->{explain_ub_sql}       = $params->{explain_ub_sql};
    $self->{explain_nibble_sql}   = $params->{explain_nibble_sql};
    $self->{resume_lb_sql}        = $params->{resume_lb_sql};
    $self->{sql}                  = $params->{sql};
    $self->_get_bounds();
    $self->_prepare_sths();
}

sub _one_nibble {
    my ($args, $cols, $where, $tbl, $comments) = @_;
    my $q        = new Quoter();

      # If the chunk size is >= number of rows in table, then we don't
      # need to chunk; we can just select all rows, in order, at once.
      my $nibble_sql
         = ($args->{dml} ? "$args->{dml} " : "SELECT ")
         . ($args->{select} ? $args->{select}
             #                          : join(', ', map { $q->quote($_) } @$cols))
         : join(', ', map{ $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' ?
                                   "CAST(".$q->quote($_)." AS UNSIGNED)" : $q->quote($_) } @$cols))
         . " FROM $tbl->{name}"
         . ($where ? " WHERE $where" : '')
         . ($args->{lock_in_share_mode} ? " LOCK IN SHARE MODE" : "")
         . " /*$comments->{bite}*/";
      PTDEBUG && _d('One nibble statement:', $nibble_sql);

      my $explain_nibble_sql
         = "EXPLAIN SELECT "
         . ($args->{select} ? $args->{select}
                          : join(', ', map{ $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum'
                          ? "CAST(".$q->quote($_)." AS UNSIGNED)" : $q->quote($_) } @$cols))
         . " FROM $tbl->{name}"
         . ($where ? " WHERE $where" : '')
         . ($args->{lock_in_share_mode} ? " LOCK IN SHARE MODE" : "")
         . " /*explain $comments->{bite}*/";
      PTDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql);

      return {
         one_nibble         => 1,
         limit              => 0,
         nibble_sql         => $nibble_sql,
         explain_nibble_sql => $explain_nibble_sql,
      };
}

sub _nibble_params {
      my ($nibble_params, $tbl, $args, $cols, $chunk_size, $where, $comments, $q) = @_;
      my $index      = $nibble_params->{index}; # brevity
      my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};

      # Figure out how to nibble the table with the index.
      my $asc = $args->{TableNibbler}->generate_asc_stmt(
         %$args,
         tbl_struct   => $tbl->{tbl_struct},
         index        => $index,
         n_index_cols => $args->{n_chunk_index_cols},
         cols         => $cols,
         asc_only     => 1,
      );
      PTDEBUG && _d('Ascend params:', Dumper($asc));

      # Make SQL statements, prepared on first call to next().  FROM and
      # ORDER BY are the same for all statements.  FORCE IDNEX and ORDER BY
      # are needed to ensure deterministic nibbling.

      my $from     = "$tbl->{name} FORCE INDEX(`$index`)";
      my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
      my $order_by_dec = join(' DESC,', map {$q->quote($_)} @{$index_cols});

      # The real first row in the table.  Usually we start nibbling from
      # this row.  Called once in _get_bounds().
      my $first_lb_sql
         = "SELECT /*!40001 SQL_NO_CACHE */ "
         . join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' ? "CAST(".$q->quote($_)." AS UNSIGNED)" : $q->quote($_)} @{$asc->{scols}})
         . " FROM $from"
         . ($where ? " WHERE $where" : '')
         . " ORDER BY $order_by"
         . " LIMIT 1"
         . " /*first lower boundary*/";
      PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);

      # If we're resuming, this fetches the effective first row, which
      # should differ from the real first row.  Called once in _get_bounds().
      my $resume_lb_sql;
      if ( $args->{resume} ) {
         $resume_lb_sql
            = "SELECT /*!40001 SQL_NO_CACHE */ "
            . join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' ? "CAST(".$q->quote($_)." AS UNSIGNED)" : $q->quote($_)} @{$asc->{scols}})
            . " FROM $from"
            . " WHERE " . $asc->{boundaries}->{'>'}
            . ($where ? " AND ($where)" : '')
            . " ORDER BY $order_by"
            . " LIMIT 1"
            . " /*resume lower boundary*/";
         PTDEBUG && _d('Resume lower boundary statement:', $resume_lb_sql);
      }

      # The nibbles are inclusive, so we need to fetch the real last row
      # in the table.  Saved as boundary last_upper and used as boundary
      # upper in some cases.  Called once in _get_bounds().
      my $last_ub_sql
         = "SELECT /*!40001 SQL_NO_CACHE */ "
         . join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' ? "CAST(".$q->quote($_)." AS UNSIGNED)" : $q->quote($_)} @{$asc->{scols}})
         . " FROM $from"
         . ($where ? " WHERE $where" : '')
         . " ORDER BY "
         . $order_by_dec . ' DESC'
         . " LIMIT 1"
         . " /*last upper boundary*/";
      PTDEBUG && _d('Last upper boundary statement:', $last_ub_sql);

      # Nibbles are inclusive, so for a..z, the nibbles are: a-e, f-j, k-o, p-t,
      # u-y, and z.  This complicates getting the next upper boundary because
      # if we use either (col >= lb AND col < ub) or (col > lb AND col <= ub)
      # in nibble_sql (below), then that fails for either the last or first
      # nibble respectively.  E.g. (col >= z AND col < z) doesn't work, nor
      # does (col > a AND col <= e).  Hence the fancy LIMIT 2 which returns
      # the upper boundary for the current nibble *and* the lower boundary
      # for the next nibble.  See _next_boundaries().
      my $ub_sql
         = "SELECT /*!40001 SQL_NO_CACHE */ "
         . join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' ? "CAST(".$q->quote($_)." AS UNSIGNED)" : $q->quote($_)} @{$asc->{scols}})
         . " FROM $from"
         . " WHERE " . $asc->{boundaries}->{'>='}
                     . ($where ? " AND ($where)" : '')
         . " ORDER BY $order_by"
         . " LIMIT ?, 2"
         . " /*next chunk boundary*/";
      PTDEBUG && _d('Upper boundary statement:', $ub_sql);

      # This statement does the actual nibbling work; its rows are returned
      # to the caller via next().
      my $nibble_sql
         = ($args->{dml} ? "$args->{dml} " : "SELECT ")
         . ($args->{select} ? $args->{select}
                          : join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' ? "CAST(".$q->quote($_)." AS UNSIGNED)" : $q->quote($_)} @{$asc->{cols}}))
         . " FROM $from"
         . " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
         . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
         . ($where ? " AND ($where)" : '')
         . ($args->{order_by} ? " ORDER BY $order_by" : "")
         . ($args->{lock_in_share_mode} ? " LOCK IN SHARE MODE" : "")
         . " /*$comments->{nibble}*/";
      PTDEBUG && _d('Nibble statement:', $nibble_sql);

      my $explain_nibble_sql
         = "EXPLAIN SELECT "
         . ($args->{select} ? $args->{select}
                          : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
         . " FROM $from"
         . " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
         . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
         . ($where ? " AND ($where)" : '')
         . ($args->{order_by} ? " ORDER BY $order_by" : "")
         . ($args->{lock_in_share_mode} ? " LOCK IN SHARE MODE" : "")
         . " /*explain $comments->{nibble}*/";
      PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);

      my $limit = $chunk_size - 1;
      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);

      my $params = {
         one_nibble           => 0,
         index                => $index,
         limit                => $limit,
         first_lb_sql         => $first_lb_sql,
         last_ub_sql          => $last_ub_sql,
         ub_sql               => $ub_sql,
         nibble_sql           => $nibble_sql,
         explain_first_lb_sql => "EXPLAIN $first_lb_sql",
         explain_ub_sql       => "EXPLAIN $ub_sql",
         explain_nibble_sql   => $explain_nibble_sql,
         resume_lb_sql        => $resume_lb_sql,
         sql                  => {
            columns    => $asc->{scols},
            from       => $from,
            where      => $where,
            boundaries => $asc->{boundaries},
            order_by   => $order_by,
         },
      };
      return $params;
}

sub next {
   my ($self) = @_;

   if ( !$self->{oktonibble} ) {
      PTDEBUG && _d('Not ok to nibble');
      return;
   }

   my %callback_args = (
      Cxn            => $self->{Cxn},
      tbl            => $self->{tbl},
      NibbleIterator => $self,
   );

   # First call, init everything.  This could be done in new(), but
   # all work is delayed until actually needed.
   if ($self->{nibbleno} == 0) {
      $self->_prepare_sths();
      $self->_get_bounds();
      if ( my $callback = $self->{callbacks}->{init} ) {
         $self->{oktonibble} = $callback->(%callback_args);
         PTDEBUG && _d('init callback returned', $self->{oktonibble});
         if ( !$self->{oktonibble} ) {
            $self->{no_more_boundaries} = 1;
            return;
         }
      }
      if ( !$self->{one_nibble} && !$self->{first_lower} ) {
         PTDEBUG && _d('No first lower boundary, table must be empty');
         $self->{no_more_boundaries} = 1;
         return;
      }
   }

   # If there's another nibble, fetch the rows within it.
   NIBBLE:
   while ( $self->{have_rows} || $self->_next_boundaries() ) {
      if ($self->{pause_file}) {
         while(-f $self->{pause_file}) {
            print "Sleeping $self->{sleep} seconds because $self->{pause_file} exists\n";
            my $dbh = $self->{Cxn}->dbh();
            if ( !$dbh || !$dbh->ping() ) {
               eval { $dbh = $self->{Cxn}->connect() }; # connect or die trying
               if ( $EVAL_ERROR ) {
                  chomp $EVAL_ERROR;
                  die "Lost connection to " . $self->{Cxn}->name() . " while waiting for "
                  . "replica lag ($EVAL_ERROR)\n";
               }
            }
            $dbh->do("SELECT 'nibble iterator keepalive'");
            sleep($self->{sleep});
         }
      }

      # If no rows, then we just got the next boundaries, which start
      # the next nibble.
      if ( !$self->{have_rows} ) {
         $self->{nibbleno}++;
         PTDEBUG && _d('Nibble:', $self->{nibble_sth}->{Statement}, 'params:',
            join(', ', (@{$self->{lower} || []}, @{$self->{upper} || []})));
         if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
            $self->{have_rows} = $callback->(%callback_args);
         }
         else {
            # XXX This call and others like it are relying on a Perl oddity.
            # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
            $self->{nibble_sth}->execute(@{$self->{lower}}, @{$self->{upper}});
            $self->{have_rows} = $self->{nibble_sth}->rows();
         }
         PTDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
      }

      # Return rows in this nibble.
      if ( $self->{have_rows} ) {
         # Return rows in nibble.  sth->{Active} is always true with
         # DBD::mysql v3, so we track the status manually.
         my $row = $self->{nibble_sth}->fetchrow_arrayref();
         if ( $row ) {
            $self->{rowno}++;
            PTDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
            # fetchrow_arraryref re-uses an internal arrayref, so we must copy.
            return [ @$row ];
         }
      }

      PTDEBUG && _d('No rows in nibble or nibble skipped');
      if ( my $callback = $self->{callbacks}->{after_nibble} ) {
         $callback->(%callback_args);
      }
      $self->{rowno}     = 0;
      $self->{have_rows} = 0;

   }

   PTDEBUG && _d('Done nibbling');
   if ( my $callback = $self->{callbacks}->{done} ) {
      $callback->(%callback_args);
   }

   return;
}

sub nibble_number {
   my ($self) = @_;
   return $self->{nibbleno};
}

sub set_nibble_number {
   my ($self, $n) = @_;
   die "I need a number" unless $n;
   $self->{nibbleno} = $n;
   PTDEBUG && _d('Set new nibble number:', $n);
   return;
}

sub nibble_index {
   my ($self) = @_;
   return $self->{index};
}

sub statements {
   my ($self) = @_;
   return {
      explain_first_lower_boundary => $self->{explain_first_lb_sth},
      nibble                       => $self->{nibble_sth},
      explain_nibble               => $self->{explain_nibble_sth},
      upper_boundary               => $self->{ub_sth},
      explain_upper_boundary       => $self->{explain_ub_sth},
   }
}

sub boundaries {
   my ($self) = @_;
   return {
      first_lower => $self->{first_lower},
      lower       => $self->{lower},
      upper       => $self->{upper},
      next_lower  => $self->{next_lower},
      last_upper  => $self->{last_upper},
   };
}

sub set_boundary {
   my ($self, $boundary, $values) = @_;
   die "I need a boundary parameter"
      unless $boundary;
   die "Invalid boundary: $boundary"
      unless $boundary =~ m/^(?:lower|upper|next_lower|last_upper)$/;
   die "I need a values arrayref parameter"
      unless $values && ref $values eq 'ARRAY';
   $self->{$boundary} = $values;
   PTDEBUG && _d('Set new', $boundary, 'boundary:', Dumper($values));
   return;
}

sub one_nibble {
   my ($self) = @_;
   return $self->{one_nibble};
}

sub limit {
   my ($self) = @_;
   return $self->{limit};
}

sub set_chunk_size {
   my ($self, $limit) = @_;
   return if $self->{one_nibble};
   die "Chunk size must be > 0" unless $limit;
   $self->{limit} = $limit - 1;
   PTDEBUG && _d('Set new chunk size (LIMIT):', $limit);
   return;
}

sub sql {
   my ($self) = @_;
   return $self->{sql};
}

sub more_boundaries {
   my ($self) = @_;
   return !$self->{no_more_boundaries};
}

sub row_estimate {
   my ($self) = @_;
   return $self->{row_est};
}

sub can_nibble {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl chunk_size OptionParser TableParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};

   my $where = $o->has('where') ? $o->get('where') : '';

   # About how many rows are there?
   my ($row_est, $mysql_index) = get_row_estimate(
      Cxn   => $cxn,
      tbl   => $tbl,
      where => $where,
   );

   # MySQL's chosen index is only something we should prefer
   # if --where is used.  Else, we can chose our own index
   # and disregard the MySQL index from the row estimate.
   # If there's a --where, however, then MySQL's chosen index
   # is used because it tells us how MySQL plans to optimize
   # for the --where.
   # https://bugs.launchpad.net/percona-toolkit/+bug/978432
   if ( !$where ) {
      $mysql_index = undef;
   }

   # Can all those rows be nibbled in one chunk?  If one_nibble is defined,
   # then do as it says; else, look at the chunk size limit.  If the chunk
   # size limit is disabled (=0), then use the chunk size because there
   # always needs to be a limit to the one-chunk table.
   my $chunk_size_limit = $o->get('chunk-size-limit') || 1;
   my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
                  ? $row_est <= $chunk_size * $chunk_size_limit
                  : 0;
   PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');

   # Special case: we're resuming and there's no boundaries, so the table
   # being resumed was originally nibbled in one chunk, so do the same again.
   if ( $args{resume}
        && !defined $args{resume}->{lower_boundary}
        && !defined $args{resume}->{upper_boundary} ) {
      PTDEBUG && _d('Resuming from one nibble table');
      $one_nibble = 1;
   }

   # Get an index to nibble by.  We'll order rows by the index's columns.
   my $index = _find_best_index(%args, mysql_index => $mysql_index);
   if ( !$index && !$one_nibble ) {
      die "There is no good index and the table is oversized.";
   }

   # The table can be nibbled if this point is reached, else we would have
   # died earlier.  Return some values about nibbling the table.
   my $pause_file = ($o->has('pause-file') && $o->get('pause-file')) || undef;

   return {
      row_est     => $row_est,      # nibble about this many rows
      index       => $index,        # using this index
      one_nibble  => $one_nibble,   # if the table fits in one nibble/chunk
      pause_file  => $pause_file,
   };
}

sub _find_best_index {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl TableParser);
   my ($cxn, $tbl, $tp) = @args{@required_args};
   my $tbl_struct = $tbl->{tbl_struct};
   my $indexes    = $tbl_struct->{keys};

   my $best_index;
   my $want_index = $args{chunk_index};
   # check if the user defined index exists
   # and declare it best_index if so
   if ( $want_index ) {
      PTDEBUG && _d('User wants to use index', $want_index);
      if ( !exists $indexes->{$want_index} ) {
         PTDEBUG && _d('Cannot use user index because it does not exist');
         $want_index = undef;
      } else {
         $best_index = $want_index;
      }
   }

   # if no user defined index or user defined index not valid
   # consider mysql's preferred index a candidate
   if ( !$best_index && !$want_index && $args{mysql_index} ) {
      PTDEBUG && _d('MySQL wants to use index', $args{mysql_index});
      $want_index = $args{mysql_index};
   }


   my @possible_indexes;
   # if haven't got a valid user chosen index
   # check if mysql's preferred index is unique, and if so
   # consider it the best, otherwise include it with other candidates
   if ( !$best_index && $want_index ) {
      if ( $indexes->{$want_index}->{is_unique} ) {
         PTDEBUG && _d('Will use wanted index');
         $best_index = $want_index;
      }
      else {
         PTDEBUG && _d('Wanted index is a possible index');
         push @possible_indexes, $want_index;
      }
   }

   # still no best index?
   # prefer unique index. otherwise put in candidates array.
   if (!$best_index) {
      PTDEBUG && _d('Auto-selecting best index');
      foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
         if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
            $best_index = $index;
            last;
         }
         else {
            push @possible_indexes, $index;
         }
      }
   }

   # choose the one with best cardinality
   if ( !$best_index && @possible_indexes ) {
      PTDEBUG && _d('No PRIMARY or unique indexes;',
         'will use index with highest cardinality');
      foreach my $index ( @possible_indexes ) {
         $indexes->{$index}->{cardinality} = _get_index_cardinality(
            %args,
            index => $index,
         );
      }
      @possible_indexes = sort {
         # Prefer the index with the highest cardinality.
         my $cmp
            = $indexes->{$b}->{cardinality} <=> $indexes->{$a}->{cardinality};
         if ( $cmp == 0 ) {
            # Indexes have the same cardinality; prefer the one with
            # more columns.
            $cmp = scalar @{$indexes->{$b}->{cols}}
               <=> scalar @{$indexes->{$a}->{cols}};
         }
         $cmp;
      } @possible_indexes;
      $best_index = $possible_indexes[0];
   }

   PTDEBUG && _d('Best index:', $best_index);
   return $best_index;
}

sub _get_index_cardinality {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl index);
   my ($cxn, $tbl, $index) = @args{@required_args};

   my $sql = "SHOW INDEXES FROM $tbl->{name} "
           . "WHERE Key_name = '$index'";
   PTDEBUG && _d($sql);
   my $cardinality = 1;
   my $dbh         = $cxn->dbh();
   my $key_name    = $dbh && ($dbh->{FetchHashKeyName} || '') eq 'NAME_lc'
                   ? 'key_name'
                   : 'Key_name';
   my $rows = $dbh->selectall_hashref($sql, $key_name);
   foreach my $row ( values %$rows ) {
      $cardinality *= $row->{cardinality} if $row->{cardinality};
   }
   PTDEBUG && _d('Index', $index, 'cardinality:', $cardinality);
   return $cardinality;
}

sub get_row_estimate {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl) = @args{@required_args};

   my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
           . "WHERE " . ($args{where} || '1=1');
   PTDEBUG && _d($sql);
   my $expl = $cxn->dbh()->selectrow_hashref($sql);
   PTDEBUG && _d(Dumper($expl));
   # MySQL's chosen index must be lowercase because TableParser::parse()
   # lowercases all idents (search in that module for \L) except for
   # the PRIMARY KEY which it leaves uppercase.
   # https://bugs.launchpad.net/percona-toolkit/+bug/995274
   my $mysql_index = $expl->{key} || '';
   if ( $mysql_index ne 'PRIMARY' ) {
      $mysql_index = lc($mysql_index);
   }
   return ($expl->{rows} || 0), $mysql_index;
}

sub _prepare_sths {
   my ($self) = @_;
   PTDEBUG && _d('Preparing statement handles');

   my $dbh = $self->{Cxn}->dbh();

   $self->{nibble_sth}         = $dbh->prepare($self->{nibble_sql});
   $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});

   if ( !$self->{one_nibble} ) {
      $self->{explain_first_lb_sth} = $dbh->prepare($self->{explain_first_lb_sql});
      $self->{ub_sth}               = $dbh->prepare($self->{ub_sql});
      $self->{explain_ub_sth}       = $dbh->prepare($self->{explain_ub_sql});
   }

   return;
}

sub _get_bounds {
   my ($self) = @_;

   if ( $self->{one_nibble} ) {
      if ( $self->{resume} ) {
         $self->{no_more_boundaries} = 1;
      }
      return;
   }

   my $dbh = $self->{Cxn}->dbh();

   # Get the real first lower boundary.
   $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
   PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));

   # The next boundary is the first lower boundary.  If resuming,
   # this should be something > the real first lower boundary and
   # bounded (else it's not one of our chunks).
   if ( my $nibble = $self->{resume} ) {
      if (    defined $nibble->{lower_boundary}
           && defined $nibble->{upper_boundary} ) {
         my $sth = $dbh->prepare($self->{resume_lb_sql});
         #my @ub  = split ',', $nibble->{upper_boundary};
         my @ub = $self->{Quoter}->deserialize_list($nibble->{upper_boundary});
         PTDEBUG && _d($sth->{Statement}, 'params:', @ub);
         $sth->execute(@ub);
         $self->{next_lower} = $sth->fetchrow_arrayref();
         $sth->finish();
      }
   }
   else {
      $self->{next_lower}  = $self->{first_lower};
   }
   PTDEBUG && _d('Next lower boundary:', Dumper($self->{next_lower}));

   if ( !$self->{next_lower} ) {
      # This happens if we resume from the end of the table, or if the
      # last chunk for resuming isn't bounded.
      PTDEBUG && _d('At end of table, or no more boundaries to resume');
      $self->{no_more_boundaries} = 1;

      # Get the real last upper boundary, i.e. the last row of the table
      # at this moment.  If rows are inserted after, we won't see them.
      # This is required for OobNibbleIterator because if we resume at
      # the lower or upper oob nibble, we also need to know the last upper
      # boundary of the table (we already have the first).
      $self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
      PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
   }

   return;
}

sub _next_boundaries {
   my ($self) = @_;

   if ( $self->{no_more_boundaries} ) {
      PTDEBUG && _d('No more boundaries');
      return; # stop nibbling
   }

   if ( $self->{one_nibble} ) {
      $self->{lower} = $self->{upper} = [];
      $self->{no_more_boundaries} = 1;  # for next call
      return 1; # continue nibbling
   }

   # Detect infinite loops.  If the lower boundary we just nibbled from
   # is identical to the next lower boundary, then this next nibble won't
   # go anywhere, so to speak, unless perhaps the chunk size has changed
   # which will cause us to nibble further ahead and maybe get a new lower
   # boundary that isn't identical, but we can't detect this, and in any
   # case, if there's one infinite loop there will probably be others.


   if ( $self->identical_boundaries($self->{lower}, $self->{next_lower}) ) {
      PTDEBUG && _d('Infinite loop detected');
      my $tbl     = $self->{tbl};
      my $index   = $tbl->{tbl_struct}->{keys}->{$self->{index}};
      my $n_cols  = scalar @{$index->{cols}};
      my $chunkno = $self->{nibbleno};

      # XXX This call and others like it are relying on a Perl oddity.
      # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
      die "Possible infinite loop detected!  "
         . "The lower boundary for chunk $chunkno is "
         . "<" . join(', ', @{$self->{lower}}) . "> and the lower "
         . "boundary for chunk " . ($chunkno + 1) . " is also "
         . "<" . join(', ', @{$self->{next_lower}}) . ">.  "
         . "This usually happens when using a non-unique single "
         . "column index.  The current chunk index for table "
         . "$tbl->{db}.$tbl->{tbl} is $self->{index} which is"
         . ($index->{is_unique} ? '' : ' not') . " unique and covers "
         . ($n_cols > 1 ? "$n_cols columns" : "1 column") . ".\n";
   }
   $self->{lower} = $self->{next_lower};

   if ( my $callback = $self->{callbacks}->{next_boundaries} ) {
      my $oktonibble = $callback->(
         Cxn            => $self->{Cxn},
         tbl            => $self->{tbl},
         NibbleIterator => $self,
      );
      PTDEBUG && _d('next_boundaries callback returned', $oktonibble);
      if ( !$oktonibble ) {
         $self->{no_more_boundaries} = 1;
         return; # stop nibbling
      }
   }

   # Two boundaries are being fetched: the upper boundary for this nibble,
   # i.e. the nibble the caller is trying to exec, and the next_lower boundary
   # for the next nibble that the caller will try to exec.  For example,
   # if chunking the alphabet, a-z, with chunk size 3, the first call will
   # fetch:
   #
   #    a <- lower
   #    b
   #    c <- upper      ($boundary->[0])
   #    d <- next_lower ($boundary->[1])
   #
   # Then the second call will fetch:
   #
   #    d <- lower
   #    e
   #    f <- upper
   #    g <- next_lower
   #
   # Why fetch both upper and next_lower?  We wanted to keep nibbling simple,
   # i.e. one nibble statement, not one for the first nibble, one for "middle"
   # nibbles, and another for the end (this is how older code worked).  So the
   # nibble statement is inclusive, but this requires both boundaries for
   # reasons explained in a comment above my $ub_sql in new().

   # XXX This call and others like it are relying on a Perl oddity.
   # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
   PTDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
      join(', ', @{$self->{lower}}), $self->{limit});
   $self->{ub_sth}->execute(@{$self->{lower}}, $self->{limit});
   my $boundary = $self->{ub_sth}->fetchall_arrayref();
   PTDEBUG && _d('Next boundary:', Dumper($boundary));
   if ( $boundary && @$boundary ) {
      # upper boundary for the current nibble.
      $self->{upper} = $boundary->[0];

      if ( $boundary->[1] ) {
         # next_lower boundary for the next nibble (will become the lower
         # boundary when that nibble becomes the current nibble).
         $self->{next_lower} = $boundary->[1];
      }
      else {
         # There's no next_lower boundary, so the upper boundary of
         # the current nibble is the end of the table.  For example,
         # if chunking a-z, then the upper boundary of the current
         # nibble ($boundary->[0]) is z.
         PTDEBUG && _d('End of table boundary:', Dumper($boundary->[0]));
         $self->{no_more_boundaries} = 1;  # for next call

         # OobNibbleIterator needs to know the last upper boundary.
         $self->{last_upper} = $boundary->[0];
      }
   }
   else {
      # This code is reached in cases like chunking a-z and the next_lower
      # boundary ($boundary->[1]) falls on z.  When called again, no upper
      # or next_lower is found past z so if($boundary && @$boundary) is false.
      # But there's a problem: between the previous call that made next_lower=z
      # and this call, rows might have been inserted, so maybe z is no longer
      # the end of the table.  To handle this, we fetch the end of the table
      # once and make the final nibble z-<whatever>.
      my $dbh = $self->{Cxn}->dbh();
      $self->{upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
      PTDEBUG && _d('Last upper boundary:', Dumper($self->{upper}));
      $self->{no_more_boundaries} = 1;  # for next call

      # OobNibbleIterator needs to know the last upper boundary.
      $self->{last_upper} = $self->{upper};
   }
   $self->{ub_sth}->finish();

   return 1; # continue nibbling
}

sub identical_boundaries {
   my ($self, $b1, $b2) = @_;

   # If only one boundary isn't defined, then they can't be identical.
   return 0 if ($b1 && !$b2) || (!$b1 && $b2);

   # If both boundaries aren't defined, then they're identical.
   return 1 if !$b1 && !$b2;

   # Both boundaries are defined; compare their values and return false
   # on the first difference because only one diff is needed to prove
   # that they're not identical.
   die "Boundaries have different numbers of values"
      if scalar @$b1 != scalar @$b2;  # shouldn't happen
   my $n_vals = scalar @$b1;
   for my $i ( 0..($n_vals-1) ) {
      return 0 if ($b1->[$i] || '') ne ($b2->[$i] || ''); # diff
   }
   return 1;
}

sub DESTROY {
   my ( $self ) = @_;
   foreach my $key ( keys %$self ) {
      if ( $key =~ m/_sth$/ ) {
         PTDEBUG && _d('Finish', $key);
         $self->{$key}->finish();
      }
   }
   return;
}

sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End NibbleIterator package
# ###########################################################################