Merge nibble-iterator.

2026-05-17 01:01:27 +08:00 · 2011-09-14 08:31:08 -06:00
parent 43aebffad1 792c3a6638
commit bbbdabbaa3
7 changed files with 1807 additions and 8 deletions
@@ -0,0 +1,444 @@
+# This program is copyright 2011 Percona Inc.
+# Feedback and improvements are welcome.
+#
+# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
+# systems, you can issue `man perlgpl' or `man perlartistic' to read these
+# licenses.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA  02111-1307  USA.
+# ###########################################################################
+# NibbleIterator package
+# ###########################################################################
+{
+# Package: NibbleIterator
+# NibbleIterator nibbles tables.
+package NibbleIterator;
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
+
+use Data::Dumper;
+$Data::Dumper::Indent    = 1;
+$Data::Dumper::Sortkeys  = 1;
+$Data::Dumper::Quotekeys = 0;
+
+# Sub: new
+#   Build a NibbleIterator and all the SQL statements it will use.  No SQL
+#   is executed here; statements are prepared and run on the first call to
+#   <next()>.
+#
+# Parameters:
+#   %args - Arguments
+#
+# Required Arguments:
+#   dbh          - dbh
+#   tbl          - Table ref with db, tbl and tbl_struct keys
+#   OptionParser - Object whose get() returns chunk-index and chunk-size
+#   Quoter       - Object used to quote() identifiers
+#   TableNibbler - Object used to generate_asc_stmt()
+#   TableParser  - Object used to find_best_index()
+#
+# Optional Arguments:
+#   where     - WHERE clause (without the WHERE keyword) for all statements
+#   select    - Select list for the nibble statements; default is the
+#               quoted cols returned by generate_asc_stmt()
+#   dms       - Text used in place of "SELECT" in the nibble statements
+#   callbacks - Hashref of coderefs used by <next()>
+#
+# Returns:
+#   NibbleIterator object
+sub new {
+   my ( $class, %args ) = @_;
+   my @required_args = qw(dbh tbl OptionParser Quoter TableNibbler TableParser);
+   foreach my $arg ( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my (undef, $tbl, $o, $q) = @args{@required_args};
+   my $where = $args{where};
+
+   # Get an index to nibble by.  We'll order rows by the index's columns.
+   my $index = $args{TableParser}->find_best_index(
+      $tbl->{tbl_struct},
+      $o->get('chunk-index'),
+   );
+   die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
+   my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
+
+   # Figure out how to nibble the table with the index.
+   my $asc = $args{TableNibbler}->generate_asc_stmt(
+      %args,
+      tbl_struct => $tbl->{tbl_struct},
+      index      => $index,
+      asc_only   => 1,
+   );
+   MKDEBUG && _d('Ascend params:', Dumper($asc));
+
+   # Make SQL statements, prepared on first call to next().  FROM and
+   # ORDER BY are the same for all statements.  FORCE INDEX and ORDER BY
+   # are needed to ensure deterministic nibbling.
+   my $from     = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
+   my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
+
+   # Pieces shared by several statements: the boundary (index) columns,
+   # the nibble select list, and the leading keyword of nibble statements.
+   my $boundary_cols = join(', ', map { $q->quote($_) } @{$asc->{scols}});
+   my $select        = $args{select}
+                     ? $args{select}
+                     : join(', ', map { $q->quote($_) } @{$asc->{cols}});
+   my $dms           = $args{dms} ? "$args{dms} " : "SELECT ";
+
+   # These statements are only executed once, so they don't use sths.
+   my $first_lb_sql
+      = "SELECT /*!40001 SQL_NO_CACHE */ "
+      . $boundary_cols
+      . " FROM $from"
+      . ($where ? " WHERE $where" : '')
+      . " ORDER BY $order_by"
+      . " LIMIT 1"
+      . " /*first lower boundary*/";
+   MKDEBUG && _d('First lower boundary statement:', $first_lb_sql);
+
+   my $last_ub_sql
+      = "SELECT /*!40001 SQL_NO_CACHE */ "
+      . $boundary_cols
+      . " FROM $from"
+      . ($where ? " WHERE $where" : '')
+      . " ORDER BY "
+      . join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC'
+      . " LIMIT 1"
+      . " /*last upper boundary*/";
+   MKDEBUG && _d('Last upper boundary statement:', $last_ub_sql);
+
+   # Nibbles are inclusive, so for a..z, the nibbles are: a-e, f-j, k-o, p-t,
+   # u-y, and z.  This complicates getting the next upper boundary because
+   # if we use either (col >= lb AND col < ub) or (col > lb AND col <= ub)
+   # in nibble_sql (below), then that fails for either the last or first
+   # nibble respectively.  E.g. (col >= z AND col < z) doesn't work, nor
+   # does (col > a AND col <= e).  Hence the fancy LIMIT 2 which returns
+   # the upper boundary for the current nibble *and* the lower boundary
+   # for the next nibble.  See _next_boundaries().
+   my $ub_sql = _make_ub_sql(
+      cols     => $asc->{scols},
+      from     => $from,
+      where    => $asc->{boundaries}->{'>='}
+                . ($where ? " AND ($where)" : ''),
+      order_by => $order_by,
+      limit    => $o->get('chunk-size'),
+      Quoter   => $q,
+   );
+
+   # This statement does the actual nibbling work; its rows are returned
+   # to the caller via next().
+   my $nibble_sql
+      = $dms
+      . $select
+      . " FROM $from"
+      . " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
+      . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
+      . ($where ? " AND ($where)" : '')
+      . " ORDER BY $order_by"
+      . " /*nibble*/";
+   MKDEBUG && _d('Nibble statement:', $nibble_sql);
+
+   my $explain_nibble_sql
+      = "EXPLAIN SELECT "
+      . $select
+      . " FROM $from"
+      . " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
+      . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
+      . ($where ? " AND ($where)" : '')
+      . " ORDER BY $order_by"
+      . " /*explain nibble*/";
+   MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
+
+   # If the chunk size is >= number of rows in table, then we don't
+   # need to chunk; we can just select all rows, in order, at once.
+   # There are no boundary conditions here, so the caller's where is the
+   # whole WHERE clause and must be introduced by WHERE, not AND.
+   my $one_nibble_sql
+      = $dms
+      . $select
+      . " FROM $from"
+      . ($where ? " WHERE $where" : '')
+      . " ORDER BY $order_by"
+      . " /*one nibble*/";
+   MKDEBUG && _d('One nibble statement:', $one_nibble_sql);
+
+   my $explain_one_nibble_sql
+      = "EXPLAIN SELECT "
+      . $select
+      . " FROM $from"
+      . ($where ? " WHERE $where" : '')
+      . " ORDER BY $order_by"
+      . " /*explain one nibble*/";
+   MKDEBUG && _d('Explain one nibble statement:', $explain_one_nibble_sql);
+
+   my $self = {
+      %args,
+      asc                    => $asc,
+      index                  => $index,
+      from                   => $from,
+      order_by               => $order_by,
+      first_lb_sql           => $first_lb_sql,
+      last_ub_sql            => $last_ub_sql,
+      ub_sql                 => $ub_sql,
+      nibble_sql             => $nibble_sql,
+      explain_nibble_sql     => $explain_nibble_sql,
+      one_nibble_sql         => $one_nibble_sql,
+      explain_one_nibble_sql => $explain_one_nibble_sql,
+      nibbleno               => 0,  # number of the current nibble
+      have_rows              => 0,  # true while the current nibble has rows
+      rowno                  => 0,  # row number within the current nibble
+   };
+
+   return bless $self, $class;
+}
+
+# Sub: next
+#   Return the next row of the table, fetching nibble after nibble as needed.
+#   On the first call (nibbleno is 0) this decides between one-nibble and
+#   multi-nibble mode, prepares the statement handles, gets the first and
+#   last boundaries, and calls the init callback.
+#
+# Callbacks (all optional, read from $self->{callbacks}):
+#   init         - Called with no args at the end of first-call setup
+#   exec_nibble  - Called instead of executing nibble_sth; its return value
+#                  becomes have_rows, so a false value skips the nibble
+#   after_nibble - Called when the current nibble has no (more) rows
+#   done         - Called when there are no more boundaries
+#
+# Returns:
+#   Arrayref copy of the next row, or nothing when nibbling is done
+sub next {
+   my ($self) = @_;
+
+   # First call, init everything.  This could be done in new(), but
+   # all work is delayed until actually needed.
+   if ($self->{nibbleno} == 0) {
+      $self->_can_nibble_once();
+      $self->_prepare_sths();
+      $self->_get_bounds();
+      # $self->_check_index_usage();
+      if ( my $callback = $self->{callbacks}->{init} ) {
+         $callback->();
+      }
+   }
+
+   # If there's another nibble, fetch the rows within it.
+   NIBBLE:
+   while ( $self->{have_rows} || $self->_next_boundaries() ) {
+      # If no rows, then we just got the next boundaries, which start
+      # the next nibble.
+      if ( !$self->{have_rows} ) {
+         $self->{nibbleno}++;
+         MKDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
+            join(', ', (@{$self->{lb}}, @{$self->{ub}})));
+         if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
+            # The callback is responsible for executing the nibble; what
+            # it returns is used as the row count for this nibble.
+            $self->{have_rows} = $callback->(
+               dbh         => $self->{dbh},
+               tbl         => $self->{tbl},
+               sth         => $self->{nibble_sth},
+               lb          => $self->{lb},
+               ub          => $self->{ub},
+               nibbleno    => $self->{nibbleno},
+               explain_sth => $self->{explain_sth},
+            );
+         }
+         else {
+            $self->{nibble_sth}->execute(@{$self->{lb}}, @{$self->{ub}});
+            $self->{have_rows} = $self->{nibble_sth}->rows();
+         }
+      }
+
+      # Return rows in this nibble.
+      if ( $self->{have_rows} ) {
+         MKDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
+         # Return rows in nibble.  sth->{Active} is always true with
+         # DBD::mysql v3, so we track the status manually.
+         my $row = $self->{nibble_sth}->fetchrow_arrayref();
+         if ( $row ) {
+            $self->{rowno}++;
+            MKDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
+            # fetchrow_arrayref re-uses an internal arrayref, so we must copy.
+            return [ @$row ];
+         }
+      }
+
+      # Reached when the nibble had no rows, was skipped by exec_nibble,
+      # or its last row has already been returned.
+      MKDEBUG && _d('No rows in nibble or nibble skipped');
+      if ( my $callback = $self->{callbacks}->{after_nibble} ) {
+         $callback->(
+            dbh         => $self->{dbh},
+            tbl         => $self->{tbl},
+            nibbleno    => $self->{nibbleno},
+            explain_sth => $self->{explain_sth},
+         );
+      }
+      # Reset per-nibble state so the loop condition asks for new boundaries.
+      $self->{rowno}     = 0;
+      $self->{have_rows} = 0;
+   }
+
+   MKDEBUG && _d('Done nibbling');
+   if ( my $callback = $self->{callbacks}->{done} ) {
+      $callback->(
+         dbh => $self->{dbh},
+         tbl => $self->{tbl},
+      );
+   }
+   return;
+}
+
+# Sub: nibble_number
+#   Report which nibble the iterator is currently on.
+#
+# Returns:
+#   Value of the nibbleno counter, which is 0 until <next()> starts the
+#   first nibble
+sub nibble_number {
+   my $self = shift;
+   my $current_nibble = $self->{nibbleno};
+   return $current_nibble;
+}
+
+# Sub: set_chunk_size
+#   Change the number of rows per nibble by rebuilding the upper boundary
+#   statement with a new limit and preparing it again.
+#
+# Parameters:
+#   $limit - New chunk size
+sub set_chunk_size {
+   my ($self, $limit) = @_;
+   MKDEBUG && _d('Setting new chunk size (LIMIT):', $limit);
+
+   # Same WHERE as the one built in new(): lower boundary plus optional
+   # caller-supplied condition.
+   my $lower_boundary = $self->{asc}->{boundaries}->{'>='};
+   my $extra_where    = $self->{where} ? " AND ($self->{where})" : '';
+
+   $self->{ub_sql} = _make_ub_sql(
+      cols     => $self->{asc}->{scols},
+      from     => $self->{from},
+      where    => $lower_boundary . $extra_where,
+      order_by => $self->{order_by},
+      limit    => $limit,
+      Quoter   => $self->{Quoter},
+   );
+
+   # Drop the old upper boundary sth so that _prepare_sths() makes a new
+   # one.  It won't exist if this sub is called before the first next().
+   if ( $self->{ub_sth} ) {
+      $self->{ub_sth}->finish();
+      $self->{ub_sth} = undef;
+   }
+   $self->_prepare_sths();
+
+   return;
+}
+
+# Sub: _make_ub_sql
+#   Make the upper boundary statement.  It selects two rows starting at
+#   offset limit-1 (offset 0 if limit is less than 1).
+#
+# Required Arguments:
+#   cols     - Arrayref of columns to select
+#   from     - FROM clause text
+#   where    - WHERE clause text
+#   order_by - ORDER BY clause text
+#   limit    - Chunk size
+#   Quoter   - Object used to quote() the columns
+#
+# Returns:
+#   SQL statement
+sub _make_ub_sql {
+   my (%args) = @_;
+   for my $required ( qw(cols from where order_by limit Quoter) ) {
+      die "I need a $required argument" unless $args{$required};
+   }
+   my $q        = $args{Quoter};
+   my $offset   = (int($args{limit}) || 1) - 1;
+   my $col_list = join(', ', map { $q->quote($_) } @{$args{cols}});
+
+   my $ub_sql = join(' ',
+      "SELECT /*!40001 SQL_NO_CACHE */",
+      $col_list,
+      "FROM $args{from}",
+      "WHERE $args{where}",
+      "ORDER BY $args{order_by}",
+      "LIMIT 2 OFFSET $offset",
+      "/*upper boundary*/",
+   );
+   MKDEBUG && _d('Upper boundary statement:', $ub_sql);
+   return $ub_sql;
+}
+
+# Sub: _can_nibble_once
+#   Decide if the whole table fits in one nibble by comparing the row count
+#   reported by SHOW TABLE STATUS with the chunk size.  The result is saved
+#   in $self->{one_nibble}.
+#
+# Returns:
+#   1 if the table can be nibbled at once, else 0.  If SHOW TABLE STATUS
+#   fails, the error is warned and 0 is returned without setting one_nibble.
+sub _can_nibble_once {
+   my ($self) = @_;
+   my $tbl = $self->{tbl};
+   my $q   = $self->{Quoter};
+
+   my $table_status = eval {
+      my $sql = "SHOW TABLE STATUS FROM " . $q->quote($tbl->{db})
+              . " LIKE " . $q->literal_like($tbl->{tbl});
+      MKDEBUG && _d($sql);
+      my $status = $self->{dbh}->selectrow_hashref($sql);
+      MKDEBUG && _d('Table status:', Dumper($status));
+      $status;
+   };
+   if ( $EVAL_ERROR ) {
+      warn $EVAL_ERROR;
+      return 0;
+   }
+
+   # The column may come back as Rows or rows; no value counts as 0 rows.
+   my $n_rows = 0;
+   foreach my $name ( qw(Rows rows) ) {
+      if ( defined $table_status->{$name} ) {
+         $n_rows = $table_status->{$name};
+         last;
+      }
+   }
+
+   my $chunk_size = $self->{OptionParser}->get('chunk-size') || 1;
+   if ( $n_rows <= $chunk_size ) {
+      $self->{one_nibble} = 1;
+   }
+   else {
+      $self->{one_nibble} = 0;
+   }
+   MKDEBUG && _d('One nibble:', $self->{one_nibble} ? 'yes' : 'no');
+   return $self->{one_nibble};
+}
+
+# Sub: _prepare_sths
+#   Prepare the statement handles that do not exist yet.  In one-nibble
+#   mode these are nibble_sth and explain_sth, made from the one-nibble
+#   statements; otherwise ub_sth, nibble_sth and explain_sth are made from
+#   the boundary-based statements.  Existing handles are left alone.
+sub _prepare_sths {
+   my ($self) = @_;
+   MKDEBUG && _d('Preparing statement handles');
+
+   # Pairs of (sth key, key of the SQL it is prepared from), in the order
+   # in which they are prepared.
+   my @sth_for = $self->{one_nibble}
+      ? ( [ nibble_sth  => 'one_nibble_sql'         ],
+          [ explain_sth => 'explain_one_nibble_sql' ], )
+      : ( [ ub_sth      => 'ub_sql'                 ],
+          [ nibble_sth  => 'nibble_sql'             ],
+          [ explain_sth => 'explain_nibble_sql'     ], );
+
+   foreach my $pair ( @sth_for ) {
+      my ($sth_key, $sql_key) = @$pair;
+      $self->{$sth_key} ||= $self->{dbh}->prepare($self->{$sql_key});
+   }
+}
+
+# Sub: _get_bounds
+#   Get the first lower boundary and the last upper boundary of the table
+#   and save them as next_lb and last_ub.  Nothing is done in one-nibble
+#   mode because no boundaries are used then.
+sub _get_bounds {
+   my ($self) = @_;
+
+   if ( !$self->{one_nibble} ) {
+      my $dbh = $self->{dbh};
+
+      $self->{next_lb} = $dbh->selectrow_arrayref($self->{first_lb_sql});
+      MKDEBUG && _d('First lower boundary:', Dumper($self->{next_lb}));
+
+      $self->{last_ub} = $dbh->selectrow_arrayref($self->{last_ub_sql});
+      MKDEBUG && _d('Last upper boundary:', Dumper($self->{last_ub}));
+   }
+
+   return;
+}
+
+# Sub: _check_index_usage
+#   Die unless the "key" value of the first EXPLAIN row equals the index
+#   chosen in new().  If the query itself fails, only a warning is printed.
+#
+# NOTE(review): the query string given to selectall_arrayref() is empty, so
+# no EXPLAIN statement is actually sent, and the only visible call to this
+# sub (in next()) is commented out.  This looks unfinished -- confirm which
+# statement should be explained before enabling it.
+# NOTE(review): the EXPLAIN key is lowercased but $self->{index} is compared
+# as-is, and the error message reads $self->{asc}->{index} instead.
+# Presumably both hold the same lowercase index name -- verify.
+sub _check_index_usage {
+   my ($self) = @_;
+   my ($dbh, $tbl, $q) = @{$self}{qw(dbh tbl Quoter)};
+
+   my $explain;
+   eval {
+      $explain = $dbh->selectall_arrayref("", {Slice => {}});
+   };
+   if ( $EVAL_ERROR ) {
+      warn "Cannot check if MySQL is using the chunk index: $EVAL_ERROR";
+      return;
+   }
+   # No rows or no key becomes '', which is reported as "no index" below.
+   my $explain_index = lc($explain->[0]->{key} || '');
+   MKDEBUG && _d('EXPLAIN index:', $explain_index);
+   if ( $explain_index ne $self->{index} ) {
+      die "Cannot nibble table $tbl->{db}.$tbl->{tbl} because MySQL chose "
+         . ($explain_index ? "the `$explain_index`" : 'no') . ' index'
+         . " instead of the chunk index `$self->{asc}->{index}`";
+   }
+
+   return;
+}
+
+# Sub: _next_boundaries
+#   Set the lower (lb) and upper (ub) boundary values for the next nibble.
+#   In one-nibble mode both are empty lists and only one nibble is made.
+#   Otherwise ub_sth (LIMIT 2 with an OFFSET, see _make_ub_sql()) is
+#   executed from the current lower boundary: the first row returned is
+#   this nibble's upper boundary and the second row, if any, is the next
+#   nibble's lower boundary.
+#
+# Returns:
+#   1 if boundaries were set, nothing if there are no more boundaries
+sub _next_boundaries {
+   my ($self) = @_;
+
+   if ( $self->{no_more_boundaries} ) {
+      MKDEBUG && _d('No more boundaries');
+      return;
+   }
+
+   if ( $self->{one_nibble} ) {
+      # The one-nibble statements have no placeholders, so no bind values.
+      $self->{lb} = $self->{ub} = [];
+      $self->{no_more_boundaries} = 1;  # for next call
+      return 1;
+   }
+
+   # The lower boundary was found by the previous call, or by _get_bounds()
+   # for the first nibble.
+   $self->{lb} = $self->{next_lb};
+
+   MKDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
+      join(', ', @{$self->{lb}}));
+   $self->{ub_sth}->execute(@{$self->{lb}});
+   my $boundary = $self->{ub_sth}->fetchall_arrayref();
+   MKDEBUG && _d('Next boundary:', Dumper($boundary));
+   if ( $boundary && @$boundary ) {
+      $self->{ub} = $boundary->[0]; # this nibble
+      if ( $boundary->[1] ) {
+         $self->{next_lb} = $boundary->[1]; # next nibble
+      }
+      else {
+         # Only one row came back: this nibble ends on the last row.
+         $self->{no_more_boundaries} = 1;  # for next call
+         MKDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
+      }
+   }
+   else {
+      # No row at the offset, i.e. fewer rows than the chunk size remain,
+      # so this last nibble ends at the table's last upper boundary.
+      $self->{no_more_boundaries} = 1;  # for next call
+      $self->{ub} = $self->{last_ub};
+      MKDEBUG && _d('Last upper boundary:', Dumper($self->{ub}));
+   }
+   $self->{ub_sth}->finish();
+
+   return 1; # have boundary
+}
+
+# Sub: DESTROY
+#   Finish every statement handle (keys ending in _sth) when the object is
+#   destroyed.  A handle slot can hold undef: set_chunk_size() resets ub_sth
+#   to undef and _prepare_sths() does not re-create it in one-nibble mode,
+#   so empty slots are skipped instead of calling finish() on undef.
+sub DESTROY {
+   my ( $self ) = @_;
+   foreach my $key ( keys %$self ) {
+      next unless $key =~ m/_sth$/;
+      my $sth = $self->{$key};
+      $sth->finish() if $sth;
+   }
+   return;
+}
+
+# Print a debug line to STDERR: "# package:line PID" followed by the args
+# joined with spaces.  Undefined args are printed as "undef" and lines after
+# the first are prefixed with "# ".
+sub _d {
+   my ($package, undef, $line) = caller 0;
+   my @parts;
+   foreach my $arg ( @_ ) {
+      my $text = defined $arg ? $arg : 'undef';
+      $text =~ s/\n/\n# /g;
+      push @parts, $text;
+   }
+   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
+}
+
+1;
+}
+# ###########################################################################
+# End NibbleIterator package
+# ###########################################################################
@@ -0,0 +1,473 @@
+# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc.
+# Feedback and improvements are welcome.
+#
+# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
+# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free Software
+# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
+# systems, you can issue `man perlgpl' or `man perlartistic' to read these
+# licenses.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+# Place, Suite 330, Boston, MA  02111-1307  USA.
+# ###########################################################################
+# RowChecksum package
+# ###########################################################################
+{
+# Package: RowChecksum
+# RowChecksum makes checksum expressions for checksumming rows and chunks.
+package RowChecksum;
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
+
+use List::Util qw(max);
+use Data::Dumper;
+$Data::Dumper::Indent    = 1;
+$Data::Dumper::Sortkeys  = 1;
+$Data::Dumper::Quotekeys = 0;
+
+# Sub: new
+#
+# Required Arguments:
+#   OptionParser - Saved in the object; must be defined
+#   Quoter       - Saved in the object; must be defined
+#
+# Returns:
+#   RowChecksum object holding a copy of all given args
+sub new {
+   my ( $class, %args ) = @_;
+   my @missing = grep { !defined $args{$_} } qw(OptionParser Quoter);
+   die "I need a $missing[0] argument" if @missing;
+   return bless { %args }, $class;
+}
+
+# Sub: make_row_checksum
+#   Make a SELECT column list to checksum a row.
+#
+# Parameters:
+#   %args - Arguments
+#
+# Required Arguments:
+#   tbl  - Table ref; its tbl_struct (cols, type_for, null_cols) is used
+#
+# Optional Arguments:
+#   func            - Hash function name; default is the uppercased value
+#                     of the "function" option
+#   sep             - Separator for CONCAT_WS(); default #
+#   cols            - Arrayref of columns to checksum; default all columns
+#   trim            - Wrap VARCHAR cols in TRIM() for v4/v5 compatibility
+#   float_precision - Wrap FLOAT and DOUBLE cols in ROUND() with this many
+#                     digits
+#   ignorecols      - Hashref whose keys are columns to exclude from the
+#                     checksum (it is tested with exists, so an arrayref
+#                     does not work)
+#   no_cols         - Return only the checksum expression, without the
+#                     leading column list
+#
+# Returns:
+#   Column list for SELECT
+sub make_row_checksum {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(tbl);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($tbl) = @args{@required_args};
+
+   my $o          = $self->{OptionParser};
+   my $q          = $self->{Quoter};
+   my $tbl_struct = $tbl->{tbl_struct};
+   my $func       = $args{func} || uc($o->get('function'));
+
+   # Single quotes are removed because the separator is put inside a
+   # single-quoted SQL string below; fall back to # if nothing is left.
+   my $sep = $args{sep} || '#';
+   $sep =~ s/'//g;
+   $sep ||= '#';
+
+   # This allows a simpler grep when building %cols below.
+   my $ignorecols = $args{ignorecols} || {};
+
+   # Generate the expression that will turn a row into a checksum.
+   # Choose columns.  Normalize query results: make FLOAT and TIMESTAMP
+   # stringify uniformly.
+   # Note that ignorecols is checked against the column names as given,
+   # before they are lowercased.
+   my %cols = map { lc($_) => 1 }
+              grep { !exists $ignorecols->{$_} }
+              ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
+   my %seen;
+   # Columns are taken in table order, each at most once.
+   my @cols =
+      map {
+         my $type = $tbl_struct->{type_for}->{$_};
+         my $result = $q->quote($_);
+         if ( $type eq 'timestamp' ) {
+            $result .= ' + 0';
+         }
+         elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
+            $result = "ROUND($result, $args{float_precision})";
+         }
+         elsif ( $args{trim} && $type =~ m/varchar/ ) {
+            $result = "TRIM($result)";
+         }
+         $result;
+      }
+      grep {
+         $cols{$_} && !$seen{$_}++
+      }
+      @{$tbl_struct->{cols}};
+
+   # Prepend columns to query, resulting in "col1, col2, FUNC(..col1, col2...)",
+   # unless caller says not to.  The only caller that says not to is
+   # make_chunk_checksum() which uses this row checksum as part of a larger
+   # checksum.  Other callers, like TableSyncer::make_checksum_queries() call
+   # this sub directly and want the actual columns.
+   my $query;
+   if ( !$args{no_cols} ) {
+      $query = join(', ',
+                  map { 
+                     my $col = $_;
+                     if ( $col =~ m/\+ 0/ ) {
+                        # Alias col name back to itself else its name becomes
+                        # "col + 0" instead of just "col".
+                        my ($real_col) = /^(\S+)/;
+                        $col .= " AS $real_col";
+                     }
+                     elsif ( $col =~ m/TRIM/ ) {
+                        my ($real_col) = m/TRIM\(([^\)]+)\)/;
+                        $col .= " AS $real_col";
+                     }
+                     $col;
+                  } @cols)
+             . ', ';
+   }
+
+   if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
+      # Add a bitmap of which nullable columns are NULL.
+      my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
+      if ( @nulls ) {
+         my $bitmap = "CONCAT("
+            . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
+            . ")";
+         push @cols, $bitmap;
+      }
+
+      # CONCAT_WS() is only needed when there is more than one expression.
+      $query .= @cols > 1
+              ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))'
+              : "$func($cols[0])";
+   }
+   else {
+      # As a special case, FNV1A_64/FNV_64 doesn't need its arguments
+      # concatenated, and doesn't need a bitmap of NULLs.
+      my $fnv_func = uc $func;
+      $query .= "$fnv_func(" . join(', ', @cols) . ')';
+   }
+
+   MKDEBUG && _d('Row checksum:', $query);
+   return $query;
+}
+
+# Sub: make_chunk_checksum
+#   Make a SELECT column list to checksum a chunk of rows.
+#
+# Parameters:
+#   %args - Arguments
+#
+# Required Arguments:
+#   tbl - Table ref
+#   dbh - dbh if func, crc_width, and crc_type aren't given
+#
+# Optional Arguments:
+#   func      - Hash function name
+#   crc_width - CRC width
+#   crc_type  - CRC type
+#
+# Returns:
+#   Column list for SELECT: "COUNT(*) AS cnt, <checksum expression> AS crc"
+sub make_chunk_checksum {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(tbl);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   if ( !$args{dbh} && !($args{func} && $args{crc_width} && $args{crc_type}) ) {
+      die "I need a dbh argument"
+   }
+   my ($tbl) = @args{@required_args};
+   my $o     = $self->{OptionParser};
+   my $q     = $self->{Quoter};
+
+   my %crc_args = $self->get_crc_args(%args);
+   # NOTE(review): $opt_slice is computed here but is not passed to
+   # _make_xor_slices() below, so in this sub it only affects the warning.
+   # Confirm whether that is intended.
+   my $opt_slice; 
+   if ( $o->get('optimize-xor') ) {
+      if ( $crc_args{crc_type} !~ m/int$/ ) {
+         $opt_slice = $self->_optimize_xor(%args, %crc_args);
+         warn "Cannot use --optimize-xor" unless defined $opt_slice;
+      }
+   }
+   MKDEBUG && _d("Checksum strat:", Dumper(\%crc_args));
+
+   # This checksum algorithm concatenates the columns in each row and
+   # checksums them, then slices this checksum up into 16-character chunks.
+   # It then converts them to BIGINTs with the CONV() function, and then
+   # groupwise XORs them to produce an order-independent checksum of the
+   # slice over all the rows.  It then converts these back to base 16 and
+   # puts them back together.  The effect is the same as XORing a very wide
+   # (32 characters = 128 bits for MD5, and SHA1 is even larger) unsigned
+   # integer over all the rows.
+   #
+   # As a special case, integer functions do not need to be sliced.  They
+   # can be fed right into BIT_XOR after a cast to UNSIGNED.
+   my $row_checksum = $self->make_row_checksum(
+      %args,
+      %crc_args,
+      no_cols => 1
+   );
+   my $crc;
+   if ( $crc_args{crc_type} =~ m/int$/ ) {
+      $crc = "COALESCE(LOWER(CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), "
+           . "10, 16)), 0)";
+   }
+   else {
+      my $slices = $self->_make_xor_slices(
+         row_checksum => $row_checksum,
+         %crc_args,
+      );
+      $crc = "COALESCE(LOWER(CONCAT($slices)), 0)";
+   }
+
+   my $select = "COUNT(*) AS cnt, $crc AS crc";
+   MKDEBUG && _d('Chunk checksum:', $select);
+   return $select;
+}
+
+# Sub: get_crc_args
+#   Get the hash function and its CRC width and type.  Values given by the
+#   caller are used as-is; missing (or false) ones are looked up with
+#   _get_hash_func(), _get_crc_width() and _get_crc_type().
+#
+# Optional Arguments:
+#   func      - Hash function name
+#   crc_width - CRC width
+#   crc_type  - CRC type
+#
+# Returns:
+#   List of key-value pairs: func, crc_width, crc_type
+sub get_crc_args {
+   my ($self, %args) = @_;
+
+   my $func = $args{func};
+   $func = $self->_get_hash_func(%args) unless $func;
+
+   # The width and type lookups need to know the chosen function.
+   my $crc_width = $args{crc_width};
+   $crc_width = $self->_get_crc_width(%args, func=>$func) unless $crc_width;
+
+   my $crc_type = $args{crc_type};
+   $crc_type = $self->_get_crc_type(%args, func=>$func) unless $crc_type;
+
+   return (
+      func      => $func,
+      crc_width => $crc_width,
+      crc_type  => $crc_type,
+   );
+}
+
+# Sub: _get_hash_func
+#   Get the first hash function that the server can execute.  The function
+#   from the "function" option, if any, is tried first, then CRC32,
+#   FNV1A_64, FNV_64, MD5 and SHA1 in that order.  A function whose test
+#   query fails is skipped.
+#
+# Parameters:
+#   %args - Arguments
+#
+# Required Arguments:
+#   dbh - dbh
+#
+# Returns:
+#   Function name.  Dies with the collected errors if no function works.
+sub _get_hash_func {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(dbh);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($dbh) = @args{@required_args};
+   my $o     = $self->{OptionParser};
+   my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
+
+   if ( my $func = $o->get('function') ) {
+      unshift @funcs, $func;
+   }
+
+   my $error = '';
+   FUNC:
+   foreach my $func ( @funcs ) {
+      eval {
+         my $sql = "SELECT $func('test-string')";
+         MKDEBUG && _d($sql);
+         $dbh->do($sql);
+      };
+      if ( my $e = $EVAL_ERROR ) {
+         # Use the short reason from a DBI "... failed: ... at FILE line N"
+         # message when there is one, else the whole error.
+         my ($reason) = $e =~ m/failed: (.*?) at \S+ line/;
+         if ( !defined $reason ) {
+            chomp($reason = $e);
+         }
+         $error .= qq{$func cannot be used because "$reason"\n};
+         MKDEBUG && _d($func, 'cannot be used because', $reason);
+         next FUNC;  # try the next candidate instead of returning this one
+      }
+      MKDEBUG && _d('Chosen hash func:', $func);
+      return $func;
+   }
+   die $error || 'No hash functions (CRC32, MD5, etc.) are available';
+}
+
+# Returns how wide/long, in characters, a CRC function is.  The width is
+# never less than 16, and it is 16 for the FNV functions without asking the
+# server.  If the test query fails, the error is ignored and 16 is returned.
+#
+# Required Arguments:
+#   dbh  - dbh
+#   func - Hash function name
+sub _get_crc_width {
+   my ( $self, %args ) = @_;
+   for my $required ( qw(dbh func) ) {
+      die "I need a $required argument" unless $args{$required};
+   }
+   my $dbh  = $args{dbh};
+   my $func = $args{func};
+
+   return 16 if uc($func) eq 'FNV_64' || uc($func) eq 'FNV1A_64';
+
+   my $crc_width = 16;
+   eval {
+      my ($val) = $dbh->selectrow_array("SELECT $func('a')");
+      $crc_width = max(16, length($val));
+   };
+   return $crc_width;
+}
+
+# Returns a CRC function's MySQL type, as reported by the statement handle
+# for "SELECT func('a')".  A bigint shorter than 20 is reported as int.  If
+# executing the statement fails, the error is ignored and whatever type was
+# found so far (by default an empty string) is returned.
+#
+# Required Arguments:
+#   dbh  - dbh
+#   func - Hash function name
+sub _get_crc_type {
+   my ( $self, %args ) = @_;
+   for my $required ( qw(dbh func) ) {
+      die "I need a $required argument" unless $args{$required};
+   }
+   my $dbh  = $args{dbh};
+   my $func = $args{func};
+
+   my $sql = "SELECT $func('a')";
+   my $sth = $dbh->prepare($sql);
+
+   my $type   = '';
+   my $length = 0;
+   eval {
+      $sth->execute();
+      $type   = $sth->{mysql_type_name}->[0];
+      $length = $sth->{mysql_length}->[0];
+      MKDEBUG && _d($sql, $type, $length);
+      $type = 'int' if $type eq 'bigint' && $length < 20;
+   };
+   $sth->finish;
+
+   MKDEBUG && _d('crc_type:', $type, 'length:', $length);
+   return $type;
+}
+
+# Sub: _optimize_xor
+#   Figure out which slice in a sliced BIT_XOR checksum should have the
+#   actual concat-columns-and-checksum, and which should just get variable
+#   references.  I'm really not sure if this code is needed.  It always
+#   seems the last slice is the one that works.  But I'd rather be paranoid.
+#
+#   TODO: this function needs a hint to know when a function returns an
+#   integer.  CRC32 is an example.  In these cases no optimization or
+#   slicing is necessary.
+#
+# Parameters:
+#   %args - Arguments
+#
+# Required Arguments:
+#   dbh  - dbh
+#   func - Hash function name
+#
+# Returns:
+#   Number of the slice that works, or undef if no slice works.  Dies if
+#   func is FNV1A_64, FNV_64 or CRC32.
+sub _optimize_xor {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(dbh func);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($dbh, $func) = @args{@required_args};
+
+   die "$func never needs BIT_XOR optimization"
+      if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
+
+   my $opt_slice = 0;
+   # Reference value: the uppercased hash of 'a', computed without slicing.
+   my $unsliced  = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
+   my $sliced    = '';
+   # $start only counts 16-character steps to bound the loop below.
+   my $start     = 1;
+   my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);
+
+   do { # Try different positions till sliced result equals non-sliced.
+      MKDEBUG && _d('Trying slice', $opt_slice);
+      # _make_xor_slices() relies on @crc being '' when the query begins.
+      $dbh->do('SET @crc := "", @cnt := 0');
+      my $slices = $self->_make_xor_slices(
+         row_checksum => "\@crc := $func('a')",
+         crc_width    => $crc_width,
+         opt_slice    => $opt_slice,
+      );
+
+      my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
+      $sliced = ($dbh->selectrow_array($sql))[0];
+      if ( $sliced ne $unsliced ) {
+         MKDEBUG && _d('Slice', $opt_slice, 'does not work');
+         $start += 16;
+         ++$opt_slice;
+      }
+   } while ( $start < $crc_width && $sliced ne $unsliced );
+
+   if ( $sliced eq $unsliced ) {
+      MKDEBUG && _d('Slice', $opt_slice, 'works');
+      return $opt_slice;
+   }
+   else {
+      MKDEBUG && _d('No slice works');
+      return undef;
+   }
+}
+
+# Sub: _make_xor_slices
+#   Make an expression that will do a bitwise XOR over a very wide integer,
+#   such as that returned by SHA1, which is too large to put into BIT_XOR().
+#   The value is cut into slices of at most 16 characters and each slice is
+#   XORed separately.  If an opt_slice is given, a variable is used to avoid
+#   calling row_checksum multiple times.
+#
+# Parameters:
+#   %args - Arguments
+#
+# Required Arguments:
+#   row_checksum - <make_row_checksum()> query
+#   crc_width    - CRC width (<_get_crc_width()>)
+#
+# Optional Arguments:
+#   opt_slice - Slice number.  Use a variable to avoid calling row_checksum
+#               multiple times.
+#
+# Returns:
+#   SQL expression
+sub _make_xor_slices {
+   my ( $self, %args ) = @_;
+   for my $required ( qw(row_checksum crc_width) ) {
+      die "I need a $required argument" unless $args{$required};
+   }
+   my $row_checksum = $args{row_checksum};
+   my $crc_width    = $args{crc_width};
+   my $opt_slice    = $args{opt_slice};
+
+   # Create a series of slices with @crc as a placeholder.
+   my @slices;
+   my $start = 1;
+   while ( $start <= $crc_width ) {
+      my $remaining = $crc_width - $start + 1;
+      my $len       = $remaining > 16 ? 16 : $remaining;
+      push @slices,
+         "LPAD(CONV(BIT_XOR("
+         . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
+         . ", 10, 16), $len, '0')";
+      $start += 16;
+   }
+
+   # Replace the placeholder with the expression.  If specified, add a
+   # user-variable optimization so the expression goes in only one of the
+   # slices.  This optimization relies on @crc being '' when the query begins.
+   if ( defined $opt_slice && $opt_slice < @slices ) {
+      $slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
+   }
+   else {
+      s/\@crc/$row_checksum/ for @slices;
+   }
+
+   return join(', ', @slices);
+}
+
+# Queries the replication table for chunks that differ from the master's data.
+#
+# Parameters:
+#   $dbh   - dbh
+#   $table - Name of the replication table, used as-is in the FROM clause
+#
+# Returns:
+#   List of hashrefs, one per chunk whose count or crc differs
+sub find_replication_differences {
+   my ( $self, $dbh, $table ) = @_;
+
+   my $sql = <<"   EOF";
+      SELECT db, tbl, chunk, boundaries,
+         COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,
+         COALESCE(
+            this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),
+            0
+         ) AS crc_diff,
+         this_cnt, master_cnt, this_crc, master_crc
+      FROM $table
+      WHERE master_cnt <> this_cnt OR master_crc <> this_crc
+      OR ISNULL(master_crc) <> ISNULL(this_crc)
+   EOF
+   # Put the statement on one line.
+   $sql =~ s/\s+/ /gm;
+
+   MKDEBUG && _d($sql);
+   my $rows = $dbh->selectall_arrayref($sql, { Slice => {} });
+   return @{$rows};
+}
+
+# Print a debug line to STDERR: "# package:line PID" followed by the args
+# joined with spaces.  Undefined args are printed as "undef" and lines after
+# the first are prefixed with "# ".
+sub _d {
+   my ($package, undef, $line) = caller 0;
+   my @parts;
+   foreach my $arg ( @_ ) {
+      my $text = defined $arg ? $arg : 'undef';
+      $text =~ s/\n/\n# /g;
+      push @parts, $text;
+   }
+   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
+}
+
+1;
+}
+# ###########################################################################
+# End RowChecksum package
+# ###########################################################################
@@ -209,9 +209,9 @@ sub next_schema_object {
      if ( my $schema = $self->{Schema} ) {
         $schema->add_schema_object($schema_obj);
      }
+      MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
   }

-   MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
   return $schema_obj;
 }

@@ -65,28 +65,26 @@ sub generate_asc_stmt {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my ($tbl_struct, $index) = @args{@required_args};
-   my @cols = $args{cols}  ? @{$args{cols}} : @{$tbl_struct->{cols}};
+   my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
   my $q    = $self->{Quoter};

   # This shouldn't happen.  TableSyncNibble shouldn't call us with
   # a nonexistent index.
   die "Index '$index' does not exist in table"
      unless exists $tbl_struct->{keys}->{$index};
-
-   my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
-   my @asc_slice;
+   MKDEBUG && _d('Will ascend index', $index);  

   # These are the columns we'll ascend.
-   @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
-   MKDEBUG && _d('Will ascend index', $index);
-   MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
+   my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
   if ( $args{asc_first} ) {
      @asc_cols = $asc_cols[0];
      MKDEBUG && _d('Ascending only first column');
   }
+   MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols));

   # We found the columns by name, now find their positions for use as
   # array slices, and make sure they are included in the SELECT list.
+   my @asc_slice;
   my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
   foreach my $col ( @asc_cols ) {
      if ( !exists $col_posn{$col} ) {
@@ -0,0 +1,457 @@
+#!/usr/bin/perl
+
+BEGIN {
+   die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
+      unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
+   unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
+};
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use Test::More;
+
+use Schema;
+use SchemaIterator;
+use Quoter;
+use DSNParser;
+use Sandbox;
+use OptionParser;
+use MySQLDump;
+use TableParser;
+use TableNibbler;
+use RowChecksum;
+use NibbleIterator;
+use PerconaTest;
+
+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
+
+use Data::Dumper;
+$Data::Dumper::Indent    = 1;
+$Data::Dumper::Sortkeys  = 1;
+$Data::Dumper::Quotekeys = 0;
+
+# Connect to the sandbox master; every test below needs it.
+my $dp  = DSNParser->new(opts => $dsn_opts);
+my $sb  = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
+my $dbh = $sb->get_dbh_for('master');
+
+if ( $dbh ) {
+   plan tests => 20;
+}
+else {
+   plan skip_all => 'Cannot connect to sandbox master';
+}
+
+# Helper objects shared by the iterators built in make_nibble_iter().
+my $q  = Quoter->new();
+my $tp = TableParser->new(Quoter => $q);
+my $du = MySQLDump->new();
+my $nb = TableNibbler->new(TableParser => $tp, Quoter => $q);
+my $o  = OptionParser->new(description => 'NibbleIterator');
+my $rc = RowChecksum->new(OptionParser => $o, Quoter => $q);
+
+# Load pt-table-checksum's option specs so options such as --chunk-size
+# can be parsed from the argv given to make_nibble_iter().
+$o->get_specs("$trunk/bin/pt-table-checksum");
+
+my %common_modules = (
+   Quoter       => $q,
+   TableParser  => $tp,
+   MySQLDump    => $du,
+   TableNibbler => $nb,
+   OptionParser => $o,
+);
+
+# Directory holding the sample SQL files loaded by make_nibble_iter().
+my $in = "/t/lib/samples/NibbleIterator/";
+
+# Build a NibbleIterator for the table $args{db}.$args{tbl} on the sandbox
+# master.
+#
+# Optional args:
+#   sql_file  - sample file (relative to $in) to load on the master first
+#   argv      - arrayref of command line options to parse before iterating
+#   callbacks - passed through to NibbleIterator
+#   select    - passed through to NibbleIterator
+#
+# Returns the new NibbleIterator object.
+sub make_nibble_iter {
+   my (%args) = @_;
+
+   $sb->load_file('master', "$in/$args{sql_file}") if $args{sql_file};
+
+   # Parse the given options as if they came from the command line.
+   @ARGV = @{ $args{argv} || [] };
+   $o->get_opts();
+
+   # Walk all schema objects so the table can be fetched from the Schema.
+   my $schema      = Schema->new();
+   my $schema_iter = SchemaIterator->new(
+      dbh          => $dbh,
+      keep_ddl     => 1,
+      Schema       => $schema,
+      %common_modules,
+   );
+   while ( $schema_iter->next_schema_object() ) {
+      # Nothing to do; iterating is what populates $schema.
+   }
+
+   my $nibble_iter = NibbleIterator->new(
+      dbh       => $dbh,
+      tbl       => $schema->get_table($args{db}, $args{tbl}),
+      callbacks => $args{callbacks},
+      select    => $args{select},
+      %common_modules,
+   );
+
+   return $nibble_iter;
+}
+
+# ############################################################################
+# a-z w/ chunk-size 5, z is final boundary and single value
+# ############################################################################
+my $ni = make_nibble_iter(
+   sql_file => "a-z.sql",
+   db       => 'test',
+   tbl      => 't',
+   argv     => [qw(--databases test --chunk-size 5)],
+);
+
+# Values expected from each group of 5 calls to next().  For the last group
+# there's only 1 row left but the extra calls shouldn't return anything or
+# crash.
+my @az_nibbles = (
+   [ 'a'..'e' ],
+   [ 'f'..'j' ],
+   [ 'k'..'o' ],
+   [ 'p'..'t' ],
+   [ 'u'..'y' ],
+   [ 'z'      ],
+);
+
+my @rows;
+foreach my $idx ( 0..$#az_nibbles ) {
+   @rows = map { $ni->next() } 1..5;
+   is_deeply(
+      \@rows,
+      [ map { [$_] } @{$az_nibbles[$idx]} ],
+      'a-z nibble ' . ($idx + 1)
+   ) or print Dumper(\@rows);
+}
+
+# ############################################################################
+# a-y w/ chunk-size 5, even nibbles
+# ############################################################################
+$dbh->do('delete from test.t where c="z"');
+my $all_rows = $dbh->selectall_arrayref('select * from test.t order by c');
+$ni = make_nibble_iter(
+   db   => 'test',
+   tbl  => 't',
+   argv => [qw(--databases test --chunk-size 5)],
+);
+
+# 25 rows are left, so the 26th call checks that the iterator stops cleanly.
+@rows = map { $ni->next() } 1..26;
+is_deeply(\@rows, $all_rows, 'a-y even nibble') or print Dumper(\@rows);
+
+# ############################################################################
+# chunk-size exceeds number of rows, 1 nibble
+# ############################################################################
+$ni = make_nibble_iter(
+   db   => 'test',
+   tbl  => 't',
+   argv => [qw(--databases test --chunk-size 100)],
+);
+
+@rows = map { $ni->next() } 1..27;
+is_deeply(\@rows, $all_rows, '1 nibble') or print Dumper(\@rows);
+
+# ############################################################################
+# single row table
+# ############################################################################
+$dbh->do('delete from test.t where c != "d"');
+$ni = make_nibble_iter(
+   db   => 'test',
+   tbl  => 't',
+   argv => [qw(--databases test --chunk-size 100)],
+);
+
+@rows = map { $ni->next() } 1..3;
+is_deeply(\@rows, [['d']], 'single row table') or print Dumper(\@rows);
+
+# ############################################################################
+# empty table
+# ############################################################################
+$dbh->do('truncate table test.t');
+$ni = make_nibble_iter(
+   db   => 'test',
+   tbl  => 't',
+   argv => [qw(--databases test --chunk-size 100)],
+);
+
+@rows = map { $ni->next() } 1..3;
+is_deeply(\@rows, [], 'empty table') or print Dumper(\@rows);
+
+# ############################################################################
+# Callbacks
+# ############################################################################
+
+# Each callback prints a marker so the order in which they are called can be
+# checked from the captured output.
+$ni = make_nibble_iter(
+   sql_file  => "a-z.sql",
+   db        => 'test',
+   tbl       => 't',
+   argv      => [qw(--databases test --chunk-size 2)],
+   callbacks => {
+      init         => sub { print "init\n" },
+      after_nibble => sub { print "after nibble ".$ni->nibble_number()."\n" },
+      done         => sub { print "done\n" },
+   }
+);
+
+$dbh->do('delete from test.t limit 20');  # 6 rows left
+
+# Call next() more often than there are rows.
+my $output = output(
+   sub { $ni->next() for 1..8 },
+);
+
+my $expected_output = join('',
+   map { "$_\n" }
+      'init',
+      'after nibble 1',
+      'after nibble 2',
+      'after nibble 3',
+      'done',
+      'done',
+);
+
+is($output, $expected_output, "callbacks");
+
+# ############################################################################
+# Nibble a larger table by numeric pk id
+# ############################################################################
+SKIP: {
+   # All 8 tests in this block need the sakila sample database.
+   skip "Sakila database is not loaded", 8
+      unless @{ $dbh->selectall_arrayref('show databases like "sakila"') };
+
+   $ni = make_nibble_iter(
+      db   => 'sakila',
+      tbl  => 'payment',
+      argv => [qw(--databases sakila --tables payment --chunk-size 100)],
+   );
+
+   # Count the rows handed back by next() until the iterator is exhausted.
+   my $n_rows = 0;
+   $n_rows++ while $ni->next();
+   is(
+      $n_rows,
+      16049,
+      "Nibble sakila.payment (16049 rows)"
+   );
+
+   # Select a chunk checksum instead of the rows themselves; next() is then
+   # expected to return one [count, crc] row per chunk.
+   my $tbl = {
+      db         => 'sakila',
+      tbl        => 'country',
+      tbl_struct => $tp->parse(
+         $du->get_create_table($dbh, $q, 'sakila', 'country')),
+   };
+   my $chunk_checksum = $rc->make_chunk_checksum(
+      dbh => $dbh,
+      tbl => $tbl,
+   );
+   $ni = make_nibble_iter(
+      db     => 'sakila',
+      tbl    => 'country',
+      argv   => [qw(--databases sakila --tables country --chunk-size 25)],
+      select => $chunk_checksum,
+   );
+
+   my @expected_checksums = (
+      [25, 'da79784d'],
+      [25, 'e860c4f9'],
+      [25, 'eb651f58'],
+      [25, '2d87d588'],
+      [9,  'beb4a180'],
+   );
+   foreach my $chunkno ( 1..5 ) {
+      my $row = $ni->next();
+      is_deeply(
+         $row,
+         $expected_checksums[$chunkno - 1],
+         "SELECT chunk checksum $chunkno FROM sakila.country"
+      ) or print STDERR Dumper($row);
+   }
+
+   # #########################################################################
+   # exec_nibble callback and explain_sth
+   # #########################################################################
+
+   # The callback runs the EXPLAIN statement with the nibble's boundaries and
+   # saves the plan; the two calls to next() below are expected to trigger
+   # it once each.
+   my @expl;
+   $ni = make_nibble_iter(
+      db        => 'sakila',
+      tbl       => 'country',
+      argv      => [qw(--databases sakila --tables country --chunk-size 60)],
+      select    => $chunk_checksum,
+      callbacks => {
+         exec_nibble => sub {
+            my (%args) = @_;
+            my ($expl_sth, $lb, $ub) = @args{qw(explain_sth lb ub)};
+            $expl_sth->execute(@$lb, @$ub);
+            push @expl, $expl_sth->fetchrow_hashref();
+            return 0;
+         },
+      }
+   );
+   $ni->next();
+   $ni->next();
+   is_deeply(
+      \@expl,
+      [
+         {
+            id            => '1',
+            key           => 'PRIMARY',
+            key_len       => '2',
+            possible_keys => 'PRIMARY',
+            ref           => undef,
+            rows          => '54',
+            select_type   => 'SIMPLE',
+            table         => 'country',
+            type          => 'range',
+            extra         => 'Using where',
+         },
+         {
+            id            => '1',
+            key           => 'PRIMARY',
+            key_len       => '2',
+            possible_keys => 'PRIMARY',
+            ref           => undef,
+            rows          => '49',
+            select_type   => 'SIMPLE',
+            table         => 'country',
+            type          => 'range',
+            extra         => 'Using where',
+         },
+      ],
+      'exec_nibble callback and explain_sth'
+   );
+
+   # #########################################################################
+   # film_actor, multi-column pk
+   # #########################################################################
+   $ni = make_nibble_iter(
+      db   => 'sakila',
+      tbl  => 'film_actor',
+      argv => [qw(--tables sakila.film_actor --chunk-size 1000)],
+   );
+
+   $n_rows = 0;
+   $n_rows++ while $ni->next();
+   is(
+      $n_rows,
+      5462,
+      "Nibble sakila.film_actor (multi-column pk)"
+   );
+}
+
+# ############################################################################
+# Reset chunk size on-the-fly.
+# ############################################################################
+$ni = make_nibble_iter(
+   sql_file => "a-z.sql",
+   db       => 'test',
+   tbl      => 't',
+   argv     => [qw(--databases test --chunk-size 5)],
+);
+
+# Group the values by the number of the nibble that returned them, and raise
+# the chunk size once the first 5 rows have been read.
+@rows = ();
+my $n_rows_read = 0;
+while ( my $row = $ni->next() ) {
+   push @{$rows[$ni->nibble_number()]}, @$row;
+   $ni->set_chunk_size(20) if ++$n_rows_read == 5;
+}
+
+my $expected_nibbles = [
+   undef,          # no 0 nibble
+   [ ('a'..'e') ], # nibble 1
+   [ ('f'..'y') ], # nibble 2, should contain 20 chars
+   [ 'z'        ], # last nibble
+];
+is_deeply(
+   \@rows,
+   $expected_nibbles,
+   "Change chunk size while nibbling"
+) or print STDERR Dumper(\@rows);
+
+# #############################################################################
+# Done.
+# #############################################################################
+$sb->wipe_clean($dbh);
+exit;
@@ -0,0 +1,417 @@
+#!/usr/bin/perl
+
+BEGIN {
+   die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
+      unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
+   unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
+};
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use Test::More;
+
+use RowChecksum;
+use TableParser;
+use Quoter;
+use MySQLDump;
+use DSNParser;
+use OptionParser;
+use Sandbox;
+use PerconaTest;
+
+# Connect to the sandbox master; the whole test file is skipped without it.
+my $dp  = DSNParser->new(opts => $dsn_opts);
+my $sb  = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
+my $dbh = $sb->get_dbh_for('master');
+
+if ( $dbh ) {
+   plan tests => 28;
+}
+else {
+   plan skip_all => "Cannot connect to sandbox master";
+}
+
+$sb->create_dbs($dbh, ['test']);
+
+# Helper objects used throughout the tests; the option specs are loaded
+# from pt-table-checksum.
+my $q  = Quoter->new();
+my $tp = TableParser->new(Quoter => $q);
+my $du = MySQLDump->new();
+my $o  = OptionParser->new(description => 'NibbleIterator');
+$o->get_specs("$trunk/bin/pt-table-checksum");
+
+# The object under test.
+my $c = RowChecksum->new(
+   OptionParser => $o,
+   Quoter       => $q,
+);
+
+# ############################################################################
+# _make_xor_slices
+# ############################################################################
+
+# Each case is [ test name, args for _make_xor_slices, expected SQL ].
+# Widths up to 16 are expected in one slice; wider checksums are expected to
+# be cut into slices of at most 16 characters.  With opt_slice, only that
+# slice should contain the "@crc := <expression>" assignment.
+my @xor_slice_cases = (
+   [
+      'FOO XOR slices 1 wide',
+      [ row_checksum => 'FOO', crc_width => 1 ],
+      "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 1), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 1, '0')",
+   ],
+   [
+      'FOO XOR slices 16 wide',
+      [ row_checksum => 'FOO', crc_width => 16 ],
+      "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0')",
+   ],
+   [
+      'FOO XOR slices 17 wide',
+      [ row_checksum => 'FOO', crc_width => 17 ],
+      "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0'), "
+         . "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 17, 1), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 1, '0')",
+   ],
+   [
+      'FOO XOR slices 32 wide',
+      [ row_checksum => 'FOO', crc_width => 32 ],
+      "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0'), "
+         . "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 17, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0')",
+   ],
+   [
+      'XOR slice optimized in slice 0',
+      [ row_checksum => 'FOO', crc_width => 32, opt_slice => 0 ],
+      "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc := FOO, 1, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0'), "
+         . "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc, 17, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0')",
+   ],
+   [
+      'XOR slice optimized in slice 1',
+      [ row_checksum => 'FOO', crc_width => 32, opt_slice => 1 ],
+      "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc, 1, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0'), "
+         . "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc := FOO, 17, 16), 16, 10) "
+         . "AS UNSIGNED)), 10, 16), 16, '0')",
+   ],
+);
+
+foreach my $case ( @xor_slice_cases ) {
+   my ($name, $slice_args, $expected_sql) = @$case;
+   is(
+      $c->_make_xor_slices(@$slice_args),
+      $expected_sql,
+      $name,
+   );
+}
+
+# ############################################################################
+# make_row_checksum
+# ############################################################################
+my $tbl = {
+   db         => 'sakila',
+   tbl        => 'film',
+   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
+};
+
+# Expected SELECT list for all columns of sakila.film (with trailing comma).
+my $film_select_cols
+   = q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
+   . q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
+   . q{`replacement_cost`, `rating`, `special_features`, }
+   . q{`last_update` + 0 AS `last_update`, };
+
+# The same columns as they are expected inside the hash function call.
+my $film_hash_cols
+   = q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
+   . q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
+   . q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0};
+
+# Each case is [ test name, args passed after tbl => $tbl, expected SQL ].
+my @film_cases = (
+   [
+      'SHA1 query for sakila.film',
+      [ func => 'SHA1' ],
+      $film_select_cols
+         . q{SHA1(CONCAT_WS('#', }
+         . $film_hash_cols
+         . q{, }
+         . q{CONCAT(ISNULL(`description`), ISNULL(`release_year`), }
+         . q{ISNULL(`original_language_id`), ISNULL(`length`), }
+         . q{ISNULL(`rating`), ISNULL(`special_features`))))},
+   ],
+   [
+      'FNV_64 query for sakila.film',
+      [ func => 'FNV_64' ],
+      $film_select_cols
+         . q{FNV_64(}
+         . $film_hash_cols
+         . q{)},
+   ],
+   [
+      'SHA1 query for sakila.film with only one column',
+      [ func => 'SHA1', cols => [qw(film_id)] ],
+      q{`film_id`, SHA1(`film_id`)},
+   ],
+   [
+      'Column names are case-insensitive',
+      [ func => 'SHA1', cols => [qw(FILM_ID)] ],
+      q{`film_id`, SHA1(`film_id`)},
+   ],
+   [
+      'Separator',
+      [ func => 'SHA1', cols => [qw(film_id title)], sep => '%' ],
+      q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
+   ],
+   [
+      'Bad separator',
+      [ func => 'SHA1', cols => [qw(film_id title)], sep => "'%'" ],
+      q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
+   ],
+   [
+      'Really bad separator',
+      [ func => 'SHA1', cols => [qw(film_id title)], sep => "'''" ],
+      q{`film_id`, `title`, SHA1(CONCAT_WS('#', `film_id`, `title`))},
+   ],
+);
+
+foreach my $case ( @film_cases ) {
+   my ($name, $checksum_args, $expected_sql) = @$case;
+   is(
+      $c->make_row_checksum(tbl => $tbl, @$checksum_args),
+      $expected_sql,
+      $name,
+   );
+}
+
+# sakila.rental
+$tbl = {
+   db         => 'sakila',
+   tbl        => 'rental',
+   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.rental.float.sql')),
+};
+
+my @rental_cases = (
+   [
+      'FLOAT column is like any other',
+      [ func => 'SHA1' ],
+      q{`rental_id`, `foo`, SHA1(CONCAT_WS('#', `rental_id`, `foo`))},
+   ],
+   [
+      'FLOAT column is rounded to 5 places',
+      [ func => 'SHA1', float_precision => 5 ],
+      q{`rental_id`, ROUND(`foo`, 5), SHA1(CONCAT_WS('#', `rental_id`, ROUND(`foo`, 5)))},
+   ],
+);
+
+foreach my $case ( @rental_cases ) {
+   my ($name, $checksum_args, $expected_sql) = @$case;
+   is(
+      $c->make_row_checksum(tbl => $tbl, @$checksum_args),
+      $expected_sql,
+      $name,
+   );
+}
+
+# sakila.film
+$tbl = {
+   db         => 'sakila',
+   tbl        => 'film',
+   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
+};
+
+like(
+   $c->make_row_checksum(
+      tbl  => $tbl,
+      func => 'SHA1',
+      trim => 1,
+   ),
+   qr{TRIM\(`title`\)},
+   'VARCHAR column is trimmed',
+);
+
+# ############################################################################
+# make_chunk_checksum
+# ############################################################################
+
+# Each case is [ test name, args passed after tbl => $tbl, expected SQL ].
+# Every test has its own name so that a failure identifies the case; the
+# buffered FNV_64 case expects the same SQL as the unbuffered one.
+my @chunk_checksum_cases = (
+   [
+      'sakila.film SHA1',
+      [
+         func      => 'SHA1',
+         crc_width => 40,
+         cols      => [qw(film_id)],
+         crc_type  => 'varchar',
+      ],
+      q{COUNT(*) AS cnt, }
+         . q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, }
+         . q{16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'), }
+         . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 17, 16), 16, }
+         . q{10) AS UNSIGNED)), 10, 16), 16, '0'), }
+         . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 33, 8), 16, }
+         . q{10) AS UNSIGNED)), 10, 16), 8, '0'))), 0) AS crc},
+   ],
+   [
+      'sakila.film FNV_64',
+      [
+         func      => 'FNV_64',
+         crc_width => 99,
+         cols      => [qw(film_id)],
+         crc_type  => 'bigint',
+      ],
+      q{COUNT(*) AS cnt, }
+         . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
+   ],
+   [
+      'sakila.film FNV_64 (buffer => 1)',
+      [
+         func      => 'FNV_64',
+         crc_width => 99,
+         cols      => [qw(film_id)],
+         buffer    => 1,
+         crc_type  => 'bigint',
+      ],
+      q{COUNT(*) AS cnt, }
+         . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
+   ],
+   [
+      'sakila.film CRC32',
+      [
+         func      => 'CRC32',
+         crc_width => 99,
+         cols      => [qw(film_id)],
+         buffer    => 1,
+         crc_type  => 'int',
+      ],
+      q{COUNT(*) AS cnt, }
+         . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
+   ],
+);
+
+foreach my $case ( @chunk_checksum_cases ) {
+   my ($name, $checksum_args, $expected_sql) = @$case;
+   is(
+      $c->make_chunk_checksum(tbl => $tbl, @$checksum_args),
+      $expected_sql,
+      $name,
+   );
+}
+
+# #############################################################################
+# Sandbox tests.
+# #############################################################################
+
+# With no function, or with one that doesn't exist, one of the default
+# functions is expected.  Each case is [ extra args, test name ].
+my @default_func_cases = (
+   [ [],                  'CRC32, FNV_64 or MD5 is default' ],
+   [ [ func => 'SHA99' ], 'SHA99 does not exist so I get CRC32 or friends' ],
+);
+foreach my $case ( @default_func_cases ) {
+   my ($extra_args, $name) = @$case;
+   like(
+      $c->_get_hash_func(dbh => $dbh, @$extra_args),
+      qr/CRC32|FNV_64|MD5/,
+      $name,
+   );
+}
+
+# Request MD5 on the command line, then restore the default options.
+@ARGV = qw(--function MD5);
+$o->get_opts();
+is(
+   $c->_get_hash_func(dbh => $dbh, func => 'MD5'),
+   'MD5',
+   'MD5 requested and MD5 granted',
+);
+@ARGV = qw();
+$o->get_opts();
+
+# Each case is [ method to call, func, expected value, test name ].
+my @per_func_cases = (
+   [ '_optimize_xor', 'SHA1',  '2',       'SHA1 slice is 2' ],
+   [ '_optimize_xor', 'MD5',   '1',       'MD5 slice is 1'  ],
+   [ '_get_crc_type', 'CRC32', 'int',     'CRC32 type'      ],
+   [ '_get_crc_type', 'MD5',   'varchar', 'MD5 type'        ],
+);
+foreach my $case ( @per_func_cases ) {
+   my ($method, $func, $expected, $name) = @$case;
+   is(
+      $c->$method(dbh => $dbh, func => $func),
+      $expected,
+      $name
+   );
+}
+
+# #############################################################################
+# Issue 94: Enhance mk-table-checksum, add a --ignorecols option
+# #############################################################################
+$sb->load_file('master', 't/lib/samples/issue_94.sql');
+$tbl = {
+   db         => 'test',
+   tbl        => 'issue_94',
+   tbl_struct => $tp->parse($du->get_create_table($dbh, $q, 'test', 'issue_94')),
+};
+
+# Column c is ignored, so only a and b are expected in the checksum.
+my $issue_94_sql = $c->make_chunk_checksum(
+   tbl        => $tbl,
+   func       => 'CRC32',
+   crc_width  => 16,
+   crc_type   => 'int',
+   opt_slice  => undef,
+   cols       => undef,
+   sep        => '#',
+   replicate  => undef,
+   precision  => undef,
+   trim       => undef,
+   ignorecols => {'c'=>1},
+);
+my $expected_issue_94_sql
+   = "COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `a`, `b`)) AS UNSIGNED)), 10, 16)), 0) AS crc";
+is($issue_94_sql, $expected_issue_94_sql, 'Ignores specified columns');
+
+# ############################################################################
+# Done.
+# ############################################################################
+$sb->wipe_clean($dbh);
+exit;
@@ -0,0 +1,10 @@
+-- Sample data: table test.t with one row for each letter from a to z.
+DROP DATABASE IF EXISTS test;
+CREATE DATABASE test;
+USE test;
+
+CREATE TABLE t (
+  c VARCHAR(16) NOT NULL,
+  INDEX (c)
+);
+
+INSERT INTO t VALUES
+  ('a'), ('b'), ('c'), ('d'), ('e'),
+  ('f'), ('g'), ('h'), ('i'), ('j'),
+  ('k'), ('l'), ('m'), ('n'), ('o'),
+  ('p'), ('q'), ('r'), ('s'), ('t'),
+  ('u'), ('v'), ('w'), ('x'), ('y'),
+  ('z');