From e2073065b1a5a17acd3516dbb7c00622420af416 Mon Sep 17 00:00:00 2001
From: Daniel Nichter <daniel@percona.com>
Date: Fri, 4 May 2012 17:46:34 -0600
Subject: [PATCH 1/2] EXPLAIN actual ascending nibble query to get MySQL's
 chosen index, and use that instead of the tool's chosen index. TODO: fix
 OobNibbleIterator.t and chunk_index.t.

---
 bin/pt-table-checksum                         |  80 ++++++++++----
 lib/NibbleIterator.pm                         | 102 ++++++++++++++----
 t/pt-table-checksum/chunk_index.t             |  18 +++-
 .../samples/not-using-pk-bug.out              |  20 ++++
 .../samples/not-using-pk-bug.sql              |  20 ++++
 5 files changed, 196 insertions(+), 44 deletions(-)
 create mode 100644 t/pt-table-checksum/samples/not-using-pk-bug.out
 create mode 100644 t/pt-table-checksum/samples/not-using-pk-bug.sql

diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum
index 135f9888..8a2fc127 100755
--- a/bin/pt-table-checksum
+++ b/bin/pt-table-checksum
@@ -3580,6 +3580,9 @@ sub new {
    else {
       my $index      = $nibble_params->{index}; # brevity
       my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
+      my $order_by   = join(', ', map {$q->quote($_)} @{$index_cols});
+      my $limit      = $chunk_size - 1;
+      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
 
       my $asc = $args{TableNibbler}->generate_asc_stmt(
          %args,
@@ -3590,18 +3593,52 @@ sub new {
       );
       PTDEBUG && _d('Ascend params:', Dumper($asc));
 
-      my $from     = "$tbl->{name} FORCE INDEX(`$index`)";
-      my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
-
       my $first_lb_sql
          = "SELECT /*!40001 SQL_NO_CACHE */ "
          . join(', ', map { $q->quote($_) } @{$asc->{scols}})
-         . " FROM $from"
+         . " FROM $tbl->{name}"
          . ($where ? " WHERE $where" : '')
          . " ORDER BY $order_by"
          . " LIMIT 1"
          . " /*first lower boundary*/";
-      PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);
+      PTDEBUG && _d($first_lb_sql);
+      my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql);
+      PTDEBUG && _d('First lower boundary:', Dumper($first_lower));  
+
+      if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) {
+
+         my $sql
+            = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ "
+            . join(', ', map { $q->quote($_) } @{$asc->{scols}})
+            . " FROM $tbl->{name}"
+            . " WHERE " . $asc->{boundaries}->{'>='}
+            . ($where ? " AND ($where)" : '')
+            . " ORDER BY $order_by"
+            . " LIMIT ?, 2"
+            . " /*get MySQL index*/";
+         my $sth         = $cxn->dbh()->prepare($sql);
+         my $mysql_index = _get_mysql_index(
+            Cxn    => $cxn,
+            sth    => $sth,
+            params => [@$first_lower, $limit],
+         );
+         PTDEBUG && _d('MySQL index:', $mysql_index);
+
+         if ( lc($index) ne lc($mysql_index) ) {
+            my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index};
+            my $mysql_index_struct  = $tbl->{tbl_struct}->{keys}->{$mysql_index};
+            warn "The best index for chunking $tbl->{name} is $index ("
+               . ($chosen_index_struct->{is_unique} ? "unique" : "not unique")
+               . ", covers " . scalar @{$chosen_index_struct->{cols}}
+               . " columns), but index $mysql_index ("
+               . ($mysql_index_struct->{is_unique} ? "unique" : "not unique")
+               . ", covers " . scalar @{$mysql_index_struct->{cols}}
+               . " columns) that MySQL chose will be used instead.\n";
+            $index = $mysql_index;
+         }
+      }
+
+      my $from = "$tbl->{name} FORCE INDEX(`$index`)";
 
       my $resume_lb_sql;
       if ( $args{resume} ) {
@@ -3663,14 +3700,11 @@ sub new {
          . " /*explain $comments{nibble}*/";
       PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
 
-      my $limit = $chunk_size - 1;
-      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
-
       $self = {
          %args,
          index              => $index,
          limit              => $limit,
-         first_lb_sql       => $first_lb_sql,
+         first_lower        => $first_lower,
          last_ub_sql        => $last_ub_sql,
          ub_sql             => $ub_sql,
          nibble_sql         => $nibble_sql,
@@ -3858,7 +3892,7 @@ sub can_nibble {
    }
    my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
 
-   my ($row_est, $mysql_index) = get_row_estimate(
+   my $row_est = get_row_estimate(
       Cxn   => $cxn,
       tbl   => $tbl,
       where => $o->has('where') ? $o->get('where') : '',
@@ -3876,7 +3910,7 @@ sub can_nibble {
       $one_nibble = 1;
    }
 
-   my $index = _find_best_index(%args, mysql_index => $mysql_index);
+   my $index = _find_best_index(%args);
    if ( !$index && !$one_nibble ) {
       die "There is no good index and the table is oversized.";
    }
@@ -3980,6 +4014,18 @@ sub _get_index_cardinality {
    return $cardinality;
 }
 
+sub _get_mysql_index {
+   my (%args) = @_;
+   my @required_args = qw(Cxn sth params);
+   my ($cxn, $sth, $params) = @args{@required_args};
+   PTDEBUG && _d($sth->{Statement}, 'params:', @$params); 
+   $sth->execute(@$params);
+   my $row = $sth->fetchrow_hashref();
+   $sth->finish();
+   PTDEBUG && _d(Dumper($row));
+   return $row->{key};
+}
+
 sub get_row_estimate {
    my (%args) = @_;
    my @required_args = qw(Cxn tbl);
@@ -3989,11 +4035,11 @@ sub get_row_estimate {
    my ($cxn, $tbl) = @args{@required_args};
 
    my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
-           . "WHERE " . ($args{where} || '1=1');
+           . "WHERE " . ($args{where} || '1=1 /*get row estimate*/');
    PTDEBUG && _d($sql);
    my $expl = $cxn->dbh()->selectrow_hashref($sql);
    PTDEBUG && _d(Dumper($expl));
-   return ($expl->{rows} || 0), $expl->{key};
+   return $expl->{rows} || 0;
 }
 
 sub _prepare_sths {
@@ -4025,9 +4071,6 @@ sub _get_bounds {
 
    my $dbh = $self->{Cxn}->dbh();
 
-   $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
-   PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));  
-
    if ( my $nibble = $self->{resume} ) {
       if (    defined $nibble->{lower_boundary}
            && defined $nibble->{upper_boundary} ) {
@@ -6387,10 +6430,7 @@ sub main {
                my $chunk_size_limit = $o->get('chunk-size-limit');
                my @too_large;
                foreach my $slave ( @$slaves ) {
-                  # get_row_estimate() returns (row_est, index), but
-                  # we only need the row_est.  Maybe in the future we'll
-                  # care what index MySQL will use on a slave.
-                  my ($n_rows) = NibbleIterator::get_row_estimate(
+                  my $n_rows = NibbleIterator::get_row_estimate(
                      Cxn   => $slave,
                      tbl   => $tbl,
                      where => $o->get('where'),
diff --git a/lib/NibbleIterator.pm b/lib/NibbleIterator.pm
index 2730fe6a..0be676df 100644
--- a/lib/NibbleIterator.pm
+++ b/lib/NibbleIterator.pm
@@ -120,8 +120,11 @@ sub new {
    else {
       my $index      = $nibble_params->{index}; # brevity
       my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
+      my $order_by   = join(', ', map {$q->quote($_)} @{$index_cols});
+      my $limit      = $chunk_size - 1;
+      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
 
-      # Figure out how to nibble the table with the index.
+      # Figure out how to nibble the table with the chosen index.
       my $asc = $args{TableNibbler}->generate_asc_stmt(
          %args,
          tbl_struct => $tbl->{tbl_struct},
@@ -131,23 +134,71 @@ sub new {
       );
       PTDEBUG && _d('Ascend params:', Dumper($asc));
 
-      # Make SQL statements, prepared on first call to next().  FROM and
-      # ORDER BY are the same for all statements.  FORCE IDNEX and ORDER BY
-      # are needed to ensure deterministic nibbling.
-      my $from     = "$tbl->{name} FORCE INDEX(`$index`)";
-      my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
-
-      # The real first row in the table.  Usually we start nibbling from
-      # this row.  Called once in _get_bounds().
+      # Get the real first lower boundary.  Using this plus the chosen index,
+      # we'll see what index MySQL wants to use to ascend the table.  This
+      # is only executed once, and the first lower boundary is saved so we
+      # can start nibbling from it later.
       my $first_lb_sql
          = "SELECT /*!40001 SQL_NO_CACHE */ "
          . join(', ', map { $q->quote($_) } @{$asc->{scols}})
-         . " FROM $from"
+         . " FROM $tbl->{name}"
          . ($where ? " WHERE $where" : '')
          . " ORDER BY $order_by"
          . " LIMIT 1"
          . " /*first lower boundary*/";
-      PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);
+      PTDEBUG && _d($first_lb_sql);
+      my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql);
+      PTDEBUG && _d('First lower boundary:', Dumper($first_lower));  
+
+      # If the user didn't request a --chunk-index or they did but
+      # it wasn't chosen, then check which index MySQL wants to use
+      # to ascend the table.
+      if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) {
+
+         # This statment must be identical to the (poorly named) ub_sql below
+         # (aka "next chunk boundary") because ub_sql is what ascends the table
+         # and therefore might cause a table scan.  The difference between this
+         # statement and the real ub_sql below is that here we do not add
+         # FORCE INDEX but let MySQL chose the index.
+         my $sql
+            = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ "
+            . join(', ', map { $q->quote($_) } @{$asc->{scols}})
+            . " FROM $tbl->{name}"
+            . " WHERE " . $asc->{boundaries}->{'>='}
+            . ($where ? " AND ($where)" : '')
+            . " ORDER BY $order_by"
+            . " LIMIT ?, 2"
+            . " /*get MySQL index*/";
+         my $sth         = $cxn->dbh()->prepare($sql);
+         my $mysql_index = _get_mysql_index(
+            Cxn    => $cxn,
+            sth    => $sth,
+            params => [@$first_lower, $limit],
+         );
+         PTDEBUG && _d('MySQL index:', $mysql_index);
+
+         if ( lc($index) ne lc($mysql_index) ) {
+            # Our chosen index and MySQL's chosen index are different.
+            # This probably happens due to a --where clause that we don't
+            # know anything about but MySQL can optimize for by using
+            # another index.  We use the MySQL instead of our chosen index
+            # because the MySQL optimizer should know best.
+            my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index};
+            my $mysql_index_struct  = $tbl->{tbl_struct}->{keys}->{$mysql_index};
+            warn "The best index for chunking $tbl->{name} is $index ("
+               . ($chosen_index_struct->{is_unique} ? "unique" : "not unique")
+               . ", covers " . scalar @{$chosen_index_struct->{cols}}
+               . " columns), but index $mysql_index ("
+               . ($mysql_index_struct->{is_unique} ? "unique" : "not unique")
+               . ", covers " . scalar @{$mysql_index_struct->{cols}}
+               . " columns) that MySQL chose will be used instead.\n";
+            $index = $mysql_index;
+         }
+      }
+
+      # All statements from here on will use FORCE INDEX now that we know
+      # which index is best.
+      my $from = "$tbl->{name} FORCE INDEX(`$index`)";
 
       # If we're resuming, this fetches the effective first row, which
       # should differ from the real first row.  Called once in _get_bounds().
@@ -224,14 +275,11 @@ sub new {
          . " /*explain $comments{nibble}*/";
       PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
 
-      my $limit = $chunk_size - 1;
-      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
-
       $self = {
          %args,
          index              => $index,
          limit              => $limit,
-         first_lb_sql       => $first_lb_sql,
+         first_lower        => $first_lower,
          last_ub_sql        => $last_ub_sql,
          ub_sql             => $ub_sql,
          nibble_sql         => $nibble_sql,
@@ -429,7 +477,7 @@ sub can_nibble {
    my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
 
    # About how many rows are there?
-   my ($row_est, $mysql_index) = get_row_estimate(
+   my $row_est = get_row_estimate(
       Cxn   => $cxn,
       tbl   => $tbl,
       where => $o->has('where') ? $o->get('where') : '',
@@ -452,7 +500,7 @@ sub can_nibble {
    }
 
    # Get an index to nibble by.  We'll order rows by the index's columns.
-   my $index = _find_best_index(%args, mysql_index => $mysql_index);
+   my $index = _find_best_index(%args);
    if ( !$index && !$one_nibble ) {
       die "There is no good index and the table is oversized.";
    }
@@ -561,6 +609,18 @@ sub _get_index_cardinality {
    return $cardinality;
 }
 
+sub _get_mysql_index {
+   my (%args) = @_;
+   my @required_args = qw(Cxn sth params);
+   my ($cxn, $sth, $params) = @args{@required_args};
+   PTDEBUG && _d($sth->{Statement}, 'params:', @$params); 
+   $sth->execute(@$params);
+   my $row = $sth->fetchrow_hashref();
+   $sth->finish();
+   PTDEBUG && _d(Dumper($row));
+   return $row->{key};
+}
+
 sub get_row_estimate {
    my (%args) = @_;
    my @required_args = qw(Cxn tbl);
@@ -570,11 +630,11 @@ sub get_row_estimate {
    my ($cxn, $tbl) = @args{@required_args};
 
    my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
-           . "WHERE " . ($args{where} || '1=1');
+           . "WHERE " . ($args{where} || '1=1 /*get row estimate*/');
    PTDEBUG && _d($sql);
    my $expl = $cxn->dbh()->selectrow_hashref($sql);
    PTDEBUG && _d(Dumper($expl));
-   return ($expl->{rows} || 0), $expl->{key};
+   return $expl->{rows} || 0;
 }
 
 sub _prepare_sths {
@@ -606,10 +666,6 @@ sub _get_bounds {
 
    my $dbh = $self->{Cxn}->dbh();
 
-   # Get the real first lower boundary.
-   $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
-   PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));  
-
    # The next boundary is the first lower boundary.  If resuming,
    # this should be something > the real first lower boundary and
    # bounded (else it's not one of our chunks).
diff --git a/t/pt-table-checksum/chunk_index.t b/t/pt-table-checksum/chunk_index.t
index 3aae20f7..48bab4e5 100644
--- a/t/pt-table-checksum/chunk_index.t
+++ b/t/pt-table-checksum/chunk_index.t
@@ -25,7 +25,7 @@ if ( !$dbh ) {
    plan skip_all => 'Cannot connect to sandbox master';
 }
 else {
-   plan tests => 10;
+   plan tests => 11;
 }
 
 # The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
@@ -141,6 +141,22 @@ is(
    "14 rows checksummed (bug 925855)"
 );
 
+# #############################################################################
+# Bug 978432: PK is ignored
+# #############################################################################
+$sb->load_file('master', "t/pt-table-checksum/samples/not-using-pk-bug.sql");
+PerconaTest::wait_for_table($dbh, "test.multi_resource_apt", "apt_id=4 AND res_id=4");
+
+ok(
+   no_diff(
+      sub { pt_table_checksum::main(@args,
+         qw(-t test.multi_resource_apt --chunk-size 2 --explain --explain))
+      },
+      "t/pt-table-checksum/samples/not-using-pk-bug.out",
+   ),
+   "Smarter chunk index selection (bug 978432)"
+);
+
 # #############################################################################
 # Done.
 # #############################################################################
diff --git a/t/pt-table-checksum/samples/not-using-pk-bug.out b/t/pt-table-checksum/samples/not-using-pk-bug.out
new file mode 100644
index 00000000..f9640d33
--- /dev/null
+++ b/t/pt-table-checksum/samples/not-using-pk-bug.out
@@ -0,0 +1,20 @@
+--
+-- test.multi_resource_apt
+--
+
+REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `apt_id`, `res_id`)) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `test`.`multi_resource_apt` FORCE INDEX(`PRIMARY`) WHERE ((`apt_id` > ?) OR (`apt_id` = ? AND `res_id` >= ?)) AND ((`apt_id` < ?) OR (`apt_id` = ? AND `res_id` <= ?)) /*checksum chunk*/
+
+REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `test`.`multi_resource_apt` FORCE INDEX(`PRIMARY`) WHERE ((`apt_id` < ?) OR (`apt_id` = ? AND `res_id` < ?)) ORDER BY `apt_id`, `res_id` /*past lower chunk*/
+
+REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `test`.`multi_resource_apt` FORCE INDEX(`PRIMARY`) WHERE ((`apt_id` > ?) OR (`apt_id` = ? AND `res_id` > ?)) ORDER BY `apt_id`, `res_id` /*past upper chunk*/
+
+SELECT /*!40001 SQL_NO_CACHE */ `apt_id`, `apt_id`, `res_id` FROM `test`.`multi_resource_apt` FORCE INDEX(`PRIMARY`) WHERE ((`apt_id` > ?) OR (`apt_id` = ? AND `res_id` >= ?)) ORDER BY `apt_id`, `res_id` LIMIT ?, 2 /*next chunk boundary*/
+
+1 1,1,1 2,2,1
+2 2,2,2 3,3,1
+3 3,3,2 3,3,3
+4 4,4,1 4,4,2
+5 4,4,3 4,4,4
+6  1,1,1
+7 4,4,4 
+
diff --git a/t/pt-table-checksum/samples/not-using-pk-bug.sql b/t/pt-table-checksum/samples/not-using-pk-bug.sql
new file mode 100644
index 00000000..b299513e
--- /dev/null
+++ b/t/pt-table-checksum/samples/not-using-pk-bug.sql
@@ -0,0 +1,20 @@
+DROP DATABASE IF EXISTS test;
+CREATE DATABASE test;
+USE test;
+CREATE TABLE `multi_resource_apt` (
+  `apt_id` int(10) unsigned NOT NULL DEFAULT '0',
+  `res_id` int(10) unsigned NOT NULL DEFAULT '0',
+  PRIMARY KEY (`apt_id`,`res_id`),
+  KEY `resid` (`res_id`)
+) ENGINE=InnoDB;
+INSERT INTO multi_resource_apt VALUES
+  (1, 1),
+  (2, 1),
+  (2, 2),
+  (3, 1),
+  (3, 2),
+  (3, 3),
+  (4, 1),
+  (4, 2),
+  (4, 3),
+  (4, 4);

From 30b6b887666c4c07b40a85fd5e0c005a4edab171 Mon Sep 17 00:00:00 2001
From: Daniel Nichter <daniel@percona.com>
Date: Tue, 8 May 2012 12:43:47 -0600
Subject: [PATCH 2/2] Restore original NibbleIterator and implement simpler
 solution: only use MySQL's chosen index if --where.

---
 bin/pt-table-checksum |  85 +++++++++---------------------
 lib/NibbleIterator.pm | 117 +++++++++++++-----------------------------
 2 files changed, 61 insertions(+), 141 deletions(-)

diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum
index 8a2fc127..472f29f9 100755
--- a/bin/pt-table-checksum
+++ b/bin/pt-table-checksum
@@ -3580,9 +3580,6 @@ sub new {
    else {
       my $index      = $nibble_params->{index}; # brevity
       my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
-      my $order_by   = join(', ', map {$q->quote($_)} @{$index_cols});
-      my $limit      = $chunk_size - 1;
-      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
 
       my $asc = $args{TableNibbler}->generate_asc_stmt(
          %args,
@@ -3593,52 +3590,18 @@ sub new {
       );
       PTDEBUG && _d('Ascend params:', Dumper($asc));
 
+      my $from     = "$tbl->{name} FORCE INDEX(`$index`)";
+      my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
+
       my $first_lb_sql
          = "SELECT /*!40001 SQL_NO_CACHE */ "
          . join(', ', map { $q->quote($_) } @{$asc->{scols}})
-         . " FROM $tbl->{name}"
+         . " FROM $from"
          . ($where ? " WHERE $where" : '')
          . " ORDER BY $order_by"
          . " LIMIT 1"
          . " /*first lower boundary*/";
-      PTDEBUG && _d($first_lb_sql);
-      my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql);
-      PTDEBUG && _d('First lower boundary:', Dumper($first_lower));  
-
-      if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) {
-
-         my $sql
-            = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ "
-            . join(', ', map { $q->quote($_) } @{$asc->{scols}})
-            . " FROM $tbl->{name}"
-            . " WHERE " . $asc->{boundaries}->{'>='}
-            . ($where ? " AND ($where)" : '')
-            . " ORDER BY $order_by"
-            . " LIMIT ?, 2"
-            . " /*get MySQL index*/";
-         my $sth         = $cxn->dbh()->prepare($sql);
-         my $mysql_index = _get_mysql_index(
-            Cxn    => $cxn,
-            sth    => $sth,
-            params => [@$first_lower, $limit],
-         );
-         PTDEBUG && _d('MySQL index:', $mysql_index);
-
-         if ( lc($index) ne lc($mysql_index) ) {
-            my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index};
-            my $mysql_index_struct  = $tbl->{tbl_struct}->{keys}->{$mysql_index};
-            warn "The best index for chunking $tbl->{name} is $index ("
-               . ($chosen_index_struct->{is_unique} ? "unique" : "not unique")
-               . ", covers " . scalar @{$chosen_index_struct->{cols}}
-               . " columns), but index $mysql_index ("
-               . ($mysql_index_struct->{is_unique} ? "unique" : "not unique")
-               . ", covers " . scalar @{$mysql_index_struct->{cols}}
-               . " columns) that MySQL chose will be used instead.\n";
-            $index = $mysql_index;
-         }
-      }
-
-      my $from = "$tbl->{name} FORCE INDEX(`$index`)";
+      PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);
 
       my $resume_lb_sql;
       if ( $args{resume} ) {
@@ -3700,11 +3663,14 @@ sub new {
          . " /*explain $comments{nibble}*/";
       PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
 
+      my $limit = $chunk_size - 1;
+      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
+
       $self = {
          %args,
          index              => $index,
          limit              => $limit,
-         first_lower        => $first_lower,
+         first_lb_sql       => $first_lb_sql,
          last_ub_sql        => $last_ub_sql,
          ub_sql             => $ub_sql,
          nibble_sql         => $nibble_sql,
@@ -3892,12 +3858,18 @@ sub can_nibble {
    }
    my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
 
-   my $row_est = get_row_estimate(
+   my $where = $o->has('where') ? $o->get('where') : '';
+
+   my ($row_est, $mysql_index) = get_row_estimate(
       Cxn   => $cxn,
       tbl   => $tbl,
-      where => $o->has('where') ? $o->get('where') : '',
+      where => $where,
    );
 
+   if ( !$where ) {
+      $mysql_index = undef;
+   }
+
    my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
                   ? $row_est <= $chunk_size * $o->get('chunk-size-limit')
                   : 0;
@@ -3910,7 +3882,7 @@ sub can_nibble {
       $one_nibble = 1;
    }
 
-   my $index = _find_best_index(%args);
+   my $index = _find_best_index(%args, mysql_index => $mysql_index);
    if ( !$index && !$one_nibble ) {
       die "There is no good index and the table is oversized.";
    }
@@ -4014,18 +3986,6 @@ sub _get_index_cardinality {
    return $cardinality;
 }
 
-sub _get_mysql_index {
-   my (%args) = @_;
-   my @required_args = qw(Cxn sth params);
-   my ($cxn, $sth, $params) = @args{@required_args};
-   PTDEBUG && _d($sth->{Statement}, 'params:', @$params); 
-   $sth->execute(@$params);
-   my $row = $sth->fetchrow_hashref();
-   $sth->finish();
-   PTDEBUG && _d(Dumper($row));
-   return $row->{key};
-}
-
 sub get_row_estimate {
    my (%args) = @_;
    my @required_args = qw(Cxn tbl);
@@ -4035,11 +3995,11 @@ sub get_row_estimate {
    my ($cxn, $tbl) = @args{@required_args};
 
    my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
-           . "WHERE " . ($args{where} || '1=1 /*get row estimate*/');
+           . "WHERE " . ($args{where} || '1=1');
    PTDEBUG && _d($sql);
    my $expl = $cxn->dbh()->selectrow_hashref($sql);
    PTDEBUG && _d(Dumper($expl));
-   return $expl->{rows} || 0;
+   return ($expl->{rows} || 0), $expl->{key};
 }
 
 sub _prepare_sths {
@@ -4071,6 +4031,9 @@ sub _get_bounds {
 
    my $dbh = $self->{Cxn}->dbh();
 
+   $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
+   PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));  
+
    if ( my $nibble = $self->{resume} ) {
       if (    defined $nibble->{lower_boundary}
            && defined $nibble->{upper_boundary} ) {
@@ -6430,7 +6393,7 @@ sub main {
                my $chunk_size_limit = $o->get('chunk-size-limit');
                my @too_large;
                foreach my $slave ( @$slaves ) {
-                  my $n_rows = NibbleIterator::get_row_estimate(
+                  my ($n_rows) = NibbleIterator::get_row_estimate(
                      Cxn   => $slave,
                      tbl   => $tbl,
                      where => $o->get('where'),
diff --git a/lib/NibbleIterator.pm b/lib/NibbleIterator.pm
index 0be676df..6e446f4c 100644
--- a/lib/NibbleIterator.pm
+++ b/lib/NibbleIterator.pm
@@ -120,11 +120,8 @@ sub new {
    else {
       my $index      = $nibble_params->{index}; # brevity
       my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
-      my $order_by   = join(', ', map {$q->quote($_)} @{$index_cols});
-      my $limit      = $chunk_size - 1;
-      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
 
-      # Figure out how to nibble the table with the chosen index.
+      # Figure out how to nibble the table with the index.
       my $asc = $args{TableNibbler}->generate_asc_stmt(
          %args,
          tbl_struct => $tbl->{tbl_struct},
@@ -134,71 +131,23 @@ sub new {
       );
       PTDEBUG && _d('Ascend params:', Dumper($asc));
 
-      # Get the real first lower boundary.  Using this plus the chosen index,
-      # we'll see what index MySQL wants to use to ascend the table.  This
-      # is only executed once, and the first lower boundary is saved so we
-      # can start nibbling from it later.
+      # Make SQL statements, prepared on first call to next().  FROM and
+      # ORDER BY are the same for all statements.  FORCE IDNEX and ORDER BY
+      # are needed to ensure deterministic nibbling.
+      my $from     = "$tbl->{name} FORCE INDEX(`$index`)";
+      my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
+
+      # The real first row in the table.  Usually we start nibbling from
+      # this row.  Called once in _get_bounds().
       my $first_lb_sql
          = "SELECT /*!40001 SQL_NO_CACHE */ "
          . join(', ', map { $q->quote($_) } @{$asc->{scols}})
-         . " FROM $tbl->{name}"
+         . " FROM $from"
          . ($where ? " WHERE $where" : '')
          . " ORDER BY $order_by"
          . " LIMIT 1"
          . " /*first lower boundary*/";
-      PTDEBUG && _d($first_lb_sql);
-      my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql);
-      PTDEBUG && _d('First lower boundary:', Dumper($first_lower));  
-
-      # If the user didn't request a --chunk-index or they did but
-      # it wasn't chosen, then check which index MySQL wants to use
-      # to ascend the table.
-      if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) {
-
-         # This statment must be identical to the (poorly named) ub_sql below
-         # (aka "next chunk boundary") because ub_sql is what ascends the table
-         # and therefore might cause a table scan.  The difference between this
-         # statement and the real ub_sql below is that here we do not add
-         # FORCE INDEX but let MySQL chose the index.
-         my $sql
-            = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ "
-            . join(', ', map { $q->quote($_) } @{$asc->{scols}})
-            . " FROM $tbl->{name}"
-            . " WHERE " . $asc->{boundaries}->{'>='}
-            . ($where ? " AND ($where)" : '')
-            . " ORDER BY $order_by"
-            . " LIMIT ?, 2"
-            . " /*get MySQL index*/";
-         my $sth         = $cxn->dbh()->prepare($sql);
-         my $mysql_index = _get_mysql_index(
-            Cxn    => $cxn,
-            sth    => $sth,
-            params => [@$first_lower, $limit],
-         );
-         PTDEBUG && _d('MySQL index:', $mysql_index);
-
-         if ( lc($index) ne lc($mysql_index) ) {
-            # Our chosen index and MySQL's chosen index are different.
-            # This probably happens due to a --where clause that we don't
-            # know anything about but MySQL can optimize for by using
-            # another index.  We use the MySQL instead of our chosen index
-            # because the MySQL optimizer should know best.
-            my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index};
-            my $mysql_index_struct  = $tbl->{tbl_struct}->{keys}->{$mysql_index};
-            warn "The best index for chunking $tbl->{name} is $index ("
-               . ($chosen_index_struct->{is_unique} ? "unique" : "not unique")
-               . ", covers " . scalar @{$chosen_index_struct->{cols}}
-               . " columns), but index $mysql_index ("
-               . ($mysql_index_struct->{is_unique} ? "unique" : "not unique")
-               . ", covers " . scalar @{$mysql_index_struct->{cols}}
-               . " columns) that MySQL chose will be used instead.\n";
-            $index = $mysql_index;
-         }
-      }
-
-      # All statements from here on will use FORCE INDEX now that we know
-      # which index is best.
-      my $from = "$tbl->{name} FORCE INDEX(`$index`)";
+      PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);
 
       # If we're resuming, this fetches the effective first row, which
       # should differ from the real first row.  Called once in _get_bounds().
@@ -275,11 +224,14 @@ sub new {
          . " /*explain $comments{nibble}*/";
       PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
 
+      my $limit = $chunk_size - 1;
+      PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
+
       $self = {
          %args,
          index              => $index,
          limit              => $limit,
-         first_lower        => $first_lower,
+         first_lb_sql       => $first_lb_sql,
          last_ub_sql        => $last_ub_sql,
          ub_sql             => $ub_sql,
          nibble_sql         => $nibble_sql,
@@ -476,13 +428,26 @@ sub can_nibble {
    }
    my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
 
+   my $where = $o->has('where') ? $o->get('where') : '';
+
    # About how many rows are there?
-   my $row_est = get_row_estimate(
+   my ($row_est, $mysql_index) = get_row_estimate(
       Cxn   => $cxn,
       tbl   => $tbl,
-      where => $o->has('where') ? $o->get('where') : '',
+      where => $where,
    );
 
+   # MySQL's chosen index is only something we should prefer
+   # if --where is used.  Else, we can chose our own index
+   # and disregard the MySQL index from the row estimate.
+   # If there's a --where, however, then MySQL's chosen index
+   # is used because it tells us how MySQL plans to optimize
+   # for the --where.
+   # https://bugs.launchpad.net/percona-toolkit/+bug/978432
+   if ( !$where ) {
+      $mysql_index = undef;
+   }
+
    # Can all those rows be nibbled in one chunk?  If one_nibble is defined,
    # then do as it says; else, look at the chunk size limit.
    my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
@@ -500,7 +465,7 @@ sub can_nibble {
    }
 
    # Get an index to nibble by.  We'll order rows by the index's columns.
-   my $index = _find_best_index(%args);
+   my $index = _find_best_index(%args, mysql_index => $mysql_index);
    if ( !$index && !$one_nibble ) {
       die "There is no good index and the table is oversized.";
    }
@@ -609,18 +574,6 @@ sub _get_index_cardinality {
    return $cardinality;
 }
 
-sub _get_mysql_index {
-   my (%args) = @_;
-   my @required_args = qw(Cxn sth params);
-   my ($cxn, $sth, $params) = @args{@required_args};
-   PTDEBUG && _d($sth->{Statement}, 'params:', @$params); 
-   $sth->execute(@$params);
-   my $row = $sth->fetchrow_hashref();
-   $sth->finish();
-   PTDEBUG && _d(Dumper($row));
-   return $row->{key};
-}
-
 sub get_row_estimate {
    my (%args) = @_;
    my @required_args = qw(Cxn tbl);
@@ -630,11 +583,11 @@ sub get_row_estimate {
    my ($cxn, $tbl) = @args{@required_args};
 
    my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
-           . "WHERE " . ($args{where} || '1=1 /*get row estimate*/');
+           . "WHERE " . ($args{where} || '1=1');
    PTDEBUG && _d($sql);
    my $expl = $cxn->dbh()->selectrow_hashref($sql);
    PTDEBUG && _d(Dumper($expl));
-   return $expl->{rows} || 0;
+   return ($expl->{rows} || 0), $expl->{key};
 }
 
 sub _prepare_sths {
@@ -666,6 +619,10 @@ sub _get_bounds {
 
    my $dbh = $self->{Cxn}->dbh();
 
+   # Get the real first lower boundary.
+   $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
+   PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));  
+
    # The next boundary is the first lower boundary.  If resuming,
    # this should be something > the real first lower boundary and
    # bounded (else it's not one of our chunks).