Merge pt-osc-2.1. Fix NibbleIterator.pm to work with OobNibbleIterator when resuming at oob boundaries (t/pt-table-checksum/resume.t was failing).

2025-12-24 02:01:42 +08:00 · 2012-04-03 09:33:10 -06:00
parent c4927399bd 0dd19a6fc9
commit 0089f31a35
23 changed files with 6209 additions and 3459 deletions
--- a/lib/NibbleIterator.pm
+++ b/lib/NibbleIterator.pm
@@ -61,51 +61,52 @@ sub new {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args};
-   
-   my $where = $o->get('where');
-   my ($row_est, $mysql_index) = get_row_estimate(%args, where => $where);
-   my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
-                  ? $row_est <= $chunk_size * $o->get('chunk-size-limit')
-                  : 0;
-   PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');

-   if ( $args{resume}
-        && !defined $args{resume}->{lower_boundary}
-        && !defined $args{resume}->{upper_boundary} ) {
-      PTDEBUG && _d('Resuming from one nibble table');
-      $one_nibble = 1;
-   }
-
-   # Get an index to nibble by.  We'll order rows by the index's columns.
-   my $index = _find_best_index(%args, mysql_index => $mysql_index);
-   if ( !$index && !$one_nibble ) {
-      die "There is no good index and the table is oversized.";
+   # Die unless table can be nibbled, else return row estimate, nibble index,
+   # and if table can be nibbled in one chunk.
+   my $nibble_params = can_nibble(%args);
+
+   # Text appended to the queries in comments so caller can identify
+   # them in processlist, binlog, etc.
+   my %comments = (
+      bite   => "bite table",
+      nibble => "nibble table",
+   );
+   if ( $args{comments} ) {
+      map  { $comments{$_} = $args{comments}->{$_} }
+      grep { defined $args{comments}->{$_}         }
+      keys %{$args{comments}};
   }

+   my $where      = $o->has('where') ? $o->get('where') : '';
   my $tbl_struct = $tbl->{tbl_struct};
-   my $ignore_col = $o->get('ignore-columns') || {};
-   my $all_cols   = $o->get('columns') || $tbl_struct->{cols};
+   my $ignore_col = $o->has('ignore-columns')
+                  ? ($o->get('ignore-columns') || {})
+                  : {};
+   my $all_cols   = $o->has('columns')
+                  ? ($o->get('columns') || $tbl_struct->{cols})
+                  : $tbl_struct->{cols};
   my @cols       = grep { !$ignore_col->{$_} } @$all_cols;
   my $self;
-   if ( $one_nibble ) {
+   if ( $nibble_params->{one_nibble} ) {
      # If the chunk size is >= number of rows in table, then we don't
      # need to chunk; we can just select all rows, in order, at once.
      my $nibble_sql
         = ($args{dml} ? "$args{dml} " : "SELECT ")
         . ($args{select} ? $args{select}
                          : join(', ', map { $q->quote($_) } @cols))
-         . " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
+         . " FROM $tbl->{name}"
         . ($where ? " WHERE $where" : '')
-         . " /*checksum table*/";
+         . " /*$comments{bite}*/";
      PTDEBUG && _d('One nibble statement:', $nibble_sql);

      my $explain_nibble_sql
         = "EXPLAIN SELECT "
         . ($args{select} ? $args{select}
                          : join(', ', map { $q->quote($_) } @cols))
-         . " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
+         . " FROM $tbl->{name}"
         . ($where ? " WHERE $where" : '')
-         . " /*explain checksum table*/";
+         . " /*explain $comments{bite}*/";
      PTDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql);

      $self = {
@@ -117,6 +118,7 @@ sub new {
      };
   }
   else {
+      my $index      = $nibble_params->{index}; # brevity
      my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};

      # Figure out how to nibble the table with the index.
@@ -132,7 +134,7 @@ sub new {
      # Make SQL statements, prepared on first call to next().  FROM and
      # ORDER BY are the same for all statements.  FORCE IDNEX and ORDER BY
      # are needed to ensure deterministic nibbling.
-      my $from     = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
+      my $from     = "$tbl->{name} FORCE INDEX(`$index`)";
      my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});

      # The real first row in the table.  Usually we start nibbling from
@@ -207,7 +209,7 @@ sub new {
         . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
         . ($where ? " AND ($where)" : '')
         . ($args{order_by} ? " ORDER BY $order_by" : "")
-         . " /*checksum chunk*/";
+         . " /*$comments{nibble}*/";
      PTDEBUG && _d('Nibble statement:', $nibble_sql);

      my $explain_nibble_sql 
@@ -219,7 +221,7 @@ sub new {
         . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
         . ($where ? " AND ($where)" : '')
         . ($args{order_by} ? " ORDER BY $order_by" : "")
-         . " /*explain checksum chunk*/";
+         . " /*explain $comments{nibble}*/";
      PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);

      my $limit = $chunk_size - 1;
@@ -246,7 +248,7 @@ sub new {
      };
   }

-   $self->{row_est}    = $row_est;
+   $self->{row_est}    = $nibble_params->{row_est},
   $self->{nibbleno}   = 0;
   $self->{have_rows}  = 0;
   $self->{rowno}      = 0;
@@ -418,6 +420,52 @@ sub row_estimate {
   return $self->{row_est};
 }

+# Die unless the table can be nibbled; else return a hashref with row_est,
+# index and one_nibble.  Optional args: one_nibble, resume.
+sub can_nibble {
+   my (%args) = @_;
+   my @required_args = qw(Cxn tbl chunk_size OptionParser TableParser);
+   foreach my $arg ( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
+
+   # About how many rows are there?
+   my $where = $o->has('where') ? $o->get('where') : '';
+   my ($row_est, $mysql_index)
+      = get_row_estimate(Cxn => $cxn, tbl => $tbl, where => $where);
+
+   # Can all those rows be nibbled in one chunk?  If one_nibble is defined,
+   # then do as it says; else, look at the chunk size limit.  A limit of 0
+   # (limit disabled) falls back to 1, else only empty tables would qualify.
+   my $chunk_size_limit = $o->get('chunk-size-limit') || 1;
+   my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
+                  ? $row_est <= $chunk_size * $chunk_size_limit
+                  : 0;
+   PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
+
+   # Special case: we're resuming and there's no boundaries, so the table
+   # being resumed was originally nibbled in one chunk, so do the same again.
+   if ( $args{resume}
+        && !defined $args{resume}->{lower_boundary}
+        && !defined $args{resume}->{upper_boundary} ) {
+      PTDEBUG && _d('Resuming from one nibble table');
+      $one_nibble = 1;
+   }
+
+   # Get an index to nibble by.  We'll order rows by the index's columns.
+   my $index = _find_best_index(%args, mysql_index => $mysql_index);
+   die "There is no good index and the table is oversized."
+      if !$index && !$one_nibble;
+
+   # Reaching this point means the table can be nibbled; report how.
+   return {
+      row_est     => $row_est,      # nibble about this many rows
+      index       => $index,        # using this index
+      one_nibble  => $one_nibble,   # if the table fits in one nibble/chunk
+   };
+}
+
 sub _find_best_index {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl TableParser);
@@ -494,14 +542,18 @@ sub _find_best_index {

 sub _get_index_cardinality {
   my (%args) = @_;
-   my @required_args = qw(Cxn tbl index Quoter);
-   my ($cxn, $tbl, $index, $q) = @args{@required_args};
+   my @required_args = qw(Cxn tbl index);
+   my ($cxn, $tbl, $index) = @args{@required_args};

-   my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
-           . " WHERE Key_name = '$index'";
+   my $sql = "SHOW INDEXES FROM $tbl->{name} "
+           . "WHERE Key_name = '$index'";
   PTDEBUG && _d($sql);
   my $cardinality = 1;
-   my $rows = $cxn->dbh()->selectall_hashref($sql, 'key_name');
+   my $dbh         = $cxn->dbh();
+   my $key_name    = $dbh && ($dbh->{FetchHashKeyName} || '') eq 'NAME_lc'
+                   ? 'key_name'
+                   : 'Key_name';
+   my $rows = $dbh->selectall_hashref($sql, $key_name);
   foreach my $row ( values %$rows ) {
      $cardinality *= $row->{cardinality} if $row->{cardinality};
   }
@@ -512,6 +564,9 @@ sub _get_index_cardinality {
 sub get_row_estimate {
   my (%args) = @_;
   my @required_args = qw(Cxn tbl);
+   foreach my $arg ( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
   my ($cxn, $tbl) = @args{@required_args};

   my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
@@ -578,14 +633,19 @@ sub _get_bounds {
      # This happens if we resume from the end of the table, or if the
      # last chunk for resuming isn't bounded.
      PTDEBUG && _d('At end of table, or no more boundaries to resume');
+
+      # Get the real last upper boundary, i.e. the last row of the table
+      # at this moment.  If rows are inserted after, we won't see them.
+      # This is required for OobNibbleIterator because if we resume at
+      # the lower or upper oob nibble, we also need to know the last upper
+      # boundary of the table (we already have the first).
+      $self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
+      PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
+      $self->{no_more_boundaries} = 1;
+
      $self->{no_more_boundaries} = 1;
   }

-   # Get the real last upper boundary, i.e. the last row of the table
-   # at this moment.  If rows are inserted after, we won't see them.
-   $self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
-   PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
-
   return;
 }

@@ -641,25 +701,70 @@ sub _next_boundaries {
      }
   }

+   # Two boundaries are being fetched: the upper boundary for this nibble,
+   # i.e. the nibble the caller is trying to exec, and the next_lower boundary
+   # for the next nibble that the caller will try to exec.  For example,
+   # if chunking the alphabet, a-z, with chunk size 3, the first call will
+   # fetch:
+   #
+   #    a <- lower
+   #    b
+   #    c <- upper      ($boundary->[0])
+   #    d <- next_lower ($boundary->[1])
+   #
+   # Then the second call will fetch:
+   #
+   #    d <- lower
+   #    e
+   #    f <- upper
+   #    g <- next_lower
+   #
+   # Why fetch both upper and next_lower?  We wanted to keep nibbling simple,
+   # i.e. one nibble statement, not one for the first nibble, one for "middle"
+   # nibbles, and another for the end (this is how older code worked).  So the
+   # nibble statement is inclusive, but this requires both boundaries for
+   # reasons explained in a comment above my $ub_sql in new().
   PTDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
      join(', ', @{$self->{lower}}), $self->{limit});
   $self->{ub_sth}->execute(@{$self->{lower}}, $self->{limit});
   my $boundary = $self->{ub_sth}->fetchall_arrayref();
   PTDEBUG && _d('Next boundary:', Dumper($boundary));
   if ( $boundary && @$boundary ) {
-      $self->{upper} = $boundary->[0]; # this nibble
+      # upper boundary for the current nibble.
+      $self->{upper} = $boundary->[0];
+
      if ( $boundary->[1] ) {
-         $self->{next_lower} = $boundary->[1]; # next nibble
+         # next_lower boundary for the next nibble (will become the lower
+         # boundary when that nibble becomes the current nibble).
+         $self->{next_lower} = $boundary->[1];
      }
      else {
+         # There's no next_lower boundary, so the upper boundary of
+         # the current nibble is the end of the table.  For example,
+         # if chunking a-z, then the upper boundary of the current
+         # nibble ($boundary->[0]) is z.
+         PTDEBUG && _d('End of table boundary:', Dumper($boundary->[0]));
         $self->{no_more_boundaries} = 1;  # for next call
-         PTDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
+
+         # OobNibbleIterator needs to know the last upper boundary.
+         $self->{last_upper} = $boundary->[0];
      }
   }
   else {
-      $self->{no_more_boundaries} = 1;  # for next call
-      $self->{upper} = $self->{last_upper};
+      # This code is reached in cases like chunking a-z and the next_lower
+      # boundary ($boundary->[1]) falls on z.  When called again, no upper
+      # or next_lower is found past z so if($boundary && @$boundary) is false.
+      # But there's a problem: between the previous call that made next_lower=z
+      # and this call, rows might have been inserted, so maybe z is no longer
+      # the end of the table.  To handle this, we fetch the end of the table
+      # once and make the final nibble z-<whatever>.
+      my $dbh = $self->{Cxn}->dbh();
+      $self->{upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
      PTDEBUG && _d('Last upper boundary:', Dumper($self->{upper}));
+      $self->{no_more_boundaries} = 1;  # for next call
+      
+      # OobNibbleIterator needs to know the last upper boundary.
+      $self->{last_upper} = $self->{upper};
   }
   $self->{ub_sth}->finish();