Implement --chunk-index name:n in pt-osc. Fix pod in pt-table-checksum.

2025-09-25 05:44:59 +00:00 · 2012-06-10 12:04:42 -04:00
parent 1011eff2bb
commit 75c79ae204
2 changed files with 182 additions and 69 deletions
--- a/bin/pt-online-schema-change
+++ b/bin/pt-online-schema-change
@@ -5127,6 +5127,22 @@ sub main {
      }
   }

+   # Parse --chunk-index INDEX:N where N is the number of
+   # left-most columns of INDEX to use.
+   # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
+   my ($chunk_index, $n_chunk_index_cols)
+      = split(':', $o->get('chunk-index') || '');
+   if ( defined $chunk_index && !$chunk_index ) {
+      $o->save_error('--chunk-index cannot be an empty string');
+   }
+   if ( defined $n_chunk_index_cols
+        && (!$n_chunk_index_cols
+            || $n_chunk_index_cols =~ m/[^\d]/
+            || $n_chunk_index_cols < 1) ) {
+      $o->save_error('Invalid number of --chunk-index columns: '
+         . $n_chunk_index_cols);
+   }
+
   if ( !$o->get('help') ) {
      if ( @ARGV ) {
         $o->save_error('Specify only one DSN on the command line');
@@ -5741,6 +5757,7 @@ sub main {
         my (%args) = @_;
         my $tbl         = $args{tbl};
         my $nibble_iter = $args{NibbleIterator};
+         my $statements  = $nibble_iter->statements();

         if ( $o->get('dry-run') ) {
            print "Not copying rows because this is a dry run.\n";
@@ -5752,7 +5769,6 @@ sub main {

         if ( $o->get('print') ) {
            # Print the checksum and next boundary statements.
-            my $statements = $nibble_iter->statements();
            foreach my $sth ( sort keys %$statements ) {
               next if $sth =~ m/^explain/;
               if ( $statements->{$sth} ) {
@@ -5796,6 +5812,30 @@ sub main {
               die $msg;
            }
         }
+         else { # chunking the table
+            if ( $o->get('check-plan') ) {
+               my $expl = explain_statement(
+                  sth  => $statements->{explain_first_lower_boundary},
+                  tbl  => $tbl,
+                  vals => [],
+               );
+               if ( !$expl->{key}
+                    || lc($expl->{key}) ne lc($nibble_iter->nibble_index())
+                    || !$expl->{key_len} ) {
+                  # XXX this message doesn't give good info if key_len is
+                  # NULL. We need an elsif() for that, instead of lumping it
+                  # into this if().
+                  die "Cannot determine the key_len of the chunk index "
+                     . "because MySQL chose "
+                     . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
+                     . "instead of the " . $nibble_iter->nibble_index()
+                     . " index for the first lower boundary statement.  "
+                     . "See --[no]check-plan in the documentation for more "
+                     . "information.";
+               }
+               $tbl->{key_len} = $expl->{key_len}
+            }
+         }

         return 1; # continue nibbling table
      },
@@ -5855,58 +5895,11 @@ sub main {
         # Count every chunk, even if it's ultimately skipped, etc.
         $tbl->{results}->{n_chunks}++;

-         # If the table is being chunk (i.e., it's not small enough to be
-         # consumed by one nibble), then check index usage and chunk size.
-         if ( !$nibble_iter->one_nibble() ) {
-            my $expl = explain_statement(
-               tbl  => $tbl,
-               sth  => $sth->{explain_nibble},
-               vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
-            );
-
-            # Ensure that MySQL is using the chunk index.
-            if (   lc($expl->{key} || '')
-                ne lc($nibble_iter->nibble_index() || '') ) {
-               my $msg
-                  = "Aborting copying table $tbl->{name} at chunk "
-                  . $nibble_iter->nibble_number()
-                  . " because it is not safe to chunk.  Chunking should "
-                  . "use the "
-                  . ($nibble_iter->nibble_index() || '?')
-                  . " index, but MySQL EXPLAIN reports that "
-                  . ($expl->{key} ? "the $expl->{key}" : "no")
-                  . " index will be used for "
-                  . $sth->{explain_nibble}->{Statement}
-                  . " with values "
-                  . join(", ", map { defined $_ ? $_ : "NULL" }
-                         (@{$boundary->{lower}}, @{$boundary->{upper}}))
-                  . "\n";
-               die $msg;
-            }
-
-            # Check chunk size limit if the upper boundary and next lower
-            # boundary are identical.
-            if ( $limit ) {
-               my $boundary = $nibble_iter->boundaries(); 
-               my $oversize_chunk
-                  = $limit ? ($expl->{rows} || 0) >= $tbl->{chunk_size} * $limit
-                  :          0;
-               if ( $oversize_chunk
-                    && $nibble_iter->identical_boundaries(
-                          $boundary->{upper}, $boundary->{next_lower}) )
-               {
-                  my $msg
-                     = "Aborting copying table $tbl->{name} at chunk "
-                     . $nibble_iter->nibble_number()
-                     . " because the chunk is too large: MySQL estimates "
-                     . ($expl->{rows} || 0) . "rows.  The current chunk "
-                     . "size limit is " . ($tbl->{chunk_size} * $limit)
-                     . " rows (chunk size=$tbl->{chunk_size}"
-                     . " * chunk size limit=$limit).\n";
-                  die $msg;
-               }
-            }
-         }
+         # Die unless the nibble is safe.
+         nibble_is_safe(
+            %args,
+            OptionParser => $o,
+         );

         # Exec and time the chunk checksum query.
         $tbl->{nibble_time} = exec_nibble(
@@ -6009,18 +6002,19 @@ sub main {
   # This won't (shouldn't) fail because we already verified in
   # check_orig_table() table we can NibbleIterator::can_nibble().
   my $nibble_iter = new NibbleIterator(
-      Cxn          => $cxn,
-      tbl          => $orig_tbl,
-      chunk_size   => $orig_tbl->{chunk_size},
-      chunk_index  => $o->get('chunk-index'),
-      dml          => $dml,
-      select       => $select,
-      callbacks    => $callbacks,
-      OptionParser => $o,
-      Quoter       => $q,
-      TableParser  => $tp,
-      TableNibbler => new TableNibbler(TableParser => $tp, Quoter => $q),
-      comments     => {
+      Cxn                => $cxn,
+      tbl                => $orig_tbl,
+      chunk_size         => $orig_tbl->{chunk_size},
+      chunk_index        => $chunk_index,
+      n_chunk_index_cols => $n_chunk_index_cols,
+      dml                => $dml,
+      select             => $select,
+      callbacks          => $callbacks,
+      OptionParser       => $o,
+      Quoter             => $q,
+      TableParser        => $tp,
+      TableNibbler       => new TableNibbler(TableParser => $tp, Quoter => $q),
+      comments           => {
         bite   => "pt-online-schema-change $PID copy table",
         nibble => "pt-online-schema-change $PID copy nibble",
      },
@@ -6210,6 +6204,82 @@ sub main {
 # ############################################################################
 # Subroutines.
 # ############################################################################
+
+# Die unless the current nibble is safe to copy, judged by an EXPLAIN of the
+# nibble statement with the current boundary values.
+# Required args:
+#   Cxn            - must be given, but is not used in this sub
+#   tbl            - hashref; db, tbl, chunk_size and key_len are read
+#   NibbleIterator - provides the statements, boundaries and chunk index
+#   OptionParser   - read for --chunk-size-limit
+#
+# Returns 1 if the nibble is safe, else dies with an error message.  Every
+# failed check dies: a once-only or --quiet-dependent die would let an
+# unsafe nibble fall through to the "safe" return value.
+sub nibble_is_safe {
+   my (%args) = @_;
+   my @required_args = qw(Cxn tbl NibbleIterator OptionParser);
+   foreach my $arg ( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my (undef, $tbl, $nibble_iter, $o) = @args{@required_args};
+
+   # EXPLAIN the nibble statement to get its row estimate, index and key_len.
+   # XXX This call and others like it are relying on a Perl oddity.
+   # See https://bugs.launchpad.net/percona-toolkit/+bug/987393
+   my $sth      = $nibble_iter->statements();
+   my $boundary = $nibble_iter->boundaries();
+   my $expl     = explain_statement(
+      tbl  => $tbl,
+      sth  => $sth->{explain_nibble},
+      vals => [ @{$boundary->{lower}}, @{$boundary->{upper}} ],
+   );
+
+   # Ensure that MySQL is using the chunk index if the table is being chunked.
+   if ( !$nibble_iter->one_nibble()
+        && lc($expl->{key} || '') ne lc($nibble_iter->nibble_index() || '') ) {
+      die "Error copying rows at chunk " . $nibble_iter->nibble_number()
+         . " of $tbl->{db}.$tbl->{tbl} because MySQL chose "
+         . ($expl->{key} ? "the $expl->{key}" : "no") . " index "
+         . "instead of the " . ($nibble_iter->nibble_index() || '?')
+         . " index.\n";
+   }
+
+   # Ensure that the chunk isn't too large if there's a --chunk-size-limit.
+   # If single-chunking the table, this has already been checked, so it
+   # shouldn't have changed.  If chunking the table with a non-unique key,
+   # oversize chunks are possible.
+   if ( my $limit = $o->get('chunk-size-limit') ) {
+      my $max_rows = $tbl->{chunk_size} * $limit;
+      my $est_rows = $expl->{rows} || 0;
+      if ( $est_rows >= $max_rows
+           && $nibble_iter->identical_boundaries($boundary->{upper},
+                                                 $boundary->{next_lower}) ) {
+         die "Error copying rows at chunk " . $nibble_iter->nibble_number()
+            . " of $tbl->{db}.$tbl->{tbl} because it is oversized.  "
+            . "The current chunk size limit is $max_rows"
+            . " rows (chunk size=$tbl->{chunk_size}"
+            . " * chunk size limit=$limit), but MySQL estimates "
+            . "that there are $est_rows rows in the chunk.\n";
+      }
+   }
+
+   # Ensure that MySQL is still using the entire index.
+   # https://bugs.launchpad.net/percona-toolkit/+bug/1010232
+   if ( !$nibble_iter->one_nibble()
+        && $tbl->{key_len}
+        && ($expl->{key_len} || 0) < $tbl->{key_len} ) {
+      die "Error copying rows at chunk " . $nibble_iter->nibble_number()
+         . " of $tbl->{db}.$tbl->{tbl} because MySQL used "
+         . "only " . ($expl->{key_len} || 0) . " bytes "
+         . "of the " . ($expl->{key} || '?') . " index instead of "
+         . $tbl->{key_len} . ".  See the --[no]check-plan documentation "
+         . "for more information.\n";
+   }
+
+   return 1; # safe
+}
+
 sub create_new_table{
   my (%args) = @_;
   my @required_args = qw(orig_tbl Cxn Quoter OptionParser TableParser);
@@ -7209,6 +7279,38 @@ type: time; default: 1

 Sleep time between checks for L<"--max-lag">.

+=item --[no]check-plan
+
+default: yes
+
+Check query execution plans for safety. By default, this option causes
+the tool to run EXPLAIN before running queries that are meant to access
+a small amount of data, but which could access many rows if MySQL chooses a bad
+execution plan. These include the queries to determine chunk boundaries and the
+chunk queries themselves. If it appears that MySQL will use a bad query
+execution plan, the tool will skip the table or the chunk of the table.
+
+The tool uses several heuristics to determine whether an execution plan is bad.
+The first is whether EXPLAIN reports that MySQL intends to use the desired index
+to access the rows. If MySQL chooses a different index, the tool considers the
+query unsafe.
+
+The tool also checks how much of the index MySQL reports that it will use for
+the query. The EXPLAIN output shows this in the key_len column. The tool
+remembers the largest key_len seen, and skips chunks where MySQL reports that it
+will use a smaller prefix of the index. This heuristic can be understood as
+skipping chunks that have a worse execution plan than other chunks.
+
+For each table, the tool prints a warning the first time it skips a chunk due to
+a bad execution plan. Subsequent chunks are skipped silently, although you can
+see the count of skipped chunks in the SKIPPED column in the tool's output.
+
+This option adds some setup work to each table and chunk. Although the work is
+not intrusive for MySQL, it results in more round-trips to the server, which
+consumes time. Making chunks too small will cause the overhead to become
+relatively larger. It is therefore recommended that you not make chunks too
+small, because the tool may take a very long time to complete if you do.
+
 =item --[no]check-replication-filters

 default: yes
@@ -7245,6 +7347,17 @@ behavior of choosing an index.  The tool adds the index to the SQL statements in
 a C<FORCE INDEX> clause.  Be careful when using this option; a poor choice of
 index could cause bad performance.

+This option supports a special syntax to select a prefix of the index instead of
+the entire index. The syntax is NAME:N, where NAME is the name of the index, and
+N is the number of columns you wish to use. This works only for compound
+indexes, and is useful in cases where a bug in the MySQL query optimizer
+(planner) causes it to scan a large range of rows instead of using the index to
+locate starting and ending points precisely. This problem sometimes occurs on
+indexes with many columns, such as 4 or more. If this happens, the tool might
+print a warning related to the L<"--[no]check-plan"> option. Instructing
+the tool to use only the first N columns from the index is a workaround for
+the bug in some cases.
+
 =item --chunk-size

 type: size; default: 1000
--- a/bin/pt-table-checksum
+++ b/bin/pt-table-checksum
@@ -8184,9 +8184,9 @@ indexes, and is useful in cases where a bug in the MySQL query optimizer
 (planner) causes it to scan a large range of rows instead of using the index to
 locate starting and ending points precisely. This problem sometimes occurs on
 indexes with many columns, such as 4 or more. If this happens, the tool might
-print a warning related to the L<"--check-plan"> option. Instructing the tool to
-use only the first N columns from the index is a workaround for the bug in some
-cases.
+print a warning related to the L<"--[no]check-plan"> option. Instructing
+the tool to use only the first N columns from the index is a workaround for
+the bug in some cases.

 =item --chunk-size

@@ -8211,7 +8211,7 @@ clause that matches only 1,000 of the values, and that chunk will be at least
 L<"--chunk-size-limit">.

 Selecting a small chunk size will cause the tool to become much slower, in part
-because of the setup work required for L<"--[no]-check-plan">.
+because of the setup work required for L<"--[no]check-plan">.

 =item --chunk-size-limit