From 30b6b887666c4c07b40a85fd5e0c005a4edab171 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 8 May 2012 12:43:47 -0600 Subject: [PATCH] Restore original NibbleIterator and implement simpler solution: only use MySQL's chosen index if --where. --- bin/pt-table-checksum | 85 +++++++++--------------------- lib/NibbleIterator.pm | 117 +++++++++++++----------------------------- 2 files changed, 61 insertions(+), 141 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 8a2fc127..472f29f9 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -3580,9 +3580,6 @@ sub new { else { my $index = $nibble_params->{index}; # brevity my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols}; - my $order_by = join(', ', map {$q->quote($_)} @{$index_cols}); - my $limit = $chunk_size - 1; - PTDEBUG && _d('Initial chunk size (LIMIT):', $limit); my $asc = $args{TableNibbler}->generate_asc_stmt( %args, @@ -3593,52 +3590,18 @@ sub new { ); PTDEBUG && _d('Ascend params:', Dumper($asc)); + my $from = "$tbl->{name} FORCE INDEX(`$index`)"; + my $order_by = join(', ', map {$q->quote($_)} @{$index_cols}); + my $first_lb_sql = "SELECT /*!40001 SQL_NO_CACHE */ " . join(', ', map { $q->quote($_) } @{$asc->{scols}}) - . " FROM $tbl->{name}" + . " FROM $from" . ($where ? " WHERE $where" : '') . " ORDER BY $order_by" . " LIMIT 1" . " /*first lower boundary*/"; - PTDEBUG && _d($first_lb_sql); - my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql); - PTDEBUG && _d('First lower boundary:', Dumper($first_lower)); - - if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) { - - my $sql - = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ " - . join(', ', map { $q->quote($_) } @{$asc->{scols}}) - . " FROM $tbl->{name}" - . " WHERE " . $asc->{boundaries}->{'>='} - . ($where ? " AND ($where)" : '') - . " ORDER BY $order_by" - . " LIMIT ?, 2" - . " /*get MySQL index*/"; - my $sth = $cxn->dbh()->prepare($sql); - my $mysql_index = _get_mysql_index( - Cxn => $cxn, - sth => $sth, - params => [@$first_lower, $limit], - ); - PTDEBUG && _d('MySQL index:', $mysql_index); - - if ( lc($index) ne lc($mysql_index) ) { - my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index}; - my $mysql_index_struct = $tbl->{tbl_struct}->{keys}->{$mysql_index}; - warn "The best index for chunking $tbl->{name} is $index (" - . ($chosen_index_struct->{is_unique} ? "unique" : "not unique") - . ", covers " . scalar @{$chosen_index_struct->{cols}} - . " columns), but index $mysql_index (" - . ($mysql_index_struct->{is_unique} ? "unique" : "not unique") - . ", covers " . scalar @{$mysql_index_struct->{cols}} - . " columns) that MySQL chose will be used instead.\n"; - $index = $mysql_index; - } - } - - my $from = "$tbl->{name} FORCE INDEX(`$index`)"; + PTDEBUG && _d('First lower boundary statement:', $first_lb_sql); my $resume_lb_sql; if ( $args{resume} ) { @@ -3700,11 +3663,14 @@ sub new { . " /*explain $comments{nibble}*/"; PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql); + my $limit = $chunk_size - 1; + PTDEBUG && _d('Initial chunk size (LIMIT):', $limit); + $self = { %args, index => $index, limit => $limit, - first_lower => $first_lower, + first_lb_sql => $first_lb_sql, last_ub_sql => $last_ub_sql, ub_sql => $ub_sql, nibble_sql => $nibble_sql, @@ -3892,12 +3858,18 @@ sub can_nibble { } my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args}; - my $row_est = get_row_estimate( + my $where = $o->has('where') ? 
$o->get('where') : ''; + + my ($row_est, $mysql_index) = get_row_estimate( Cxn => $cxn, tbl => $tbl, - where => $o->has('where') ? $o->get('where') : '', + where => $where, ); + if ( !$where ) { + $mysql_index = undef; + } + my $one_nibble = !defined $args{one_nibble} || $args{one_nibble} ? $row_est <= $chunk_size * $o->get('chunk-size-limit') : 0; @@ -3910,7 +3882,7 @@ sub can_nibble { $one_nibble = 1; } - my $index = _find_best_index(%args); + my $index = _find_best_index(%args, mysql_index => $mysql_index); if ( !$index && !$one_nibble ) { die "There is no good index and the table is oversized."; } @@ -4014,18 +3986,6 @@ sub _get_index_cardinality { return $cardinality; } -sub _get_mysql_index { - my (%args) = @_; - my @required_args = qw(Cxn sth params); - my ($cxn, $sth, $params) = @args{@required_args}; - PTDEBUG && _d($sth->{Statement}, 'params:', @$params); - $sth->execute(@$params); - my $row = $sth->fetchrow_hashref(); - $sth->finish(); - PTDEBUG && _d(Dumper($row)); - return $row->{key}; -} - sub get_row_estimate { my (%args) = @_; my @required_args = qw(Cxn tbl); @@ -4035,11 +3995,11 @@ sub get_row_estimate { my ($cxn, $tbl) = @args{@required_args}; my $sql = "EXPLAIN SELECT * FROM $tbl->{name} " - . "WHERE " . ($args{where} || '1=1 /*get row estimate*/'); + . "WHERE " . ($args{where} || '1=1'); PTDEBUG && _d($sql); my $expl = $cxn->dbh()->selectrow_hashref($sql); PTDEBUG && _d(Dumper($expl)); - return $expl->{rows} || 0; + return ($expl->{rows} || 0), $expl->{key}; } sub _prepare_sths { @@ -4071,6 +4031,9 @@ sub _get_bounds { my $dbh = $self->{Cxn}->dbh(); + $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql}); + PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower})); + if ( my $nibble = $self->{resume} ) { if ( defined $nibble->{lower_boundary} && defined $nibble->{upper_boundary} ) { @@ -6430,7 +6393,7 @@ sub main { my $chunk_size_limit = $o->get('chunk-size-limit'); my @too_large; foreach my $slave ( @$slaves ) { - my $n_rows = NibbleIterator::get_row_estimate( + my ($n_rows) = NibbleIterator::get_row_estimate( Cxn => $slave, tbl => $tbl, where => $o->get('where'), diff --git a/lib/NibbleIterator.pm b/lib/NibbleIterator.pm index 0be676df..6e446f4c 100644 --- a/lib/NibbleIterator.pm +++ b/lib/NibbleIterator.pm @@ -120,11 +120,8 @@ sub new { else { my $index = $nibble_params->{index}; # brevity my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols}; - my $order_by = join(', ', map {$q->quote($_)} @{$index_cols}); - my $limit = $chunk_size - 1; - PTDEBUG && _d('Initial chunk size (LIMIT):', $limit); - # Figure out how to nibble the table with the chosen index. + # Figure out how to nibble the table with the index. my $asc = $args{TableNibbler}->generate_asc_stmt( %args, tbl_struct => $tbl->{tbl_struct}, @@ -134,71 +131,23 @@ sub new { ); PTDEBUG && _d('Ascend params:', Dumper($asc)); - # Get the real first lower boundary. Using this plus the chosen index, - # we'll see what index MySQL wants to use to ascend the table. This - # is only executed once, and the first lower boundary is saved so we - # can start nibbling from it later. + # Make SQL statements, prepared on first call to next(). FROM and + # ORDER BY are the same for all statements. FORCE IDNEX and ORDER BY + # are needed to ensure deterministic nibbling. + my $from = "$tbl->{name} FORCE INDEX(`$index`)"; + my $order_by = join(', ', map {$q->quote($_)} @{$index_cols}); + + # The real first row in the table. Usually we start nibbling from + # this row. 
Called once in _get_bounds(). my $first_lb_sql = "SELECT /*!40001 SQL_NO_CACHE */ " . join(', ', map { $q->quote($_) } @{$asc->{scols}}) - . " FROM $tbl->{name}" + . " FROM $from" . ($where ? " WHERE $where" : '') . " ORDER BY $order_by" . " LIMIT 1" . " /*first lower boundary*/"; - PTDEBUG && _d($first_lb_sql); - my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql); - PTDEBUG && _d('First lower boundary:', Dumper($first_lower)); - - # If the user didn't request a --chunk-index or they did but - # it wasn't chosen, then check which index MySQL wants to use - # to ascend the table. - if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) { - - # This statment must be identical to the (poorly named) ub_sql below - # (aka "next chunk boundary") because ub_sql is what ascends the table - # and therefore might cause a table scan. The difference between this - # statement and the real ub_sql below is that here we do not add - # FORCE INDEX but let MySQL chose the index. - my $sql - = "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ " - . join(', ', map { $q->quote($_) } @{$asc->{scols}}) - . " FROM $tbl->{name}" - . " WHERE " . $asc->{boundaries}->{'>='} - . ($where ? " AND ($where)" : '') - . " ORDER BY $order_by" - . " LIMIT ?, 2" - . " /*get MySQL index*/"; - my $sth = $cxn->dbh()->prepare($sql); - my $mysql_index = _get_mysql_index( - Cxn => $cxn, - sth => $sth, - params => [@$first_lower, $limit], - ); - PTDEBUG && _d('MySQL index:', $mysql_index); - - if ( lc($index) ne lc($mysql_index) ) { - # Our chosen index and MySQL's chosen index are different. - # This probably happens due to a --where clause that we don't - # know anything about but MySQL can optimize for by using - # another index. We use the MySQL instead of our chosen index - # because the MySQL optimizer should know best. - my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index}; - my $mysql_index_struct = $tbl->{tbl_struct}->{keys}->{$mysql_index}; - warn "The best index for chunking $tbl->{name} is $index (" - . ($chosen_index_struct->{is_unique} ? "unique" : "not unique") - . ", covers " . scalar @{$chosen_index_struct->{cols}} - . " columns), but index $mysql_index (" - . ($mysql_index_struct->{is_unique} ? "unique" : "not unique") - . ", covers " . scalar @{$mysql_index_struct->{cols}} - . " columns) that MySQL chose will be used instead.\n"; - $index = $mysql_index; - } - } - - # All statements from here on will use FORCE INDEX now that we know - # which index is best. - my $from = "$tbl->{name} FORCE INDEX(`$index`)"; + PTDEBUG && _d('First lower boundary statement:', $first_lb_sql); # If we're resuming, this fetches the effective first row, which # should differ from the real first row. Called once in _get_bounds(). @@ -275,11 +224,14 @@ sub new { . " /*explain $comments{nibble}*/"; PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql); + my $limit = $chunk_size - 1; + PTDEBUG && _d('Initial chunk size (LIMIT):', $limit); + $self = { %args, index => $index, limit => $limit, - first_lower => $first_lower, + first_lb_sql => $first_lb_sql, last_ub_sql => $last_ub_sql, ub_sql => $ub_sql, nibble_sql => $nibble_sql, @@ -476,13 +428,26 @@ sub can_nibble { } my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args}; + my $where = $o->has('where') ? $o->get('where') : ''; + # About how many rows are there? - my $row_est = get_row_estimate( + my ($row_est, $mysql_index) = get_row_estimate( Cxn => $cxn, tbl => $tbl, - where => $o->has('where') ? 
$o->get('where') : '', + where => $where, ); + # MySQL's chosen index is only something we should prefer + # if --where is used. Else, we can chose our own index + # and disregard the MySQL index from the row estimate. + # If there's a --where, however, then MySQL's chosen index + # is used because it tells us how MySQL plans to optimize + # for the --where. + # https://bugs.launchpad.net/percona-toolkit/+bug/978432 + if ( !$where ) { + $mysql_index = undef; + } + # Can all those rows be nibbled in one chunk? If one_nibble is defined, # then do as it says; else, look at the chunk size limit. my $one_nibble = !defined $args{one_nibble} || $args{one_nibble} @@ -500,7 +465,7 @@ sub can_nibble { } # Get an index to nibble by. We'll order rows by the index's columns. - my $index = _find_best_index(%args); + my $index = _find_best_index(%args, mysql_index => $mysql_index); if ( !$index && !$one_nibble ) { die "There is no good index and the table is oversized."; } @@ -609,18 +574,6 @@ sub _get_index_cardinality { return $cardinality; } -sub _get_mysql_index { - my (%args) = @_; - my @required_args = qw(Cxn sth params); - my ($cxn, $sth, $params) = @args{@required_args}; - PTDEBUG && _d($sth->{Statement}, 'params:', @$params); - $sth->execute(@$params); - my $row = $sth->fetchrow_hashref(); - $sth->finish(); - PTDEBUG && _d(Dumper($row)); - return $row->{key}; -} - sub get_row_estimate { my (%args) = @_; my @required_args = qw(Cxn tbl); @@ -630,11 +583,11 @@ sub get_row_estimate { my ($cxn, $tbl) = @args{@required_args}; my $sql = "EXPLAIN SELECT * FROM $tbl->{name} " - . "WHERE " . ($args{where} || '1=1 /*get row estimate*/'); + . "WHERE " . ($args{where} || '1=1'); PTDEBUG && _d($sql); my $expl = $cxn->dbh()->selectrow_hashref($sql); PTDEBUG && _d(Dumper($expl)); - return $expl->{rows} || 0; + return ($expl->{rows} || 0), $expl->{key}; } sub _prepare_sths { @@ -666,6 +619,10 @@ sub _get_bounds { my $dbh = $self->{Cxn}->dbh(); + # Get the real first lower boundary. + $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql}); + PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower})); + # The next boundary is the first lower boundary. If resuming, # this should be something > the real first lower boundary and # bounded (else it's not one of our chunks).
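
For readers who want the net effect of the two hunks above without tracing the diff, here is a minimal standalone sketch of the row-estimate half of the change. It is not code from the patch: the DSN, table `test`.`t`, column `c`, and --where value are assumptions, and the tool's Cxn/OptionParser plumbing is collapsed into plain DBI calls. EXPLAIN supplies both the row estimate and MySQL's chosen index, and that index is kept as a preference only when a --where clause is present (lp:978432).

# Illustrative sketch only -- not code from the patch.  DSN, table, column,
# and --where value are made-up assumptions.
use strict;
use warnings;
use DBI;

# EXPLAIN the same kind of SELECT the tool issues: "rows" is the estimate,
# "key" is the index MySQL chose for the (optional) WHERE clause.
sub get_row_estimate {
   my (%args) = @_;
   my ($dbh, $tbl, $where) = @args{qw(dbh tbl where)};
   my $sql  = "EXPLAIN SELECT * FROM $tbl WHERE " . ($where || '1=1');
   my $expl = $dbh->selectrow_hashref($sql);
   return ($expl->{rows} || 0, $expl->{key});
}

my $dbh   = DBI->connect('DBI:mysql:database=test', 'user', 'pass',
                         { RaiseError => 1 });
my $where = 'c > 100';   # the value of --where, if any; '' when not given

my ($row_est, $mysql_index) = get_row_estimate(
   dbh   => $dbh,
   tbl   => '`test`.`t`',
   where => $where,
);

# Only prefer MySQL's chosen index when --where is used; otherwise ignore it
# and let the tool pick its own index (lp:978432).
$mysql_index = undef if !$where;

printf "row estimate: %s, preferred index: %s\n",
   $row_est, defined $mysql_index ? $mysql_index : '(none, tool picks its own)';

In the patch itself this gating happens once in can_nibble(), which passes the preference to _find_best_index() as the mysql_index argument.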
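
The statement-building half can be sketched the same way. The snippet below is again illustrative only (hypothetical table, index, and column names; the real code quotes identifiers with Quoter): FROM and ORDER BY are fixed up front, the chosen index is forced so nibbling ascends the table deterministically, and the first-lower-boundary statement is merely built here, the way the constructor stores it for _get_bounds() to run later.

# Illustrative sketch only -- hypothetical table, index, and column names.
use strict;
use warnings;

my $tbl        = '`test`.`t`';
my $index      = 'idx_c';        # index chosen by can_nibble()/_find_best_index()
my @index_cols = ('c', 'id');    # columns of that index
my $where      = 'c > 100';      # optional --where

# FROM and ORDER BY are shared by every statement; FORCE INDEX plus ORDER BY
# keep the ascent over the table deterministic.
my $from     = "$tbl FORCE INDEX(`$index`)";
my $order_by = join(', ', map { "`$_`" } @index_cols);

# Built once up front, executed later (cf. _get_bounds() in the diff above).
my $first_lb_sql
   = "SELECT /*!40001 SQL_NO_CACHE */ " . join(', ', map { "`$_`" } @index_cols)
   . " FROM $from"
   . ($where ? " WHERE $where" : '')
   . " ORDER BY $order_by"
   . " LIMIT 1"
   . " /*first lower boundary*/";

print "$first_lb_sql\n";

Deferring execution to _get_bounds() is what lets the resume path substitute its own effective first row instead of the real one.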