Restore original NibbleIterator and implement simpler solution: only use MySQL's chosen index if --where.

This commit is contained in:
Daniel Nichter
2012-05-08 12:43:47 -06:00
parent e2073065b1
commit 30b6b88766
2 changed files with 61 additions and 141 deletions

View File

@@ -3580,9 +3580,6 @@ sub new {
else {
my $index = $nibble_params->{index}; # brevity
my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
my $limit = $chunk_size - 1;
PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
my $asc = $args{TableNibbler}->generate_asc_stmt(
%args,
@@ -3593,52 +3590,18 @@ sub new {
);
PTDEBUG && _d('Ascend params:', Dumper($asc));
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
my $first_lb_sql
= "SELECT /*!40001 SQL_NO_CACHE */ "
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
. " FROM $tbl->{name}"
. " FROM $from"
. ($where ? " WHERE $where" : '')
. " ORDER BY $order_by"
. " LIMIT 1"
. " /*first lower boundary*/";
PTDEBUG && _d($first_lb_sql);
my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql);
PTDEBUG && _d('First lower boundary:', Dumper($first_lower));
if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) {
my $sql
= "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ "
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
. " FROM $tbl->{name}"
. " WHERE " . $asc->{boundaries}->{'>='}
. ($where ? " AND ($where)" : '')
. " ORDER BY $order_by"
. " LIMIT ?, 2"
. " /*get MySQL index*/";
my $sth = $cxn->dbh()->prepare($sql);
my $mysql_index = _get_mysql_index(
Cxn => $cxn,
sth => $sth,
params => [@$first_lower, $limit],
);
PTDEBUG && _d('MySQL index:', $mysql_index);
if ( lc($index) ne lc($mysql_index) ) {
my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index};
my $mysql_index_struct = $tbl->{tbl_struct}->{keys}->{$mysql_index};
warn "The best index for chunking $tbl->{name} is $index ("
. ($chosen_index_struct->{is_unique} ? "unique" : "not unique")
. ", covers " . scalar @{$chosen_index_struct->{cols}}
. " columns), but index $mysql_index ("
. ($mysql_index_struct->{is_unique} ? "unique" : "not unique")
. ", covers " . scalar @{$mysql_index_struct->{cols}}
. " columns) that MySQL chose will be used instead.\n";
$index = $mysql_index;
}
}
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);
my $resume_lb_sql;
if ( $args{resume} ) {
@@ -3700,11 +3663,14 @@ sub new {
. " /*explain $comments{nibble}*/";
PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
my $limit = $chunk_size - 1;
PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
$self = {
%args,
index => $index,
limit => $limit,
first_lower => $first_lower,
first_lb_sql => $first_lb_sql,
last_ub_sql => $last_ub_sql,
ub_sql => $ub_sql,
nibble_sql => $nibble_sql,
@@ -3892,12 +3858,18 @@ sub can_nibble {
}
my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
my $row_est = get_row_estimate(
my $where = $o->has('where') ? $o->get('where') : '';
my ($row_est, $mysql_index) = get_row_estimate(
Cxn => $cxn,
tbl => $tbl,
where => $o->has('where') ? $o->get('where') : '',
where => $where,
);
if ( !$where ) {
$mysql_index = undef;
}
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
? $row_est <= $chunk_size * $o->get('chunk-size-limit')
: 0;
@@ -3910,7 +3882,7 @@ sub can_nibble {
$one_nibble = 1;
}
my $index = _find_best_index(%args);
my $index = _find_best_index(%args, mysql_index => $mysql_index);
if ( !$index && !$one_nibble ) {
die "There is no good index and the table is oversized.";
}
@@ -4014,18 +3986,6 @@ sub _get_index_cardinality {
return $cardinality;
}
# Execute a prepared statement with the given bind values and return the
# `key` column of the first row it produces.
#
# Required named args:
#   Cxn    - connection object; unpacked below but not otherwise used here
#   sth    - prepared statement handle to execute
#   params - arrayref of bind values passed to execute()
#
# NOTE(review): the visible call sites prepare an EXPLAIN SELECT, so `key`
# is presumably the index MySQL chose for that query -- confirm for any
# other caller.
sub _get_mysql_index {
my (%args) = @_;
my @required_args = qw(Cxn sth params);
my ($cxn, $sth, $params) = @args{@required_args};
PTDEBUG && _d($sth->{Statement}, 'params:', @$params);
$sth->execute(@$params);
# Only the first row of the result is read.
my $row = $sth->fetchrow_hashref();
# Release the handle so it can be executed again later.
$sth->finish();
PTDEBUG && _d(Dumper($row));
return $row->{key};
}
sub get_row_estimate {
my (%args) = @_;
my @required_args = qw(Cxn tbl);
@@ -4035,11 +3995,11 @@ sub get_row_estimate {
my ($cxn, $tbl) = @args{@required_args};
my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
. "WHERE " . ($args{where} || '1=1 /*get row estimate*/');
. "WHERE " . ($args{where} || '1=1');
PTDEBUG && _d($sql);
my $expl = $cxn->dbh()->selectrow_hashref($sql);
PTDEBUG && _d(Dumper($expl));
return $expl->{rows} || 0;
return ($expl->{rows} || 0), $expl->{key};
}
sub _prepare_sths {
@@ -4071,6 +4031,9 @@ sub _get_bounds {
my $dbh = $self->{Cxn}->dbh();
$self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));
if ( my $nibble = $self->{resume} ) {
if ( defined $nibble->{lower_boundary}
&& defined $nibble->{upper_boundary} ) {
@@ -6430,7 +6393,7 @@ sub main {
my $chunk_size_limit = $o->get('chunk-size-limit');
my @too_large;
foreach my $slave ( @$slaves ) {
my $n_rows = NibbleIterator::get_row_estimate(
my ($n_rows) = NibbleIterator::get_row_estimate(
Cxn => $slave,
tbl => $tbl,
where => $o->get('where'),

View File

@@ -120,11 +120,8 @@ sub new {
else {
my $index = $nibble_params->{index}; # brevity
my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
my $limit = $chunk_size - 1;
PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
# Figure out how to nibble the table with the chosen index.
# Figure out how to nibble the table with the index.
my $asc = $args{TableNibbler}->generate_asc_stmt(
%args,
tbl_struct => $tbl->{tbl_struct},
@@ -134,71 +131,23 @@ sub new {
);
PTDEBUG && _d('Ascend params:', Dumper($asc));
# Get the real first lower boundary. Using this plus the chosen index,
# we'll see what index MySQL wants to use to ascend the table. This
# is only executed once, and the first lower boundary is saved so we
# can start nibbling from it later.
# Make SQL statements, prepared on first call to next(). FROM and
# ORDER BY are the same for all statements. FORCE INDEX and ORDER BY
# are needed to ensure deterministic nibbling.
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
# The real first row in the table. Usually we start nibbling from
# this row. Called once in _get_bounds().
my $first_lb_sql
= "SELECT /*!40001 SQL_NO_CACHE */ "
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
. " FROM $tbl->{name}"
. " FROM $from"
. ($where ? " WHERE $where" : '')
. " ORDER BY $order_by"
. " LIMIT 1"
. " /*first lower boundary*/";
PTDEBUG && _d($first_lb_sql);
my $first_lower = $cxn->dbh()->selectrow_arrayref($first_lb_sql);
PTDEBUG && _d('First lower boundary:', Dumper($first_lower));
# If the user didn't request a --chunk-index or they did but
# it wasn't chosen, then check which index MySQL wants to use
# to ascend the table.
if ( !$args{chunk_index} || (lc($args{chunk_index}) ne lc($index)) ) {
# This statement must be identical to the (poorly named) ub_sql below
# (aka "next chunk boundary") because ub_sql is what ascends the table
# and therefore might cause a table scan. The difference between this
# statement and the real ub_sql below is that here we do not add
# FORCE INDEX but let MySQL choose the index.
my $sql
= "EXPLAIN SELECT /*!40001 SQL_NO_CACHE */ "
. join(', ', map { $q->quote($_) } @{$asc->{scols}})
. " FROM $tbl->{name}"
. " WHERE " . $asc->{boundaries}->{'>='}
. ($where ? " AND ($where)" : '')
. " ORDER BY $order_by"
. " LIMIT ?, 2"
. " /*get MySQL index*/";
my $sth = $cxn->dbh()->prepare($sql);
my $mysql_index = _get_mysql_index(
Cxn => $cxn,
sth => $sth,
params => [@$first_lower, $limit],
);
PTDEBUG && _d('MySQL index:', $mysql_index);
if ( lc($index) ne lc($mysql_index) ) {
# Our chosen index and MySQL's chosen index are different.
# This probably happens due to a --where clause that we don't
# know anything about but MySQL can optimize for by using
# another index. We use MySQL's index instead of our chosen index
# because the MySQL optimizer should know best.
my $chosen_index_struct = $tbl->{tbl_struct}->{keys}->{$index};
my $mysql_index_struct = $tbl->{tbl_struct}->{keys}->{$mysql_index};
warn "The best index for chunking $tbl->{name} is $index ("
. ($chosen_index_struct->{is_unique} ? "unique" : "not unique")
. ", covers " . scalar @{$chosen_index_struct->{cols}}
. " columns), but index $mysql_index ("
. ($mysql_index_struct->{is_unique} ? "unique" : "not unique")
. ", covers " . scalar @{$mysql_index_struct->{cols}}
. " columns) that MySQL chose will be used instead.\n";
$index = $mysql_index;
}
}
# All statements from here on will use FORCE INDEX now that we know
# which index is best.
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
PTDEBUG && _d('First lower boundary statement:', $first_lb_sql);
# If we're resuming, this fetches the effective first row, which
# should differ from the real first row. Called once in _get_bounds().
@@ -275,11 +224,14 @@ sub new {
. " /*explain $comments{nibble}*/";
PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
my $limit = $chunk_size - 1;
PTDEBUG && _d('Initial chunk size (LIMIT):', $limit);
$self = {
%args,
index => $index,
limit => $limit,
first_lower => $first_lower,
first_lb_sql => $first_lb_sql,
last_ub_sql => $last_ub_sql,
ub_sql => $ub_sql,
nibble_sql => $nibble_sql,
@@ -476,13 +428,26 @@ sub can_nibble {
}
my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
my $where = $o->has('where') ? $o->get('where') : '';
# About how many rows are there?
my $row_est = get_row_estimate(
my ($row_est, $mysql_index) = get_row_estimate(
Cxn => $cxn,
tbl => $tbl,
where => $o->has('where') ? $o->get('where') : '',
where => $where,
);
# MySQL's chosen index is only something we should prefer
# if --where is used. Else, we can choose our own index
# and disregard the MySQL index from the row estimate.
# If there's a --where, however, then MySQL's chosen index
# is used because it tells us how MySQL plans to optimize
# for the --where.
# https://bugs.launchpad.net/percona-toolkit/+bug/978432
if ( !$where ) {
$mysql_index = undef;
}
# Can all those rows be nibbled in one chunk? If one_nibble is defined,
# then do as it says; else, look at the chunk size limit.
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
@@ -500,7 +465,7 @@ sub can_nibble {
}
# Get an index to nibble by. We'll order rows by the index's columns.
my $index = _find_best_index(%args);
my $index = _find_best_index(%args, mysql_index => $mysql_index);
if ( !$index && !$one_nibble ) {
die "There is no good index and the table is oversized.";
}
@@ -609,18 +574,6 @@ sub _get_index_cardinality {
return $cardinality;
}
# Execute a prepared statement with the given bind values and return the
# `key` column of the first row it produces.
#
# Required named args:
#   Cxn    - connection object; unpacked below but not otherwise used here
#   sth    - prepared statement handle to execute
#   params - arrayref of bind values passed to execute()
#
# NOTE(review): the visible call sites prepare an EXPLAIN SELECT, so `key`
# is presumably the index MySQL chose for that query -- confirm for any
# other caller.
sub _get_mysql_index {
my (%args) = @_;
my @required_args = qw(Cxn sth params);
my ($cxn, $sth, $params) = @args{@required_args};
PTDEBUG && _d($sth->{Statement}, 'params:', @$params);
$sth->execute(@$params);
# Only the first row of the result is read.
my $row = $sth->fetchrow_hashref();
# Release the handle so it can be executed again later.
$sth->finish();
PTDEBUG && _d(Dumper($row));
return $row->{key};
}
sub get_row_estimate {
my (%args) = @_;
my @required_args = qw(Cxn tbl);
@@ -630,11 +583,11 @@ sub get_row_estimate {
my ($cxn, $tbl) = @args{@required_args};
my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
. "WHERE " . ($args{where} || '1=1 /*get row estimate*/');
. "WHERE " . ($args{where} || '1=1');
PTDEBUG && _d($sql);
my $expl = $cxn->dbh()->selectrow_hashref($sql);
PTDEBUG && _d(Dumper($expl));
return $expl->{rows} || 0;
return ($expl->{rows} || 0), $expl->{key};
}
sub _prepare_sths {
@@ -666,6 +619,10 @@ sub _get_bounds {
my $dbh = $self->{Cxn}->dbh();
# Get the real first lower boundary.
$self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
PTDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));
# The next boundary is the first lower boundary. If resuming,
# this should be something > the real first lower boundary and
# bounded (else it's not one of our chunks).