mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-10 05:00:45 +00:00
423 lines
14 KiB
Perl
423 lines
14 KiB
Perl
# This program is copyright 2011 Percona Inc.
|
|
# Feedback and improvements are welcome.
|
|
#
|
|
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
|
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
|
# licenses.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
# ###########################################################################
|
|
# NibbleIterator package
|
|
# ###########################################################################
|
|
{
|
|
# Package: NibbleIterator
|
|
# NibbleIterator nibbles tables.
|
|
package NibbleIterator;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
|
|
|
use Data::Dumper;
|
|
$Data::Dumper::Indent = 1;
|
|
$Data::Dumper::Sortkeys = 1;
|
|
$Data::Dumper::Quotekeys = 0;
|
|
|
|
# Sub: new
#   Build a NibbleIterator.  No SQL is executed here: the statements are
#   only assembled as strings and stored in the object; they are prepared
#   and run on the first call to next().
#
# Required Arguments:
#   dbh          - dbh
#   tbl          - Standard tbl ref
#   chunk_size   - Number of rows to nibble per chunk
#   OptionParser - <OptionParser> object
#   TableNibbler - <TableNibbler> object
#   TableParser  - <TableParser> object
#   Quoter       - <Quoter> object
#
# Optional Arguments:
#   chunk_index - Index to use for nibbling
#   where       - Condition added to every generated statement
#   select      - Select list used for the nibble statements instead of
#                 the quoted ascend columns
#   dms         - Statement prefix used for the nibble statements instead
#                 of "SELECT "
#   callbacks   - Hashref of coderefs; next() looks up the keys init,
#                 exec_nibble, after_nibble and done
#
# Returns:
#   NibbleIterator object
sub new {
   my ( $class, %args ) = @_;
   my @required_args = qw(dbh tbl chunk_size OptionParser Quoter TableNibbler TableParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl, $chunk_size, $q) = @args{qw(tbl chunk_size Quoter)};

   # Get an index to nibble by.  We'll order rows by the index's columns.
   my $index = $args{TableParser}->find_best_index(
      $tbl->{tbl_struct},
      $args{chunk_index},
   );
   die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
   my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};

   # Figure out how to nibble the table with the index.
   my $asc = $args{TableNibbler}->generate_asc_stmt(
      %args,
      tbl_struct => $tbl->{tbl_struct},
      index      => $index,
      asc_only   => 1,
   );
   MKDEBUG && _d('Ascend params:', Dumper($asc));

   # Make SQL statements, prepared on first call to next().  FROM and
   # ORDER BY are the same for all statements.  FORCE INDEX and ORDER BY
   # are needed to ensure deterministic nibbling.
   my $from     = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
   my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});

   # Fragments shared by several of the statements below.
   my $boundary_cols = join(', ', map { $q->quote($_) } @{$asc->{scols}});
   my $order_by_desc
      = join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC';
   my $where_alone   = $args{where} ? " WHERE $args{where}"  : '';
   my $where_and     = $args{where} ? " AND ($args{where})"  : '';

   # These statements are only executed once, so they don't use sths.
   my $first_lb_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . $boundary_cols
      . " FROM $from"
      . $where_alone
      . " ORDER BY $order_by"
      . " LIMIT 1"
      . " /*first lower boundary*/";
   MKDEBUG && _d('First lower boundary statement:', $first_lb_sql);

   my $last_ub_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . $boundary_cols
      . " FROM $from"
      . $where_alone
      . " ORDER BY "
      . $order_by_desc
      . " LIMIT 1"
      . " /*last upper boundary*/";
   MKDEBUG && _d('Last upper boundary statement:', $last_ub_sql);

   # Nibbles are inclusive, so for a..z, the nibbles are: a-e, f-j, k-o, p-t,
   # u-y, and z.  This complicates getting the next upper boundary because
   # if we use either (col >= lb AND col < ub) or (col > lb AND col <= ub)
   # in nibble_sql (below), then that fails for either the last or first
   # nibble respectively.  E.g. (col >= z AND col < z) doesn't work, nor
   # does (col > a AND col <= e).  Hence the fancy LIMIT 2 which returns
   # the upper boundary for the current nibble *and* the lower boundary
   # for the next nibble.  See _next_boundaries().
   my $ub_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . $boundary_cols
      . " FROM $from"
      . " WHERE " . $asc->{boundaries}->{'>='}
      . $where_and
      . " ORDER BY $order_by"
      . " LIMIT ?, 2"
      . " /*upper boundary*/";
   MKDEBUG && _d('Upper boundary statement:', $ub_sql);

   # Pieces common to the nibble statements and their EXPLAIN twins.
   my $nibble_verb = $args{dms} ? "$args{dms} " : "SELECT ";
   my $nibble_cols = $args{select}
                   ? $args{select}
                   : join(', ', map { $q->quote($_) } @{$asc->{cols}});
   my $nibble_range
      = " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
      . " AND "   . $asc->{boundaries}->{'<='}; # upper boundary

   # This statement does the actual nibbling work; its rows are returned
   # to the caller via next().
   my $nibble_sql
      = $nibble_verb
      . $nibble_cols
      . " FROM $from"
      . $nibble_range
      . $where_and
      . " ORDER BY $order_by"
      . " /*nibble*/";
   MKDEBUG && _d('Nibble statement:', $nibble_sql);

   my $explain_nibble_sql
      = "EXPLAIN SELECT "
      . $nibble_cols
      . " FROM $from"
      . $nibble_range
      . $where_and
      . " ORDER BY $order_by"
      . " /*explain nibble*/";
   MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);

   # If the chunk size is >= number of rows in table, then we don't
   # need to chunk; we can just select all rows, in order, at once.
   # There are no boundary conditions in this statement, so the optional
   # where must be introduced with WHERE, not AND.
   my $one_nibble_sql
      = $nibble_verb
      . $nibble_cols
      . " FROM $from"
      . $where_alone
      . " ORDER BY $order_by"
      . " /*one nibble*/";
   MKDEBUG && _d('One nibble statement:', $one_nibble_sql);

   my $explain_one_nibble_sql
      = "EXPLAIN SELECT "
      . $nibble_cols
      . " FROM $from"
      . $where_alone
      . " ORDER BY $order_by"
      . " /*explain one nibble*/";
   MKDEBUG && _d('Explain one nibble statement:', $explain_one_nibble_sql);

   # The upper boundary statement uses "LIMIT ?, 2", so the offset of the
   # last row of a chunk is the chunk size minus one.
   my $limit = $chunk_size - 1;
   MKDEBUG && _d('Initial chunk size (LIMIT):', $limit);

   my $self = {
      %args,
      asc                    => $asc,
      index                  => $index,
      from                   => $from,
      order_by               => $order_by,
      limit                  => $limit,
      first_lb_sql           => $first_lb_sql,
      last_ub_sql            => $last_ub_sql,
      ub_sql                 => $ub_sql,
      nibble_sql             => $nibble_sql,
      explain_nibble_sql     => $explain_nibble_sql,
      one_nibble_sql         => $one_nibble_sql,
      explain_one_nibble_sql => $explain_one_nibble_sql,
      nibbleno               => 0,
      have_rows              => 0,
      rowno                  => 0,
   };

   return bless $self, $class;
}
|
|
|
|
# Sub: next
#   Return the next row of the table, nibble by nibble.
#
# Returns:
#   A fresh arrayref holding the next row, or nothing once every nibble
#   has been exhausted.
sub next {
   my ($self) = @_;

   # Lazy initialisation: nothing is prepared or queried until the first
   # row is requested (nibbleno is still 0 at that point).
   if ( $self->{nibbleno} == 0 ) {
      $self->_can_nibble_once();
      $self->_prepare_sths();
      $self->_get_bounds();
      # $self->_check_index_usage();  # intentionally not called
      my $on_init = $self->{callbacks}->{init};
      $on_init->() if $on_init;
   }

   # Keep going while the current nibble still has rows to hand out or
   # another pair of boundaries can be found.
   NIBBLE:
   while ( $self->{have_rows} || $self->_next_boundaries() ) {

      # Not in the middle of a nibble, so _next_boundaries() has just
      # supplied fresh boundaries: execute the nibble statement for them.
      unless ( $self->{have_rows} ) {
         $self->{nibbleno}++;
         MKDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
            join(', ', (@{$self->{lb}}, @{$self->{ub}})));

         my $on_exec = $self->{callbacks}->{exec_nibble};
         if ( $on_exec ) {
            # The callback runs the nibble itself; its return value is
            # taken as the nibble's row count.
            $self->{have_rows} = $on_exec->(
               dbh            => $self->{dbh},
               tbl            => $self->{tbl},
               sth            => $self->{nibble_sth},
               lb             => $self->{lb},
               ub             => $self->{ub},
               nibbleno       => $self->{nibbleno},
               explain_sth    => $self->{explain_sth},
               NibbleIterator => $self,
            );
         }
         else {
            $self->{nibble_sth}->execute(@{$self->{lb}}, @{$self->{ub}});
            $self->{have_rows} = $self->{nibble_sth}->rows();
         }
      }

      # Hand out the next row of this nibble, if any is left.
      if ( my $n_rows = $self->{have_rows} ) {
         MKDEBUG && _d($n_rows, 'rows in nibble', $self->{nibbleno});
         # sth->{Active} is always true with DBD::mysql v3, so the
         # status is tracked manually through have_rows.
         if ( my $fetched = $self->{nibble_sth}->fetchrow_arrayref() ) {
            $self->{rowno}++;
            MKDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
            # fetchrow_arrayref re-uses an internal arrayref, so copy it.
            return [ @$fetched ];
         }
      }

      # The nibble is finished (or had no rows): notify and reset the
      # per-nibble state before looking for the next boundaries.
      MKDEBUG && _d('No rows in nibble or nibble skipped');
      my $on_after = $self->{callbacks}->{after_nibble};
      if ( $on_after ) {
         $on_after->(
            dbh            => $self->{dbh},
            tbl            => $self->{tbl},
            nibbleno       => $self->{nibbleno},
            explain_sth    => $self->{explain_sth},
            NibbleIterator => $self,
         );
      }
      $self->{rowno}     = 0;
      $self->{have_rows} = 0;
   }

   MKDEBUG && _d('Done nibbling');
   my $on_done = $self->{callbacks}->{done};
   if ( $on_done ) {
      $on_done->(
         dbh => $self->{dbh},
         tbl => $self->{tbl},
      );
   }
   return;
}
|
|
|
|
# Sub: nibble_number
#   Accessor for the nibble counter, which starts at 0 and is incremented
#   by next() each time a new nibble is executed.
#
# Returns:
#   The current value of nibbleno
sub nibble_number {
   my $self = shift;
   my $current_nibble = $self->{nibbleno};
   return $current_nibble;
}
|
|
|
|
# Sub: nibble_index
#   Accessor for the index chosen in new() to nibble the table.
#
# Returns:
#   The index name stored in the object
sub nibble_index {
   my $self = shift;
   my $chosen_index = $self->{index};
   return $chosen_index;
}
|
|
|
|
# Sub: set_chunk_size
#   Change the chunk size used for subsequent upper boundary lookups.
#
# Parameters:
#   $limit - New chunk size in rows
#
# The stored limit is the chunk size minus one because it is bound to the
# offset of the upper boundary statement's "LIMIT ?, 2".
sub set_chunk_size {
   my ($self, $limit) = @_;
   MKDEBUG && _d('Setting new chunk size (LIMIT):', $limit);
   my $offset = $limit - 1;
   $self->{limit} = $offset;
   return;
}
|
|
|
|
# Sub: _can_nibble_once
#   Decide whether the whole table can be read as a single nibble, by
#   comparing the row count from the table status with the chunk-size
#   option.  The decision is cached in $self->{one_nibble}.
#
# Returns:
#   1 if the table fits in one nibble, else 0
sub _can_nibble_once {
   my ($self) = @_;
   my $tbl = $self->{tbl};

   my ($status) = $self->{TableParser}->get_table_status(
      $self->{dbh}, $tbl->{db}, $tbl->{tbl});

   # Missing values fall back to 0 rows and a chunk size of 1.
   my $n_rows     = $status->{rows} || 0;
   my $chunk_size = $self->{OptionParser}->get('chunk-size') || 1;

   my $fits_in_one = $n_rows <= $chunk_size ? 1 : 0;
   $self->{one_nibble} = $fits_in_one;
   MKDEBUG && _d('One nibble:', $self->{one_nibble} ? 'yes' : 'no');
   return $self->{one_nibble};
}
|
|
|
|
# Sub: _prepare_sths
#   Prepare the statement handles needed for nibbling and store them in
#   the object.  A handle that already exists is left alone, so calling
#   this more than once does not re-prepare anything.
#
#   In one-nibble mode only the nibble and explain handles are needed;
#   otherwise the upper boundary handle is prepared as well.
sub _prepare_sths {
   my ($self) = @_;
   MKDEBUG && _d('Preparing statement handles');

   # Each entry maps the slot holding the handle to the slot holding its SQL.
   my @wanted = $self->{one_nibble}
      ? (
           [ nibble_sth  => 'one_nibble_sql'         ],
           [ explain_sth => 'explain_one_nibble_sql' ],
        )
      : (
           [ ub_sth      => 'ub_sql'             ],
           [ nibble_sth  => 'nibble_sql'         ],
           [ explain_sth => 'explain_nibble_sql' ],
        );

   foreach my $pair ( @wanted ) {
      my ($sth_slot, $sql_slot) = @$pair;
      next if $self->{$sth_slot};
      $self->{$sth_slot} = $self->{dbh}->prepare($self->{$sql_slot});
   }

   return;
}
|
|
|
|
# Sub: _get_bounds
#   Fetch the first lower boundary and the last upper boundary of the
#   table and store them as next_lb and last_ub.  Does nothing in
#   one-nibble mode, which uses no boundaries.
#
#   If the first lower boundary query returns no row, there is nothing to
#   nibble.  In that case no_more_boundaries is set so that
#   _next_boundaries() stops immediately instead of dereferencing an
#   undefined boundary and executing the upper boundary statement without
#   its bind values.
sub _get_bounds {
   my ($self) = @_;
   return if $self->{one_nibble};

   $self->{next_lb} = $self->{dbh}->selectrow_arrayref($self->{first_lb_sql});
   MKDEBUG && _d('First lower boundary:', Dumper($self->{next_lb}));

   if ( !$self->{next_lb} ) {
      MKDEBUG && _d('No first lower boundary, nothing to nibble');
      $self->{no_more_boundaries} = 1;
      return;
   }

   $self->{last_ub} = $self->{dbh}->selectrow_arrayref($self->{last_ub_sql});
   MKDEBUG && _d('Last upper boundary:', Dumper($self->{last_ub}));

   return;
}
|
|
|
|
# Sub: _check_index_usage
#   Verify that the index reported in the "key" column of an EXPLAIN
#   result is the chunk index, and die if it is not.  A failure to run
#   the statement only produces a warning.
#
#   NOTE(review): the statement passed to selectall_arrayref is an empty
#   string, and the only call to this sub (in next()) is commented out.
#   The intended EXPLAIN statement cannot be determined from this file,
#   so it is left as is -- confirm before enabling this check.
sub _check_index_usage {
   my ($self) = @_;
   my ($dbh, $tbl) = @{$self}{qw(dbh tbl)};

   my $explain;
   eval {
      $explain = $dbh->selectall_arrayref("", {Slice => {}});
   };
   if ( $EVAL_ERROR ) {
      warn "Cannot check if MySQL is using the chunk index: $EVAL_ERROR";
      return;
   }

   # Compare case-insensitively: the EXPLAIN key is lower-cased here, so
   # the chunk index must be lower-cased too or a name containing
   # uppercase letters would never match.
   my $explain_index = lc($explain->[0]->{key} || '');
   my $chunk_index   = lc($self->{index}       || '');
   MKDEBUG && _d('EXPLAIN index:', $explain_index);
   if ( $explain_index ne $chunk_index ) {
      die "Cannot nibble table $tbl->{db}.$tbl->{tbl} because MySQL chose "
         . ($explain_index ? "the `$explain_index`" : 'no') . ' index'
         . " instead of the chunk index `$self->{asc}->{index}`";
   }

   return;
}
|
|
|
|
# Sub: _next_boundaries
#   Set the lower (lb) and upper (ub) boundaries for the next nibble.
#
# Returns:
#   1 if boundaries for another nibble were set, nothing if there are no
#   more boundaries.
sub _next_boundaries {
   my ($self) = @_;

   if ( $self->{no_more_boundaries} ) {
      MKDEBUG && _d('No more boundaries');
      return;
   }

   # One-nibble mode has no boundary values; lb and ub share one empty
   # arrayref and this is the only nibble.
   if ( $self->{one_nibble} ) {
      $self->{lb} = $self->{ub} = [];
      $self->{no_more_boundaries} = 1;  # for next call
      return 1;
   }

   # This nibble starts where the previous lookup said the next one would.
   $self->{lb} = $self->{next_lb};

   my $ub_sth = $self->{ub_sth};
   MKDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
      join(', ', @{$self->{lb}}), $self->{limit});
   $ub_sth->execute(@{$self->{lb}}, $self->{limit});
   my $found = $ub_sth->fetchall_arrayref();
   MKDEBUG && _d('Next boundary:', Dumper($found));

   # The statement returns up to two rows: this nibble's upper boundary
   # and, if present, the next nibble's lower boundary.
   my ($this_ub, $following_lb) = $found ? @$found : ();

   if ( !$this_ub ) {
      # Nothing at the requested offset, so this nibble runs up to the
      # last upper boundary of the table.
      $self->{no_more_boundaries} = 1;  # for next call
      $self->{ub} = $self->{last_ub};
      MKDEBUG && _d('Last upper boundary:', Dumper($self->{ub}));
   }
   else {
      $self->{ub} = $this_ub;  # this nibble
      if ( $following_lb ) {
         $self->{next_lb} = $following_lb;  # next nibble
      }
      else {
         $self->{no_more_boundaries} = 1;  # for next call
         MKDEBUG && _d('Last upper boundary:', Dumper($found->[0]));
      }
   }
   $ub_sth->finish();

   return 1;  # have boundary
}
|
|
|
|
# Sub: DESTROY
#   Finish every statement handle stored in the object, i.e. every value
#   whose key ends in "_sth".
#
#   Slots that hold no handle are skipped: calling finish() on an
#   undefined value would raise an error inside the destructor.
sub DESTROY {
   my ( $self ) = @_;
   foreach my $key ( keys %$self ) {
      next unless $key =~ m/_sth$/;
      my $sth = $self->{$key};
      $sth->finish() if $sth;
   }
   return;
}
|
|
|
|
# Sub: _d
#   Print a debug line to STDERR, prefixed with "# ", the calling package,
#   the calling line number and the process ID.  Undefined arguments are
#   printed as the word "undef", and every embedded newline is followed by
#   "# " so that multi-line values stay commented.
sub _d {
   my ($package, undef, $line) = caller 0;
   my @words;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @words, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @words), "\n";
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End NibbleIterator package
|
|
# ###########################################################################
|