mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-12-24 02:01:42 +08:00
Merge pt-osc-2.1. Fix NibbleItertor.pm to work with OobNibbleIterator when resuming at oob boundaries (t/pt-table-checksum/resume.t was failing).
This commit is contained in:
@@ -61,51 +61,52 @@ sub new {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
}
|
||||
my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args};
|
||||
|
||||
my $where = $o->get('where');
|
||||
my ($row_est, $mysql_index) = get_row_estimate(%args, where => $where);
|
||||
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
|
||||
? $row_est <= $chunk_size * $o->get('chunk-size-limit')
|
||||
: 0;
|
||||
PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
|
||||
|
||||
if ( $args{resume}
|
||||
&& !defined $args{resume}->{lower_boundary}
|
||||
&& !defined $args{resume}->{upper_boundary} ) {
|
||||
PTDEBUG && _d('Resuming from one nibble table');
|
||||
$one_nibble = 1;
|
||||
}
|
||||
|
||||
# Get an index to nibble by. We'll order rows by the index's columns.
|
||||
my $index = _find_best_index(%args, mysql_index => $mysql_index);
|
||||
if ( !$index && !$one_nibble ) {
|
||||
die "There is no good index and the table is oversized.";
|
||||
# Die unless table can be nibbled, else return row estimate, nibble index,
|
||||
# and if table can be nibbled in one chunk.
|
||||
my $nibble_params = can_nibble(%args);
|
||||
|
||||
# Text appended to the queries in comments so caller can identify
|
||||
# them in processlist, binlog, etc.
|
||||
my %comments = (
|
||||
bite => "bite table",
|
||||
nibble => "nibble table",
|
||||
);
|
||||
if ( $args{comments} ) {
|
||||
map { $comments{$_} = $args{comments}->{$_} }
|
||||
grep { defined $args{comments}->{$_} }
|
||||
keys %{$args{comments}};
|
||||
}
|
||||
|
||||
my $where = $o->has('where') ? $o->get('where') : '';
|
||||
my $tbl_struct = $tbl->{tbl_struct};
|
||||
my $ignore_col = $o->get('ignore-columns') || {};
|
||||
my $all_cols = $o->get('columns') || $tbl_struct->{cols};
|
||||
my $ignore_col = $o->has('ignore-columns')
|
||||
? ($o->get('ignore-columns') || {})
|
||||
: {};
|
||||
my $all_cols = $o->has('columns')
|
||||
? ($o->get('columns') || $tbl_struct->{cols})
|
||||
: $tbl_struct->{cols};
|
||||
my @cols = grep { !$ignore_col->{$_} } @$all_cols;
|
||||
my $self;
|
||||
if ( $one_nibble ) {
|
||||
if ( $nibble_params->{one_nibble} ) {
|
||||
# If the chunk size is >= number of rows in table, then we don't
|
||||
# need to chunk; we can just select all rows, in order, at once.
|
||||
my $nibble_sql
|
||||
= ($args{dml} ? "$args{dml} " : "SELECT ")
|
||||
. ($args{select} ? $args{select}
|
||||
: join(', ', map { $q->quote($_) } @cols))
|
||||
. " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
|
||||
. " FROM $tbl->{name}"
|
||||
. ($where ? " WHERE $where" : '')
|
||||
. " /*checksum table*/";
|
||||
. " /*$comments{bite}*/";
|
||||
PTDEBUG && _d('One nibble statement:', $nibble_sql);
|
||||
|
||||
my $explain_nibble_sql
|
||||
= "EXPLAIN SELECT "
|
||||
. ($args{select} ? $args{select}
|
||||
: join(', ', map { $q->quote($_) } @cols))
|
||||
. " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
|
||||
. " FROM $tbl->{name}"
|
||||
. ($where ? " WHERE $where" : '')
|
||||
. " /*explain checksum table*/";
|
||||
. " /*explain $comments{bite}*/";
|
||||
PTDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql);
|
||||
|
||||
$self = {
|
||||
@@ -117,6 +118,7 @@ sub new {
|
||||
};
|
||||
}
|
||||
else {
|
||||
my $index = $nibble_params->{index}; # brevity
|
||||
my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
|
||||
|
||||
# Figure out how to nibble the table with the index.
|
||||
@@ -132,7 +134,7 @@ sub new {
|
||||
# Make SQL statements, prepared on first call to next(). FROM and
|
||||
# ORDER BY are the same for all statements. FORCE IDNEX and ORDER BY
|
||||
# are needed to ensure deterministic nibbling.
|
||||
my $from = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
|
||||
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
|
||||
my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
|
||||
|
||||
# The real first row in the table. Usually we start nibbling from
|
||||
@@ -207,7 +209,7 @@ sub new {
|
||||
. " AND " . $asc->{boundaries}->{'<='} # upper boundary
|
||||
. ($where ? " AND ($where)" : '')
|
||||
. ($args{order_by} ? " ORDER BY $order_by" : "")
|
||||
. " /*checksum chunk*/";
|
||||
. " /*$comments{nibble}*/";
|
||||
PTDEBUG && _d('Nibble statement:', $nibble_sql);
|
||||
|
||||
my $explain_nibble_sql
|
||||
@@ -219,7 +221,7 @@ sub new {
|
||||
. " AND " . $asc->{boundaries}->{'<='} # upper boundary
|
||||
. ($where ? " AND ($where)" : '')
|
||||
. ($args{order_by} ? " ORDER BY $order_by" : "")
|
||||
. " /*explain checksum chunk*/";
|
||||
. " /*explain $comments{nibble}*/";
|
||||
PTDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
|
||||
|
||||
my $limit = $chunk_size - 1;
|
||||
@@ -246,7 +248,7 @@ sub new {
|
||||
};
|
||||
}
|
||||
|
||||
$self->{row_est} = $row_est;
|
||||
$self->{row_est} = $nibble_params->{row_est},
|
||||
$self->{nibbleno} = 0;
|
||||
$self->{have_rows} = 0;
|
||||
$self->{rowno} = 0;
|
||||
@@ -418,6 +420,52 @@ sub row_estimate {
|
||||
return $self->{row_est};
|
||||
}
|
||||
|
||||
sub can_nibble {
|
||||
my (%args) = @_;
|
||||
my @required_args = qw(Cxn tbl chunk_size OptionParser TableParser);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
}
|
||||
my ($cxn, $tbl, $chunk_size, $o) = @args{@required_args};
|
||||
|
||||
# About how many rows are there?
|
||||
my ($row_est, $mysql_index) = get_row_estimate(
|
||||
Cxn => $cxn,
|
||||
tbl => $tbl,
|
||||
where => $o->has('where') ? $o->get('where') : '',
|
||||
);
|
||||
|
||||
# Can all those rows be nibbled in one chunk? If one_nibble is defined,
|
||||
# then do as it says; else, look at the chunk size limit.
|
||||
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
|
||||
? $row_est <= $chunk_size * $o->get('chunk-size-limit')
|
||||
: 0;
|
||||
PTDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
|
||||
|
||||
# Special case: we're resuming and there's no boundaries, so the table
|
||||
# being resumed was originally nibbled in one chunk, so do the same again.
|
||||
if ( $args{resume}
|
||||
&& !defined $args{resume}->{lower_boundary}
|
||||
&& !defined $args{resume}->{upper_boundary} ) {
|
||||
PTDEBUG && _d('Resuming from one nibble table');
|
||||
$one_nibble = 1;
|
||||
}
|
||||
|
||||
# Get an index to nibble by. We'll order rows by the index's columns.
|
||||
my $index = _find_best_index(%args, mysql_index => $mysql_index);
|
||||
if ( !$index && !$one_nibble ) {
|
||||
die "There is no good index and the table is oversized.";
|
||||
}
|
||||
|
||||
# The table can be nibbled if this point is reached, else we would have
|
||||
# died earlier. Return some values about nibbling the table.
|
||||
return {
|
||||
row_est => $row_est, # nibble about this many rows
|
||||
index => $index, # using this index
|
||||
one_nibble => $one_nibble, # if the table fits in one nibble/chunk
|
||||
};
|
||||
}
|
||||
|
||||
sub _find_best_index {
|
||||
my (%args) = @_;
|
||||
my @required_args = qw(Cxn tbl TableParser);
|
||||
@@ -494,14 +542,18 @@ sub _find_best_index {
|
||||
|
||||
sub _get_index_cardinality {
|
||||
my (%args) = @_;
|
||||
my @required_args = qw(Cxn tbl index Quoter);
|
||||
my ($cxn, $tbl, $index, $q) = @args{@required_args};
|
||||
my @required_args = qw(Cxn tbl index);
|
||||
my ($cxn, $tbl, $index) = @args{@required_args};
|
||||
|
||||
my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
|
||||
. " WHERE Key_name = '$index'";
|
||||
my $sql = "SHOW INDEXES FROM $tbl->{name} "
|
||||
. "WHERE Key_name = '$index'";
|
||||
PTDEBUG && _d($sql);
|
||||
my $cardinality = 1;
|
||||
my $rows = $cxn->dbh()->selectall_hashref($sql, 'key_name');
|
||||
my $dbh = $cxn->dbh();
|
||||
my $key_name = $dbh && ($dbh->{FetchHashKeyName} || '') eq 'NAME_lc'
|
||||
? 'key_name'
|
||||
: 'Key_name';
|
||||
my $rows = $dbh->selectall_hashref($sql, $key_name);
|
||||
foreach my $row ( values %$rows ) {
|
||||
$cardinality *= $row->{cardinality} if $row->{cardinality};
|
||||
}
|
||||
@@ -512,6 +564,9 @@ sub _get_index_cardinality {
|
||||
sub get_row_estimate {
|
||||
my (%args) = @_;
|
||||
my @required_args = qw(Cxn tbl);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
}
|
||||
my ($cxn, $tbl) = @args{@required_args};
|
||||
|
||||
my $sql = "EXPLAIN SELECT * FROM $tbl->{name} "
|
||||
@@ -578,14 +633,19 @@ sub _get_bounds {
|
||||
# This happens if we resume from the end of the table, or if the
|
||||
# last chunk for resuming isn't bounded.
|
||||
PTDEBUG && _d('At end of table, or no more boundaries to resume');
|
||||
|
||||
# Get the real last upper boundary, i.e. the last row of the table
|
||||
# at this moment. If rows are inserted after, we won't see them.
|
||||
# This is required for OobNibbleIterator because if we resume at
|
||||
# the lower or upper oob nibble, we also need to know the last upper
|
||||
# boundary of the table (we already have the first).
|
||||
$self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
|
||||
PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
|
||||
$self->{no_more_boundaries} = 1;
|
||||
|
||||
$self->{no_more_boundaries} = 1;
|
||||
}
|
||||
|
||||
# Get the real last upper boundary, i.e. the last row of the table
|
||||
# at this moment. If rows are inserted after, we won't see them.
|
||||
$self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
|
||||
PTDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -641,25 +701,70 @@ sub _next_boundaries {
|
||||
}
|
||||
}
|
||||
|
||||
# Two boundaries are being fetched: the upper boundary for this nibble,
|
||||
# i.e. the nibble the caller is trying to exec, and the next_lower boundary
|
||||
# for the next nibble that the caller will try to exec. For example,
|
||||
# if chunking the alphabet, a-z, with chunk size 3, the first call will
|
||||
# fetch:
|
||||
#
|
||||
# a <- lower
|
||||
# b
|
||||
# c <- upper ($boundary->[0])
|
||||
# d <- next_lower ($boundary->[1])
|
||||
#
|
||||
# Then the second call will fetch:
|
||||
#
|
||||
# d <- lower
|
||||
# e
|
||||
# f <- upper
|
||||
# g <- next_lower
|
||||
#
|
||||
# Why fetch both upper and next_lower? We wanted to keep nibbling simple,
|
||||
# i.e. one nibble statment, not one for the first nibble, one for "middle"
|
||||
# nibbles, and another for the end (this is how older code worked). So the
|
||||
# nibble statement is inclusive, but this requires both boundaries for
|
||||
# reasons explained in a comment above my $ub_sql in new().
|
||||
PTDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
|
||||
join(', ', @{$self->{lower}}), $self->{limit});
|
||||
$self->{ub_sth}->execute(@{$self->{lower}}, $self->{limit});
|
||||
my $boundary = $self->{ub_sth}->fetchall_arrayref();
|
||||
PTDEBUG && _d('Next boundary:', Dumper($boundary));
|
||||
if ( $boundary && @$boundary ) {
|
||||
$self->{upper} = $boundary->[0]; # this nibble
|
||||
# upper boundary for the current nibble.
|
||||
$self->{upper} = $boundary->[0];
|
||||
|
||||
if ( $boundary->[1] ) {
|
||||
$self->{next_lower} = $boundary->[1]; # next nibble
|
||||
# next_lower boundary for the next nibble (will become the lower
|
||||
# boundary when that nibble becomes the current nibble).
|
||||
$self->{next_lower} = $boundary->[1];
|
||||
}
|
||||
else {
|
||||
# There's no next_lower boundary, so the upper boundary of
|
||||
# the current nibble is the end of the table. For example,
|
||||
# if chunking a-z, then the upper boundary of the current
|
||||
# nibble ($boundary->[0]) is z.
|
||||
PTDEBUG && _d('End of table boundary:', Dumper($boundary->[0]));
|
||||
$self->{no_more_boundaries} = 1; # for next call
|
||||
PTDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
|
||||
|
||||
# OobNibbleIterator needs to know the last upper boundary.
|
||||
$self->{last_upper} = $boundary->[0];
|
||||
}
|
||||
}
|
||||
else {
|
||||
$self->{no_more_boundaries} = 1; # for next call
|
||||
$self->{upper} = $self->{last_upper};
|
||||
# This code is reached in cases like chunking a-z and the next_lower
|
||||
# boundary ($boundary->[1]) falls on z. When called again, no upper
|
||||
# or next_lower is found past z so if($boundary && @$boundary) is false.
|
||||
# But there's a problem: between the previous call that made next_lower=z
|
||||
# and this call, rows might have been inserted, so maybe z is no longer
|
||||
# the end of the table. To handle this, we fetch the end of the table
|
||||
# once and make the final nibble z-<whatever>.
|
||||
my $dbh = $self->{Cxn}->dbh();
|
||||
$self->{upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
|
||||
PTDEBUG && _d('Last upper boundary:', Dumper($self->{upper}));
|
||||
$self->{no_more_boundaries} = 1; # for next call
|
||||
|
||||
# OobNibbleIterator needs to know the last upper boundary.
|
||||
$self->{last_upper} = $self->{upper};
|
||||
}
|
||||
$self->{ub_sth}->finish();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user