mirror of
https://github.com/percona/percona-toolkit.git
synced 2026-03-30 02:00:12 +08:00
Merge nibble-iterator.
This commit is contained in:
444
lib/NibbleIterator.pm
Normal file
444
lib/NibbleIterator.pm
Normal file
@@ -0,0 +1,444 @@
|
||||
# This program is copyright 2011 Percona Inc.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# NibbleIterator package
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: NibbleIterator
|
||||
# NibbleIterator nibbles tables.
|
||||
package NibbleIterator;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
||||
|
||||
use Data::Dumper;
|
||||
$Data::Dumper::Indent = 1;
|
||||
$Data::Dumper::Sortkeys = 1;
|
||||
$Data::Dumper::Quotekeys = 0;
|
||||
|
||||
# Sub: new
#   Create a NibbleIterator and pre-build every SQL statement it will run.
#   Nothing is prepared or executed here; that work is delayed until the
#   first call to next().
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   dbh          - dbh
#   tbl          - Table ref with db, tbl and tbl_struct keys
#   OptionParser - Object whose get() returns chunk-index and chunk-size
#   Quoter       - Object with a quote() method
#   TableNibbler - Object with a generate_asc_stmt() method
#   TableParser  - Object with a find_best_index() method
#
# Optional Arguments:
#   where     - WHERE clause (without the WHERE keyword) limiting the rows
#   select    - Select list to use instead of the table's columns
#   dms       - Statement prefix to use instead of "SELECT " for the
#               nibble statements
#   callbacks - Hashref of coderefs; next() looks for init, exec_nibble,
#               after_nibble and done
#
# Returns:
#   NibbleIterator object
sub new {
   my ( $class, %args ) = @_;
   my @required_args = qw(dbh tbl OptionParser Quoter TableNibbler TableParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl, $o, $q) = @args{qw(tbl OptionParser Quoter)};

   # Get an index to nibble by.  We'll order rows by the index's columns.
   my $index = $args{TableParser}->find_best_index(
      $tbl->{tbl_struct},
      $o->get('chunk-index'),
   );
   die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
   my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};

   # Figure out how to nibble the table with the index.
   my $asc = $args{TableNibbler}->generate_asc_stmt(
      %args,
      tbl_struct => $tbl->{tbl_struct},
      index      => $index,
      asc_only   => 1,
   );
   MKDEBUG && _d('Ascend params:', Dumper($asc));

   # Make SQL statements, prepared on first call to next().  FROM and
   # ORDER BY are the same for all statements.  FORCE INDEX and ORDER BY
   # are needed to ensure deterministic nibbling.
   my $from     = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
   my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});

   # Pieces shared by several of the statements below.
   my $boundary_cols = join(', ', map { $q->quote($_) } @{$asc->{scols}});
   my $select_cols   = $args{select}
                     ? $args{select}
                     : join(', ', map { $q->quote($_) } @{$asc->{cols}});
   my $dms           = $args{dms} ? "$args{dms} " : "SELECT ";
   # The user's WHERE as the only condition, and as an additional condition.
   my $only_where    = $args{where} ? " WHERE $args{where}"   : '';
   my $and_where     = $args{where} ? " AND ($args{where})"   : '';

   # These statements are only executed once, so they don't use sths.
   my $first_lb_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . $boundary_cols
      . " FROM $from"
      . $only_where
      . " ORDER BY $order_by"
      . " LIMIT 1"
      . " /*first lower boundary*/";
   MKDEBUG && _d('First lower boundary statement:', $first_lb_sql);

   my $last_ub_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . $boundary_cols
      . " FROM $from"
      . $only_where
      . " ORDER BY "
      . join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC'
      . " LIMIT 1"
      . " /*last upper boundary*/";
   MKDEBUG && _d('Last upper boundary statement:', $last_ub_sql);

   # Nibbles are inclusive, so for a..z, the nibbles are: a-e, f-j, k-o, p-t,
   # u-y, and z.  This complicates getting the next upper boundary because
   # if we use either (col >= lb AND col < ub) or (col > lb AND col <= ub)
   # in nibble_sql (below), then that fails for either the last or first
   # nibble respectively.  E.g. (col >= z AND col < z) doesn't work, nor
   # does (col > a AND col <= e).  Hence the fancy LIMIT 2 which returns
   # the upper boundary for the current nibble *and* the lower boundary
   # for the next nibble.  See _next_boundaries().
   my $ub_sql = _make_ub_sql(
      cols     => $asc->{scols},
      from     => $from,
      where    => $asc->{boundaries}->{'>='} . $and_where,
      order_by => $order_by,
      limit    => $o->get('chunk-size'),
      Quoter   => $q,
   );

   # This statement does the actual nibbling work; its rows are returned
   # to the caller via next().
   my $nibble_tail
      = " FROM $from"
      . " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
      . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
      . $and_where
      . " ORDER BY $order_by";

   my $nibble_sql = $dms . $select_cols . $nibble_tail . " /*nibble*/";
   MKDEBUG && _d('Nibble statement:', $nibble_sql);

   my $explain_nibble_sql
      = "EXPLAIN SELECT " . $select_cols . $nibble_tail . " /*explain nibble*/";
   MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);

   # If the chunk size is >= number of rows in table, then we don't
   # need to chunk; we can just select all rows, in order, at once.
   # There are no boundary conditions here, so the user's WHERE (if any)
   # must be introduced with WHERE, not AND.
   my $one_nibble_tail
      = " FROM $from"
      . $only_where
      . " ORDER BY $order_by";

   my $one_nibble_sql
      = $dms . $select_cols . $one_nibble_tail . " /*one nibble*/";
   MKDEBUG && _d('One nibble statement:', $one_nibble_sql);

   my $explain_one_nibble_sql
      = "EXPLAIN SELECT " . $select_cols . $one_nibble_tail
      . " /*explain one nibble*/";
   MKDEBUG && _d('Explain one nibble statement:', $explain_one_nibble_sql);

   my $self = {
      %args,
      asc                    => $asc,
      index                  => $index,
      from                   => $from,
      order_by               => $order_by,
      first_lb_sql           => $first_lb_sql,
      last_ub_sql            => $last_ub_sql,
      ub_sql                 => $ub_sql,
      nibble_sql             => $nibble_sql,
      explain_nibble_sql     => $explain_nibble_sql,
      one_nibble_sql         => $one_nibble_sql,
      explain_one_nibble_sql => $explain_one_nibble_sql,
      nibbleno               => 0,
      have_rows              => 0,
      rowno                  => 0,
   };

   return bless $self, $class;
}
|
||||
|
||||
# Sub: next
#   Return the next row of the table, fetching a new nibble whenever the
#   current one is exhausted.
#
# Returns:
#   Arrayref copy of the next row, or nothing when there are no more rows
sub next {
   my ($self) = @_;

   # First call, init everything.  This could be done in new(), but
   # all work is delayed until actually needed.
   if ( 0 == $self->{nibbleno} ) {
      $self->_can_nibble_once();
      $self->_prepare_sths();
      $self->_get_bounds();
      # $self->_check_index_usage();
      my $on_init = $self->{callbacks}->{init};
      $on_init->() if $on_init;
   }

   # Keep going while the current nibble still has rows or there is
   # another nibble to start.
   NIBBLE:
   while ( $self->{have_rows} || $self->_next_boundaries() ) {

      # No rows means _next_boundaries() just gave us the boundaries
      # of a new nibble, so execute it.
      unless ( $self->{have_rows} ) {
         $self->{nibbleno}++;
         MKDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
            join(', ', (@{$self->{lb}}, @{$self->{ub}})));
         my $on_exec = $self->{callbacks}->{exec_nibble};
         if ( $on_exec ) {
            # The callback executes the nibble itself and reports
            # whether/how many rows there are to fetch.
            $self->{have_rows} = $on_exec->(
               dbh         => $self->{dbh},
               tbl         => $self->{tbl},
               sth         => $self->{nibble_sth},
               lb          => $self->{lb},
               ub          => $self->{ub},
               nibbleno    => $self->{nibbleno},
               explain_sth => $self->{explain_sth},
            );
         }
         else {
            $self->{nibble_sth}->execute(@{$self->{lb}}, @{$self->{ub}});
            $self->{have_rows} = $self->{nibble_sth}->rows();
         }
      }

      # Return rows in this nibble.  sth->{Active} is always true with
      # DBD::mysql v3, so we track the status manually.
      if ( $self->{have_rows} ) {
         MKDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
         if ( my $row = $self->{nibble_sth}->fetchrow_arrayref() ) {
            $self->{rowno}++;
            MKDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
            # fetchrow_arrayref re-uses an internal arrayref, so we must copy.
            return [ @$row ];
         }
      }

      # The nibble is exhausted (or had no rows): notify and reset.
      MKDEBUG && _d('No rows in nibble or nibble skipped');
      my $on_after = $self->{callbacks}->{after_nibble};
      if ( $on_after ) {
         $on_after->(
            dbh         => $self->{dbh},
            tbl         => $self->{tbl},
            nibbleno    => $self->{nibbleno},
            explain_sth => $self->{explain_sth},
         );
      }
      $self->{rowno}     = 0;
      $self->{have_rows} = 0;
   }

   MKDEBUG && _d('Done nibbling');
   my $on_done = $self->{callbacks}->{done};
   if ( $on_done ) {
      $on_done->(
         dbh => $self->{dbh},
         tbl => $self->{tbl},
      );
   }
   return;
}
|
||||
|
||||
# Sub: nibble_number
#   Report the number of the current nibble.
#
# Returns:
#   The nibbleno counter (0 until next() has started a nibble)
sub nibble_number {
   my ($self) = @_;
   my $current = $self->{nibbleno};
   return $current;
}
|
||||
|
||||
# Sub: set_chunk_size
#   Rebuild the upper boundary statement for a new chunk size and make
#   sure the statement handles are prepared.
#
# Parameters:
#   $limit - New chunk size (rows per nibble)
#
# NOTE(review): when this is called before the first next(), the
# _prepare_sths() call below runs before _can_nibble_once() has set
# one_nibble, so the nibbling handles get prepared and _prepare_sths()
# will not replace them later -- confirm this is intended.
sub set_chunk_size {
   my ($self, $limit) = @_;
   MKDEBUG && _d('Setting new chunk size (LIMIT):', $limit);

   my $extra_where = $self->{where} ? " AND ($self->{where})" : '';
   $self->{ub_sql} = _make_ub_sql(
      cols     => $self->{asc}->{scols},
      from     => $self->{from},
      where    => $self->{asc}->{boundaries}->{'>='} . $extra_where,
      order_by => $self->{order_by},
      limit    => $limit,
      Quoter   => $self->{Quoter},
   );

   # ub_sth won't exist if user calls this sub before calling next() once.
   # When it does exist it was prepared from the old ub_sql, so discard it
   # and let _prepare_sths() make a new one.
   my $stale_sth = $self->{ub_sth};
   if ( $stale_sth ) {
      $stale_sth->finish();
      $self->{ub_sth} = undef;
   }

   $self->_prepare_sths();

   return;
}
|
||||
|
||||
# Sub: _make_ub_sql
#   Make the statement that finds the upper boundary of the current nibble
#   and, as a second row, the lower boundary of the next nibble.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   cols     - Arrayref of columns to select
#   from     - FROM clause (without the FROM keyword)
#   where    - WHERE clause (without the WHERE keyword)
#   order_by - ORDER BY clause (without the ORDER BY keywords)
#   limit    - Chunk size; 0 is treated like 1
#   Quoter   - Object with a quote() method
#
# Returns:
#   SQL statement string
sub _make_ub_sql {
   my (%args) = @_;
   my @required_args = qw(cols from where order_by limit Quoter);
   foreach my $arg ( @required_args ) {
      # limit only has to be given: the OFFSET expression below already
      # copes with a chunk size of 0, so don't reject it for being false.
      my $given = $arg eq 'limit' ? defined $args{$arg} : $args{$arg};
      die "I need a $arg argument" unless $given;
   }
   my ($cols, $from, $where, $order_by, $limit, $q) = @args{@required_args};

   # The upper boundary is the last row of the chunk, i.e. row number
   # $limit counting from the lower boundary, hence OFFSET $limit - 1.
   my $offset = (int($limit) || 1) - 1;

   my $ub_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . join(', ', map { $q->quote($_) } @{$cols})
      . " FROM $from"
      . " WHERE $where"
      . " ORDER BY $order_by"
      . " LIMIT 2 OFFSET " . $offset
      . " /*upper boundary*/";
   MKDEBUG && _d('Upper boundary statement:', $ub_sql);
   return $ub_sql;
}
|
||||
|
||||
# Sub: _can_nibble_once
#   Decide whether the whole table fits in a single nibble by comparing
#   the row count reported by SHOW TABLE STATUS with the chunk size.
#   The decision is saved in $self->{one_nibble}.
#
# Returns:
#   1 if the table can be nibbled at once, else 0 (also 0, after a
#   warning, if the table status cannot be read)
sub _can_nibble_once {
   my ($self) = @_;
   my ($dbh, $tbl, $q) = @{$self}{qw(dbh tbl Quoter)};

   my $table_status;
   eval {
      my $sql = "SHOW TABLE STATUS FROM " . $q->quote($tbl->{db})
              . " LIKE " . $q->literal_like($tbl->{tbl});
      MKDEBUG && _d($sql);
      $table_status = $dbh->selectrow_hashref($sql);
      MKDEBUG && _d('Table status:', Dumper($table_status));
   };
   if ( $EVAL_ERROR ) {
      warn $EVAL_ERROR;
      return 0;
   }

   # The row count column is looked up under both spellings; the first
   # one that is defined wins, else the table is treated as empty.
   my $n_rows = 0;
   foreach my $key ( qw(Rows rows) ) {
      if ( defined $table_status->{$key} ) {
         $n_rows = $table_status->{$key};
         last;
      }
   }

   my $chunk_size = $self->{OptionParser}->get('chunk-size') || 1;
   if ( $n_rows <= $chunk_size ) {
      $self->{one_nibble} = 1;
   }
   else {
      $self->{one_nibble} = 0;
   }
   MKDEBUG && _d('One nibble:', $self->{one_nibble} ? 'yes' : 'no');
   return $self->{one_nibble};
}
|
||||
|
||||
# Sub: _prepare_sths
#   Prepare the statement handles needed for nibbling.  A handle that
#   already exists is left alone, so this sub is safe to call again.
sub _prepare_sths {
   my ($self) = @_;
   MKDEBUG && _d('Preparing statement handles');

   # Each pair maps a handle to the SQL it is prepared from.  One-nibble
   # mode needs no upper boundary handle and uses the one-nibble SQL.
   my @sth_for = $self->{one_nibble}
      ? ( [ nibble_sth  => 'one_nibble_sql'         ],
          [ explain_sth => 'explain_one_nibble_sql' ], )
      : ( [ ub_sth      => 'ub_sql'                 ],
          [ nibble_sth  => 'nibble_sql'             ],
          [ explain_sth => 'explain_nibble_sql'     ], );

   foreach my $pair ( @sth_for ) {
      my ($sth_key, $sql_key) = @$pair;
      next if $self->{$sth_key};
      $self->{$sth_key} = $self->{dbh}->prepare($self->{$sql_key});
   }
}
|
||||
|
||||
# Sub: _get_bounds
#   Fetch the first lower boundary and the last upper boundary of the
#   table and save them in next_lb and last_ub.  Does nothing in
#   one-nibble mode because no boundaries are needed then.
sub _get_bounds {
   my ($self) = @_;

   unless ( $self->{one_nibble} ) {
      my $dbh = $self->{dbh};

      my $first_lb = $dbh->selectrow_arrayref($self->{first_lb_sql});
      $self->{next_lb} = $first_lb;
      MKDEBUG && _d('First lower boundary:', Dumper($self->{next_lb}));

      my $last_ub = $dbh->selectrow_arrayref($self->{last_ub_sql});
      $self->{last_ub} = $last_ub;
      MKDEBUG && _d('Last upper boundary:', Dumper($self->{last_ub}));
   }

   return;
}
|
||||
|
||||
# Sub: _check_index_usage
#   Die unless the index reported by an EXPLAIN result is the chunk index.
#
# NOTE(review): this looks unfinished.  The statement passed to
# selectall_arrayref() is an empty string, and the only call to this sub
# (in next()) is commented out.  Also, the EXPLAIN key is lowercased
# before being compared to $self->{index}, which is not -- confirm that
# index names are always stored in lowercase.
sub _check_index_usage {
   my ($self) = @_;
   my ($dbh, $tbl) = @{$self}{qw(dbh tbl)};

   my $explain = eval { $dbh->selectall_arrayref("", {Slice => {}}) };
   if ( $EVAL_ERROR ) {
      warn "Cannot check if MySQL is using the chunk index: $EVAL_ERROR";
      return;
   }

   my $explain_index = lc($explain->[0]->{key} || '');
   MKDEBUG && _d('EXPLAIN index:', $explain_index);
   return if $explain_index eq $self->{index};

   my $chosen = $explain_index ? "the `$explain_index`" : 'no';
   die "Cannot nibble table $tbl->{db}.$tbl->{tbl} because MySQL chose "
      . $chosen . ' index'
      . " instead of the chunk index `$self->{asc}->{index}`";
}
|
||||
|
||||
# Sub: _next_boundaries
#   Set the lower (lb) and upper (ub) boundaries of the next nibble.
#
# Returns:
#   1 if there is another nibble, else nothing
sub _next_boundaries {
   my ($self) = @_;

   if ( $self->{no_more_boundaries} ) {
      MKDEBUG && _d('No more boundaries');
      return;
   }

   # A single nibble has no boundaries: the one-nibble statement takes
   # no parameters, and there is nothing after it.
   if ( $self->{one_nibble} ) {
      $self->{lb} = $self->{ub} = [];
      $self->{no_more_boundaries} = 1;  # for next call
      return 1;
   }

   # This nibble starts where the previous boundary query said the
   # next one would.
   $self->{lb} = $self->{next_lb};

   my $ub_sth = $self->{ub_sth};
   MKDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
      join(', ', @{$self->{lb}}));
   $ub_sth->execute(@{$self->{lb}});
   my $boundary = $ub_sth->fetchall_arrayref();
   MKDEBUG && _d('Next boundary:', Dumper($boundary));

   my $got_boundary = $boundary && @$boundary;
   if ( !$got_boundary ) {
      # Fewer rows remain than a full chunk, so this is the last nibble
      # and it ends at the table's last upper boundary.
      $self->{no_more_boundaries} = 1;  # for next call
      $self->{ub} = $self->{last_ub};
      MKDEBUG && _d('Last upper boundary:', Dumper($self->{ub}));
   }
   else {
      # First row: upper boundary of this nibble.  Second row, if any:
      # lower boundary of the next nibble.
      $self->{ub} = $boundary->[0];
      if ( $boundary->[1] ) {
         $self->{next_lb} = $boundary->[1];
      }
      else {
         $self->{no_more_boundaries} = 1;  # for next call
         MKDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
      }
   }
   $ub_sth->finish();

   return 1; # have boundary
}
|
||||
|
||||
# Sub: DESTROY
#   Finish every statement handle (any key ending in _sth) when the
#   object is destroyed.
sub DESTROY {
   my ( $self ) = @_;
   foreach my $key ( grep { m/_sth$/ } keys %$self ) {
      # A handle slot can exist without holding a handle, e.g.
      # set_chunk_size() sets ub_sth to undef; skip those instead of
      # calling finish() on undef.
      my $sth = $self->{$key};
      next unless $sth;
      $sth->finish();
   }
   return;
}
|
||||
|
||||
# Sub: _d
#   Print a debug line to STDERR, prefixed with the caller's package and
#   line number and the process ID.  Undefined arguments are printed as
#   "undef" and embedded newlines are continued with "# ".
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End NibbleIterator package
|
||||
# ###########################################################################
|
||||
473
lib/RowChecksum.pm
Normal file
473
lib/RowChecksum.pm
Normal file
@@ -0,0 +1,473 @@
|
||||
# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# RowChecksum package
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: RowChecksum
|
||||
# RowChecksum makes checksum expressions for checksumming rows and chunks.
|
||||
package RowChecksum;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
||||
|
||||
use List::Util qw(max);
|
||||
use Data::Dumper;
|
||||
$Data::Dumper::Indent = 1;
|
||||
$Data::Dumper::Sortkeys = 1;
|
||||
$Data::Dumper::Quotekeys = 0;
|
||||
|
||||
# Sub: new
#   Create a RowChecksum object.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   OptionParser - Object whose get() returns option values
#   Quoter       - Object with a quote() method
#
# Returns:
#   RowChecksum object holding a copy of %args
sub new {
   my ( $class, %args ) = @_;
   my @missing = grep { !defined $args{$_} } qw(OptionParser Quoter);
   die "I need a $missing[0] argument" if @missing;
   return bless { %args }, $class;
}
|
||||
|
||||
# Sub: make_row_checksum
|
||||
# Make a SELECT column list to checksum a row.
|
||||
#
|
||||
# Parameters:
|
||||
# %args - Arguments
|
||||
#
|
||||
# Required Arguments:
|
||||
# tbl - Table ref
|
||||
#
|
||||
# Optional Arguments:
|
||||
# sep - Separator for CONCAT_WS(); default #
|
||||
# cols - Arrayref of columns to checksum
|
||||
# trim - Wrap VARCHAR cols in TRIM() for v4/v5 compatibility
|
||||
# ignorecols - Hashref whose keys are columns to exclude from checksum
|
||||
#
|
||||
# Returns:
|
||||
# Column list for SELECT
|
||||
sub make_row_checksum {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(tbl) ) {
      die "I need a $arg argument" unless $args{$arg};
   }

   my $tbl_struct = $args{tbl}->{tbl_struct};
   my $o          = $self->{OptionParser};
   my $q          = $self->{Quoter};
   my $func       = $args{func} || uc($o->get('function'));

   # The separator goes inside a single-quoted SQL string, so it must
   # not contain single quotes itself.
   my $sep = $args{sep} || '#';
   $sep =~ s/'//g;
   $sep = '#' unless $sep;

   # Defaulting to an empty hashref keeps the exists test below simple.
   my $ignorecols = $args{ignorecols} || {};

   # Choose columns: the caller's list, or else every column of the
   # table, minus the ignored ones.
   my $wanted_cols = $args{cols} ? $args{cols} : $tbl_struct->{cols};
   my %cols;
   foreach my $col ( @$wanted_cols ) {
      next if exists $ignorecols->{$col};
      $cols{lc $col} = 1;
   }

   # Generate the expression for each chosen column, in table order and
   # at most once per column.  Normalize query results: make FLOAT and
   # TIMESTAMP stringify uniformly.
   my %seen;
   my @cols;
   foreach my $col ( @{$tbl_struct->{cols}} ) {
      next unless $cols{$col} && !$seen{$col}++;
      my $type = $tbl_struct->{type_for}->{$col};
      my $expr = $q->quote($col);
      if ( $type eq 'timestamp' ) {
         $expr .= ' + 0';
      }
      elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
         $expr = "ROUND($expr, $args{float_precision})";
      }
      elsif ( $args{trim} && $type =~ m/varchar/ ) {
         $expr = "TRIM($expr)";
      }
      push @cols, $expr;
   }

   # Prepend columns to query, resulting in "col1, col2, FUNC(..col1, col2...)",
   # unless caller says not to.  The only caller that says not to is
   # make_chunk_checksum() which uses this row checksum as part of a larger
   # checksum.  Other callers, like TableSyncer::make_checksum_queries() call
   # this sub directly and want the actual columns.
   my $query;
   if ( !$args{no_cols} ) {
      my @select_list;
      foreach my $expr ( @cols ) {
         my $item = $expr;
         if ( $expr =~ m/\+ 0/ ) {
            # Alias col name back to itself else its name becomes
            # "col + 0" instead of just "col".
            my ($real_col) = $expr =~ /^(\S+)/;
            $item .= " AS $real_col";
         }
         elsif ( $expr =~ m/TRIM/ ) {
            # Same for TRIM(col): alias it back to col.
            my ($real_col) = $expr =~ m/TRIM\(([^\)]+)\)/;
            $item .= " AS $real_col";
         }
         push @select_list, $item;
      }
      $query = join(', ', @select_list) . ', ';
   }

   my $upper_func = uc $func;
   if ( $upper_func eq 'FNV_64' || $upper_func eq 'FNV1A_64' ) {
      # As a special case, FNV1A_64/FNV_64 doesn't need its arguments
      # concatenated, and doesn't need a bitmap of NULLs.
      $query .= "$upper_func(" . join(', ', @cols) . ')';
   }
   else {
      # Add a bitmap of which nullable columns are NULL.
      my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
      if ( @nulls ) {
         my @is_null = map { 'ISNULL(' . $q->quote($_) . ')' } @nulls;
         push @cols, "CONCAT(" . join(', ', @is_null) . ")";
      }

      if ( @cols > 1 ) {
         $query .= "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))';
      }
      else {
         $query .= "$func($cols[0])";
      }
   }

   MKDEBUG && _d('Row checksum:', $query);
   return $query;
}
|
||||
|
||||
# Sub: make_chunk_checksum
|
||||
# Make a SELECT column list to checksum a chunk of rows.
|
||||
#
|
||||
# Parameters:
|
||||
# %args - Arguments
|
||||
#
|
||||
# Required Arguments:
|
||||
# tbl - Table ref
|
||||
# dbh - dbh if func, crc_width, and crc_type aren't given
|
||||
#
|
||||
# Optional Arguments:
|
||||
# func - Hash function name
|
||||
# crc_width - CRC width
|
||||
# crc_type - CRC type
|
||||
#
|
||||
# Returns:
|
||||
# Column list for SELECT
|
||||
sub make_chunk_checksum {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(tbl) ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   # A dbh is only needed to discover whatever CRC details were not given.
   my $have_crc_info = $args{func} && $args{crc_width} && $args{crc_type};
   die "I need a dbh argument" unless $args{dbh} || $have_crc_info;

   my $o        = $self->{OptionParser};
   my %crc_args = $self->get_crc_args(%args);

   # NOTE(review): $opt_slice is computed (and warned about) here but is
   # never passed to _make_xor_slices() below, so --optimize-xor does not
   # change the generated SQL -- confirm whether that is intended.
   my $opt_slice;
   if ( $o->get('optimize-xor') && $crc_args{crc_type} !~ m/int$/ ) {
      $opt_slice = $self->_optimize_xor(%args, %crc_args);
      warn "Cannot use --optimize-xor" unless defined $opt_slice;
   }
   MKDEBUG && _d("Checksum strat:", Dumper(\%crc_args));

   # This checksum algorithm concatenates the columns in each row and
   # checksums them, then slices this checksum up into 16-character chunks.
   # It then converts them to BIGINTs with the CONV() function, and then
   # groupwise XORs them to produce an order-independent checksum of the
   # slice over all the rows.  It then converts these back to base 16 and
   # puts them back together.  The effect is the same as XORing a very wide
   # (32 characters = 128 bits for MD5, and SHA1 is even larger) unsigned
   # integer over all the rows.
   #
   # As a special case, integer functions do not need to be sliced.  They
   # can be fed right into BIT_XOR after a cast to UNSIGNED.
   my $row_checksum = $self->make_row_checksum(
      %args,
      %crc_args,
      no_cols => 1
   );

   my $crc;
   if ( $crc_args{crc_type} !~ m/int$/ ) {
      my $slices = $self->_make_xor_slices(
         row_checksum => $row_checksum,
         %crc_args,
      );
      $crc = "COALESCE(LOWER(CONCAT($slices)), 0)";
   }
   else {
      $crc = "COALESCE(LOWER(CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), "
           . "10, 16)), 0)";
   }

   my $select = "COUNT(*) AS cnt, $crc AS crc";
   MKDEBUG && _d('Chunk checksum:', $select);
   return $select;
}
|
||||
|
||||
# Sub: get_crc_args
#   Get the hash function and its CRC width and type, using the values
#   given in %args and discovering any that are missing.
#
# Parameters:
#   %args - Arguments; func, crc_width and crc_type are used when given,
#           and all of %args is passed on to the subs that discover the
#           missing values
#
# Returns:
#   List of func, crc_width and crc_type key-value pairs
sub get_crc_args {
   my ($self, %args) = @_;

   my $func = $args{func};
   $func    = $self->_get_hash_func(%args) unless $func;

   # Width and type depend on the function, so they are discovered after it.
   my $crc_width = $args{crc_width};
   $crc_width    = $self->_get_crc_width(%args, func=>$func) unless $crc_width;

   my $crc_type = $args{crc_type};
   $crc_type    = $self->_get_crc_type(%args, func=>$func) unless $crc_type;

   return (
      func      => $func,
      crc_width => $crc_width,
      crc_type  => $crc_type,
   );
}
|
||||
|
||||
# Sub: _get_hash_func
|
||||
# Get the fastest available hash function.
|
||||
#
|
||||
# Parameters:
|
||||
# %args - Arguments
|
||||
#
|
||||
# Required Arguments:
|
||||
# dbh - dbh
|
||||
#
|
||||
# Returns:
|
||||
# Function name
|
||||
sub _get_hash_func {
   my ( $self, %args ) = @_;
   my @required_args = qw(dbh);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh) = @args{@required_args};
   my $o     = $self->{OptionParser};
   my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);

   # The user's choice, if any, is tried before the defaults.
   if ( my $func = $o->get('function') ) {
      unshift @funcs, $func;
   }

   # Try each function in turn and return the first one the server can
   # execute, collecting the reasons why the others could not be used.
   my $error;
   FUNC:
   foreach my $func ( @funcs ) {
      eval {
         my $sql = "SELECT $func('test-string')";
         MKDEBUG && _d($sql);
         $dbh->do($sql);
      };
      if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
         $error .= qq{$func cannot be used because "$1"\n};
         MKDEBUG && _d($func, 'cannot be used because', $1);
         # This function failed, so it must not be chosen; try the next.
         next FUNC;
      }
      MKDEBUG && _d('Chosen hash func:', $func);
      return $func;
   }
   die $error || 'No hash functions (CRC32, MD5, etc.) are available';
}
|
||||
|
||||
# Returns how wide/long, in characters, a CRC function is.
|
||||
sub _get_crc_width {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(dbh func) ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my $dbh  = $args{dbh};
   my $func = $args{func};

   # The FNV functions are not measured; they always get the minimum
   # width of 16.
   my $upper_func = uc $func;
   my $crc_width  = 16;
   return $crc_width
      if $upper_func eq 'FNV_64' || $upper_func eq 'FNV1A_64';

   # Measure the function's result, but never go below 16.  If this
   # fails for any reason the width stays at 16.
   eval {
      my ($val) = $dbh->selectrow_array("SELECT $func('a')");
      $crc_width = max(16, length($val));
   };
   return $crc_width;
}
|
||||
|
||||
# Returns a CRC function's MySQL type.
|
||||
sub _get_crc_type {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(dbh func) ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my $dbh  = $args{dbh};
   my $func = $args{func};

   my ($type, $length) = ('', 0);
   my $sql = "SELECT $func('a')";
   my $sth = $dbh->prepare($sql);
   # The type and length come from the executed statement's metadata.
   # Errors in here are ignored, leaving whatever was found so far.
   eval {
      $sth->execute();
      $type   = $sth->{mysql_type_name}->[0];
      $length = $sth->{mysql_length}->[0];
      MKDEBUG && _d($sql, $type, $length);
      # A bigint shorter than 20 characters is reported as a plain int.
      $type = 'int' if $type eq 'bigint' && $length < 20;
   };
   $sth->finish;
   MKDEBUG && _d('crc_type:', $type, 'length:', $length);
   return $type;
}
|
||||
|
||||
# Figure out which slice in a sliced BIT_XOR checksum should have the actual
|
||||
# concat-columns-and-checksum, and which should just get variable references.
|
||||
# Returns the slice. I'm really not sure if this code is needed. It always
|
||||
# seems the last slice is the one that works. But I'd rather be paranoid.
|
||||
# TODO: this function needs a hint to know when a function returns an
|
||||
# integer. CRC32 is an example. In these cases no optimization or slicing
|
||||
# is necessary.
|
||||
sub _optimize_xor {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(dbh func) ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my $dbh  = $args{dbh};
   my $func = $args{func};

   die "$func never needs BIT_XOR optimization"
      if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;

   # The reference value: the function's result without any slicing.
   my $unsliced  = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
   my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);

   my $opt_slice = 0;
   my $sliced    = '';
   my $start     = 1;

   # Try different positions till sliced result equals non-sliced.
   SLICE:
   while ( 1 ) {
      MKDEBUG && _d('Trying slice', $opt_slice);
      $dbh->do('SET @crc := "", @cnt := 0');
      my $slices = $self->_make_xor_slices(
         row_checksum => "\@crc := $func('a')",
         crc_width    => $crc_width,
         opt_slice    => $opt_slice,
      );

      my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
      $sliced = ($dbh->selectrow_array($sql))[0];
      last SLICE unless $sliced ne $unsliced;

      # This slice doesn't work; move on to the next one, if there is one.
      MKDEBUG && _d('Slice', $opt_slice, 'does not work');
      $start += 16;
      ++$opt_slice;
      last SLICE unless $start < $crc_width;
   }

   if ( $sliced ne $unsliced ) {
      MKDEBUG && _d('No slice works');
      return undef;
   }

   MKDEBUG && _d('Slice', $opt_slice, 'works');
   return $opt_slice;
}
|
||||
|
||||
# Sub: _make_xor_slices
|
||||
# Make an expression that will do a bitwise XOR over a very wide integer,
|
||||
# such as that returned by SHA1, which is too large to put into BIT_XOR().
|
||||
# If an opt_slice is given, a variable is used to avoid calling row_checksum
|
||||
# multiple times.
|
||||
#
|
||||
# Parameters:
|
||||
# %args - Arguments
|
||||
#
|
||||
# Required Arguments:
|
||||
# row_checksum - <make_row_checksum()> query
|
||||
# crc_width - CRC width (<_get_crc_width()>)
|
||||
#
|
||||
# Optional Arguments:
|
||||
# opt_slice - Slice number. Use a variable to avoid calling row_checksum
|
||||
# multiple times.
|
||||
#
|
||||
# Returns:
|
||||
# SQL expression
|
||||
sub _make_xor_slices {
   my ( $self, %args ) = @_;
   my @required_args = qw(row_checksum crc_width);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($row_checksum, $crc_width) = @args{@required_args};
   my ($opt_slice) = $args{opt_slice};

   # Create a series of slices with @crc as a placeholder.  Each slice
   # covers up to 16 characters of the checksum; the last one may be
   # shorter.
   my @slices;
   for ( my $start = 1; $start <= $crc_width; $start += 16 ) {
      my $len = $crc_width - $start + 1;
      if ( $len > 16 ) {
         $len = 16;
      }
      push @slices,
         "LPAD(CONV(BIT_XOR("
         . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
         . ", 10, 16), $len, '0')";
   }

   # Replace the placeholder with the expression.  If specified, add a
   # user-variable optimization so the expression goes in only one of the
   # slices.  This optimization relies on @crc being '' when the query begins.
   if ( defined $opt_slice && $opt_slice < @slices ) {
      $slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
   }
   else {
      # for aliases each element, so the slices are changed in place.
      s/\@crc/$row_checksum/ for @slices;
   }

   return join(', ', @slices);
}
|
||||
|
||||
# Queries the replication table for chunks that differ from the master's data.
|
||||
# Sub: find_replication_differences
#   Query the replication table for chunks whose count or checksum
#   differs from the master's.
#
# Parameters:
#   $dbh   - dbh
#   $table - Name of the replication table, used as-is in the FROM clause
#
# Returns:
#   List of hashrefs, one per differing chunk
sub find_replication_differences {
   my ( $self, $dbh, $table ) = @_;

   # The empty first and last pieces give the statement the single
   # leading and trailing space it has always had.
   my $sql = join(' ',
      '',
      'SELECT db, tbl, chunk, boundaries,',
      'COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,',
      'COALESCE(',
      'this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),',
      '0',
      ') AS crc_diff,',
      'this_cnt, master_cnt, this_crc, master_crc',
      "FROM $table",
      'WHERE master_cnt <> this_cnt OR master_crc <> this_crc',
      'OR ISNULL(master_crc) <> ISNULL(this_crc)',
      '',
   );
   # Collapse runs of whitespace, including any inside $table.
   $sql =~ s/\s+/ /gm;

   MKDEBUG && _d($sql);
   my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
   return @$diffs;
}
|
||||
|
||||
# Sub: _d
#   Print a debug line to STDERR, prefixed with the caller's package and
#   line number and the process ID.  Undefined arguments are printed as
#   "undef" and embedded newlines are continued with "# ".
sub _d {
   my ($package, undef, $line) = caller 0;
   my @words = map { defined $_ ? "$_" : 'undef' } @_;
   foreach my $word ( @words ) {
      $word =~ s/\n/\n# /g;
   }
   print STDERR "# $package:$line $PID ", join(' ', @words), "\n";
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End RowChecksum package
|
||||
# ###########################################################################
|
||||
@@ -209,9 +209,9 @@ sub next_schema_object {
|
||||
if ( my $schema = $self->{Schema} ) {
|
||||
$schema->add_schema_object($schema_obj);
|
||||
}
|
||||
MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
|
||||
}
|
||||
|
||||
MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
|
||||
return $schema_obj;
|
||||
}
|
||||
|
||||
|
||||
@@ -65,28 +65,26 @@ sub generate_asc_stmt {
|
||||
die "I need a $arg argument" unless defined $args{$arg};
|
||||
}
|
||||
my ($tbl_struct, $index) = @args{@required_args};
|
||||
my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
|
||||
my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
|
||||
my $q = $self->{Quoter};
|
||||
|
||||
# This shouldn't happen. TableSyncNibble shouldn't call us with
|
||||
# a nonexistent index.
|
||||
die "Index '$index' does not exist in table"
|
||||
unless exists $tbl_struct->{keys}->{$index};
|
||||
|
||||
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
|
||||
my @asc_slice;
|
||||
MKDEBUG && _d('Will ascend index', $index);
|
||||
|
||||
# These are the columns we'll ascend.
|
||||
@asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
|
||||
MKDEBUG && _d('Will ascend index', $index);
|
||||
MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
|
||||
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
|
||||
if ( $args{asc_first} ) {
|
||||
@asc_cols = $asc_cols[0];
|
||||
MKDEBUG && _d('Ascending only first column');
|
||||
}
|
||||
MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
|
||||
|
||||
# We found the columns by name, now find their positions for use as
|
||||
# array slices, and make sure they are included in the SELECT list.
|
||||
my @asc_slice;
|
||||
my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
|
||||
foreach my $col ( @asc_cols ) {
|
||||
if ( !exists $col_posn{$col} ) {
|
||||
|
||||
457
t/lib/NibbleIterator.t
Normal file
457
t/lib/NibbleIterator.t
Normal file
@@ -0,0 +1,457 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
BEGIN {
|
||||
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
|
||||
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
|
||||
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
|
||||
};
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use Test::More;
|
||||
|
||||
use Schema;
|
||||
use SchemaIterator;
|
||||
use Quoter;
|
||||
use DSNParser;
|
||||
use Sandbox;
|
||||
use OptionParser;
|
||||
use MySQLDump;
|
||||
use TableParser;
|
||||
use TableNibbler;
|
||||
use RowChecksum;
|
||||
use NibbleIterator;
|
||||
use PerconaTest;
|
||||
|
||||
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
||||
|
||||
use Data::Dumper;
|
||||
$Data::Dumper::Indent = 1;
|
||||
$Data::Dumper::Sortkeys = 1;
|
||||
$Data::Dumper::Quotekeys = 0;
|
||||
|
||||
# Connect to the sandbox master; every test below needs this handle.
my $dp  = DSNParser->new(opts => $dsn_opts);
my $sb  = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');

# skip_all ends the script, so the fixed plan is only declared when a
# connection is available.
plan skip_all => 'Cannot connect to sandbox master' unless $dbh;
plan tests => 20;

# Helper objects shared by all the iterators created in this file.
my $q  = Quoter->new();
my $tp = TableParser->new(Quoter => $q);
my $du = MySQLDump->new();
my $nb = TableNibbler->new(TableParser => $tp, Quoter => $q);
my $o  = OptionParser->new(description => 'NibbleIterator');
my $rc = RowChecksum->new(OptionParser => $o, Quoter => $q);

# Option specs are read from the pt-table-checksum tool.
$o->get_specs("$trunk/bin/pt-table-checksum");

my %common_modules = (
   Quoter       => $q,
   TableParser  => $tp,
   MySQLDump    => $du,
   TableNibbler => $nb,
   OptionParser => $o,
);

# Directory holding the SQL sample files loaded by make_nibble_iter().
my $in = "/t/lib/samples/NibbleIterator/";
|
||||
|
||||
# Creates a NibbleIterator for one table on the sandbox master.
#
# Named arguments:
#   sql_file  - optional; file under $in loaded into the master first
#   argv      - optional; arrayref of command-line options that are placed
#               in @ARGV and parsed by the shared OptionParser
#   db, tbl   - database and table looked up in the freshly built Schema
#   callbacks - passed through to NibbleIterator->new()
#   select    - passed through to NibbleIterator->new()
#
# Returns the new NibbleIterator object.
sub make_nibble_iter {
   my (%args) = @_;

   $sb->load_file('master', "$in/$args{sql_file}") if $args{sql_file};

   # The OptionParser reads its options from @ARGV.
   @ARGV = @{ $args{argv} || [] };
   $o->get_opts();

   # Walk the whole schema so that get_table() below can find the table.
   my $schema    = Schema->new();
   my $schema_it = SchemaIterator->new(
      dbh      => $dbh,
      keep_ddl => 1,
      Schema   => $schema,
      %common_modules,
   );
   while ( $schema_it->next_schema_object() ) {
      # Nothing to do; iterating is what populates $schema.
   }

   my $nibble_iter = NibbleIterator->new(
      dbh       => $dbh,
      tbl       => $schema->get_table($args{db}, $args{tbl}),
      callbacks => $args{callbacks},
      select    => $args{select},
      %common_modules,
   );

   return $nibble_iter;
}
|
||||
|
||||
# ############################################################################
|
||||
# a-z w/ chunk-size 5, z is final boundary and single value
|
||||
# ############################################################################
|
||||
my $ni = make_nibble_iter(
   sql_file => "a-z.sql",
   db       => 'test',
   tbl      => 't',
   argv     => [qw(--databases test --chunk-size 5)],
);

# Each nibble is read with five calls to next().  The sixth nibble has only
# 1 row left, and the extra calls shouldn't return anything or crash.
my @rows;
my $nibble_no = 0;
foreach my $range (
   ['a', 'e'], ['f', 'j'], ['k', 'o'], ['p', 't'], ['u', 'y'], ['z', 'z'],
) {
   my ($first, $last) = @$range;
   $nibble_no++;

   @rows = map { $ni->next() } 1..5;

   is_deeply(
      \@rows,
      [ map { [$_] } $first .. $last ],
      "a-z nibble $nibble_no"
   ) or print Dumper(\@rows);
}
|
||||
|
||||
# ############################################################################
|
||||
# a-y w/ chunk-size 5, even nibbles
|
||||
# ############################################################################
|
||||
# Without "z" the 25 remaining rows divide evenly into nibbles of 5.
$dbh->do('delete from test.t where c="z"');
my $all_rows = $dbh->selectall_arrayref('select * from test.t order by c');

$ni = make_nibble_iter(
   db   => 'test',
   tbl  => 't',
   argv => [qw(--databases test --chunk-size 5)],
);
@rows = map { $ni->next() } 1..26;
is_deeply(\@rows, $all_rows, 'a-y even nibble') or print Dumper(\@rows);

# ############################################################################
# chunk-size exceeds number of rows, 1 nibble
# ############################################################################
$ni = make_nibble_iter(
   db   => 'test',
   tbl  => 't',
   argv => [qw(--databases test --chunk-size 100)],
);
@rows = map { $ni->next() } 1..27;
is_deeply(\@rows, $all_rows, '1 nibble') or print Dumper(\@rows);

# ############################################################################
# single row table
# ############################################################################
$dbh->do('delete from test.t where c != "d"');
$ni = make_nibble_iter(
   db   => 'test',
   tbl  => 't',
   argv => [qw(--databases test --chunk-size 100)],
);
@rows = map { $ni->next() } 1..3;
is_deeply(\@rows, [['d']], 'single row table') or print Dumper(\@rows);

# ############################################################################
# empty table
# ############################################################################
$dbh->do('truncate table test.t');
$ni = make_nibble_iter(
   db   => 'test',
   tbl  => 't',
   argv => [qw(--databases test --chunk-size 100)],
);
@rows = map { $ni->next() } 1..3;
is_deeply(\@rows, [], 'empty table') or print Dumper(\@rows);
|
||||
|
||||
# ############################################################################
|
||||
# Callbacks
|
||||
# ############################################################################
|
||||
# The callbacks only print, so the printed output records which callbacks
# were invoked and in what order.
$ni = make_nibble_iter(
   sql_file  => "a-z.sql",
   db        => 'test',
   tbl       => 't',
   argv      => [qw(--databases test --chunk-size 2)],
   callbacks => {
      init         => sub { print "init\n" },
      after_nibble => sub { print "after nibble ".$ni->nibble_number()."\n" },
      done         => sub { print "done\n" },
   },
);

$dbh->do('delete from test.t limit 20');  # 6 rows left

# Eight calls for six rows: two more than there are rows.
my $output = output( sub { $ni->next() for 1..8; } );

is(
   $output,
   join('', map { "$_\n" }
      'init',
      'after nibble 1',
      'after nibble 2',
      'after nibble 3',
      'done',
      'done',
   ),
   "callbacks"
);
|
||||
|
||||
# ############################################################################
|
||||
# Nibble a larger table by numeric pk id
|
||||
# ############################################################################
|
||||
# All eight tests in this block read the sakila sample database and are
# skipped together when it is not loaded on the sandbox master.
SKIP: {
   my $have_sakila
      = $dbh->selectall_arrayref('show databases like "sakila"');
   skip "Sakila database is not loaded", 8 unless @$have_sakila;

   $ni = make_nibble_iter(
      db   => 'sakila',
      tbl  => 'payment',
      argv => [qw(--databases sakila --tables payment --chunk-size 100)],
   );

   # Count every row returned across all nibbles.
   my $n_nibbles = 0;
   $n_nibbles++ while $ni->next();
   is(
      $n_nibbles,
      16049,
      "Nibble sakila.payment (16049 rows)"
   );

   # Nibble sakila.country with a chunk checksum expression as the SELECT
   # list: each call to next() then returns one [count, crc] row per chunk.
   my $tbl = {
      db         => 'sakila',
      tbl        => 'country',
      tbl_struct => $tp->parse(
         $du->get_create_table($dbh, $q, 'sakila', 'country')),
   };
   my $chunk_checksum = $rc->make_chunk_checksum(
      dbh => $dbh,
      tbl => $tbl,
   );
   $ni = make_nibble_iter(
      db     => 'sakila',
      tbl    => 'country',
      argv   => [qw(--databases sakila --tables country --chunk-size 25)],
      select => $chunk_checksum,
   );

   my $chunk_no = 0;
   foreach my $expected_row (
      [25, 'da79784d'],
      [25, 'e860c4f9'],
      [25, 'eb651f58'],
      [25, '2d87d588'],
      [9,  'beb4a180'],
   ) {
      $chunk_no++;
      my $row = $ni->next();
      is_deeply(
         $row,
         $expected_row,
         "SELECT chunk checksum $chunk_no FROM sakila.country"
      ) or print STDERR Dumper($row);
   }

   # #########################################################################
   # exec_nibble callback and explain_sth
   # #########################################################################
   my @expl;
   $ni = make_nibble_iter(
      db        => 'sakila',
      tbl       => 'country',
      argv      => [qw(--databases sakila --tables country --chunk-size 60)],
      select    => $chunk_checksum,
      callbacks => {
         # Runs the EXPLAIN statement with the nibble's boundary values
         # and records the plan row it returns.
         exec_nibble => sub {
            my (%args) = @_;
            my ($expl_sth, $lb, $ub) = @args{qw(explain_sth lb ub)};
            $expl_sth->execute(@$lb, @$ub);
            push @expl, $expl_sth->fetchrow_hashref();
            return 0;
         },
      },
   );
   $ni->next();
   $ni->next();

   # The two recorded plans are expected to differ only in the row estimate.
   my %plan = (
      id            => '1',
      key           => 'PRIMARY',
      key_len       => '2',
      possible_keys => 'PRIMARY',
      ref           => undef,
      select_type   => 'SIMPLE',
      table         => 'country',
      type          => 'range',
      extra         => 'Using where',
   );
   is_deeply(
      \@expl,
      [
         { %plan, rows => '54' },
         { %plan, rows => '49' },
      ],
      'exec_nibble callback and explain_sth'
   );

   # #########################################################################
   # film_actor, multi-column pk
   # #########################################################################
   $ni = make_nibble_iter(
      db   => 'sakila',
      tbl  => 'film_actor',
      argv => [qw(--tables sakila.film_actor --chunk-size 1000)],
   );

   $n_nibbles = 0;
   $n_nibbles++ while $ni->next();
   is(
      $n_nibbles,
      5462,
      "Nibble sakila.film_actor (multi-column pk)"
   );
}
|
||||
|
||||
# ############################################################################
|
||||
# Reset chunk size on-the-fly.
|
||||
# ############################################################################
|
||||
$ni = make_nibble_iter(
   sql_file => "a-z.sql",
   db       => 'test',
   tbl      => 't',
   argv     => [qw(--databases test --chunk-size 5)],
);

# Group the returned values by nibble number, and raise the chunk size to
# 20 once the fifth row has been read.
@rows = ();
my $rows_seen = 0;
while ( my $row = $ni->next() ) {
   push @{ $rows[$ni->nibble_number()] }, @$row;
   $ni->set_chunk_size(20) if ++$rows_seen == 5;
}

is_deeply(
   \@rows,
   [
      undef,          # no 0 nibble
      [ 'a'..'e' ],   # nibble 1
      [ 'f'..'y' ],   # nibble 2, should contain 20 chars
      [ 'z' ],        # last nibble
   ],
   "Change chunk size while nibbling"
) or print STDERR Dumper(\@rows);
|
||||
|
||||
# #############################################################################
|
||||
# Done.
|
||||
# #############################################################################
|
||||
$sb->wipe_clean($dbh);
|
||||
exit;
|
||||
417
t/lib/RowChecksum.t
Normal file
417
t/lib/RowChecksum.t
Normal file
@@ -0,0 +1,417 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
BEGIN {
|
||||
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
|
||||
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
|
||||
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
|
||||
};
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use Test::More;
|
||||
|
||||
use RowChecksum;
|
||||
use TableParser;
|
||||
use Quoter;
|
||||
use MySQLDump;
|
||||
use DSNParser;
|
||||
use OptionParser;
|
||||
use Sandbox;
|
||||
use PerconaTest;
|
||||
|
||||
# Connect to the sandbox master; the sandbox tests below need this handle.
my $dp  = DSNParser->new(opts => $dsn_opts);
my $sb  = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');

# skip_all ends the script, so the fixed plan is only declared when a
# connection is available.
plan skip_all => "Cannot connect to sandbox master" unless $dbh;
plan tests => 28;

$sb->create_dbs($dbh, ['test']);

my $q  = Quoter->new();
my $tp = TableParser->new(Quoter => $q);
my $du = MySQLDump->new();
my $o  = OptionParser->new(description => 'NibbleIterator');

# Option specs are read from the pt-table-checksum tool.
$o->get_specs("$trunk/bin/pt-table-checksum");

# The object under test.
my $c = RowChecksum->new(
   OptionParser => $o,
   Quoter       => $q,
);
|
||||
|
||||
# ############################################################################
|
||||
# _make_xor_slices
|
||||
# ############################################################################
|
||||
{
   # Expected SQL for one slice: $len hex digits of $src starting at $start.
   my $slice = sub {
      my ( $src, $start, $len ) = @_;
      return "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING($src, $start, $len), "
           . "16, 10) AS UNSIGNED)), 10, 16), $len, '0')";
   };

   # Each case: [ extra args, expected slices, test name ].
   my @cases = (
      [
         [ crc_width => 1 ],
         [ $slice->('FOO', 1, 1) ],
         'FOO XOR slices 1 wide',
      ],
      [
         [ crc_width => 16 ],
         [ $slice->('FOO', 1, 16) ],
         'FOO XOR slices 16 wide',
      ],
      [
         [ crc_width => 17 ],
         [ $slice->('FOO', 1, 16), $slice->('FOO', 17, 1) ],
         'FOO XOR slices 17 wide',
      ],
      [
         [ crc_width => 32 ],
         [ $slice->('FOO', 1, 16), $slice->('FOO', 17, 16) ],
         'FOO XOR slices 32 wide',
      ],
      [
         [ crc_width => 32, opt_slice => 0 ],
         [ $slice->('@crc := FOO', 1, 16), $slice->('@crc', 17, 16) ],
         'XOR slice optimized in slice 0',
      ],
      [
         [ crc_width => 32, opt_slice => 1 ],
         [ $slice->('@crc', 1, 16), $slice->('@crc := FOO', 17, 16) ],
         'XOR slice optimized in slice 1',
      ],
   );

   foreach my $case ( @cases ) {
      my ( $extra_args, $slices, $name ) = @$case;
      is(
         $c->_make_xor_slices(
            row_checksum => 'FOO',
            @$extra_args,
         ),
         join(', ', @$slices),
         $name,
      );
   }
}
|
||||
|
||||
# ############################################################################
|
||||
# make_row_checksum
|
||||
# ############################################################################
|
||||
my $tbl = {
   db         => 'sakila',
   tbl        => 'film',
   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
};

{
   # The expected full-table queries are assembled from the column lists
   # so that the select list and the hashed list cannot drift apart.
   my @plain_cols = map { "`$_`" } qw(
      film_id title description release_year language_id
      original_language_id rental_duration rental_rate length
      replacement_cost rating special_features
   );
   my @null_checks = map { "ISNULL(`$_`)" } qw(
      description release_year original_language_id length rating
      special_features
   );
   my $select_list
      = join(', ', @plain_cols, '`last_update` + 0 AS `last_update`');
   my $hashed_cols = join(', ', @plain_cols, '`last_update` + 0');
   my $null_bitmap = 'CONCAT(' . join(', ', @null_checks) . ')';

   is(
      $c->make_row_checksum(
         tbl  => $tbl,
         func => 'SHA1',
      ),
      "$select_list, SHA1(CONCAT_WS('#', $hashed_cols, $null_bitmap))",
      'SHA1 query for sakila.film',
   );

   is(
      $c->make_row_checksum(
         tbl  => $tbl,
         func => 'FNV_64',
      ),
      "$select_list, FNV_64($hashed_cols)",
      'FNV_64 query for sakila.film',
   );
}

# A single column, given in either case, is hashed directly.
foreach my $case (
   [ 'film_id', 'SHA1 query for sakila.film with only one column' ],
   [ 'FILM_ID', 'Column names are case-insensitive' ],
) {
   my ( $col, $name ) = @$case;
   is(
      $c->make_row_checksum(
         tbl  => $tbl,
         func => 'SHA1',
         cols => [ $col ],
      ),
      q{`film_id`, SHA1(`film_id`)},
      $name,
   );
}

# Each case: [ separator passed in, separator expected in the SQL, name ].
foreach my $case (
   [ '%',   '%', 'Separator' ],
   [ "'%'", '%', 'Bad separator' ],
   [ "'''", '#', 'Really bad separator' ],
) {
   my ( $sep, $sep_in_sql, $name ) = @$case;
   is(
      $c->make_row_checksum(
         tbl  => $tbl,
         func => 'SHA1',
         cols => [qw(film_id title)],
         sep  => $sep,
      ),
      "`film_id`, `title`, SHA1(CONCAT_WS('$sep_in_sql', `film_id`, `title`))",
      $name,
   );
}

# sakila.rental
$tbl = {
   db         => 'sakila',
   tbl        => 'rental',
   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.rental.float.sql')),
};

is(
   $c->make_row_checksum(
      tbl  => $tbl,
      func => 'SHA1',
   ),
   q{`rental_id`, `foo`, SHA1(CONCAT_WS('#', `rental_id`, `foo`))},
   'FLOAT column is like any other',
);

is(
   $c->make_row_checksum(
      tbl             => $tbl,
      func            => 'SHA1',
      float_precision => 5,
   ),
   q{`rental_id`, ROUND(`foo`, 5), }
      . q{SHA1(CONCAT_WS('#', `rental_id`, ROUND(`foo`, 5)))},
   'FLOAT column is rounded to 5 places',
);

# sakila.film
$tbl = {
   db         => 'sakila',
   tbl        => 'film',
   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
};

like(
   $c->make_row_checksum(
      tbl  => $tbl,
      func => 'SHA1',
      trim => 1,
   ),
   qr{TRIM\(`title`\)},
   'VARCHAR column is trimmed',
);
|
||||
|
||||
# ############################################################################
|
||||
# make_chunk_checksum
|
||||
# ############################################################################
|
||||
{
   # SHA1 with a varchar CRC: the 40 hex digits are expected as three
   # XOR slices of 16, 16 and 8 digits.
   my $row_hash = 'SHA1(`film_id`)';
   my @slices   = map {
      my ( $start, $len ) = @$_;
      "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING($row_hash, $start, $len), "
         . "16, 10) AS UNSIGNED)), 10, 16), $len, '0')";
   } ( [1, 16], [17, 16], [33, 8] );

   is(
      $c->make_chunk_checksum(
         tbl       => $tbl,
         func      => 'SHA1',
         crc_width => 40,
         cols      => [qw(film_id)],
         crc_type  => 'varchar',
      ),
      'COUNT(*) AS cnt, COALESCE(LOWER(CONCAT('
         . join(', ', @slices)
         . ')), 0) AS crc',
      'sakila.film SHA1',
   );

   # Integer CRC types: the hash function result is expected to be XORed
   # directly.  Each case: [ function, crc type, extra args, test name ].
   foreach my $case (
      [ 'FNV_64', 'bigint', [],              'sakila.film FNV_64' ],
      [ 'FNV_64', 'bigint', [ buffer => 1 ], 'sakila.film FNV_64' ],
      [ 'CRC32',  'int',    [ buffer => 1 ], 'sakila.film CRC32'  ],
   ) {
      my ( $func, $crc_type, $extra_args, $name ) = @$case;
      is(
         $c->make_chunk_checksum(
            tbl       => $tbl,
            func      => $func,
            crc_width => 99,
            cols      => [qw(film_id)],
            @$extra_args,
            crc_type  => $crc_type,
         ),
         'COUNT(*) AS cnt, '
            . "COALESCE(LOWER(CONV(BIT_XOR(CAST(${func}(`film_id`) "
            . 'AS UNSIGNED)), 10, 16)), 0) AS crc',
         $name,
      );
   }
}
|
||||
|
||||
# #############################################################################
|
||||
# Sandbox tests.
|
||||
# #############################################################################
|
||||
{
   # Which of these is picked is not fixed by the test, so any is accepted.
   my $default_funcs = qr/CRC32|FNV_64|MD5/;

   like(
      $c->_get_hash_func(
         dbh => $dbh,
      ),
      $default_funcs,
      'CRC32, FNV_64 or MD5 is default',
   );

   like(
      $c->_get_hash_func(
         dbh  => $dbh,
         func => 'SHA99',
      ),
      $default_funcs,
      'SHA99 does not exist so I get CRC32 or friends',
   );
}

# Request MD5 on the command line as well, then reset the options.
@ARGV = qw(--function MD5);
$o->get_opts();
is(
   $c->_get_hash_func(
      dbh  => $dbh,
      func => 'MD5',
   ),
   'MD5',
   'MD5 requested and MD5 granted',
);
@ARGV = qw();
$o->get_opts();

# Each case: [ function, expected slice, test name ].
foreach my $case (
   [ 'SHA1', '2', 'SHA1 slice is 2' ],
   [ 'MD5',  '1', 'MD5 slice is 1'  ],
) {
   my ( $func, $slice, $name ) = @$case;
   is(
      $c->_optimize_xor(
         dbh  => $dbh,
         func => $func,
      ),
      $slice,
      $name,
   );
}

# Each case: [ function, expected CRC type, test name ].
foreach my $case (
   [ 'CRC32', 'int',     'CRC32 type' ],
   [ 'MD5',   'varchar', 'MD5 type'   ],
) {
   my ( $func, $crc_type, $name ) = @$case;
   is(
      $c->_get_crc_type(
         dbh  => $dbh,
         func => $func,
      ),
      $crc_type,
      $name
   );
}
|
||||
|
||||
# #############################################################################
|
||||
# Issue 94: Enhance mk-table-checksum, add a --ignorecols option
|
||||
# #############################################################################
|
||||
$sb->load_file('master', 't/lib/samples/issue_94.sql');

# Parse the table definition from the live server.
$tbl = {
   db         => 'test',
   tbl        => 'issue_94',
   tbl_struct => $tp->parse(
      $du->get_create_table($dbh, $q, 'test', 'issue_94')),
};

# Column c is listed in ignorecols, so only a and b may be checksummed.
my $query = $c->make_chunk_checksum(
   tbl        => $tbl,
   func       => 'CRC32',
   crc_width  => 16,
   crc_type   => 'int',
   opt_slice  => undef,
   cols       => undef,
   sep        => '#',
   replicate  => undef,
   precision  => undef,
   trim       => undef,
   ignorecols => { 'c' => 1 },
);

is(
   $query,
   q{COUNT(*) AS cnt, }
      . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(}
      . q{CRC32(CONCAT_WS('#', `a`, `b`))}
      . q{ AS UNSIGNED)), 10, 16)), 0) AS crc},
   'Ignores specified columns'
);
|
||||
|
||||
# ############################################################################
|
||||
# Done.
|
||||
# ############################################################################
|
||||
$sb->wipe_clean($dbh);
|
||||
exit;
|
||||
10
t/lib/samples/NibbleIterator/a-z.sql
Normal file
10
t/lib/samples/NibbleIterator/a-z.sql
Normal file
@@ -0,0 +1,10 @@
|
||||
-- Recreate the test database from scratch.
DROP DATABASE IF EXISTS test;
CREATE DATABASE test;
USE test;

-- One indexed, non-nullable column; the index is not unique.
CREATE TABLE t (
   c VARCHAR(16) NOT NULL,
   INDEX (c)
);

-- One row per letter, a through z.
INSERT INTO t VALUES
   ('a'), ('b'), ('c'), ('d'), ('e'),
   ('f'), ('g'), ('h'), ('i'), ('j'),
   ('k'), ('l'), ('m'), ('n'), ('o'),
   ('p'), ('q'), ('r'), ('s'), ('t'),
   ('u'), ('v'), ('w'), ('x'), ('y'),
   ('z');
|
||||
Reference in New Issue
Block a user