Merge nibble-iterator.

This commit is contained in:
Daniel Nichter
2011-09-14 08:31:08 -06:00
7 changed files with 1807 additions and 8 deletions

444
lib/NibbleIterator.pm Normal file
View File

@@ -0,0 +1,444 @@
# This program is copyright 2011 Percona Inc.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# NibbleIterator package
# ###########################################################################
{
# Package: NibbleIterator
# NibbleIterator nibbles tables.
package NibbleIterator;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
# Sub: new
#   Build a NibbleIterator for one table.  Only SQL text is generated here;
#   nothing is prepared or executed until the first call to next().
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   dbh          - dbh
#   tbl          - Table ref; its db, tbl and tbl_struct keys are used
#   OptionParser - Object whose get() supplies chunk-index and chunk-size
#   Quoter       - Object whose quote() quotes identifiers
#   TableNibbler - Object whose generate_asc_stmt() supplies the boundaries
#   TableParser  - Object whose find_best_index() picks the nibble index
#
# Optional Arguments:
#   where     - WHERE condition (without the keyword) added to every statement
#   select    - Select list used instead of the ascend columns
#   dms       - Statement prefix used instead of "SELECT " for the nibble
#   callbacks - Hashref of coderefs, stored as-is for next()
#
# Returns:
#   NibbleIterator object
sub new {
   my ( $class, %args ) = @_;
   my @required_args = qw(dbh tbl OptionParser Quoter TableNibbler TableParser);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh, $tbl, $o, $q) = @args{@required_args};

   # Get an index to nibble by.  We'll order rows by the index's columns.
   my $index = $args{TableParser}->find_best_index(
      $tbl->{tbl_struct},
      $o->get('chunk-index'),
   );
   die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
   my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};

   # Figure out how to nibble the table with the index.
   my $asc = $args{TableNibbler}->generate_asc_stmt(
      %args,
      tbl_struct => $tbl->{tbl_struct},
      index      => $index,
      asc_only   => 1,
   );
   MKDEBUG && _d('Ascend params:', Dumper($asc));

   # Make SQL statements, prepared on first call to next().  FROM and
   # ORDER BY are the same for all statements.  FORCE INDEX and ORDER BY
   # are needed to ensure deterministic nibbling.
   my $from     = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
   my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});

   # Fragments shared by several statements.  The user's WHERE is appended
   # with AND when a boundary condition precedes it, and introduced with
   # WHERE when it is the only condition.
   my $and_where  = $args{where} ? " AND ($args{where})" : '';
   my $only_where = $args{where} ? " WHERE $args{where}"  : '';
   my $scols      = join(', ', map { $q->quote($_) } @{$asc->{scols}});
   my $select     = $args{select}
                  ? $args{select}
                  : join(', ', map { $q->quote($_) } @{$asc->{cols}});
   my $verb       = $args{dms} ? "$args{dms} " : "SELECT ";

   # These statements are only executed once, so they don't use sths.
   my $first_lb_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . $scols
      . " FROM $from"
      . $only_where
      . " ORDER BY $order_by"
      . " LIMIT 1"
      . " /*first lower boundary*/";
   MKDEBUG && _d('First lower boundary statement:', $first_lb_sql);

   my $last_ub_sql
      = "SELECT /*!40001 SQL_NO_CACHE */ "
      . $scols
      . " FROM $from"
      . $only_where
      . " ORDER BY "
      . join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC'
      . " LIMIT 1"
      . " /*last upper boundary*/";
   MKDEBUG && _d('Last upper boundary statement:', $last_ub_sql);

   # Nibbles are inclusive, so for a..z, the nibbles are: a-e, f-j, k-o, p-t,
   # u-y, and z.  This complicates getting the next upper boundary because
   # if we use either (col >= lb AND col < ub) or (col > lb AND col <= ub)
   # in nibble_sql (below), then that fails for either the last or first
   # nibble respectively.  E.g. (col >= z AND col < z) doesn't work, nor
   # does (col > a AND col <= e).  Hence the fancy LIMIT 2 which returns
   # the upper boundary for the current nibble *and* the lower boundary
   # for the next nibble.  See _next_boundaries().
   my $ub_sql = _make_ub_sql(
      cols     => $asc->{scols},
      from     => $from,
      where    => $asc->{boundaries}->{'>='} . $and_where,
      order_by => $order_by,
      limit    => $o->get('chunk-size'),
      Quoter   => $q,
   );

   # This statement does the actual nibbling work; its rows are returned
   # to the caller via next().
   my $nibble_conditions
      = " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
      . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
      . $and_where;

   my $nibble_sql
      = $verb
      . $select
      . " FROM $from"
      . $nibble_conditions
      . " ORDER BY $order_by"
      . " /*nibble*/";
   MKDEBUG && _d('Nibble statement:', $nibble_sql);

   my $explain_nibble_sql
      = "EXPLAIN SELECT "
      . $select
      . " FROM $from"
      . $nibble_conditions
      . " ORDER BY $order_by"
      . " /*explain nibble*/";
   MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);

   # If the chunk size is >= number of rows in table, then we don't
   # need to chunk; we can just select all rows, in order, at once.
   # There is no boundary condition here, so the user's WHERE (if any)
   # must be introduced with WHERE, not AND.
   my $one_nibble_sql
      = $verb
      . $select
      . " FROM $from"
      . $only_where
      . " ORDER BY $order_by"
      . " /*one nibble*/";
   MKDEBUG && _d('One nibble statement:', $one_nibble_sql);

   my $explain_one_nibble_sql
      = "EXPLAIN SELECT "
      . $select
      . " FROM $from"
      . $only_where
      . " ORDER BY $order_by"
      . " /*explain one nibble*/";
   MKDEBUG && _d('Explain one nibble statement:', $explain_one_nibble_sql);

   my $self = {
      %args,
      asc                    => $asc,
      index                  => $index,
      from                   => $from,
      order_by               => $order_by,
      first_lb_sql           => $first_lb_sql,
      last_ub_sql            => $last_ub_sql,
      ub_sql                 => $ub_sql,
      nibble_sql             => $nibble_sql,
      explain_nibble_sql     => $explain_nibble_sql,
      one_nibble_sql         => $one_nibble_sql,
      explain_one_nibble_sql => $explain_one_nibble_sql,
      nibbleno               => 0,
      have_rows              => 0,
      rowno                  => 0,
   };

   return bless $self, $class;
}
# Sub: next
#   Return the next row of the table, nibble by nibble.  The first call
#   does the deferred setup (one-nibble check, statement handles, first and
#   last boundaries, init callback).
#
# Returns:
#   A copy of the next row as an arrayref, or nothing once every nibble
#   has been exhausted (the done callback is invoked on each such call).
sub next {
   my ($self) = @_;

   # First call, init everything.  This could be done in new(), but
   # all work is delayed until actually needed.
   if ( $self->{nibbleno} == 0 ) {
      $self->_can_nibble_once();
      $self->_prepare_sths();
      $self->_get_bounds();
      # $self->_check_index_usage();
      my $on_init = $self->{callbacks}->{init};
      if ( $on_init ) {
         $on_init->();
      }
   }

   # Keep going while the current nibble still has rows, or there is
   # another nibble to start.
   NIBBLE:
   while ( $self->{have_rows} || $self->_next_boundaries() ) {

      # No rows pending means _next_boundaries() just gave us the
      # boundaries of a new nibble, so execute it.
      unless ( $self->{have_rows} ) {
         $self->{nibbleno}++;
         MKDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
            join(', ', (@{$self->{lb}}, @{$self->{ub}})));

         my $on_exec = $self->{callbacks}->{exec_nibble};
         if ( $on_exec ) {
            # The callback runs the nibble itself and reports the row count.
            $self->{have_rows} = $on_exec->(
               dbh         => $self->{dbh},
               tbl         => $self->{tbl},
               sth         => $self->{nibble_sth},
               lb          => $self->{lb},
               ub          => $self->{ub},
               nibbleno    => $self->{nibbleno},
               explain_sth => $self->{explain_sth},
            );
         }
         else {
            my $sth = $self->{nibble_sth};
            $sth->execute(@{$self->{lb}}, @{$self->{ub}});
            $self->{have_rows} = $sth->rows();
         }
      }

      # Hand back one row of this nibble.  sth->{Active} is always true
      # with DBD::mysql v3, so we track the status manually.
      if ( $self->{have_rows} ) {
         MKDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
         if ( my $row = $self->{nibble_sth}->fetchrow_arrayref() ) {
            $self->{rowno}++;
            MKDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
            # fetchrow_arrayref re-uses an internal arrayref, so we must copy.
            return [ @$row ];
         }
      }

      # The nibble is finished (or had no rows): notify and reset.
      MKDEBUG && _d('No rows in nibble or nibble skipped');
      my $on_after = $self->{callbacks}->{after_nibble};
      if ( $on_after ) {
         $on_after->(
            dbh         => $self->{dbh},
            tbl         => $self->{tbl},
            nibbleno    => $self->{nibbleno},
            explain_sth => $self->{explain_sth},
         );
      }
      $self->{rowno}     = 0;
      $self->{have_rows} = 0;
   }

   MKDEBUG && _d('Done nibbling');
   my $on_done = $self->{callbacks}->{done};
   if ( $on_done ) {
      $on_done->(
         dbh => $self->{dbh},
         tbl => $self->{tbl},
      );
   }

   return;
}
# Sub: nibble_number
#   Returns the value of the nibble counter (0 until next() starts the
#   first nibble).
sub nibble_number {
   my $self = shift;
   return $self->{nibbleno};
}
# Sub: set_chunk_size
#   Change the number of rows per nibble by rebuilding the upper boundary
#   statement with a new LIMIT.
#
# Parameters:
#   $limit - New chunk size; must be true (_make_ub_sql() dies otherwise)
sub set_chunk_size {
   my ($self, $limit) = @_;
   MKDEBUG && _d('Setting new chunk size (LIMIT):', $limit);

   $self->{ub_sql} = _make_ub_sql(
      cols     => $self->{asc}->{scols},
      from     => $self->{from},
      where    => $self->{asc}->{boundaries}->{'>='}
                . ($self->{where} ? " AND ($self->{where})" : ''),
      order_by => $self->{order_by},
      limit    => $limit,
      Quoter   => $self->{Quoter},
   );

   # Only re-prepare when an upper boundary sth already exists.  If this
   # sub is called before next(), nothing has been prepared yet and
   # one_nibble has not been decided; preparing here would cache the
   # chunked nibble_sth, which next() would then reuse even for a
   # one-nibble table.  next() prepares from the new ub_sql on first call.
   if ( $self->{ub_sth} ) {
      $self->{ub_sth}->finish();
      $self->{ub_sth} = undef;
      $self->_prepare_sths();
   }

   return;
}
# Sub: _make_ub_sql
#   Build the statement that fetches the upper boundary of the current
#   nibble and the lower boundary of the next one (LIMIT 2 at an offset of
#   chunk size - 1).
#
# Required Arguments:
#   cols     - Arrayref of column names to select
#   from     - FROM clause (without the keyword)
#   where    - WHERE clause (without the keyword)
#   order_by - ORDER BY clause (without the keyword)
#   limit    - Chunk size
#   Quoter   - Object whose quote() quotes identifiers
#
# Returns:
#   SQL statement string
sub _make_ub_sql {
   my (%args) = @_;
   foreach my $name ( qw(cols from where order_by limit Quoter) ) {
      die "I need a $name argument" unless $args{$name};
   }

   my $quoter      = $args{Quoter};
   my $select_list = join(', ', map { $quoter->quote($_) } @{$args{cols}});
   my $offset      = (int($args{limit}) || 1) - 1;

   my $ub_sql = join(' ',
      "SELECT /*!40001 SQL_NO_CACHE */",
      $select_list,
      "FROM $args{from}",
      "WHERE $args{where}",
      "ORDER BY $args{order_by}",
      "LIMIT 2 OFFSET $offset",
      "/*upper boundary*/",
   );
   MKDEBUG && _d('Upper boundary statement:', $ub_sql);

   return $ub_sql;
}
# Sub: _can_nibble_once
#   Decide whether the whole table fits in a single nibble by comparing the
#   row count reported by SHOW TABLE STATUS with --chunk-size.  The result
#   is stored in $self->{one_nibble}.
#
# Returns:
#   1 if the table can be read in one nibble, else 0 (also 0, after a
#   warning, if SHOW TABLE STATUS fails)
sub _can_nibble_once {
   my ($self) = @_;
   my ($dbh, $tbl, $q) = @{$self}{qw(dbh tbl Quoter)};

   my $table_status;
   eval {
      my $sql = "SHOW TABLE STATUS FROM " . $q->quote($tbl->{db})
              . " LIKE " . $q->literal_like($tbl->{tbl});
      MKDEBUG && _d($sql);
      $table_status = $dbh->selectrow_hashref($sql);
      MKDEBUG && _d('Table status:', Dumper($table_status));
   };
   if ( $EVAL_ERROR ) {
      warn $EVAL_ERROR;
      return 0;
   }

   # The column may come back as Rows or rows; no value at all counts as 0.
   my $n_rows = 0;
   foreach my $key ( qw(Rows rows) ) {
      if ( defined $table_status->{$key} ) {
         $n_rows = $table_status->{$key};
         last;
      }
   }

   my $chunk_size = $self->{OptionParser}->get('chunk-size') || 1;
   if ( $n_rows <= $chunk_size ) {
      $self->{one_nibble} = 1;
   }
   else {
      $self->{one_nibble} = 0;
   }
   MKDEBUG && _d('One nibble:', $self->{one_nibble} ? 'yes' : 'no');

   return $self->{one_nibble};
}
# Sub: _prepare_sths
#   Prepare whichever statement handles are still missing.  In one-nibble
#   mode only the nibble and explain handles are needed; otherwise the
#   upper boundary handle is prepared as well.  Handles that already exist
#   are left alone.
sub _prepare_sths {
   my ($self) = @_;
   MKDEBUG && _d('Preparing statement handles');

   # Pairs of (handle key => key of the SQL it is prepared from), in the
   # order they are prepared.
   my @wanted = $self->{one_nibble}
      ? ( [ nibble_sth  => 'one_nibble_sql'         ],
          [ explain_sth => 'explain_one_nibble_sql' ], )
      : ( [ ub_sth      => 'ub_sql'                 ],
          [ nibble_sth  => 'nibble_sql'             ],
          [ explain_sth => 'explain_nibble_sql'     ], );

   foreach my $pair ( @wanted ) {
      my ($sth_key, $sql_key) = @$pair;
      $self->{$sth_key} ||= $self->{dbh}->prepare($self->{$sql_key});
   }

   return;
}
# Sub: _get_bounds
#   Fetch the first lower boundary and the last upper boundary of the
#   table.  Does nothing in one-nibble mode, which needs no boundaries.
#   If the table yields no first lower boundary, no_more_boundaries is set
#   so that _next_boundaries() stops instead of dereferencing an undefined
#   boundary.
sub _get_bounds {
   my ($self) = @_;
   return if $self->{one_nibble};

   my $dbh = $self->{dbh};

   $self->{next_lb} = $dbh->selectrow_arrayref($self->{first_lb_sql});
   MKDEBUG && _d('First lower boundary:', Dumper($self->{next_lb}));

   $self->{last_ub} = $dbh->selectrow_arrayref($self->{last_ub_sql});
   MKDEBUG && _d('Last upper boundary:', Dumper($self->{last_ub}));

   # No first row means there is nothing to nibble (e.g. the table status
   # row count was non-zero but the query matched no rows).
   if ( !$self->{next_lb} ) {
      MKDEBUG && _d('No first lower boundary, nothing to nibble');
      $self->{no_more_boundaries} = 1;
   }

   return;
}
# Sub: _check_index_usage
#   Intended to verify, from EXPLAIN output, that MySQL uses the chunk
#   index, and to die if it chose another index (or none).
#
# NOTE(review): this sub looks unfinished.  The statement passed to
# selectall_arrayref() is an empty string, and the only call site in
# next() is commented out.  Confirm which statement should be EXPLAINed
# before enabling it.
sub _check_index_usage {
my ($self) = @_;
my ($dbh, $tbl, $q) = @{$self}{qw(dbh tbl Quoter)};
my $explain;
eval {
# NOTE(review): empty SQL string; presumably an EXPLAIN statement belongs here.
$explain = $dbh->selectall_arrayref("", {Slice => {}});
};
if ( $EVAL_ERROR ) {
# Failure to check is not fatal: warn and carry on.
warn "Cannot check if MySQL is using the chunk index: $EVAL_ERROR";
return;
}
# Only the first EXPLAIN row is examined; a missing key becomes ''.
my $explain_index = lc($explain->[0]->{key} || '');
MKDEBUG && _d('EXPLAIN index:', $explain_index);
# NOTE(review): the EXPLAIN key is lowercased but $self->{index} is not, so
# an index name containing uppercase letters would never compare equal.
if ( $explain_index ne $self->{index} ) {
die "Cannot nibble table $tbl->{db}.$tbl->{tbl} because MySQL chose "
. ($explain_index ? "the `$explain_index`" : 'no') . ' index'
. " instead of the chunk index `$self->{asc}->{index}`";
}
return;
}
# Sub: _next_boundaries
#   Set $self->{lb} and $self->{ub} for the next nibble, and remember the
#   lower boundary of the nibble after that.
#
# Returns:
#   1 if boundaries for another nibble were set, nothing if there are no
#   more boundaries
sub _next_boundaries {
   my ($self) = @_;

   if ( $self->{no_more_boundaries} ) {
      MKDEBUG && _d('No more boundaries');
      return;
   }

   # One nibble covers the whole table: no bind values are needed, and
   # there is nothing after it.
   if ( $self->{one_nibble} ) {
      my $no_params = [];
      $self->{ub} = $no_params;
      $self->{lb} = $no_params;
      $self->{no_more_boundaries} = 1;  # for next call
      return 1;
   }

   # This nibble starts where the previous lookup said the next one would.
   my $ub_sth = $self->{ub_sth};
   $self->{lb} = $self->{next_lb};
   MKDEBUG && _d($ub_sth->{Statement}, 'params:',
      join(', ', @{$self->{lb}}));
   $ub_sth->execute(@{$self->{lb}});

   # Up to two rows come back: the upper boundary of this nibble, then
   # the lower boundary of the next nibble.
   my $boundary = $ub_sth->fetchall_arrayref();
   MKDEBUG && _d('Next boundary:', Dumper($boundary));

   if ( !$boundary || !@$boundary ) {
      # Fewer rows remain than a full chunk: end at the table's last row.
      $self->{ub} = $self->{last_ub};
      $self->{no_more_boundaries} = 1;  # for next call
      MKDEBUG && _d('Last upper boundary:', Dumper($self->{ub}));
   }
   else {
      $self->{ub} = $boundary->[0];  # this nibble
      if ( my $following_lb = $boundary->[1] ) {
         $self->{next_lb} = $following_lb;  # next nibble
      }
      else {
         $self->{no_more_boundaries} = 1;  # for next call
         MKDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
      }
   }
   $ub_sth->finish();

   return 1;  # have boundary
}
# Sub: DESTROY
#   Finish every statement handle stored under a key ending in _sth.
#   Slots that are empty are skipped: set_chunk_size() sets ub_sth to
#   undef, and calling finish() on undef would die during destruction.
sub DESTROY {
   my ( $self ) = @_;
   foreach my $key ( keys %$self ) {
      next unless $key =~ m/_sth$/;
      my $sth = $self->{$key};
      $sth->finish() if $sth;
   }
   return;
}
# Sub: _d
#   Print a debug line to STDERR, prefixed with the caller's package, the
#   caller's line number and the PID.  Undefined arguments are printed as
#   'undef', and embedded newlines are followed by '# ' so continuation
#   lines stay commented.
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}
1;
}
# ###########################################################################
# End NibbleIterator package
# ###########################################################################

473
lib/RowChecksum.pm Normal file
View File

@@ -0,0 +1,473 @@
# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# RowChecksum package
# ###########################################################################
{
# Package: RowChecksum
# RowChecksum makes checksum expressions for checksumming rows and chunks.
package RowChecksum;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
use List::Util qw(max);
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
# Sub: new
#
# Required Arguments:
#   OptionParser - Object whose get() supplies option values
#   Quoter       - Object whose quote() quotes identifiers
#
# Returns:
#   RowChecksum object holding a copy of all given arguments
sub new {
   my ( $class, %args ) = @_;
   my @missing = grep { !defined $args{$_} } qw(OptionParser Quoter);
   die "I need a $missing[0] argument" if @missing;
   return bless { %args }, $class;
}
# Sub: make_row_checksum
#   Make a SELECT column list to checksum a row.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   tbl - Table ref; its tbl_struct (cols, type_for, null_cols) is used
#
# Optional Arguments:
#   func            - Hash function name; default is the --function option
#   sep             - Separator for CONCAT_WS(); default #
#   cols            - Arrayref of columns to checksum
#   trim            - Wrap VARCHAR cols in TRIM() for v4/v5 compatibility
#   float_precision - Wrap FLOAT/DOUBLE cols in ROUND() with this precision
#   ignorecols      - Columns to exclude from checksum, either as an
#                     arrayref of names or a hashref keyed by name
#   no_cols         - Return only the checksum expression, without the
#                     leading list of columns
#
# Returns:
#   Column list for SELECT
sub make_row_checksum {
   my ( $self, %args ) = @_;
   my @required_args = qw(tbl);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl) = @args{@required_args};

   my $o          = $self->{OptionParser};
   my $q          = $self->{Quoter};
   my $tbl_struct = $tbl->{tbl_struct};
   my $func       = $args{func} || uc($o->get('function'));

   # The separator is embedded in a single-quoted SQL string, so quotes
   # are stripped; fall back to # if nothing is left.
   my $sep = $args{sep} || '#';
   $sep =~ s/'//g;
   $sep ||= '#';

   # Accept ignorecols as documented (arrayref) as well as the hashref
   # form; a hash allows a simpler grep when building %cols below.
   my $ignorecols = $args{ignorecols} || {};
   if ( ref $ignorecols eq 'ARRAY' ) {
      $ignorecols = { map { $_ => 1 } @$ignorecols };
   }

   # Generate the expression that will turn a row into a checksum.
   # Choose columns.  Normalize query results: make FLOAT and TIMESTAMP
   # stringify uniformly.
   my %cols = map  { lc($_) => 1 }
              grep { !exists $ignorecols->{$_} }
              ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});

   # @cols holds the expressions fed to the checksum function;
   # @select_cols holds the same expressions as they appear in the leading
   # column list, aliased back to the column name where the expression
   # would otherwise change the result column's name ("col + 0" or
   # "TRIM(col)" instead of just "col").  The alias is taken from the
   # quoted name directly rather than parsed back out of the expression,
   # so names containing spaces or parentheses are aliased correctly.
   my %seen;
   my @cols;
   my @select_cols;
   foreach my $col ( grep { $cols{$_} && !$seen{$_}++ } @{$tbl_struct->{cols}} ) {
      my $type   = $tbl_struct->{type_for}->{$col};
      my $quoted = $q->quote($col);
      my $expr   = $quoted;
      my $select = $quoted;
      if ( $type eq 'timestamp' ) {
         $expr   = "$quoted + 0";
         $select = "$expr AS $quoted";
      }
      elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
         $expr   = "ROUND($quoted, $args{float_precision})";
         $select = $expr;
      }
      elsif ( $args{trim} && $type =~ m/varchar/ ) {
         $expr   = "TRIM($quoted)";
         $select = "$expr AS $quoted";
      }
      push @cols,        $expr;
      push @select_cols, $select;
   }

   # Prepend columns to query, resulting in "col1, col2, FUNC(..col1, col2...)",
   # unless caller says not to.  The only caller that says not to is
   # make_chunk_checksum() which uses this row checksum as part of a larger
   # checksum.  Other callers, like TableSyncer::make_checksum_queries() call
   # this sub directly and want the actual columns.
   my $query;
   if ( !$args{no_cols} ) {
      $query = join(', ', @select_cols) . ', ';
   }

   if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
      # Add a bitmap of which nullable columns are NULL.
      my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
      if ( @nulls ) {
         my $bitmap = "CONCAT("
            . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
            . ")";
         push @cols, $bitmap;
      }

      $query .= @cols > 1
              ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))'
              : "$func($cols[0])";
   }
   else {
      # As a special case, FNV1A_64/FNV_64 doesn't need its arguments
      # concatenated, and doesn't need a bitmap of NULLs.
      my $fnv_func = uc $func;
      $query .= "$fnv_func(" . join(', ', @cols) . ')';
   }

   MKDEBUG && _d('Row checksum:', $query);
   return $query;
}
# Sub: make_chunk_checksum
#   Make a SELECT column list to checksum a chunk of rows.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   tbl - Table ref
#   dbh - dbh if func, crc_width, and crc_type aren't given
#
# Optional Arguments:
#   func      - Hash function name
#   crc_width - CRC width
#   crc_type  - CRC type
#
# Returns:
#   Column list for SELECT
sub make_chunk_checksum {
   my ( $self, %args ) = @_;
   die "I need a tbl argument" unless $args{tbl};

   # A dbh is only needed to discover whatever CRC details weren't given.
   my $have_crc_details = $args{func} && $args{crc_width} && $args{crc_type};
   die "I need a dbh argument" unless $args{dbh} || $have_crc_details;

   my $o        = $self->{OptionParser};
   my %crc_args = $self->get_crc_args(%args);
   my $int_crc  = $crc_args{crc_type} =~ m/int$/;

   # NOTE(review): $opt_slice is computed (and a warning issued if no slice
   # works) but it is not passed to _make_xor_slices() below, so
   # --optimize-xor does not change the generated SQL here.  Confirm
   # whether that is intended.
   my $opt_slice;
   if ( $o->get('optimize-xor') && !$int_crc ) {
      $opt_slice = $self->_optimize_xor(%args, %crc_args);
      warn "Cannot use --optimize-xor" unless defined $opt_slice;
   }
   MKDEBUG && _d("Checksum strat:", Dumper(\%crc_args));

   # This checksum algorithm concatenates the columns in each row and
   # checksums them, then slices this checksum up into 16-character chunks.
   # It then converts them to BIGINTs with the CONV() function, and then
   # groupwise XORs them to produce an order-independent checksum of the
   # slice over all the rows.  It then converts these back to base 16 and
   # puts them back together.  The effect is the same as XORing a very wide
   # (32 characters = 128 bits for MD5, and SHA1 is even larger) unsigned
   # integer over all the rows.
   #
   # As a special case, integer functions do not need to be sliced.  They
   # can be fed right into BIT_XOR after a cast to UNSIGNED.
   my $row_checksum = $self->make_row_checksum(
      %args,
      %crc_args,
      no_cols => 1
   );

   my $xor_expr;
   if ( $int_crc ) {
      $xor_expr = "CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), 10, 16)";
   }
   else {
      my $slices = $self->_make_xor_slices(
         row_checksum => $row_checksum,
         %crc_args,
      );
      $xor_expr = "CONCAT($slices)";
   }
   my $crc = "COALESCE(LOWER($xor_expr), 0)";

   my $select = "COUNT(*) AS cnt, $crc AS crc";
   MKDEBUG && _d('Chunk checksum:', $select);
   return $select;
}
# Sub: get_crc_args
#   Fill in whichever of func, crc_width and crc_type the caller did not
#   supply; the missing ones are looked up via _get_hash_func(),
#   _get_crc_width() and _get_crc_type().
#
# Parameters:
#   %args - Arguments; passed through to the lookup subs
#
# Returns:
#   List of key-value pairs: func, crc_width, crc_type
sub get_crc_args {
   my ($self, %args) = @_;

   my %crc;
   $crc{func}      = $args{func}
                  || $self->_get_hash_func(%args);
   $crc{crc_width} = $args{crc_width}
                  || $self->_get_crc_width(%args, func => $crc{func});
   $crc{crc_type}  = $args{crc_type}
                  || $self->_get_crc_type(%args, func => $crc{func});

   return map { $_ => $crc{$_} } qw(func crc_width crc_type);
}
# Sub: _get_hash_func
#   Get the first usable hash function.  The function named by the
#   --function option, if any, is tried first, then CRC32, FNV1A_64,
#   FNV_64, MD5 and SHA1 in that order.  A function is usable if a test
#   query calling it does not throw an error.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   dbh - dbh
#
# Returns:
#   Function name.  Dies with the collected reasons if no function works.
sub _get_hash_func {
   my ( $self, %args ) = @_;
   my @required_args = qw(dbh);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh) = @args{@required_args};
   my $o     = $self->{OptionParser};
   my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);

   if ( my $func = $o->get('function') ) {
      unshift @funcs, $func;
   }

   my $error = '';
   FUNC:
   foreach my $func ( @funcs ) {
      eval {
         my $sql = "SELECT $func('test-string')";
         MKDEBUG && _d($sql);
         $dbh->do($sql);
      };
      if ( my $e = $EVAL_ERROR ) {
         # Report just the server's reason when the error has the usual
         # "... failed: REASON at FILE line N" shape, else the whole error.
         my ($reason) = $e =~ m/failed: (.*?) at \S+ line/;
         if ( !defined $reason ) {
            $reason = $e;
            chomp $reason;
         }
         $error .= qq{$func cannot be used because "$reason"\n};
         MKDEBUG && _d($func, 'cannot be used because', $reason);
         # This function failed, so try the next candidate instead of
         # returning it.
         next FUNC;
      }
      MKDEBUG && _d('Chosen hash func:', $func);
      return $func;
   }

   die $error || 'No hash functions (CRC32, MD5, etc.) are available';
}
# Returns how wide/long, in characters, a CRC function is.  The width is
# never less than 16.  FNV_64 and FNV1A_64 are 16 wide without asking the
# server; for other functions the length of FUNC('a') is measured, and 16
# is used if that query fails.
sub _get_crc_width {
   my ( $self, %args ) = @_;
   foreach my $name ( qw(dbh func) ) {
      die "I need a $name argument" unless $args{$name};
   }
   my ($dbh, $func) = @args{qw(dbh func)};

   my $crc_width = 16;
   return $crc_width if uc $func eq 'FNV_64' || uc $func eq 'FNV1A_64';

   # Best effort: any error leaves the default width in place.
   eval {
      my ($val) = $dbh->selectrow_array("SELECT $func('a')");
      $crc_width = max(16, length($val));
   };

   return $crc_width;
}
# Returns a CRC function's MySQL type, read from the statement handle's
# mysql_type_name after executing SELECT FUNC('a').  A bigint whose
# reported length is less than 20 is returned as 'int'.  If executing the
# statement fails, whatever type was determined so far (initially '') is
# returned.
sub _get_crc_type {
   my ( $self, %args ) = @_;
   foreach my $name ( qw(dbh func) ) {
      die "I need a $name argument" unless $args{$name};
   }
   my ($dbh, $func) = @args{qw(dbh func)};

   my ($type, $length) = ('', 0);
   my $sql = "SELECT $func('a')";
   my $sth = $dbh->prepare($sql);
   eval {
      $sth->execute();
      $type   = $sth->{mysql_type_name}->[0];
      $length = $sth->{mysql_length}->[0];
      MKDEBUG && _d($sql, $type, $length);
      $type = 'int' if $type eq 'bigint' && $length < 20;
   };
   $sth->finish;

   MKDEBUG && _d('crc_type:', $type, 'length:', $length);
   return $type;
}
# Figure out which slice in a sliced BIT_XOR checksum should have the actual
# concat-columns-and-checksum, and which should just get variable references.
# Each slice position is tried in turn until the sliced result equals the
# unsliced FUNC('a').  Returns the slice number, or undef if no slice works.
# Dies if called for FNV1A_64, FNV_64 or CRC32, which never need this.
# The original author was not sure this code is needed (the last slice
# always seemed to be the one that works) but preferred to be paranoid.
# TODO: this function needs a hint to know when a function returns an
# integer.  CRC32 is an example.  In these cases no optimization or slicing
# is necessary.
sub _optimize_xor {
   my ( $self, %args ) = @_;
   foreach my $name ( qw(dbh func) ) {
      die "I need a $name argument" unless $args{$name};
   }
   my ($dbh, $func) = @args{qw(dbh func)};

   die "$func never needs BIT_XOR optimization"
      if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;

   # The reference value every sliced attempt is compared against.
   my $unsliced  = uc( $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0] );
   my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);

   my $opt_slice = 0;
   my $sliced    = '';
   my $start     = 1;

   # Try different positions till sliced result equals non-sliced.  The
   # first attempt is always made; the exit test comes after each attempt.
   TRY_SLICE:
   while ( 1 ) {
      MKDEBUG && _d('Trying slice', $opt_slice);
      $dbh->do('SET @crc := "", @cnt := 0');
      my $slices = $self->_make_xor_slices(
         row_checksum => "\@crc := $func('a')",
         crc_width    => $crc_width,
         opt_slice    => $opt_slice,
      );

      my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
      $sliced = ($dbh->selectrow_array($sql))[0];
      if ( $sliced ne $unsliced ) {
         MKDEBUG && _d('Slice', $opt_slice, 'does not work');
         $start += 16;
         ++$opt_slice;
      }

      last TRY_SLICE unless $start < $crc_width && $sliced ne $unsliced;
   }

   if ( $sliced ne $unsliced ) {
      MKDEBUG && _d('No slice works');
      return undef;
   }

   MKDEBUG && _d('Slice', $opt_slice, 'works');
   return $opt_slice;
}
# Sub: _make_xor_slices
#   Make an expression that will do a bitwise XOR over a very wide integer,
#   such as that returned by SHA1, which is too large to put into BIT_XOR().
#   The value is cut into slices of at most 16 characters.  If an opt_slice
#   is given, a variable is used to avoid calling row_checksum multiple
#   times.
#
# Parameters:
#   %args - Arguments
#
# Required Arguments:
#   row_checksum - <make_row_checksum()> query
#   crc_width    - CRC width (<_get_crc_width()>)
#
# Optional Arguments:
#   opt_slice - Slice number.  Use a variable to avoid calling row_checksum
#               multiple times.
#
# Returns:
#   SQL expression
sub _make_xor_slices {
   my ( $self, %args ) = @_;
   foreach my $name ( qw(row_checksum crc_width) ) {
      die "I need a $name argument" unless $args{$name};
   }
   my ($row_checksum, $crc_width, $opt_slice)
      = @args{qw(row_checksum crc_width opt_slice)};

   # Create a series of slices with @crc as a placeholder.
   my @slices;
   my $start = 1;
   while ( $start <= $crc_width ) {
      my $remaining = $crc_width - $start + 1;
      my $len       = $remaining > 16 ? 16 : $remaining;
      push @slices,
         "LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10)"
         . " AS UNSIGNED)), 10, 16), $len, '0')";
      $start += 16;
   }

   # Replace the placeholder with the expression.  If specified, add a
   # user-variable optimization so the expression goes in only one of the
   # slices.  This optimization relies on @crc being '' when the query begins.
   if ( defined $opt_slice && $opt_slice < @slices ) {
      $slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
   }
   else {
      s/\@crc/$row_checksum/ for @slices;
   }

   return join(', ', @slices);
}
# Queries the replication table for chunks that differ from the master's data.
# A chunk differs if its row count differs from the master's, its CRC
# differs, or exactly one of the two CRCs is NULL.
#
# Parameters:
#   $dbh   - dbh to run the query on
#   $table - Name of the replication table; it is interpolated into the
#            statement as-is, so it must already be quoted if it needs to be
#
# Returns:
#   List of rows as hashrefs (db, tbl, chunk, boundaries, cnt_diff,
#   crc_diff, this_cnt, master_cnt, this_crc, master_crc)
sub find_replication_differences {
my ( $self, $dbh, $table ) = @_;
# Every run of whitespace in the heredoc, newlines included, is collapsed
# to a single space so the statement is sent (and logged) as one line.
(my $sql = <<" EOF") =~ s/\s+/ /gm;
SELECT db, tbl, chunk, boundaries,
COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,
COALESCE(
this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),
0
) AS crc_diff,
this_cnt, master_cnt, this_crc, master_crc
FROM $table
WHERE master_cnt <> this_cnt OR master_crc <> this_crc
OR ISNULL(master_crc) <> ISNULL(this_crc)
EOF
MKDEBUG && _d($sql);
# Slice => {} asks for each row as a hashref keyed by column name.
my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
return @$diffs;
}
# Sub: _d
#   Print a debug line to STDERR, prefixed with the caller's package, the
#   caller's line number and the PID.  Undefined arguments are printed as
#   'undef', and embedded newlines are followed by '# ' so continuation
#   lines stay commented.
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}
1;
}
# ###########################################################################
# End RowChecksum package
# ###########################################################################

View File

@@ -209,9 +209,9 @@ sub next_schema_object {
if ( my $schema = $self->{Schema} ) {
$schema->add_schema_object($schema_obj);
}
MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
}
MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
return $schema_obj;
}

View File

@@ -65,28 +65,26 @@ sub generate_asc_stmt {
die "I need a $arg argument" unless defined $args{$arg};
}
my ($tbl_struct, $index) = @args{@required_args};
my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}};
my $q = $self->{Quoter};
# This shouldn't happen. TableSyncNibble shouldn't call us with
# a nonexistent index.
die "Index '$index' does not exist in table"
unless exists $tbl_struct->{keys}->{$index};
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
my @asc_slice;
MKDEBUG && _d('Will ascend index', $index);
# These are the columns we'll ascend.
@asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
MKDEBUG && _d('Will ascend index', $index);
MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}};
if ( $args{asc_first} ) {
@asc_cols = $asc_cols[0];
MKDEBUG && _d('Ascending only first column');
}
MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols));
# We found the columns by name, now find their positions for use as
# array slices, and make sure they are included in the SELECT list.
my @asc_slice;
my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols };
foreach my $col ( @asc_cols ) {
if ( !exists $col_posn{$col} ) {

457
t/lib/NibbleIterator.t Normal file
View File

@@ -0,0 +1,457 @@
#!/usr/bin/perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use Schema;
use SchemaIterator;
use Quoter;
use DSNParser;
use Sandbox;
use OptionParser;
use MySQLDump;
use TableParser;
use TableNibbler;
use RowChecksum;
use NibbleIterator;
use PerconaTest;
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
# Connect to the sandbox master; without it every test is skipped.
my $dp  = DSNParser->new(opts => $dsn_opts);
my $sb  = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');

if ( $dbh ) {
   plan tests => 20;
}
else {
   plan skip_all => 'Cannot connect to sandbox master';
}

# Modules shared by every iterator the tests create.
my $q  = Quoter->new();
my $tp = TableParser->new(Quoter => $q);
my $du = MySQLDump->new();
my $nb = TableNibbler->new(TableParser => $tp, Quoter => $q);
my $o  = OptionParser->new(description => 'NibbleIterator');
my $rc = RowChecksum->new(OptionParser => $o, Quoter => $q);

# Option specs are read from pt-table-checksum.
$o->get_specs("$trunk/bin/pt-table-checksum");

my %common_modules = (
   Quoter       => $q,
   TableParser  => $tp,
   MySQLDump    => $du,
   TableNibbler => $nb,
   OptionParser => $o,
);

# Directory of the sample SQL files loaded by make_nibble_iter().
my $in = "/t/lib/samples/NibbleIterator/";
# Build a NibbleIterator for one table of the sandbox master.
#
# Arguments:
#   sql_file  - Optional sample file (under $in) to load into the master first
#   argv      - Optional arrayref of command line options to parse
#   db, tbl   - Database and table to nibble
#   callbacks - Optional callbacks passed to the iterator
#   select    - Optional select list passed to the iterator
#
# Returns:
#   NibbleIterator object
sub make_nibble_iter {
   my (%args) = @_;

   if ( my $file = $args{sql_file} ) {
      $sb->load_file('master', "$in/$file");
   }

   # Options are parsed from @ARGV, so fake the command line.
   @ARGV = $args{argv} ? @{$args{argv}} : ();
   $o->get_opts();

   # Walk the whole schema so the requested table can be looked up.
   my $schema = Schema->new();
   my $si     = SchemaIterator->new(
      dbh      => $dbh,
      keep_ddl => 1,
      Schema   => $schema,
      %common_modules,
   );
   1 while $si->next_schema_object();

   my $iter = NibbleIterator->new(
      dbh       => $dbh,
      tbl       => $schema->get_table($args{db}, $args{tbl}),
      callbacks => $args{callbacks},
      select    => $args{select},
      %common_modules,
   );

   return $iter;
}
# ############################################################################
# a-z w/ chunk-size 5, z is final boundary and single value
# ############################################################################
my $ni = make_nibble_iter(
   sql_file => "a-z.sql",
   db       => 'test',
   tbl      => 't',
   argv     => [qw(--databases test --chunk-size 5)],
);

my @rows = ();

# The rows expected from each successive group of five next() calls.
# There's only 1 row left for the last group, but extra calls shouldn't
# return anything or crash.
my @az_nibbles = (
   [qw(a b c d e)],
   [qw(f g h i j)],
   [qw(k l m n o)],
   [qw(p q r s t)],
   [qw(u v w x y)],
   [qw(z)],
);

foreach my $nibbleno ( 1 .. scalar @az_nibbles ) {
   my @letters = @{ $az_nibbles[$nibbleno - 1] };
   @rows = map { $ni->next() } 1..5;
   is_deeply(
      \@rows,
      [ map { [$_] } @letters ],
      "a-z nibble $nibbleno"
   ) or print Dumper(\@rows);
}
# ############################################################################
# a-y w/ chunk-size 5, even nibbles
# ############################################################################
$dbh->do('delete from test.t where c="z"');
my $all_rows = $dbh->selectall_arrayref('select * from test.t order by c');
$ni = make_nibble_iter(
db => 'test',
tbl => 't',
argv => [qw(--databases test --chunk-size 5)],
);
@rows = ();
for (1..26) {
push @rows, $ni->next();
}
is_deeply(
\@rows,
$all_rows,
'a-y even nibble'
) or print Dumper(\@rows);
# ############################################################################
# chunk-size exceeds number of rows, 1 nibble
# ############################################################################
$ni = make_nibble_iter(
   db   => 'test',
   tbl  => 't',
   argv => [qw(--databases test --chunk-size 100)],
);

# The expected result is $all_rows, the ordered table contents selected
# earlier in this file.
@rows = ();
push @rows, $ni->next() for 1..27;

is_deeply(\@rows, $all_rows, '1 nibble')
   or print Dumper(\@rows);
# ############################################################################
# single row table
# ############################################################################

# Keep only the 'd' row.
$dbh->do('delete from test.t where c != "d"');

$ni = make_nibble_iter(
   db   => 'test',
   tbl  => 't',
   argv => [qw(--databases test --chunk-size 100)],
);

# Three calls, but only one row should ever be collected.
@rows = ();
push @rows, $ni->next() for 1..3;

is_deeply(\@rows, [ ['d'] ], 'single row table')
   or print Dumper(\@rows);
# ############################################################################
# empty table
# ############################################################################
$dbh->do('truncate table test.t');

$ni = make_nibble_iter(
   db   => 'test',
   tbl  => 't',
   argv => [qw(--databases test --chunk-size 100)],
);

# Nothing should be collected no matter how often next() is called.
@rows = ();
push @rows, $ni->next() for 1..3;

is_deeply(\@rows, [], 'empty table')
   or print Dumper(\@rows);
# ############################################################################
# Callbacks
# ############################################################################

# Each callback only prints a marker line; the printed text is captured
# below and compared with the expected sequence.
$ni = make_nibble_iter(
   sql_file  => "a-z.sql",
   db        => 'test',
   tbl       => 't',
   argv      => [qw(--databases test --chunk-size 2)],
   callbacks => {
      init         => sub { print "init\n" },
      after_nibble => sub {
         print "after nibble " . $ni->nibble_number() . "\n";
      },
      done         => sub { print "done\n" },
   }
);

$dbh->do('delete from test.t limit 20');  # 6 rows left

# Eight next() calls against the six remaining rows.
my $output = output(
   sub {
      $ni->next() for 1..8;
   },
);

is(
   $output,
   join("\n",
      'init',
      'after nibble 1',
      'after nibble 2',
      'after nibble 3',
      'done',
      'done',
   ) . "\n",
   "callbacks"
);
# ############################################################################
# Nibble a larger table by numeric pk id
# ############################################################################
SKIP: {
   # All 8 tests in this block need the sakila sample database.
   skip "Sakila database is not loaded", 8
      unless @{ $dbh->selectall_arrayref('show databases like "sakila"') };

   $ni = make_nibble_iter(
      db   => 'sakila',
      tbl  => 'payment',
      argv => [qw(--databases sakila --tables payment --chunk-size 100)],
   );

   # Each true return from next() is counted, so this is the number of
   # rows fetched from the table.
   my $row_count = 0;
   $row_count++ while $ni->next();
   is(
      $row_count,
      16049,
      "Nibble sakila.payment (16049 rows)"
   );

   my $tbl = {
      db         => 'sakila',
      tbl        => 'country',
      tbl_struct => $tp->parse(
         $du->get_create_table($dbh, $q, 'sakila', 'country')),
   };
   my $chunk_checksum = $rc->make_chunk_checksum(
      dbh => $dbh,
      tbl => $tbl,
   );

   # Pass the chunk checksum expression as the iterator's select argument
   # and check the [count, crc] pair returned by each of five next() calls.
   $ni = make_nibble_iter(
      db     => 'sakila',
      tbl    => 'country',
      argv   => [qw(--databases sakila --tables country --chunk-size 25)],
      select => $chunk_checksum,
   );

   my $chunk_number = 0;
   foreach my $expected_row (
      [25, 'da79784d'],
      [25, 'e860c4f9'],
      [25, 'eb651f58'],
      [25, '2d87d588'],
      [ 9, 'beb4a180'],
   ) {
      $chunk_number++;
      my $row = $ni->next();
      is_deeply(
         $row,
         $expected_row,
         "SELECT chunk checksum $chunk_number FROM sakila.country"
      ) or print STDERR Dumper($row);
   }

   # #########################################################################
   # exec_nibble callback and explain_sth
   # #########################################################################
   my @expl;
   $ni = make_nibble_iter(
      db        => 'sakila',
      tbl       => 'country',
      argv      => [qw(--databases sakila --tables country --chunk-size 60)],
      select    => $chunk_checksum,
      callbacks => {
         # Run the EXPLAIN statement handle with the nibble's lower and
         # upper boundary values and keep the first plan row.
         # NOTE(review): the 0 return value presumably tells the iterator
         # that no rows were fetched for this nibble -- confirm against
         # NibbleIterator.
         exec_nibble => sub {
            my (%args) = @_;
            my ($expl_sth, $lb, $ub) = @args{qw(explain_sth lb ub)};
            $expl_sth->execute(@$lb, @$ub);
            push @expl, $expl_sth->fetchrow_hashref();
            return 0;
         },
      }
   );
   $ni->next();
   $ni->next();
   is_deeply(
      \@expl,
      [
         {
            id            => '1',
            key           => 'PRIMARY',
            key_len       => '2',
            possible_keys => 'PRIMARY',
            ref           => undef,
            rows          => '54',
            select_type   => 'SIMPLE',
            table         => 'country',
            type          => 'range',
            extra         => 'Using where',
         },
         {
            id            => '1',
            key           => 'PRIMARY',
            key_len       => '2',
            possible_keys => 'PRIMARY',
            ref           => undef,
            rows          => '49',
            select_type   => 'SIMPLE',
            table         => 'country',
            type          => 'range',
            extra         => 'Using where',
         },
      ],
      'exec_nibble callback and explain_sth'
   );

   # #########################################################################
   # film_actor, multi-column pk
   # #########################################################################
   $ni = make_nibble_iter(
      db   => 'sakila',
      tbl  => 'film_actor',
      argv => [qw(--tables sakila.film_actor --chunk-size 1000)],
   );

   $row_count = 0;
   $row_count++ while $ni->next();
   is(
      $row_count,
      5462,
      "Nibble sakila.film_actor (multi-column pk)"
   );
}
# ############################################################################
# Reset chunk size on-the-fly.
# ############################################################################
$ni = make_nibble_iter(
   sql_file => "a-z.sql",
   db       => 'test',
   tbl      => 't',
   argv     => [qw(--databases test --chunk-size 5)],
);

# Group the fetched values by the nibble number reported when each row is
# returned, and switch the chunk size to 20 once the fifth row has been read.
@rows = ();
my $i = 0;
while ( my $row = $ni->next() ) {
   my $nibble_number = $ni->nibble_number();
   push @{ $rows[$nibble_number] }, @$row;
   $ni->set_chunk_size(20) if ++$i == 5;
}

is_deeply(
   \@rows,
   [
      undef,        # no 0 nibble
      ['a'..'e'],   # nibble 1
      ['f'..'y'],   # nibble 2, should contain 20 chars
      ['z'],        # last nibble
   ],
   "Change chunk size while nibbling"
) or print STDERR Dumper(\@rows);

# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($dbh);
exit;

417
t/lib/RowChecksum.t Normal file
View File

@@ -0,0 +1,417 @@
#!/usr/bin/perl
BEGIN {
   # The toolkit's lib/ directory is located through this environment
   # variable, which must name an existing directory.
   my $branch = $ENV{PERCONA_TOOLKIT_BRANCH};
   if ( !($branch && -d $branch) ) {
      die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n";
   }
   unshift @INC, "$branch/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use RowChecksum;
use TableParser;
use Quoter;
use MySQLDump;
use DSNParser;
use OptionParser;
use Sandbox;
use PerconaTest;
# Connect to the sandbox master; without a connection the whole file is
# skipped, otherwise exactly 28 tests are planned.
my $dp  = DSNParser->new(opts => $dsn_opts);
my $sb  = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');
if ( !$dbh ) {
   plan skip_all => "Cannot connect to sandbox master";
}
else {
   plan tests => 28;
}
$sb->create_dbs($dbh, ['test']);

# Helper objects shared by every test below.  Constructors are called as
# Class->new(...) rather than with indirect object syntax (new Class ...),
# which perl parses by heuristic.
my $q  = Quoter->new();
my $tp = TableParser->new(Quoter => $q);
my $du = MySQLDump->new();
# NOTE(review): the description still says 'NibbleIterator' although this
# file tests RowChecksum -- looks like a copy-paste leftover; left as-is.
my $o  = OptionParser->new(description => 'NibbleIterator');
$o->get_specs("$trunk/bin/pt-table-checksum");
my $c = RowChecksum->new(
   OptionParser => $o,
   Quoter       => $q,
);
# ############################################################################
# _make_xor_slices
# ############################################################################

# One entry per test: the arguments handed to _make_xor_slices, the exact
# SQL text expected back, and the test name.
foreach my $case (
   {
      name   => 'FOO XOR slices 1 wide',
      args   => [ row_checksum => 'FOO', crc_width => 1 ],
      expect => q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 1), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 1, '0')},
   },
   {
      name   => 'FOO XOR slices 16 wide',
      args   => [ row_checksum => 'FOO', crc_width => 16 ],
      expect => q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0')},
   },
   {
      name   => 'FOO XOR slices 17 wide',
      args   => [ row_checksum => 'FOO', crc_width => 17 ],
      expect => q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0'), }
              . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 17, 1), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 1, '0')},
   },
   {
      name   => 'FOO XOR slices 32 wide',
      args   => [ row_checksum => 'FOO', crc_width => 32 ],
      expect => q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0'), }
              . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 17, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0')},
   },
   {
      name   => 'XOR slice optimized in slice 0',
      args   => [ row_checksum => 'FOO', crc_width => 32, opt_slice => 0 ],
      expect => q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(@crc := FOO, 1, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0'), }
              . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(@crc, 17, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0')},
   },
   {
      name   => 'XOR slice optimized in slice 1',
      args   => [ row_checksum => 'FOO', crc_width => 32, opt_slice => 1 ],
      expect => q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(@crc, 1, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0'), }
              . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(@crc := FOO, 17, 16), 16, 10) }
              . q{AS UNSIGNED)), 10, 16), 16, '0')},
   },
) {
   is(
      $c->_make_xor_slices( @{ $case->{args} } ),
      $case->{expect},
      $case->{name},
   );
}
# ############################################################################
# make_row_checksum
# ############################################################################
my $tbl = {
   db         => 'sakila',
   tbl        => 'film',
   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
};

# Every case calls make_row_checksum with tbl => $tbl followed by the listed
# arguments and compares the returned SQL text with the expected string.
foreach my $case (
   {
      name   => 'SHA1 query for sakila.film',
      args   => [ func => 'SHA1' ],
      expect =>
           q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
         . q{SHA1(CONCAT_WS('#', }
         . q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
         . q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
         . q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0, }
         . q{CONCAT(ISNULL(`description`), ISNULL(`release_year`), }
         . q{ISNULL(`original_language_id`), ISNULL(`length`), }
         . q{ISNULL(`rating`), ISNULL(`special_features`))))},
   },
   {
      name   => 'FNV_64 query for sakila.film',
      args   => [ func => 'FNV_64' ],
      expect =>
           q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
         . q{FNV_64(}
         . q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
         . q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
         . q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0)},
   },
   {
      name   => 'SHA1 query for sakila.film with only one column',
      args   => [ func => 'SHA1', cols => [qw(film_id)] ],
      expect => q{`film_id`, SHA1(`film_id`)},
   },
   {
      name   => 'Column names are case-insensitive',
      args   => [ func => 'SHA1', cols => [qw(FILM_ID)] ],
      expect => q{`film_id`, SHA1(`film_id`)},
   },
   {
      name   => 'Separator',
      args   => [ func => 'SHA1', cols => [qw(film_id title)], sep => '%' ],
      expect => q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
   },
   {
      name   => 'Bad separator',
      args   => [ func => 'SHA1', cols => [qw(film_id title)], sep => "'%'" ],
      expect => q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
   },
   {
      name   => 'Really bad separator',
      args   => [ func => 'SHA1', cols => [qw(film_id title)], sep => "'''" ],
      expect => q{`film_id`, `title`, SHA1(CONCAT_WS('#', `film_id`, `title`))},
   },
) {
   is(
      $c->make_row_checksum( tbl => $tbl, @{ $case->{args} } ),
      $case->{expect},
      $case->{name},
   );
}
# sakila.rental
$tbl = {
   db         => 'sakila',
   tbl        => 'rental',
   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.rental.float.sql')),
};

# Same table and function, without and with float_precision.
foreach my $case (
   [
      [],
      q{`rental_id`, `foo`, SHA1(CONCAT_WS('#', `rental_id`, `foo`))},
      'FLOAT column is like any other',
   ],
   [
      [ float_precision => 5 ],
      q{`rental_id`, ROUND(`foo`, 5), SHA1(CONCAT_WS('#', `rental_id`, ROUND(`foo`, 5)))},
      'FLOAT column is rounded to 5 places',
   ],
) {
   my ($extra_args, $expected_sql, $test_name) = @$case;
   is(
      $c->make_row_checksum( tbl => $tbl, func => 'SHA1', @$extra_args ),
      $expected_sql,
      $test_name,
   );
}

# sakila.film
# $tbl is left pointing at the film table; the make_chunk_checksum tests
# that follow in this file pass it as well.
$tbl = {
   db         => 'sakila',
   tbl        => 'film',
   tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
};

like(
   $c->make_row_checksum( tbl => $tbl, func => 'SHA1', trim => 1 ),
   qr{TRIM\(`title`\)},
   'VARCHAR column is trimmed',
);
# ############################################################################
# make_chunk_checksum
# ############################################################################

# SHA1 with a 40-character crc_width and a varchar crc_type: the expected
# SQL XORs the hash in three slices (16, 16 and 8 characters) and
# concatenates the results.
is(
   $c->make_chunk_checksum(
      tbl       => $tbl,
      func      => 'SHA1',
      crc_width => 40,
      cols      => [qw(film_id)],
      crc_type  => 'varchar',
   ),
   q{COUNT(*) AS cnt, }
   . q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, }
   . q{16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'), }
   . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 17, 16), 16, }
   . q{10) AS UNSIGNED)), 10, 16), 16, '0'), }
   . q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 33, 8), 16, }
   . q{10) AS UNSIGNED)), 10, 16), 8, '0'))), 0) AS crc},
   'sakila.film SHA1',
);

# With crc_type bigint or int the expected SQL XORs the function result
# directly, with no slicing, even though crc_width is 99.
is(
   $c->make_chunk_checksum(
      tbl       => $tbl,
      func      => 'FNV_64',
      crc_width => 99,
      cols      => [qw(film_id)],
      crc_type  => 'bigint',
   ),
   q{COUNT(*) AS cnt, }
   . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
   'sakila.film FNV_64',
);

# Same call plus buffer => 1; the expected SQL is unchanged.  The test name
# differs from the previous one so a failure report identifies which of the
# two FNV_64 cases broke.
is(
   $c->make_chunk_checksum(
      tbl       => $tbl,
      func      => 'FNV_64',
      crc_width => 99,
      cols      => [qw(film_id)],
      buffer    => 1,
      crc_type  => 'bigint',
   ),
   q{COUNT(*) AS cnt, }
   . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
   'sakila.film FNV_64 with buffer',
);

is(
   $c->make_chunk_checksum(
      tbl       => $tbl,
      func      => 'CRC32',
      crc_width => 99,
      cols      => [qw(film_id)],
      buffer    => 1,
      crc_type  => 'int',
   ),
   q{COUNT(*) AS cnt, }
   . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
   'sakila.film CRC32',
);
# #############################################################################
# Sandbox tests.
# #############################################################################

# With no function requested, or with one that does not exist, any of the
# three listed functions is an acceptable answer.
like(
   $c->_get_hash_func( dbh => $dbh ),
   qr/CRC32|FNV_64|MD5/,
   'CRC32, FNV_64 or MD5 is default',
);
like(
   $c->_get_hash_func( dbh => $dbh, func => 'SHA99' ),
   qr/CRC32|FNV_64|MD5/,
   'SHA99 does not exist so I get CRC32 or friends',
);

# Re-parse the options with --function MD5 on the command line.
@ARGV = qw(--function MD5);
$o->get_opts();
is(
   $c->_get_hash_func( dbh => $dbh, func => 'MD5' ),
   'MD5',
   'MD5 requested and MD5 granted',
);

# Re-parse with an empty command line before the remaining tests.
@ARGV = qw();
$o->get_opts();

foreach my $case ( ['SHA1', '2'], ['MD5', '1'] ) {
   my ($func, $slice) = @$case;
   is(
      $c->_optimize_xor( dbh => $dbh, func => $func ),
      $slice,
      "$func slice is $slice",
   );
}

foreach my $case ( ['CRC32', 'int'], ['MD5', 'varchar'] ) {
   my ($func, $crc_type) = @$case;
   is(
      $c->_get_crc_type( dbh => $dbh, func => $func ),
      $crc_type,
      "$func type"
   );
}
# #############################################################################
# Issue 94: Enhance mk-table-checksum, add a --ignorecols option
# #############################################################################
$sb->load_file('master', 't/lib/samples/issue_94.sql');
$tbl = {
   db         => 'test',
   tbl        => 'issue_94',
   tbl_struct => $tp->parse(
      $du->get_create_table($dbh, $q, 'test', 'issue_94')
   ),
};

# Column c is listed in ignorecols; the expected checksum expression only
# mentions columns a and b.
my $query = $c->make_chunk_checksum(
   tbl        => $tbl,
   func       => 'CRC32',
   crc_width  => 16,
   crc_type   => 'int',
   opt_slice  => undef,
   cols       => undef,
   sep        => '#',
   replicate  => undef,
   precision  => undef,
   trim       => undef,
   ignorecols => { 'c' => 1 },
);

is(
   $query,
   q{COUNT(*) AS cnt, }
   . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `a`, `b`)) AS UNSIGNED)), 10, 16)), 0) AS crc},
   'Ignores specified columns'
);

# ############################################################################
# Done.
# ############################################################################
$sb->wipe_clean($dbh);
exit;

View File

@@ -0,0 +1,10 @@
-- Recreate the test database from scratch.
DROP DATABASE IF EXISTS test;
CREATE DATABASE test;
USE test;

-- Single-column table; its only index is a non-unique index on c.
CREATE TABLE t (
   c VARCHAR(16) NOT NULL,
   INDEX (c)
);

-- 26 rows, one per lowercase letter a through z.
INSERT INTO t VALUES
   ('a'), ('b'), ('c'), ('d'), ('e'), ('f'), ('g'), ('h'), ('i'),
   ('j'), ('k'), ('l'), ('m'), ('n'), ('o'), ('p'), ('q'), ('r'),
   ('s'), ('t'), ('u'), ('v'), ('w'), ('x'), ('y'), ('z');