mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-14 07:06:28 +00:00
Refactor TableChecksum for NibbleIterator (use only BIT_XOR, return just column list, etc.).
This commit is contained in:
@@ -104,8 +104,9 @@ sub new {
|
|||||||
MKDEBUG && _d('Next upper boundary statement:', $ub_sql);
|
MKDEBUG && _d('Next upper boundary statement:', $ub_sql);
|
||||||
|
|
||||||
my $nibble_sql
|
my $nibble_sql
|
||||||
= "SELECT /*!40001 SQL_NO_CACHE */ "
|
= "SELECT "
|
||||||
. join(', ', map { $q->quote($_) } @{$asc->{cols}})
|
. ($args{select} ? $args{select}
|
||||||
|
: join(', ', map { $q->quote($_) } @{$asc->{cols}}))
|
||||||
. " $from "
|
. " $from "
|
||||||
. " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
|
. " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
|
||||||
. " AND " . $asc->{boundaries}->{'<='} # upper boundary
|
. " AND " . $asc->{boundaries}->{'<='} # upper boundary
|
||||||
|
@@ -19,7 +19,7 @@
|
|||||||
# ###########################################################################
|
# ###########################################################################
|
||||||
{
|
{
|
||||||
# Package: TableChecksum
|
# Package: TableChecksum
|
||||||
# TableChecksum checksums tables.
|
# TableChecksum makes checksum expressions for checksumming rows and chunks.
|
||||||
package TableChecksum;
|
package TableChecksum;
|
||||||
|
|
||||||
use strict;
|
use strict;
|
||||||
@@ -29,261 +29,44 @@ use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
|||||||
|
|
||||||
use List::Util qw(max);
|
use List::Util qw(max);
|
||||||
|
|
||||||
# BXT_XOR is actually faster than ACCUM as long as the user-variable
|
|
||||||
# optimization can be used. I've never seen a case where it can't be.
|
|
||||||
our %ALGOS = (
|
|
||||||
CHECKSUM => { pref => 0, hash => 0 },
|
|
||||||
BIT_XOR => { pref => 2, hash => 1 },
|
|
||||||
ACCUM => { pref => 3, hash => 1 },
|
|
||||||
);
|
|
||||||
|
|
||||||
sub new {
|
sub new {
|
||||||
my ( $class, %args ) = @_;
|
my ( $class, %args ) = @_;
|
||||||
foreach my $arg ( qw(Quoter VersionParser) ) {
|
foreach my $arg ( qw(OptionParser Quoter) ) {
|
||||||
die "I need a $arg argument" unless defined $args{$arg};
|
die "I need a $arg argument" unless defined $args{$arg};
|
||||||
}
|
}
|
||||||
my $self = { %args };
|
my $self = { %args };
|
||||||
return bless $self, $class;
|
return bless $self, $class;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Perl implementation of CRC32, ripped off from Digest::Crc32. The results
|
# Sub: make_row_checksum
|
||||||
# ought to match what you get from any standard CRC32 implementation, such as
|
# Make a SELECT column list to checksum a row.
|
||||||
# that inside MySQL.
|
#
|
||||||
sub crc32 {
|
# Parameters:
|
||||||
my ( $self, $string ) = @_;
|
# %args - Arguments
|
||||||
my $poly = 0xEDB88320;
|
#
|
||||||
my $crc = 0xFFFFFFFF;
|
# Required Arguments:
|
||||||
foreach my $char ( split(//, $string) ) {
|
# tbl - Table ref
|
||||||
my $comp = ($crc ^ ord($char)) & 0xFF;
|
#
|
||||||
for ( 1 .. 8 ) {
|
# Optional Arguments:
|
||||||
$comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1;
|
# sep - Separator for CONCAT_WS(); default #
|
||||||
}
|
# cols - Arrayref of columns to checksum
|
||||||
$crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp;
|
# trim - Wrap VARCHAR cols in TRIM() for v4/v5 compatibility
|
||||||
}
|
# ignorecols - Arrayref of columns to exclude from checksum
|
||||||
return $crc ^ 0xFFFFFFFF;
|
#
|
||||||
}
|
# Returns:
|
||||||
|
# Column list for SELECT
|
||||||
# Returns how wide/long, in characters, a CRC function is.
|
|
||||||
sub get_crc_wid {
|
|
||||||
my ( $self, $dbh, $func ) = @_;
|
|
||||||
my $crc_wid = 16;
|
|
||||||
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
|
|
||||||
eval {
|
|
||||||
my ($val) = $dbh->selectrow_array("SELECT $func('a')");
|
|
||||||
$crc_wid = max(16, length($val));
|
|
||||||
};
|
|
||||||
}
|
|
||||||
return $crc_wid;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Returns a CRC function's MySQL type as a list of (type, length).
|
|
||||||
sub get_crc_type {
|
|
||||||
my ( $self, $dbh, $func ) = @_;
|
|
||||||
my $type = '';
|
|
||||||
my $length = 0;
|
|
||||||
my $sql = "SELECT $func('a')";
|
|
||||||
my $sth = $dbh->prepare($sql);
|
|
||||||
eval {
|
|
||||||
$sth->execute();
|
|
||||||
$type = $sth->{mysql_type_name}->[0];
|
|
||||||
$length = $sth->{mysql_length}->[0];
|
|
||||||
MKDEBUG && _d($sql, $type, $length);
|
|
||||||
if ( $type eq 'bigint' && $length < 20 ) {
|
|
||||||
$type = 'int';
|
|
||||||
}
|
|
||||||
};
|
|
||||||
$sth->finish;
|
|
||||||
MKDEBUG && _d('crc_type:', $type, 'length:', $length);
|
|
||||||
return ($type, $length);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Arguments:
|
|
||||||
# algorithm (optional) One of CHECKSUM, ACCUM, BIT_XOR
|
|
||||||
# dbh DB handle
|
|
||||||
# where bool: whether user wants a WHERE clause applied
|
|
||||||
# chunk bool: whether user wants to checksum in chunks
|
|
||||||
# replicate bool: whether user wants to do via replication
|
|
||||||
# count bool: whether user wants a row count too
|
|
||||||
sub best_algorithm {
|
|
||||||
my ( $self, %args ) = @_;
|
|
||||||
my ( $alg, $dbh ) = @args{ qw(algorithm dbh) };
|
|
||||||
my $vp = $self->{VersionParser};
|
|
||||||
my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS;
|
|
||||||
die "Invalid checksum algorithm $alg"
|
|
||||||
if $alg && !$ALGOS{$alg};
|
|
||||||
|
|
||||||
# CHECKSUM is eliminated by lots of things...
|
|
||||||
if (
|
|
||||||
$args{where} || $args{chunk} # CHECKSUM does whole table
|
|
||||||
|| $args{replicate} # CHECKSUM can't do INSERT.. SELECT
|
|
||||||
|| !$vp->version_ge($dbh, '4.1.1')) # CHECKSUM doesn't exist
|
|
||||||
{
|
|
||||||
MKDEBUG && _d('Cannot use CHECKSUM algorithm');
|
|
||||||
@choices = grep { $_ ne 'CHECKSUM' } @choices;
|
|
||||||
}
|
|
||||||
|
|
||||||
# BIT_XOR isn't available till 4.1.1 either
|
|
||||||
if ( !$vp->version_ge($dbh, '4.1.1') ) {
|
|
||||||
MKDEBUG && _d('Cannot use BIT_XOR algorithm because MySQL < 4.1.1');
|
|
||||||
@choices = grep { $_ ne 'BIT_XOR' } @choices;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Choose the best (fastest) among the remaining choices.
|
|
||||||
if ( $alg && grep { $_ eq $alg } @choices ) {
|
|
||||||
# Honor explicit choices.
|
|
||||||
MKDEBUG && _d('User requested', $alg, 'algorithm');
|
|
||||||
return $alg;
|
|
||||||
}
|
|
||||||
|
|
||||||
# If the user wants a count, prefer something other than CHECKSUM, because it
|
|
||||||
# requires an extra query for the count.
|
|
||||||
if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) {
|
|
||||||
MKDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired');
|
|
||||||
@choices = grep { $_ ne 'CHECKSUM' } @choices;
|
|
||||||
}
|
|
||||||
|
|
||||||
MKDEBUG && _d('Algorithms, in order:', @choices);
|
|
||||||
return $choices[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
sub is_hash_algorithm {
|
|
||||||
my ( $self, $algorithm ) = @_;
|
|
||||||
return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash};
|
|
||||||
}
|
|
||||||
|
|
||||||
# Picks a hash function, in order of speed.
|
|
||||||
# Arguments:
|
|
||||||
# * dbh
|
|
||||||
# * function (optional) Preferred function: SHA1, MD5, etc.
|
|
||||||
sub choose_hash_func {
|
|
||||||
my ( $self, %args ) = @_;
|
|
||||||
my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
|
|
||||||
if ( $args{function} ) {
|
|
||||||
unshift @funcs, $args{function};
|
|
||||||
}
|
|
||||||
my ($result, $error);
|
|
||||||
do {
|
|
||||||
my $func;
|
|
||||||
eval {
|
|
||||||
$func = shift(@funcs);
|
|
||||||
my $sql = "SELECT $func('test-string')";
|
|
||||||
MKDEBUG && _d($sql);
|
|
||||||
$args{dbh}->do($sql);
|
|
||||||
$result = $func;
|
|
||||||
};
|
|
||||||
if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
|
|
||||||
$error .= qq{$func cannot be used because "$1"\n};
|
|
||||||
MKDEBUG && _d($func, 'cannot be used because', $1);
|
|
||||||
}
|
|
||||||
} while ( @funcs && !$result );
|
|
||||||
|
|
||||||
die $error unless $result;
|
|
||||||
MKDEBUG && _d('Chosen hash func:', $result);
|
|
||||||
return $result;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Figure out which slice in a sliced BIT_XOR checksum should have the actual
|
|
||||||
# concat-columns-and-checksum, and which should just get variable references.
|
|
||||||
# Returns the slice. I'm really not sure if this code is needed. It always
|
|
||||||
# seems the last slice is the one that works. But I'd rather be paranoid.
|
|
||||||
# TODO: this function needs a hint to know when a function returns an
|
|
||||||
# integer. CRC32 is an example. In these cases no optimization or slicing
|
|
||||||
# is necessary.
|
|
||||||
sub optimize_xor {
|
|
||||||
my ( $self, %args ) = @_;
|
|
||||||
my ($dbh, $func) = @args{qw(dbh function)};
|
|
||||||
|
|
||||||
die "$func never needs the BIT_XOR optimization"
|
|
||||||
if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
|
|
||||||
|
|
||||||
my $opt_slice = 0;
|
|
||||||
my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
|
|
||||||
my $sliced = '';
|
|
||||||
my $start = 1;
|
|
||||||
my $crc_wid = length($unsliced) < 16 ? 16 : length($unsliced);
|
|
||||||
|
|
||||||
do { # Try different positions till sliced result equals non-sliced.
|
|
||||||
MKDEBUG && _d('Trying slice', $opt_slice);
|
|
||||||
$dbh->do('SET @crc := "", @cnt := 0');
|
|
||||||
my $slices = $self->make_xor_slices(
|
|
||||||
query => "\@crc := $func('a')",
|
|
||||||
crc_wid => $crc_wid,
|
|
||||||
opt_slice => $opt_slice,
|
|
||||||
);
|
|
||||||
|
|
||||||
my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
|
|
||||||
$sliced = ($dbh->selectrow_array($sql))[0];
|
|
||||||
if ( $sliced ne $unsliced ) {
|
|
||||||
MKDEBUG && _d('Slice', $opt_slice, 'does not work');
|
|
||||||
$start += 16;
|
|
||||||
++$opt_slice;
|
|
||||||
}
|
|
||||||
} while ( $start < $crc_wid && $sliced ne $unsliced );
|
|
||||||
|
|
||||||
if ( $sliced eq $unsliced ) {
|
|
||||||
MKDEBUG && _d('Slice', $opt_slice, 'works');
|
|
||||||
return $opt_slice;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
MKDEBUG && _d('No slice works');
|
|
||||||
return undef;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
# Returns an expression that will do a bitwise XOR over a very wide integer,
|
|
||||||
# such as that returned by SHA1, which is too large to just put into BIT_XOR().
|
|
||||||
# $query is an expression that returns a row's checksum, $crc_wid is the width
|
|
||||||
# of that expression in characters. If the opt_slice argument is given, use a
|
|
||||||
# variable to avoid calling the $query expression multiple times. The variable
|
|
||||||
# goes in slice $opt_slice.
|
|
||||||
# Arguments:
|
|
||||||
# * query
|
|
||||||
# * crc_wid
|
|
||||||
# * opt_slice (optional)
|
|
||||||
sub make_xor_slices {
|
|
||||||
my ( $self, %args ) = @_;
|
|
||||||
foreach my $arg ( qw(query crc_wid) ) {
|
|
||||||
die "I need a $arg argument" unless defined $args{$arg};
|
|
||||||
}
|
|
||||||
my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)};
|
|
||||||
|
|
||||||
# Create a series of slices with @crc as a placeholder.
|
|
||||||
my @slices;
|
|
||||||
for ( my $start = 1; $start <= $crc_wid; $start += 16 ) {
|
|
||||||
my $len = $crc_wid - $start + 1;
|
|
||||||
if ( $len > 16 ) {
|
|
||||||
$len = 16;
|
|
||||||
}
|
|
||||||
push @slices,
|
|
||||||
"LPAD(CONV(BIT_XOR("
|
|
||||||
. "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
|
|
||||||
. ", 10, 16), $len, '0')";
|
|
||||||
}
|
|
||||||
|
|
||||||
# Replace the placeholder with the expression. If specified, add a
|
|
||||||
# user-variable optimization so the expression goes in only one of the
|
|
||||||
# slices. This optimization relies on @crc being '' when the query begins.
|
|
||||||
if ( defined $opt_slice && $opt_slice < @slices ) {
|
|
||||||
$slices[$opt_slice] =~ s/\@crc/\@crc := $query/;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
map { s/\@crc/$query/ } @slices;
|
|
||||||
}
|
|
||||||
|
|
||||||
return join(', ', @slices);
|
|
||||||
}
|
|
||||||
|
|
||||||
# Generates a checksum query for a given table. Arguments:
|
|
||||||
# * tbl_struct Struct as returned by TableParser::parse()
|
|
||||||
# * function SHA1, MD5, etc
|
|
||||||
# * sep (optional) Separator for CONCAT_WS(); default #
|
|
||||||
# * cols (optional) arrayref of columns to checksum
|
|
||||||
# * trim (optional) wrap VARCHAR cols in TRIM() for v4/v5 compatibility
|
|
||||||
# * ignorecols (optional) arrayref of columns to exclude from checksum
|
|
||||||
sub make_row_checksum {
|
sub make_row_checksum {
|
||||||
my ( $self, %args ) = @_;
|
my ( $self, %args ) = @_;
|
||||||
my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) };
|
my @required_args = qw(tbl);
|
||||||
my $q = $self->{Quoter};
|
foreach my $arg( @required_args ) {
|
||||||
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
|
}
|
||||||
|
my ($tbl) = @args{@required_args};
|
||||||
|
|
||||||
|
my $o = $self->{OptionParser};
|
||||||
|
my $q = $self->{Quoter};
|
||||||
|
my $tbl_struct = $tbl->{tbl_struct};
|
||||||
|
my $func = $args{func} || uc($o->get('function'));
|
||||||
|
|
||||||
my $sep = $args{sep} || '#';
|
my $sep = $args{sep} || '#';
|
||||||
$sep =~ s/'//g;
|
$sep =~ s/'//g;
|
||||||
@@ -321,7 +104,7 @@ sub make_row_checksum {
|
|||||||
|
|
||||||
# Prepend columns to query, resulting in "col1, col2, FUNC(..col1, col2...)",
|
# Prepend columns to query, resulting in "col1, col2, FUNC(..col1, col2...)",
|
||||||
# unless caller says not to. The only caller that says not to is
|
# unless caller says not to. The only caller that says not to is
|
||||||
# make_checksum_query() which uses this row checksum as part of a larger
|
# make_chunk_checksum() which uses this row checksum as part of a larger
|
||||||
# checksum. Other callers, like TableSyncer::make_checksum_queries() call
|
# checksum. Other callers, like TableSyncer::make_checksum_queries() call
|
||||||
# this sub directly and want the actual columns.
|
# this sub directly and want the actual columns.
|
||||||
my $query;
|
my $query;
|
||||||
@@ -368,102 +151,270 @@ sub make_row_checksum {
|
|||||||
return $query;
|
return $query;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Generates a checksum query for a given table. Arguments:
|
# Sub: make_chunk_checksum
|
||||||
# * db Database name
|
# Make a SELECT column list to checksum a chunk of rows.
|
||||||
# * tbl Table name
|
#
|
||||||
# * tbl_struct Struct as returned by TableParser::parse()
|
# Parameters:
|
||||||
# * algorithm Any of @ALGOS
|
# %args - Arguments
|
||||||
# * function (optional) SHA1, MD5, etc
|
#
|
||||||
# * crc_wid Width of the string returned by function
|
# Required Arguments:
|
||||||
# * crc_type Type of function's result
|
# tbl - Table ref
|
||||||
# * opt_slice (optional) Which slice gets opt_xor (see make_xor_slices()).
|
# dbh - dbh if func, crc_width, and crc_type aren't given
|
||||||
# * cols (optional) see make_row_checksum()
|
#
|
||||||
# * sep (optional) see make_row_checksum()
|
# Optional Arguments:
|
||||||
# * replicate (optional) generate query to REPLACE into this table.
|
# func - Hash function name
|
||||||
# * trim (optional) see make_row_checksum().
|
# crc_width - CRC width
|
||||||
# * buffer (optional) Adds SQL_BUFFER_RESULT.
|
# crc_type - CRC type
|
||||||
sub make_checksum_query {
|
#
|
||||||
|
# Returns:
|
||||||
|
# Column list for SELECT
|
||||||
|
sub make_chunk_checksum {
|
||||||
my ( $self, %args ) = @_;
|
my ( $self, %args ) = @_;
|
||||||
my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type);
|
my @required_args = qw(tbl);
|
||||||
foreach my $arg( @required_args ) {
|
foreach my $arg( @required_args ) {
|
||||||
die "I need a $arg argument" unless $args{$arg};
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
}
|
}
|
||||||
my ( $db, $tbl, $tbl_struct, $algorithm,
|
die "I need a dbh argument"
|
||||||
$crc_wid, $crc_type) = @args{@required_args};
|
unless $args{func} && $args{crc_width} && $args{crc_type} && !$args{dbh};
|
||||||
my $func = $args{function};
|
|
||||||
|
my ($tbl) = @args{@required_args};
|
||||||
|
my $o = $self->{OptionParser};
|
||||||
my $q = $self->{Quoter};
|
my $q = $self->{Quoter};
|
||||||
my $result;
|
|
||||||
|
|
||||||
die "Invalid or missing checksum algorithm"
|
my $func = $args{func} || $self->_get_hash_func(%args);
|
||||||
unless $algorithm && $ALGOS{$algorithm};
|
my $crc_width = $args{crc_width}|| $self->_get_crc_width(%args, func=>$func);
|
||||||
|
my $crc_type = $args{crc_type} || $self->_get_crc_type(%args, func=>$func);
|
||||||
if ( $algorithm eq 'CHECKSUM' ) {
|
my $opt_slice;
|
||||||
return "CHECKSUM TABLE " . $q->quote($db, $tbl);
|
if ( $o->get('optimize-xor') ) {
|
||||||
|
if ( $crc_type !~ m/int$/ ) {
|
||||||
|
$opt_slice = $self->_optimize_xor(%args, func => $func);
|
||||||
|
warn "Cannot use --optimize-xor" unless defined $opt_slice;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
MKDEBUG && _d("Checksum strat:", $func, $crc_width, $crc_type, $opt_slice);
|
||||||
|
|
||||||
my $expr = $self->make_row_checksum(%args, no_cols=>1);
|
# This checksum algorithm concatenates the columns in each row and
|
||||||
|
# checksums them, then slices this checksum up into 16-character chunks.
|
||||||
if ( $algorithm eq 'BIT_XOR' ) {
|
# It then converts them BIGINTs with the CONV() function, and then
|
||||||
# This checksum algorithm concatenates the columns in each row and
|
# groupwise XORs them to produce an order-independent checksum of the
|
||||||
# checksums them, then slices this checksum up into 16-character chunks.
|
# slice over all the rows. It then converts these back to base 16 and
|
||||||
# It then converts them BIGINTs with the CONV() function, and then
|
# puts them back together. The effect is the same as XORing a very wide
|
||||||
# groupwise XORs them to produce an order-independent checksum of the
|
# (32 characters = 128 bits for MD5, and SHA1 is even larger) unsigned
|
||||||
# slice over all the rows. It then converts these back to base 16 and
|
# integer over all the rows.
|
||||||
# puts them back together. The effect is the same as XORing a very wide
|
#
|
||||||
# (32 characters = 128 bits for MD5, and SHA1 is even larger) unsigned
|
# As a special case, integer functions do not need to be sliced. They
|
||||||
# integer over all the rows.
|
# can be fed right into BIT_XOR after a cast to UNSIGNED.
|
||||||
#
|
my $row_checksum = $self->make_row_checksum(%args, no_cols=>1);
|
||||||
# As a special case, integer functions do not need to be sliced. They
|
my $crc;
|
||||||
# can be fed right into BIT_XOR after a cast to UNSIGNED.
|
if ( $crc_type =~ m/int$/ ) {
|
||||||
if ( $crc_type =~ m/int$/ ) {
|
$crc = "COALESCE(LOWER(CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), "
|
||||||
$result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc ";
|
. "10, 16)), 0)";
|
||||||
}
|
|
||||||
else {
|
|
||||||
my $slices = $self->make_xor_slices( query => $expr, %args );
|
|
||||||
$result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc ";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
# Use an accumulator variable. This query relies on @crc being '', and
|
my $slices = $self->_make_xor_slices(
|
||||||
# @cnt being 0 when it begins. It checksums each row, appends it to the
|
row_checksum => $row_checksum,
|
||||||
# running checksum, and checksums the two together. In this way it acts
|
crc_width => $crc_width
|
||||||
# as an accumulator for all the rows. It then prepends a steadily
|
);
|
||||||
# increasing number to the left, left-padded with zeroes, so each checksum
|
$crc = "COALESCE(LOWER(CONCAT($slices)), 0)";
|
||||||
# taken is stringwise greater than the last. In this way the MAX()
|
|
||||||
# function can be used to return the last checksum calculated. @cnt is
|
|
||||||
# not used for a row count, it is only used to make MAX() work correctly.
|
|
||||||
#
|
|
||||||
# As a special case, int funcs must be converted to base 16 so it's a
|
|
||||||
# predictable width (it's also a shorter string, but that's not really
|
|
||||||
# important).
|
|
||||||
#
|
|
||||||
# On MySQL 4.0 and older, crc is NULL/undef if no rows are selected.
|
|
||||||
# We COALESCE to avoid having to check that crc is defined; see
|
|
||||||
# http://code.google.com/p/maatkit/issues/detail?id=672
|
|
||||||
if ( $crc_type =~ m/int$/ ) {
|
|
||||||
$result = "COALESCE(RIGHT(MAX("
|
|
||||||
. "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
|
|
||||||
. "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))"
|
|
||||||
. "), $crc_wid), 0) AS crc ";
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
$result = "COALESCE(RIGHT(MAX("
|
|
||||||
. "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), "
|
|
||||||
. "$func(CONCAT(\@crc, $expr)))"
|
|
||||||
. "), $crc_wid), 0) AS crc ";
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if ( $args{replicate} ) {
|
|
||||||
$result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} "
|
my $select = "COUNT(*) AS cnt, $crc AS crc";
|
||||||
. "(db, tbl, chunk, boundaries, this_cnt, this_crc) "
|
MKDEBUG && _d($select);
|
||||||
. "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result";
|
return $select;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Sub: _get_hash_func
|
||||||
|
# Get the fastest available hash function.
|
||||||
|
#
|
||||||
|
# Parameters:
|
||||||
|
# %args - Arguments
|
||||||
|
#
|
||||||
|
# Required Arguments:
|
||||||
|
# dbh - dbh
|
||||||
|
#
|
||||||
|
# Returns:
|
||||||
|
# Function name
|
||||||
|
sub _get_hash_func {
|
||||||
|
my ( $self, %args ) = @_;
|
||||||
|
my @required_args = qw(dbh);
|
||||||
|
foreach my $arg( @required_args ) {
|
||||||
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
|
}
|
||||||
|
my ($dbh) = @args{@required_args};
|
||||||
|
my $o = $self->{OptionParser};
|
||||||
|
my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
|
||||||
|
|
||||||
|
if ( my $func = $o->get('function') ) {
|
||||||
|
unshift @funcs, $func;
|
||||||
|
}
|
||||||
|
|
||||||
|
my ($result, $error);
|
||||||
|
foreach my $func ( @funcs ) {
|
||||||
|
eval {
|
||||||
|
my $sql = "SELECT $func('test-string')";
|
||||||
|
MKDEBUG && _d($sql);
|
||||||
|
$args{dbh}->do($sql);
|
||||||
|
};
|
||||||
|
if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) {
|
||||||
|
$error .= qq{$func cannot be used because "$1"\n};
|
||||||
|
MKDEBUG && _d($func, 'cannot be used because', $1);
|
||||||
|
}
|
||||||
|
MKDEBUG && _d('Chosen hash func:', $result);
|
||||||
|
return $func;
|
||||||
|
}
|
||||||
|
die $error || 'No hash functions (CRC32, MD5, etc.) are available';
|
||||||
|
}
|
||||||
|
|
||||||
|
# Returns how wide/long, in characters, a CRC function is.
|
||||||
|
sub _get_crc_width {
|
||||||
|
my ( $self, %args ) = @_;
|
||||||
|
my @required_args = qw(dbh func);
|
||||||
|
foreach my $arg( @required_args ) {
|
||||||
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
|
}
|
||||||
|
my ($dbh, $func) = @args{@required_args};
|
||||||
|
|
||||||
|
my $crc_width = 16;
|
||||||
|
if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
|
||||||
|
eval {
|
||||||
|
my ($val) = $dbh->selectrow_array("SELECT $func('a')");
|
||||||
|
$crc_width = max(16, length($val));
|
||||||
|
};
|
||||||
|
}
|
||||||
|
return $crc_width;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Returns a CRC function's MySQL type.
|
||||||
|
sub _get_crc_type {
|
||||||
|
my ( $self, %args ) = @_;
|
||||||
|
my @required_args = qw(dbh func);
|
||||||
|
foreach my $arg( @required_args ) {
|
||||||
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
|
}
|
||||||
|
my ($dbh, $func) = @args{@required_args};
|
||||||
|
|
||||||
|
my $type = '';
|
||||||
|
my $length = 0;
|
||||||
|
my $sql = "SELECT $func('a')";
|
||||||
|
my $sth = $dbh->prepare($sql);
|
||||||
|
eval {
|
||||||
|
$sth->execute();
|
||||||
|
$type = $sth->{mysql_type_name}->[0];
|
||||||
|
$length = $sth->{mysql_length}->[0];
|
||||||
|
MKDEBUG && _d($sql, $type, $length);
|
||||||
|
if ( $type eq 'bigint' && $length < 20 ) {
|
||||||
|
$type = 'int';
|
||||||
|
}
|
||||||
|
};
|
||||||
|
$sth->finish;
|
||||||
|
MKDEBUG && _d('crc_type:', $type, 'length:', $length);
|
||||||
|
return $type;
|
||||||
|
}
|
||||||
|
|
||||||
|
# Figure out which slice in a sliced BIT_XOR checksum should have the actual
|
||||||
|
# concat-columns-and-checksum, and which should just get variable references.
|
||||||
|
# Returns the slice. I'm really not sure if this code is needed. It always
|
||||||
|
# seems the last slice is the one that works. But I'd rather be paranoid.
|
||||||
|
# TODO: this function needs a hint to know when a function returns an
|
||||||
|
# integer. CRC32 is an example. In these cases no optimization or slicing
|
||||||
|
# is necessary.
|
||||||
|
sub _optimize_xor {
|
||||||
|
my ( $self, %args ) = @_;
|
||||||
|
my @required_args = qw(dbh func);
|
||||||
|
foreach my $arg( @required_args ) {
|
||||||
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
|
}
|
||||||
|
my ($dbh, $func) = @args{@required_args};
|
||||||
|
|
||||||
|
die "$func never needs BIT_XOR optimization"
|
||||||
|
if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
|
||||||
|
|
||||||
|
my $opt_slice = 0;
|
||||||
|
my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
|
||||||
|
my $sliced = '';
|
||||||
|
my $start = 1;
|
||||||
|
my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);
|
||||||
|
|
||||||
|
do { # Try different positions till sliced result equals non-sliced.
|
||||||
|
MKDEBUG && _d('Trying slice', $opt_slice);
|
||||||
|
$dbh->do('SET @crc := "", @cnt := 0');
|
||||||
|
my $slices = $self->_make_xor_slices(
|
||||||
|
row_checksum => "\@crc := $func('a')",
|
||||||
|
crc_width => $crc_width,
|
||||||
|
opt_slice => $opt_slice,
|
||||||
|
);
|
||||||
|
|
||||||
|
my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
|
||||||
|
$sliced = ($dbh->selectrow_array($sql))[0];
|
||||||
|
if ( $sliced ne $unsliced ) {
|
||||||
|
MKDEBUG && _d('Slice', $opt_slice, 'does not work');
|
||||||
|
$start += 16;
|
||||||
|
++$opt_slice;
|
||||||
|
}
|
||||||
|
} while ( $start < $crc_width && $sliced ne $unsliced );
|
||||||
|
|
||||||
|
if ( $sliced eq $unsliced ) {
|
||||||
|
MKDEBUG && _d('Slice', $opt_slice, 'works');
|
||||||
|
return $opt_slice;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
$result = "SELECT "
|
MKDEBUG && _d('No slice works');
|
||||||
. ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '')
|
return undef;
|
||||||
. "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result";
|
|
||||||
}
|
}
|
||||||
return $result . "FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/";
|
}
|
||||||
|
|
||||||
|
# Sub: _make_xor_slices
|
||||||
|
# Make an expression that will do a bitwise XOR over a very wide integer,
|
||||||
|
# such as that returned by SHA1, which is too large to put into BIT_XOR().
|
||||||
|
# If an opt_slice is given, a variable is used to avoid calling row_checksum
|
||||||
|
# multiple times.
|
||||||
|
#
|
||||||
|
# Parameters:
|
||||||
|
# %args - Arguments
|
||||||
|
#
|
||||||
|
# Required Arguments:
|
||||||
|
# row_checksum - <make_row_checksum()> query
|
||||||
|
# crc_width - CRC width (<_get_crc_width()>
|
||||||
|
#
|
||||||
|
# Optional Arguments:
|
||||||
|
# opt_slice - Slice number. Use a variable to avoid calling row_checksum
|
||||||
|
# multiple times.
|
||||||
|
#
|
||||||
|
# Returns:
|
||||||
|
# SQL expression
|
||||||
|
sub _make_xor_slices {
|
||||||
|
my ( $self, %args ) = @_;
|
||||||
|
my @required_args = qw(row_checksum crc_width);
|
||||||
|
foreach my $arg( @required_args ) {
|
||||||
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
|
}
|
||||||
|
my ($row_checksum, $crc_width) = @args{@required_args};
|
||||||
|
my ($opt_slice) = $args{opt_slice};
|
||||||
|
|
||||||
|
# Create a series of slices with @crc as a placeholder.
|
||||||
|
my @slices;
|
||||||
|
for ( my $start = 1; $start <= $crc_width; $start += 16 ) {
|
||||||
|
my $len = $crc_width - $start + 1;
|
||||||
|
if ( $len > 16 ) {
|
||||||
|
$len = 16;
|
||||||
|
}
|
||||||
|
push @slices,
|
||||||
|
"LPAD(CONV(BIT_XOR("
|
||||||
|
. "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
|
||||||
|
. ", 10, 16), $len, '0')";
|
||||||
|
}
|
||||||
|
|
||||||
|
# Replace the placeholder with the expression. If specified, add a
|
||||||
|
# user-variable optimization so the expression goes in only one of the
|
||||||
|
# slices. This optimization relies on @crc being '' when the query begins.
|
||||||
|
if ( defined $opt_slice && $opt_slice < @slices ) {
|
||||||
|
$slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
map { s/\@crc/$row_checksum/ } @slices;
|
||||||
|
}
|
||||||
|
|
||||||
|
return join(', ', @slices);
|
||||||
}
|
}
|
||||||
|
|
||||||
# Queries the replication table for chunks that differ from the master's data.
|
# Queries the replication table for chunks that differ from the master's data.
|
||||||
|
@@ -12,11 +12,11 @@ use English qw(-no_match_vars);
|
|||||||
use Test::More;
|
use Test::More;
|
||||||
|
|
||||||
use TableChecksum;
|
use TableChecksum;
|
||||||
use VersionParser;
|
|
||||||
use TableParser;
|
use TableParser;
|
||||||
use Quoter;
|
use Quoter;
|
||||||
use MySQLDump;
|
use MySQLDump;
|
||||||
use DSNParser;
|
use DSNParser;
|
||||||
|
use OptionParser;
|
||||||
use Sandbox;
|
use Sandbox;
|
||||||
use PerconaTest;
|
use PerconaTest;
|
||||||
|
|
||||||
@@ -28,165 +28,49 @@ if ( !$dbh ) {
|
|||||||
plan skip_all => "Cannot connect to sandbox master";
|
plan skip_all => "Cannot connect to sandbox master";
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
plan tests => 51;
|
plan tests => 28;
|
||||||
}
|
}
|
||||||
|
|
||||||
$sb->create_dbs($dbh, ['test']);
|
$sb->create_dbs($dbh, ['test']);
|
||||||
|
|
||||||
my $q = new Quoter();
|
my $q = new Quoter();
|
||||||
my $tp = new TableParser(Quoter => $q);
|
my $tp = new TableParser(Quoter => $q);
|
||||||
my $vp = new VersionParser();
|
|
||||||
my $du = new MySQLDump();
|
my $du = new MySQLDump();
|
||||||
my $c = new TableChecksum(Quoter=>$q, VersionParser=>$vp);
|
my $o = new OptionParser(description => 'NibbleIterator');
|
||||||
|
$o->get_specs("$trunk/bin/pt-table-checksum");
|
||||||
|
|
||||||
my $t;
|
my $c = new TableChecksum(
|
||||||
|
OptionParser => $o,
|
||||||
my %args = map { $_ => undef }
|
Quoter => $q,
|
||||||
qw(db tbl tbl_struct algorithm function crc_wid crc_type opt_slice);
|
|
||||||
|
|
||||||
throws_ok (
|
|
||||||
sub { $c->best_algorithm( %args, algorithm => 'foo', ) },
|
|
||||||
qr/Invalid checksum algorithm/,
|
|
||||||
'Algorithm=foo',
|
|
||||||
);
|
);
|
||||||
|
|
||||||
# Inject the VersionParser with some bogus versions. Later I'll just pass the
|
# ############################################################################
|
||||||
# string version number instead of a real DBH, so the version parsing will
|
# _make_xor_slices
|
||||||
# return the value I want.
|
# ############################################################################
|
||||||
foreach my $ver( qw(4.0.0 4.1.1) ) {
|
is(
|
||||||
$vp->{$ver} = $vp->parse($ver);
|
$c->_make_xor_slices(
|
||||||
}
|
row_checksum => 'FOO',
|
||||||
|
crc_width => 1,
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'CHECKSUM',
|
|
||||||
dbh => '4.1.1',
|
|
||||||
),
|
|
||||||
'CHECKSUM',
|
|
||||||
'Prefers CHECKSUM',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
dbh => '4.1.1',
|
|
||||||
),
|
|
||||||
'CHECKSUM',
|
|
||||||
'Default is CHECKSUM',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'CHECKSUM',
|
|
||||||
dbh => '4.1.1',
|
|
||||||
where => 1,
|
|
||||||
),
|
|
||||||
'BIT_XOR',
|
|
||||||
'CHECKSUM eliminated by where',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'CHECKSUM',
|
|
||||||
dbh => '4.1.1',
|
|
||||||
chunk => 1,
|
|
||||||
),
|
|
||||||
'BIT_XOR',
|
|
||||||
'CHECKSUM eliminated by chunk',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'CHECKSUM',
|
|
||||||
dbh => '4.1.1',
|
|
||||||
replicate => 1,
|
|
||||||
),
|
|
||||||
'BIT_XOR',
|
|
||||||
'CHECKSUM eliminated by replicate',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
dbh => '4.1.1',
|
|
||||||
count => 1,
|
|
||||||
),
|
|
||||||
'BIT_XOR',
|
|
||||||
'Default CHECKSUM eliminated by count',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'CHECKSUM',
|
|
||||||
dbh => '4.1.1',
|
|
||||||
count => 1,
|
|
||||||
),
|
|
||||||
'CHECKSUM',
|
|
||||||
'Explicit CHECKSUM not eliminated by count',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'CHECKSUM',
|
|
||||||
dbh => '4.0.0',
|
|
||||||
),
|
|
||||||
'ACCUM',
|
|
||||||
'CHECKSUM and BIT_XOR eliminated by version',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'BIT_XOR',
|
|
||||||
dbh => '4.1.1',
|
|
||||||
),
|
|
||||||
'BIT_XOR',
|
|
||||||
'BIT_XOR as requested',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'BIT_XOR',
|
|
||||||
dbh => '4.0.0',
|
|
||||||
),
|
|
||||||
'ACCUM',
|
|
||||||
'BIT_XOR eliminated by version',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->best_algorithm(
|
|
||||||
algorithm => 'ACCUM',
|
|
||||||
dbh => '4.1.1',
|
|
||||||
),
|
|
||||||
'ACCUM',
|
|
||||||
'ACCUM as requested',
|
|
||||||
);
|
|
||||||
|
|
||||||
ok($c->is_hash_algorithm('ACCUM'), 'ACCUM is hash');
|
|
||||||
ok($c->is_hash_algorithm('BIT_XOR'), 'BIT_XOR is hash');
|
|
||||||
ok(!$c->is_hash_algorithm('CHECKSUM'), 'CHECKSUM is not hash');
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->make_xor_slices(
|
|
||||||
query => 'FOO',
|
|
||||||
crc_wid => 1,
|
|
||||||
),
|
),
|
||||||
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 1), 16, 10) "
|
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 1), 16, 10) "
|
||||||
. "AS UNSIGNED)), 10, 16), 1, '0')",
|
. "AS UNSIGNED)), 10, 16), 1, '0')",
|
||||||
'FOO XOR slices 1 wide',
|
'FOO XOR slices 1 wide',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_xor_slices(
|
$c->_make_xor_slices(
|
||||||
query => 'FOO',
|
row_checksum => 'FOO',
|
||||||
crc_wid => 16,
|
crc_width => 16,
|
||||||
),
|
),
|
||||||
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
|
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
|
||||||
. "AS UNSIGNED)), 10, 16), 16, '0')",
|
. "AS UNSIGNED)), 10, 16), 16, '0')",
|
||||||
'FOO XOR slices 16 wide',
|
'FOO XOR slices 16 wide',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_xor_slices(
|
$c->_make_xor_slices(
|
||||||
query => 'FOO',
|
row_checksum => 'FOO',
|
||||||
crc_wid => 17,
|
crc_width => 17,
|
||||||
),
|
),
|
||||||
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
|
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
|
||||||
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
||||||
@@ -195,10 +79,10 @@ is (
|
|||||||
'FOO XOR slices 17 wide',
|
'FOO XOR slices 17 wide',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_xor_slices(
|
$c->_make_xor_slices(
|
||||||
query => 'FOO',
|
row_checksum => 'FOO',
|
||||||
crc_wid => 32,
|
crc_width => 32,
|
||||||
),
|
),
|
||||||
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
|
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(FOO, 1, 16), 16, 10) "
|
||||||
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
||||||
@@ -207,11 +91,11 @@ is (
|
|||||||
'FOO XOR slices 32 wide',
|
'FOO XOR slices 32 wide',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_xor_slices(
|
$c->_make_xor_slices(
|
||||||
query => 'FOO',
|
row_checksum => 'FOO',
|
||||||
crc_wid => 32,
|
crc_width => 32,
|
||||||
opt_slice => 0,
|
opt_slice => 0,
|
||||||
),
|
),
|
||||||
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc := FOO, 1, 16), 16, 10) "
|
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc := FOO, 1, 16), 16, 10) "
|
||||||
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
||||||
@@ -220,11 +104,11 @@ is (
|
|||||||
'XOR slice optimized in slice 0',
|
'XOR slice optimized in slice 0',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_xor_slices(
|
$c->_make_xor_slices(
|
||||||
query => 'FOO',
|
row_checksum => 'FOO',
|
||||||
crc_wid => 32,
|
crc_width => 32,
|
||||||
opt_slice => 1,
|
opt_slice => 1,
|
||||||
),
|
),
|
||||||
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc, 1, 16), 16, 10) "
|
"LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(\@crc, 1, 16), 16, 10) "
|
||||||
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
. "AS UNSIGNED)), 10, 16), 16, '0'), "
|
||||||
@@ -233,12 +117,19 @@ is (
|
|||||||
'XOR slice optimized in slice 1',
|
'XOR slice optimized in slice 1',
|
||||||
);
|
);
|
||||||
|
|
||||||
$t = $tp->parse(load_file('t/lib/samples/sakila.film.sql'));
|
# ############################################################################
|
||||||
|
# make_row_checksum
|
||||||
|
# ############################################################################
|
||||||
|
my $tbl = {
|
||||||
|
db => 'sakila',
|
||||||
|
tbl => 'film',
|
||||||
|
tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
|
||||||
|
};
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
),
|
),
|
||||||
q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
|
q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
|
||||||
. q{SHA1(CONCAT_WS('#', }
|
. q{SHA1(CONCAT_WS('#', }
|
||||||
@@ -251,10 +142,10 @@ is (
|
|||||||
'SHA1 query for sakila.film',
|
'SHA1 query for sakila.film',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'FNV_64',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'FNV_64',
|
||||||
),
|
),
|
||||||
q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
|
q{`film_id`, `title`, `description`, `release_year`, `language_id`, `original_language_id`, `rental_duration`, `rental_rate`, `length`, `replacement_cost`, `rating`, `special_features`, `last_update` + 0 AS `last_update`, }
|
||||||
. q{FNV_64(}
|
. q{FNV_64(}
|
||||||
@@ -264,329 +155,169 @@ is (
|
|||||||
'FNV_64 query for sakila.film',
|
'FNV_64 query for sakila.film',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
cols => [qw(film_id)],
|
cols => [qw(film_id)],
|
||||||
),
|
),
|
||||||
q{`film_id`, SHA1(`film_id`)},
|
q{`film_id`, SHA1(`film_id`)},
|
||||||
'SHA1 query for sakila.film with only one column',
|
'SHA1 query for sakila.film with only one column',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
cols => [qw(FILM_ID)],
|
cols => [qw(FILM_ID)],
|
||||||
),
|
),
|
||||||
q{`film_id`, SHA1(`film_id`)},
|
q{`film_id`, SHA1(`film_id`)},
|
||||||
'Column names are case-insensitive',
|
'Column names are case-insensitive',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
cols => [qw(film_id title)],
|
cols => [qw(film_id title)],
|
||||||
sep => '%',
|
sep => '%',
|
||||||
),
|
),
|
||||||
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
|
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
|
||||||
'Separator',
|
'Separator',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
cols => [qw(film_id title)],
|
cols => [qw(film_id title)],
|
||||||
sep => "'%'",
|
sep => "'%'",
|
||||||
),
|
),
|
||||||
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
|
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
|
||||||
'Bad separator',
|
'Bad separator',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
cols => [qw(film_id title)],
|
cols => [qw(film_id title)],
|
||||||
sep => "'''",
|
sep => "'''",
|
||||||
),
|
),
|
||||||
q{`film_id`, `title`, SHA1(CONCAT_WS('#', `film_id`, `title`))},
|
q{`film_id`, `title`, SHA1(CONCAT_WS('#', `film_id`, `title`))},
|
||||||
'Really bad separator',
|
'Really bad separator',
|
||||||
);
|
);
|
||||||
|
|
||||||
$t = $tp->parse(load_file('t/lib/samples/sakila.rental.float.sql'));
|
# sakila.rental
|
||||||
is (
|
$tbl = {
|
||||||
|
db => 'sakila',
|
||||||
|
tbl => 'rental',
|
||||||
|
tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.rental.float.sql')),
|
||||||
|
};
|
||||||
|
|
||||||
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
),
|
),
|
||||||
q{`rental_id`, `foo`, SHA1(CONCAT_WS('#', `rental_id`, `foo`))},
|
q{`rental_id`, `foo`, SHA1(CONCAT_WS('#', `rental_id`, `foo`))},
|
||||||
'FLOAT column is like any other',
|
'FLOAT column is like any other',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
float_precision => 5,
|
float_precision => 5,
|
||||||
),
|
),
|
||||||
q{`rental_id`, ROUND(`foo`, 5), SHA1(CONCAT_WS('#', `rental_id`, ROUND(`foo`, 5)))},
|
q{`rental_id`, ROUND(`foo`, 5), SHA1(CONCAT_WS('#', `rental_id`, ROUND(`foo`, 5)))},
|
||||||
'FLOAT column is rounded to 5 places',
|
'FLOAT column is rounded to 5 places',
|
||||||
);
|
);
|
||||||
|
|
||||||
$t = $tp->parse(load_file('t/lib/samples/sakila.film.sql'));
|
# sakila.film
|
||||||
|
$tbl = {
|
||||||
|
db => 'sakila',
|
||||||
|
tbl => 'film',
|
||||||
|
tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
|
||||||
|
};
|
||||||
|
|
||||||
like(
|
like(
|
||||||
$c->make_row_checksum(
|
$c->make_row_checksum(
|
||||||
function => 'SHA1',
|
tbl => $tbl,
|
||||||
tbl_struct => $t,
|
func => 'SHA1',
|
||||||
trim => 1,
|
trim => 1,
|
||||||
),
|
),
|
||||||
qr{TRIM\(`title`\)},
|
qr{TRIM\(`title`\)},
|
||||||
'VARCHAR column is trimmed',
|
'VARCHAR column is trimmed',
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
# ############################################################################
|
||||||
$c->make_checksum_query(
|
# make_chunk_checksum
|
||||||
%args,
|
# ############################################################################
|
||||||
db => 'sakila',
|
is(
|
||||||
tbl => 'film',
|
$c->make_chunk_checksum(
|
||||||
tbl_struct => $t,
|
tbl => $tbl,
|
||||||
algorithm => 'CHECKSUM',
|
func => 'SHA1',
|
||||||
function => 'SHA1',
|
crc_width=> 40,
|
||||||
crc_wid => 40,
|
cols => [qw(film_id)],
|
||||||
crc_type => 'varchar',
|
crc_type => 'varchar',
|
||||||
),
|
),
|
||||||
'CHECKSUM TABLE `sakila`.`film`',
|
q{COUNT(*) AS cnt, }
|
||||||
'Sakila.film CHECKSUM',
|
|
||||||
);
|
|
||||||
|
|
||||||
throws_ok (
|
|
||||||
sub { $c->make_checksum_query(
|
|
||||||
%args,
|
|
||||||
db => 'sakila',
|
|
||||||
tbl => 'film',
|
|
||||||
tbl_struct => $t,
|
|
||||||
algorithm => 'BIT_XOR',
|
|
||||||
crc_wid => 40,
|
|
||||||
cols => [qw(film_id)],
|
|
||||||
crc_type => 'varchar',
|
|
||||||
function => 'SHA1',
|
|
||||||
algorithm => 'CHECKSUM TABLE',
|
|
||||||
)
|
|
||||||
},
|
|
||||||
qr/missing checksum algorithm/,
|
|
||||||
'Complains about bad algorithm',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->make_checksum_query(
|
|
||||||
%args,
|
|
||||||
db => 'sakila',
|
|
||||||
tbl => 'film',
|
|
||||||
tbl_struct => $t,
|
|
||||||
algorithm => 'BIT_XOR',
|
|
||||||
function => 'SHA1',
|
|
||||||
crc_wid => 40,
|
|
||||||
cols => [qw(film_id)],
|
|
||||||
crc_type => 'varchar',
|
|
||||||
),
|
|
||||||
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
|
|
||||||
. q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, }
|
. q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, }
|
||||||
. q{16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'), }
|
. q{16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'), }
|
||||||
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 17, 16), 16, }
|
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 17, 16), 16, }
|
||||||
. q{10) AS UNSIGNED)), 10, 16), 16, '0'), }
|
. q{10) AS UNSIGNED)), 10, 16), 16, '0'), }
|
||||||
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 33, 8), 16, }
|
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 33, 8), 16, }
|
||||||
. q{10) AS UNSIGNED)), 10, 16), 8, '0'))), 0) AS crc }
|
. q{10) AS UNSIGNED)), 10, 16), 8, '0'))), 0) AS crc},
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
'sakila.film SHA1',
|
||||||
'Sakila.film SHA1 BIT_XOR',
|
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_checksum_query(
|
$c->make_chunk_checksum(
|
||||||
%args,
|
tbl => $tbl,
|
||||||
db => 'sakila',
|
func => 'FNV_64',
|
||||||
tbl => 'film',
|
crc_width=> 99,
|
||||||
tbl_struct => $t,
|
cols => [qw(film_id)],
|
||||||
algorithm => 'BIT_XOR',
|
crc_type => 'bigint',
|
||||||
function => 'FNV_64',
|
|
||||||
crc_wid => 99,
|
|
||||||
cols => [qw(film_id)],
|
|
||||||
crc_type => 'bigint',
|
|
||||||
),
|
),
|
||||||
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
|
q{COUNT(*) AS cnt, }
|
||||||
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc }
|
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
'sakila.film FNV_64',
|
||||||
'Sakila.film FNV_64 BIT_XOR',
|
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_checksum_query(
|
$c->make_chunk_checksum(
|
||||||
%args,
|
tbl => $tbl,
|
||||||
db => 'sakila',
|
func => 'FNV_64',
|
||||||
tbl => 'film',
|
crc_width=> 99,
|
||||||
tbl_struct => $t,
|
cols => [qw(film_id)],
|
||||||
algorithm => 'BIT_XOR',
|
buffer => 1,
|
||||||
function => 'FNV_64',
|
crc_type => 'bigint',
|
||||||
crc_wid => 99,
|
|
||||||
cols => [qw(film_id)],
|
|
||||||
buffer => 1,
|
|
||||||
crc_type => 'bigint',
|
|
||||||
),
|
),
|
||||||
q{SELECT SQL_BUFFER_RESULT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
|
q{COUNT(*) AS cnt, }
|
||||||
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc }
|
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
'sakila.film FNV_64',
|
||||||
'Sakila.film FNV_64 BIT_XOR',
|
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
is(
|
||||||
$c->make_checksum_query(
|
$c->make_chunk_checksum(
|
||||||
%args,
|
tbl => $tbl,
|
||||||
db => 'sakila',
|
func => 'CRC32',
|
||||||
tbl => 'film',
|
crc_width=> 99,
|
||||||
tbl_struct => $t,
|
cols => [qw(film_id)],
|
||||||
algorithm => 'BIT_XOR',
|
buffer => 1,
|
||||||
function => 'CRC32',
|
crc_type => 'int',
|
||||||
crc_wid => 99,
|
|
||||||
cols => [qw(film_id)],
|
|
||||||
buffer => 1,
|
|
||||||
crc_type => 'int',
|
|
||||||
),
|
),
|
||||||
q{SELECT SQL_BUFFER_RESULT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
|
q{COUNT(*) AS cnt, }
|
||||||
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc }
|
. q{COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc},
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
'sakila.film CRC32',
|
||||||
'Sakila.film CRC32 BIT_XOR',
|
|
||||||
);
|
);
|
||||||
|
|
||||||
is (
|
|
||||||
$c->make_checksum_query(
|
|
||||||
%args,
|
|
||||||
db => 'sakila',
|
|
||||||
tbl => 'film',
|
|
||||||
tbl_struct => $t,
|
|
||||||
algorithm => 'BIT_XOR',
|
|
||||||
function => 'SHA1',
|
|
||||||
crc_wid => 40,
|
|
||||||
cols => [qw(film_id)],
|
|
||||||
replicate => 'test.checksum',
|
|
||||||
crc_type => 'varchar',
|
|
||||||
),
|
|
||||||
q{REPLACE /*PROGRESS_COMMENT*/ INTO test.checksum }
|
|
||||||
. q{(db, tbl, chunk, boundaries, this_cnt, this_crc) }
|
|
||||||
. q{SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, }
|
|
||||||
. q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, }
|
|
||||||
. q{16), 16, 10) AS UNSIGNED)), 10, 16), 16, '0'), }
|
|
||||||
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 17, 16), 16, }
|
|
||||||
. q{10) AS UNSIGNED)), 10, 16), 16, '0'), }
|
|
||||||
. q{LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 33, 8), 16, }
|
|
||||||
. q{10) AS UNSIGNED)), 10, 16), 8, '0'))), 0) AS crc }
|
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
|
||||||
'Sakila.film SHA1 BIT_XOR with replication',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->make_checksum_query(
|
|
||||||
%args,
|
|
||||||
db => 'sakila',
|
|
||||||
tbl => 'film',
|
|
||||||
tbl_struct => $t,
|
|
||||||
algorithm => 'ACCUM',
|
|
||||||
function => 'SHA1',
|
|
||||||
crc_wid => 40,
|
|
||||||
crc_type => 'varchar',
|
|
||||||
),
|
|
||||||
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
|
|
||||||
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
|
|
||||||
. q{SHA1(CONCAT(@crc, SHA1(CONCAT_WS('#', }
|
|
||||||
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
|
|
||||||
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
|
|
||||||
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0, }
|
|
||||||
. q{CONCAT(ISNULL(`description`), ISNULL(`release_year`), }
|
|
||||||
. q{ISNULL(`original_language_id`), ISNULL(`length`), }
|
|
||||||
. q{ISNULL(`rating`), ISNULL(`special_features`)))))))), 40), 0) AS crc }
|
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
|
||||||
'Sakila.film SHA1 ACCUM',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->make_checksum_query(
|
|
||||||
%args,
|
|
||||||
db => 'sakila',
|
|
||||||
tbl => 'film',
|
|
||||||
tbl_struct => $t,
|
|
||||||
algorithm => 'ACCUM',
|
|
||||||
function => 'FNV_64',
|
|
||||||
crc_wid => 16,
|
|
||||||
crc_type => 'bigint',
|
|
||||||
),
|
|
||||||
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
|
|
||||||
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
|
|
||||||
. q{CONV(CAST(FNV_64(CONCAT(@crc, FNV_64(}
|
|
||||||
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
|
|
||||||
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
|
|
||||||
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0}
|
|
||||||
. q{))) AS UNSIGNED), 10, 16))), 16), 0) AS crc }
|
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
|
||||||
'Sakila.film FNV_64 ACCUM',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->make_checksum_query(
|
|
||||||
%args,
|
|
||||||
db => 'sakila',
|
|
||||||
tbl => 'film',
|
|
||||||
tbl_struct => $t,
|
|
||||||
algorithm => 'ACCUM',
|
|
||||||
function => 'CRC32',
|
|
||||||
crc_wid => 16,
|
|
||||||
crc_type => 'int',
|
|
||||||
cols => [qw(film_id)],
|
|
||||||
),
|
|
||||||
q{SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, }
|
|
||||||
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
|
|
||||||
. q{CONV(CAST(CRC32(CONCAT(@crc, CRC32(`film_id`}
|
|
||||||
. q{))) AS UNSIGNED), 10, 16))), 16), 0) AS crc }
|
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
|
||||||
'Sakila.film CRC32 ACCUM',
|
|
||||||
);
|
|
||||||
|
|
||||||
is (
|
|
||||||
$c->make_checksum_query(
|
|
||||||
%args,
|
|
||||||
db => 'sakila',
|
|
||||||
tbl => 'film',
|
|
||||||
tbl_struct => $t,
|
|
||||||
algorithm => 'ACCUM',
|
|
||||||
function => 'SHA1',
|
|
||||||
crc_wid => 40,
|
|
||||||
replicate => 'test.checksum',
|
|
||||||
crc_type => 'varchar',
|
|
||||||
),
|
|
||||||
q{REPLACE /*PROGRESS_COMMENT*/ INTO test.checksum }
|
|
||||||
. q{(db, tbl, chunk, boundaries, this_cnt, this_crc) }
|
|
||||||
. q{SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, }
|
|
||||||
. q{COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, '0'), }
|
|
||||||
. q{SHA1(CONCAT(@crc, SHA1(CONCAT_WS('#', }
|
|
||||||
. q{`film_id`, `title`, `description`, `release_year`, `language_id`, }
|
|
||||||
. q{`original_language_id`, `rental_duration`, `rental_rate`, `length`, }
|
|
||||||
. q{`replacement_cost`, `rating`, `special_features`, `last_update` + 0, }
|
|
||||||
. q{CONCAT(ISNULL(`description`), ISNULL(`release_year`), }
|
|
||||||
. q{ISNULL(`original_language_id`), ISNULL(`length`), }
|
|
||||||
. q{ISNULL(`rating`), ISNULL(`special_features`)))))))), 40), 0) AS crc }
|
|
||||||
. q{FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/},
|
|
||||||
'Sakila.film SHA1 ACCUM with replication',
|
|
||||||
);
|
|
||||||
|
|
||||||
is ( $c->crc32('hello world'), 222957957, 'CRC32 of hello world');
|
|
||||||
|
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
# Sandbox tests.
|
# Sandbox tests.
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
like(
|
like(
|
||||||
$c->choose_hash_func(
|
$c->_get_hash_func(
|
||||||
dbh => $dbh,
|
dbh => $dbh,
|
||||||
),
|
),
|
||||||
qr/CRC32|FNV_64|MD5/,
|
qr/CRC32|FNV_64|MD5/,
|
||||||
@@ -594,65 +325,76 @@ like(
|
|||||||
);
|
);
|
||||||
|
|
||||||
like(
|
like(
|
||||||
$c->choose_hash_func(
|
$c->_get_hash_func(
|
||||||
dbh => $dbh,
|
dbh => $dbh,
|
||||||
function => 'SHA99',
|
func => 'SHA99',
|
||||||
),
|
),
|
||||||
qr/CRC32|FNV_64|MD5/,
|
qr/CRC32|FNV_64|MD5/,
|
||||||
'SHA99 does not exist so I get CRC32 or friends',
|
'SHA99 does not exist so I get CRC32 or friends',
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ARGV = qw(--function MD5);
|
||||||
|
$o->get_opts();
|
||||||
is(
|
is(
|
||||||
$c->choose_hash_func(
|
$c->_get_hash_func(
|
||||||
dbh => $dbh,
|
dbh => $dbh,
|
||||||
function => 'MD5',
|
func => 'MD5',
|
||||||
),
|
),
|
||||||
'MD5',
|
'MD5',
|
||||||
'MD5 requested and MD5 granted',
|
'MD5 requested and MD5 granted',
|
||||||
);
|
);
|
||||||
|
@ARGV = qw();
|
||||||
|
$o->get_opts();
|
||||||
|
|
||||||
is(
|
is(
|
||||||
$c->optimize_xor(
|
$c->_optimize_xor(
|
||||||
dbh => $dbh,
|
dbh => $dbh,
|
||||||
function => 'SHA1',
|
func => 'SHA1',
|
||||||
),
|
),
|
||||||
'2',
|
'2',
|
||||||
'SHA1 slice is 2',
|
'SHA1 slice is 2',
|
||||||
);
|
);
|
||||||
|
|
||||||
is(
|
is(
|
||||||
$c->optimize_xor(
|
$c->_optimize_xor(
|
||||||
dbh => $dbh,
|
dbh => $dbh,
|
||||||
function => 'MD5',
|
func => 'MD5',
|
||||||
),
|
),
|
||||||
'1',
|
'1',
|
||||||
'MD5 slice is 1',
|
'MD5 slice is 1',
|
||||||
);
|
);
|
||||||
|
|
||||||
is_deeply(
|
is(
|
||||||
[$c->get_crc_type($dbh, 'CRC32')],
|
$c->_get_crc_type(
|
||||||
[qw(int 10)],
|
dbh => $dbh,
|
||||||
'Type and length of CRC32'
|
func => 'CRC32',
|
||||||
|
),
|
||||||
|
'int',
|
||||||
|
'CRC32 type'
|
||||||
);
|
);
|
||||||
|
|
||||||
is_deeply(
|
is(
|
||||||
[$c->get_crc_type($dbh, 'MD5')],
|
$c->_get_crc_type(
|
||||||
[qw(varchar 32)],
|
dbh => $dbh,
|
||||||
'Type and length of MD5'
|
func => 'MD5',
|
||||||
|
),
|
||||||
|
'varchar',
|
||||||
|
'MD5 type'
|
||||||
);
|
);
|
||||||
|
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
# Issue 94: Enhance mk-table-checksum, add a --ignorecols option
|
# Issue 94: Enhance mk-table-checksum, add a --ignorecols option
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
$sb->load_file('master', 't/lib/samples/issue_94.sql');
|
$sb->load_file('master', 't/lib/samples/issue_94.sql');
|
||||||
$t= $tp->parse( $du->get_create_table($dbh, $q, 'test', 'issue_94') );
|
$tbl = {
|
||||||
my $query = $c->make_checksum_query(
|
|
||||||
db => 'test',
|
db => 'test',
|
||||||
tbl => 'issue_47',
|
tbl => 'issue_94',
|
||||||
tbl_struct => $t,
|
tbl_struct => $tp->parse($du->get_create_table($dbh, $q, 'test', 'issue_94')),
|
||||||
algorithm => 'ACCUM',
|
};
|
||||||
function => 'CRC32',
|
my $query = $c->make_chunk_checksum(
|
||||||
crc_wid => 16,
|
tbl => $tbl,
|
||||||
|
func => 'CRC32',
|
||||||
|
crc_width => 16,
|
||||||
crc_type => 'int',
|
crc_type => 'int',
|
||||||
opt_slice => undef,
|
opt_slice => undef,
|
||||||
cols => undef,
|
cols => undef,
|
||||||
@@ -662,9 +404,14 @@ my $query = $c->make_checksum_query(
|
|||||||
trim => undef,
|
trim => undef,
|
||||||
ignorecols => {'c'=>1},
|
ignorecols => {'c'=>1},
|
||||||
);
|
);
|
||||||
is($query,
|
is(
|
||||||
'SELECT /*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, COALESCE(RIGHT(MAX(@crc := CONCAT(LPAD(@cnt := @cnt + 1, 16, \'0\'), CONV(CAST(CRC32(CONCAT(@crc, CRC32(CONCAT_WS(\'#\', `a`, `b`)))) AS UNSIGNED), 10, 16))), 16), 0) AS crc FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/',
|
$query,
|
||||||
'Ignores specified columns');
|
"COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `a`, `b`)) AS UNSIGNED)), 10, 16)), 0) AS crc",
|
||||||
|
'Ignores specified columns'
|
||||||
|
);
|
||||||
|
|
||||||
|
# ############################################################################
|
||||||
|
# Done.
|
||||||
|
# ############################################################################
|
||||||
$sb->wipe_clean($dbh);
|
$sb->wipe_clean($dbh);
|
||||||
exit;
|
exit;
|
||||||
|
Reference in New Issue
Block a user