Fix cols, sep, trim, and float precision. Re-add --columns.

This commit is contained in:
Daniel Nichter
2011-09-14 10:16:43 -06:00
parent e383294144
commit 676a7aa20f
3 changed files with 65 additions and 55 deletions

View File

@@ -5979,6 +5979,12 @@ they are oversize, you might want to specify a value larger than 2.
You can disable oversize chunk checking by specifying L<"--chunk-size-limit"> 0.
=item --columns
short form: -c; type: array; group: Filter
Checksum only this comma-separated list of columns.
=item --config
type: Array; group: Config

View File

@@ -45,17 +45,11 @@ sub new {
# Sub: make_row_checksum
# Make a SELECT column list to checksum a row.
#
# Parameters:
# %args - Arguments
#
# Required Arguments:
# tbl - Table ref
#
# Optional Arguments:
# sep - Separator for CONCAT_WS(); default #
# cols - Arrayref of columns to checksum
# trim - Wrap VARCHAR cols in TRIM() for v4/v5 compatibility
# ignorecols - Arrayref of columns to exclude from checksum
# no_cols - Don't append columns to list outside of functions.
#
# Returns:
# Column list for SELECT
@@ -72,19 +66,16 @@ sub make_row_checksum {
my $tbl_struct = $tbl->{tbl_struct};
my $func = $args{func} || uc($o->get('function'));
my $sep = $args{sep} || '#';
my $trim = $o->get('trim');
my $float_precision = $o->get('float-precision');
my $sep = $o->get('separator') || '#';
$sep =~ s/'//g;
$sep ||= '#';
# This allows a simpler grep when building %cols below.
my $ignorecols = $args{ignorecols} || {};
my $ignore_col = $o->get('ignore-columns') || {};
my $all_cols = $o->get('columns') || $tbl_struct->{cols};
my %cols = map { lc($_) => 1 } grep { !$ignore_col->{$_} } @$all_cols;
# Generate the expression that will turn a row into a checksum.
# Choose columns. Normalize query results: make FLOAT and TIMESTAMP
# stringify uniformly.
my %cols = map { lc($_) => 1 }
grep { !exists $ignorecols->{$_} }
($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
my %seen;
my @cols =
map {
@@ -93,10 +84,10 @@ sub make_row_checksum {
if ( $type eq 'timestamp' ) {
$result .= ' + 0';
}
elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
$result = "ROUND($result, $args{float_precision})";
elsif ( $float_precision && $type =~ m/float|double/ ) {
$result = "ROUND($result, $float_precision)";
}
elsif ( $args{trim} && $type =~ m/varchar/ ) {
elsif ( $trim && $type =~ m/varchar/ ) {
$result = "TRIM($result)";
}
$result;

View File

@@ -155,6 +155,8 @@ is(
'FNV_64 query for sakila.film',
);
@ARGV = qw(--columns film_id);
$o->get_opts();
is(
$c->make_row_checksum(
tbl => $tbl,
@@ -165,6 +167,8 @@ is(
'SHA1 query for sakila.film with only one column',
);
@ARGV = qw(--columns FILM_ID);
$o->get_opts();
is(
$c->make_row_checksum(
tbl => $tbl,
@@ -175,28 +179,30 @@ is(
'Column names are case-insensitive',
);
@ARGV = ('--columns', 'film_id,title', qw(--separator %));
$o->get_opts();
is(
$c->make_row_checksum(
tbl => $tbl,
func => 'SHA1',
cols => [qw(film_id title)],
sep => '%',
),
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
'Separator',
);
@ARGV = ('--columns', 'film_id,title', qw(--separator '%'));
$o->get_opts();
is(
$c->make_row_checksum(
tbl => $tbl,
func => 'SHA1',
cols => [qw(film_id title)],
sep => "'%'",
),
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
'Bad separator',
);
@ARGV = ('--columns', 'film_id,title', qw(--separator ''));
$o->get_opts();
is(
$c->make_row_checksum(
tbl => $tbl,
@@ -208,6 +214,9 @@ is(
'Really bad separator',
);
@ARGV = qw();
$o->get_opts();
# sakila.rental
$tbl = {
db => 'sakila',
@@ -224,6 +233,9 @@ is(
'FLOAT column is like any other',
);
@ARGV = qw(--float-precision 5);
$o->get_opts();
is(
$c->make_row_checksum(
tbl => $tbl,
@@ -241,25 +253,31 @@ $tbl = {
tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
};
@ARGV = qw(--trim);
$o->get_opts();
like(
$c->make_row_checksum(
tbl => $tbl,
func => 'SHA1',
trim => 1,
trim => 0,
),
qr{TRIM\(`title`\)},
'VARCHAR column is trimmed',
);
@ARGV = qw();
$o->get_opts();
# ############################################################################
# make_chunk_checksum
# ############################################################################
@ARGV = qw(--columns film_id --no-optimize-xor);
$o->get_opts();
is(
$c->make_chunk_checksum(
tbl => $tbl,
func => 'SHA1',
crc_width => 40,
cols => [qw(film_id)],
crc_type => 'varchar',
),
q{COUNT(*) AS cnt, }
@@ -277,7 +295,6 @@ is(
tbl => $tbl,
func => 'FNV_64',
crc_width => 99,
cols => [qw(film_id)],
crc_type => 'bigint',
),
q{COUNT(*) AS cnt, }
@@ -290,7 +307,6 @@ is(
tbl => $tbl,
func => 'FNV_64',
crc_width => 99,
cols => [qw(film_id)],
buffer => 1,
crc_type => 'bigint',
),
@@ -304,7 +320,6 @@ is(
tbl => $tbl,
func => 'CRC32',
crc_width => 99,
cols => [qw(film_id)],
buffer => 1,
crc_type => 'int',
),
@@ -313,6 +328,9 @@ is(
'sakila.film CRC32',
);
@ARGV = qw();
$o->get_opts();
# #############################################################################
# Sandbox tests.
# #############################################################################
@@ -391,18 +409,13 @@ $tbl = {
tbl => 'issue_94',
tbl_struct => $tp->parse($du->get_create_table($dbh, $q, 'test', 'issue_94')),
};
@ARGV = qw(--ignore-columns c);
$o->get_opts();
my $query = $c->make_chunk_checksum(
tbl => $tbl,
func => 'CRC32',
crc_width => 16,
crc_type => 'int',
opt_slice => undef,
cols => undef,
sep => '#',
replicate => undef,
precision => undef,
trim => undef,
ignorecols => {'c'=>1},
);
is(
$query,