Fix cols, sep, trim, and float precision. Re-add --columns.

This commit is contained in:
Daniel Nichter
2011-09-14 10:16:43 -06:00
parent e383294144
commit 676a7aa20f
3 changed files with 65 additions and 55 deletions

View File

@@ -5979,6 +5979,12 @@ they are oversize, you might want to specify a value larger than 2.
You can disable oversize chunk checking by specifying L<"--chunk-size-limit"> 0. You can disable oversize chunk checking by specifying L<"--chunk-size-limit"> 0.
=item --columns
short form: -c; type: array; group: Filter
Checksum only this comma-separated list of columns.
=item --config =item --config
type: Array; group: Config type: Array; group: Config

View File

@@ -45,17 +45,11 @@ sub new {
# Sub: make_row_checksum # Sub: make_row_checksum
# Make a SELECT column list to checksum a row. # Make a SELECT column list to checksum a row.
# #
# Parameters:
# %args - Arguments
#
# Required Arguments: # Required Arguments:
# tbl - Table ref # tbl - Table ref
# #
# Optional Arguments: # Optional Arguments:
# sep - Separator for CONCAT_WS(); default # # no_cols - Don't append columns to list outside of functions.
# cols - Arrayref of columns to checksum
# trim - Wrap VARCHAR cols in TRIM() for v4/v5 compatibility
# ignorecols - Arrayref of columns to exclude from checksum
# #
# Returns: # Returns:
# Column list for SELECT # Column list for SELECT
@@ -72,19 +66,16 @@ sub make_row_checksum {
my $tbl_struct = $tbl->{tbl_struct}; my $tbl_struct = $tbl->{tbl_struct};
my $func = $args{func} || uc($o->get('function')); my $func = $args{func} || uc($o->get('function'));
my $sep = $args{sep} || '#'; my $trim = $o->get('trim');
my $float_precision = $o->get('float-precision');
my $sep = $o->get('separator') || '#';
$sep =~ s/'//g; $sep =~ s/'//g;
$sep ||= '#'; $sep ||= '#';
# This allows a simpler grep when building %cols below. my $ignore_col = $o->get('ignore-columns') || {};
my $ignorecols = $args{ignorecols} || {}; my $all_cols = $o->get('columns') || $tbl_struct->{cols};
my %cols = map { lc($_) => 1 } grep { !$ignore_col->{$_} } @$all_cols;
# Generate the expression that will turn a row into a checksum.
# Choose columns. Normalize query results: make FLOAT and TIMESTAMP
# stringify uniformly.
my %cols = map { lc($_) => 1 }
grep { !exists $ignorecols->{$_} }
($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
my %seen; my %seen;
my @cols = my @cols =
map { map {
@@ -93,10 +84,10 @@ sub make_row_checksum {
if ( $type eq 'timestamp' ) { if ( $type eq 'timestamp' ) {
$result .= ' + 0'; $result .= ' + 0';
} }
elsif ( $args{float_precision} && $type =~ m/float|double/ ) { elsif ( $float_precision && $type =~ m/float|double/ ) {
$result = "ROUND($result, $args{float_precision})"; $result = "ROUND($result, $float_precision)";
} }
elsif ( $args{trim} && $type =~ m/varchar/ ) { elsif ( $trim && $type =~ m/varchar/ ) {
$result = "TRIM($result)"; $result = "TRIM($result)";
} }
$result; $result;

View File

@@ -155,6 +155,8 @@ is(
'FNV_64 query for sakila.film', 'FNV_64 query for sakila.film',
); );
@ARGV = qw(--columns film_id);
$o->get_opts();
is( is(
$c->make_row_checksum( $c->make_row_checksum(
tbl => $tbl, tbl => $tbl,
@@ -165,6 +167,8 @@ is(
'SHA1 query for sakila.film with only one column', 'SHA1 query for sakila.film with only one column',
); );
@ARGV = qw(--columns FILM_ID);
$o->get_opts();
is( is(
$c->make_row_checksum( $c->make_row_checksum(
tbl => $tbl, tbl => $tbl,
@@ -175,28 +179,30 @@ is(
'Column names are case-insensitive', 'Column names are case-insensitive',
); );
@ARGV = ('--columns', 'film_id,title', qw(--separator %));
$o->get_opts();
is( is(
$c->make_row_checksum( $c->make_row_checksum(
tbl => $tbl, tbl => $tbl,
func => 'SHA1', func => 'SHA1',
cols => [qw(film_id title)],
sep => '%',
), ),
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))}, q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
'Separator', 'Separator',
); );
@ARGV = ('--columns', 'film_id,title', qw(--separator '%'));
$o->get_opts();
is( is(
$c->make_row_checksum( $c->make_row_checksum(
tbl => $tbl, tbl => $tbl,
func => 'SHA1', func => 'SHA1',
cols => [qw(film_id title)],
sep => "'%'",
), ),
q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))}, q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))},
'Bad separator', 'Bad separator',
); );
@ARGV = ('--columns', 'film_id,title', qw(--separator ''));
$o->get_opts();
is( is(
$c->make_row_checksum( $c->make_row_checksum(
tbl => $tbl, tbl => $tbl,
@@ -208,6 +214,9 @@ is(
'Really bad separator', 'Really bad separator',
); );
@ARGV = qw();
$o->get_opts();
# sakila.rental # sakila.rental
$tbl = { $tbl = {
db => 'sakila', db => 'sakila',
@@ -224,6 +233,9 @@ is(
'FLOAT column is like any other', 'FLOAT column is like any other',
); );
@ARGV = qw(--float-precision 5);
$o->get_opts();
is( is(
$c->make_row_checksum( $c->make_row_checksum(
tbl => $tbl, tbl => $tbl,
@@ -241,25 +253,31 @@ $tbl = {
tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')), tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')),
}; };
@ARGV = qw(--trim);
$o->get_opts();
like( like(
$c->make_row_checksum( $c->make_row_checksum(
tbl => $tbl, tbl => $tbl,
func => 'SHA1', func => 'SHA1',
trim => 1, trim => 0,
), ),
qr{TRIM\(`title`\)}, qr{TRIM\(`title`\)},
'VARCHAR column is trimmed', 'VARCHAR column is trimmed',
); );
@ARGV = qw();
$o->get_opts();
# ############################################################################ # ############################################################################
# make_chunk_checksum # make_chunk_checksum
# ############################################################################ # ############################################################################
@ARGV = qw(--columns film_id --no-optimize-xor);
$o->get_opts();
is( is(
$c->make_chunk_checksum( $c->make_chunk_checksum(
tbl => $tbl, tbl => $tbl,
func => 'SHA1', func => 'SHA1',
crc_width => 40, crc_width => 40,
cols => [qw(film_id)],
crc_type => 'varchar', crc_type => 'varchar',
), ),
q{COUNT(*) AS cnt, } q{COUNT(*) AS cnt, }
@@ -277,7 +295,6 @@ is(
tbl => $tbl, tbl => $tbl,
func => 'FNV_64', func => 'FNV_64',
crc_width => 99, crc_width => 99,
cols => [qw(film_id)],
crc_type => 'bigint', crc_type => 'bigint',
), ),
q{COUNT(*) AS cnt, } q{COUNT(*) AS cnt, }
@@ -290,7 +307,6 @@ is(
tbl => $tbl, tbl => $tbl,
func => 'FNV_64', func => 'FNV_64',
crc_width => 99, crc_width => 99,
cols => [qw(film_id)],
buffer => 1, buffer => 1,
crc_type => 'bigint', crc_type => 'bigint',
), ),
@@ -304,7 +320,6 @@ is(
tbl => $tbl, tbl => $tbl,
func => 'CRC32', func => 'CRC32',
crc_width => 99, crc_width => 99,
cols => [qw(film_id)],
buffer => 1, buffer => 1,
crc_type => 'int', crc_type => 'int',
), ),
@@ -313,6 +328,9 @@ is(
'sakila.film CRC32', 'sakila.film CRC32',
); );
@ARGV = qw();
$o->get_opts();
# ############################################################################# # #############################################################################
# Sandbox tests. # Sandbox tests.
# ############################################################################# # #############################################################################
@@ -391,18 +409,13 @@ $tbl = {
tbl => 'issue_94', tbl => 'issue_94',
tbl_struct => $tp->parse($du->get_create_table($dbh, $q, 'test', 'issue_94')), tbl_struct => $tp->parse($du->get_create_table($dbh, $q, 'test', 'issue_94')),
}; };
@ARGV = qw(--ignore-columns c);
$o->get_opts();
my $query = $c->make_chunk_checksum( my $query = $c->make_chunk_checksum(
tbl => $tbl, tbl => $tbl,
func => 'CRC32', func => 'CRC32',
crc_width => 16, crc_width => 16,
crc_type => 'int', crc_type => 'int',
opt_slice => undef,
cols => undef,
sep => '#',
replicate => undef,
precision => undef,
trim => undef,
ignorecols => {'c'=>1},
); );
is( is(
$query, $query,