From 676a7aa20f7760b39677457024e2520d5eb12001 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 14 Sep 2011 10:16:43 -0600 Subject: [PATCH] Fix cols, sep, trim, and float precision. Re-add --columns. --- bin/pt-table-checksum | 6 ++++ lib/RowChecksum.pm | 33 +++++++----------- t/lib/RowChecksum.t | 81 +++++++++++++++++++++++++------------------ 3 files changed, 65 insertions(+), 55 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index c5ae1c42..a42fe21d 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -5979,6 +5979,12 @@ they are oversize, you might want to specify a value larger than 2. You can disable oversize chunk checking by specifying L<"--chunk-size-limit"> 0. +=item --columns + +short form: -c; type: array; group: Filter + +Checksum only this comma-separated list of columns. + =item --config type: Array; group: Config diff --git a/lib/RowChecksum.pm b/lib/RowChecksum.pm index c5c14f11..c415a069 100644 --- a/lib/RowChecksum.pm +++ b/lib/RowChecksum.pm @@ -45,17 +45,11 @@ sub new { # Sub: make_row_checksum # Make a SELECT column list to checksum a row. # -# Parameters: -# %args - Arguments -# # Required Arguments: # tbl - Table ref # # Optional Arguments: -# sep - Separator for CONCAT_WS(); default # -# cols - Arrayref of columns to checksum -# trim - Wrap VARCHAR cols in TRIM() for v4/v5 compatibility -# ignorecols - Arrayref of columns to exclude from checksum +# no_cols - Don't append columns to list outside of functions. 
# # Returns: # Column list for SELECT @@ -72,31 +66,28 @@ sub make_row_checksum { my $tbl_struct = $tbl->{tbl_struct}; my $func = $args{func} || uc($o->get('function')); - my $sep = $args{sep} || '#'; + my $trim = $o->get('trim'); + my $float_precision = $o->get('float-precision'); + my $sep = $o->get('separator') || '#'; $sep =~ s/'//g; $sep ||= '#'; + + my $ignore_col = $o->get('ignore-columns') || {}; + my $all_cols = $o->get('columns') || $tbl_struct->{cols}; + my %cols = map { lc($_) => 1 } grep { !$ignore_col->{$_} } @$all_cols; - # This allows a simpler grep when building %cols below. - my $ignorecols = $args{ignorecols} || {}; - - # Generate the expression that will turn a row into a checksum. - # Choose columns. Normalize query results: make FLOAT and TIMESTAMP - # stringify uniformly. - my %cols = map { lc($_) => 1 } - grep { !exists $ignorecols->{$_} } - ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}}); my %seen; my @cols = map { - my $type = $tbl_struct->{type_for}->{$_}; + my $type = $tbl_struct->{type_for}->{$_}; my $result = $q->quote($_); if ( $type eq 'timestamp' ) { $result .= ' + 0'; } - elsif ( $args{float_precision} && $type =~ m/float|double/ ) { - $result = "ROUND($result, $args{float_precision})"; + elsif ( $float_precision && $type =~ m/float|double/ ) { + $result = "ROUND($result, $float_precision)"; } - elsif ( $args{trim} && $type =~ m/varchar/ ) { + elsif ( $trim && $type =~ m/varchar/ ) { $result = "TRIM($result)"; } $result; diff --git a/t/lib/RowChecksum.t b/t/lib/RowChecksum.t index 835786f8..373663bc 100644 --- a/t/lib/RowChecksum.t +++ b/t/lib/RowChecksum.t @@ -155,6 +155,8 @@ is( 'FNV_64 query for sakila.film', ); +@ARGV = qw(--columns film_id); +$o->get_opts(); is( $c->make_row_checksum( tbl => $tbl, @@ -165,6 +167,8 @@ is( 'SHA1 query for sakila.film with only one column', ); +@ARGV = qw(--columns FILM_ID); +$o->get_opts(); is( $c->make_row_checksum( tbl => $tbl, @@ -175,28 +179,30 @@ is( 'Column names are 
case-insensitive', ); +@ARGV = ('--columns', 'film_id,title', qw(--separator %)); +$o->get_opts(); is( $c->make_row_checksum( tbl => $tbl, func => 'SHA1', - cols => [qw(film_id title)], - sep => '%', ), q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))}, 'Separator', ); +@ARGV = ('--columns', 'film_id,title', qw(--separator '%')); +$o->get_opts(); is( $c->make_row_checksum( tbl => $tbl, func => 'SHA1', - cols => [qw(film_id title)], - sep => "'%'", ), q{`film_id`, `title`, SHA1(CONCAT_WS('%', `film_id`, `title`))}, 'Bad separator', ); +@ARGV = ('--columns', 'film_id,title', qw(--separator '')); +$o->get_opts(); is( $c->make_row_checksum( tbl => $tbl, @@ -208,6 +214,9 @@ is( 'Really bad separator', ); +@ARGV = qw(); +$o->get_opts(); + # sakila.rental $tbl = { db => 'sakila', @@ -224,6 +233,9 @@ is( 'FLOAT column is like any other', ); + +@ARGV = qw(--float-precision 5); +$o->get_opts(); is( $c->make_row_checksum( tbl => $tbl, @@ -241,26 +253,32 @@ $tbl = { tbl_struct => $tp->parse(load_file('t/lib/samples/sakila.film.sql')), }; +@ARGV = qw(--trim); +$o->get_opts(); like( $c->make_row_checksum( tbl => $tbl, func => 'SHA1', - trim => 1, + trim => 0, ), qr{TRIM\(`title`\)}, 'VARCHAR column is trimmed', ); +@ARGV = qw(); +$o->get_opts(); + # ############################################################################ # make_chunk_checksum # ############################################################################ +@ARGV = qw(--columns film_id --no-optimize-xor); +$o->get_opts(); is( $c->make_chunk_checksum( - tbl => $tbl, - func => 'SHA1', - crc_width=> 40, - cols => [qw(film_id)], - crc_type => 'varchar', + tbl => $tbl, + func => 'SHA1', + crc_width => 40, + crc_type => 'varchar', ), q{COUNT(*) AS cnt, } . 
q{COALESCE(LOWER(CONCAT(LPAD(CONV(BIT_XOR(CAST(CONV(SUBSTRING(SHA1(`film_id`), 1, } @@ -274,11 +292,10 @@ is( is( $c->make_chunk_checksum( - tbl => $tbl, - func => 'FNV_64', - crc_width=> 99, - cols => [qw(film_id)], - crc_type => 'bigint', + tbl => $tbl, + func => 'FNV_64', + crc_width => 99, + crc_type => 'bigint', ), q{COUNT(*) AS cnt, } . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc}, @@ -287,12 +304,11 @@ is( is( $c->make_chunk_checksum( - tbl => $tbl, - func => 'FNV_64', - crc_width=> 99, - cols => [qw(film_id)], - buffer => 1, - crc_type => 'bigint', + tbl => $tbl, + func => 'FNV_64', + crc_width => 99, + buffer => 1, + crc_type => 'bigint', ), q{COUNT(*) AS cnt, } . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(FNV_64(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc}, @@ -301,18 +317,20 @@ is( is( $c->make_chunk_checksum( - tbl => $tbl, - func => 'CRC32', - crc_width=> 99, - cols => [qw(film_id)], - buffer => 1, - crc_type => 'int', + tbl => $tbl, + func => 'CRC32', + crc_width => 99, + buffer => 1, + crc_type => 'int', ), q{COUNT(*) AS cnt, } . q{COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(`film_id`) AS UNSIGNED)), 10, 16)), 0) AS crc}, 'sakila.film CRC32', ); +@ARGV = qw(); +$o->get_opts(); + # ############################################################################# # Sandbox tests. # ############################################################################# @@ -391,18 +409,13 @@ $tbl = { tbl => 'issue_94', tbl_struct => $tp->parse($du->get_create_table($dbh, $q, 'test', 'issue_94')), }; +@ARGV = qw(--ignore-columns c); +$o->get_opts(); my $query = $c->make_chunk_checksum( tbl => $tbl, func => 'CRC32', crc_width => 16, crc_type => 'int', - opt_slice => undef, - cols => undef, - sep => '#', - replicate => undef, - precision => undef, - trim => undef, - ignorecols => {'c'=>1}, ); is( $query,