From bd6e38149418e781af1715bca2500ac3d8f8452d Mon Sep 17 00:00:00 2001 From: Carlos Salguero Date: Thu, 25 May 2017 17:59:25 -0300 Subject: [PATCH 1/2] PT-136 pt-table-checksum shows diffs when table has columns with different collation/charset --- bin/pt-table-checksum | 20 +++++-- t/pt-table-checksum/pt-136.t | 55 +++++++++++++++++++ .../samples/char-chunk-ascii-explain.txt | 2 +- t/pt-table-checksum/samples/chunkidx005.txt | 2 +- t/pt-table-checksum/samples/dot.out | 2 +- .../samples/oversize-chunks.txt | 2 +- t/pt-table-checksum/samples/pt-136.sql | 6 ++ 7 files changed, 81 insertions(+), 8 deletions(-) create mode 100644 t/pt-table-checksum/pt-136.t create mode 100644 t/pt-table-checksum/samples/pt-136.sql diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index a2a8485a..8bff6ffb 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -5831,17 +5831,29 @@ sub make_row_checksum { $sep =~ s/'//g; $sep ||= '#'; + my @converted_cols; + for my $col(@{$cols->{select}}) { + my $colname = $col; + $colname =~ s/`//g; + my $type = $tbl_struct->{type_for}->{$colname} || ''; + if ($type =~ m/^(CHAR|VARCHAR|BINARY|VARBINARY|BLOB|TEXT|ENUM|SET|JSON)$/i) { + push @converted_cols, "convert($col using utf8mb4)"; + } else { + push @converted_cols, "$col"; + } + } + my @nulls = grep { $cols->{allowed}->{$_} } @{$tbl_struct->{null_cols}}; if ( @nulls ) { my $bitmap = "CONCAT(" . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls) . ")"; - push @{$cols->{select}}, $bitmap; + push @converted_cols, $bitmap; } - $query .= @{$cols->{select}} > 1 - ? "$func(CONCAT_WS('$sep', " . join(', ', @{$cols->{select}}) . '))' - : "$func($cols->{select}->[0])"; + $query .= scalar @converted_cols > 1 + ? "$func(CONCAT_WS('$sep', " . join(', ', @converted_cols) . 
'))' + : "$func($converted_cols[0])"; } else { my $fnv_func = uc $func; diff --git a/t/pt-table-checksum/pt-136.t b/t/pt-table-checksum/pt-136.t new file mode 100644 index 00000000..7124c9df --- /dev/null +++ b/t/pt-table-checksum/pt-136.t @@ -0,0 +1,55 @@ +#!/usr/bin/env perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use Test::More; + +use PerconaTest; +use Sandbox; +use SqlModes; +require "$trunk/bin/pt-table-checksum"; + +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $dbh = $sb->get_dbh_for('master'); + +if ( !$dbh ) { + plan skip_all => 'Cannot connect to sandbox master'; +} +else { + plan tests => 2; +} + +$sb->load_file('master', 't/pt-table-checksum/samples/pt-136.sql'); +# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic, +# so --set-vars innodb_lock_wait_timeout=3 would be needed or else the tool will die, and +# --max-load "" prevents waiting for status variables. NOTE(review): @args below passes neither; confirm. +my $master_dsn = $sb->dsn_for('master'); +my @args = ($master_dsn); +my $output; +my $exit_status; + +$output = output( + sub { $exit_status = pt_table_checksum::main(@args) }, + stderr => 1, +); + +is( + $exit_status, + 0, + "Checksum columns with mismatching collaitons", +); + +# ############################################################################# +# Done. +# ############################################################################# +$sb->wipe_clean($dbh); +ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . 
" broke the sandbox"); +exit; diff --git a/t/pt-table-checksum/samples/char-chunk-ascii-explain.txt b/t/pt-table-checksum/samples/char-chunk-ascii-explain.txt index 6b038e14..d497a349 100644 --- a/t/pt-table-checksum/samples/char-chunk-ascii-explain.txt +++ b/t/pt-table-checksum/samples/char-chunk-ascii-explain.txt @@ -2,7 +2,7 @@ -- test.ascii -- -REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `i`, `c`)) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `test`.`ascii` FORCE INDEX(`c`) WHERE ((`c` >= ?)) AND ((`c` <= ?)) /*checksum chunk*/ +REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `i`, convert(`c` using utf8mb4))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `test`.`ascii` FORCE INDEX(`c`) WHERE ((`c` >= ?)) AND ((`c` <= ?)) /*checksum chunk*/ REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `test`.`ascii` FORCE INDEX(`c`) WHERE ((`c` < ?)) ORDER BY `c` /*past lower chunk*/ diff --git a/t/pt-table-checksum/samples/chunkidx005.txt b/t/pt-table-checksum/samples/chunkidx005.txt index a6177f19..e4b0d417 100644 --- a/t/pt-table-checksum/samples/chunkidx005.txt +++ b/t/pt-table-checksum/samples/chunkidx005.txt @@ -2,7 +2,7 @@ -- sakila.city -- -REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `city_id`, `city`, `country_id`, UNIX_TIMESTAMP(`last_update`))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `sakila`.`city` FORCE INDEX(`PRIMARY`) WHERE ((`city_id` >= ?)) AND ((`city_id` <= ?)) 
AND (country_id > 100) /*checksum chunk*/ +REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `city_id`, convert(`city` using utf8mb4), `country_id`, UNIX_TIMESTAMP(`last_update`))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `sakila`.`city` FORCE INDEX(`PRIMARY`) WHERE ((`city_id` >= ?)) AND ((`city_id` <= ?)) AND (country_id > 100) /*checksum chunk*/ REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `sakila`.`city` FORCE INDEX(`PRIMARY`) WHERE ((`city_id` < ?)) AND (country_id > 100) ORDER BY `city_id` /*past lower chunk*/ diff --git a/t/pt-table-checksum/samples/dot.out b/t/pt-table-checksum/samples/dot.out index 003d2880..b7df5c73 100644 --- a/t/pt-table-checksum/samples/dot.out +++ b/t/pt-table-checksum/samples/dot.out @@ -2,7 +2,7 @@ -- test.t -- -REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `no.`, `foo.bar`, CONCAT(ISNULL(`foo.bar`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `test`.`t` FORCE INDEX(`PRIMARY`) WHERE ((`no.` >= ?)) AND ((`no.` <= ?)) /*checksum chunk*/ +REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', convert(`no.` using utf8mb4), convert(`foo.bar` using utf8mb4), CONCAT(ISNULL(`foo.bar`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `test`.`t` FORCE INDEX(`PRIMARY`) WHERE ((`no.` >= ?)) AND ((`no.` <= ?)) /*checksum chunk*/ REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, 
?, ?, COUNT(*), '0' FROM `test`.`t` FORCE INDEX(`PRIMARY`) WHERE ((`no.` < ?)) ORDER BY `no.` /*past lower chunk*/ diff --git a/t/pt-table-checksum/samples/oversize-chunks.txt b/t/pt-table-checksum/samples/oversize-chunks.txt index 21fcb183..3888ce25 100644 --- a/t/pt-table-checksum/samples/oversize-chunks.txt +++ b/t/pt-table-checksum/samples/oversize-chunks.txt @@ -2,7 +2,7 @@ -- osc.t2 -- -REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `c`, CONCAT(ISNULL(`c`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `osc`.`t2` FORCE INDEX(`c`) WHERE (((? IS NULL OR `c` >= ?))) AND (((? IS NULL OR `c` <= ?))) /*checksum chunk*/ +REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', convert(`c` using utf8mb4), CONCAT(ISNULL(`c`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `osc`.`t2` FORCE INDEX(`c`) WHERE (((? IS NULL OR `c` >= ?))) AND (((? IS NULL OR `c` <= ?))) /*checksum chunk*/ REPLACE INTO `percona`.`checksums` (db, tbl, chunk, chunk_index, lower_boundary, upper_boundary, this_cnt, this_crc) SELECT ?, ?, ?, ?, ?, ?, COUNT(*), '0' FROM `osc`.`t2` FORCE INDEX(`c`) WHERE ((((? 
IS NOT NULL AND `c` IS NULL) OR (`c` < ?)))) ORDER BY `c` /*past lower chunk*/ diff --git a/t/pt-table-checksum/samples/pt-136.sql b/t/pt-table-checksum/samples/pt-136.sql new file mode 100644 index 00000000..1c259a71 --- /dev/null +++ b/t/pt-table-checksum/samples/pt-136.sql @@ -0,0 +1,6 @@ +CREATE DATABASE db1; +USE db1; +CREATE TABLE cp1251(f1 VARCHAR(100) CHARACTER SET LATIN1, f2 VARCHAR(100) CHARACTER SET CP1251) ENGINE=InnoDB; +SET NAMES UTF8; +INSERT INTO cp1251 VALUES('Sveta', 'Света'); + From 37860ae28f171ba00a3442b8f585d8c687fddc7b Mon Sep 17 00:00:00 2001 From: Carlos Salguero Date: Fri, 26 May 2017 11:54:44 -0300 Subject: [PATCH 2/2] Updated changelog --- Changelog | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Changelog b/Changelog index a70cd3fd..e4062296 100644 --- a/Changelog +++ b/Changelog @@ -2,13 +2,14 @@ Changelog for Percona Toolkit v3.0.4 - * Fixed bug PT-144 : Constraint name is too long + * Fixed bug PT-144 : Constraint name is too long (> 64 chars) * Fixed bug PT-143 : pt-archiver SELECT query fails because of primary key * Fixed bug PT-142 : pt-online-schema-change find_child_tables slow * Fixed bug PT-138 : Added --output-format option to pt-mongodb-summary + * Fixed bug PT-136 : pt-table-checksum fails with columns having different collation/charset * Feature PT-141 : pt-archiver archive records into csv file -v3.0.3 +v3.0.3 released 2017-05-19 * Fixed bug PT-133 : Sandbox won't start correctly if autocommit=0 in my.cnf * Fixed bug PT-132 : pt-online-schema-change should imply --no-drop-new-table