From 2e0f607589e411ce358fcf3089b946648eb4a56a Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sat, 27 Aug 2011 09:23:36 -0600 Subject: [PATCH 1/6] Pass --where to get_range_statistics(). --- bin/pt-table-checksum | 1 + t/lib/TableChunker.t | 26 ++++++++++++++++++++++++- t/pt-table-checksum/basics.t | 19 +++++++++++++++++- t/pt-table-checksum/samples/where01.out | 3 +++ t/pt-table-checksum/samples/where01.sql | 26 +++++++++++++++++++++++++ 5 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 t/pt-table-checksum/samples/where01.out create mode 100644 t/pt-table-checksum/samples/where01.sql diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 1a8474f1..72cbc371 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -5560,6 +5560,7 @@ sub save_tbl_to_checksum { tbl => $tbl, chunk_col => $chunk_col, tbl_struct => $struct, + where => $final_o->get('where'), ); if ( !grep { !defined $params{$_} } qw(min max rows_in_range) ) { @chunks = $ch->calculate_chunks( diff --git a/t/lib/TableChunker.t b/t/lib/TableChunker.t index f3f93854..5591f80b 100644 --- a/t/lib/TableChunker.t +++ b/t/lib/TableChunker.t @@ -27,7 +27,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 86; + plan tests => 88; } $sb->create_dbs($dbh, ['test']); @@ -1231,6 +1231,30 @@ SKIP: { ); }; +# ############################################################################ +# Bug 821673: pt-table-checksum doesn't included --where in min max queries +# ############################################################################ +$sb->load_file('master', "t/pt-table-checksum/samples/where01.sql"); +$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') ); +%params = $c->get_range_statistics( + dbh => $dbh, + db => 'test', + tbl => 'checksum_test', + chunk_col => 'id', + tbl_struct => $t, + where => "date = '2011-03-03'", +); +is( + $params{min}, + 11, + 'MIN range stats with --where (bug 821673)' +); +is( + $params{max}, + 15, + 'MAX range stats with --where (bug 821673)' +); + # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-table-checksum/basics.t b/t/pt-table-checksum/basics.t index eacbc00f..d4b95e96 100644 --- a/t/pt-table-checksum/basics.t +++ b/t/pt-table-checksum/basics.t @@ -24,7 +24,7 @@ if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 12; + plan tests => 13; } my ($output, $output2); @@ -120,6 +120,23 @@ ok( "--sleep doesn't sleep unless table is chunked" ); + +# ############################################################################ +# Bug 821673: pt-table-checksum doesn't included --where in min max queries +# ############################################################################ +$sb->load_file('master', "t/pt-table-checksum/samples/where01.sql"); + +ok( + no_diff( + sub { pt_table_checksum::main(@args, + qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'"); + }, + "t/pt-table-checksum/samples/where01.out", + trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", + ), + "--where affects range stats (bug 821673)" +); + # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-table-checksum/samples/where01.out b/t/pt-table-checksum/samples/where01.out new file mode 100644 index 00000000..af2869aa --- /dev/null +++ b/t/pt-table-checksum/samples/where01.out @@ -0,0 +1,3 @@ +DATABASE TABLE CHUNK +test checksum_test 0 +test checksum_test 1 diff --git a/t/pt-table-checksum/samples/where01.sql b/t/pt-table-checksum/samples/where01.sql new file mode 100644 index 00000000..6cb22b24 --- /dev/null +++ b/t/pt-table-checksum/samples/where01.sql @@ -0,0 +1,26 @@ +drop database if exists test; +create database test; +use test; + +CREATE TABLE `checksum_test` ( + `id` int(11) NOT NULL DEFAULT '0', + `date` date DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB; + +INSERT INTO `checksum_test` VALUES + (1, '2011-03-01'), + (2, '2011-03-01'), + (3, '2011-03-01'), + (4, '2011-03-01'), + (5, '2011-03-01'), + (6, '2011-03-02'), + (7, '2011-03-02'), + (8, '2011-03-02'), + (9, '2011-03-02'), + (10, '2011-03-02'), + (11, '2011-03-03'), + (12, '2011-03-03'), + (13, '2011-03-03'), + (14, '2011-03-03'), + (15, '2011-03-03'); From a7ab27bb5463aba6561709be2300d13f68f9ddae Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sat, 27 Aug 2011 11:17:28 -0600 Subject: [PATCH 2/6] Don't get min/max a 2nd time in _chunk_char(). Use where in _chunk_char(). Test char chunking world_city.name with real range stats. Add failing pt-table-checksum char chunk --where test. --- lib/TableChunker.pm | 19 ++++---- t/lib/TableChunker.t | 60 ++++++++++++++++++++----- t/pt-table-checksum/basics.t | 22 ++++++++- t/pt-table-checksum/samples/where02.out | 0 t/pt-table-checksum/samples/where02.sql | 26 +++++++++++ 5 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 t/pt-table-checksum/samples/where02.out create mode 100644 t/pt-table-checksum/samples/where02.sql diff --git a/lib/TableChunker.pm b/lib/TableChunker.pm index e2afbd16..362e6234 100644 --- a/lib/TableChunker.pm +++ b/lib/TableChunker.pm @@ -204,6 +204,7 @@ sub find_chunk_columns { # exact - Use exact chunk_size? Use approximates is not. # tries - Fetch up to this many rows to find a non-zero value # chunk_range - Make chunk range open (default) or openclosed +# where - WHERE clause. # # Returns: # Array of WHERE predicates like "`col` >= '10' AND `col` < '20'", @@ -510,6 +511,9 @@ sub _chunk_numeric { # # chunk_size - requested size of each chunk # +# Optional Arguments: +# where - WHERE clause. +# # Returns: # Array of chunker info that uses to create # chunks, like: @@ -522,7 +526,7 @@ sub _chunk_numeric { # (end code) sub _chunk_char { my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); + my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless defined $args{$arg}; } @@ -533,15 +537,8 @@ sub _chunk_char { my $row; my $sql; - # Get what MySQL says are the min and max column values. - # For example, is 'a' or 'A' the min according to MySQL? - $sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl " - . "ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - $row = $dbh->selectrow_arrayref($sql); - my ($min_col, $max_col) = ($row->[0], $row->[1]); - # Get the character codes between the min and max column values. + my ($min_col, $max_col) = @{args}{qw(min max)}; $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord"; MKDEBUG && _d($dbh, $sql); my $ord_sth = $dbh->prepare($sql); # avoid quoting issues @@ -642,7 +639,9 @@ sub _chunk_char { # [ant, apple, azur, boy]. We assume data is more evenly distributed # than not so we use the minimum number of characters to express a chunk # size. - $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`"; + $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl " + . ($args{where} ? "WHERE $args{where} " : "") + . "ORDER BY `$chunk_col`"; MKDEBUG && _d($dbh, $sql); $row = $dbh->selectrow_arrayref($sql); my $max_col_len = $row->[0]; diff --git a/t/lib/TableChunker.t b/t/lib/TableChunker.t index 5591f80b..1b5934ff 100644 --- a/t/lib/TableChunker.t +++ b/t/lib/TableChunker.t @@ -27,7 +27,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 88; + plan tests => 90; } $sb->create_dbs($dbh, ['test']); @@ -1174,21 +1174,27 @@ SKIP: { $sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test'); $t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'world_city') ); +%params = $c->get_range_statistics( + dbh => $dbh, + db => 'test', + tbl => 'world_city', + chunk_col => 'name', + tbl_struct => $t, + chunk_size => '500', +); @chunks = $c->calculate_chunks( - tbl_struct => $t, - chunk_col => 'name', - min => 'A Coruña (La Coruña)', - max => '´s-Hertogenbosch', - rows_in_range => 4079, - chunk_size => 500, dbh => $dbh, db => 'test', tbl => 'world_city', + tbl_struct => $t, + chunk_col => 'name', + chunk_size => 500, + %params, ); ok( @chunks >= 9, "At least 9 char chunks on test.world_city.name" -); +) or print STDERR Dumper(\@chunks); my $n_rows = count_rows("test.world_city", "name", @chunks); is( @@ -1247,14 +1253,48 @@ $t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') ); is( $params{min}, 11, - 'MIN range stats with --where (bug 821673)' + 'MIN int range stats with --where (bug 821673)' ); is( $params{max}, 15, - 'MAX range stats with --where (bug 821673)' + 'MAX int range stats with --where (bug 821673)' ); +# char chunking +$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql"); +$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') ); +%params = $c->get_range_statistics( + dbh => $dbh, + db => 'test', + tbl => 'checksum_test', + chunk_col => 'id', + tbl_struct => $t, + where => "date = '2011-03-03'", +); +is( + $params{min}, + 'Apple', + 'MIN char range stats with --where (bug 821673)' +); +is( + $params{max}, + 'raspberry', + 'MAX char range stats with --where (bug 821673)' +); + +# It's difficult to construct a char chunk test where WHERE will matter. +#@chunks = $c->calculate_chunks( +# dbh => $dbh, +# db => 'test', +# tbl => 'checksum_test', +# tbl_struct => $t, +# chunk_col => 'id', +# chunk_size => 5, +# where => "date = '2011-03-03'", +# %params, +#); + # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-table-checksum/basics.t b/t/pt-table-checksum/basics.t index d4b95e96..9b598ef8 100644 --- a/t/pt-table-checksum/basics.t +++ b/t/pt-table-checksum/basics.t @@ -24,7 +24,7 @@ if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 13; + plan tests => 14; } my ($output, $output2); @@ -134,7 +134,25 @@ ok( "t/pt-table-checksum/samples/where01.out", trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", ), - "--where affects range stats (bug 821673)" + "--where affects int range stats (bug 821673)" +); + +# Test it again with a varchar primary key. The resulting 5 rows are: +# | Apple | 2011-03-03 | +# | lemon | 2011-03-03 | +# | lime | 2011-03-03 | +# | pineapple | 2011-03-03 | +# | raspberry | 2011-03-03 | +$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql"); +ok( + no_diff( + sub { pt_table_checksum::main(@args, + qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'"); + }, + "t/pt-table-checksum/samples/where02.out", + trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", + ), + "--where affects char range stats (bug 821673)" ); # ############################################################################# diff --git a/t/pt-table-checksum/samples/where02.out b/t/pt-table-checksum/samples/where02.out new file mode 100644 index 00000000..e69de29b diff --git a/t/pt-table-checksum/samples/where02.sql b/t/pt-table-checksum/samples/where02.sql new file mode 100644 index 00000000..1ea5ed44 --- /dev/null +++ b/t/pt-table-checksum/samples/where02.sql @@ -0,0 +1,26 @@ +drop database if exists test; +create database test; +use test; + +CREATE TABLE `checksum_test` ( + `id` varchar(255) NOT NULL, + `date` date DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB; + +INSERT INTO `checksum_test` VALUES + ('Apple', '2011-03-03'), + ('banana', '2011-03-01'), + ('orange', '2011-03-01'), + ('grape', '2011-03-01'), + ('kiwi', '2011-03-01'), + ('strawberry', '2011-03-02'), + ('peach', '2011-03-02'), + ('mango', '2011-03-02'), + ('tomato', '2011-03-02'), + ('nectarine', '2011-03-02'), + ('pear', '2011-03-01'), + ('lemon', '2011-03-03'), + ('lime', '2011-03-03'), + ('pineapple', '2011-03-03'), + ('raspberry', '2011-03-03'); From 3c6edf6d71f2397eae7cf1f8056cd2921614b590 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sat, 27 Aug 2011 11:26:00 -0600 Subject: [PATCH 3/6] Update TableChunker in pt-table-checksum. Include COUNT in test samples. --- bin/pt-table-checksum | 13 +++++-------- t/pt-table-checksum/basics.t | 4 ++-- t/pt-table-checksum/samples/where01.out | 6 +++--- t/pt-table-checksum/samples/where02.out | 4 ++++ 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 72cbc371..3c01380b 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -2863,7 +2863,7 @@ sub _chunk_numeric { sub _chunk_char { my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); + my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless defined $args{$arg}; } @@ -2874,12 +2874,7 @@ sub _chunk_char { my $row; my $sql; - $sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl " - . "ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - $row = $dbh->selectrow_arrayref($sql); - my ($min_col, $max_col) = ($row->[0], $row->[1]); - + my ($min_col, $max_col) = @{args}{qw(min max)}; $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord"; MKDEBUG && _d($dbh, $sql); my $ord_sth = $dbh->prepare($sql); # avoid quoting issues @@ -2950,7 +2945,9 @@ sub _chunk_char { MKDEBUG && _d("Base", $base, "chars:", @chars); - $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`"; + $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl " + . ($args{where} ? "WHERE $args{where} " : "") + . "ORDER BY `$chunk_col`"; MKDEBUG && _d($dbh, $sql); $row = $dbh->selectrow_arrayref($sql); my $max_col_len = $row->[0]; diff --git a/t/pt-table-checksum/basics.t b/t/pt-table-checksum/basics.t index 9b598ef8..749f75b3 100644 --- a/t/pt-table-checksum/basics.t +++ b/t/pt-table-checksum/basics.t @@ -132,7 +132,7 @@ ok( qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'"); }, "t/pt-table-checksum/samples/where01.out", - trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", + trf => "awk '{print \$1 \" \" \$2 \" \" \$3 \" \" \$6}'", ), "--where affects int range stats (bug 821673)" ); @@ -150,7 +150,7 @@ ok( qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'"); }, "t/pt-table-checksum/samples/where02.out", - trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", + trf => "awk '{print \$1 \" \" \$2 \" \" \$3 \" \" \$6}'", ), "--where affects char range stats (bug 821673)" ); diff --git a/t/pt-table-checksum/samples/where01.out b/t/pt-table-checksum/samples/where01.out index af2869aa..8709fa8a 100644 --- a/t/pt-table-checksum/samples/where01.out +++ b/t/pt-table-checksum/samples/where01.out @@ -1,3 +1,3 @@ -DATABASE TABLE CHUNK -test checksum_test 0 -test checksum_test 1 +DATABASE TABLE CHUNK COUNT +test checksum_test 0 2 +test checksum_test 1 3 diff --git a/t/pt-table-checksum/samples/where02.out b/t/pt-table-checksum/samples/where02.out index e69de29b..e621908f 100644 --- a/t/pt-table-checksum/samples/where02.out +++ b/t/pt-table-checksum/samples/where02.out @@ -0,0 +1,4 @@ +DATABASE TABLE CHUNK COUNT +test checksum_test 0 1 +test checksum_test 1 4 +test checksum_test 2 0 From c58bb85773be8ce2f6e4824535ae8aae20171d61 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sat, 27 Aug 2011 11:41:58 -0600 Subject: [PATCH 4/6] Pass --where to calculate_chunks(). --- bin/pt-table-checksum | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 3c01380b..e5969c7e 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -5569,6 +5569,7 @@ sub save_tbl_to_checksum { chunk_size => $rows_per_chunk, zero_chunk => $final_o->get('zero-chunk'), chunk_range => $final_o->get('chunk-range'), + where => $final_o->get('where'), %params, ); $maxval = $params{max}; From b2a982f9eae590e91f424911fb4a3f0ebca9e74d Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sat, 27 Aug 2011 12:22:49 -0600 Subject: [PATCH 5/6] Update a test result that is now correct. --- t/pt-table-checksum/chunk_index.t | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/t/pt-table-checksum/chunk_index.t b/t/pt-table-checksum/chunk_index.t index c5286a53..96e217ee 100644 --- a/t/pt-table-checksum/chunk_index.t +++ b/t/pt-table-checksum/chunk_index.t @@ -127,12 +127,8 @@ $output = output( is( $output, -"issue_519 t SELECT /*issue_519.t:1/5*/ 0 AS chunk_num, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `i`, `y`, `t`, CONCAT(ISNULL(`t`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `issue_519`.`t` FORCE INDEX (`y`) WHERE (`y` = 0) AND ((y > 2009)) -issue_519 t `y` = 0 -issue_519 t `y` > 0 AND `y` < '2003' -issue_519 t `y` >= '2003' AND `y` < '2006' -issue_519 t `y` >= '2006' AND `y` < '2009' -issue_519 t `y` >= '2009' +"issue_519 t SELECT /*issue_519.t:1/1*/ 0 AS chunk_num, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `i`, `y`, `t`, CONCAT(ISNULL(`t`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `issue_519`.`t` FORCE INDEX (`y`) WHERE (1=1) AND ((y > 2009)) +issue_519 t 1=1 ", "Auto-chosen --chunk-index for --where (issue 378)" ); From d567e3d282fbb5d714c8f8e5c560ac3aa41a39ae Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sat, 27 Aug 2011 12:27:11 -0600 Subject: [PATCH 6/6] Restart slave1 so subsequent tests don't try to connect to phantom 12347. --- t/pt-table-checksum/throttle.t | 2 ++ 1 file changed, 2 insertions(+) diff --git a/t/pt-table-checksum/throttle.t b/t/pt-table-checksum/throttle.t index 0cbc568b..01676bc6 100644 --- a/t/pt-table-checksum/throttle.t +++ b/t/pt-table-checksum/throttle.t @@ -281,6 +281,8 @@ is( # Done. # ############################################################################# diag(`$trunk/sandbox/stop-sandbox 12347 >/dev/null`); +diag(`/tmp/12346/stop >/dev/null`); # Start/stop clears SHOW SLAVE HOSTS. +diag(`/tmp/12346/start >/dev/null`); $sb->wipe_clean($master_dbh); diag(`$trunk/sandbox/test-env reset >/dev/null`); exit;