diff --git a/lib/TableChunker.pm b/lib/TableChunker.pm index e2afbd16..362e6234 100644 --- a/lib/TableChunker.pm +++ b/lib/TableChunker.pm @@ -204,6 +204,7 @@ sub find_chunk_columns { # exact - Use exact chunk_size? Use approximates is not. # tries - Fetch up to this many rows to find a non-zero value # chunk_range - Make chunk range open (default) or openclosed +# where - WHERE clause. # # Returns: # Array of WHERE predicates like "`col` >= '10' AND `col` < '20'", @@ -510,6 +511,9 @@ sub _chunk_numeric { # # chunk_size - requested size of each chunk # +# Optional Arguments: +# where - WHERE clause. +# # Returns: # Array of chunker info that uses to create # chunks, like: @@ -522,7 +526,7 @@ sub _chunk_numeric { # (end code) sub _chunk_char { my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); + my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless defined $args{$arg}; } @@ -533,15 +537,8 @@ sub _chunk_char { my $row; my $sql; - # Get what MySQL says are the min and max column values. - # For example, is 'a' or 'A' the min according to MySQL? - $sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl " - . "ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - $row = $dbh->selectrow_arrayref($sql); - my ($min_col, $max_col) = ($row->[0], $row->[1]); - # Get the character codes between the min and max column values. + my ($min_col, $max_col) = @{args}{qw(min max)}; $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord"; MKDEBUG && _d($dbh, $sql); my $ord_sth = $dbh->prepare($sql); # avoid quoting issues @@ -642,7 +639,9 @@ sub _chunk_char { # [ant, apple, azur, boy]. We assume data is more evenly distributed # than not so we use the minimum number of characters to express a chunk # size. - $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`"; + $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl " + . ($args{where} ? "WHERE $args{where} " : "") + . "ORDER BY `$chunk_col`"; MKDEBUG && _d($dbh, $sql); $row = $dbh->selectrow_arrayref($sql); my $max_col_len = $row->[0]; diff --git a/t/lib/TableChunker.t b/t/lib/TableChunker.t index 5591f80b..1b5934ff 100644 --- a/t/lib/TableChunker.t +++ b/t/lib/TableChunker.t @@ -27,7 +27,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 88; + plan tests => 90; } $sb->create_dbs($dbh, ['test']); @@ -1174,21 +1174,27 @@ SKIP: { $sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test'); $t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'world_city') ); +%params = $c->get_range_statistics( + dbh => $dbh, + db => 'test', + tbl => 'world_city', + chunk_col => 'name', + tbl_struct => $t, + chunk_size => '500', +); @chunks = $c->calculate_chunks( - tbl_struct => $t, - chunk_col => 'name', - min => 'A Coruña (La Coruña)', - max => '´s-Hertogenbosch', - rows_in_range => 4079, - chunk_size => 500, dbh => $dbh, db => 'test', tbl => 'world_city', + tbl_struct => $t, + chunk_col => 'name', + chunk_size => 500, + %params, ); ok( @chunks >= 9, "At least 9 char chunks on test.world_city.name" -); +) or print STDERR Dumper(\@chunks); my $n_rows = count_rows("test.world_city", "name", @chunks); is( @@ -1247,14 +1253,48 @@ $t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') ); is( $params{min}, 11, - 'MIN range stats with --where (bug 821673)' + 'MIN int range stats with --where (bug 821673)' ); is( $params{max}, 15, - 'MAX range stats with --where (bug 821673)' + 'MAX int range stats with --where (bug 821673)' ); +# char chunking +$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql"); +$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') ); +%params = $c->get_range_statistics( + dbh => $dbh, + db => 'test', + tbl => 'checksum_test', + chunk_col => 'id', + tbl_struct => $t, + where => "date = '2011-03-03'", +); +is( + $params{min}, + 'Apple', + 'MIN char range stats with --where (bug 821673)' +); +is( + $params{max}, + 'raspberry', + 'MAX char range stats with --where (bug 821673)' +); + +# It's difficult to construct a char chunk test where WHERE will matter. +#@chunks = $c->calculate_chunks( +# dbh => $dbh, +# db => 'test', +# tbl => 'checksum_test', +# tbl_struct => $t, +# chunk_col => 'id', +# chunk_size => 5, +# where => "date = '2011-03-03'", +# %params, +#); + # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-table-checksum/basics.t b/t/pt-table-checksum/basics.t index d4b95e96..9b598ef8 100644 --- a/t/pt-table-checksum/basics.t +++ b/t/pt-table-checksum/basics.t @@ -24,7 +24,7 @@ if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 13; + plan tests => 14; } my ($output, $output2); @@ -134,7 +134,25 @@ ok( "t/pt-table-checksum/samples/where01.out", trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", ), - "--where affects range stats (bug 821673)" + "--where affects int range stats (bug 821673)" +); + +# Test it again with a varchar primary key. The resulting 5 rows are: +# | Apple | 2011-03-03 | +# | lemon | 2011-03-03 | +# | lime | 2011-03-03 | +# | pineapple | 2011-03-03 | +# | raspberry | 2011-03-03 | +$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql"); +ok( + no_diff( + sub { pt_table_checksum::main(@args, + qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'"); + }, + "t/pt-table-checksum/samples/where02.out", + trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", + ), + "--where affects char range stats (bug 821673)" ); # ############################################################################# diff --git a/t/pt-table-checksum/samples/where02.out b/t/pt-table-checksum/samples/where02.out new file mode 100644 index 00000000..e69de29b diff --git a/t/pt-table-checksum/samples/where02.sql b/t/pt-table-checksum/samples/where02.sql new file mode 100644 index 00000000..1ea5ed44 --- /dev/null +++ b/t/pt-table-checksum/samples/where02.sql @@ -0,0 +1,26 @@ +drop database if exists test; +create database test; +use test; + +CREATE TABLE `checksum_test` ( + `id` varchar(255) NOT NULL, + `date` date DEFAULT NULL, + PRIMARY KEY (`id`) +) ENGINE=InnoDB; + +INSERT INTO `checksum_test` VALUES + ('Apple', '2011-03-03'), + ('banana', '2011-03-01'), + ('orange', '2011-03-01'), + ('grape', '2011-03-01'), + ('kiwi', '2011-03-01'), + ('strawberry', '2011-03-02'), + ('peach', '2011-03-02'), + ('mango', '2011-03-02'), + ('tomato', '2011-03-02'), + ('nectarine', '2011-03-02'), + ('pear', '2011-03-01'), + ('lemon', '2011-03-03'), + ('lime', '2011-03-03'), + ('pineapple', '2011-03-03'), + ('raspberry', '2011-03-03');