Don't query min/max a second time in _chunk_char(); use the min and max passed in as arguments. Apply the where argument in _chunk_char(). Test char chunking of world_city.name with real range stats. Add a failing pt-table-checksum char chunk --where test.

This commit is contained in:
Daniel Nichter
2011-08-27 11:17:28 -06:00
parent 2e0f607589
commit a7ab27bb54
5 changed files with 105 additions and 22 deletions

View File

@@ -204,6 +204,7 @@ sub find_chunk_columns {
# exact - Use exact chunk_size? Use approximates if not. # exact - Use exact chunk_size? Use approximates if not.
# tries - Fetch up to this many rows to find a non-zero value # tries - Fetch up to this many rows to find a non-zero value
# chunk_range - Make chunk range open (default) or openclosed # chunk_range - Make chunk range open (default) or openclosed
# where - WHERE clause.
# #
# Returns: # Returns:
# Array of WHERE predicates like "`col` >= '10' AND `col` < '20'", # Array of WHERE predicates like "`col` >= '10' AND `col` < '20'",
@@ -510,6 +511,9 @@ sub _chunk_numeric {
# <TableChunker::get_range_statistics()> # <TableChunker::get_range_statistics()>
# chunk_size - requested size of each chunk # chunk_size - requested size of each chunk
# #
# Optional Arguments:
# where - WHERE clause.
#
# Returns: # Returns:
# Array of chunker info that <calculate_chunks()> uses to create # Array of chunker info that <calculate_chunks()> uses to create
# chunks, like: # chunks, like:
@@ -522,7 +526,7 @@ sub _chunk_numeric {
# (end code) # (end code)
sub _chunk_char { sub _chunk_char {
my ( $self, %args ) = @_; my ( $self, %args ) = @_;
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size);
foreach my $arg ( @required_args ) { foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless defined $args{$arg}; die "I need a $arg argument" unless defined $args{$arg};
} }
@@ -533,15 +537,8 @@ sub _chunk_char {
my $row; my $row;
my $sql; my $sql;
# Get what MySQL says are the min and max column values.
# For example, is 'a' or 'A' the min according to MySQL?
$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "
. "ORDER BY `$chunk_col`";
MKDEBUG && _d($dbh, $sql);
$row = $dbh->selectrow_arrayref($sql);
my ($min_col, $max_col) = ($row->[0], $row->[1]);
# Get the character codes between the min and max column values. # Get the character codes between the min and max column values.
my ($min_col, $max_col) = @{args}{qw(min max)};
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord"; $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
MKDEBUG && _d($dbh, $sql); MKDEBUG && _d($dbh, $sql);
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
@@ -642,7 +639,9 @@ sub _chunk_char {
# [ant, apple, azur, boy]. We assume data is more evenly distributed # [ant, apple, azur, boy]. We assume data is more evenly distributed
# than not so we use the minimum number of characters to express a chunk # than not so we use the minimum number of characters to express a chunk
# size. # size.
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`"; $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl "
. ($args{where} ? "WHERE $args{where} " : "")
. "ORDER BY `$chunk_col`";
MKDEBUG && _d($dbh, $sql); MKDEBUG && _d($dbh, $sql);
$row = $dbh->selectrow_arrayref($sql); $row = $dbh->selectrow_arrayref($sql);
my $max_col_len = $row->[0]; my $max_col_len = $row->[0];

View File

@@ -27,7 +27,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master'; plan skip_all => 'Cannot connect to sandbox master';
} }
else { else {
plan tests => 88; plan tests => 90;
} }
$sb->create_dbs($dbh, ['test']); $sb->create_dbs($dbh, ['test']);
@@ -1174,21 +1174,27 @@ SKIP: {
$sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test'); $sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test');
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'world_city') ); $t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'world_city') );
%params = $c->get_range_statistics(
dbh => $dbh,
db => 'test',
tbl => 'world_city',
chunk_col => 'name',
tbl_struct => $t,
chunk_size => '500',
);
@chunks = $c->calculate_chunks( @chunks = $c->calculate_chunks(
tbl_struct => $t,
chunk_col => 'name',
min => 'A Coruña (La Coruña)',
max => '´s-Hertogenbosch',
rows_in_range => 4079,
chunk_size => 500,
dbh => $dbh, dbh => $dbh,
db => 'test', db => 'test',
tbl => 'world_city', tbl => 'world_city',
tbl_struct => $t,
chunk_col => 'name',
chunk_size => 500,
%params,
); );
ok( ok(
@chunks >= 9, @chunks >= 9,
"At least 9 char chunks on test.world_city.name" "At least 9 char chunks on test.world_city.name"
); ) or print STDERR Dumper(\@chunks);
my $n_rows = count_rows("test.world_city", "name", @chunks); my $n_rows = count_rows("test.world_city", "name", @chunks);
is( is(
@@ -1247,14 +1253,48 @@ $t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') );
is( is(
$params{min}, $params{min},
11, 11,
'MIN range stats with --where (bug 821673)' 'MIN int range stats with --where (bug 821673)'
); );
is( is(
$params{max}, $params{max},
15, 15,
'MAX range stats with --where (bug 821673)' 'MAX int range stats with --where (bug 821673)'
); );
# char chunking
$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql");
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') );
%params = $c->get_range_statistics(
dbh => $dbh,
db => 'test',
tbl => 'checksum_test',
chunk_col => 'id',
tbl_struct => $t,
where => "date = '2011-03-03'",
);
is(
$params{min},
'Apple',
'MIN char range stats with --where (bug 821673)'
);
is(
$params{max},
'raspberry',
'MAX char range stats with --where (bug 821673)'
);
# It's difficult to construct a char chunk test where WHERE will matter.
#@chunks = $c->calculate_chunks(
# dbh => $dbh,
# db => 'test',
# tbl => 'checksum_test',
# tbl_struct => $t,
# chunk_col => 'id',
# chunk_size => 5,
# where => "date = '2011-03-03'",
# %params,
#);
# ############################################################################# # #############################################################################
# Done. # Done.
# ############################################################################# # #############################################################################

View File

@@ -24,7 +24,7 @@ if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master'; plan skip_all => 'Cannot connect to sandbox master';
} }
else { else {
plan tests => 13; plan tests => 14;
} }
my ($output, $output2); my ($output, $output2);
@@ -134,7 +134,25 @@ ok(
"t/pt-table-checksum/samples/where01.out", "t/pt-table-checksum/samples/where01.out",
trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'", trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'",
), ),
"--where affects range stats (bug 821673)" "--where affects int range stats (bug 821673)"
);
# Test it again with a varchar primary key. The resulting 5 rows are:
# | Apple | 2011-03-03 |
# | lemon | 2011-03-03 |
# | lime | 2011-03-03 |
# | pineapple | 2011-03-03 |
# | raspberry | 2011-03-03 |
$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql");
ok(
no_diff(
sub { pt_table_checksum::main(@args,
qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'");
},
"t/pt-table-checksum/samples/where02.out",
trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'",
),
"--where affects char range stats (bug 821673)"
); );
# ############################################################################# # #############################################################################

View File

View File

@@ -0,0 +1,26 @@
-- Test fixture: rebuilds the `test` database from scratch and loads a
-- `checksum_test` table keyed on a varchar column, for exercising
-- --where with character (non-integer) chunk columns.
drop database if exists test;
create database test;
use test;
-- `id` is a string primary key; `date` is the column the tests filter on.
CREATE TABLE `checksum_test` (
`id` varchar(255) NOT NULL,
`date` date DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB;
-- 15 rows spread over three dates. Exactly five rows have
-- date = '2011-03-03': Apple, lemon, lime, pineapple, raspberry.
-- 'Apple' is the only value that starts with an uppercase letter.
INSERT INTO `checksum_test` VALUES
('Apple', '2011-03-03'),
('banana', '2011-03-01'),
('orange', '2011-03-01'),
('grape', '2011-03-01'),
('kiwi', '2011-03-01'),
('strawberry', '2011-03-02'),
('peach', '2011-03-02'),
('mango', '2011-03-02'),
('tomato', '2011-03-02'),
('nectarine', '2011-03-02'),
('pear', '2011-03-01'),
('lemon', '2011-03-03'),
('lime', '2011-03-03'),
('pineapple', '2011-03-03'),
('raspberry', '2011-03-03');