Merge fix-bug-821673.

This commit is contained in:
Daniel Nichter
2011-08-27 12:34:10 -06:00
10 changed files with 187 additions and 33 deletions

View File

@@ -2863,7 +2863,7 @@ sub _chunk_numeric {
sub _chunk_char {
my ( $self, %args ) = @_;
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless defined $args{$arg};
}
@@ -2874,12 +2874,7 @@ sub _chunk_char {
my $row;
my $sql;
$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "
. "ORDER BY `$chunk_col`";
MKDEBUG && _d($dbh, $sql);
$row = $dbh->selectrow_arrayref($sql);
my ($min_col, $max_col) = ($row->[0], $row->[1]);
my ($min_col, $max_col) = @{args}{qw(min max)};
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
MKDEBUG && _d($dbh, $sql);
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
@@ -2950,7 +2945,9 @@ sub _chunk_char {
MKDEBUG && _d("Base", $base, "chars:", @chars);
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`";
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl "
. ($args{where} ? "WHERE $args{where} " : "")
. "ORDER BY `$chunk_col`";
MKDEBUG && _d($dbh, $sql);
$row = $dbh->selectrow_arrayref($sql);
my $max_col_len = $row->[0];
@@ -5560,6 +5557,7 @@ sub save_tbl_to_checksum {
tbl => $tbl,
chunk_col => $chunk_col,
tbl_struct => $struct,
where => $final_o->get('where'),
);
if ( !grep { !defined $params{$_} } qw(min max rows_in_range) ) {
@chunks = $ch->calculate_chunks(
@@ -5571,6 +5569,7 @@ sub save_tbl_to_checksum {
chunk_size => $rows_per_chunk,
zero_chunk => $final_o->get('zero-chunk'),
chunk_range => $final_o->get('chunk-range'),
where => $final_o->get('where'),
%params,
);
$maxval = $params{max};

View File

@@ -204,6 +204,7 @@ sub find_chunk_columns {
# exact - Use exact chunk_size? Use approximations if not.
# tries - Fetch up to this many rows to find a non-zero value
# chunk_range - Make chunk range open (default) or openclosed
# where - WHERE clause.
#
# Returns:
# Array of WHERE predicates like "`col` >= '10' AND `col` < '20'",
@@ -510,6 +511,9 @@ sub _chunk_numeric {
# <TableChunker::get_range_statistics()>
# chunk_size - requested size of each chunk
#
# Optional Arguments:
# where - WHERE clause.
#
# Returns:
# Array of chunker info that <calculate_chunks()> uses to create
# chunks, like:
@@ -522,7 +526,7 @@ sub _chunk_numeric {
# (end code)
sub _chunk_char {
my ( $self, %args ) = @_;
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless defined $args{$arg};
}
@@ -533,15 +537,8 @@ sub _chunk_char {
my $row;
my $sql;
# Get what MySQL says are the min and max column values.
# For example, is 'a' or 'A' the min according to MySQL?
$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "
. "ORDER BY `$chunk_col`";
MKDEBUG && _d($dbh, $sql);
$row = $dbh->selectrow_arrayref($sql);
my ($min_col, $max_col) = ($row->[0], $row->[1]);
# Get the character codes between the min and max column values.
my ($min_col, $max_col) = @{args}{qw(min max)};
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
MKDEBUG && _d($dbh, $sql);
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
@@ -642,7 +639,9 @@ sub _chunk_char {
# [ant, apple, azur, boy]. We assume data is more evenly distributed
# than not so we use the minimum number of characters to express a chunk
# size.
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`";
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl "
. ($args{where} ? "WHERE $args{where} " : "")
. "ORDER BY `$chunk_col`";
MKDEBUG && _d($dbh, $sql);
$row = $dbh->selectrow_arrayref($sql);
my $max_col_len = $row->[0];

View File

@@ -27,7 +27,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 86;
plan tests => 90;
}
$sb->create_dbs($dbh, ['test']);
@@ -1174,21 +1174,27 @@ SKIP: {
$sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test');
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'world_city') );
%params = $c->get_range_statistics(
dbh => $dbh,
db => 'test',
tbl => 'world_city',
chunk_col => 'name',
tbl_struct => $t,
chunk_size => '500',
);
@chunks = $c->calculate_chunks(
tbl_struct => $t,
chunk_col => 'name',
min => 'A Coruña (La Coruña)',
max => '´s-Hertogenbosch',
rows_in_range => 4079,
chunk_size => 500,
dbh => $dbh,
db => 'test',
tbl => 'world_city',
tbl_struct => $t,
chunk_col => 'name',
chunk_size => 500,
%params,
);
ok(
@chunks >= 9,
"At least 9 char chunks on test.world_city.name"
);
) or print STDERR Dumper(\@chunks);
my $n_rows = count_rows("test.world_city", "name", @chunks);
is(
@@ -1231,6 +1237,64 @@ SKIP: {
);
};
# ############################################################################
# Bug 821673: pt-table-checksum doesn't include --where in min max queries
# ############################################################################
$sb->load_file('master', "t/pt-table-checksum/samples/where01.sql");
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') );
%params = $c->get_range_statistics(
dbh => $dbh,
db => 'test',
tbl => 'checksum_test',
chunk_col => 'id',
tbl_struct => $t,
where => "date = '2011-03-03'",
);
is(
$params{min},
11,
'MIN int range stats with --where (bug 821673)'
);
is(
$params{max},
15,
'MAX int range stats with --where (bug 821673)'
);
# char chunking
$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql");
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') );
%params = $c->get_range_statistics(
dbh => $dbh,
db => 'test',
tbl => 'checksum_test',
chunk_col => 'id',
tbl_struct => $t,
where => "date = '2011-03-03'",
);
is(
$params{min},
'Apple',
'MIN char range stats with --where (bug 821673)'
);
is(
$params{max},
'raspberry',
'MAX char range stats with --where (bug 821673)'
);
# It's difficult to construct a char chunk test where WHERE will matter.
#@chunks = $c->calculate_chunks(
# dbh => $dbh,
# db => 'test',
# tbl => 'checksum_test',
# tbl_struct => $t,
# chunk_col => 'id',
# chunk_size => 5,
# where => "date = '2011-03-03'",
# %params,
#);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -24,7 +24,7 @@ if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 12;
plan tests => 14;
}
my ($output, $output2);
@@ -120,6 +120,41 @@ ok(
"--sleep doesn't sleep unless table is chunked"
);
# ############################################################################
# Bug 821673: pt-table-checksum doesn't include --where in min max queries
# ############################################################################
$sb->load_file('master', "t/pt-table-checksum/samples/where01.sql");
ok(
no_diff(
sub { pt_table_checksum::main(@args,
qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'");
},
"t/pt-table-checksum/samples/where01.out",
trf => "awk '{print \$1 \" \" \$2 \" \" \$3 \" \" \$6}'",
),
"--where affects int range stats (bug 821673)"
);
# Test it again with a varchar primary key. The resulting 5 rows are:
# | Apple | 2011-03-03 |
# | lemon | 2011-03-03 |
# | lime | 2011-03-03 |
# | pineapple | 2011-03-03 |
# | raspberry | 2011-03-03 |
$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql");
ok(
no_diff(
sub { pt_table_checksum::main(@args,
qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'");
},
"t/pt-table-checksum/samples/where02.out",
trf => "awk '{print \$1 \" \" \$2 \" \" \$3 \" \" \$6}'",
),
"--where affects char range stats (bug 821673)"
);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -127,12 +127,8 @@ $output = output(
is(
$output,
"issue_519 t SELECT /*issue_519.t:1/5*/ 0 AS chunk_num, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `i`, `y`, `t`, CONCAT(ISNULL(`t`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `issue_519`.`t` FORCE INDEX (`y`) WHERE (`y` = 0) AND ((y > 2009))
issue_519 t `y` = 0
issue_519 t `y` > 0 AND `y` < '2003'
issue_519 t `y` >= '2003' AND `y` < '2006'
issue_519 t `y` >= '2006' AND `y` < '2009'
issue_519 t `y` >= '2009'
"issue_519 t SELECT /*issue_519.t:1/1*/ 0 AS chunk_num, COUNT(*) AS cnt, COALESCE(LOWER(CONV(BIT_XOR(CAST(CRC32(CONCAT_WS('#', `i`, `y`, `t`, CONCAT(ISNULL(`t`)))) AS UNSIGNED)), 10, 16)), 0) AS crc FROM `issue_519`.`t` FORCE INDEX (`y`) WHERE (1=1) AND ((y > 2009))
issue_519 t 1=1
",
"Auto-chosen --chunk-index for --where (issue 378)"
);

View File

@@ -0,0 +1,3 @@
DATABASE TABLE CHUNK COUNT
test checksum_test 0 2
test checksum_test 1 3

View File

@@ -0,0 +1,26 @@
-- Test fixture for bug 821673 (--where must be applied to min/max range
-- queries), integer primary key variant.
--
-- Recreates the `test` database from scratch and loads 15 rows into
-- `checksum_test`: ids 1-5 are dated 2011-03-01, ids 6-10 are dated
-- 2011-03-02 and ids 11-15 are dated 2011-03-03.  A filter of
-- date = '2011-03-03' therefore selects ids 11 through 15 only.
drop database if exists test;
create database test;
use test;
CREATE TABLE `checksum_test` (
`id` int(11) NOT NULL DEFAULT '0',
`date` date DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB;
-- Five rows per date, inserted in ascending id order.
INSERT INTO `checksum_test` VALUES
(1, '2011-03-01'),
(2, '2011-03-01'),
(3, '2011-03-01'),
(4, '2011-03-01'),
(5, '2011-03-01'),
(6, '2011-03-02'),
(7, '2011-03-02'),
(8, '2011-03-02'),
(9, '2011-03-02'),
(10, '2011-03-02'),
(11, '2011-03-03'),
(12, '2011-03-03'),
(13, '2011-03-03'),
(14, '2011-03-03'),
(15, '2011-03-03');

View File

@@ -0,0 +1,4 @@
DATABASE TABLE CHUNK COUNT
test checksum_test 0 1
test checksum_test 1 4
test checksum_test 2 0

View File

@@ -0,0 +1,26 @@
-- Test fixture for bug 821673 (--where must be applied to min/max range
-- queries), character primary key variant.
--
-- Recreates the `test` database from scratch and loads 15 rows into
-- `checksum_test`, keyed by a varchar `id`.  Five of the rows are dated
-- 2011-03-03: Apple, lemon, lime, pineapple and raspberry.  The remaining
-- rows are dated 2011-03-01 or 2011-03-02.
drop database if exists test;
create database test;
use test;
CREATE TABLE `checksum_test` (
`id` varchar(255) NOT NULL,
`date` date DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB;
-- Rows are not listed in key order; 'Apple' is the only capitalized id.
INSERT INTO `checksum_test` VALUES
('Apple', '2011-03-03'),
('banana', '2011-03-01'),
('orange', '2011-03-01'),
('grape', '2011-03-01'),
('kiwi', '2011-03-01'),
('strawberry', '2011-03-02'),
('peach', '2011-03-02'),
('mango', '2011-03-02'),
('tomato', '2011-03-02'),
('nectarine', '2011-03-02'),
('pear', '2011-03-01'),
('lemon', '2011-03-03'),
('lime', '2011-03-03'),
('pineapple', '2011-03-03'),
('raspberry', '2011-03-03');

View File

@@ -281,6 +281,8 @@ is(
# Done.
# #############################################################################
diag(`$trunk/sandbox/stop-sandbox 12347 >/dev/null`);
diag(`/tmp/12346/stop >/dev/null`); # Start/stop clears SHOW SLAVE HOSTS.
diag(`/tmp/12346/start >/dev/null`);
$sb->wipe_clean($master_dbh);
diag(`$trunk/sandbox/test-env reset >/dev/null`);
exit;