mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-11 21:51:21 +00:00
Don't get min/max a 2nd time in _chunk_char(). Use where in _chunk_char(). Test char chunking world_city.name with real range stats. Add failing pt-table-checksum char chunk --where test.
This commit is contained in:
@@ -204,6 +204,7 @@ sub find_chunk_columns {
|
|||||||
# exact - Use exact chunk_size? Use approximates if not.
|
# exact - Use exact chunk_size? Use approximates if not.
|
||||||
# tries - Fetch up to this many rows to find a non-zero value
|
# tries - Fetch up to this many rows to find a non-zero value
|
||||||
# chunk_range - Make chunk range open (default) or openclosed
|
# chunk_range - Make chunk range open (default) or openclosed
|
||||||
|
# where - WHERE clause.
|
||||||
#
|
#
|
||||||
# Returns:
|
# Returns:
|
||||||
# Array of WHERE predicates like "`col` >= '10' AND `col` < '20'",
|
# Array of WHERE predicates like "`col` >= '10' AND `col` < '20'",
|
||||||
@@ -510,6 +511,9 @@ sub _chunk_numeric {
|
|||||||
# <TableChunker::get_range_statistics()>
|
# <TableChunker::get_range_statistics()>
|
||||||
# chunk_size - requested size of each chunk
|
# chunk_size - requested size of each chunk
|
||||||
#
|
#
|
||||||
|
# Optional Arguments:
|
||||||
|
# where - WHERE clause.
|
||||||
|
#
|
||||||
# Returns:
|
# Returns:
|
||||||
# Array of chunker info that <calculate_chunks()> uses to create
|
# Array of chunker info that <calculate_chunks()> uses to create
|
||||||
# chunks, like:
|
# chunks, like:
|
||||||
@@ -522,7 +526,7 @@ sub _chunk_numeric {
|
|||||||
# (end code)
|
# (end code)
|
||||||
sub _chunk_char {
|
sub _chunk_char {
|
||||||
my ( $self, %args ) = @_;
|
my ( $self, %args ) = @_;
|
||||||
my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size);
|
my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size);
|
||||||
foreach my $arg ( @required_args ) {
|
foreach my $arg ( @required_args ) {
|
||||||
die "I need a $arg argument" unless defined $args{$arg};
|
die "I need a $arg argument" unless defined $args{$arg};
|
||||||
}
|
}
|
||||||
@@ -533,15 +537,8 @@ sub _chunk_char {
|
|||||||
my $row;
|
my $row;
|
||||||
my $sql;
|
my $sql;
|
||||||
|
|
||||||
# Get what MySQL says are the min and max column values.
|
|
||||||
# For example, is 'a' or 'A' the min according to MySQL?
|
|
||||||
$sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl "
|
|
||||||
. "ORDER BY `$chunk_col`";
|
|
||||||
MKDEBUG && _d($dbh, $sql);
|
|
||||||
$row = $dbh->selectrow_arrayref($sql);
|
|
||||||
my ($min_col, $max_col) = ($row->[0], $row->[1]);
|
|
||||||
|
|
||||||
# Get the character codes between the min and max column values.
|
# Get the character codes between the min and max column values.
|
||||||
|
my ($min_col, $max_col) = @{args}{qw(min max)};
|
||||||
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
|
$sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord";
|
||||||
MKDEBUG && _d($dbh, $sql);
|
MKDEBUG && _d($dbh, $sql);
|
||||||
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
|
my $ord_sth = $dbh->prepare($sql); # avoid quoting issues
|
||||||
@@ -642,7 +639,9 @@ sub _chunk_char {
|
|||||||
# [ant, apple, azur, boy]. We assume data is more evenly distributed
|
# [ant, apple, azur, boy]. We assume data is more evenly distributed
|
||||||
# than not so we use the minimum number of characters to express a chunk
|
# than not so we use the minimum number of characters to express a chunk
|
||||||
# size.
|
# size.
|
||||||
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`";
|
$sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl "
|
||||||
|
. ($args{where} ? "WHERE $args{where} " : "")
|
||||||
|
. "ORDER BY `$chunk_col`";
|
||||||
MKDEBUG && _d($dbh, $sql);
|
MKDEBUG && _d($dbh, $sql);
|
||||||
$row = $dbh->selectrow_arrayref($sql);
|
$row = $dbh->selectrow_arrayref($sql);
|
||||||
my $max_col_len = $row->[0];
|
my $max_col_len = $row->[0];
|
||||||
|
@@ -27,7 +27,7 @@ if ( !$dbh ) {
|
|||||||
plan skip_all => 'Cannot connect to sandbox master';
|
plan skip_all => 'Cannot connect to sandbox master';
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
plan tests => 88;
|
plan tests => 90;
|
||||||
}
|
}
|
||||||
|
|
||||||
$sb->create_dbs($dbh, ['test']);
|
$sb->create_dbs($dbh, ['test']);
|
||||||
@@ -1174,21 +1174,27 @@ SKIP: {
|
|||||||
|
|
||||||
$sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test');
|
$sb->load_file('master', "t/lib/samples/char-chunking/world-city.sql", 'test');
|
||||||
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'world_city') );
|
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'world_city') );
|
||||||
@chunks = $c->calculate_chunks(
|
%params = $c->get_range_statistics(
|
||||||
tbl_struct => $t,
|
|
||||||
chunk_col => 'name',
|
|
||||||
min => 'A Coruña (La Coruña)',
|
|
||||||
max => '´s-Hertogenbosch',
|
|
||||||
rows_in_range => 4079,
|
|
||||||
chunk_size => 500,
|
|
||||||
dbh => $dbh,
|
dbh => $dbh,
|
||||||
db => 'test',
|
db => 'test',
|
||||||
tbl => 'world_city',
|
tbl => 'world_city',
|
||||||
|
chunk_col => 'name',
|
||||||
|
tbl_struct => $t,
|
||||||
|
chunk_size => '500',
|
||||||
|
);
|
||||||
|
@chunks = $c->calculate_chunks(
|
||||||
|
dbh => $dbh,
|
||||||
|
db => 'test',
|
||||||
|
tbl => 'world_city',
|
||||||
|
tbl_struct => $t,
|
||||||
|
chunk_col => 'name',
|
||||||
|
chunk_size => 500,
|
||||||
|
%params,
|
||||||
);
|
);
|
||||||
ok(
|
ok(
|
||||||
@chunks >= 9,
|
@chunks >= 9,
|
||||||
"At least 9 char chunks on test.world_city.name"
|
"At least 9 char chunks on test.world_city.name"
|
||||||
);
|
) or print STDERR Dumper(\@chunks);
|
||||||
|
|
||||||
my $n_rows = count_rows("test.world_city", "name", @chunks);
|
my $n_rows = count_rows("test.world_city", "name", @chunks);
|
||||||
is(
|
is(
|
||||||
@@ -1247,14 +1253,48 @@ $t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') );
|
|||||||
is(
|
is(
|
||||||
$params{min},
|
$params{min},
|
||||||
11,
|
11,
|
||||||
'MIN range stats with --where (bug 821673)'
|
'MIN int range stats with --where (bug 821673)'
|
||||||
);
|
);
|
||||||
is(
|
is(
|
||||||
$params{max},
|
$params{max},
|
||||||
15,
|
15,
|
||||||
'MAX range stats with --where (bug 821673)'
|
'MAX int range stats with --where (bug 821673)'
|
||||||
);
|
);
|
||||||
|
|
||||||
|
# char chunking
|
||||||
|
$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql");
|
||||||
|
$t = $p->parse( $du->get_create_table($dbh, $q, 'test', 'checksum_test') );
|
||||||
|
%params = $c->get_range_statistics(
|
||||||
|
dbh => $dbh,
|
||||||
|
db => 'test',
|
||||||
|
tbl => 'checksum_test',
|
||||||
|
chunk_col => 'id',
|
||||||
|
tbl_struct => $t,
|
||||||
|
where => "date = '2011-03-03'",
|
||||||
|
);
|
||||||
|
is(
|
||||||
|
$params{min},
|
||||||
|
'Apple',
|
||||||
|
'MIN char range stats with --where (bug 821673)'
|
||||||
|
);
|
||||||
|
is(
|
||||||
|
$params{max},
|
||||||
|
'raspberry',
|
||||||
|
'MAX char range stats with --where (bug 821673)'
|
||||||
|
);
|
||||||
|
|
||||||
|
# It's difficult to construct a char chunk test where WHERE will matter.
|
||||||
|
#@chunks = $c->calculate_chunks(
|
||||||
|
# dbh => $dbh,
|
||||||
|
# db => 'test',
|
||||||
|
# tbl => 'checksum_test',
|
||||||
|
# tbl_struct => $t,
|
||||||
|
# chunk_col => 'id',
|
||||||
|
# chunk_size => 5,
|
||||||
|
# where => "date = '2011-03-03'",
|
||||||
|
# %params,
|
||||||
|
#);
|
||||||
|
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
# Done.
|
# Done.
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
|
@@ -24,7 +24,7 @@ if ( !$master_dbh ) {
|
|||||||
plan skip_all => 'Cannot connect to sandbox master';
|
plan skip_all => 'Cannot connect to sandbox master';
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
plan tests => 13;
|
plan tests => 14;
|
||||||
}
|
}
|
||||||
|
|
||||||
my ($output, $output2);
|
my ($output, $output2);
|
||||||
@@ -134,7 +134,25 @@ ok(
|
|||||||
"t/pt-table-checksum/samples/where01.out",
|
"t/pt-table-checksum/samples/where01.out",
|
||||||
trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'",
|
trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'",
|
||||||
),
|
),
|
||||||
"--where affects range stats (bug 821673)"
|
"--where affects int range stats (bug 821673)"
|
||||||
|
);
|
||||||
|
|
||||||
|
# Test it again with a varchar primary key. The resulting 5 rows are:
|
||||||
|
# | Apple | 2011-03-03 |
|
||||||
|
# | lemon | 2011-03-03 |
|
||||||
|
# | lime | 2011-03-03 |
|
||||||
|
# | pineapple | 2011-03-03 |
|
||||||
|
# | raspberry | 2011-03-03 |
|
||||||
|
$sb->load_file('master', "t/pt-table-checksum/samples/where02.sql");
|
||||||
|
ok(
|
||||||
|
no_diff(
|
||||||
|
sub { pt_table_checksum::main(@args,
|
||||||
|
qw(--no-zero-chunk --chunk-size 5), '--where', "date = '2011-03-03'");
|
||||||
|
},
|
||||||
|
"t/pt-table-checksum/samples/where02.out",
|
||||||
|
trf => "awk '{print \$1 \" \" \$2 \" \" \$3}'",
|
||||||
|
),
|
||||||
|
"--where affects char range stats (bug 821673)"
|
||||||
);
|
);
|
||||||
|
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
|
0
t/pt-table-checksum/samples/where02.out
Normal file
0
t/pt-table-checksum/samples/where02.out
Normal file
26
t/pt-table-checksum/samples/where02.sql
Normal file
26
t/pt-table-checksum/samples/where02.sql
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
drop database if exists test;
|
||||||
|
create database test;
|
||||||
|
use test;
|
||||||
|
|
||||||
|
CREATE TABLE `checksum_test` (
|
||||||
|
`id` varchar(255) NOT NULL,
|
||||||
|
`date` date DEFAULT NULL,
|
||||||
|
PRIMARY KEY (`id`)
|
||||||
|
) ENGINE=InnoDB;
|
||||||
|
|
||||||
|
INSERT INTO `checksum_test` VALUES
|
||||||
|
('Apple', '2011-03-03'),
|
||||||
|
('banana', '2011-03-01'),
|
||||||
|
('orange', '2011-03-01'),
|
||||||
|
('grape', '2011-03-01'),
|
||||||
|
('kiwi', '2011-03-01'),
|
||||||
|
('strawberry', '2011-03-02'),
|
||||||
|
('peach', '2011-03-02'),
|
||||||
|
('mango', '2011-03-02'),
|
||||||
|
('tomato', '2011-03-02'),
|
||||||
|
('nectarine', '2011-03-02'),
|
||||||
|
('pear', '2011-03-01'),
|
||||||
|
('lemon', '2011-03-03'),
|
||||||
|
('lime', '2011-03-03'),
|
||||||
|
('pineapple', '2011-03-03'),
|
||||||
|
('raspberry', '2011-03-03');
|
Reference in New Issue
Block a user