Merge pull request #355 from percona/PT-1572

PT-1572 Better usage of ENUM fields in keys
This commit is contained in:
Carlos Salguero
2018-08-13 12:45:03 -03:00
committed by GitHub
14 changed files with 281 additions and 33 deletions

View File

@@ -5520,11 +5520,35 @@ sub new {
);
PTDEBUG && _d('Ascend params:', Dumper($asc));
my $force_concat_enums = $o->has('force-concat-enums') && $o->get('force-concat-enums');
my $i=0;
for my $index (@{$index_cols}) {
last if $args{n_chunk_index_cols} && $i >= $args{n_chunk_index_cols};
$i++;
if ($tbl->{tbl_struct}->{type_for}->{$index} eq 'enum') {
if ($tbl->{tbl_struct}->{defs}->{$index} =~ m/enum\s*\((.*?)\)/) {
my @items = split(/,\s*/, $1);
my $sorted = 1; # Asume the items list is sorted to later check if this is true
for (my $i=1; $i < scalar(@items); $i++) {
if ($items[$i-1] gt $items[$i]) {
$sorted = 0;
last;
}
}
if (!$force_concat_enums && !$sorted) {
die "The index " . $index . " in table " . $tbl->{name} .
" has unsorted enum items.\nPlease read the documentation for the --force-concat-enums parameter\n";
}
}
}
}
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
my $order_by = join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum'
my $order_by = join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' && $force_concat_enums
? "CONCAT(".$q->quote($_).")" : $q->quote($_)} @{$index_cols});
my $order_by_dec = join(' DESC,', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum'
my $order_by_dec = join(' DESC,', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' && $force_concat_enums
? "CONCAT(".$q->quote($_).")" : $q->quote($_)} @{$index_cols});
my $first_lb_sql
@@ -12249,6 +12273,30 @@ duplicate rows and this data will be lost.
This options bypasses confirmation in case of using alter-foreign-keys-method = none , which might break foreign key constraints.
=item --force-concat-enums
The NibbleIterator in Percona Toolkit can detect indexes having ENUM fields and
if the items it has are sorted or not. According to MySQL documentation at
L<https://dev.mysql.com/doc/refman/8.0/en/enum.html>:
ENUM values are sorted based on their index numbers, which depend on the order in
which the enumeration members were listed in the column specification.
For example, 'b' sorts before 'a' for ENUM('b', 'a').
The empty string sorts before nonempty strings, and NULL values sort before all other
enumeration values.
To prevent unexpected results when using the ORDER BY clause on an ENUM column,
use one of these techniques:
- Specify the ENUM list in alphabetic order.
- Make sure that the column is sorted lexically rather than by index number by coding
ORDER BY CAST(col AS CHAR) or ORDER BY CONCAT(col).
The NibbleIterator in Percona Toolkit uses CONCAT(col) but, doing that, adds overhead
since MySQL cannot use the column directly and has to calculate the result of CONCAT
for every row.
To make this scenario vissible to the user, if there are indexes having ENUM fields
with usorted items, it is necessary to specify the C<--force-concat-enums> parameter.
=item --help
Show help and exit.

View File

@@ -6371,11 +6371,35 @@ sub new {
);
PTDEBUG && _d('Ascend params:', Dumper($asc));
my $force_concat_enums = $o->has('force-concat-enums') && $o->get('force-concat-enums');
my $i=0;
for my $index (@{$index_cols}) {
last if $args{n_chunk_index_cols} && $i >= $args{n_chunk_index_cols};
$i++;
if ($tbl->{tbl_struct}->{type_for}->{$index} eq 'enum') {
if ($tbl->{tbl_struct}->{defs}->{$index} =~ m/enum\s*\((.*?)\)/) {
my @items = split(/,\s*/, $1);
my $sorted = 1; # Asume the items list is sorted to later check if this is true
for (my $i=1; $i < scalar(@items); $i++) {
if ($items[$i-1] gt $items[$i]) {
$sorted = 0;
last;
}
}
if (!$force_concat_enums && !$sorted) {
die "The index " . $index . " in table " . $tbl->{name} .
" has unsorted enum items.\nPlease read the documentation for the --force-concat-enums parameter\n";
}
}
}
}
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
my $order_by = join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum'
my $order_by = join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' && $force_concat_enums
? "CONCAT(".$q->quote($_).")" : $q->quote($_)} @{$index_cols});
my $order_by_dec = join(' DESC,', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum'
my $order_by_dec = join(' DESC,', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' && $force_concat_enums
? "CONCAT(".$q->quote($_).")" : $q->quote($_)} @{$index_cols});
my $first_lb_sql
@@ -13262,6 +13286,30 @@ the values are converted to strings by the CONCAT() function, and MySQL chooses
the string representation. If you specify a value of 2, for example, then the
values 1.008 and 1.009 will be rounded to 1.01, and will checksum as equal.
=item --force-concat-enums
The NibbleIterator in Percona Toolkit can detect indexes having ENUM fields and
if the items it has are sorted or not. According to MySQL documentation at
L<https://dev.mysql.com/doc/refman/8.0/en/enum.html>:
ENUM values are sorted based on their index numbers, which depend on the order in
which the enumeration members were listed in the column specification.
For example, 'b' sorts before 'a' for ENUM('b', 'a').
The empty string sorts before nonempty strings, and NULL values sort before all other
enumeration values.
To prevent unexpected results when using the ORDER BY clause on an ENUM column,
use one of these techniques:
- Specify the ENUM list in alphabetic order.
- Make sure that the column is sorted lexically rather than by index number by coding
ORDER BY CAST(col AS CHAR) or ORDER BY CONCAT(col).
The NibbleIterator in Percona Toolkit uses CONCAT(col) but, doing that, adds overhead
since MySQL cannot use the column directly and has to calculate the result of CONCAT
for every row.
To make this scenario vissible to the user, if there are indexes having ENUM fields
with usorted items, it is necessary to specify the C<--force-concat-enums> parameter.
=item --function
type: string

View File

@@ -134,14 +134,40 @@ sub new {
);
PTDEBUG && _d('Ascend params:', Dumper($asc));
# Check if enum fields items are sorted or not.
# If they are sorted we can skip adding CONCAT to improve the queries eficiency.
my $force_concat_enums = $o->has('force-concat-enums') && $o->get('force-concat-enums');
my $i=0;
for my $index (@{$index_cols}) {
last if $args{n_chunk_index_cols} && $i >= $args{n_chunk_index_cols};
$i++;
if ($tbl->{tbl_struct}->{type_for}->{$index} eq 'enum') {
if ($tbl->{tbl_struct}->{defs}->{$index} =~ m/enum\s*\((.*?)\)/) {
my @items = split(/,\s*/, $1);
my $sorted = 1; # Asume the items list is sorted to later check if this is true
for (my $i=1; $i < scalar(@items); $i++) {
if ($items[$i-1] gt $items[$i]) {
$sorted = 0;
last;
}
}
if (!$force_concat_enums && !$sorted) {
die "The index " . $index . " in table " . $tbl->{name} .
" has unsorted enum items.\nPlease read the documentation for the --force-concat-enums parameter\n";
}
}
}
}
# Make SQL statements, prepared on first call to next(). FROM and
# ORDER BY are the same for all statements. FORCE IDNEX and ORDER BY
# are needed to ensure deterministic nibbling.
my $from = "$tbl->{name} FORCE INDEX(`$index`)";
my $order_by = join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum'
my $order_by = join(', ', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' && $force_concat_enums
? "CONCAT(".$q->quote($_).")" : $q->quote($_)} @{$index_cols});
my $order_by_dec = join(' DESC,', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum'
my $order_by_dec = join(' DESC,', map { $tbl->{tbl_struct}->{type_for}->{$_} eq 'enum' && $force_concat_enums
? "CONCAT(".$q->quote($_).")" : $q->quote($_)} @{$index_cols});
# The real first row in the table. Usually we start nibbling from

View File

@@ -38,6 +38,8 @@ my $dbh = $sb->get_dbh_for('master');
if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
} else {
plan tests => 60;
}
my $q = new Quoter();
@@ -65,6 +67,7 @@ my $in = "/t/lib/samples/NibbleIterator/";
sub make_nibble_iter {
my (%args) = @_;
if (my $file = $args{sql_file}) {
$sb->load_file('master', "$in/$file");
}
@@ -83,18 +86,19 @@ sub make_nibble_iter {
1 while $si->next();
my $ni = new NibbleIterator(
Cxn => $cxn,
tbl => $schema->get_table(lc($args{db}), lc($args{tbl})),
chunk_size => $o->get('chunk-size'),
chunk_index => $o->get('chunk-index'),
callbacks => $args{callbacks},
select => $args{select},
one_nibble => $args{one_nibble},
resume => $args{resume},
order_by => $args{order_by},
comments => $args{comments},
pause_file => $o->get('pause-file'),
sleep => $args{sleep} || 60,
Cxn => $cxn,
tbl => $schema->get_table(lc($args{db}), lc($args{tbl})),
chunk_size => $o->get('chunk-size'),
chunk_index => $o->get('chunk-index'),
callbacks => $args{callbacks},
select => $args{select},
one_nibble => $args{one_nibble},
resume => $args{resume},
order_by => $args{order_by},
comments => $args{comments},
n_chunk_index_cols => $o->get('chunk-index-columns'),
pause_file => $o->get('pause-file'),
sleep => $args{sleep} || 60,
%common_modules,
);
@@ -879,7 +883,7 @@ is_deeply(
# #############################################################################
diag(`/tmp/12345/use < $trunk/t/lib/samples/cardinality.sql >/dev/null`);
$dbh->do('analyze table bad_tables.inv');
$ni = make_nibble_iter(
db => 'cardb',
tbl => 't',
@@ -892,6 +896,76 @@ is(
"Use non-unique index with highest cardinality (bug 1199591)"
);
$sb->load_file('master', "t/lib/samples/NibbleIterator/enum_keys.sql");
$ni = undef;
eval {
$ni = make_nibble_iter(
db => 'test',
tbl => 't1',
argv => [qw(--databases test --chunk-size 3)],
);
};
like(
$EVAL_ERROR,
qr/The index f3 in table `test`.`t1` has unsorted enum items/,
"PT-1572 Die on unsorted enum items in index",
);
eval {
$ni = make_nibble_iter(
db => 'test',
tbl => 't1',
argv => [qw(--databases test --force-concat-enums --chunk-size 3)],
);
};
like(
$ni->{explain_first_lb_sql},
qr/ORDER BY `f1`, `f2`, CONCAT\(`f3`\)/,
"PT-1572 Use of CONCAT for unsorted ENUM field items without --",
);
eval {
$ni = make_nibble_iter(
db => 'test',
tbl => 't2',
argv => [qw(--databases test --chunk-size 3)],
);
};
is(
$EVAL_ERROR,
'',
"PT-1572 No errors on sorted enum items in index",
);
like(
$ni->{explain_first_lb_sql},
qr/ORDER BY `f1`, `f2`, `f3`/,
"PT-1572 Don't use CONCAT for sorted ENUM field items without --force-concat-enums",
);
eval {
$ni = make_nibble_iter(
db => 'test',
tbl => 't1',
argv => [qw(--databases test --chunk-size 3 --chunk-index-columns 2)],
);
};
is(
$EVAL_ERROR,
'',
"PT-1572 No errors on unsorted enum items in index and --chunk-index-columns",
);
like(
$ni->{explain_first_lb_sql},
qr/ORDER BY `f1`, `f2`, `f3`/,
"PT-1572 Don't use CONCAT for sorted ENUM field items without --force-concat-enums & --chunk-index-columns",
);
# #############################################################################
# Done.
# #############################################################################
@@ -905,6 +979,9 @@ like(
qr/Complete test coverage/,
'_d() works'
);
$dbh->do("DROP DATABASE test");
$sb->wipe_clean($dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;

View File

@@ -0,0 +1,47 @@
DROP DATABASE IF EXISTS test;
CREATE DATABASE test;
USE test;
-- Don't change the comments. The enum word inside the comment is there to test the table parser
CREATE TABLE `test`.`t1` (
f1 DATE NOT NULL,
f2 INT(10) UNSIGNED NOT NULL,
f3 ENUM('c','a','b','d') NOT NULL DEFAULT 'c' COMMENT "unsorted enum items",
f4 INT(10) UNSIGNED NOT NULL DEFAULT '0',
PRIMARY KEY (`f1`,`f2`,`f3`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO `t1` VALUES
('2018-07-27',165910161,'c',1375471152),
('2018-07-27',393713658,'d',1382302491),
('2018-07-27',472875023,'c',525456967),
('2018-07-27',543582931,'c',1657080267),
('2018-07-27',583532949,'d',280366509),
('2018-07-27',1396416465,'d',1252007743),
('2018-07-27',1705409249,'c',1714682759),
('2018-07-27',1801160058,'a',1022430181),
('2018-07-27',1898674299,'c',1310715836),
('2018-07-27',2011751560,'a',109015753);
CREATE TABLE `test`.`t2` (
f1 DATE NOT NULL,
f2 INT(10) UNSIGNED NOT NULL,
f3 ENUM('a','b','c','d') NOT NULL DEFAULT 'c' COMMENT "sorted enum items",
f4 INT(10) UNSIGNED NOT NULL DEFAULT '0',
PRIMARY KEY (`f1`,`f2`,`f3`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT INTO `t2` VALUES
('2018-07-27',165910161,'c',1375471152),
('2018-07-27',393713658,'d',1382302491),
('2018-07-27',472875023,'c',525456967),
('2018-07-27',543582931,'c',1657080267),
('2018-07-27',583532949,'d',280366509),
('2018-07-27',1396416465,'d',1252007743),
('2018-07-27',1705409249,'c',1714682759),
('2018-07-27',1801160058,'a',1022430181),
('2018-07-27',1898674299,'c',1310715836),
('2018-07-27',2011751560,'a',109015753);

View File

@@ -45,7 +45,7 @@ elsif ( !@{$master_dbh->selectall_arrayref("show databases like 'sakila'")} ) {
# so we need to specify --set-vars innodb_lock_wait_timeout=3 else the tool will die.
my $master_dsn = 'h=127.1,P=12345,u=msandbox,p=msandbox';
my $slave2_dsn = 'h=127.1,P=12347,u=msandbox,p=msandbox';
my @args = ($master_dsn, qw(--set-vars innodb_lock_wait_timeout=3));
my @args = ($master_dsn, qw(--set-vars innodb_lock_wait_timeout=3 --ignore-tables load_data));
my $row;
my $output;
my $exit_status;

View File

@@ -27,6 +27,13 @@ else {
plan tests => 3;
}
# This table is being used by some other tests and it is not being properly cleaned
# so this tests fails sometimes. Just in case, clean the table but don't fail if the
# table doesn't exists.
eval {
$dbh->do("TRUNCATE TABLE percona_test.load_data");
};
$sb->load_file('master', 't/lib/samples/issue_pt-193_backtick_in_col_comments.sql');
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
@@ -49,7 +56,7 @@ is(
$exit_status,
0,
"PT-193 use single backtick in comments",
);
) or diag($output);
like(
$output,

View File

@@ -23,7 +23,6 @@ ERRORS DIFFS ROWS SKIPPED TABLE
0 0 0 0 mysql.time_zone_transition_type
0 0 2 0 mysql.user
0 0 22 0 percona_test.checksums
0 0 1 0 percona_test.load_data
0 0 1 0 percona_test.sentinel
0 0 200 0 sakila.actor
0 0 603 0 sakila.address

View File

@@ -25,7 +25,6 @@ ERRORS DIFFS ROWS SKIPPED TABLE
0 0 0 0 mysql.time_zone_transition_type
0 0 2 0 mysql.user
0 0 23 0 percona_test.checksums
0 0 1 0 percona_test.load_data
0 0 1 0 percona_test.sentinel
0 0 200 0 sakila.actor
0 0 603 0 sakila.address

View File

@@ -27,7 +27,6 @@ ERRORS DIFFS ROWS SKIPPED TABLE
0 0 0 0 mysql.time_zone_transition_type
0 0 5 0 mysql.user
0 0 27 0 percona_test.checksums
0 0 1 0 percona_test.load_data
0 0 1 0 percona_test.sentinel
0 0 200 0 sakila.actor
0 0 603 0 sakila.address

View File

@@ -2,7 +2,6 @@ if all tables be checksummed
checksum ...
ERRORS DIFFS ROWS CHUNKS SKIPPED TABLE
0 0 22 1 0 percona_test.checksums
0 0 1 1 0 percona_test.load_data
0 0 1 1 0 percona_test.sentinel
0 0 200 1 0 sakila.actor
0 0 603 1 0 sakila.address

View File

@@ -2,7 +2,6 @@ if all tables be checksummed
checksum ...
ERRORS DIFFS ROWS CHUNKS SKIPPED TABLE
0 0 23 1 0 percona_test.checksums
0 0 1 1 0 percona_test.load_data
0 0 1 1 0 percona_test.sentinel
0 0 200 1 0 sakila.actor
0 0 603 1 0 sakila.address

View File

@@ -2,7 +2,6 @@ if all tables be checksummed
checksum ...
ERRORS DIFFS ROWS CHUNKS SKIPPED TABLE
0 0 27 1 0 percona_test.checksums
0 0 1 1 0 percona_test.load_data
0 0 1 1 0 percona_test.sentinel
0 0 200 1 0 sakila.actor
0 0 603 1 0 sakila.address

View File

@@ -6,6 +6,9 @@ BEGIN {
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
binmode(STDIN, ':utf8') or die "Can't binmode(STDIN, ':utf8'): $OS_ERROR";
binmode(STDOUT, ':utf8') or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
use strict;
use utf8;
use Encode qw(decode encode);
@@ -17,9 +20,6 @@ use PerconaTest;
use Sandbox;
require "$trunk/bin/pt-table-sync";
binmode(STDIN, ':utf8') or die "Can't binmode(STDIN, ':utf8'): $OS_ERROR";
binmode(STDOUT, ':utf8') or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $master_dbh = $sb->get_dbh_for('master');
@@ -39,9 +39,10 @@ else {
plan tests => 5;
}
my ($output, $status);
my @args = ('--sync-to-master', 'h=127.1,P=12346,u=msandbox,p=msandbox',
qw(-t test.t1 --print --execute));
qw(-t test.t1 --print --execute --charset utf8));
# use lib/samples dir since the main change is in DSNParser
$sb->load_file('master', "t/lib/samples/charset.sql");
@@ -77,10 +78,10 @@ is(
$row->{f2},
$want,
"Character set is correct",
) or diag("Want '$want', got '$row->{f2}");
) or diag("Want '".($want||"")."', got '".($row->{f2}||"")."'");
SKIP: {
skip "Skipping in MySQL 8.0.4-rc since there is an error in the server itself", 1 if ($sandbox_version ge '8.0');
skip "Skipping in MySQL 8.0.4-rc since there is an error in the server itself", 2 if ($sandbox_version ge '8.0');
# 3
$output = `$trunk/bin/pt-table-sync --execute --lock-and-rename h=127.1,P=12345,u=msandbox,p=msandbox,D=test,t=t1 t=t2 2>&1`;
$output = `/tmp/12345/use -e 'show create table test.t2'`;