Merge pt-dupe-key-fixes.

Daniel Nichter
2013-12-11 19:49:47 -08:00
13 changed files with 806 additions and 452 deletions

File diff suppressed because it is too large

View File

@@ -143,8 +143,6 @@ sub get_duplicate_keys {
push @dupes,
$self->remove_prefix_duplicates(\@fulltext_keys, \@fulltext_keys, %args, exact_duplicates => 1);
# TODO: other structs
# Remove clustered duplicates.
my $clustered_key = $args{clustered_key} ? $keys{$args{clustered_key}}
: undef;
@@ -314,7 +312,7 @@ sub remove_prefix_duplicates {
if ( substr($left_cols, 0, $right_len_cols)
eq substr($right_cols, 0, $right_len_cols) ) {
# FULLTEXT keys, for example, are only duplicates if they
# UNIQUE and FULLTEXT indexes are only duplicates if they
# are exact duplicates.
if ( $args{exact_duplicates} && ($right_len_cols<$left_len_cols) ) {
PTDEBUG && _d($right_name, 'not exact duplicate of', $left_name);
@@ -333,10 +331,10 @@ sub remove_prefix_duplicates {
PTDEBUG && _d('Remove', $right_name);
my $reason;
if ( $right_keys->[$right_index]->{unconstrained} ) {
if ( my $type = $right_keys->[$right_index]->{unconstrained} ) {
$reason .= "Uniqueness of $right_name ignored because "
. $right_keys->[$right_index]->{constraining_key}->{name}
. " is a stronger constraint\n";
. " is a $type constraint\n";
}
my $exact_dupe = $right_len_cols < $left_len_cols ? 0 : 1;
$reason .= $right_name
@@ -454,14 +452,23 @@ sub unconstrain_keys {
next unless $unique_key; # primary key may be undefined
my $cols = $unique_key->{cols};
if ( @$cols == 1 ) {
PTDEBUG && _d($unique_key->{name},'defines unique column:',$cols->[0]);
# Save only the first unique key for the unique col. If there
# are others, then they are exact duplicates and will be removed
# later when unique keys are compared to unique keys.
if ( !exists $unique_cols{$cols->[0]} ) {
PTDEBUG && _d($unique_key->{name}, 'defines unique column:',
$cols->[0]);
$unique_cols{$cols->[0]} = $unique_key;
$unique_key->{unique_col} = 1;
}
else {
# https://bugs.launchpad.net/percona-toolkit/+bug/1217013
# If two unique indexes are not exact duplicates, then they must be
# enforcing different uniqueness constraints. Else they're exact dupes,
# so one can be treated as non-unique and removed later
# when comparing unique to non-unique keys.
PTDEBUG && _d($unique_key->{name},
'redundantly constrains unique column:', $cols->[0]);
$unique_key->{exact_dupe} = 1;
$unique_key->{constraining_key} = $unique_cols{$cols->[0]};
}
}
else {
local $LIST_SEPARATOR = '-';
@@ -496,18 +503,25 @@ sub unconstrain_keys {
}
}
# And finally, unconstrain the redudantly unique sets found above by
# And finally, unconstrain the redundantly unique sets found above by
# removing them from the list of unique keys and adding them to the
# list of normal keys.
for my $i ( 0..(scalar @$unique_keys-1) ) {
if ( exists $unconstrain{$unique_keys->[$i]->{name}} ) {
PTDEBUG && _d('Unconstraining', $unique_keys->[$i]->{name});
$unique_keys->[$i]->{unconstrained} = 1;
PTDEBUG && _d('Unconstraining weak', $unique_keys->[$i]->{name});
$unique_keys->[$i]->{unconstrained} = 'stronger';
$unique_keys->[$i]->{constraining_key}
= $unconstrain{$unique_keys->[$i]->{name}};
push @unconstrained_keys, $unique_keys->[$i];
delete $unique_keys->[$i];
}
elsif ( $unique_keys->[$i]->{exact_dupe} ) {
# https://bugs.launchpad.net/percona-toolkit/+bug/1217013
PTDEBUG && _d('Unconstraining dupe', $unique_keys->[$i]->{name});
$unique_keys->[$i]->{unconstrained} = 'duplicate';
push @unconstrained_keys, $unique_keys->[$i];
delete $unique_keys->[$i];
}
}
PTDEBUG && _d('No more keys');
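
For context, a minimal standalone Perl sketch of what this change records, using the bug-1217013 sample table added later in this commit; the hash below is an illustration, not code from the patch. The unconstrained flag is now a string, 'stronger' or 'duplicate', rather than a boolean, and remove_prefix_duplicates() interpolates it into the reason text.

#!/usr/bin/env perl
use strict;
use warnings;

# Illustrative shape of a demoted unique key after unconstrain_keys():
# UNIQUE KEY `domain` exactly duplicates UNIQUE KEY `unique_key_domain`,
# so it is moved to the normal-keys list and tagged as a duplicate.
my $demoted = {
    name             => 'domain',
    cols             => ['domain'],
    exact_dupe       => 1,
    unconstrained    => 'duplicate',  # 'stronger' for the pre-existing case
    constraining_key => { name => 'unique_key_domain' },
};

# Reason text assembled the same way as in remove_prefix_duplicates() above.
my $type = $demoted->{unconstrained};
print "Uniqueness of $demoted->{name} ignored because ",
      "$demoted->{constraining_key}->{name} is a $type constraint\n";

This matches the reason line in the new expected-output sample for test.domains further down.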

View File

@@ -89,7 +89,7 @@ sub get_key_size {
# EXPLAIN's rows column reports only the rows that satisfy the query
# using the key, but this is not what we want; we want total table rows.
# In other words, we need EXPLAIN to report access type index, not ref or range.
if ( scalar @cols == 1 ) {
if ( scalar(@cols) == 1 && !$args{only_eq} ) {
push @where_cols, "$cols[0]<>1";
}
$sql .= join(' OR ', @where_cols);
@@ -113,6 +113,22 @@ sub get_key_size {
PTDEBUG && _d('MySQL chose key:', $chosen_key, 'len:', $key_len,
'rows:', $rows);
# https://bugs.launchpad.net/percona-toolkit/+bug/1201443
if ( $chosen_key && $key_len eq '0' ) {
if ( $args{recurse} ) {
$self->{error} = "key_len = 0 in EXPLAIN:\n"
. _explain_to_text($explain);
return;
}
else {
return $self->get_key_size(
%args,
only_eq => 1,
recurse => 1,
);
}
}
my $key_size = 0;
if ( $key_len && $rows ) {
if ( $chosen_key =~ m/,/ && $key_len =~ m/,/ ) {
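
Below is a standalone sketch of the retry this hunk adds, with the EXPLAIN step replaced by a fake result; the helper name and hard-coded values are assumptions for illustration only. When EXPLAIN reports key_len = 0 for the chosen key, the calculation is retried once with only_eq => 1 (dropping the "col<>1" predicate built above), and a second zero becomes an error rather than the uninitialized-value warnings seen in bug 1201443.

#!/usr/bin/env perl
use strict;
use warnings;

# Simplified stand-in for get_key_size(): the real module runs EXPLAIN and
# parses $chosen_key and $key_len from it; here they come from %args.
sub key_size_or_retry {
    my (%args) = @_;
    my ($chosen_key, $key_len) = @{ $args{explain} };
    if ( $chosen_key && $key_len eq '0' ) {
        return { error => "key_len = 0 in EXPLAIN" } if $args{recurse};
        # Retry once, keeping only the equality predicate in the WHERE.
        return key_size_or_retry(%args, only_eq => 1, recurse => 1);
    }
    return { key => $chosen_key, len => $key_len };
}

my $result = key_size_or_retry( explain => [ 'parent_id', '0' ] );
print $result->{error} || "chose $result->{key}, key_len $result->{len}", "\n";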

View File

@@ -10,6 +10,7 @@ use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use Data::Dumper;
use VersionParser;
use DuplicateKeyFinder;
@@ -756,7 +757,7 @@ is_deeply(
duplicate_of_cols => [ 'row_id' ],
duplicate_of_ddl => 'PRIMARY KEY (`row_id`),',
key => 'row_id',
reason => 'row_id is a duplicate of PRIMARY',
reason => "Uniqueness of row_id ignored because PRIMARY is a duplicate constraint\nrow_id is a duplicate of PRIMARY",
},
{
cols => [ 'player_id' ],
@@ -772,6 +773,65 @@ is_deeply(
'Finds duplicates OK on uppercase columns',
);
# #############################################################################
# https://bugs.launchpad.net/percona-toolkit/+bug/1214114
# #############################################################################
#$ddl = load_file('t/lib/samples/dupekeys/prefix_bug_1214114.sql');
#$dupes = [];
#($keys, $ck) = $tp->get_keys($ddl, $opt);
#$dk->get_duplicate_keys(
# $keys,
# clustered_key => $ck,
# clustered => 1,
# callback => $callback,
# tbl_info => { engine => 'InnoDB', ddl => $ddl },
#);
#
#is_deeply(
# $dupes,
# [{
# cols => ['b', 'id'],
# ddl => 'KEY `b` (`b`,`id`)',
# dupe_type => 'clustered',
# duplicate_of => 'PRIMARY',
# duplicate_of_cols => ['id'],
# duplicate_of_ddl => 'PRIMARY KEY (`id`),',
# key => 'b',
# reason => 'Key b ends with a prefix of the clustered index',
# short_key => '`b`',
# }],
# "Prefix bug 1214114"
#) or diag(Dumper($dupes));
# #############################################################################
# https://bugs.launchpad.net/percona-toolkit/+bug/1217013
# #############################################################################
$ddl = load_file('t/lib/samples/dupekeys/simple_dupe_bug_1217013.sql');
$dupes = [];
($keys, $ck) = $tp->get_keys($ddl, $opt);
$dk->get_duplicate_keys(
$keys,
clustered_key => $ck,
clustered => 1,
callback => $callback,
tbl_info => { engine => 'InnoDB', ddl => $ddl },
);
is_deeply(
$dupes,
[{
cols => ['domain'],
ddl => 'UNIQUE KEY `domain` (`domain`),',
dupe_type => 'exact',
duplicate_of => 'unique_key_domain',
duplicate_of_cols => ['domain'],
duplicate_of_ddl => 'UNIQUE KEY `unique_key_domain` (`domain`)',
key => 'domain',
reason => "Uniqueness of domain ignored because unique_key_domain is a duplicate constraint\ndomain is a duplicate of unique_key_domain",
}],
"Exact dupe uniques (bug 1217013)"
) or diag(Dumper($dupes));
# #############################################################################
# Done.
# #############################################################################
@@ -787,4 +847,3 @@ like(
'_d() works'
);
done_testing;
exit;

View File

@@ -25,9 +25,6 @@ my $dbh = $sb->get_dbh_for('master');
if ( !$dbh ) {
plan skip_all => "Cannot connect to sandbox master";
}
else {
plan tests => 19;
}
my $q = new Quoter();
my $tp = new TableParser(Quoter => $q);
@@ -190,6 +187,107 @@ is(
'Query without FORCE INDEX (issue 364)'
);
# #############################################################################
# https://bugs.launchpad.net/percona-toolkit/+bug/1201443
# #############################################################################
$sb->load_file('master', "t/pt-duplicate-key-checker/samples/fk_chosen_index_bug_1201443.sql");
($size, $chosen_key) = $ks->get_key_size(
name => 'child_ibfk_2',
cols => [qw(parent_id)],
tbl_name => 'fk_chosen_index_bug_1201443.child',
tbl_struct => {
charset => 'latin1',
clustered_key => undef,
col_posn => {
id => 0,
parent_id => 1
},
cols => [
'id',
'parent_id'
],
defs => {
id => ' `id` int(11) NOT NULL AUTO_INCREMENT',
parent_id => ' `parent_id` int(11) NOT NULL'
},
engine => 'InnoDB',
is_autoinc => {
id => 1,
parent_id => 0
},
is_col => {
id => 1,
parent_id => 1
},
is_nullable => {},
is_numeric => {
id => 1,
parent_id => 1
},
keys => {
id => {
col_prefixes => [
undef
],
colnames => '`id`',
cols => [
'id'
],
ddl => 'KEY `id` (`id`),',
is_col => {
id => 1
},
is_nullable => 0,
is_unique => 0,
name => 'id',
type => 'BTREE'
},
parent_id => {
col_prefixes => [
undef
],
colnames => '`parent_id`',
cols => [
'parent_id'
],
ddl => 'KEY `parent_id` (`parent_id`),',
is_col => {
parent_id => 1
},
is_nullable => 0,
is_unique => 0,
name => 'parent_id',
type => 'BTREE'
}
},
name => 'child',
null_cols => [],
numeric_cols => [
'id',
'parent_id'
],
type_for => {
id => 'int',
parent_id => 'int'
}
},
dbh => $dbh,
);
cmp_ok(
$size,
'>',
15_000, # estimates range from 15k to 30k
"Bug 1201443: size"
);
is(
$chosen_key,
'parent_id',
"Bug 1201443: chosen key"
);
# #############################################################################
# Done.
# #############################################################################
@@ -206,4 +304,4 @@ like(
);
$sb->wipe_clean($dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
exit;
done_testing;

View File

@@ -0,0 +1,7 @@
CREATE TABLE `t` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`a` varchar(200) DEFAULT NULL,
`b` decimal(22,0) NOT NULL,
PRIMARY KEY (`id`),
KEY `b` (`b`,`id`)
) ENGINE=InnoDB DEFAULT CHARSET=latin1

View File

@@ -0,0 +1,7 @@
CREATE TABLE `domains` (
`id` bigint(20) NOT NULL,
`domain` varchar(175) COLLATE utf8_bin NOT NULL,
PRIMARY KEY (`id`),
UNIQUE KEY `domain` (`domain`),
UNIQUE KEY `unique_key_domain` (`domain`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_bin

View File

@@ -22,9 +22,6 @@ my $dbh = $sb->get_dbh_for('master');
if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 13;
}
my $output;
my $sample = "t/pt-duplicate-key-checker/samples/";
@@ -136,9 +133,23 @@ ok(
'--key-types fk (explicit)'
);
# #############################################################################
# Exact unique dupes
# https://bugs.launchpad.net/percona-toolkit/+bug/1217013
# #############################################################################
$sb->load_file('master', 't/lib/samples/dupekeys/simple_dupe_bug_1217013.sql', 'test');
ok(
no_diff(
sub { pt_duplicate_key_checker::main(@args, qw(-t test.domains)) },
"$sample/simple_dupe_bug_1217013.txt"),
'Exact unique dupes (bug 1217013)'
) or diag($test_diff);
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
exit;
done_testing;

View File

@@ -41,7 +41,7 @@ ok(
: "$sample/issue_295.txt")
),
"Shorten, not remove, clustered dupes"
);
) or diag($test_diff);
# #############################################################################
# Error if InnoDB table has no PK or unique indexes
@@ -80,10 +80,23 @@ unlike(
'PTDEBUG doesn\'t auto-vivify cluster key hashref (bug 1036804)'
);
# #############################################################################
#
# https://bugs.launchpad.net/percona-toolkit/+bug/1201443
# #############################################################################
$sb->load_file('master', "t/pt-duplicate-key-checker/samples/fk_chosen_index_bug_1201443.sql");
$output = `$trunk/bin/pt-duplicate-key-checker F=$cnf -d fk_chosen_index_bug_1201443 2>&1`;
unlike(
$output,
qr/Use of uninitialized value/,
'fk_chosen_index_bug_1201443'
);
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;
exit;

View File

@@ -2,6 +2,7 @@
# test.bug_894140
# ########################################################################
# Uniqueness of row_id ignored because PRIMARY is a duplicate constraint
# row_id is a duplicate of PRIMARY
# Key definitions:
# UNIQUE KEY `row_id` (`row_id`),

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,21 @@
# ########################################################################
# test.domains
# ########################################################################
# Uniqueness of domain ignored because unique_key_domain is a duplicate constraint
# domain is a duplicate of unique_key_domain
# Key definitions:
# UNIQUE KEY `domain` (`domain`),
# UNIQUE KEY `unique_key_domain` (`domain`)
# Column types:
# `domain` varchar(175) collate utf8_bin not null
# To remove this duplicate index, execute:
ALTER TABLE `test`.`domains` DROP INDEX `domain`;
# ########################################################################
# Summary of indexes
# ########################################################################
# Size Duplicate Indexes 527
# Total Duplicate Indexes 1
# Total Indexes 3

View File

@@ -18,20 +18,25 @@ require "$trunk/bin/pt-duplicate-key-checker";
my $output;
my $cnf = "/tmp/12345/my.sandbox.cnf";
my $cmd = "$trunk/bin/pt-duplicate-key-checker -F $cnf -h 127.1";
my $pid_file = "/tmp/pt-dupe-key-test.pid";
diag(`rm -f $pid_file >/dev/null`);
# #########################################################################
# Issue 391: Add --pid option to all scripts
# #########################################################################
`touch /tmp/mk-script.pid`;
$output = `$cmd -d issue_295 --pid /tmp/mk-script.pid 2>&1`;
diag(`touch $pid_file`);
$output = `$cmd -d issue_295 --pid $pid_file 2>&1`;
like(
$output,
qr{PID file /tmp/mk-script.pid already exists},
qr{PID file $pid_file exists},
'Dies if PID file already exists (issue 391)'
);
`rm -rf /tmp/mk-script.pid`;
# #############################################################################
# Done.
# #############################################################################
diag(`rm -f $pid_file >/dev/null`);
exit;