Merge pull request #206 from percona/PT-116

PT-116 pt-online-schema change eats data on adding a unique index
This commit is contained in:
Carlos Salguero
2017-04-20 15:34:47 -03:00
committed by GitHub
4 changed files with 207 additions and 10 deletions

View File

@@ -1,5 +1,10 @@
Changelog for Percona Toolkit
v3.0.3
* Fixed bug PT-116 : pt-online-schema change eats data on adding a unique index
v3.0.2 released 2017-03-23
* Fixed bug PT-73 : pt-mongodb tools add support for SSL connections

View File

@@ -8925,12 +8925,13 @@ sub main {
if ( $o->get('check-alter') ) {
check_alter(
tbl => $orig_tbl,
alter => $alter,
dry_run => $o->get('dry-run'),
renamed_cols => $renamed_cols,
Cxn => $cxn,
TableParser => $tp,
tbl => $orig_tbl,
alter => $alter,
dry_run => $o->get('dry-run'),
renamed_cols => $renamed_cols,
Cxn => $cxn,
TableParser => $tp,
got_use_insert_ignore => $o->got('use-insert-ignore'),
);
}
}
@@ -9571,7 +9572,8 @@ sub main {
# NibbleIterator combines these two statements and adds
# "FROM $orig_table->{name} WHERE <nibble stuff>".
my $dml = "INSERT LOW_PRIORITY IGNORE INTO $new_tbl->{name} "
my $ignore = $o->get('use-insert-ignore') ? "IGNORE" : '';
my $dml = "INSERT LOW_PRIORITY $ignore INTO $new_tbl->{name} "
. "(" . join(', ', map { $q->quote($_->{new}) } @common_cols) . ") "
. "SELECT";
my $select = join(', ', map { $q->quote($_->{old}) } @common_cols);
@@ -9908,11 +9910,11 @@ sub validate_tries {
sub check_alter {
my (%args) = @_;
my @required_args = qw(alter tbl dry_run Cxn TableParser);
my @required_args = qw(alter tbl dry_run Cxn TableParser got_use_insert_ignore);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless exists $args{$arg};
}
my ($alter, $tbl, $dry_run, $cxn, $tp) = @args{@required_args};
my ($alter, $tbl, $dry_run, $cxn, $tp, $got_use_insert_ignore) = @args{@required_args};
my $ok = 1;
@@ -9983,11 +9985,31 @@ sub check_alter {
}
}
# This regex matches unquoted strings in $1 and quoted strings in $2
# We need to loop through all matches in the alter string and check if the
# unquoted part has the string 'UNIQUE INDEX'
# We are doing this because the alter could be something like
# ADD KEY pk COMMENT "ADD UNIQUE KEY"
my $re = qr/([^"']*)?((?:"|\\"|'|\\').*?(?:"|\\"|'|\\'))?/i;
my $has_unique_index = 0;
while ( $alter =~ m/$re/g) {
if ($1 =~ m/UNIQUE INDEX/i) {
$has_unique_index = 1;
}
}
if ($has_unique_index && !$got_use_insert_ignore) {
$ok = 0;
warn "It seems like you are trying to add an UNIQUE INDEX.\n"
. "Since in some cases this could lead to data loss, you need to specify a value "
. "for the use-insert-ignore parameter.\n"
. "Please read the documentation and restart the program with the apropriate paramenters.\n";
}
if ( !$ok ) {
# check_alter.t relies on this output.
die "--check-alter failed.\n";
}
return;
}
@@ -10957,6 +10979,7 @@ sub exec_nibble {
# any pattern
# use MySQL's message for this warning
},
1062 => {},
);
return osc_retry(
@@ -11736,6 +11759,25 @@ documentation, then do not specify this option.
This options bypasses confirmation in case of using alter-foreign-keys-method = none , which might break foreign key constraints.
=item --[no]use-insert-ignore
default: yes
pt-online-schema-change by default use INSERT LOW_PRIORITY IGNORE statements to copy rows from the old table to the new one.
In some cases, like when altering a table to add an unique index, using IGNORE may lead to data loss.
Example:
CREATE TABLE test.t1 (
id TINYINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
notunique VARCHAR(200) NOT NULL
);
INSERT INTO test.t1(notunique) VALUES('test01'),('test01'),('test02');
If you run pt-online-schema-change --alter="ADD UNIQUE INDEX unique_1 (notunique)", when copying the second row to the new
table, INSERT ... IGNORE will fail silently and that row won't be copied.
In cas osc detects you are adding an UNIQUE index, you need to specify this option in the command line.
=item --help
Show help and exit.

View File

@@ -0,0 +1,140 @@
#!/usr/bin/env perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use Data::Dumper;
use PerconaTest;
use Sandbox;
use SqlModes;
use File::Temp qw/ tempdir /;
plan tests => 8;
require "$trunk/bin/pt-online-schema-change";
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $master_dbh = $sb->get_dbh_for('master');
my $master_dsn = 'h=127.1,P=12345,u=msandbox,p=msandbox';
if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
# so we need to specify --set-vars innodb_lock_wait_timeout=3 else the
# tool will die.
my @args = (qw(--set-vars innodb_lock_wait_timeout=3));
my $output;
my $exit_status;
my $sample = "t/pt-online-schema-change/samples/";
$sb->load_file('master', "$sample/pt-116.sql");
my $dir = tempdir( CLEANUP => 1 );
($output, $exit_status) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=t1",
'--execute',
'--alter', "ADD UNIQUE INDEX unique_1 (notunique)",
'--chunk-size', '1',
),
},
stderr => 1,
);
like(
$output,
qr/It seems like/s,
"Need to specify use-insert-ignore",
);
($output, $exit_status) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=t1",
'--execute',
'--alter', "ADD UNIQUE INDEX unique_1 (notunique)",
'--chunk-size', '1',
'--nouse-insert-ignore',
),
},
stderr => 1,
);
like(
$output,
qr/Error copying rows from/s,
"Error adding unique index not using insert ignore",
);
isnt(
$exit_status,
0,
"Got error adding unique index (exit status != 0)",
);
# Check no data was deleted from the original table
my $rows = $master_dbh->selectrow_arrayref(
"SELECT COUNT(*) FROM `test`.`t1`");
is(
$rows->[0],
3,
"ALTER ADD UNIQUE key on a field having duplicated values"
) or diag(Dumper($rows));
# # This test looks weird but since we added use-insert-ignore, we know in this particular
# # case, having the testing dataset with repeated values for the field on which we are
# # adding a unique will lose data.
# # It is not the intention of this test to lose data, but we need to test the INSERT statement
# # was created as expected.
($output, $exit_status) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=t1",
'--execute',
'--alter', "ADD UNIQUE INDEX unique_1 (notunique)",
'--chunk-size', '1',
'--use-insert-ignore',
),
},
stderr => 1,
);
like(
$output,
qr/Successfully altered/s,
"Error adding unique index not using insert ignore",
);
is(
$exit_status,
0,
"Added unique index and some rows got lost (exit status = 0)",
);
# Check no data was deleted from the original table
$rows = $master_dbh->selectrow_arrayref(
"SELECT COUNT(*) FROM `test`.`t1`");
is(
$rows->[0],
2,
"Added unique index and some rows got lost (row count = original - 1)",
) or diag(Dumper($rows));
$master_dbh->do("DROP DATABASE IF EXISTS test");
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($master_dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;

View File

@@ -0,0 +1,10 @@
CREATE SCHEMA IF NOT EXISTS test;
DROP TABLE IF EXISTS test.t1;
CREATE TABLE test.t1 (
id TINYINT UNSIGNED AUTO_INCREMENT PRIMARY KEY,
notunique VARCHAR(200) NOT NULL
);
INSERT INTO test.t1(notunique) VALUES('test01'),('test01'),('test02');