PT-229 Retry insert on WSREP deadlock on PXC

PXC returns a "WSREP detected deadlock/conflict" error when the same rows
are updated on different masters at the same time.
We shouldn't abort on that error; we should retry the insert instead.
This commit is contained in:
Carlos Salguero
2017-12-09 12:07:42 -03:00
parent fe51f68587
commit 3d54dff029
3 changed files with 79 additions and 51 deletions
+1
View File
@@ -2,6 +2,7 @@ Changelog for Percona Toolkit
v3.0.6
* Fixed bug PT-229: pt-online-schema-change does not retry on deadlock error when using percona 5.7
* Fixed bug PT-225: pt-table-checksum ignores generated cols
* Improvement PT-221: pt-table-sync support for MyRocks
+3 -2
View File
@@ -56,7 +56,7 @@ BEGIN {
{
package Percona::Toolkit;
our $VERSION = '3.0.5';
our $VERSION = '3.0.6';
use strict;
use warnings FATAL => 'all';
@@ -11262,6 +11262,7 @@ sub osc_retry {
if ( $error =~ m/Lock wait timeout exceeded/
|| $error =~ m/Deadlock found/
|| $error =~ m/Query execution was interrupted/
|| $error =~ m/WSREP detected deadlock\/conflict/
) {
# These errors/warnings can be retried, so don't print
# a warning yet; do that in final_fail.
@@ -12972,6 +12973,6 @@ Place, Suite 330, Boston, MA 02111-1307 USA.
=head1 VERSION
pt-online-schema-change 3.0.5
pt-online-schema-change 3.0.6
=cut
+75 -49
View File
@@ -1,15 +1,16 @@
#!/usr/bin/env perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use threads;
use threads::shared;
use Thread::Semaphore;
use English qw(-no_match_vars);
use Test::More;
@@ -20,33 +21,35 @@ use Sandbox;
use SqlModes;
use File::Temp qw/ tempdir /;
plan tests => 2;
plan tests => 5;
require "$trunk/bin/pt-online-schema-change";
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
our ($master_dbh, $master_dsn) = $sb->start_sandbox(
server => 'master',
type => 'master',
env => q/FORK="pxc" BINLOG_FORMAT="ROW"/,
);
my $master_dbh = $sb->get_dbh_for('node1');
my $master_dsn = $sb->dsn_for('node1');
if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
plan skip_all => 'Cannot connect to sandbox master';
}
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
# so we need to specify --set-vars innodb_lock_wait_timeout=3 else the
# tool will die.
my @args = (qw(--set-vars innodb_lock_wait_timeout=3));
my @args = (qw(--set-vars innodb_lock_wait_timeout=3));
my $output;
my $exit_status;
my $sample = "t/pt-online-schema-change/samples/";
$sb->load_file('master', "$sample/pt-229.sql");
diag(`util/mysql_random_data_load_linux_amd64 --host=127.1 --port=12345 --user=msandbox --password=msandbox test test_a 400000`);
my $num_rows = 40000;
diag("Loading $num_rows into the table. This might take some time.");
diag(`util/mysql_random_data_load_linux_amd64 --host=127.1 --port=12345 --user=msandbox --password=msandbox test test_a $num_rows`);
diag("$num_rows rows loaded. Starting tests.");
$master_dbh->do("FLUSH TABLES");
my $threads = [];
@@ -56,71 +59,94 @@ sub signal_handler {
$i++;
diag ("Signaling thread #$i to stop");
$thread->kill("STOP");
$thread->join();
diag ("Thread $i stopped");
}
}
sub start_thread {
my ($dsn_opts, $node) = @_;
my ($dsn_opts, $node, $s) = @_;
my $stop;
$SIG{'STOP'} = sub {
$stop = 1;
};
my $stop;
$SIG{'STOP'} = sub {
$stop = 1;
};
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $dbh= $sb->get_dbh_for($node);
diag("Thread started");
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $dbh= $sb->get_dbh_for($node);
diag("Thread started");
while(!$stop) {
$dbh->do("UPDATE `test`.`test_a` SET modified=NOW() WHERE RAND() <= 0.2 LIMIT 1");
my $random_sleep_time = rand() / 10;
# diag("Row updated on node: $node. Sleeping $random_sleep_time");
select(undef, undef, undef, $random_sleep_time);
}
print "leaving thread for $node\n";
while(!$stop) {
eval {
$dbh->do("UPDATE `test`.`test_a` SET modified=NOW() WHERE RAND() <= 0.2 LIMIT 1");
};
my $random_sleep_time = rand() / 10;
select(undef, undef, undef, $random_sleep_time);
}
print "Thread for $node has been stopped\n";
$s->up();
}
#$SIG{INT} = \&signal_handler;
$SIG{INT} = \&signal_handler;
my $nodes = ['node1', 'node2', 'node3'];
my $s = Thread::Semaphore->new();
for my $node (@$nodes) {
my $thread = threads->create('start_thread', $dsn_opts, $node);
$thread->detach();
push @$threads, $thread;
my $thread = threads->create('start_thread', $dsn_opts, $node, $s);
$thread->detach();
push @$threads, $thread;
}
threads->yield();
diag("Starting osc. A row will be updated in a different thread.");
diag("Starting osc. Random rows will be updated in other threads.");
($output, $exit_status) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=test_a",
'--execute',
'--alter', "ADD COLUMN zzz INT",
),
},
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=test_a",
'--execute',
'--alter', "ADD COLUMN zzz INT",
),
},
stderr => 1,
);
diag("status: $exit_status");
diag($output);
like(
$output,
qr/Successfully altered/s,
"OK",
is(
$exit_status,
0,
"PT-229 Successfully altered. Exit status = 0",
);
sleep(10);
threads->exit();
like(
$output,
qr/Successfully altered/s,
"PT-229 Got successfully altered message.",
);
my $rows = $master_dbh->selectrow_arrayref('SHOW CREATE TABLE test.test_a');
like(
@$rows[1],
qr/ `zzz` int\(11\) DEFAULT NULL,/im,
"PT-229 New field was added",
);
$rows = $master_dbh->selectrow_arrayref('SELECT COUNT(*) FROM test.test_a');
is(
@$rows[0],
$num_rows,
"PT-229 Number of rows is correct",
);
signal_handler(); # Signal all threads to stop
for (@$threads) {
$s->down(); # Wait until all threads are really stopped
}
$master_dbh->do("DROP DATABASE IF EXISTS test");
# #############################################################################
# Done.
# #############################################################################
#$sb->wipe_clean($master_dbh);
$sb->wipe_clean($master_dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;