PT-1717 - resume pt-online-schema-change if it's interrupted

- Now --resume dies if new table or triggers do not exist
- New table and triggers are not removed if --resume fails during these
  checkups
- Added sample file and more tests for --resume
This commit is contained in:
Sveta Smirnova
2024-03-12 22:12:48 +03:00
parent 8ce3b94a1c
commit 2d4f5c290a
5 changed files with 583 additions and 293 deletions
+148 -69
View File
@@ -8812,13 +8812,15 @@ sub main {
PTDEBUG && _d('Last chunk:', Dumper($last_chunk));
if ( !$last_chunk || !$last_chunk->{new_table_name} ) {
_die("Option --resume refers non-existing chunk: ${old_job_id}. Exiting."
$oktorun = undef;
_die("Option --resume refers non-existing job ID: ${old_job_id}. Exiting."
, UNSUPPORTED_OPERATION);
}
if ( $last_chunk->{db} ne $db
|| $last_chunk->{tbl} ne $tbl
|| $last_chunk->{altr} ne $o->get('alter') ){
$oktorun = undef;
_die("Essential options for the failed job are different from current:\n"
. "Database: failed - $last_chunk->{db}, current - ${db}\n"
. "Table: failed - $last_chunk->{tbl}, current - ${tbl}\n"
@@ -8828,10 +8830,29 @@ sub main {
}
if ( $last_chunk->{done} eq 'yes' ) {
$oktorun = undef;
_die("Copying rows for job ${old_job_id} finished.\n"
. "Nothing to do. Exiting."
, UNSUPPORTED_OPERATION);
}
my $json = JSON->new->allow_nonref;
my $opt_hash = $json->decode($last_chunk->{args});
if ( ( ($opt_hash->{'chunk-index'} // '') ne ($o->get('chunk-index') // '') )
|| ( ($opt_hash->{'chunk-index-columns'} // '')
ne ($o->get('chunk-index-columns') // '') )
) {
$oktorun = undef;
_die("User-specified chunk index does not match stored one\n"
. "Stored chunk index: " . ($opt_hash->{'chunk-index'}//'') . "\n"
. "User-specified chunk index: " . ($o->get('chunk-index')//'') . "\n"
. "Stored value of --chunk-index-columns: "
. ($opt_hash->{'chunk-index-columns'}//'') . "\n"
. "User-specified value of --chunk-index-columns: "
. ($o->get('chunk-index-columns')//'') . "\n"
. "Cannot resume job ${old_job_id}. Exiting."
, UNSUPPORTED_OPERATION);
}
}
my $job_id;
@@ -9042,7 +9063,7 @@ sub main {
my $is_skip = 0;
for my $slave_to_skip (@$slaves_to_skip) {
if ( $slave->{dsn}->{h} eq $slave_to_skip->{h} ) {
my $skip_slave_port = defined($slave_to_skip->{P})
my $skip_slave_port = defined($slave_to_skip->{P})
? $slave_to_skip->{P} : '3306';
if ( ($slave->{dsn}->{P} eq $skip_slave_port) ) {
print "Skipping slave " . $slave->description() . "\n";
@@ -9587,6 +9608,19 @@ sub main {
tbl => $new_table_name,
name => $q->quote($orig_tbl->{db}, $new_table_name),
};
my $sql = "SELECT COUNT(*) AS c FROM information_schema.tables "
. "WHERE TABLE_SCHEMA = ? and TABLE_NAME = ?";
PTDEBUG && _d($sql);
my $sth = $cxn->dbh()->prepare($sql);
$sth->execute($new_tbl->{db}, $new_tbl->{tbl});
my $cnt = $sth->fetchrow_hashref();
$sth->finish();
PTDEBUG && _d("Found table: $cnt->{c}");
if ( !$cnt->{c} || int($cnt->{c}) != 1 ) {
_die("New table $new_tbl->{name} not found, restart operation from scratch"
, UNSUPPORTED_OPERATION);
}
}
else {
$new_table_name = $o->get('new-table-name');
@@ -9621,7 +9655,7 @@ sub main {
"UPDATE ${hist_table} SET new_table_name = ?"
);
$sth->execute($new_tbl->{tbl});
}
}
# If the new table still exists, drop it unless the tool was interrupted.
push @cleanup_tasks, sub {
@@ -9637,8 +9671,7 @@ sub main {
my $sql = "DROP TABLE IF EXISTS $new_tbl->{name};";
if ( !$oktorun ) {
# The tool was interrupted, so do not drop the new table
# in case the user wants to resume (once resume capability
# is implemented).
# in case the user wants to resume.
print "Not dropping the new table $new_tbl->{name} because "
. "the tool was interrupted. To drop the new table, "
. "execute:\n$sql\n";
@@ -9904,69 +9937,117 @@ sub main {
}
};
if ( !$o->got('resume') ) {
# --plugin hook
if ( $plugin && $plugin->can('before_create_triggers') ) {
$plugin->before_create_triggers();
}
# --plugin hook
if ( $plugin && $plugin->can('before_create_triggers') ) {
$plugin->before_create_triggers();
}
my @trigger_names = eval {
my @trigger_names = eval {
create_triggers(
orig_tbl => $orig_tbl,
new_tbl => $new_tbl,
del_tbl => $del_tbl,
columns => \@common_cols,
Cxn => $cxn,
Quoter => $q,
OptionParser => $o,
Retry => $retry,
tries => $tries,
stats => \%stats,
dont => $o->got('resume'),
);
};
if ( $EVAL_ERROR ) {
$oktorun = undef;
_die("Error creating triggers: $EVAL_ERROR", ERROR_CREATING_TRIGGERS);
};
# We do not create triggers if option --resume is provided
# but we need to check if triggers exist
if ( $o->got('resume') ) {
my $sql = "SELECT COUNT(*) AS c FROM "
. "INFORMATION_SCHEMA.TRIGGERS "
. "WHERE TRIGGER_SCHEMA = ? AND TRIGGER_NAME = ?";
my $sth = $cxn->dbh()->prepare($sql);
foreach my $trigger_name ( @trigger_names ) {
PTDEBUG && _d("Checking trigger: $orig_tbl->{db}.$trigger_name");
$sth->execute($orig_tbl->{db}, $trigger_name);
my $cnt = $sth->fetchrow_hashref();
PTDEBUG && _d("Found table: $cnt->{c}");
if ( !$cnt->{c} || int($cnt->{c}) != 1 ) {
$oktorun = undef;
_die("Trigger $orig_tbl->{db}.$trigger_name not found, "
. "restart operation from scratch to avoid data loss"
, UNSUPPORTED_OPERATION);
}
}
$sth->finish();
}
if ( $o->get('reverse-triggers') ) {
print "Adding reverse triggers\n";
my $old_tbl_name = '_'.$orig_tbl->{tbl}.'_old';
my $new_tbl_name = '_'.$orig_tbl->{tbl}.'_new';
my $old_tbl = {
db => $orig_tbl->{db},
name => '`'.$orig_tbl->{db}.'`.`'.$old_tbl_name.'`',
tbl => $old_tbl_name,
};
my $new_tbl = {
db => $orig_tbl->{db},
name => '`'.$orig_tbl->{db}.'`.`'.$new_tbl_name.'`',
tbl => $new_tbl_name,
};
my @reverse_trigger_names=eval {
create_triggers(
orig_tbl => $orig_tbl,
new_tbl => $new_tbl,
del_tbl => $del_tbl,
columns => \@common_cols,
Cxn => $cxn,
Quoter => $q,
OptionParser => $o,
Retry => $retry,
tries => $tries,
stats => \%stats,
orig_tbl => $new_tbl,
new_tbl => $old_tbl,
del_tbl => $orig_tbl,
columns => \@common_cols,
Cxn => $cxn,
Quoter => $q,
OptionParser => $o,
Retry => $retry,
tries => $tries,
stats => \%stats,
reverse_triggers => 1,
dont => $o->got('resume'),
);
};
if ( $EVAL_ERROR ) {
_die("Error creating triggers: $EVAL_ERROR", ERROR_CREATING_TRIGGERS);
$oktorun = undef;
_die("Error creating reverse triggers: $EVAL_ERROR", ERROR_CREATING_REVERSE_TRIGGERS);
};
if ( $o->get('reverse-triggers') ) {
print "Adding reverse triggers\n";
eval {
my $old_tbl_name = '_'.$orig_tbl->{tbl}.'_old';
my $new_tbl_name = '_'.$orig_tbl->{tbl}.'_new';
my $old_tbl = {
db => $orig_tbl->{db},
name => '`'.$orig_tbl->{db}.'`.`'.$old_tbl_name.'`',
tbl => $old_tbl_name,
};
my $new_tbl = {
db => $orig_tbl->{db},
name => '`'.$orig_tbl->{db}.'`.`'.$new_tbl_name.'`',
tbl => $new_tbl_name,
};
my $triggers=create_triggers(
orig_tbl => $new_tbl,
new_tbl => $old_tbl,
del_tbl => $orig_tbl,
columns => \@common_cols,
Cxn => $cxn,
Quoter => $q,
OptionParser => $o,
Retry => $retry,
tries => $tries,
stats => \%stats,
reverse_triggers => 1,
);
};
if ( $EVAL_ERROR ) {
_die("Error creating reverse triggers: $EVAL_ERROR", ERROR_CREATING_REVERSE_TRIGGERS);
};
# We do not create reverse triggers if option --resume is provided
# but we need to check if triggers exist
if ( $o->got('resume') ) {
my $sql = "SELECT COUNT(*) AS c FROM "
. "INFORMATION_SCHEMA.TRIGGERS "
. "WHERE TRIGGER_SCHEMA = ? AND TRIGGER_NAME = ?";
my $sth = $cxn->dbh()->prepare($sql);
foreach my $trigger_name ( @reverse_trigger_names ) {
PTDEBUG && _d("Checking reverse trigger: $orig_tbl->{db}.$trigger_name");
$sth->execute($orig_tbl->{db}, $trigger_name);
my $cnt = $sth->fetchrow_hashref();
PTDEBUG && _d("Found table: $cnt->{c}");
if ( !$cnt->{c} || int($cnt->{c}) != 1 ) {
$oktorun = undef;
_die("Reverse trigger $orig_tbl->{db}.$trigger_name not found, "
. "restart operation from scratch to avoid data loss"
, UNSUPPORTED_OPERATION);
}
}
$sth->finish();
}
# --plugin hook
if ( $plugin && $plugin->can('after_create_triggers') ) {
$plugin->after_create_triggers();
}
}
# --plugin hook
if ( $plugin && $plugin->can('after_create_triggers') ) {
$plugin->after_create_triggers();
}
# #####################################################################
@@ -11810,7 +11891,7 @@ sub create_triggers {
# If --preserve-triggers was specified, try to create the original triggers into the new table.
# We are doing this to ensure the original triggers will work in the new modified table
# and we want to know this BEFORE copying all rows from the old table to the new one.
if ($o->get('preserve-triggers')) {
if ($o->get('preserve-triggers') && !$args{dont}) {
foreach my $trigger_info (@$triggers_info) {
foreach my $orig_trigger (@{$trigger_info->{orig_triggers}}) {
my $definer = $orig_trigger->{definer} || '';
@@ -12656,8 +12737,8 @@ Prompt for a password when connecting to MySQL.
This option modifies the behavior of L<"--history"> such that the
history table's upper and lower boundary columns are created with the BLOB
data type.
This is useful in cases where you changing large tables with keys that
include a binary data type or that have non-standard character sets.
This is useful when you change large tables with keys that include a binary
data type or that have non-standard character sets.
See L<"--history"> and L<"--resume">.
=item --channel
@@ -13023,8 +13104,6 @@ Show help and exit.
=item --history
default: 0
Write job progress to a table. Unfinished jobs may be restarted by the option L<"--resume">.
The history table must have this structure (MAGIC_create_pt_osc_history):
@@ -13317,11 +13396,11 @@ type: int
Resume altering table from the last completed chunk. If the tool stops before it finishes
copying rows, this option makes copying resume from the last chunk copied.
The option accepts ID of the failed job. This ID is printed when pt-online-schema-change
The option accepts ID of the failed job. This ID is printed when pt-online-schema-change
is running with option L<"--history"> and stored in the L<"--history-table">.
Warning! To use this option previous, failed, run of pt-online-schema-change should use options
L<"--history">, L<"--nodrop-new-table">, and L<"--nodrop-triggers">. Otherwise,
Warning! To use this option, the previous (failed) run of pt-online-schema-change must have
used the options L<"--history">, L<"--nodrop-new-table">, and L<"--nodrop-triggers">. Otherwise,
pt-online-schema-change would not be able to resume.
=item --skip-check-slave-lag
@@ -13521,8 +13600,8 @@ keyword. You might need to quote the value. Here is an example:
pt-online-schema-change --where "id > 12345678"
IMPORTANT. If used without options --no-drop-new-table and --no-swap-tables
may lead to data loss, therefore this operation only allowed if option --force
Warning! If used without options L<"--no-drop-new-table"> and L<"--no-swap-tables">,
this may lead to data loss; therefore the operation is only allowed if option
L<"--force"> is also specified.
=item --[no]fail-on-stopped-replication
-205
View File
@@ -1,205 +0,0 @@
#!/usr/bin/env perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use threads;
use English qw(-no_match_vars);
use Test::More;
use Data::Dumper;
use PerconaTest;
use Sandbox;
use SqlModes;
use File::Temp qw/ tempdir tempfile /;
our $delay = 10;
my $output;
my $exit;
my $tmp_file = File::Temp->new();
my $tmp_file_name = $tmp_file->filename;
unlink $tmp_file_name;
require "$trunk/bin/pt-online-schema-change";
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
if ($sb->is_cluster_mode) {
plan skip_all => 'Not for PXC';
}
my $master_dbh = $sb->get_dbh_for('master');
my $slave_dbh1 = $sb->get_dbh_for('slave1');
my $slave_dbh2 = $sb->get_dbh_for('slave2');
my $master_dsn = 'h=127.0.0.1,P=12345,u=msandbox,p=msandbox';
my $slave_dsn1 = 'h=127.0.0.1,P=12346,u=msandbox,p=msandbox';
my $slave_dsn2 = 'h=127.0.0.1,P=12347,u=msandbox,p=msandbox';
my $sample = "t/pt-online-schema-change/samples";
my $plugin = "$trunk/$sample/plugins";
# We need sync_relay_log=1 to keep changes after replica restart
my $cnf = '/tmp/12347/my.sandbox.cnf';
diag(`cp $cnf $cnf.bak`);
diag(`echo "[mysqld]" > /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "sync_relay_log=1" >> /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "sync_relay_log_info=1" >> /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "relay_log_recovery=1" >> /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "!include /tmp/12347/my.sandbox.2.cnf" >> $cnf`);
diag(`/tmp/12347/stop >/dev/null`);
sleep 1;
diag(`/tmp/12347/start >/dev/null`);
# Flush the query cache on every supplied database handle so cached
# results cannot mask replication lag during the test. MySQL 8.0
# removed the query cache entirely, so this is a no-op there.
sub reset_query_cache {
    my (@handles) = @_;

    # Nothing to reset on MySQL 8.0 and later.
    return if $sandbox_version >= '8.0';

    $_->do('RESET QUERY CACHE') for @handles;
}
# 1) Set the slave delay to 0 just in case we are re-running the tests without restarting the sandbox.
# 2) Load sample data
# 3) Set the slave delay to 30 seconds to be able to see the 'waiting' message.
diag("Setting slave delay to 0 seconds");
$slave_dbh1->do('STOP SLAVE');
$master_dbh->do("RESET MASTER");
$slave_dbh1->do('RESET SLAVE');
$slave_dbh1->do('START SLAVE');
diag('Loading test data');
$sb->load_file('master', "t/pt-online-schema-change/samples/slave_lag.sql");
# Should be greater than chunk-size and big enough, so pt-osc will wait for delay
my $num_rows = 5000;
my $chunk_size = 10;
diag("Loading $num_rows into the table. This might take some time.");
diag(`util/mysql_random_data_load --host=127.0.0.1 --port=12345 --user=msandbox --password=msandbox test pt178 $num_rows`);
$sb->wait_for_slaves();
diag("Setting slave delay to $delay seconds");
$slave_dbh1->do('STOP SLAVE');
$slave_dbh1->do("CHANGE MASTER TO MASTER_DELAY=$delay");
$slave_dbh1->do('START SLAVE');
# Run a full table scan query to ensure the slave is behind the master
# There is no query cache in MySQL 8.0+
reset_query_cache($master_dbh, $master_dbh);
# Update one row so slave is delayed
$master_dbh->do('UPDATE `test`.`pt178` SET f2 = f2 + 1 LIMIT 1');
$master_dbh->do('UPDATE `test`.`pt178` SET f2 = f2 + 1 WHERE f1 = ""');
diag("Starting tests...");
my $max_lag = $delay / 2;
# We need to sleep, otherwise pt-osc can finish before slave is delayed
sleep($max_lag);
my $args = "$master_dsn,D=test,t=pt178 --execute --chunk-size ${chunk_size} --max-lag $max_lag --alter 'engine=INNODB' --pid $tmp_file_name --progress time,5 --nodrop-new-table --nodrop-triggers --history";
my ($fh, $filename) = tempfile();
my $pid = fork();
if (!$pid) {
open(STDERR, '>', $filename);
open(STDOUT, '>', $filename);
exec("$trunk/bin/pt-online-schema-change $args");
}
sleep($max_lag + $max_lag/2);
# restart slave 12347
diag(`/tmp/12347/stop >/dev/null`);
sleep 1;
waitpid($pid, 0);
$output = do {
local $/ = undef;
<$fh>;
};
like(
$output,
qr/`test`.`pt178` was not altered/s,
"pt-osc stopped with error as expected",
) or diag($output);
diag(`/tmp/12347/start >/dev/null`);
# Creating copy of table pt178, so we can compare data later
diag(`/tmp/12345/use -N test -e "CREATE TABLE pt178_back like pt178"`);
diag(`/tmp/12345/use -N test -e "INSERT INTO pt178_back SELECT * FROM pt178"`);
$output = `/tmp/12345/use -N -e "select job_id, upper_boundary from percona.pt_osc_history"`;
my ($job_id, $upper_boundary) = split(/\s+/, $output);
my $copied_rows = `/tmp/12345/use -N -e "select count(*) from test._pt178_new"`;
chomp($copied_rows);
ok(
$copied_rows eq $upper_boundary,
'Upper chunk boundary stored correctly'
) or diag("Copied_rows: ${copied_rows}, upper boundary: ${upper_boundary}");;
my @args = (qw(--execute --chunk-size=10 --nodrop-new-table --nodrop-triggers --history));
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt178",
'--max-lag', $max_lag,
'--resume', $job_id,
'--alter', 'engine=INNODB',
#'--progress', 'time,1',
'--plugin', "$plugin/pt-1717.pm",
),
},
);
$output =~ /.*Chunk: (\d+)\n/ms;
my $last_chunk = int($1);
ok(
$last_chunk * $chunk_size + int($copied_rows) == $num_rows,
'Tool inserted only missed rows in the second run'
) or diag("Last chunk: ${last_chunk}, copied rows: ${copied_rows}");
# Compare table contents via CHECKSUM TABLE. The numeric checksum must be
# extracted because the command output also contains the table name, which
# differs between the two tables. The previous code assigned the return
# value of diag() — which is always false — to both variables, so the
# comparison could never fail.
my ($new_table_checksum) = `/tmp/12345/use test -N -e "CHECKSUM TABLE pt178"` =~ /(\d+)\s*\z/;
my ($old_table_checksum) = `/tmp/12345/use test -N -e "CHECKSUM TABLE pt178_back"` =~ /(\d+)\s*\z/;
ok(
    defined $new_table_checksum && defined $old_table_checksum
        && $new_table_checksum eq $old_table_checksum,
    'All rows copied correctly'
) or diag("New table checksum: '" . ($new_table_checksum // '') . "', original content checksum: '" . ($old_table_checksum // '') . "'");
# #############################################################################
# Done.
# #############################################################################
diag("Cleaning");
$slave_dbh2 = $sb->get_dbh_for('slave2');
diag("Setting slave delay to 0 seconds");
$slave_dbh1->do('STOP SLAVE');
$slave_dbh2->do('STOP SLAVE');
$master_dbh->do("RESET MASTER");
$slave_dbh1->do('RESET SLAVE');
$slave_dbh2->do('RESET SLAVE');
$slave_dbh1->do('START SLAVE');
$slave_dbh2->do('START SLAVE');
diag(`mv $cnf.bak $cnf`);
diag(`/tmp/12347/stop >/dev/null`);
diag(`/tmp/12347/start >/dev/null`);
diag("Dropping test database");
$master_dbh->do("DROP DATABASE IF EXISTS test");
$sb->wait_for_slaves();
$sb->wipe_clean($master_dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;
@@ -38,25 +38,6 @@ my $sample = "t/pt-online-schema-change/samples";
$sb->load_file('master', "$sample/basic_no_fks_innodb.sql");
# First test option --history
# * - Test done for the development step
# ** - Test done for two development steps
# 1.** If table percona.pt_osc not created when option not specified
# 2. If table percona.pt_osc created when option present
# 2.1.** Default name
# 2.2.** Custom name
# 2.3.** Second run should not fail or modify this table (except inserting a row for new job)
# 2.4.** Case for binary index
# 2.5.** Second run for the binary index
# 2.6.** Case for invalid existing table
# 2.7.** Case for invalid existing table and binary index
# 3.** Inserting db, tbl, alter, args
# 4. Updating lower and upper boundaries
# 4.1. In situation when pt-osc finishes correctly
# 4.1.1.* `done` set to 'yes'
# 4.2. In failures
# 4.2.1. `done` set to 'no'
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$dsn,D=pt_osc,t=t",
'--alter', 'engine=innodb', '--execute') }
@@ -319,6 +300,7 @@ is(
) or diag($output);
$output = `/tmp/12345/use -N -e "select count(*) from information_schema.tables where TABLE_SCHEMA='pt_1717' and table_name='pt_1717_history'"`;
is(
$output + 0,
1,
+419
View File
@@ -0,0 +1,419 @@
#!/usr/bin/env perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use threads;
use English qw(-no_match_vars);
use Test::More;
use Data::Dumper;
use PerconaTest;
use Sandbox;
use SqlModes;
use File::Temp qw/ tempdir tempfile /;
our $delay = 10;
my $max_lag = $delay / 2;
my $output;
my $exit;
my $tmp_file = File::Temp->new();
my $tmp_file_name = $tmp_file->filename;
unlink $tmp_file_name;
require "$trunk/bin/pt-online-schema-change";
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
if ($sb->is_cluster_mode) {
plan skip_all => 'Not for PXC';
}
my $master_dbh = $sb->get_dbh_for('master');
my $slave_dbh1 = $sb->get_dbh_for('slave1');
my $slave_dbh2 = $sb->get_dbh_for('slave2');
my $master_dsn = 'h=127.0.0.1,P=12345,u=msandbox,p=msandbox';
my $slave_dsn1 = 'h=127.0.0.1,P=12346,u=msandbox,p=msandbox';
my $slave_dsn2 = 'h=127.0.0.1,P=12347,u=msandbox,p=msandbox';
my $sample = "t/pt-online-schema-change/samples";
my $plugin = "$trunk/$sample/plugins";
# We need sync_relay_log=1 to keep changes after replica restart
my $cnf = '/tmp/12347/my.sandbox.cnf';
diag(`cp $cnf $cnf.bak`);
diag(`echo "[mysqld]" > /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "sync_relay_log=1" >> /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "sync_relay_log_info=1" >> /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "relay_log_recovery=1" >> /tmp/12347/my.sandbox.2.cnf`);
diag(`echo "!include /tmp/12347/my.sandbox.2.cnf" >> $cnf`);
diag(`/tmp/12347/stop >/dev/null`);
sleep 1;
diag(`/tmp/12347/start >/dev/null`);
# Flush the query cache on every supplied database handle so cached
# results cannot mask replication lag during the test. MySQL 8.0
# removed the query cache entirely, so this is a no-op there.
sub reset_query_cache {
    my (@handles) = @_;

    # Nothing to reset on MySQL 8.0 and later.
    return if $sandbox_version >= '8.0';

    $_->do('RESET QUERY CACHE') for @handles;
}
# Launch pt-online-schema-change in a child process, deliberately break
# the run by stopping the 12347 replica mid-copy, and return the tool's
# combined STDOUT/STDERR output.
#
# Parameters: $args - command-line argument string for pt-osc.
# Returns:    the captured output of the interrupted run.
sub run_broken_job {
my ($args) = @_;
# The temp file captures the child's output; $fh stays open in the
# parent so the file can be slurped after the child exits.
my ($fh, $filename) = tempfile();
my $pid = fork();
if (!$pid) {
# Child: redirect both streams into the temp file, then replace the
# process image with pt-osc. NOTE(review): open() return values are
# not checked here — TODO confirm this is acceptable for the harness.
open(STDERR, '>', $filename);
open(STDOUT, '>', $filename);
exec("$trunk/bin/pt-online-schema-change $args");
}
# Give the tool time to start copying rows before breaking replication.
sleep($max_lag + $max_lag/2);
# stop slave 12347
diag(`/tmp/12347/stop >/dev/null`);
sleep 1;
# Wait for pt-osc to notice the stopped replica and exit.
waitpid($pid, 0);
# Slurp the entire capture file in one read.
my $output = do {
local $/ = undef;
<$fh>;
};
return $output;
}
# Configure replica 12346 with a MASTER_DELAY of $delay seconds, push a
# couple of updates so the replica actually lags, and snapshot the test
# table into pt1717_back for later data comparison.
# Relies on file globals: $sb, $slave_dbh1, $master_dbh, $delay.
sub set_delay {
$sb->wait_for_slaves();
diag("Setting slave delay to $delay seconds");
# Remove any backup copy left over from a previous invocation.
diag(`/tmp/12345/use -N test -e "DROP TABLE IF EXISTS pt1717_back"`);
$slave_dbh1->do('STOP SLAVE');
$slave_dbh1->do("CHANGE MASTER TO MASTER_DELAY=$delay");
$slave_dbh1->do('START SLAVE');
# Run a full table scan query to ensure the slave is behind the master
# There is no query cache in MySQL 8.0+
# NOTE(review): the master handle is passed twice here; presumably one of
# these was meant to be a slave handle — verify against the original intent.
reset_query_cache($master_dbh, $master_dbh);
# Update one row so slave is delayed
$master_dbh->do('UPDATE `test`.`pt1717` SET f2 = f2 + 1 LIMIT 1');
$master_dbh->do('UPDATE `test`.`pt1717` SET f2 = f2 + 1 WHERE f1 = ""');
# Creating copy of table pt1717, so we can compare data later
diag(`/tmp/12345/use -N test -e "CREATE TABLE pt1717_back like pt1717"`);
diag(`/tmp/12345/use -N test -e "INSERT INTO pt1717_back SELECT * FROM pt1717"`);
}
# 1) Set the slave delay to 0 just in case we are re-running the tests without restarting the sandbox.
# 2) Load sample data
# 3) Set the slave delay to 30 seconds to be able to see the 'waiting' message.
diag("Setting slave delay to 0 seconds");
$slave_dbh1->do('STOP SLAVE');
$master_dbh->do("RESET MASTER");
$slave_dbh1->do('RESET SLAVE');
$slave_dbh1->do('START SLAVE');
diag('Loading test data');
$sb->load_file('master', "t/pt-online-schema-change/samples/pt-1717.sql");
# Should be greater than chunk-size and big enough, so pt-osc will wait for delay
my $num_rows = 5000;
my $chunk_size = 10;
diag("Loading $num_rows into the table. This might take some time.");
diag(`util/mysql_random_data_load --host=127.0.0.1 --port=12345 --user=msandbox --password=msandbox test pt1717 $num_rows`);
diag("Starting tests...");
set_delay();
# We need to sleep, otherwise pt-osc can finish before slave is delayed
sleep($max_lag);
my $args = "$master_dsn,D=test,t=pt1717 --execute --chunk-size ${chunk_size} --max-lag $max_lag --alter 'engine=INNODB' --pid $tmp_file_name --progress time,5 --nodrop-new-table --nodrop-triggers --history";
$output = run_broken_job($args);
like(
$output,
qr/`test`.`pt1717` was not altered/s,
"pt-osc stopped with error as expected",
) or diag($output);
diag(`/tmp/12347/start >/dev/null`);
$sb->wait_for_slaves();
# Read back the history row for the interrupted job and verify that the
# stored upper boundary matches the number of rows actually copied into
# the new table before the interruption. (Also removes a stray doubled
# semicolon after the ok() statement.)
$output = `/tmp/12345/use -N -e "select job_id, upper_boundary from percona.pt_osc_history"`;
my ($job_id, $upper_boundary) = split(/\s+/, $output);
my $copied_rows = `/tmp/12345/use -N -e "select count(*) from test._pt1717_new"`;
chomp($copied_rows);
ok(
    $copied_rows eq $upper_boundary,
    'Upper chunk boundary stored correctly'
) or diag("Copied_rows: ${copied_rows}, upper boundary: ${upper_boundary}");
my @args = (qw(--execute --chunk-size=10 --history));
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--alter', 'engine=INNODB', '--execute', "--resume=${job_id}",
'--chunk-index=f2'
) }
);
is(
$exit,
17,
'pt-osc --resume correctly fails if --chunk-index is different from the --chunk-index in the stored job'
) or diag($exit);
like(
$output,
qr/User-specified chunk index does not match stored one/i,
'Error message printed for the different --chunk-index option'
) or diag($output);
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--max-lag', $max_lag,
'--resume', $job_id,
'--alter', 'engine=INNODB',
'--plugin', "$plugin/pt-1717.pm",
),
},
);
$output =~ /.*Chunk: (\d+)\n/ms;
my $last_chunk = int($1);
ok(
$last_chunk * $chunk_size + int($copied_rows) == $num_rows,
'Tool inserted only missed rows in the second run'
) or diag("Last chunk: ${last_chunk}, copied rows: ${copied_rows}");
# Compare table contents via CHECKSUM TABLE. The numeric checksum must be
# extracted because the command output also contains the table name, which
# differs between the two tables. The previous code assigned the return
# value of diag() — which is always false — to both variables, so the
# comparison could never fail.
my ($new_table_checksum) = `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717"` =~ /(\d+)\s*\z/;
my ($old_table_checksum) = `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717_back"` =~ /(\d+)\s*\z/;
ok(
    defined $new_table_checksum && defined $old_table_checksum
        && $new_table_checksum eq $old_table_checksum,
    'All rows copied correctly'
) or diag("New table checksum: '" . ($new_table_checksum // '') . "', original content checksum: '" . ($old_table_checksum // '') . "'");
# Tests for chunk-index and chunk-index-columns options
$args = "$master_dsn,D=test,t=pt1717 --alter engine=innodb --execute --history --chunk-size=10 --no-drop-new-table --no-drop-triggers --reverse-triggers --chunk-index=f2";
set_delay();
$output = run_broken_job($args);
diag(`/tmp/12347/start >/dev/null`);
$output =~ /History saved. Job id: (\d+)/ms;
$job_id = $1;
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--alter', 'engine=innodb', '--execute', "--resume=${job_id}",
) }
);
is(
$exit,
17,
'pt-osc --resume correctly fails if --chunk-index option not specified for the job run with custom --chunk-index'
) or diag($exit);
like(
$output,
qr/User-specified chunk index does not match stored one/i,
'Error message printed for the missed --chunk-index option'
) or diag($output);
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--alter', 'engine=innodb', '--execute', "--resume=${job_id}",
'--chunk-index=f1'
) }
);
is(
$exit,
17,
'pt-osc --resume correctly fails if --chunk-index is different from the --chunk-index in the stored job'
) or diag($exit);
like(
$output,
qr/User-specified chunk index does not match stored one/i,
'Error message printed for the different --chunk-index option'
) or diag($output);
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--alter', 'engine=innodb', '--execute', "--resume=${job_id}",
'--chunk-index=f2', '--chunk-index-columns=1'
) }
);
is(
$exit,
17,
'pt-osc --resume correctly fails if --chunk-index-columns is different from the --chunk-index-columns in the stored job'
) or diag($exit);
like(
$output,
qr/User-specified chunk index does not match stored one/i,
'Error message printed for the different --chunk-index-columns option'
) or diag($output);
$output = `/tmp/12345/use -N -e "select count(*) from information_schema.tables where TABLE_SCHEMA='test' and table_name like '%pt1717%' and table_name != 'pt1717_back'"`;
is(
$output + 0,
2,
'Table was not dropped'
);
$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND EVENT_OBJECT_TABLE='pt1717' AND trigger_name NOT LIKE 'rt_%'"`;
is(
$output + 0,
3,
'Triggers were not dropped'
);
$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND EVENT_OBJECT_TABLE like '%pt1717%_new' AND trigger_name LIKE 'rt_%'"`;
is(
$output + 0,
3,
'Reverse triggers were not dropped'
);
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--alter', 'engine=innodb', '--execute', "--resume=${job_id}",
'--chunk-size=4',
'--chunk-index=f2'
) }
);
is(
$exit,
0,
'pt-osc --resume finishes correctly if --chunk-index option points to the same index as previous job run'
) or diag($output);
$output = `/tmp/12345/use -N -e "select count(*) from information_schema.tables where TABLE_SCHEMA='test' and table_name like '%pt1717%' and table_name != 'pt1717_back'"`;
is(
$output + 0,
1,
'Table was dropped after successful change'
);
$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND EVENT_OBJECT_TABLE = 'pt1717' AND TRIGGER_NAME NOT LIKE 'rt_%'"`;
is(
$output + 0,
0,
'Triggers were dropped after successful change'
);
$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND EVENT_OBJECT_TABLE = 'pt1717' AND TRIGGER_NAME LIKE 'rt_%'"`;
is(
$output + 0,
3,
'Reverse triggers were dropped after successful change'
);
# Final data comparison after the successful resume. Extract the numeric
# checksum (CHECKSUM TABLE output includes the table name) instead of
# comparing diag()'s always-false return value, which made the original
# check pass unconditionally.
($new_table_checksum) = `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717"` =~ /(\d+)\s*\z/;
($old_table_checksum) = `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717_back"` =~ /(\d+)\s*\z/;
ok(
    defined $new_table_checksum && defined $old_table_checksum
        && $new_table_checksum eq $old_table_checksum,
    'All rows copied correctly'
) or diag("New table checksum: '" . ($new_table_checksum // '') . "', original content checksum: '" . ($old_table_checksum // '') . "'");
`/tmp/12345/use test -N -e "UPDATE percona.pt_osc_history SET done = 'no' where job_id='${job_id}'"`;
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--alter', 'engine=innodb', '--execute', "--resume=${job_id}",
'--chunk-size=4',
'--chunk-index=f2'
) }
);
is(
$exit,
17,
'--resume expectedly fails when new table does not exists'
);
like(
$output,
qr/New table `test`.`[_]+pt1717_new` not found, restart operation from scratch/i,
'Correct error message printed for the missed new table'
) or diag($output);
$output =~ /New table `test`.`([_]+pt1717_new)` not found, restart operation from scratch/i;
`/tmp/12345/use test -N -e "CREATE TABLE $1 LIKE pt1717"`;
($output, $exit) = full_output(
sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717",
'--alter', 'engine=innodb', '--execute', "--resume=${job_id}",
'--chunk-size=4',
'--chunk-index=f2'
) }
);
is(
$exit,
17,
'--resume expectedly fails when triggers do not exists'
);
like(
$output,
qr/Trigger test.pt_osc_test_pt1717_\w{3} not found, restart operation from scratch to avoid data loss/i,
'Correct error message printed for the missed triggers'
) or diag($output);
# #############################################################################
# Done.
# #############################################################################
diag("Cleaning");
$slave_dbh2 = $sb->get_dbh_for('slave2');
diag("Setting slave delay to 0 seconds");
$slave_dbh1->do('STOP SLAVE');
$slave_dbh2->do('STOP SLAVE');
$master_dbh->do('RESET MASTER');
$slave_dbh1->do('RESET MASTER');
$slave_dbh1->do('RESET SLAVE');
$slave_dbh2->do('RESET SLAVE');
$slave_dbh1->do('START SLAVE');
$slave_dbh2->do('START SLAVE');
diag(`mv $cnf.bak $cnf`);
diag(`/tmp/12347/stop >/dev/null`);
diag(`/tmp/12347/start >/dev/null`);
diag("Dropping test database");
$master_dbh->do("DROP DATABASE IF EXISTS test");
$sb->wait_for_slaves();
$sb->wipe_clean($master_dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;
@@ -0,0 +1,15 @@
DROP DATABASE IF EXISTS test;
CREATE DATABASE test;
USE test;
DROP TABLE IF EXISTS `pt1717`;
/*!40101 SET @saved_cs_client = @@character_set_client */;
/*!40101 SET character_set_client = utf8 */;
CREATE TABLE `pt1717` (
`id` int(11) NOT NULL AUTO_INCREMENT,
f1 VARCHAR(30) DEFAULT '',
f2 BIGINT(11) DEFAULT 0,
PRIMARY KEY(id),
KEY(f2),
KEY(f1, f2)
) ENGINE=InnoDB;