From 2d4f5c290a1410299f3d7e8a7ca232a345bd06ee Mon Sep 17 00:00:00 2001 From: Sveta Smirnova Date: Tue, 12 Mar 2024 22:12:48 +0300 Subject: [PATCH] PT-1717 - resume pt-online-schema-change if it's interrupted - Now --resume dies if new table or triggers do not exist - New table and triggers are not removed if --resume fails during these checkups - Added sample file and more tests for --resume --- bin/pt-online-schema-change | 217 ++++++--- t/pt-online-schema-change/pt-1717-errors.t | 205 --------- .../{pt-1717.t => pt-1717-history.t} | 20 +- t/pt-online-schema-change/pt-1717-resume.t | 419 ++++++++++++++++++ t/pt-online-schema-change/samples/pt-1717.sql | 15 + 5 files changed, 583 insertions(+), 293 deletions(-) delete mode 100644 t/pt-online-schema-change/pt-1717-errors.t rename t/pt-online-schema-change/{pt-1717.t => pt-1717-history.t} (92%) create mode 100644 t/pt-online-schema-change/pt-1717-resume.t create mode 100644 t/pt-online-schema-change/samples/pt-1717.sql diff --git a/bin/pt-online-schema-change b/bin/pt-online-schema-change index e52bd006..980f8cd0 100755 --- a/bin/pt-online-schema-change +++ b/bin/pt-online-schema-change @@ -8812,13 +8812,15 @@ sub main { PTDEBUG && _d('Last chunk:', Dumper($last_chunk)); if ( !$last_chunk || !$last_chunk->{new_table_name} ) { - _die("Option --resume refers non-existing chunk: ${old_job_id}. Exiting." + $oktorun = undef; + _die("Option --resume refers non-existing job ID: ${old_job_id}. Exiting." , UNSUPPORTED_OPERATION); } if ( $last_chunk->{db} ne $db || $last_chunk->{tbl} ne $tbl || $last_chunk->{altr} ne $o->get('alter') ){ + $oktorun = undef; _die("Essential options for the failed job are different from current:\n" . "Database: failed - $last_chunk->{db}, current - ${db}\n" . "Table: failed - $last_chunk->{tbl}, current - ${tbl}\n" @@ -8828,10 +8830,29 @@ sub main { } if ( $last_chunk->{done} eq 'yes' ) { + $oktorun = undef; _die("Copying rows for job ${old_job_id} finished.\n" . "Nothing to do. Exiting." 
, UNSUPPORTED_OPERATION); } + + my $json = JSON->new->allow_nonref; + my $opt_hash = $json->decode($last_chunk->{args}); + if ( ( ($opt_hash->{'chunk-index'} // '') ne ($o->get('chunk-index') // '') ) + || ( ($opt_hash->{'chunk-index-columns'} // '') + ne ($o->get('chunk-index-columns') // '') ) + ) { + $oktorun = undef; + _die("User-specified chunk index does not match stored one\n" + . "Stored chunk index: " . ($opt_hash->{'chunk-index'}//'') . "\n" + . "User-specified chunk index: " . ($o->get('chunk-index')//'') . "\n" + . "Stored value of --chunk-index-columns: " + . ($opt_hash->{'chunk-index-columns'}//'') . "\n" + . "User-specified value of --chunk-index-columns: " + . ($o->get('chunk-index-columns')//'') . "\n" + . "Cannot resume job ${old_job_id}. Exiting." + , UNSUPPORTED_OPERATION); + } } my $job_id; @@ -9042,7 +9063,7 @@ sub main { my $is_skip = 0; for my $slave_to_skip (@$slaves_to_skip) { if ( $slave->{dsn}->{h} eq $slave_to_skip->{h} ) { - my $skip_slave_port = defined($slave_to_skip->{P}) + my $skip_slave_port = defined($slave_to_skip->{P}) ? $slave_to_skip->{P} : '3306'; if ( ($slave->{dsn}->{P} eq $skip_slave_port) ) { print "Skipping slave " . $slave->description() . "\n"; @@ -9587,6 +9608,19 @@ sub main { tbl => $new_table_name, name => $q->quote($orig_tbl->{db}, $new_table_name), }; + + my $sql = "SELECT COUNT(*) AS c FROM information_schema.tables " + . "WHERE TABLE_SCHEMA = ? and TABLE_NAME = ?"; + PTDEBUG && _d($sql); + my $sth = $cxn->dbh()->prepare($sql); + $sth->execute($new_tbl->{db}, $new_tbl->{tbl}); + my $cnt = $sth->fetchrow_hashref(); + $sth->finish(); + PTDEBUG && _d("Found table: $cnt->{c}"); + if ( !$cnt->{c} || int($cnt->{c}) != 1 ) { + _die("New table $new_tbl->{name} not found, restart operation from scratch" + , UNSUPPORTED_OPERATION); + } } else { $new_table_name = $o->get('new-table-name'); @@ -9621,7 +9655,7 @@ sub main { "UPDATE ${hist_table} SET new_table_name = ?" 
); $sth->execute($new_tbl->{tbl}); - } + } # If the new table still exists, drop it unless the tool was interrupted. push @cleanup_tasks, sub { @@ -9637,8 +9671,7 @@ sub main { my $sql = "DROP TABLE IF EXISTS $new_tbl->{name};"; if ( !$oktorun ) { # The tool was interrupted, so do not drop the new table - # in case the user wants to resume (once resume capability - # is implemented). + # in case the user wants to resume. print "Not dropping the new table $new_tbl->{name} because " . "the tool was interrupted. To drop the new table, " . "execute:\n$sql\n"; @@ -9904,69 +9937,117 @@ sub main { } }; - if ( !$o->got('resume') ) { - # --plugin hook - if ( $plugin && $plugin->can('before_create_triggers') ) { - $plugin->before_create_triggers(); - } + # --plugin hook + if ( $plugin && $plugin->can('before_create_triggers') ) { + $plugin->before_create_triggers(); + } - my @trigger_names = eval { + my @trigger_names = eval { + create_triggers( + orig_tbl => $orig_tbl, + new_tbl => $new_tbl, + del_tbl => $del_tbl, + columns => \@common_cols, + Cxn => $cxn, + Quoter => $q, + OptionParser => $o, + Retry => $retry, + tries => $tries, + stats => \%stats, + dont => $o->got('resume'), + ); + }; + if ( $EVAL_ERROR ) { + $oktorun = undef; + _die("Error creating triggers: $EVAL_ERROR", ERROR_CREATING_TRIGGERS); + }; + + # We do not create triggers if option --resume is provided + # but we need to check if triggers exist + if ( $o->got('resume') ) { + my $sql = "SELECT COUNT(*) AS c FROM " + . "INFORMATION_SCHEMA.TRIGGERS " + . "WHERE TRIGGER_SCHEMA = ? AND TRIGGER_NAME = ?"; + my $sth = $cxn->dbh()->prepare($sql); + foreach my $trigger_name ( @trigger_names ) { + PTDEBUG && _d("Checking trigger: $orig_tbl->{db}.$trigger_name"); + $sth->execute($orig_tbl->{db}, $trigger_name); + my $cnt = $sth->fetchrow_hashref(); + PTDEBUG && _d("Found table: $cnt->{c}"); + if ( !$cnt->{c} || int($cnt->{c}) != 1 ) { + $oktorun = undef; + _die("Trigger $orig_tbl->{db}.$trigger_name not found, " + . 
"restart operation from scratch to avoid data loss" + , UNSUPPORTED_OPERATION); + } + } + $sth->finish(); + } + + if ( $o->get('reverse-triggers') ) { + print "Adding reverse triggers\n"; + my $old_tbl_name = '_'.$orig_tbl->{tbl}.'_old'; + my $new_tbl_name = '_'.$orig_tbl->{tbl}.'_new'; + + my $old_tbl = { + db => $orig_tbl->{db}, + name => '`'.$orig_tbl->{db}.'`.`'.$old_tbl_name.'`', + tbl => $old_tbl_name, + }; + my $new_tbl = { + db => $orig_tbl->{db}, + name => '`'.$orig_tbl->{db}.'`.`'.$new_tbl_name.'`', + tbl => $new_tbl_name, + }; + + my @reverse_trigger_names=eval { create_triggers( - orig_tbl => $orig_tbl, - new_tbl => $new_tbl, - del_tbl => $del_tbl, - columns => \@common_cols, - Cxn => $cxn, - Quoter => $q, - OptionParser => $o, - Retry => $retry, - tries => $tries, - stats => \%stats, + orig_tbl => $new_tbl, + new_tbl => $old_tbl, + del_tbl => $orig_tbl, + columns => \@common_cols, + Cxn => $cxn, + Quoter => $q, + OptionParser => $o, + Retry => $retry, + tries => $tries, + stats => \%stats, + reverse_triggers => 1, + dont => $o->got('resume'), ); }; if ( $EVAL_ERROR ) { - _die("Error creating triggers: $EVAL_ERROR", ERROR_CREATING_TRIGGERS); + $oktorun = undef; + _die("Error creating reverse triggers: $EVAL_ERROR", ERROR_CREATING_REVERSE_TRIGGERS); }; - if ( $o->get('reverse-triggers') ) { - print "Adding reverse triggers\n"; - eval { - my $old_tbl_name = '_'.$orig_tbl->{tbl}.'_old'; - my $new_tbl_name = '_'.$orig_tbl->{tbl}.'_new'; - - my $old_tbl = { - db => $orig_tbl->{db}, - name => '`'.$orig_tbl->{db}.'`.`'.$old_tbl_name.'`', - tbl => $old_tbl_name, - }; - my $new_tbl = { - db => $orig_tbl->{db}, - name => '`'.$orig_tbl->{db}.'`.`'.$new_tbl_name.'`', - tbl => $new_tbl_name, - }; - my $triggers=create_triggers( - orig_tbl => $new_tbl, - new_tbl => $old_tbl, - del_tbl => $orig_tbl, - columns => \@common_cols, - Cxn => $cxn, - Quoter => $q, - OptionParser => $o, - Retry => $retry, - tries => $tries, - stats => \%stats, - reverse_triggers => 1, - ); - 
}; - if ( $EVAL_ERROR ) { - _die("Error creating reverse triggers: $EVAL_ERROR", ERROR_CREATING_REVERSE_TRIGGERS); - }; + # We do not create reverse triggers if option --resume is provided + # but we need to check if triggers exist + if ( $o->got('resume') ) { + my $sql = "SELECT COUNT(*) AS c FROM " + . "INFORMATION_SCHEMA.TRIGGERS " + . "WHERE TRIGGER_SCHEMA = ? AND TRIGGER_NAME = ?"; + my $sth = $cxn->dbh()->prepare($sql); + foreach my $trigger_name ( @reverse_trigger_names ) { + PTDEBUG && _d("Checking reverse trigger: $orig_tbl->{db}.$trigger_name"); + $sth->execute($orig_tbl->{db}, $trigger_name); + my $cnt = $sth->fetchrow_hashref(); + PTDEBUG && _d("Found table: $cnt->{c}"); + if ( !$cnt->{c} || int($cnt->{c}) != 1 ) { + $oktorun = undef; + _die("Reverse trigger $orig_tbl->{db}.$trigger_name not found, " + . "restart operation from scratch to avoid data loss" + , UNSUPPORTED_OPERATION); + } + } + $sth->finish(); } - # --plugin hook - if ( $plugin && $plugin->can('after_create_triggers') ) { - $plugin->after_create_triggers(); - } + } + + # --plugin hook + if ( $plugin && $plugin->can('after_create_triggers') ) { + $plugin->after_create_triggers(); } # ##################################################################### @@ -11810,7 +11891,7 @@ sub create_triggers { # If --preserve-triggers was specified, try to create the original triggers into the new table. # We are doing this to ensure the original triggers will work in the new modified table # and we want to know this BEFORE copying all rows from the old table to the new one. - if ($o->get('preserve-triggers')) { + if ($o->get('preserve-triggers') && !$args{dont}) { foreach my $trigger_info (@$triggers_info) { foreach my $orig_trigger (@{$trigger_info->{orig_triggers}}) { my $definer = $orig_trigger->{definer} || ''; @@ -12656,8 +12737,8 @@ Prompt for a password when connecting to MySQL. 
This option modifies the behavior of L<"--history"> such that the history table's upper and lower boundary columns are created with the BLOB data type. -This is useful in cases where you changing large tables with keys that -include a binary data type or that have non-standard character sets. +This is useful when you change large tables with keys that include a binary +data type or that have non-standard character sets. See L<"--history"> and L<"--resume">. =item --channel @@ -13023,8 +13104,6 @@ Show help and exit. =item --history -default: 0 - Write job progress to a table. Unfinished jobs may be restarted by the option L<"--resume">. The history table must have this structure (MAGIC_create_pt_osc_history): @@ -13317,11 +13396,11 @@ type: int Resume altering table from the last completed chunk. If the tool stops before it finishes copying rows, this option makes copying resume from the last chunk copied. -The option accepts ID of the failed job. This ID is printed when pt-online-schema-change +The option accepts the ID of the failed job. This ID is printed when pt-online-schema-change is running with option L<"--history"> and stored in the L<"--history-table">. -Warning! To use this option previous, failed, run of pt-online-schema-change should use options -L<"--history">, L<"--nodrop-new-table">, and L<"--nodrop-triggers">. Otherwise, +Warning! To use this option, the previous, failed run of pt-online-schema-change must have used options +L<"--history">, L<"--nodrop-new-table">, and L<"--nodrop-triggers">. Otherwise, pt-online-schema-change would not be able to resume. =item --skip-check-slave-lag @@ -13521,8 +13600,8 @@ keyword. You might need to quote the value. Here is an example: pt-online-schema-change --where "id > 12345678" -IMPORTANT. If used without options --no-drop-new-table and --no-swap-tables -may lead to data loss, therefore this operation only allowed if option --force +Warning! 
If used without options L<"--no-drop-new-table"> and L<"--no-swap-tables">, this option +may lead to data loss, therefore this operation is only allowed if option L<"--force"> +is also specified. =item --[no]fail-on-stopped-replication diff --git a/t/pt-online-schema-change/pt-1717-errors.t b/t/pt-online-schema-change/pt-1717-errors.t deleted file mode 100644 index db6da70e..00000000 --- a/t/pt-online-schema-change/pt-1717-errors.t +++ /dev/null @@ -1,205 +0,0 @@ -#!/usr/bin/env perl - -BEGIN { - die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" - unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; - unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; -}; - -use strict; -use warnings FATAL => 'all'; -use threads; - -use English qw(-no_match_vars); -use Test::More; - -use Data::Dumper; -use PerconaTest; -use Sandbox; -use SqlModes; -use File::Temp qw/ tempdir tempfile /; - -our $delay = 10; -my $output; -my $exit; - -my $tmp_file = File::Temp->new(); -my $tmp_file_name = $tmp_file->filename; -unlink $tmp_file_name; - -require "$trunk/bin/pt-online-schema-change"; - -my $dp = new DSNParser(opts=>$dsn_opts); -my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); -if ($sb->is_cluster_mode) { - plan skip_all => 'Not for PXC'; -} - -my $master_dbh = $sb->get_dbh_for('master'); -my $slave_dbh1 = $sb->get_dbh_for('slave1'); -my $slave_dbh2 = $sb->get_dbh_for('slave2'); -my $master_dsn = 'h=127.0.0.1,P=12345,u=msandbox,p=msandbox'; -my $slave_dsn1 = 'h=127.0.0.1,P=12346,u=msandbox,p=msandbox'; -my $slave_dsn2 = 'h=127.0.0.1,P=12347,u=msandbox,p=msandbox'; -my $sample = "t/pt-online-schema-change/samples"; -my $plugin = "$trunk/$sample/plugins"; - -# We need sync_relay_log=1 to keep changes after replica restart -my $cnf = '/tmp/12347/my.sandbox.cnf'; -diag(`cp $cnf $cnf.bak`); -diag(`echo "[mysqld]" > /tmp/12347/my.sandbox.2.cnf`); -diag(`echo "sync_relay_log=1" >> /tmp/12347/my.sandbox.2.cnf`); -diag(`echo "sync_relay_log_info=1" >> 
/tmp/12347/my.sandbox.2.cnf`); -diag(`echo "relay_log_recovery=1" >> /tmp/12347/my.sandbox.2.cnf`); -diag(`echo "!include /tmp/12347/my.sandbox.2.cnf" >> $cnf`); -diag(`/tmp/12347/stop >/dev/null`); -sleep 1; -diag(`/tmp/12347/start >/dev/null`); - -sub reset_query_cache { - my @dbhs = @_; - return if ($sandbox_version >= '8.0'); - foreach my $dbh (@dbhs) { - $dbh->do('RESET QUERY CACHE'); - } -} - -# 1) Set the slave delay to 0 just in case we are re-running the tests without restarting the sandbox. -# 2) Load sample data -# 3) Set the slave delay to 30 seconds to be able to see the 'waiting' message. -diag("Setting slave delay to 0 seconds"); -$slave_dbh1->do('STOP SLAVE'); -$master_dbh->do("RESET MASTER"); -$slave_dbh1->do('RESET SLAVE'); -$slave_dbh1->do('START SLAVE'); - -diag('Loading test data'); -$sb->load_file('master', "t/pt-online-schema-change/samples/slave_lag.sql"); - -# Should be greater than chunk-size and big enough, so pt-osc will wait for delay -my $num_rows = 5000; -my $chunk_size = 10; -diag("Loading $num_rows into the table. 
This might take some time."); -diag(`util/mysql_random_data_load --host=127.0.0.1 --port=12345 --user=msandbox --password=msandbox test pt178 $num_rows`); - -$sb->wait_for_slaves(); - -diag("Setting slave delay to $delay seconds"); - -$slave_dbh1->do('STOP SLAVE'); -$slave_dbh1->do("CHANGE MASTER TO MASTER_DELAY=$delay"); -$slave_dbh1->do('START SLAVE'); - -# Run a full table scan query to ensure the slave is behind the master -# There is no query cache in MySQL 8.0+ -reset_query_cache($master_dbh, $master_dbh); -# Update one row so slave is delayed -$master_dbh->do('UPDATE `test`.`pt178` SET f2 = f2 + 1 LIMIT 1'); -$master_dbh->do('UPDATE `test`.`pt178` SET f2 = f2 + 1 WHERE f1 = ""'); - -diag("Starting tests..."); - -my $max_lag = $delay / 2; -# We need to sleep, otherwise pt-osc can finish before slave is delayed -sleep($max_lag); - -my $args = "$master_dsn,D=test,t=pt178 --execute --chunk-size ${chunk_size} --max-lag $max_lag --alter 'engine=INNODB' --pid $tmp_file_name --progress time,5 --nodrop-new-table --nodrop-triggers --history"; - - my ($fh, $filename) = tempfile(); - my $pid = fork(); - - if (!$pid) { - open(STDERR, '>', $filename); - open(STDOUT, '>', $filename); - exec("$trunk/bin/pt-online-schema-change $args"); - } - - sleep($max_lag + $max_lag/2); - # restart slave 12347 - diag(`/tmp/12347/stop >/dev/null`); - sleep 1; - - waitpid($pid, 0); - $output = do { - local $/ = undef; - <$fh>; - }; - -like( - $output, - qr/`test`.`pt178` was not altered/s, - "pt-osc stopped with error as expected", -) or diag($output); - -diag(`/tmp/12347/start >/dev/null`); - -# Creating copy of table pt178, so we can compare data later -diag(`/tmp/12345/use -N test -e "CREATE TABLE pt178_back like pt178"`); -diag(`/tmp/12345/use -N test -e "INSERT INTO pt178_back SELECT * FROM pt178"`); - -$output = `/tmp/12345/use -N -e "select job_id, upper_boundary from percona.pt_osc_history"`; -my ($job_id, $upper_boundary) = split(/\s+/, $output); - -my $copied_rows = 
`/tmp/12345/use -N -e "select count(*) from test._pt178_new"`; -chomp($copied_rows); - -ok( - $copied_rows eq $upper_boundary, - 'Upper chunk boundary stored correctly' -) or diag("Copied_rows: ${copied_rows}, upper boundary: ${upper_boundary}");; - -my @args = (qw(--execute --chunk-size=10 --nodrop-new-table --nodrop-triggers --history)); - -($output, $exit) = full_output( - sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt178", - '--max-lag', $max_lag, - '--resume', $job_id, - '--alter', 'engine=INNODB', - #'--progress', 'time,1', - '--plugin', "$plugin/pt-1717.pm", - ), - }, -); - -$output =~ /.*Chunk: (\d+)\n/ms; -my $last_chunk = int($1); - -ok( - $last_chunk * $chunk_size + int($copied_rows) == $num_rows, - 'Tool inserted only missed rows in the second run' -) or diag("Last chunk: ${last_chunk}, copied rows: ${copied_rows}"); - -my $new_table_checksum = diag(`/tmp/12345/use test -N -e "CHECKSUM TABLE pt178"`); -my $old_table_checksum = diag(`/tmp/12345/use test -N -e "CHECKSUM TABLE pt178_back"`); - -ok( - $new_table_checksum eq $old_table_checksum, - 'All rows copied correctly' -) or diag("New table checksum: ${new_table_checksum}, original content checksum: ${old_table_checksum}"); - -# ############################################################################# -# Done. 
-# ############################################################################# -diag("Cleaning"); -$slave_dbh2 = $sb->get_dbh_for('slave2'); -diag("Setting slave delay to 0 seconds"); -$slave_dbh1->do('STOP SLAVE'); -$slave_dbh2->do('STOP SLAVE'); -$master_dbh->do("RESET MASTER"); -$slave_dbh1->do('RESET SLAVE'); -$slave_dbh2->do('RESET SLAVE'); -$slave_dbh1->do('START SLAVE'); -$slave_dbh2->do('START SLAVE'); - -diag(`mv $cnf.bak $cnf`); - -diag(`/tmp/12347/stop >/dev/null`); -diag(`/tmp/12347/start >/dev/null`); - -diag("Dropping test database"); -$master_dbh->do("DROP DATABASE IF EXISTS test"); -$sb->wait_for_slaves(); - -$sb->wipe_clean($master_dbh); -ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); -done_testing; diff --git a/t/pt-online-schema-change/pt-1717.t b/t/pt-online-schema-change/pt-1717-history.t similarity index 92% rename from t/pt-online-schema-change/pt-1717.t rename to t/pt-online-schema-change/pt-1717-history.t index c7fc2f47..44cd3355 100644 --- a/t/pt-online-schema-change/pt-1717.t +++ b/t/pt-online-schema-change/pt-1717-history.t @@ -38,25 +38,6 @@ my $sample = "t/pt-online-schema-change/samples"; $sb->load_file('master', "$sample/basic_no_fks_innodb.sql"); -# First test option --history -# * - Test done for the development step -# ** - Test done for two development steps -# 1.** If table percona.pt_osc not created when option not specified -# 2. If table percona.pt_osc created when option present -# 2.1.** Default name -# 2.2.** Custom name -# 2.3.** Second run should not fail or modify this table (except inserting a row for new job) -# 2.4.** Case for binary index -# 2.5.** Second run for the binary index -# 2.6.** Case for invalid existing table -# 2.7.** Case for invalid existing table and binary index -# 3.** Inserting db, tbl, alter, args -# 4. Updating lower and upper boundaries -# 4.1. In situation when pt-osc finishes correctly -# 4.1.1.* `done` set to 'yes' -# 4.2. In failures -# 4.2.1. 
`done` set to 'no' - ($output, $exit) = full_output( sub { pt_online_schema_change::main(@args, "$dsn,D=pt_osc,t=t", '--alter', 'engine=innodb', '--execute') } @@ -319,6 +300,7 @@ is( ) or diag($output); $output = `/tmp/12345/use -N -e "select count(*) from information_schema.tables where TABLE_SCHEMA='pt_1717' and table_name='pt_1717_history'"`; + is( $output + 0, 1, diff --git a/t/pt-online-schema-change/pt-1717-resume.t b/t/pt-online-schema-change/pt-1717-resume.t new file mode 100644 index 00000000..b7618803 --- /dev/null +++ b/t/pt-online-schema-change/pt-1717-resume.t @@ -0,0 +1,419 @@ +#!/usr/bin/env perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use threads; + +use English qw(-no_match_vars); +use Test::More; + +use Data::Dumper; +use PerconaTest; +use Sandbox; +use SqlModes; +use File::Temp qw/ tempdir tempfile /; + +our $delay = 10; +my $max_lag = $delay / 2; +my $output; +my $exit; + +my $tmp_file = File::Temp->new(); +my $tmp_file_name = $tmp_file->filename; +unlink $tmp_file_name; + +require "$trunk/bin/pt-online-schema-change"; + +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +if ($sb->is_cluster_mode) { + plan skip_all => 'Not for PXC'; +} + +my $master_dbh = $sb->get_dbh_for('master'); +my $slave_dbh1 = $sb->get_dbh_for('slave1'); +my $slave_dbh2 = $sb->get_dbh_for('slave2'); +my $master_dsn = 'h=127.0.0.1,P=12345,u=msandbox,p=msandbox'; +my $slave_dsn1 = 'h=127.0.0.1,P=12346,u=msandbox,p=msandbox'; +my $slave_dsn2 = 'h=127.0.0.1,P=12347,u=msandbox,p=msandbox'; +my $sample = "t/pt-online-schema-change/samples"; +my $plugin = "$trunk/$sample/plugins"; + +# We need sync_relay_log=1 to keep changes after replica restart +my $cnf = '/tmp/12347/my.sandbox.cnf'; +diag(`cp $cnf $cnf.bak`); 
+diag(`echo "[mysqld]" > /tmp/12347/my.sandbox.2.cnf`); +diag(`echo "sync_relay_log=1" >> /tmp/12347/my.sandbox.2.cnf`); +diag(`echo "sync_relay_log_info=1" >> /tmp/12347/my.sandbox.2.cnf`); +diag(`echo "relay_log_recovery=1" >> /tmp/12347/my.sandbox.2.cnf`); +diag(`echo "!include /tmp/12347/my.sandbox.2.cnf" >> $cnf`); +diag(`/tmp/12347/stop >/dev/null`); +sleep 1; +diag(`/tmp/12347/start >/dev/null`); + +sub reset_query_cache { + my @dbhs = @_; + return if ($sandbox_version >= '8.0'); + foreach my $dbh (@dbhs) { + $dbh->do('RESET QUERY CACHE'); + } +} + +sub run_broken_job { + my ($args) = @_; + my ($fh, $filename) = tempfile(); + my $pid = fork(); + + if (!$pid) { + open(STDERR, '>', $filename); + open(STDOUT, '>', $filename); + exec("$trunk/bin/pt-online-schema-change $args"); + } + + sleep($max_lag + $max_lag/2); + # stop slave 12347 + diag(`/tmp/12347/stop >/dev/null`); + sleep 1; + + waitpid($pid, 0); + my $output = do { + local $/ = undef; + <$fh>; + }; + + return $output; +} + +sub set_delay { + $sb->wait_for_slaves(); + + diag("Setting slave delay to $delay seconds"); + diag(`/tmp/12345/use -N test -e "DROP TABLE IF EXISTS pt1717_back"`); + + $slave_dbh1->do('STOP SLAVE'); + $slave_dbh1->do("CHANGE MASTER TO MASTER_DELAY=$delay"); + $slave_dbh1->do('START SLAVE'); + + # Run a full table scan query to ensure the slave is behind the master + # There is no query cache in MySQL 8.0+ + reset_query_cache($master_dbh, $master_dbh); + # Update one row so slave is delayed + $master_dbh->do('UPDATE `test`.`pt1717` SET f2 = f2 + 1 LIMIT 1'); + $master_dbh->do('UPDATE `test`.`pt1717` SET f2 = f2 + 1 WHERE f1 = ""'); + + # Creating copy of table pt1717, so we can compare data later + diag(`/tmp/12345/use -N test -e "CREATE TABLE pt1717_back like pt1717"`); + diag(`/tmp/12345/use -N test -e "INSERT INTO pt1717_back SELECT * FROM pt1717"`); +} + +# 1) Set the slave delay to 0 just in case we are re-running the tests without restarting the sandbox. 
+# 2) Load sample data +# 3) Set the slave delay to $delay seconds to be able to see the 'waiting' message. +diag("Setting slave delay to 0 seconds"); +$slave_dbh1->do('STOP SLAVE'); +$master_dbh->do("RESET MASTER"); +$slave_dbh1->do('RESET SLAVE'); +$slave_dbh1->do('START SLAVE'); + +diag('Loading test data'); +$sb->load_file('master', "t/pt-online-schema-change/samples/pt-1717.sql"); + +# Should be greater than chunk-size and big enough, so pt-osc will wait for delay +my $num_rows = 5000; +my $chunk_size = 10; +diag("Loading $num_rows into the table. This might take some time."); +diag(`util/mysql_random_data_load --host=127.0.0.1 --port=12345 --user=msandbox --password=msandbox test pt1717 $num_rows`); + +diag("Starting tests..."); + +set_delay(); + +# We need to sleep, otherwise pt-osc can finish before slave is delayed +sleep($max_lag); + +my $args = "$master_dsn,D=test,t=pt1717 --execute --chunk-size ${chunk_size} --max-lag $max_lag --alter 'engine=INNODB' --pid $tmp_file_name --progress time,5 --nodrop-new-table --nodrop-triggers --history"; + +$output = run_broken_job($args); + +like( + $output, + qr/`test`.`pt1717` was not altered/s, + "pt-osc stopped with error as expected", +) or diag($output); + +diag(`/tmp/12347/start >/dev/null`); +$sb->wait_for_slaves(); + +$output = `/tmp/12345/use -N -e "select job_id, upper_boundary from percona.pt_osc_history"`; +my ($job_id, $upper_boundary) = split(/\s+/, $output); + +my $copied_rows = `/tmp/12345/use -N -e "select count(*) from test._pt1717_new"`; +chomp($copied_rows); + +ok( + $copied_rows eq $upper_boundary, + 'Upper chunk boundary stored correctly' +) or diag("Copied_rows: ${copied_rows}, upper boundary: ${upper_boundary}");; + +my @args = (qw(--execute --chunk-size=10 --history)); + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--alter', 'engine=INNODB', '--execute', "--resume=${job_id}", + '--chunk-index=f2' + ) } +); + +is( + $exit, + 17, + 
'pt-osc --resume correctly fails if --chunk-index is different from the --chunk-index in the stored job' +) or diag($exit); + +like( + $output, + qr/User-specified chunk index does not match stored one/i, + 'Error message printed for the different --chunk-index option' +) or diag($output); + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--max-lag', $max_lag, + '--resume', $job_id, + '--alter', 'engine=INNODB', + '--plugin', "$plugin/pt-1717.pm", + ), + }, +); + +$output =~ /.*Chunk: (\d+)\n/ms; +my $last_chunk = int($1); + +ok( + $last_chunk * $chunk_size + int($copied_rows) == $num_rows, + 'Tool inserted only missed rows in the second run' +) or diag("Last chunk: ${last_chunk}, copied rows: ${copied_rows}"); + +my $new_table_checksum = (split(/\s+/, `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717"`))[1]; # diag() always returns false, so keep only the checksum column of the command output +my $old_table_checksum = (split(/\s+/, `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717_back"`))[1]; # first column is the table name and differs by design + +ok( + $new_table_checksum eq $old_table_checksum, + 'All rows copied correctly' +) or diag("New table checksum: '${new_table_checksum}', original content checksum: '${old_table_checksum}'"); + +# Tests for chunk-index and chunk-index-columns options +$args = "$master_dsn,D=test,t=pt1717 --alter engine=innodb --execute --history --chunk-size=10 --no-drop-new-table --no-drop-triggers --reverse-triggers --chunk-index=f2"; + +set_delay(); +$output = run_broken_job($args); +diag(`/tmp/12347/start >/dev/null`); + +$output =~ /History saved. 
Job id: (\d+)/ms; +$job_id = $1; + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--alter', 'engine=innodb', '--execute', "--resume=${job_id}", + ) } +); + +is( + $exit, + 17, + 'pt-osc --resume correctly fails if --chunk-index option not specified for the job run with custom --chunk-index' +) or diag($exit); + +like( + $output, + qr/User-specified chunk index does not match stored one/i, + 'Error message printed for the missed --chunk-index option' +) or diag($output); + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--alter', 'engine=innodb', '--execute', "--resume=${job_id}", + '--chunk-index=f1' + ) } +); + +is( + $exit, + 17, + 'pt-osc --resume correctly fails if --chunk-index is different from the --chunk-index in the stored job' +) or diag($exit); + +like( + $output, + qr/User-specified chunk index does not match stored one/i, + 'Error message printed for the different --chunk-index option' +) or diag($output); + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--alter', 'engine=innodb', '--execute', "--resume=${job_id}", + '--chunk-index=f2', '--chunk-index-columns=1' + ) } +); + +is( + $exit, + 17, + 'pt-osc --resume correctly fails if --chunk-index-columns is different from the --chunk-index-columns in the stored job' +) or diag($exit); + +like( + $output, + qr/User-specified chunk index does not match stored one/i, + 'Error message printed for the different --chunk-index-columns option' +) or diag($output); + +$output = `/tmp/12345/use -N -e "select count(*) from information_schema.tables where TABLE_SCHEMA='test' and table_name like '%pt1717%' and table_name != 'pt1717_back'"`; + +is( + $output + 0, + 2, + 'Table was not dropped' +); + +$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND 
EVENT_OBJECT_TABLE='pt1717' AND trigger_name NOT LIKE 'rt_%'"`; + +is( + $output + 0, + 3, + 'Triggers were not dropped' +); + +$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND EVENT_OBJECT_TABLE like '%pt1717%_new' AND trigger_name LIKE 'rt_%'"`; + +is( + $output + 0, + 3, + 'Reverse triggers were not dropped' +); + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--alter', 'engine=innodb', '--execute', "--resume=${job_id}", + '--chunk-size=4', + '--chunk-index=f2' + ) } +); + +is( + $exit, + 0, + 'pt-osc --resume finishes correctly if --chunk-index option points to the same index as previous job run' +) or diag($output); + +$output = `/tmp/12345/use -N -e "select count(*) from information_schema.tables where TABLE_SCHEMA='test' and table_name like '%pt1717%' and table_name != 'pt1717_back'"`; + +is( + $output + 0, + 1, + 'Table was dropped after successful change' +); + +$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND EVENT_OBJECT_TABLE = 'pt1717' AND TRIGGER_NAME NOT LIKE 'rt_%'"`; + +is( + $output + 0, + 0, + 'Triggers were dropped after successful change' +); + +$output = `/tmp/12345/use -N -e "select count(*) from information_schema.triggers where TRIGGER_SCHEMA='test' AND EVENT_OBJECT_TABLE = 'pt1717' AND TRIGGER_NAME LIKE 'rt_%'"`; + +is( + $output + 0, + 3, + 'Reverse triggers were not dropped after successful change' +); + +$new_table_checksum = (split(/\s+/, `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717"`))[1]; # diag() always returns false, so keep only the checksum column of the command output +$old_table_checksum = (split(/\s+/, `/tmp/12345/use test -N -e "CHECKSUM TABLE pt1717_back"`))[1]; # first column is the table name and differs by design + +ok( + $new_table_checksum eq $old_table_checksum, + 'All rows copied correctly' +) or diag("New table checksum: '${new_table_checksum}', original content checksum: '${old_table_checksum}'"); + +`/tmp/12345/use test -N -e "UPDATE percona.pt_osc_history SET done = 'no' where 
job_id='${job_id}'"`; + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--alter', 'engine=innodb', '--execute', "--resume=${job_id}", + '--chunk-size=4', + '--chunk-index=f2' + ) } +); + +is( + $exit, + 17, + '--resume expectedly fails when new table does not exists' +); + +like( + $output, + qr/New table `test`.`[_]+pt1717_new` not found, restart operation from scratch/i, + 'Correct error message printed for the missed new table' +) or diag($output); + +$output =~ /New table `test`.`([_]+pt1717_new)` not found, restart operation from scratch/i; + +`/tmp/12345/use test -N -e "CREATE TABLE $1 LIKE pt1717"`; + +($output, $exit) = full_output( + sub { pt_online_schema_change::main(@args, "$master_dsn,D=test,t=pt1717", + '--alter', 'engine=innodb', '--execute', "--resume=${job_id}", + '--chunk-size=4', + '--chunk-index=f2' + ) } +); + +is( + $exit, + 17, + '--resume expectedly fails when triggers do not exists' +); + +like( + $output, + qr/Trigger test.pt_osc_test_pt1717_\w{3} not found, restart operation from scratch to avoid data loss/i, + 'Correct error message printed for the missed triggers' +) or diag($output); + +# ############################################################################# +# Done. 
+# ############################################################################# +diag("Cleaning"); +$slave_dbh2 = $sb->get_dbh_for('slave2'); +diag("Setting slave delay to 0 seconds"); +$slave_dbh1->do('STOP SLAVE'); +$slave_dbh2->do('STOP SLAVE'); +$master_dbh->do('RESET MASTER'); +$slave_dbh1->do('RESET MASTER'); +$slave_dbh1->do('RESET SLAVE'); +$slave_dbh2->do('RESET SLAVE'); +$slave_dbh1->do('START SLAVE'); +$slave_dbh2->do('START SLAVE'); + +diag(`mv $cnf.bak $cnf`); + +diag(`/tmp/12347/stop >/dev/null`); +diag(`/tmp/12347/start >/dev/null`); + +diag("Dropping test database"); +$master_dbh->do("DROP DATABASE IF EXISTS test"); +$sb->wait_for_slaves(); + +$sb->wipe_clean($master_dbh); +ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +done_testing; diff --git a/t/pt-online-schema-change/samples/pt-1717.sql b/t/pt-online-schema-change/samples/pt-1717.sql new file mode 100644 index 00000000..6b5f97c0 --- /dev/null +++ b/t/pt-online-schema-change/samples/pt-1717.sql @@ -0,0 +1,15 @@ +DROP DATABASE IF EXISTS test; +CREATE DATABASE test; +USE test; + +DROP TABLE IF EXISTS `pt1717`; +/*!40101 SET @saved_cs_client = @@character_set_client */; +/*!40101 SET character_set_client = utf8 */; +CREATE TABLE `pt1717` ( + `id` int(11) NOT NULL AUTO_INCREMENT, + f1 VARCHAR(30) DEFAULT '', + f2 BIGINT(11) DEFAULT 0, + PRIMARY KEY(id), + KEY(f2), + KEY(f1, f2) +) ENGINE=InnoDB;