From eefaeeb8cc7139aabb7851151998bc5cc15912be Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 5 Mar 2013 16:18:02 -0700 Subject: [PATCH 1/4] Change --retries to --tries and enhance with per-operation format that also allows setting the wait between tries. --- bin/pt-online-schema-change | 180 +++++++++++++++------ lib/Retry.pm | 2 + t/pt-online-schema-change/metadata_locks.t | 6 +- 3 files changed, 138 insertions(+), 50 deletions(-) diff --git a/bin/pt-online-schema-change b/bin/pt-online-schema-change index 23d49a7e..532c473a 100755 --- a/bin/pt-online-schema-change +++ b/bin/pt-online-schema-change @@ -3578,6 +3578,8 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use constant PTDEBUG => $ENV{PTDEBUG} || 0; +use Time::HiRes qw(sleep); + sub new { my ( $class, %args ) = @_; my $self = { @@ -7617,6 +7619,13 @@ sub main { . $n_chunk_index_cols); } + my $tries = eval { + validate_tries($o); + }; + if ( $EVAL_ERROR ) { + $o->save_error($EVAL_ERROR); + } + if ( !$o->get('help') ) { if ( @ARGV ) { $o->save_error('Specify only one DSN on the command line'); @@ -7973,6 +7982,17 @@ sub main { Quoter => $q, ); + # ######################################################################## + # Print --tries. + # ######################################################################## +# print "Operation, tries, wait:\n"; +# { +# my $fmt = " %s, %s, %s\n"; +# foreach my $op ( sort keys %$tries ) { +# printf $fmt, $op, $tries->{$op}->{tries}, $tries->{$op}->{wait}; +# } +# } + # ######################################################################## # Get child tables of the original table, if necessary. # ######################################################################## @@ -8427,7 +8447,7 @@ sub main { Quoter => $q, OptionParser => $o, Retry => $retry, - retries => $o->get('retries'), + tries => $tries, stats => \%stats, ); } @@ -8453,7 +8473,7 @@ sub main { Quoter => $q, OptionParser => $o, Retry => $retry, - retries => $o->get('retries'), + tries => $tries, stats => \%stats, ); }; @@ -8642,7 +8662,7 @@ sub main { # Exec and time the chunk checksum query. $tbl->{nibble_time} = exec_nibble( %args, - retries => $o->get('retries'), + tries => $tries, Retry => $retry, Quoter => $q, stats => \%stats, @@ -8852,7 +8872,7 @@ sub main { Quoter => $q, OptionParser => $o, Retry => $retry, - retries => $o->get('retries'), + tries => $tries, stats => \%stats, ); }; @@ -8900,7 +8920,7 @@ sub main { TableParser => $tp, stats => \%stats, Retry => $retry, - retries => $o->get('retries'), + tries => $tries, ); } elsif ( $alter_fk_method eq 'drop_swap' ) { @@ -8911,7 +8931,7 @@ sub main { OptionParser => $o, stats => \%stats, Retry => $retry, - retries => $o->get('retries'), + tries => $tries, ); } elsif ( !$alter_fk_method @@ -9003,6 +9023,56 @@ sub main { # Subroutines. # ############################################################################ +sub validate_tries { + my ($o) = @_; + my @ops = qw( + create_triggers + drop_triggers + copy_rows + swap_tables + update_foreign_keys + ); + my %user_tries; + my $user_tries = $o->get('tries'); + if ( $user_tries ) { + foreach my $var_val ( @$user_tries ) { + my ($op, $tries, $wait) = split(':', $var_val); + die "Invalid --tries value: $var_val\n" unless $op && $tries && $wait; + die "Invalid --tries operation: $op\n" unless grep { $op eq $_ } @ops; + die "Invalid --tries tries: $tries\n" unless $tries > 0; + die "Invalid --tries wait: $wait\n" unless $wait > 0; + $user_tries{$op} = { + tries => $tries, + wait => $wait, + }; + } + } + + my %default_tries; + my $default_tries = $o->read_para_after(__FILE__, qr/MAGIC_tries/); + if ( $default_tries ) { + %default_tries = map { + my $var_val = $_; + my ($op, $tries, $wait) = $var_val =~ m/(\S+)/g; + die "Invalid --tries value: $var_val\n" unless $op && $tries && $wait; + die "Invalid --tries operation: $op\n" unless grep { $op eq $_ } @ops; + die "Invalid --tries tries: $tries\n" unless $tries > 0; + die "Invalid --tries wait: $wait\n" unless $wait > 0; + $op => { + tries => $tries, + wait => $wait, + }; + } grep { m/^\s+\w+\s+\d+\s+[\d\.]+/ } split("\n", $default_tries); + } + + my %tries = ( + %default_tries, # first the tool's defaults + %user_tries, # then the user's which overwrite the defaults + ); + PTDEBUG && _d('--tries:', Dumper(\%tries)); + return \%tries; +} + sub check_alter { my (%args) = @_; my @required_args = qw(alter tbl dry_run Cxn TableParser); @@ -9300,15 +9370,15 @@ sub create_new_table { sub swap_tables { my (%args) = @_; - my @required_args = qw(orig_tbl new_tbl Cxn Quoter OptionParser Retry retries stats); + my @required_args = qw(orig_tbl new_tbl Cxn Quoter OptionParser Retry tries stats); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($orig_tbl, $new_tbl, $cxn, $q, $o, $retry, $retries, $stats) = @args{@required_args}; + my ($orig_tbl, $new_tbl, $cxn, $q, $o, $retry, $tries, $stats) = @args{@required_args}; my $prefix = '_'; my $table_name = $orig_tbl->{tbl} . ($args{suffix} || ''); - my $tries = 10; # don't try forever + my $name_tries = 10; # don't try forever my $table_exists = qr/table.+?already exists/i; # This sub only works for --execute. Since the options are @@ -9326,7 +9396,7 @@ sub swap_tables { elsif ( $o->get('execute') ) { print "Swapping tables...\n"; - while ( $tries-- ) { + while ( $name_tries-- ) { $table_name = $prefix . $table_name; if ( length($table_name) > 64 ) { @@ -9344,7 +9414,7 @@ sub swap_tables { osc_retry( Cxn => $cxn, Retry => $retry, - retries => $retries, + tries => $tries->{swap_tables}, stats => $stats, code => sub { PTDEBUG && _d($sql); @@ -9380,7 +9450,7 @@ sub swap_tables { # This shouldn't happen. # Here and in the attempt to find a new table name we probably ought to - # use --retries (and maybe a Retry object?) + # use --tries (and maybe a Retry object?) die "Failed to find a unique old table name after serveral attempts.\n"; } } @@ -9530,11 +9600,11 @@ sub rebuild_constraints { my ( %args ) = @_; my @required_args = qw(orig_tbl old_tbl child_tables stats Cxn Quoter OptionParser TableParser - Retry retries); + Retry tries); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($orig_tbl, $old_tbl, $child_tables, $stats, $cxn, $q, $o, $tp, $retry, $retries) + my ($orig_tbl, $old_tbl, $child_tables, $stats, $cxn, $q, $o, $tp, $retry, $tries) = @args{@required_args}; # MySQL has a "feature" where if the parent tbl is in the same db, @@ -9612,7 +9682,7 @@ sub rebuild_constraints { osc_retry( Cxn => $cxn, Retry => $retry, - retries => $retries, + tries => $tries->{update_foreign_keys}, stats => $stats, code => sub { PTDEBUG && _d($sql); @@ -9632,11 +9702,11 @@ sub rebuild_constraints { sub drop_swap { my ( %args ) = @_; - my @required_args = qw(orig_tbl new_tbl Cxn OptionParser stats Retry retries); + my @required_args = qw(orig_tbl new_tbl Cxn OptionParser stats Retry tries); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($orig_tbl, $new_tbl, $cxn, $o, $stats, $retry, $retries) = @args{@required_args}; + my ($orig_tbl, $new_tbl, $cxn, $o, $stats, $retry, $tries) = @args{@required_args}; if ( $o->get('dry-run') ) { print "Not drop-swapping tables because this is a dry run.\n"; @@ -9658,7 +9728,7 @@ sub drop_swap { osc_retry( Cxn => $cxn, Retry => $retry, - retries => $retries, + tries => $tries->{update_foreign_keys}, stats => $stats, code => sub { PTDEBUG && _d($sql); @@ -9677,11 +9747,11 @@ sub drop_swap { sub create_triggers { my ( %args ) = @_; - my @required_args = qw(orig_tbl new_tbl del_tbl columns Cxn Quoter OptionParser Retry retries stats); + my @required_args = qw(orig_tbl new_tbl del_tbl columns Cxn Quoter OptionParser Retry tries stats); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($orig_tbl, $new_tbl, $del_tbl, $cols, $cxn, $q, $o, $retry, $retries, $stats) = @args{@required_args}; + my ($orig_tbl, $new_tbl, $del_tbl, $cols, $cxn, $q, $o, $retry, $tries, $stats) = @args{@required_args}; # This sub works for --dry-run and --execute. With --dry-run it's # only interesting if --print is specified, too; then the user can @@ -9752,7 +9822,7 @@ sub create_triggers { osc_retry( Cxn => $cxn, Retry => $retry, - retries => $retries, + tries => $tries->{create_triggers}, stats => $stats, code => sub { PTDEBUG && _d($sql); @@ -9778,11 +9848,11 @@ sub create_triggers { sub drop_triggers { my ( %args ) = @_; - my @required_args = qw(tbl Cxn Quoter OptionParser Retry retries stats); + my @required_args = qw(tbl Cxn Quoter OptionParser Retry tries stats); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($tbl, $cxn, $q, $o, $retry, $retries, $stats) = @args{@required_args}; + my ($tbl, $cxn, $q, $o, $retry, $tries, $stats) = @args{@required_args}; # This sub works for --dry-run and --execute, although --dry-run is # only interesting with --print so the user can see the drop trigger @@ -9802,7 +9872,7 @@ sub drop_triggers { osc_retry( Cxn => $cxn, Retry => $retry, - retries => $retries, + tries => $tries->{drop_triggers}, stats => $stats, code => sub { PTDEBUG && _d($sql); @@ -9846,20 +9916,20 @@ sub error_event { sub osc_retry { my (%args) = @_; - my @required_args = qw(Cxn Retry retries code stats); + my @required_args = qw(Cxn Retry tries code stats); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } my $cxn = $args{Cxn}; my $retry = $args{Retry}; - my $retries = $args{retries}; + my $tries = $args{tries}; my $code = $args{code}; my $stats = $args{stats}; my $ignore_errors = $args{ignore_errors}; return $retry->retry( - tries => $retries, - wait => sub { sleep 0.25; return; }, + tries => $tries->{tries}, + wait => sub { sleep ($tries->{wait} || 0.25) }, try => $code, fail => sub { my (%args) = @_; @@ -9911,11 +9981,11 @@ sub osc_retry { sub exec_nibble { my (%args) = @_; - my @required_args = qw(Cxn tbl stats retries Retry NibbleIterator Quoter); + my @required_args = qw(Cxn tbl stats tries Retry NibbleIterator Quoter); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($cxn, $tbl, $stats, $retries, $retry, $nibble_iter, $q) + my ($cxn, $tbl, $stats, $tries, $retry, $nibble_iter, $q) = @args{@required_args}; my $sth = $nibble_iter->statements(); @@ -9952,7 +10022,7 @@ sub exec_nibble { return osc_retry( Cxn => $cxn, Retry => $retry, - retries => $retries, + tries => $tries->{copy_rows}, stats => $stats, code => sub { # ################################################################### @@ -10814,25 +10884,43 @@ replication lag, insert the values C and C into the table. Currently, the DSNs are ordered by id, but id and parent_id are otherwise ignored. -=item --retries +=item --tries -type: int; default: 10 +type: array -Retry critical operations and recover from non-fatal errors. The tool -retries these operations: +How many times to try critical operations. If certain operations fail due +to non-fatal, recoverable errors, the tool waits and tries the operation +again. These are the operations that are retried, with their default number +of tries and wait time between tries (in seconds): - Creating triggers - Dropping triggers - Copying chunks - Swapping tables - Rebuilding foreign key constraints +=for comment ignore-pt-internal-value +MAGIC_tries -For creating and dropping triggers, the number of retries applies to each + OPERATION TRIES WAIT + =================== ===== ==== + create_triggers 10 1 + drop_triggers 10 1 + copy_rows 10 0.25 + swap_tables 10 1 + update_foreign_keys 10 1 + +To change the defaults, specify the new values like: + + --tries create_triggers:5:0.5,drop_triggers:5:0.5 + +That makes the tool try C and C 2 times +with a 0.5 second wait between tries. So the format is: + + operation:tries:wait[,operation:tries:wait] + +All three values must be specified. + +For creating and dropping triggers, the number of tries applies to each C and C statement for each trigger. -For copying chunks, the number of retries applies to each chunk, not the -entire table. For swapping tables, the number of retries usually applies +For copying rows, the number of tries applies to each chunk, not the +entire table. For swapping tables, the number of tries usually applies once because there is usually only one C statement. -For rebuilding foreign key constraints, the number of retries applies to +For rebuilding foreign key constraints, the number of tries applies to each statement (C statements for the C L<"--alter-foreign-keys-method">; other statements for the C method). @@ -10848,10 +10936,6 @@ The tool retries each operation if these errors occur: In the case of lost and killed connections, the tool will automatically reconnect. -To alter extremely busy tables, it may be necessary to increase L<"--retries">, -and also C and (for MySQL 5.5 and newer) -C by specifying higher values with L<"--set-vars">. - Failures and retries are recorded in the L<"--statistics">. =item --set-vars diff --git a/lib/Retry.pm b/lib/Retry.pm index 43a8f840..2e8354d6 100644 --- a/lib/Retry.pm +++ b/lib/Retry.pm @@ -27,6 +27,8 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use constant PTDEBUG => $ENV{PTDEBUG} || 0; +use Time::HiRes qw(sleep); + sub new { my ( $class, %args ) = @_; my $self = { diff --git a/t/pt-online-schema-change/metadata_locks.t b/t/pt-online-schema-change/metadata_locks.t index 5413a266..ae668412 100644 --- a/t/pt-online-schema-change/metadata_locks.t +++ b/t/pt-online-schema-change/metadata_locks.t @@ -51,7 +51,8 @@ $sb->load_file('master', "$sample/basic_no_fks_innodb.sql"); ($output) = full_output( sub { pt_online_schema_change::main( "$master_dsn,D=pt_osc,t=t", - qw(--statistics --execute --retries 2 --set-vars lock_wait_timeout=1), + qw(--statistics --execute --tries create_triggers:2:0.1), + qw(--set-vars lock_wait_timeout=1), '--plugin', "$plugin/block_create_triggers.pm", )}, stderr => 1, @@ -76,7 +77,8 @@ like( ($output) = full_output( sub { pt_online_schema_change::main( "$master_dsn,D=pt_osc,t=t", - qw(--statistics --execute --retries 2 --set-vars lock_wait_timeout=1), + qw(--statistics --execute --tries swap_tables:2:0.1), + qw(--set-vars lock_wait_timeout=1), '--plugin', "$plugin/block_swap_tables.pm", )}, stderr => 1, From c1f3e130b1e5de2264ed4b8a921041fa0a327201 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 5 Mar 2013 16:25:12 -0700 Subject: [PATCH 2/4] Update samples with new output for --tries. --- bin/pt-online-schema-change | 14 +++++++------- .../samples/stats-dry-run.txt | 6 ++++++ .../samples/stats-execute-5.5.txt | 6 ++++++ 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/bin/pt-online-schema-change b/bin/pt-online-schema-change index 532c473a..d36cbd9e 100755 --- a/bin/pt-online-schema-change +++ b/bin/pt-online-schema-change @@ -7985,13 +7985,13 @@ sub main { # ######################################################################## # Print --tries. # ######################################################################## -# print "Operation, tries, wait:\n"; -# { -# my $fmt = " %s, %s, %s\n"; -# foreach my $op ( sort keys %$tries ) { -# printf $fmt, $op, $tries->{$op}->{tries}, $tries->{$op}->{wait}; -# } -# } + print "Operation, tries, wait:\n"; + { + my $fmt = " %s, %s, %s\n"; + foreach my $op ( sort keys %$tries ) { + printf $fmt, $op, $tries->{$op}->{tries}, $tries->{$op}->{wait}; + } + } # ######################################################################## # Get child tables of the original table, if necessary. diff --git a/t/pt-online-schema-change/samples/stats-dry-run.txt b/t/pt-online-schema-change/samples/stats-dry-run.txt index dec86793..672904e9 100644 --- a/t/pt-online-schema-change/samples/stats-dry-run.txt +++ b/t/pt-online-schema-change/samples/stats-dry-run.txt @@ -1,3 +1,9 @@ +Operation, tries, wait: + copy_rows, 10, 0.25 + create_triggers, 10, 1 + drop_triggers, 10, 1 + swap_tables, 10, 1 + update_foreign_keys, 10, 1 Starting a dry run. `bug_1045317`.`bits` will not be altered. Specify --execute instead of --dry-run to alter the table. Not dropping triggers because this is a dry run. Dropping new table... diff --git a/t/pt-online-schema-change/samples/stats-execute-5.5.txt b/t/pt-online-schema-change/samples/stats-execute-5.5.txt index 9947355d..6cfc1b1d 100644 --- a/t/pt-online-schema-change/samples/stats-execute-5.5.txt +++ b/t/pt-online-schema-change/samples/stats-execute-5.5.txt @@ -1,3 +1,9 @@ +Operation, tries, wait: + copy_rows, 10, 0.25 + create_triggers, 10, 1 + drop_triggers, 10, 1 + swap_tables, 10, 1 + update_foreign_keys, 10, 1 Altering `bug_1045317`.`bits`... Dropping triggers... Dropped triggers OK. From 627b4fab70e2ebcb37cd06220c955bcd28aed74c Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 5 Mar 2013 16:33:38 -0700 Subject: [PATCH 3/4] Note in --tries when/why certain ops are affected. Re-alphabetize the options. --- bin/pt-online-schema-change | 126 +++++++++++++++++++----------------- 1 file changed, 65 insertions(+), 61 deletions(-) diff --git a/bin/pt-online-schema-change b/bin/pt-online-schema-change index d36cbd9e..3992a241 100755 --- a/bin/pt-online-schema-change +++ b/bin/pt-online-schema-change @@ -10466,13 +10466,6 @@ STDOUT to utf8, passes the mysql_enable_utf8 option to DBD::mysql, and runs SET NAMES UTF8 after connecting to MySQL. Any other value sets binmode on STDOUT without the utf8 layer, and runs SET NAMES after connecting to MySQL. -=item --check-interval - -type: time; default: 1 - -Sleep time between checks for L<"--max-lag">. - - =item --[no]check-alter default: yes @@ -10504,6 +10497,12 @@ L<"--print"> and verify that the triggers are correct. =back +=item --check-interval + +type: time; default: 1 + +Sleep time between checks for L<"--max-lag">. + =item --[no]check-plan default: yes @@ -10884,60 +10883,6 @@ replication lag, insert the values C and C into the table. Currently, the DSNs are ordered by id, but id and parent_id are otherwise ignored. -=item --tries - -type: array - -How many times to try critical operations. If certain operations fail due -to non-fatal, recoverable errors, the tool waits and tries the operation -again. These are the operations that are retried, with their default number -of tries and wait time between tries (in seconds): - -=for comment ignore-pt-internal-value -MAGIC_tries - - OPERATION TRIES WAIT - =================== ===== ==== - create_triggers 10 1 - drop_triggers 10 1 - copy_rows 10 0.25 - swap_tables 10 1 - update_foreign_keys 10 1 - -To change the defaults, specify the new values like: - - --tries create_triggers:5:0.5,drop_triggers:5:0.5 - -That makes the tool try C and C 2 times -with a 0.5 second wait between tries. So the format is: - - operation:tries:wait[,operation:tries:wait] - -All three values must be specified. - -For creating and dropping triggers, the number of tries applies to each -C and C statement for each trigger. -For copying rows, the number of tries applies to each chunk, not the -entire table. For swapping tables, the number of tries usually applies -once because there is usually only one C statement. -For rebuilding foreign key constraints, the number of tries applies to -each statement (C statements for the C -L<"--alter-foreign-keys-method">; other statements for the C -method). - -The tool retries each operation if these errors occur: - - Lock wait timeout (innodb_lock_wait_timeout and lock_wait_timeout) - Deadlock found - Query is killed (KILL QUERY ) - Connection is killed (KILL CONNECTION ) - Lost connection to MySQL - -In the case of lost and killed connections, the tool will automatically -reconnect. - -Failures and retries are recorded in the L<"--statistics">. - =item --set-vars type: Array @@ -10979,6 +10924,65 @@ online schema change process by making the table with the new schema take the place of the original table. The original table becomes the "old table," and the tool drops it unless you disable L<"--[no]drop-old-table">. +=item --tries + +type: array + +How many times to try critical operations. If certain operations fail due +to non-fatal, recoverable errors, the tool waits and tries the operation +again. These are the operations that are retried, with their default number +of tries and wait time between tries (in seconds): + +=for comment ignore-pt-internal-value +MAGIC_tries + + OPERATION TRIES WAIT + =================== ===== ==== + create_triggers 10 1 + drop_triggers 10 1 + copy_rows 10 0.25 + swap_tables 10 1 + update_foreign_keys 10 1 + +To change the defaults, specify the new values like: + + --tries create_triggers:5:0.5,drop_triggers:5:0.5 + +That makes the tool try C and C 2 times +with a 0.5 second wait between tries. So the format is: + + operation:tries:wait[,operation:tries:wait] + +All three values must be specified. + +Note that most operations are affected only in MySQL 5.5 and newer by +C (see L<"--set-vars">) because of metadata locks. +The C operation is affected in any version of MySQL by +C. + +For creating and dropping triggers, the number of tries applies to each +C and C statement for each trigger. +For copying rows, the number of tries applies to each chunk, not the +entire table. For swapping tables, the number of tries usually applies +once because there is usually only one C statement. +For rebuilding foreign key constraints, the number of tries applies to +each statement (C statements for the C +L<"--alter-foreign-keys-method">; other statements for the C +method). + +The tool retries each operation if these errors occur: + + Lock wait timeout (innodb_lock_wait_timeout and lock_wait_timeout) + Deadlock found + Query is killed (KILL QUERY ) + Connection is killed (KILL CONNECTION ) + Lost connection to MySQL + +In the case of lost and killed connections, the tool will automatically +reconnect. + +Failures and retries are recorded in the L<"--statistics">. + =item --user short form: -u; type: string From 150d4cbe532af1565ed55f61791833ad8ce9a658 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 5 Mar 2013 16:42:40 -0700 Subject: [PATCH 4/4] Update sample file with --tries output. --- t/pt-online-schema-change/samples/stats-execute.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/t/pt-online-schema-change/samples/stats-execute.txt b/t/pt-online-schema-change/samples/stats-execute.txt index 2bee23e6..c8fb653e 100644 --- a/t/pt-online-schema-change/samples/stats-execute.txt +++ b/t/pt-online-schema-change/samples/stats-execute.txt @@ -1,3 +1,9 @@ +Operation, tries, wait: + copy_rows, 10, 0.25 + create_triggers, 10, 1 + drop_triggers, 10, 1 + swap_tables, 10, 1 + update_foreign_keys, 10, 1 Altering `bug_1045317`.`bits`... Dropping triggers... Dropped triggers OK.