mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-11 13:40:07 +00:00
Rewrite ReplicaLagLimiter::wait().
This commit is contained in:
@@ -4479,67 +4479,25 @@ use English qw(-no_match_vars);
|
|||||||
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
||||||
|
|
||||||
use Time::HiRes qw(sleep time);
|
use Time::HiRes qw(sleep time);
|
||||||
|
use Data::Dumper;
|
||||||
|
|
||||||
sub new {
|
sub new {
|
||||||
my ( $class, %args ) = @_;
|
my ( $class, %args ) = @_;
|
||||||
my @required_args = qw(spec slaves get_lag initial_n initial_t target_t);
|
my @required_args = qw(oktorun get_lag sleep max_lag slaves initial_n initial_t target_t);
|
||||||
foreach my $arg ( @required_args ) {
|
foreach my $arg ( @required_args ) {
|
||||||
die "I need a $arg argument" unless defined $args{$arg};
|
die "I need a $arg argument" unless defined $args{$arg};
|
||||||
}
|
}
|
||||||
my ($spec) = @args{@required_args};
|
|
||||||
|
|
||||||
my %specs = map {
|
|
||||||
my ($key, $val) = split '=', $_;
|
|
||||||
MKDEBUG && _d($key, '=', $val);
|
|
||||||
lc($key) => $val;
|
|
||||||
} @$spec;
|
|
||||||
|
|
||||||
my $self = {
|
my $self = {
|
||||||
max => 1, # max slave lag
|
%args,
|
||||||
timeout => 3600, # max time to wait for all slaves to catch up
|
|
||||||
check => 1, # sleep time between checking slave lag
|
|
||||||
continue => 'no', # return true even if timeout
|
|
||||||
%specs, # slave wait specs from caller
|
|
||||||
slaves => $args{slaves},
|
|
||||||
get_lag => $args{get_lag},
|
|
||||||
avg_n => $args{initial_n},
|
avg_n => $args{initial_n},
|
||||||
avg_t => $args{initial_t},
|
avg_t => $args{initial_t},
|
||||||
target_t => $args{target_t},
|
|
||||||
weight => $args{weight} || 0.75,
|
weight => $args{weight} || 0.75,
|
||||||
};
|
};
|
||||||
|
|
||||||
return bless $self, $class;
|
return bless $self, $class;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub validate_spec {
|
|
||||||
shift @_ if $_[0] eq 'ReplicaLagLimiter';
|
|
||||||
my ( $spec ) = @_;
|
|
||||||
if ( @$spec == 0 ) {
|
|
||||||
die "spec array requires at least a max value\n";
|
|
||||||
}
|
|
||||||
my $have_max;
|
|
||||||
foreach my $op ( @$spec ) {
|
|
||||||
my ($key, $val) = split '=', $op;
|
|
||||||
if ( !$key || !$val ) {
|
|
||||||
die "invalid spec format, should be option=value: $op\n";
|
|
||||||
}
|
|
||||||
if ( $key !~ m/(?:max|timeout|continue)/i ) {
|
|
||||||
die "unknown option in spec: $op\n";
|
|
||||||
}
|
|
||||||
if ( $key ne 'continue' && $val !~ m/^\d+$/ ) {
|
|
||||||
die "value must be an integer: $op\n";
|
|
||||||
}
|
|
||||||
if ( $key eq 'continue' && $val !~ m/(?:yes|no)/i ) {
|
|
||||||
die "value for $key must be \"yes\" or \"no\"\n";
|
|
||||||
}
|
|
||||||
$have_max = 1 if $key eq 'max';
|
|
||||||
}
|
|
||||||
if ( !$have_max ) {
|
|
||||||
die "max must be specified"
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
sub update {
|
sub update {
|
||||||
my ($self, $n, $t) = @_;
|
my ($self, $n, $t) = @_;
|
||||||
MKDEBUG && _d('Master op time:', $n, 'n /', $t, 's');
|
MKDEBUG && _d('Master op time:', $n, 'n /', $t, 's');
|
||||||
@@ -4560,60 +4518,68 @@ sub wait {
|
|||||||
foreach my $arg ( @required_args ) {
|
foreach my $arg ( @required_args ) {
|
||||||
die "I need a $arg argument" unless $args{$arg};
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
}
|
}
|
||||||
my $pr = $args{Progres};
|
my $pr = $args{Progress};
|
||||||
my $get_lag = $self->{get_lag};
|
|
||||||
my $slaves = $self->{slaves};
|
|
||||||
my $n_slaves = @$slaves;
|
|
||||||
|
|
||||||
|
my $oktorun = $self->{oktorun};
|
||||||
|
my $get_lag = $self->{get_lag};
|
||||||
|
my $sleep = $self->{sleep};
|
||||||
|
my $slaves = $self->{slaves};
|
||||||
|
my $max_lag = $self->{max_lag};
|
||||||
|
|
||||||
|
my $worst; # most lagging slave
|
||||||
my $pr_callback;
|
my $pr_callback;
|
||||||
if ( $pr ) {
|
if ( $pr ) {
|
||||||
my $reported = 0;
|
|
||||||
$pr_callback = sub {
|
$pr_callback = sub {
|
||||||
my ($fraction, $elapsed, $remaining, $eta, $slave_no) = @_;
|
my ($fraction, $elapsed, $remaining, $eta, $completed) = @_;
|
||||||
if ( !$reported ) {
|
if ( defined $worst->{lag} ) {
|
||||||
print STDERR "Waiting for replica "
|
print STDERR "Replica lag is $worst->{lag} seconds on "
|
||||||
. ($slaves->[$slave_no]->{dsn}->{n} || '')
|
. "$worst->{n}. Waiting.\n";
|
||||||
. " to catch up...\n";
|
|
||||||
$reported = 1;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
print STDERR "Still waiting ($elapsed seconds)...\n";
|
print STDERR "Replica $worst->{n} is stopped. Waiting.\n";
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
$pr->set_callback($pr_callback);
|
$pr->set_callback($pr_callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
my ($max, $check, $timeout) = @{$self}{qw(max check timeout)};
|
my @lagged_slaves = @$slaves; # first check all slaves
|
||||||
my $slave_no = 0;
|
while ( $oktorun->() && @lagged_slaves ) {
|
||||||
my $slave = $slaves->[$slave_no];
|
MKDEBUG && _d('Checking slave lag');
|
||||||
my $t_start = time;
|
for my $i ( 0..$#lagged_slaves ) {
|
||||||
while ($slave && time - $t_start < $timeout) {
|
my $slave = $lagged_slaves[$i];
|
||||||
MKDEBUG && _d('Checking slave lag on', $slave->{dsn}->{n});
|
|
||||||
my $lag = $get_lag->($slave->{dbh});
|
my $lag = $get_lag->($slave->{dbh});
|
||||||
if ( !defined $lag || $lag > $max ) {
|
MKDEBUG && _d($slave->{dsn}->{n}, 'slave lag:', $lag);
|
||||||
MKDEBUG && _d('Replica lag', $lag, '>', $max, '; sleeping', $check);
|
if ( !defined $lag || $lag > $max_lag ) {
|
||||||
$pr->update(sub { return $slave_no; }) if $pr;
|
$slave->{lag} = $lag;
|
||||||
sleep $check;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
MKDEBUG && _d('Replica ready, lag', $lag, '<=', $max);
|
delete $lagged_slaves[$i];
|
||||||
$slave = $slaves->[++$slave_no];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( $slave_no < @$slaves ) {
|
|
||||||
if ( $self->{continue} eq 'no' ) {
|
@lagged_slaves = grep { defined $_ } @lagged_slaves;
|
||||||
die "Timeout waiting for replica " . $slaves->[$slave_no]->{dsn}->{n}
|
if ( @lagged_slaves ) {
|
||||||
. " to catch up\n";
|
@lagged_slaves = reverse sort {
|
||||||
|
defined $a && defined $b ? $a <=> $b
|
||||||
|
: defined $a ? -1
|
||||||
|
: 1;
|
||||||
|
} @lagged_slaves;
|
||||||
|
$worst = $lagged_slaves[0];
|
||||||
|
MKDEBUG && _d(scalar @lagged_slaves, 'slaves are lagging, worst:',
|
||||||
|
Dumper($worst));
|
||||||
|
|
||||||
|
if ( $pr ) {
|
||||||
|
$pr->update(sub { return 0; });
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
MKDEBUG && _d('Some slave are not caught up');
|
MKDEBUG && _d('Calling sleep callback');
|
||||||
return 0; # not ready
|
$sleep->();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MKDEBUG && _d('All slaves caught up');
|
MKDEBUG && _d('All slaves caught up');
|
||||||
return 1; # ready
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub _d {
|
sub _d {
|
||||||
@@ -4693,12 +4659,6 @@ sub main {
|
|||||||
$o->save_error("--progress $EVAL_ERROR");
|
$o->save_error("--progress $EVAL_ERROR");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
eval { ReplicaLagLimiter::validate_spec($o->get('replica-lag')) };
|
|
||||||
if ($EVAL_ERROR) {
|
|
||||||
chomp $EVAL_ERROR;
|
|
||||||
$o->save_error("--replica-lag: $EVAL_ERROR");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$o->usage_or_errors();
|
$o->usage_or_errors();
|
||||||
@@ -4762,11 +4722,11 @@ sub main {
|
|||||||
MKDEBUG && _d(scalar @$slaves, 'slaves found');
|
MKDEBUG && _d(scalar @$slaves, 'slaves found');
|
||||||
|
|
||||||
my $slave_lag_cxn;
|
my $slave_lag_cxn;
|
||||||
if ( $o->get('replica-lag-dsn') ) {
|
if ( $o->get('check-slave-lag') ) {
|
||||||
MKDEBUG && _d('Will use --replica-lag-dsn to check for slave lag');
|
MKDEBUG && _d('Will use --check-slave-lag to check for slave lag');
|
||||||
# OptionParser can't auto-copy DSN vals from a cmd line DSN
|
# OptionParser can't auto-copy DSN vals from a cmd line DSN
|
||||||
# to an opt DSN, so we copy them manually.
|
# to an opt DSN, so we copy them manually.
|
||||||
my $dsn = $dp->copy($dsn, $o->get('replica-lag-dsn'));
|
my $dsn = $dp->copy($dsn, $o->get('check-slave-lag'));
|
||||||
my $dbh = get_cxn(
|
my $dbh = get_cxn(
|
||||||
dsn => $dsn,
|
dsn => $dsn,
|
||||||
DSNParser => $dp,
|
DSNParser => $dp,
|
||||||
@@ -4783,12 +4743,14 @@ sub main {
|
|||||||
# Make a lag limiter to help adjust chunk size and wait for slaves.
|
# Make a lag limiter to help adjust chunk size and wait for slaves.
|
||||||
# ########################################################################
|
# ########################################################################
|
||||||
my $lag_limiter = new ReplicaLagLimiter(
|
my $lag_limiter = new ReplicaLagLimiter(
|
||||||
|
oktorun => sub { return $oktorun },
|
||||||
|
get_lag => sub { return $ms->get_slave_lag(@_) },
|
||||||
|
sleep => sub { sleep $o->get('check-interval') },
|
||||||
|
max_lag => $o->get('max-lag'),
|
||||||
initial_n => $o->get('chunk-size'),
|
initial_n => $o->get('chunk-size'),
|
||||||
initial_t => $o->get('chunk-time'),
|
initial_t => $o->get('chunk-time'),
|
||||||
target_t => $o->get('chunk-time'),
|
target_t => $o->get('chunk-time'),
|
||||||
spec => $o->get('replica-lag'),
|
|
||||||
slaves => $slave_lag_cxn,
|
slaves => $slave_lag_cxn,
|
||||||
get_lag => sub { return $ms->get_slave_lag(@_) },
|
|
||||||
);
|
);
|
||||||
|
|
||||||
# ########################################################################
|
# ########################################################################
|
||||||
@@ -4976,23 +4938,10 @@ sub main {
|
|||||||
$pr = new Progress(
|
$pr = new Progress(
|
||||||
jobsize => scalar @$slaves,
|
jobsize => scalar @$slaves,
|
||||||
spec => $o->get('progress'),
|
spec => $o->get('progress'),
|
||||||
name => "Waiting for " . (@$slaves > 1 ? "slaves" : "slave")
|
name => "Waiting for replicas to catch up",
|
||||||
. " to catch up",
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
my $caught_up;
|
$lag_limiter->wait(Progress => $pr);
|
||||||
eval {
|
|
||||||
$caught_up = $lag_limiter->wait();
|
|
||||||
};
|
|
||||||
if ( $EVAL_ERROR ) { # slaves didn't catch up and continue=no.
|
|
||||||
$tbl->{checksum_results}->{errors}++;
|
|
||||||
warn $EVAL_ERROR;
|
|
||||||
$oktorun = 0;
|
|
||||||
}
|
|
||||||
elsif ( !$caught_up ) {
|
|
||||||
warn "Some replicas are lagging, but checksumming will "
|
|
||||||
. "continue because --replica-lag continue=yes.\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
return;
|
||||||
},
|
},
|
||||||
@@ -5736,6 +5685,12 @@ group: Connection
|
|||||||
|
|
||||||
Prompt for a password when connecting to MySQL.
|
Prompt for a password when connecting to MySQL.
|
||||||
|
|
||||||
|
=item --check-interval
|
||||||
|
|
||||||
|
type: time; default: 1; group: Throttle
|
||||||
|
|
||||||
|
Sleep time between checks for L<"--max-lag">.
|
||||||
|
|
||||||
=item --[no]check-replication-filters
|
=item --[no]check-replication-filters
|
||||||
|
|
||||||
default: yes; group: Safety
|
default: yes; group: Safety
|
||||||
@@ -5749,31 +5704,23 @@ queries won't break replication or simply fail to replicate. If you are sure
|
|||||||
that it's OK to run the checksum queries, you can negate this option to
|
that it's OK to run the checksum queries, you can negate this option to
|
||||||
disable the checks. See also L<"--replicate-database">.
|
disable the checks. See also L<"--replicate-database">.
|
||||||
|
|
||||||
=item --chunk-column
|
=item --check-slave-lag
|
||||||
|
|
||||||
type: string
|
type: DSN; group: Throttle
|
||||||
|
|
||||||
Prefer this column for dividing tables into chunks. By default,
|
Pause checksumming until the specified slave's lag is less than L<"--max-lag">.
|
||||||
pt-table-checksum chooses the first suitable column for each table, preferring
|
|
||||||
to use the primary key. This option lets you specify a preferred column, which
|
|
||||||
pt-table-checksum uses if it exists in the table and is chunkable. If not, then
|
|
||||||
pt-table-checksum will revert to its default behavior. Be careful when using
|
|
||||||
this option; a poor choice could cause bad performance. This is probably best
|
|
||||||
to use when you are checksumming only a single table, not an entire server. See
|
|
||||||
also L<"--chunk-index">.
|
|
||||||
|
|
||||||
=item --chunk-index
|
=item --chunk-index
|
||||||
|
|
||||||
type: string
|
type: string
|
||||||
|
|
||||||
Prefer this index for chunking tables. By default, pt-table-checksum chooses an
|
Prefer this index for chunking tables. By default, pt-table-checksum chooses
|
||||||
appropriate index for the L<"--chunk-column"> (even if it chooses the chunk
|
an appropriate index for chunking. This option lets you specify the index
|
||||||
column automatically). This option lets you specify the index you prefer. If
|
that you prefer. If the index doesn't exist, then pt-table-checksum will fall
|
||||||
the index doesn't exist, then pt-table-checksum will fall back to its default
|
back to its default behavior. pt-table-checksum adds the index to the checksum
|
||||||
behavior. pt-table-checksum adds the index to the checksum SQL statements in a
|
SQL statements in a C<FORCE INDEX> clause. Be careful when using this option;
|
||||||
C<FORCE INDEX> clause. Be careful when using this option; a poor choice of
|
a poor choice of index could cause bad performance. This is probably best to
|
||||||
index could cause bad performance. This is probably best to use when you are
|
use when you are checksumming only a single table, not an entire server.
|
||||||
checksumming only a single table, not an entire server.
|
|
||||||
|
|
||||||
=item --chunk-size
|
=item --chunk-size
|
||||||
|
|
||||||
@@ -5974,6 +5921,22 @@ type: string; group: Filter
|
|||||||
|
|
||||||
Ignore tables whose names match the Perl regex.
|
Ignore tables whose names match the Perl regex.
|
||||||
|
|
||||||
|
=item --max-lag
|
||||||
|
|
||||||
|
type: time; default: 1s; group: Throttle
|
||||||
|
|
||||||
|
Suspend checksumming if the slave given by L<"--check-slave-lag"> lags.
|
||||||
|
|
||||||
|
This option causes pt-table-checksum to look at slave lag after each checksum.
|
||||||
|
If the any slave's lag is greater than the option's value, or if the slave
|
||||||
|
isn't running (so its lag is NULL), pt-table-checksum sleeps for
|
||||||
|
L<"--check-interval"> seconds and then looks at the lag again. It repeats
|
||||||
|
until all slaves are caught up, then continues checksumming.
|
||||||
|
|
||||||
|
This option is useful to let you checksum data as fast as the slaves can handle
|
||||||
|
it, assuming the slave you directed pt-table-checksum to monitor is
|
||||||
|
representative of all the slaves that may be replicating from this server.
|
||||||
|
|
||||||
=item --[no]optimize-xor
|
=item --[no]optimize-xor
|
||||||
|
|
||||||
default: yes
|
default: yes
|
||||||
@@ -6087,24 +6050,6 @@ t. The DSN table should have the following structure:
|
|||||||
One row specifies one DSN in the C<dsn> column. Currently, the DSNs are
|
One row specifies one DSN in the C<dsn> column. Currently, the DSNs are
|
||||||
ordered by C<id>, but C<id> and C<parent_id> are otherwise ignored.
|
ordered by C<id>, but C<id> and C<parent_id> are otherwise ignored.
|
||||||
|
|
||||||
=item --replica-lag
|
|
||||||
|
|
||||||
type: array; default: max=1,timeout=3600,continue=no
|
|
||||||
|
|
||||||
Limit lag on replicas to C<max> seconds. After each checksum, the tool
|
|
||||||
checks all replica servers, or just the L<"--replica-lag-dsn"> if
|
|
||||||
specified, and waits until the lag on all replicas is <= C<max>.
|
|
||||||
The tool waits up to C<timeout> seconds and if the lag is still too high,
|
|
||||||
it will exit if C<continue> is "no", or it will continue and check replica
|
|
||||||
lag again after the next checksum.
|
|
||||||
|
|
||||||
=item --replica-lag-dsn
|
|
||||||
|
|
||||||
type: DSN
|
|
||||||
|
|
||||||
Check L<"--replica-lag"> only on this replica. If not specified, all replicas
|
|
||||||
will be checked.
|
|
||||||
|
|
||||||
=item --replicate
|
=item --replicate
|
||||||
|
|
||||||
type: string; default: percona.checksums
|
type: string; default: percona.checksums
|
||||||
|
@@ -37,13 +37,16 @@ use English qw(-no_match_vars);
|
|||||||
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
||||||
|
|
||||||
use Time::HiRes qw(sleep time);
|
use Time::HiRes qw(sleep time);
|
||||||
|
use Data::Dumper;
|
||||||
|
|
||||||
# Sub: new
|
# Sub: new
|
||||||
#
|
#
|
||||||
# Required Arguments:
|
# Required Arguments:
|
||||||
# spec - --replicat-lag spec (arrayref of option=value pairs)
|
# oktorun - Callback that returns true if it's ok to continue running
|
||||||
# slaves - Arrayref of slave cxn, like [{dsn=>{...}, dbh=>...},...]
|
|
||||||
# get_lag - Callback passed slave dbh and returns slave's lag
|
# get_lag - Callback passed slave dbh and returns slave's lag
|
||||||
|
# sleep - Callback to sleep between checking lag.
|
||||||
|
# max_lag - Max lag
|
||||||
|
# slaves - Arrayref of slave cxn, like [{dsn=>{...}, dbh=>...},...]
|
||||||
# initial_n - Initial n value for <update()>
|
# initial_n - Initial n value for <update()>
|
||||||
# initial_t - Initial t value for <update()>
|
# initial_t - Initial t value for <update()>
|
||||||
# target_t - Target time for t in <update()>
|
# target_t - Target time for t in <update()>
|
||||||
@@ -55,65 +58,21 @@ use Time::HiRes qw(sleep time);
|
|||||||
# ReplicaLagLimiter object
|
# ReplicaLagLimiter object
|
||||||
sub new {
|
sub new {
|
||||||
my ( $class, %args ) = @_;
|
my ( $class, %args ) = @_;
|
||||||
my @required_args = qw(spec slaves get_lag initial_n initial_t target_t);
|
my @required_args = qw(oktorun get_lag sleep max_lag slaves initial_n initial_t target_t);
|
||||||
foreach my $arg ( @required_args ) {
|
foreach my $arg ( @required_args ) {
|
||||||
die "I need a $arg argument" unless defined $args{$arg};
|
die "I need a $arg argument" unless defined $args{$arg};
|
||||||
}
|
}
|
||||||
my ($spec) = @args{@required_args};
|
|
||||||
|
|
||||||
my %specs = map {
|
|
||||||
my ($key, $val) = split '=', $_;
|
|
||||||
MKDEBUG && _d($key, '=', $val);
|
|
||||||
lc($key) => $val;
|
|
||||||
} @$spec;
|
|
||||||
|
|
||||||
my $self = {
|
my $self = {
|
||||||
max => 1, # max slave lag
|
%args,
|
||||||
timeout => 3600, # max time to wait for all slaves to catch up
|
|
||||||
check => 1, # sleep time between checking slave lag
|
|
||||||
continue => 'no', # return true even if timeout
|
|
||||||
%specs, # slave wait specs from caller
|
|
||||||
slaves => $args{slaves},
|
|
||||||
get_lag => $args{get_lag},
|
|
||||||
avg_n => $args{initial_n},
|
avg_n => $args{initial_n},
|
||||||
avg_t => $args{initial_t},
|
avg_t => $args{initial_t},
|
||||||
target_t => $args{target_t},
|
|
||||||
weight => $args{weight} || 0.75,
|
weight => $args{weight} || 0.75,
|
||||||
};
|
};
|
||||||
|
|
||||||
return bless $self, $class;
|
return bless $self, $class;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub validate_spec {
|
|
||||||
# Permit calling as ReplicaLagLimiter-> or ReplicaLagLimiter::
|
|
||||||
shift @_ if $_[0] eq 'ReplicaLagLimiter';
|
|
||||||
my ( $spec ) = @_;
|
|
||||||
if ( @$spec == 0 ) {
|
|
||||||
die "spec array requires at least a max value\n";
|
|
||||||
}
|
|
||||||
my $have_max;
|
|
||||||
foreach my $op ( @$spec ) {
|
|
||||||
my ($key, $val) = split '=', $op;
|
|
||||||
if ( !$key || !$val ) {
|
|
||||||
die "invalid spec format, should be option=value: $op\n";
|
|
||||||
}
|
|
||||||
if ( $key !~ m/(?:max|timeout|continue)/i ) {
|
|
||||||
die "unknown option in spec: $op\n";
|
|
||||||
}
|
|
||||||
if ( $key ne 'continue' && $val !~ m/^\d+$/ ) {
|
|
||||||
die "value must be an integer: $op\n";
|
|
||||||
}
|
|
||||||
if ( $key eq 'continue' && $val !~ m/(?:yes|no)/i ) {
|
|
||||||
die "value for $key must be \"yes\" or \"no\"\n";
|
|
||||||
}
|
|
||||||
$have_max = 1 if $key eq 'max';
|
|
||||||
}
|
|
||||||
if ( !$have_max ) {
|
|
||||||
die "max must be specified"
|
|
||||||
}
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
# Sub: update
|
# Sub: update
|
||||||
# Update weighted decaying average of master operation time. Param n is
|
# Update weighted decaying average of master operation time. Param n is
|
||||||
# generic; it's how many of whatever the caller is doing (rows, checksums,
|
# generic; it's how many of whatever the caller is doing (rows, checksums,
|
||||||
@@ -139,7 +98,7 @@ sub update {
|
|||||||
return $new_n;
|
return $new_n;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Sub: wait_for_slave
|
# Sub: wait
|
||||||
# Wait for Seconds_Behind_Master on all slaves to become < max.
|
# Wait for Seconds_Behind_Master on all slaves to become < max.
|
||||||
#
|
#
|
||||||
# Optional Arguments:
|
# Optional Arguments:
|
||||||
@@ -153,62 +112,76 @@ sub wait {
|
|||||||
foreach my $arg ( @required_args ) {
|
foreach my $arg ( @required_args ) {
|
||||||
die "I need a $arg argument" unless $args{$arg};
|
die "I need a $arg argument" unless $args{$arg};
|
||||||
}
|
}
|
||||||
my $pr = $args{Progres};
|
my $pr = $args{Progress};
|
||||||
my $get_lag = $self->{get_lag};
|
|
||||||
my $slaves = $self->{slaves};
|
|
||||||
my $n_slaves = @$slaves;
|
|
||||||
|
|
||||||
|
my $oktorun = $self->{oktorun};
|
||||||
|
my $get_lag = $self->{get_lag};
|
||||||
|
my $sleep = $self->{sleep};
|
||||||
|
my $slaves = $self->{slaves};
|
||||||
|
my $max_lag = $self->{max_lag};
|
||||||
|
|
||||||
|
my $worst; # most lagging slave
|
||||||
my $pr_callback;
|
my $pr_callback;
|
||||||
if ( $pr ) {
|
if ( $pr ) {
|
||||||
# If you use the default Progress report callback, you'll need to
|
# If you use the default Progress report callback, you'll need to
|
||||||
# to add Transformers.pm to this tool.
|
# to add Transformers.pm to this tool.
|
||||||
my $reported = 0;
|
|
||||||
$pr_callback = sub {
|
$pr_callback = sub {
|
||||||
my ($fraction, $elapsed, $remaining, $eta, $slave_no) = @_;
|
my ($fraction, $elapsed, $remaining, $eta, $completed) = @_;
|
||||||
if ( !$reported ) {
|
if ( defined $worst->{lag} ) {
|
||||||
print STDERR "Waiting for replica "
|
print STDERR "Replica lag is $worst->{lag} seconds on "
|
||||||
. ($slaves->[$slave_no]->{dsn}->{n} || '')
|
. "$worst->{dsn}->{n}. Waiting.\n";
|
||||||
. " to catch up...\n";
|
|
||||||
$reported = 1;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
print STDERR "Still waiting ($elapsed seconds)...\n";
|
print STDERR "Replica $worst->{dsn}->{n} is stopped. Waiting.\n";
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
};
|
};
|
||||||
$pr->set_callback($pr_callback);
|
$pr->set_callback($pr_callback);
|
||||||
}
|
}
|
||||||
|
|
||||||
my ($max, $check, $timeout) = @{$self}{qw(max check timeout)};
|
my @lagged_slaves = @$slaves; # first check all slaves
|
||||||
my $slave_no = 0;
|
while ( $oktorun->() && @lagged_slaves ) {
|
||||||
my $slave = $slaves->[$slave_no];
|
MKDEBUG && _d('Checking slave lag');
|
||||||
my $t_start = time;
|
for my $i ( 0..$#lagged_slaves ) {
|
||||||
while ($slave && time - $t_start < $timeout) {
|
my $slave = $lagged_slaves[$i];
|
||||||
MKDEBUG && _d('Checking slave lag on', $slave->{dsn}->{n});
|
|
||||||
my $lag = $get_lag->($slave->{dbh});
|
my $lag = $get_lag->($slave->{dbh});
|
||||||
if ( !defined $lag || $lag > $max ) {
|
MKDEBUG && _d($slave->{dsn}->{n}, 'slave lag:', $lag);
|
||||||
MKDEBUG && _d('Replica lag', $lag, '>', $max, '; sleeping', $check);
|
if ( !defined $lag || $lag > $max_lag ) {
|
||||||
$pr->update(sub { return $slave_no; }) if $pr;
|
$slave->{lag} = $lag;
|
||||||
sleep $check;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
MKDEBUG && _d('Replica ready, lag', $lag, '<=', $max);
|
delete $lagged_slaves[$i];
|
||||||
$slave = $slaves->[++$slave_no];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ( $slave_no < @$slaves ) {
|
|
||||||
if ( $self->{continue} eq 'no' ) {
|
# Remove slaves that aren't lagging.
|
||||||
die "Timeout waiting for replica " . $slaves->[$slave_no]->{dsn}->{n}
|
@lagged_slaves = grep { defined $_ } @lagged_slaves;
|
||||||
. " to catch up\n";
|
if ( @lagged_slaves ) {
|
||||||
|
# Sort lag, undef is highest because it means the slave is stopped.
|
||||||
|
@lagged_slaves = reverse sort {
|
||||||
|
defined $a && defined $b ? $a <=> $b
|
||||||
|
: defined $a ? -1
|
||||||
|
: 1;
|
||||||
|
} @lagged_slaves;
|
||||||
|
$worst = $lagged_slaves[0];
|
||||||
|
MKDEBUG && _d(scalar @lagged_slaves, 'slaves are lagging, worst:',
|
||||||
|
Dumper($worst));
|
||||||
|
|
||||||
|
if ( $pr ) {
|
||||||
|
# There's no real progress because we can't estimate how long
|
||||||
|
# it will take all slaves to catch up. The progress reports
|
||||||
|
# are just to inform the user every 30s which slave is still
|
||||||
|
# lagging this most.
|
||||||
|
$pr->update(sub { return 0; });
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
MKDEBUG && _d('Some slave are not caught up');
|
MKDEBUG && _d('Calling sleep callback');
|
||||||
return 0; # not ready
|
$sleep->();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MKDEBUG && _d('All slaves caught up');
|
MKDEBUG && _d('All slaves caught up');
|
||||||
return 1; # ready
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
sub _d {
|
sub _d {
|
||||||
|
@@ -9,62 +9,44 @@ BEGIN {
|
|||||||
use strict;
|
use strict;
|
||||||
use warnings FATAL => 'all';
|
use warnings FATAL => 'all';
|
||||||
use English qw(-no_match_vars);
|
use English qw(-no_match_vars);
|
||||||
use Test::More tests => 17;
|
use Test::More tests => 10;
|
||||||
|
|
||||||
use ReplicaLagLimiter;
|
use ReplicaLagLimiter;
|
||||||
use PerconaTest;
|
use PerconaTest;
|
||||||
|
|
||||||
|
my $oktorun = 1;
|
||||||
|
my @waited = ();
|
||||||
|
my @lag = ();
|
||||||
|
my @sleep = ();
|
||||||
|
|
||||||
|
sub oktorun {
|
||||||
|
return $oktorun;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub get_lag {
|
||||||
|
my ($dbh) = @_;
|
||||||
|
push @waited, $dbh;
|
||||||
|
my $lag = shift @lag || 0;
|
||||||
|
return $lag;
|
||||||
|
}
|
||||||
|
|
||||||
|
sub sleep {
|
||||||
|
my $t = shift @sleep || 0;
|
||||||
|
sleep $t;
|
||||||
|
}
|
||||||
|
|
||||||
my $rll = new ReplicaLagLimiter(
|
my $rll = new ReplicaLagLimiter(
|
||||||
spec => [qw(max=1 timeout=3600 continue=no)],
|
oktorun => \&oktorun,
|
||||||
|
get_lag => \&get_lag,
|
||||||
|
sleep => \&sleep,
|
||||||
|
max_lag => 1,
|
||||||
|
initial_n => 1000,
|
||||||
|
initial_t => 1,
|
||||||
|
target_t => 1,
|
||||||
slaves => [
|
slaves => [
|
||||||
{ dsn=>{n=>'slave1'}, dbh=>1 },
|
{ dsn=>{n=>'slave1'}, dbh=>1 },
|
||||||
{ dsn=>{n=>'slave2'}, dbh=>2 },
|
{ dsn=>{n=>'slave2'}, dbh=>2 },
|
||||||
],
|
],
|
||||||
get_lag => \&get_lag,
|
|
||||||
initial_n => 1000,
|
|
||||||
initial_t => 1,
|
|
||||||
target_t => 1,
|
|
||||||
);
|
|
||||||
|
|
||||||
# ############################################################################
|
|
||||||
# Validate spec.
|
|
||||||
# ############################################################################
|
|
||||||
is(
|
|
||||||
ReplicaLagLimiter::validate_spec(['max=1','timeout=3600','continue=no']),
|
|
||||||
1,
|
|
||||||
"Valid spec"
|
|
||||||
);
|
|
||||||
|
|
||||||
throws_ok(
|
|
||||||
sub {
|
|
||||||
ReplicaLagLimiter::validate_spec(['max=1','timeout=3600','foo,bar'])
|
|
||||||
},
|
|
||||||
qr/invalid spec format, should be option=value: foo,bar/,
|
|
||||||
"Invalid spec format"
|
|
||||||
);
|
|
||||||
|
|
||||||
throws_ok(
|
|
||||||
sub {
|
|
||||||
ReplicaLagLimiter::validate_spec(['max=1','timeout=3600','foo=bar'])
|
|
||||||
},
|
|
||||||
qr/unknown option in spec: foo=bar/,
|
|
||||||
"Unknown spec option"
|
|
||||||
);
|
|
||||||
|
|
||||||
throws_ok(
|
|
||||||
sub {
|
|
||||||
ReplicaLagLimiter::validate_spec(['max=1','timeout=yes'])
|
|
||||||
},
|
|
||||||
qr/value must be an integer: timeout=yes/,
|
|
||||||
"Value must be int"
|
|
||||||
);
|
|
||||||
|
|
||||||
throws_ok(
|
|
||||||
sub {
|
|
||||||
ReplicaLagLimiter::validate_spec(['max=1','continue=1'])
|
|
||||||
},
|
|
||||||
qr/value for continue must be "yes" or "no"/,
|
|
||||||
"Value must be yes or no"
|
|
||||||
);
|
);
|
||||||
|
|
||||||
# ############################################################################
|
# ############################################################################
|
||||||
@@ -122,20 +104,12 @@ is(
|
|||||||
# ############################################################################
|
# ############################################################################
|
||||||
# Fake waiting for slaves.
|
# Fake waiting for slaves.
|
||||||
# ############################################################################
|
# ############################################################################
|
||||||
my @waited = ();
|
|
||||||
my @lag = ();
|
|
||||||
sub get_lag {
|
|
||||||
my ($dbh) = @_;
|
|
||||||
push @waited, $dbh;
|
|
||||||
my $lag = shift @lag || 0;
|
|
||||||
return $lag;
|
|
||||||
}
|
|
||||||
|
|
||||||
@lag = (0, 0);
|
@lag = (0, 0);
|
||||||
is(
|
my $t = time;
|
||||||
$rll->wait(),
|
$rll->wait();
|
||||||
1,
|
ok(
|
||||||
"wait() returns 1 if all slaves catch up"
|
time - $t < 0.5,
|
||||||
|
"wait() returns immediately if all slaves are ready"
|
||||||
);
|
);
|
||||||
|
|
||||||
is_deeply(
|
is_deeply(
|
||||||
@@ -146,8 +120,9 @@ is_deeply(
|
|||||||
|
|
||||||
@waited = ();
|
@waited = ();
|
||||||
@lag = (5, 0, 0);
|
@lag = (5, 0, 0);
|
||||||
my $t = time;
|
@sleep = (1, 1, 1);
|
||||||
my $ret = $rll->wait(),
|
$t = time;
|
||||||
|
$rll->wait(),
|
||||||
ok(
|
ok(
|
||||||
time - $t >= 0.9,
|
time - $t >= 0.9,
|
||||||
"wait() waited a second"
|
"wait() waited a second"
|
||||||
@@ -155,52 +130,10 @@ ok(
|
|||||||
|
|
||||||
is_deeply(
|
is_deeply(
|
||||||
\@waited,
|
\@waited,
|
||||||
[1, 1, 2],
|
[1, 2, 1],
|
||||||
"wait() waited for first slave"
|
"wait() waited for first slave"
|
||||||
);
|
);
|
||||||
|
|
||||||
# Lower timeout to check if wait() will die.
|
|
||||||
$rll = new ReplicaLagLimiter(
|
|
||||||
spec => [qw(max=1 timeout=0.75 continue=no)],
|
|
||||||
slaves => [
|
|
||||||
{ dsn=>{n=>'slave1'}, dbh=>1 },
|
|
||||||
{ dsn=>{n=>'slave2'}, dbh=>2 },
|
|
||||||
],
|
|
||||||
get_lag => \&get_lag,
|
|
||||||
initial_n => 1000,
|
|
||||||
initial_t => 1,
|
|
||||||
target_t => 1,
|
|
||||||
);
|
|
||||||
|
|
||||||
@waited = ();
|
|
||||||
@lag = (5, 0, 0);
|
|
||||||
throws_ok(
|
|
||||||
sub { $rll->wait() },
|
|
||||||
qr/Timeout waiting for replica slave1 to catch up/,
|
|
||||||
"wait() dies on timeout"
|
|
||||||
);
|
|
||||||
|
|
||||||
# Continue despite not catching up.
|
|
||||||
$rll = new ReplicaLagLimiter(
|
|
||||||
spec => [qw(max=1 timeout=0.75 continue=yes)],
|
|
||||||
slaves => [
|
|
||||||
{ dsn=>{n=>'slave1'}, dbh=>1 },
|
|
||||||
{ dsn=>{n=>'slave2'}, dbh=>2 },
|
|
||||||
],
|
|
||||||
get_lag => \&get_lag,
|
|
||||||
initial_n => 1000,
|
|
||||||
initial_t => 1,
|
|
||||||
target_t => 1,
|
|
||||||
);
|
|
||||||
|
|
||||||
@waited = ();
|
|
||||||
@lag = (5, 0, 0);
|
|
||||||
is(
|
|
||||||
$rll->wait(),
|
|
||||||
0,
|
|
||||||
"wait() returns 0 if timeout and continue=yes"
|
|
||||||
);
|
|
||||||
|
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
# Done.
|
# Done.
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
|
Reference in New Issue
Block a user