From b1e01be2c28deef50330b113919070aa746b4ccf Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 19 Sep 2011 09:06:30 -0600 Subject: [PATCH] Finish, test, and docu ReplicaLagLimiter. --- lib/ReplicaLagLimiter.pm | 87 +++++++++++++------- t/lib/ReplicaLagLimiter.t | 169 ++++++++++++++++++++++++++++++++------ 2 files changed, 203 insertions(+), 53 deletions(-) diff --git a/lib/ReplicaLagLimiter.pm b/lib/ReplicaLagLimiter.pm index f01a2ba2..ef8b8975 100644 --- a/lib/ReplicaLagLimiter.pm +++ b/lib/ReplicaLagLimiter.pm @@ -25,8 +25,12 @@ # should be adjusted to prevent overloading slaves. returns # and adjustment (-1=down/decrease, 0=none, 1=up/increase) based on # a moving average of how long operations are taking on the master. -# Regardless of that, slaves may still lag, so waits for them -# to catchup based on the spec passed to . +# The desired master op time range is specified by target_time. By +# default, the moving average sample_size is 5, so the last 5 master op +# times are used. Increasing sample_size will smooth out variations +# and make adjustments less volatile. Regardless of all that, slaves may +# still lag, so waits for them to catch up based on the spec +# passed to . package ReplicaLagLimiter; use strict; @@ -34,9 +38,24 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use constant MKDEBUG => $ENV{MKDEBUG} || 0; +use Time::HiRes qw(sleep time); + +# Sub: new +# +# Required Arguments: +# spec - --replicat-lag spec (arrayref of option=value pairs) +# slaves - Arrayref of slave cxn, like [{dsn=>{...}, dbh=>...},...] +# get_lag - Callback passed slave dbh and returns slave's lag +# target_time - Arrayref with lower and upper range for master op time +# +# Optional Arguments: +# sample_size - Number of master op samples to use for moving avg (default 5) +# +# Returns: +# ReplicaLagLimiter object sub new { my ( $class, %args ) = @_; - my @required_args = qw(spec slaves get_lag); + my @required_args = qw(spec slaves get_lag target_time); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } @@ -49,16 +68,17 @@ sub new { } @$spec; my $self = { - target_time => 1, # optimal time for master ops - sample_size => 5, # number of master ops to use for moving average - max => 1, # max slave lag - timeout => 3600, # max time to wait for all slaves to catchup - check => 1, # sleep time between checking slave lag - continue => 'no', # return true even if timeout - %specs, # slave wait specs from caller - samples => [], # master op times - moving_avg => 0, # moving avgerge of samples + max => 1, # max slave lag + timeout => 3600, # max time to wait for all slaves to catch up + check => 1, # sleep time between checking slave lag + continue => 'no', # return true even if timeout + %specs, # slave wait specs from caller + samples => [], # master op times + moving_avg => 0, # moving avgerge of samples + slaves => $args{slaves}, get_lag => $args{get_lag}, + target_time => $args{target_time}, + sample_size => $args{sample_size} || 5, }; return bless $self, $class; @@ -74,17 +94,14 @@ sub validate_spec { my $have_max; foreach my $op ( @$spec ) { my ($key, $val) = split '=', $op; - if ( !$key ) { - die "invalid spec format, should be key=value: $spec\n"; + if ( !$key || !$val ) { + die "invalid spec format, should be option=value: $op\n"; } if ( $key !~ m/(?:max|timeout|continue)/i ) { - die "invalid spec: $spec\n"; - } - if ( !$val ) { - die "spec has no value: $spec\n"; + die "unknown option in spec: $op\n"; } if ( $key ne 'continue' && $val !~ m/^\d+$/ ) { - die "value must be an integer: $spec\n"; + die "value must be an integer: $op\n"; } if ( $key eq 'continue' && $val !~ m/(?:yes|no)/i ) { die "value for $key must be \"yes\" or \"no\"\n"; @@ -94,8 +111,19 @@ sub validate_spec { if ( !$have_max ) { die "max must be specified" } + return 1; } +# Sub: update +# Update moving average of master operation time. +# +# Parameters: +# t - Master operation time (int or float). +# +# Returns: +# -1 master op is too slow, it should be reduced +# 0 master op is within target time range, no adjustment +# 1 master is too fast; it can be increased sub update { my ($self, $t) = @_; MKDEBUG && _d('Sample time:', $t); @@ -109,9 +137,11 @@ sub update { my $sum = 0; map { $sum += $_ } @$samples; $self->{moving_avg} = $sum / $sample_size; - + MKDEBUG && _d('Moving average:', $self->{moving_avg}); - $adjust = $self->{target_time} <=> $self->{moving_avg}; + $adjust = $self->{moving_avg} < $self->{target_time}->[0] ? 1 + : $self->{moving_avg} > $self->{target_time}->[1] ? -1 + : 0; } else { MKDEBUG && _d('Saving sample', @$samples + 1, 'of', $sample_size); @@ -125,10 +155,10 @@ sub update { # Wait for Seconds_Behind_Master on all slaves to become < max. # # Optional Arguments: -# Progress - object. +# Progress - object to report waiting # # Returns: -# True if all slaves caught up, else 0 (timeout) +# True if all slaves catch up before timeout, else die unless continue is true sub wait { my ( $self, %args ) = @_; my @required_args = qw(); @@ -150,7 +180,7 @@ sub wait { if ( !$reported ) { print STDERR "Waiting for replica " . ($slaves->[$slave_no]->{dsn}->{n} || '') - . " to catchup...\n"; + . " to catch up...\n"; $reported = 1; } else { @@ -166,7 +196,7 @@ sub wait { my $slave = $slaves->[$slave_no]; my $t_start = time; while ($slave && time - $t_start < $timeout) { - MKDEBUG && _d('Checking slave lag on', $slave->{n}); + MKDEBUG && _d('Checking slave lag on', $slave->{dsn}->{n}); my $lag = $get_lag->($slave->{dbh}); if ( !defined $lag || $lag > $max ) { MKDEBUG && _d('Replica lag', $lag, '>', $max, '; sleeping', $check); @@ -178,10 +208,11 @@ sub wait { $slave = $slaves->[++$slave_no]; } } - if ( $slave_no >= @$slave ) { - MKDEBUG && _d('Timeout waiting for', $slaves->[$slave_no]->{dsn}->{n}); - return 0 unless $self->{continue}; + if ( $slave_no < @$slaves && $self->{continue} eq 'no' ) { + die "Timeout waiting for replica " . $slaves->[$slave_no]->{dsn}->{n} + . " to catch up\n"; } + MKDEBUG && _d('All slaves caught up'); return 1; } diff --git a/t/lib/ReplicaLagLimiter.t b/t/lib/ReplicaLagLimiter.t index 2b95762f..68bd65ca 100644 --- a/t/lib/ReplicaLagLimiter.t +++ b/t/lib/ReplicaLagLimiter.t @@ -9,57 +9,176 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 5; +use Test::More tests => 16; use ReplicaLagLimiter; use PerconaTest; - -my $lag = 0; -sub get_lag { - my ($dbh) = @_; - return $lag; -} - -my $sll = new ReplicaLagLimiter( - spec => [qw(max=1 timeout=3600 continue=no)], - slaves => [[]], - get_lag => \&get_lag, +my $rll = new ReplicaLagLimiter( + spec => [qw(max=1 timeout=3600 continue=no)], + slaves => [ + { dsn=>{n=>'slave1'}, dbh=>1 }, + { dsn=>{n=>'slave2'}, dbh=>2 }, + ], + get_lag => \&get_lag, + target_time => [0.9,1.1], ); +# ############################################################################ +# Validate spec. +# ############################################################################ +is( + ReplicaLagLimiter::validate_spec(['max=1','timeout=3600','continue=no']), + 1, + "Valid spec" +); + +throws_ok( + sub { + ReplicaLagLimiter::validate_spec(['max=1','timeout=3600','foo,bar']) + }, + qr/invalid spec format, should be option=value: foo,bar/, + "Invalid spec format" +); + +throws_ok( + sub { + ReplicaLagLimiter::validate_spec(['max=1','timeout=3600','foo=bar']) + }, + qr/unknown option in spec: foo=bar/, + "Unknown spec option" +); + +throws_ok( + sub { + ReplicaLagLimiter::validate_spec(['max=1','timeout=yes']) + }, + qr/value must be an integer: timeout=yes/, + "Value must be int" +); + +throws_ok( + sub { + ReplicaLagLimiter::validate_spec(['max=1','continue=1']) + }, + qr/value for continue must be "yes" or "no"/, + "Value must be yes or no" +); + +# ############################################################################ +# Update master op, see if we get correct adjustment result. +# ############################################################################ for (1..4) { - $sll->update(1); + $rll->update(1); } is( - $sll->update(1), + $rll->update(1), 0, "5 time samples, no adjustmenet" ); for (1..4) { - $sll->update(1); + $rll->update(1); } is( - $sll->update(1), + $rll->update(1), 0, "Moving avg hasn't changed" ); -$sll->update(2); -$sll->update(2); -$sll->update(2); +$rll->update(2); +$rll->update(2); +$rll->update(2); is( - $sll->update(2), + $rll->update(2), -1, "Adjust down (moving avg = 1.8)" ); -$sll->update(0.3); -$sll->update(0.3); +$rll->update(0.1); +$rll->update(0.1); is( - $sll->update(0.3), + $rll->update(0.1), 1, - "Adjust up (moving avg = 0.98)" + "Adjust up (moving avg = 0.86)" +); + +# ############################################################################ +# Fake waiting for slaves. +# ############################################################################ +my @waited = (); +my @lag = (); +sub get_lag { + my ($dbh) = @_; + push @waited, $dbh; + my $lag = shift @lag || 0; + return $lag; +} + +@lag = (0, 0); +is( + $rll->wait(), + 1, + "wait() returns 1 if all slaves catch up" +); + +is_deeply( + \@waited, + [1,2], + "Waited for all slaves" +); + +@waited = (); +@lag = (5, 0, 0); +my $t = time; +my $ret = $rll->wait(), +ok( + time - $t >= 0.9, + "wait() waited a second" +); + +is_deeply( + \@waited, + [1, 1, 2], + "wait() waited for first slave" +); + +# Lower timeout to check if wait() will die. +$rll = new ReplicaLagLimiter( + spec => [qw(max=1 timeout=0.75 continue=no)], + slaves => [ + { dsn=>{n=>'slave1'}, dbh=>1 }, + { dsn=>{n=>'slave2'}, dbh=>2 }, + ], + get_lag => \&get_lag, + target_time => [0.9,1.1], +); + +@waited = (); +@lag = (5, 0, 0); +throws_ok( + sub { $rll->wait() }, + qr/Timeout waiting for replica slave1 to catch up/, + "wait() dies on timeout" +); + +# Continue despite not catching up. +$rll = new ReplicaLagLimiter( + spec => [qw(max=1 timeout=0.75 continue=yes)], + slaves => [ + { dsn=>{n=>'slave1'}, dbh=>1 }, + { dsn=>{n=>'slave2'}, dbh=>2 }, + ], + get_lag => \&get_lag, + target_time => [0.9,1.1], +); + +@waited = (); +@lag = (5, 0, 0); +is( + $rll->wait(), + 1, + "wait() returns 1 despite timeout if continue=yes" ); # ############################################################################# @@ -69,7 +188,7 @@ my $output = ''; { local *STDERR; open STDERR, '>', \$output; - $sll->_d('Complete test coverage'); + $rll->_d('Complete test coverage'); } like( $output,