Adding --read-only-interval flag, and read-only check on wake-up

This commit is contained in:
Shlomi Noach
2018-02-13 09:30:55 +02:00
parent 5574d09a6e
commit 3bcdf913fb

View File

@@ -112,22 +112,22 @@ use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
sub check_recursion_method {
sub check_recursion_method {
my ($methods) = @_;
if ( @$methods != 1 ) {
if ( grep({ !m/processlist|hosts/i } @$methods)
&& $methods->[0] !~ /^dsn=/i )
{
die "Invalid combination of recursion methods: "
. join(", ", map { defined($_) ? $_ : 'undef' } @$methods) . ". "
. "Only hosts and processlist may be combined.\n"
}
}
else {
if ( @$methods != 1 ) {
if ( grep({ !m/processlist|hosts/i } @$methods)
&& $methods->[0] !~ /^dsn=/i )
{
die "Invalid combination of recursion methods: "
. join(", ", map { defined($_) ? $_ : 'undef' } @$methods) . ". "
. "Only hosts and processlist may be combined.\n"
}
}
else {
my ($method) = @$methods;
die "Invalid recursion method: " . ( $method || 'undef' )
unless $method && $method =~ m/^(?:processlist$|hosts$|none$|cluster$|dsn=)/i;
}
die "Invalid recursion method: " . ( $method || 'undef' )
unless $method && $method =~ m/^(?:processlist$|hosts$|none$|cluster$|dsn=)/i;
}
}
sub new {
@@ -156,7 +156,7 @@ sub get_slaves {
my $methods = $self->_resolve_recursion_methods($args{dsn});
return $slaves unless @$methods;
if ( grep { m/processlist|hosts/i } @$methods ) {
my @required_args = qw(dbh dsn);
foreach my $arg ( @required_args ) {
@@ -200,7 +200,7 @@ sub get_slaves {
else {
die "Unexpected recursion methods: @$methods";
}
return $slaves;
}
@@ -668,7 +668,7 @@ sub short_host {
}
sub is_replication_thread {
my ( $self, $query, %args ) = @_;
my ( $self, $query, %args ) = @_;
return unless $query;
my $type = lc($args{type} || 'all');
@@ -683,7 +683,7 @@ sub is_replication_thread {
if ( !$match ) {
if ( ($query->{User} || $query->{user} || '') eq "system user" ) {
PTDEBUG && _d("Slave replication thread");
if ( $type ne 'all' ) {
if ( $type ne 'all' ) {
my $state = $query->{State} || $query->{state} || '';
if ( $state =~ m/^init|end$/ ) {
@@ -696,7 +696,7 @@ sub is_replication_thread {
|Reading\sevent\sfrom\sthe\srelay\slog
|Has\sread\sall\srelay\slog;\swaiting
|Making\stemp\sfile
|Waiting\sfor\sslave\smutex\son\sexit)/xi;
|Waiting\sfor\sslave\smutex\son\sexit)/xi;
$match = $type eq 'slave_sql' && $slave_sql ? 1
: $type eq 'slave_io' && !$slave_sql ? 1
@@ -760,7 +760,7 @@ sub get_replication_filters {
replicate_do_db
replicate_ignore_db
replicate_do_table
replicate_ignore_table
replicate_ignore_table
replicate_wild_do_table
replicate_wild_ignore_table
);
@@ -771,7 +771,7 @@ sub get_replication_filters {
$filters{slave_skip_errors} = $row->[1] if $row->[1] && $row->[1] ne 'OFF';
}
return \%filters;
return \%filters;
}
@@ -905,7 +905,7 @@ sub new {
rules => [], # desc of rules for --help
mutex => [], # rule: opts are mutually exclusive
atleast1 => [], # rule: at least one opt is required
disables => {}, # rule: opt disables other opts
disables => {}, # rule: opt disables other opts
defaults_to => {}, # rule: opt defaults to value of other opt
DSNParser => undef,
default_files => [
@@ -1068,7 +1068,7 @@ sub _pod_to_specs {
}
push @specs, {
spec => $self->{parse_attributes}->($self, $option, \%attribs),
spec => $self->{parse_attributes}->($self, $option, \%attribs),
desc => $para
. (defined $attribs{default} ? " (default $attribs{default})" : ''),
group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
@@ -1159,7 +1159,7 @@ sub _parse_specs {
$self->{opts}->{$long} = $opt;
}
else { # It's an option rule, not a spec.
PTDEBUG && _d('Parsing rule:', $opt);
PTDEBUG && _d('Parsing rule:', $opt);
push @{$self->{rules}}, $opt;
my @participants = $self->_get_participants($opt);
my $rule_ok = 0;
@@ -1204,7 +1204,7 @@ sub _parse_specs {
PTDEBUG && _d('Option', $long, 'disables', @participants);
}
return;
return;
}
sub _get_participants {
@@ -1291,7 +1291,7 @@ sub _set_option {
}
sub get_opts {
my ( $self ) = @_;
my ( $self ) = @_;
foreach my $long ( keys %{$self->{opts}} ) {
$self->{opts}->{$long}->{got} = 0;
@@ -1422,7 +1422,7 @@ sub _check_opts {
else {
$err = join(', ',
map { "--$self->{opts}->{$_}->{long}" }
grep { $_ }
grep { $_ }
@restricted_opts[0..scalar(@restricted_opts) - 2]
)
. ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
@@ -1432,7 +1432,7 @@ sub _check_opts {
}
}
elsif ( $opt->{is_required} ) {
elsif ( $opt->{is_required} ) {
$self->save_error("Required option --$long must be specified");
}
@@ -1808,7 +1808,7 @@ sub clone {
$clone{$scalar} = $self->{$scalar};
}
return bless \%clone;
return bless \%clone;
}
sub _parse_size {
@@ -2322,7 +2322,7 @@ sub extends {
sub _load_module {
my ($class) = @_;
(my $file = $class) =~ s{::|'}{/}g;
$file .= '.pm';
{ local $@; eval { require "$file" } } # or warn $@;
@@ -2353,7 +2353,7 @@ sub has {
my $caller = scalar caller();
my $class_metadata = Lmo::Meta->metadata_for($caller);
for my $attribute ( ref $names ? @$names : $names ) {
my %args = @_;
my $method = ($args{is} || '') eq 'ro'
@@ -2372,16 +2372,16 @@ sub has {
if ( my $type_check = $args{isa} ) {
my $check_name = $type_check;
if ( my ($aggregate_type, $inner_type) = $type_check =~ /\A(ArrayRef|Maybe)\[(.*)\]\z/ ) {
$type_check = Lmo::Types::_nested_constraints($attribute, $aggregate_type, $inner_type);
}
my $check_sub = sub {
my ($new_val) = @_;
Lmo::Types::check_type_constaints($attribute, $type_check, $check_name, $new_val);
};
$class_metadata->{$attribute}{isa} = [$check_name, $check_sub];
my $orig_method = $method;
$method = sub {
@@ -2823,7 +2823,7 @@ sub get_dbh {
my $dbh;
my $tries = 2;
while ( !$dbh && $tries-- ) {
PTDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
PTDEBUG && _d($cxn_string, ' ', $user, ' ', $pass,
join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults ));
$dbh = eval { DBI->connect($cxn_string, $user, $pass, $defaults) };
@@ -2999,7 +2999,7 @@ sub set_vars {
}
}
return;
return;
}
sub _d {
@@ -3086,7 +3086,7 @@ sub daemonize {
close STDERR;
open STDERR, ">&STDOUT"
or die "Cannot dupe STDERR to STDOUT: $OS_ERROR";
or die "Cannot dupe STDERR to STDOUT: $OS_ERROR";
}
else {
if ( -t STDOUT ) {
@@ -3277,7 +3277,7 @@ sub split_unquote {
s/`\z//;
s/``/`/g;
}
return ($db, $tbl);
}
@@ -4164,7 +4164,7 @@ sub value_to_json {
my $b_obj = B::svref_2object(\$value); # for round trip problem
my $flags = $b_obj->FLAGS;
return $value # as is
return $value # as is
if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
my $type = ref($value);
@@ -4448,7 +4448,7 @@ sub _split_url {
or die(qq/SSL certificate not valid for $host\n/);
}
}
$self->{host} = $host;
$self->{port} = $port;
@@ -4922,7 +4922,7 @@ eval {
}
PTDEBUG && _d('Version check file', $file, 'in', $ENV{PWD});
return $file; # in the CWD
}
}
}
sub version_check_time_limit {
@@ -4939,11 +4939,11 @@ sub version_check {
PTDEBUG && _d('FindBin::Bin:', $FindBin::Bin);
if ( !$args{force} ) {
if ( $FindBin::Bin
&& (-d "$FindBin::Bin/../.bzr" ||
&& (-d "$FindBin::Bin/../.bzr" ||
-d "$FindBin::Bin/../../.bzr" ||
-d "$FindBin::Bin/../.git" ||
-d "$FindBin::Bin/../../.git"
)
-d "$FindBin::Bin/../.git" ||
-d "$FindBin::Bin/../../.git"
)
) {
PTDEBUG && _d("$FindBin::Bin/../.bzr disables --version-check");
return;
@@ -4967,7 +4967,7 @@ sub version_check {
PTDEBUG && _d(scalar @$instances_to_check, 'instances to check');
return unless @$instances_to_check;
my $protocol = 'https';
my $protocol = 'https';
eval { require IO::Socket::SSL; };
if ( $EVAL_ERROR ) {
PTDEBUG && _d($EVAL_ERROR);
@@ -5153,7 +5153,7 @@ sub pingback {
);
die "Failed to parse server requested programs: $response->{content}"
if !scalar keys %$items;
my $versions = get_versions(
items => $items,
instances => $instances,
@@ -5412,7 +5412,7 @@ sub get_from_mysql {
if ($item->{item} eq 'MySQL' && $item->{type} eq 'mysql_variable') {
@{$item->{vars}} = grep { $_ eq 'version' || $_ eq 'version_comment' } @{$item->{vars}};
}
my @versions;
my %version_for;
@@ -5674,7 +5674,7 @@ sub main {
# Reset all global vars between test runs else weird things happen.
@dbhs = ();
@sths = ();
@sths = ();
# ########################################################################
# Get configuration information.
@@ -5697,7 +5697,7 @@ sub main {
$o->save_error("Invalid --frames argument");
}
else {
push @times,
push @times,
$suf eq 's' ? $num # Seconds
: $suf eq 'm' ? $num * 60 # Minutes
: $suf eq 'h' ? $num * 3600 # Hours
@@ -5791,9 +5791,9 @@ sub main {
$dbh->do("USE `$db`");
# ########################################################################
# If --check-read-only option was given and we are in --update mode
# If --check-read-only option was given and we are in --update mode
# we wait until the server is writable, or run-time is over, or the sentinel file exists.
# We also do this check after daemon is up and running, but it is necessary
# We also do this check after daemon is up and running, but it is necessary
# to check this before attempting to create the table and inserting rows
# https://bugs.launchpad.net/percona-toolkit/+bug/1328686
# #######################################################################
@@ -5802,12 +5802,13 @@ sub main {
if ( server_is_readonly($dbh) && PTDEBUG ) {
_d('Server is read-only, waiting')
}
my $start_time = time;
my $run_time = $o->get('run-time');
my $interval = $o->get('interval') || 5;
my $start_time = time;
my $run_time = $o->get('run-time');
my $interval = $o->get('interval') || 5;
my $read_only_interval = $o->get('read-only-interval') || $interval;
while (server_is_readonly($dbh)) {
sleep($interval);
if (
sleep($read_only_interval);
if (
($run_time && $run_time < time - $start_time)
|| -f $sentinel
) {
@@ -5837,7 +5838,7 @@ sub main {
if ( $EVAL_ERROR ) {
die "Error creating heartbeat table:". $EVAL_ERROR;
}
# Now we insert first row.
# Some caveats:
@@ -5851,18 +5852,18 @@ sub main {
. qq/ INTO $db_tbl (ts, server_id) VALUES ($now_func, $server_id)/;
# 2)
# RBR (Row Based Replication) converts REPLACE to INSERT if row isn't
# RBR (Row Based Replication) converts REPLACE to INSERT if row isn't
# present in master. This breaks replication when the row is present in slave.
# Other workarounds also fail.
# Other workarounds also fail.
# INSERT IGNORE (ignore is not replicated if no error in master)
# DELETE then INSERT (DELETE is ignored, INSERT breaks replication)
# DELETE then INSERT (DELETE is ignored, INSERT breaks replication)
# INSERT ON DUPLICATE UPDATE (converts to simple INSERT)
# TRUNCATE gets trough and replicates! So we use that to wipe slave(s).
# TRUNCATE gets through and replicates! So we use that to wipe slave(s).
if ($o->get('replace')) {
my $sql_truncate = "TRUNCATE TABLE $db_tbl";
PTDEBUG && _d($sql_truncate);
eval { $dbh->do($sql_truncate) };
}
}
PTDEBUG && _d($sql_insert_row);
eval { $dbh->do($sql_insert_row); };
}
@@ -6018,8 +6019,8 @@ sub main {
my @vals;
return if $ro_check && server_is_readonly($dbh);
my $sql;
my $sql;
if ( @master_status_cols ) {
$sql = "SHOW MASTER STATUS";
PTDEBUG && _d($dbh, $sql);
@@ -6032,7 +6033,7 @@ sub main {
push @vals, map { $row->{$_} } @master_status_cols;
}
}
if ( @slave_status_cols ) {
$sql = "SHOW SLAVE STATUS";
PTDEBUG && _d($dbh, $sql);
@@ -6070,7 +6071,7 @@ sub main {
die $args{error};
}
);
return;
};
}
@@ -6106,7 +6107,7 @@ sub main {
PTDEBUG && _d('Delay', sprintf('%.6f', $delay), 'on', $hostname);
# Because we adjust for skew, if the ts are less than skew seconds
# apart (i.e. replication is very fast) then delay will be negative.
# apart (i.e. replication is very fast) then delay will be negative.
# So it's effectively 0 seconds of lag.
$delay = 0.00 if $delay < 0;
@@ -6151,7 +6152,7 @@ sub main {
# ########################################################################
# --check and exit if --check was given.
# ########################################################################
if ( $o->get('check') ) {
if ( $o->get('check') ) {
PTDEBUG && _d('--check and exit');
check_delay(
dsn => $dsn,
@@ -6223,6 +6224,18 @@ sub main {
sleep $next_interval - $time;
PTDEBUG && _d('Woke up at', ts(time));
if ( $o->get('check-read-only') && $o->get('update') ) {
my $read_only_interval = $o->get('read-only-interval') || $interval;
while (server_is_readonly($dbh)) {
sleep($read_only_interval);
if (
-f $sentinel
) {
return 0;
}
}
}
# Connect or reconnect if necessary.
if ( !$dbh->ping() ) {
$dbh = $dp->get_dbh($dp->get_cxn_params($dsn), { AutoCommit => 1 });
@@ -6247,7 +6260,7 @@ sub main {
} @$frames;
my $output = sprintf $format, $delay, @vals, $pk_val;
if ( my $file = $o->get('file') ) {
if ( my $file = $o->get('file') ) {
open my $file, '>', $file
or die "Can't open $file: $OS_ERROR";
print $file $output
@@ -6389,7 +6402,7 @@ sub check_delay {
# next interval is or should be. The caller can then sleep(time-next_interval)
# to wake up at that interval. If the caller misses the next interval,
# they just call the iterator until the next interval is later than the
# current time.
# current time.
sub make_interval_iter {
my ( $interval, $skew ) = @_;
die "I need an interval argument" unless defined $interval;
@@ -6549,7 +6562,7 @@ specify the L<"--master-server-id"> to use.
For example, if the replication hierarchy is "master -> slave1 -> slave2"
with corresponding server IDs 1, 2 and 3, you can:
pt-heartbeat --daemonize -D test --update -h master
pt-heartbeat --daemonize -D test --update -h master
pt-heartbeat --daemonize -D test --update -h slave1
Then check (or monitor) the replication delay from master to slave2:
@@ -6578,7 +6591,7 @@ Cluster (PXC), we recommend using 5.5.28-23.7 and newer.
If you are setting up heartbeat instances between cluster nodes, keep in mind
that, since the speed of the cluster is determined by its slowest node,
pt-heartbeat will not report how fast the cluster itself is, but only how
fast events are replicating from one node to another.
fast events are replicating from one node to another.
You must specify L<"--master-server-id"> for L<"--monitor"> and L<"--check">
instances.
@@ -6619,7 +6632,7 @@ before its delay. L<"--recurse"> only works with MySQL.
=item --check-read-only
Check if the server has read_only enabled; if it does, the tool skips doing
any inserts.
any inserts. See also L<"--read-only-interval">.
=item --config
@@ -6640,7 +6653,7 @@ be created with the following MAGIC_create_heartbeat table definition:
server_id int unsigned NOT NULL PRIMARY KEY,
file varchar(255) DEFAULT NULL, -- SHOW MASTER STATUS
position bigint unsigned DEFAULT NULL, -- SHOW MASTER STATUS
relay_master_log_file varchar(255) DEFAULT NULL, -- SHOW SLAVE STATUS
relay_master_log_file varchar(255) DEFAULT NULL, -- SHOW SLAVE STATUS
exec_master_log_pos bigint unsigned DEFAULT NULL -- SHOW SLAVE STATUS
);
@@ -6835,6 +6848,13 @@ Print the auto-detected or given L<"--master-server-id">. If L<"--check">
or L<"--monitor"> is specified, specifying this option will print the
auto-detected or given L<"--master-server-id"> at the end of each line.
=item --read-only-interval
type: int
When L<"--check-read-only"> is specified, the interval to sleep while the
server is found to be read-only. If unspecified, L<"--interval"> is used.
=item --recurse
type: int
@@ -7023,7 +7043,7 @@ as 5.5.25a.
Any updates or known problems are printed to STDOUT before the tool's normal
output. This feature should never interfere with the normal operation of the
tool.
tool.
For more information, visit L<https://www.percona.com/version-check>.