mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-26 15:31:55 +00:00
Merged pxc-pt-heartbeat
This commit is contained in:
121
bin/pt-heartbeat
121
bin/pt-heartbeat
@@ -20,6 +20,7 @@ BEGIN {
|
||||
Daemon
|
||||
Quoter
|
||||
TableParser
|
||||
Retry
|
||||
Transformers
|
||||
VersionCheck
|
||||
HTTPMicro
|
||||
@@ -2920,6 +2921,84 @@ sub _d {
|
||||
# End TableParser package
|
||||
# ###########################################################################
|
||||
|
||||
# ###########################################################################
|
||||
# Retry package
|
||||
# This package is a copy without comments from the original. The original
|
||||
# with comments and its test file can be found in the Bazaar repository at,
|
||||
# lib/Retry.pm
|
||||
# t/lib/Retry.t
|
||||
# See https://launchpad.net/percona-toolkit for more information.
|
||||
# ###########################################################################
|
||||
{
package Retry;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

# Constructor. Every named argument is stored on the object as-is.
sub new {
   my ( $class, %args ) = @_;
   return bless { %args }, $class;
}

# Call the "try" callback until it returns without dying or the attempts
# are used up.
#
# Required arguments (code refs):
#   try        - called as try(tryno => N) in scalar context; its value is
#                returned as soon as a call does not die
#   fail       - called as fail(tryno => N, error => $err) after a failed
#                attempt that is not the last one; a false return value
#                stops the retrying immediately
#   final_fail - called as final_fail(error => $err) when no attempt
#                succeeded; whatever it returns is returned to the caller
# Optional arguments:
#   wait       - called as wait(tryno => N) before the next attempt
#                (default: sleep 1)
#   tries      - maximum number of attempts (default: 3)
sub retry {
   my ( $self, %args ) = @_;

   for my $name ( qw(try fail final_fail) ) {
      $args{$name} or die "I need a $name argument";
   }
   my $try        = $args{try};
   my $fail       = $args{fail};
   my $final_fail = $args{final_fail};
   my $wait       = $args{wait}  ? $args{wait}  : sub { sleep 1; };
   my $tries      = $args{tries} ? $args{tries} : 3;

   my $last_error;
   TRY:
   for ( my $tryno = 1; $tryno <= $tries; $tryno++ ) {
      PTDEBUG && _d("Try", $tryno, "of", $tries);

      my $result = eval { $try->(tryno=>$tryno) };
      if ( !$EVAL_ERROR ) {
         PTDEBUG && _d("Try code succeeded");
         return $result;
      }

      PTDEBUG && _d("Try code failed:", $EVAL_ERROR);
      $last_error = $EVAL_ERROR;

      # After the final attempt neither fail nor wait is consulted; the
      # loop simply ends and final_fail takes over.
      next TRY unless $tryno < $tries;

      my $keep_going = $fail->(tryno=>$tryno, error=>$last_error);
      last TRY unless $keep_going;

      PTDEBUG && _d("Calling wait code");
      $wait->(tryno=>$tryno);
   }

   PTDEBUG && _d('Try code did not succeed');
   return $final_fail->(error=>$last_error);
}

# Debug printer: writes its arguments to STDERR on one logical line,
# prefixed with the calling package, line number and PID. Undefined values
# are shown as "undef" and embedded newlines get a "# " continuation prefix.
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts;
   foreach my $arg ( @_ ) {
      my $text = defined $arg ? $arg : 'undef';
      $text =~ s/\n/\n# /g;
      push @parts, $text;
   }
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}

1;
}
|
||||
# ###########################################################################
|
||||
# End Retry package
|
||||
# ###########################################################################
|
||||
|
||||
# ###########################################################################
|
||||
# Transformers package
|
||||
# This package is a copy without comments from the original. The original
|
||||
@@ -4920,10 +4999,31 @@ sub main {
|
||||
}
|
||||
}
|
||||
|
||||
$sth->execute(ts(time), @vals);
|
||||
PTDEBUG && _d($sth->{Statement});
|
||||
$sth->finish();
|
||||
|
||||
my $retry = Retry->new();
|
||||
$retry->retry(
|
||||
tries => 3,
|
||||
wait => sub { sleep 0.25; return; },
|
||||
try => sub {
|
||||
$sth->execute(ts(time), @vals);
|
||||
PTDEBUG && _d($sth->{Statement});
|
||||
$sth->finish();
|
||||
},
|
||||
fail => sub {
|
||||
my (%args) = @_;
|
||||
my $error = $args{error};
|
||||
if ( $error =~ m/Deadlock found/ ) {
|
||||
return 1; # try again
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
},
|
||||
final_fail => sub {
|
||||
my (%args) = @_;
|
||||
die $args{error};
|
||||
}
|
||||
);
|
||||
|
||||
return;
|
||||
};
|
||||
}
|
||||
@@ -5387,6 +5487,19 @@ information from C<SHOW MASTER STATUS> and C<SHOW SLAVE STATUS>. These
|
||||
columns are optional. If any are present, their corresponding information
|
||||
will be saved.
|
||||
|
||||
=head1 Percona XtraDB Cluster
|
||||
|
||||
Although pt-heartbeat should work with all supported versions of Percona XtraDB
|
||||
Cluster (PXC), we recommend using 5.5.28-23.7 or newer.
|
||||
|
||||
If you are setting up heartbeat instances between cluster nodes, keep in mind
|
||||
that, since the speed of the cluster is determined by its slowest node,
|
||||
pt-heartbeat will not report how fast the cluster itself is, but only how
|
||||
fast events are replicating from one node to another.
|
||||
|
||||
You must specify L<"--master-server-id"> for L<"--monitor"> and L<"--check">
|
||||
instances.
|
||||
|
||||
=head1 OPTIONS
|
||||
|
||||
Specify at least one of L<"--stop">, L<"--update">, L<"--monitor">, or L<"--check">.
|
||||
|
@@ -52,6 +52,10 @@ make_sandbox() {
|
||||
if [ -n "${master_port}" ]; then
|
||||
local master_listen_port=$(($master_port + 10))
|
||||
cluster_address="gcomm://$ip:$master_listen_port"
|
||||
|
||||
local this_listen_port=$(($port + 10))
|
||||
local this_cluster_address="gcomm://$ip:$this_listen_port"
|
||||
sed -e "s!gcomm://\$!$this_cluster_address!g" -i.bak "/tmp/$master_port/my.sandbox.cnf"
|
||||
fi
|
||||
|
||||
sed -e "s/ADDR/$ip/g" -i.bak "/tmp/$port/my.sandbox.cnf"
|
||||
@@ -118,7 +122,7 @@ make_sandbox() {
|
||||
debug_sandbox $port
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
# If the sandbox is a slave, start the slave.
|
||||
if [ "$type" = "slave" ]; then
|
||||
/tmp/$port/use -e "change master to master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=$master_port"
|
||||
|
@@ -299,6 +299,12 @@ case $opt in
|
||||
exit_status=$((exit_status | $?))
|
||||
|
||||
if [ "${2:-""}" = "cluster" ]; then
|
||||
# Bit of magic here. 'start-sandbox cluster new_node old_node'
|
||||
# changes old_node's my.sandbox.cnf's wsrep_cluster_address to
|
||||
# point to new_node. This is especially useful because otherwise,
|
||||
# calling stop/start like below on 12345 would create a new cluster.
|
||||
/tmp/12345/stop >/dev/null
|
||||
/tmp/12345/start >/dev/null
|
||||
echo -n "Checking that the cluster size is correct... "
|
||||
size=$(/tmp/12345/use -ss -e "SHOW STATUS LIKE 'wsrep_cluster_size'" | awk '{print $2}')
|
||||
if [ ${size:-0} -ne 3 ]; then
|
||||
|
384
t/pt-heartbeat/pxc.t
Normal file
384
t/pt-heartbeat/pxc.t
Normal file
@@ -0,0 +1,384 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
BEGIN {
|
||||
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
|
||||
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
|
||||
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
|
||||
};
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use Test::More;
|
||||
use Data::Dumper;
|
||||
|
||||
use File::Temp qw(tempfile);
|
||||
|
||||
use PerconaTest;
|
||||
use Sandbox;
|
||||
|
||||
require "$trunk/bin/pt-heartbeat";
|
||||
# Do this after requiring pt-hb, since it uses Mo
|
||||
require VersionParser;
|
||||
|
||||
# Sandbox plumbing: a DSN parser and one handle per cluster node. The
# constructors are called as Class->new(...) instead of the indirect-object
# form "new Class(...)", which perl has to disambiguate heuristically.
my $dp = DSNParser->new(opts=>$dsn_opts);
my $sb = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
my $node1 = $sb->get_dbh_for('node1');
my $node2 = $sb->get_dbh_for('node2');
my $node3 = $sb->get_dbh_for('node3');

# Skip the whole file unless all three nodes gave us a handle.
if ( !$node1 ) {
   plan skip_all => 'Cannot connect to cluster node1';
}
elsif ( !$node2 ) {
   plan skip_all => 'Cannot connect to cluster node2';
}
elsif ( !$node3 ) {
   plan skip_all => 'Cannot connect to cluster node3';
}

# These tests only make sense against Percona XtraDB Cluster.
my $db_flavor = VersionParser->new($node1)->flavor();
if ( $db_flavor !~ /XtraDB Cluster/ ) {
   plan skip_all => "PXC tests";
}

my $node1_dsn = $sb->dsn_for('node1');
my $node2_dsn = $sb->dsn_for('node2');
my $node3_dsn = $sb->dsn_for('node3');
my $node1_port = $sb->port_for('node1');
my $node2_port = $sb->port_for('node2');
my $node3_port = $sb->port_for('node3');

my $output;
my $exit;
# Only the generated name is wanted (OPEN => 0); it is used as the common
# prefix of the per-port pid files.
my $base_pidfile = (tempfile("/tmp/pt-heartbeat-test.XXXXXXXX", OPEN => 0, UNLINK => 0))[1];
my $sample = "t/pt-heartbeat/samples/";

my $sentinel = '/tmp/pt-heartbeat-sentinel';

# Start from a clean slate: no stale sentinel file, fresh test database.
diag(`rm -rf $sentinel >/dev/null 2>&1`);
$sb->create_dbs($node1, ['test']);

# PIDs of the forked children and the pid files they are asked to write;
# both lists are filled by start_update_instance().
my @exec_pids;
my @pidfiles;
|
||||
|
||||
# Fork a child that exec()s "pt-heartbeat --update" against the server
# listening on $port, wait for its pid file, and record a test that the
# file exists. The child's PID is pushed onto @exec_pids and the pid file
# path onto @pidfiles.
sub start_update_instance {
   my ($port) = @_;

   my $pidfile = "$base_pidfile.$port.pid";
   push @pidfiles, $pidfile;

   my $pid = fork();
   defined $pid or die "Cannot fork: $OS_ERROR";

   if ( !$pid ) {
      # Child: replace this process with pt-heartbeat.
      my $cmd  = "$trunk/bin/pt-heartbeat";
      my @argv = (
         qw(-h 127.0.0.1 -u msandbox -p msandbox -P), $port,
         qw(--database test --table heartbeat --create-table),
         qw(--update --interval 0.5 --pid), $pidfile,
      );
      exec { $cmd } $cmd, @argv;
      exit 1;  # reached only if exec failed
   }

   # Parent: remember the child and wait for it to come up.
   push @exec_pids, $pid;

   PerconaTest::wait_for_files($pidfile);
   ok(
      -f $pidfile,
      "--update on $port started"
   );
}
|
||||
|
||||
# Stop every pt-heartbeat --update instance started by this test, reap the
# processes, wait for their pid files to disappear and remove the sentinel.
sub stop_all_instances {
   # PIDs recorded in the pid files. They must be read before --stop is
   # issued, while the files still exist.
   my @file_pids = map { chomp; $_ } map { slurp_file($_) } @pidfiles;

   # Forked children plus pid-file PIDs. These used to be joined as
   # "my @pids = @exec_pids, map ...", where "=" binds tighter than ",",
   # so only @exec_pids was ever assigned. Keep numeric PIDs only and drop
   # duplicates (a child that did not daemonize appears in both lists).
   my %seen;
   my @pids = grep { /\A\d+\z/ && !$seen{$_}++ } @exec_pids, @file_pids;

   diag(`$trunk/bin/pt-heartbeat --stop >/dev/null`);

   waitpid($_, 0) for @pids;
   PerconaTest::wait_until(sub{ !-e $_ }) for @pidfiles;

   unlink $sentinel;
}
|
||||
|
||||
# Start one --update instance per cluster node.
my @node_ports = map { $sb->port_for($_) } qw(node1 node2 node3);
foreach my $port ( @node_ports ) {
   start_update_instance($port);
}

# #############################################################################
# Basic cluster tests
# #############################################################################

my $rows = $node1->selectall_hashref("select * from test.heartbeat", 'server_id');

is(
   scalar(keys %{$rows}),
   3,
   "Sanity check: All nodes are in the heartbeat table"
);

# Reduce every heartbeat row to its two slave-related columns.
my $only_slave_data = {};
foreach my $server_id ( keys %{$rows} ) {
   my $row = $rows->{$server_id};
   $only_slave_data->{$server_id} = {
      relay_master_log_file => $row->{relay_master_log_file},
      exec_master_log_pos   => $row->{exec_master_log_pos},
   };
}

my $same_data = { relay_master_log_file => undef, exec_master_log_pos => undef };
is_deeply(
   $only_slave_data,
   {
      12345 => $same_data,
      12346 => $same_data,
      12347 => $same_data,
   },
   "Sanity check: No slave data (relay log or master pos) is stored"
);

# --check without --master-server-id: the expected output is the error
# message matched below.
$output = output(
   sub { pt_heartbeat::main($node1_dsn, qw(-D test --check)) },
   stderr => 1,
);

like(
   $output,
   qr/\QThe --master-server-id option must be specified because the heartbeat table `test`.`heartbeat`/,
   "pt-heartbeat --check + PXC doesn't autodetect a master if there isn't any"
);

# --check with an explicit --master-server-id.
$output = output(
   sub {
      pt_heartbeat::main($node1_dsn, qw(-D test --check),
         '--master-server-id', $node3_port)
   },
   stderr => 1,
);

# Normalize the measured delay so the comparison is exact.
$output =~ s/\d\.\d{2}/0.00/g;
is(
   $output,
   "0.00\n",
   "pt-heartbeat --check + PXC works with --master-server-id"
);

# Test --monitor

$output = output(
   sub {
      pt_heartbeat::main($node1_dsn,
         qw(-D test --monitor --run-time 1s),
         '--master-server-id', $node3_port)
   },
   stderr => 1,
);

$output =~ s/\d\.\d{2}/0.00/g;
is(
   $output,
   "0.00s [  0.00s,  0.00s,  0.00s ]\n",
   "--monitor works"
);
|
||||
|
||||
# Try to generate some lag between cluster nodes. Rather brittle at the moment.

# Lifted from alter active table
my $pt_osc_sample = "t/pt-online-schema-change/samples";

my $query_table_stop   = "/tmp/query_table.$PID.stop";
my $query_table_pid    = "/tmp/query_table.$PID.pid";
my $query_table_output = "/tmp/query_table.$PID.output";

$sb->create_dbs($node1, ['pt_osc']);
$sb->load_file('master', "$pt_osc_sample/basic_no_fks_innodb.sql");

# Reload the table contents on node1, then let the other servers catch up.
foreach my $sql (
   "USE pt_osc",
   "TRUNCATE TABLE t",
   "LOAD DATA INFILE '$trunk/$pt_osc_sample/basic_no_fks.data' INTO TABLE t",
   "ANALYZE TABLE t",
) {
   $node1->do($sql);
}
$sb->wait_for_slaves();

diag(`rm -rf $query_table_stop`);
diag(`echo > $query_table_output`);

# Background load #1: query_table.pl, which runs until the stop file
# shows up and records its PID in $query_table_pid.
my $cmd = "$trunk/$pt_osc_sample/query_table.pl";
system("$cmd 127.0.0.1 $node1_port pt_osc t id $query_table_stop $query_table_pid >$query_table_output 2>&1 &");
wait_until(sub{-e $query_table_pid});

# Reload sakila
system "$trunk/sandbox/load-sakila-db $node1_port &";

$output = output(
   sub {
      pt_heartbeat::main($node3_dsn,
         qw(-D test --monitor --run-time 5s),
         '--master-server-id', $node1_port)
   },
   stderr => 1,
);

# At least one reported line must show a non-zero delay.
like(
   $output,
   qr/^(?:0\.(?:\d[1-9]|[1-9]\d)|\d*[1-9]\d*\.\d{2})s\s+\[/m,
   "pt-heartbeat can detect replication lag between nodes"
);

# Tell query_table.pl to stop and wait until its process is gone.
diag(`touch $query_table_stop`);
chomp(my $p = slurp_file($query_table_pid));
wait_until(sub{!kill 0, $p});

$node1->do(q{DROP DATABASE pt_osc});

$sb->wait_for_slaves();
|
||||
|
||||
# #############################################################################
|
||||
# cluster, node1 -> slave, run on node1
|
||||
# #############################################################################
|
||||
|
||||
# Start an extra sandbox, cslave1, as a slave of node1.
my ($slave_dbh, $slave_dsn) = $sb->start_sandbox(
   server => 'cslave1',
   type   => 'slave',
   master => 'node1',
   env    => q/BINLOG_FORMAT="ROW"/,
);

$sb->create_dbs($slave_dbh, ['test']);

start_update_instance($sb->port_for('cslave1'));
PerconaTest::wait_for_table($slave_dbh, "test.heartbeat", "1=1");

$output = output(sub{
   pt_heartbeat::main($slave_dsn, qw(-D test --check)),
   },
   stderr => 1,
);

like(
   $output,
   qr/\d\.\d{2}\n/,
   "pt-heartbeat --check works on a slave of a cluster node"
);

$output = output(sub {
   pt_heartbeat::main($slave_dsn,
      qw(-D test --monitor --run-time 2s))
   },
   stderr => 1,
);

# The decimal point is escaped so that only a real "N.NNs" delay matches;
# a bare "." would accept any character in that position.
like(
   $output,
   qr/^\d\.\d{2}s\s+\[/,
   "pt-heartbeat --monitor + slave of a node1, without --master-server-id"
);

$output = output(sub {
   pt_heartbeat::main($slave_dsn,
      qw(-D test --monitor --run-time 2s),
      '--master-server-id', $node3_port)
   },
   stderr => 1,
);

like(
   $output,
   qr/^\d\.\d{2}s\s+\[/,
   "pt-heartbeat --monitor + slave of node1, --master-server-id pointing to node3"
);
|
||||
|
||||
# #############################################################################
|
||||
# master -> node1 in cluster
|
||||
# #############################################################################
|
||||
|
||||
# CAREFUL! See the comments in t/pt-table-checksum/pxc.t about cmaster.
|
||||
# Nearly everything applies here.
|
||||
|
||||
# Start a standalone master, cmaster, and make node1 replicate from it.
my ($master_dbh, $master_dsn) = $sb->start_sandbox(
   server => 'cmaster',
   type   => 'master',
   env    => q/BINLOG_FORMAT="ROW"/,
);

my $cmaster_port = $sb->port_for('cmaster');

$sb->create_dbs($master_dbh, ['test']);

$master_dbh->do("FLUSH LOGS");
$master_dbh->do("RESET MASTER");

$sb->set_as_slave('node1', 'cmaster');

start_update_instance($sb->port_for('cmaster'));
PerconaTest::wait_for_table($node1, "test.heartbeat", "server_id=$cmaster_port");

$output = output(sub{
   pt_heartbeat::main($node1_dsn, qw(-D test --check --print-master-server-id)),
   },
   stderr => 1,
);

# The decimal point is escaped so that only a real "N.NN <server id>" line
# matches; a bare "." would accept any character in that position.
like(
   $output,
   qr/^\d\.\d{2} $cmaster_port$/,
   "--print-master-id works for master -> $node1_port, when run from $node1_port"
);

# Wait until node2 & node3 get cmaster in their heartbeat tables
$sb->wait_for_slaves(master => 'node1', slave => 'node2');
$sb->wait_for_slaves(master => 'node1', slave => 'node3');

foreach my $test (
   [ $node2_port, $node2_dsn, $node2 ],
   [ $node3_port, $node3_dsn, $node3 ],
) {
   my ($port, $dsn, $dbh) = @$test;

   $output = output(sub{
      pt_heartbeat::main($dsn, qw(-D test --check --print-master-server-id)),
      },
      stderr => 1,
   );

   # This could be made to work, see the node autodiscovery branch
   TODO: {
      local $::TODO = "cmaster -> node1, other nodes can't autodetect the master";
      like(
         $output,
         qr/$cmaster_port/,
         "--print-master-id works for master -> $node1_port, when run from $port"
      );
   }

   $output = output(sub{
      pt_heartbeat::main($dsn, qw(-D test --check --master-server-id), $cmaster_port),
      },
      stderr => 1,
   );

   # Normalize the measured delay so the comparison is exact.
   $output =~ s/\d\.\d{2}/0.00/g;
   is(
      $output,
      "0.00\n",
      "--check + explicit --master-server-id work for master -> node1, run from $port"
   );
}
|
||||
|
||||
# ############################################################################
|
||||
# Stop the --update instances.
|
||||
# ############################################################################
|
||||
|
||||
stop_all_instances();

# ############################################################################
# Disconnect & stop the two servers we started
# ############################################################################

# We have to do this after the --stop, otherwise the --update processes will
# spew a bunch of warnings and clog

foreach my $dbh ( $slave_dbh, $master_dbh ) {
   $dbh->disconnect;
}
$sb->stop_sandbox('cslave1', 'cmaster');

# node1 was made a slave of cmaster earlier; undo that.
foreach my $sql ( "STOP SLAVE", "RESET SLAVE" ) {
   $node1->do($sql);
}

# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($node1);

# Restart the server in /tmp/12345.
diag(`/tmp/12345/stop`);
diag(`/tmp/12345/start`);

if ( !ok($sb->ok(), "Sandbox servers") ) {
   BAIL_OUT(__FILE__ . " broke the sandbox");
}
done_testing;
|
Reference in New Issue
Block a user