mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-10 13:11:32 +00:00
Rewrite and enhance lib/Percona/XtraDB/Cluster.pm and t/pt-table-checksum/pxc.t. Change how ptc handles various cluster issues. Change lib/Sandbox.pm subs like start_sandbox() and start_cluster(). PXC docs in ptc are a work in progress.
This commit is contained in:
@@ -3375,87 +3375,66 @@ sub _d {
|
||||
# See https://launchpad.net/percona-toolkit for more information.
|
||||
# ###########################################################################
|
||||
{
|
||||
|
||||
package Percona::XtraDB::Cluster;
|
||||
use Mo;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
||||
|
||||
use Mo;
|
||||
use Data::Dumper;
|
||||
|
||||
sub get_cluster_name {
|
||||
my ($self, $cxn) = @_;
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep\_cluster\_name'";
|
||||
PTDEBUG && _d($cxn->name, $sql);
|
||||
my (undef, $cluster_name) = $cxn->dbh->selectrow_array($sql);
|
||||
return $cluster_name;
|
||||
}
|
||||
|
||||
sub is_cluster_node {
|
||||
my ($self, $cxn) = @_;
|
||||
return $self->{is_cluster_node}->{$cxn} if defined $self->{is_cluster_node}->{$cxn};
|
||||
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep_on'";
|
||||
PTDEBUG && _d($sql);
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep\_on'";
|
||||
PTDEBUG && _d($cxn->name, $sql);
|
||||
my $row = $cxn->dbh->selectrow_arrayref($sql);
|
||||
PTDEBUG && _d(defined $row ? @$row : 'undef');
|
||||
$self->{is_cluster_node}->{$cxn} = $row && $row->[1]
|
||||
? ($row->[1] eq 'ON' || $row->[1] eq '1')
|
||||
: 0;
|
||||
PTDEBUG && _d(Dumper($row));
|
||||
return unless $row && $row->[1] && ($row->[1] eq 'ON' || $row->[1] eq '1');
|
||||
|
||||
return $self->{is_cluster_node}->{$cxn};
|
||||
my $cluster_name = $self->get_cluster_name($cxn);
|
||||
return $cluster_name;
|
||||
}
|
||||
|
||||
sub same_node {
|
||||
my ($self, $cxn1, $cxn2) = @_;
|
||||
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep\_sst\_receive\_address'";
|
||||
PTDEBUG && _d($cxn1->name, $sql);
|
||||
my (undef, $val1) = $cxn1->dbh->selectrow_array($sql);
|
||||
PTDEBUG && _d($cxn2->name, $sql);
|
||||
my (undef, $val2) = $cxn2->dbh->selectrow_array($sql);
|
||||
|
||||
return ($val1 || '') eq ($val2 || '');
|
||||
}
|
||||
|
||||
sub same_cluster {
|
||||
my ($self, $cxn1, $cxn2) = @_;
|
||||
return unless $self->is_cluster_node($cxn1) && $self->is_cluster_node($cxn2);
|
||||
return if $self->is_master_of($cxn1, $cxn2) || $self->is_master_of($cxn2, $cxn1);
|
||||
|
||||
my $sql = q{SHOW VARIABLES LIKE 'wsrep_cluster_name'};
|
||||
PTDEBUG && _d($sql);
|
||||
my (undef, $row) = $cxn1->dbh->selectrow_array($sql);
|
||||
my (undef, $cxn2_row) = $cxn2->dbh->selectrow_array($sql);
|
||||
return 0 if !$self->is_cluster_node($cxn1) || !$self->is_cluster_node($cxn2);
|
||||
|
||||
return unless $row eq $cxn2_row;
|
||||
my $cluster1 = $self->get_cluster_name($cxn1);
|
||||
my $cluster2 = $self->get_cluster_name($cxn2);
|
||||
|
||||
$sql = q{SHOW VARIABLES LIKE 'wsrep_cluster_address'};
|
||||
PTDEBUG && _d($sql);
|
||||
my (undef, $addr) = $cxn1->dbh->selectrow_array($sql);
|
||||
my (undef, $cxn2_addr) = $cxn2->dbh->selectrow_array($sql);
|
||||
|
||||
return if $addr eq 'gcomm://' && $cxn2_addr eq 'gcomm://';
|
||||
|
||||
if ( $addr eq 'gcomm://' ) {
|
||||
$addr = $self->_find_full_gcomm_addr($cxn1->dbh);
|
||||
}
|
||||
elsif ( $cxn2_addr eq 'gcomm://' ) {
|
||||
$cxn2_addr = $self->_find_full_gcomm_addr($cxn2->dbh);
|
||||
}
|
||||
|
||||
return 1 if lc($addr) eq lc($cxn2_addr);
|
||||
|
||||
return 1;
|
||||
return ($cluster1 || '') eq ($cluster2 || '');
|
||||
}
|
||||
|
||||
sub is_master_of {
|
||||
my ($self, $cxn1, $cxn2) = @_;
|
||||
|
||||
my $cxn2_dbh = $cxn2->dbh;
|
||||
my $sql = q{SHOW SLAVE STATUS};
|
||||
PTDEBUG && _d($sql);
|
||||
local $cxn2_dbh->{FetchHashKeyName} = 'NAME_lc';
|
||||
my $slave_status = $cxn2_dbh->selectrow_hashref($sql);
|
||||
return unless ref($slave_status) eq 'HASH';
|
||||
|
||||
my $port = $cxn1->dsn->{P};
|
||||
return unless $slave_status->{master_port} eq $port;
|
||||
return 1 if $cxn1->dsn->{h} eq $slave_status->{master_host};
|
||||
|
||||
my $host = scalar gethostbyname($cxn1->dsn->{h});
|
||||
my $master_host = scalar gethostbyname($slave_status->{master_host});
|
||||
return 1 if $master_host eq $host;
|
||||
return;
|
||||
}
|
||||
|
||||
sub _find_full_gcomm_addr {
|
||||
my ($self, $dbh) = @_;
|
||||
|
||||
my $sql = q{SHOW VARIABLES LIKE 'wsrep_provider_options'};
|
||||
PTDEBUG && _d($sql);
|
||||
my (undef, $provider_opts) = $dbh->selectrow_array($sql);
|
||||
my ($prov_addr) = $provider_opts =~ m{\Qgmcast.listen_addr\E\s*=\s*tcp://([^:]+:[0-9]+)\s*;}i;
|
||||
my $full_gcomm = "gcomm://$prov_addr";
|
||||
PTDEBUG && _d("gcomm address: ", $full_gcomm);
|
||||
return $full_gcomm;
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
1;
|
||||
@@ -8658,8 +8637,12 @@ sub main {
|
||||
$have_time = sub { return 1; };
|
||||
}
|
||||
|
||||
# PXC helper class
|
||||
# ########################################################################
|
||||
# Set up PXC stuff.
|
||||
# ########################################################################
|
||||
my $cluster = Percona::XtraDB::Cluster->new();
|
||||
my %cluster_name_for;
|
||||
$cluster_name_for{$master_cxn} = $cluster->is_cluster_node($master_cxn);
|
||||
|
||||
# ########################################################################
|
||||
# If this is not a dry run (--explain was not specified), then we're
|
||||
@@ -8697,9 +8680,29 @@ sub main {
|
||||
dbh => $master_dbh,
|
||||
dsn => $master_dsn,
|
||||
make_cxn => sub {
|
||||
return $make_cxn->(@_, prev_dsn => $master_cxn->dsn());
|
||||
my $cxn = $make_cxn->(@_, prev_dsn => $master_cxn->dsn());
|
||||
$cluster_name_for{$cxn} = $cluster->is_cluster_node($cxn);
|
||||
return $cxn;
|
||||
},
|
||||
);
|
||||
|
||||
# If the "master" is a cluster node, then a DSN table should have been
|
||||
# used, and it may have all nodes' DSNs so the user can run the tool
|
||||
# on any node, in which case it has the "master" node, the DSN given
|
||||
# on the command line. So detect and remove this dupe.
|
||||
if ( $cluster_name_for{$master_cxn} ) {
|
||||
@$slaves = grep {
|
||||
my $slave_cxn = $_;
|
||||
if ( $cluster->same_node($master_cxn, $slave_cxn) ) {
|
||||
PTDEBUG && _d('Removing ', $slave_cxn->name, 'from slaves',
|
||||
'because it is the master');
|
||||
0;
|
||||
}
|
||||
else {
|
||||
$slave_cxn;
|
||||
}
|
||||
} @$slaves;
|
||||
}
|
||||
PTDEBUG && _d(scalar @$slaves, 'slaves found');
|
||||
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/938068
|
||||
@@ -8711,8 +8714,10 @@ sub main {
|
||||
}
|
||||
|
||||
my $err = '';
|
||||
for my $slave_cxn (@$slaves) {
|
||||
next if $cluster->is_cluster_node($slave_cxn);
|
||||
for my $slave_cxn ( @$slaves ) {
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/1080385
|
||||
next if $cluster_name_for{$slave_cxn};
|
||||
|
||||
my $slave_binlog = 'STATEMENT';
|
||||
if ( VersionParser->new($slave_cxn->dbh) >= '5.1.5' ) {
|
||||
($slave_binlog) = $slave_cxn->dbh->selectrow_array(
|
||||
@@ -8731,41 +8736,90 @@ sub main {
|
||||
die $err if $err;
|
||||
}
|
||||
|
||||
if ( $cluster->is_cluster_node($master_cxn) ) {
|
||||
if ( $cluster_name_for{$master_cxn} ) {
|
||||
if ( !@$slaves ) {
|
||||
die $master_cxn->name() . " is a cluster node but no other nodes "
|
||||
. "or regular replicas were found. Use --recursion-method=dsn "
|
||||
. "to specify the other nodes in the cluster.\n";
|
||||
}
|
||||
else {
|
||||
my $err = '';
|
||||
for my $slave (@$slaves) {
|
||||
if ( $cluster->is_cluster_node($slave)
|
||||
&& !$cluster->same_cluster($slave, $master_cxn) ) {
|
||||
$err .= $slave->name() . " is a cluster node, but doesn't "
|
||||
. "belong to the same cluster as " . $master_cxn->name()
|
||||
. ". This is not currently supported; You can try "
|
||||
. "using --recursion-method=dsn to specify all nodes "
|
||||
. "in the slave cluster.\n"
|
||||
|
||||
# Make sure the master and all nodes are in the same cluster.
|
||||
my @other_cluster;
|
||||
foreach my $slave ( @$slaves ) {
|
||||
next unless $cluster_name_for{$slave};
|
||||
if ( $cluster_name_for{$master_cxn} ne $cluster_name_for{$slave}) {
|
||||
push @other_cluster, $slave;
|
||||
}
|
||||
}
|
||||
warn $err if $err;
|
||||
if ( @other_cluster ) {
|
||||
die $master_cxn->name . " is in cluster "
|
||||
. $cluster_name_for{$master_cxn} . " but these nodes are "
|
||||
. "in other clusters:\n"
|
||||
. join("\n",
|
||||
map {' ' . $_->name . " is in cluster $cluster_name_for{$_}"}
|
||||
@other_cluster) . "\n"
|
||||
. "All nodes must be in the same cluster. "
|
||||
. "For more information, please read the Percona XtraDB "
|
||||
. "Cluster section of the tool's documentation.\n";
|
||||
}
|
||||
}
|
||||
elsif ( @$slaves ) {
|
||||
my $err = '';
|
||||
for my $slave (@$slaves) {
|
||||
if ( $cluster->is_cluster_node($slave) ) {
|
||||
$err .= $slave->name() . " is a cluster node, but "
|
||||
. $master_cxn->name() . " is not. This is not currently "
|
||||
. "supported; You can try to specify "
|
||||
. "all nodes in the cluster with "
|
||||
. "--recursion-method=dsn if you want them checksummed.\n"
|
||||
}
|
||||
}
|
||||
warn $err if $err;
|
||||
# master is not a cluster node, but what about the slaves?
|
||||
my $direct_slave; # master -> direct_slave
|
||||
my @slaves; # traditional slaves
|
||||
my @nodes; # cluster nodes
|
||||
foreach my $slave ( @$slaves ) {
|
||||
if ( !$cluster_name_for{$slave} ) {
|
||||
push @slaves, $slave;
|
||||
next;
|
||||
}
|
||||
|
||||
my $is_master_of = eval {
|
||||
$ms->is_master_of($master_cxn->dbh, $slave->dbh);
|
||||
};
|
||||
if ( $EVAL_ERROR && $EVAL_ERROR =~ m/is not a slave/ ) {
|
||||
push @nodes, $slave;
|
||||
}
|
||||
elsif ( $is_master_of ) {
|
||||
$direct_slave = $slave;
|
||||
}
|
||||
else {
|
||||
# Another error could have happened but we don't really
|
||||
# care. We know for sure the slave is a node, so just
|
||||
# presume that and carry on.
|
||||
push @nodes, $slave;
|
||||
}
|
||||
}
|
||||
|
||||
my $err = '';
|
||||
if ( @nodes ) {
|
||||
if ( $direct_slave ) {
|
||||
warn "Diffs will only be detected if the cluster is "
|
||||
. "consistent with " . $direct_slave->name . " because "
|
||||
. $master_cxn->name . " is a traditional replication master "
|
||||
. " but these replicas are cluster nodes:\n"
|
||||
. join("\n", map { ' ' . $_->name } @nodes) . "\n"
|
||||
. "For more information, please read the Percona XtraDB "
|
||||
. "Cluster section of the tool's documentation.\n";
|
||||
}
|
||||
else {
|
||||
warn "Diffs may not be detected on these cluster nodes "
|
||||
. "because the direct replica of " . $master_cxn->name
|
||||
. " was not found or specified:\n"
|
||||
. join("\n", map { ' ' . $_->name } @nodes) . "\n"
|
||||
. "For more information, please read the Percona XtraDB "
|
||||
. "Cluster section of the tool's documentation.\n";
|
||||
}
|
||||
|
||||
if ( @slaves ) {
|
||||
warn "Diffs will only be detected on these replicas if "
|
||||
. "they replicate from " . $master_cxn->name . ":\n"
|
||||
. join("\n", map { ' ' . $_->name } @slaves) . "\n"
|
||||
. "For more information, please read the Percona XtraDB "
|
||||
. "Cluster section of the tool's documentation.\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( $o->get('check-slave-lag') ) {
|
||||
PTDEBUG && _d('Will use --check-slave-lag to check for slave lag');
|
||||
@@ -8786,8 +8840,7 @@ sub main {
|
||||
# to appear should be sufficient.
|
||||
@$slave_lag_cxns = grep {
|
||||
my $slave_cxn = $_;
|
||||
if ( $cluster->is_cluster_node($slave_cxn)
|
||||
&& $cluster->same_cluster($master_cxn, $slave_cxn) ) {
|
||||
if ( $cluster_name_for{$slave_cxn} ) {
|
||||
warn "Not checking replica lag on " . $slave_cxn->name()
|
||||
. " because it is a cluster node.\n";
|
||||
0;
|
||||
@@ -10689,6 +10742,35 @@ can try something like the following:
|
||||
SET boundaries = COALESCE(CONCAT('id BETWEEN ', lower_boundary,
|
||||
' AND ', upper_boundary), '1=1');
|
||||
|
||||
=head1 Percona XtraDB Cluster
|
||||
|
||||
pt-table-checksum works with Percona XtraDB Cluster (PXC) 5.5.27-23.6 and newer,
|
||||
but only the following PXC setups are supported, all of which require that
|
||||
you use the C<dsn> method for L<"--recursion-method"> to specify cluster nodes.
|
||||
Also, the lag check (see L<"REPLICA CHECKS">) is not performed for cluster
|
||||
nodes.
|
||||
|
||||
=over
|
||||
|
||||
=item Single cluster
|
||||
|
||||
The simplest PXC setup is a single cluster: all servers are cluster nodes,
|
||||
and there are no regular replicas. If all nodes are specified in the
|
||||
DSN table (see L<"--recursion-method">), then you can run the tool on any
|
||||
node and any diffs on any other nodes will be detected.
|
||||
|
||||
=item Single cluster with a replica
|
||||
|
||||
Any cluster node can also be a regular master and replicate to a regular
|
||||
replica. The tool can only detect diffs on a replica if you run it on
|
||||
the "master node", i.e. the cluster node that the replica replicates from.
|
||||
|
||||
Mixed replication setups are not currently supported. For example, the tool
|
||||
does not work completely if the master host is replicating to a cluster,
|
||||
or if the cluster is replicating to another cluster. In short, the only
|
||||
supported setup is a single cluster with nodes optionally having traditional
|
||||
replication slaves.
|
||||
|
||||
=head1 OUTPUT
|
||||
|
||||
The tool prints tabular results, one line per table:
|
||||
@@ -11669,24 +11751,10 @@ past any replicas using row-based replication that are masters for
|
||||
further replicas.
|
||||
|
||||
The tool automatically checks the C<binlog_format> on all servers.
|
||||
See L<"--[no]check-binlog-format">
|
||||
See L<"--[no]check-binlog-format"> .
|
||||
|
||||
(L<Bug 899415|https://bugs.launchpad.net/percona-toolkit/+bug/899415>)
|
||||
|
||||
=item Percona XtraDB Cluster
|
||||
|
||||
pt-table-checksum works with Percona XtraDB Cluster 5.5.27-23.6 and newer.
|
||||
The C<dsn> method for L<"--recursion-method"> must be used to specify cluster
|
||||
nodes and regular replicas because nodes are not regular replicas so they
|
||||
cannot be detected automatically. The lag check (see L<"REPLICA CHECKS">)
|
||||
is not performed for cluster nodes.
|
||||
|
||||
Mixed replication setups are not currently supported. For example, the tool
|
||||
does not work completely if the master host is replicating to a cluster,
|
||||
or if the cluster is replicating to another cluster. In short, the only
|
||||
supported setup is a single cluster with nodes optionally having traditional
|
||||
replication slaves.
|
||||
|
||||
=back
|
||||
|
||||
=head1 BUGS
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# This program is copyright 2011 Percona Inc.
|
||||
# This program is copyright 2012 Percona Inc.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
@@ -19,104 +19,60 @@
|
||||
# ###########################################################################
|
||||
{
|
||||
# Package: Percona::XtraDB::Cluster
|
||||
# Percona::XtraDB::Cluster has helper methods to deal with Percona XtraDB Cluster
|
||||
# based servers
|
||||
|
||||
# Helper methods for dealing with Percona XtraDB Cluster nodes.
|
||||
package Percona::XtraDB::Cluster;
|
||||
use Mo;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
||||
|
||||
use Mo;
|
||||
use Data::Dumper;
|
||||
|
||||
sub get_cluster_name {
|
||||
my ($self, $cxn) = @_;
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep\_cluster\_name'";
|
||||
PTDEBUG && _d($cxn->name, $sql);
|
||||
my (undef, $cluster_name) = $cxn->dbh->selectrow_array($sql);
|
||||
return $cluster_name;
|
||||
}
|
||||
|
||||
sub is_cluster_node {
|
||||
my ($self, $cxn) = @_;
|
||||
return $self->{is_cluster_node}->{$cxn} if defined $self->{is_cluster_node}->{$cxn};
|
||||
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep_on'";
|
||||
PTDEBUG && _d($sql);
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep\_on'";
|
||||
PTDEBUG && _d($cxn->name, $sql);
|
||||
my $row = $cxn->dbh->selectrow_arrayref($sql);
|
||||
PTDEBUG && _d(defined $row ? @$row : 'undef');
|
||||
$self->{is_cluster_node}->{$cxn} = $row && $row->[1]
|
||||
? ($row->[1] eq 'ON' || $row->[1] eq '1')
|
||||
: 0;
|
||||
PTDEBUG && _d(Dumper($row));
|
||||
return unless $row && $row->[1] && ($row->[1] eq 'ON' || $row->[1] eq '1');
|
||||
|
||||
return $self->{is_cluster_node}->{$cxn};
|
||||
my $cluster_name = $self->get_cluster_name($cxn);
|
||||
return $cluster_name;
|
||||
}
|
||||
|
||||
sub same_node {
|
||||
my ($self, $cxn1, $cxn2) = @_;
|
||||
|
||||
my $sql = "SHOW VARIABLES LIKE 'wsrep\_sst\_receive\_address'";
|
||||
PTDEBUG && _d($cxn1->name, $sql);
|
||||
my (undef, $val1) = $cxn1->dbh->selectrow_array($sql);
|
||||
PTDEBUG && _d($cxn2->name, $sql);
|
||||
my (undef, $val2) = $cxn2->dbh->selectrow_array($sql);
|
||||
|
||||
return ($val1 || '') eq ($val2 || '');
|
||||
}
|
||||
|
||||
sub same_cluster {
|
||||
my ($self, $cxn1, $cxn2) = @_;
|
||||
return unless $self->is_cluster_node($cxn1) && $self->is_cluster_node($cxn2);
|
||||
return if $self->is_master_of($cxn1, $cxn2) || $self->is_master_of($cxn2, $cxn1);
|
||||
|
||||
my $sql = q{SHOW VARIABLES LIKE 'wsrep_cluster_name'};
|
||||
PTDEBUG && _d($sql);
|
||||
my (undef, $row) = $cxn1->dbh->selectrow_array($sql);
|
||||
my (undef, $cxn2_row) = $cxn2->dbh->selectrow_array($sql);
|
||||
# They can't be the same cluster if one of them isn't in a cluster.
|
||||
return 0 if !$self->is_cluster_node($cxn1) || !$self->is_cluster_node($cxn2);
|
||||
|
||||
return unless $row eq $cxn2_row;
|
||||
my $cluster1 = $self->get_cluster_name($cxn1);
|
||||
my $cluster2 = $self->get_cluster_name($cxn2);
|
||||
|
||||
# Now it becomes tricky. Ostensibly clusters shouldn't have the
|
||||
# same name, but tell that to the world.
|
||||
$sql = q{SHOW VARIABLES LIKE 'wsrep_cluster_address'};
|
||||
PTDEBUG && _d($sql);
|
||||
my (undef, $addr) = $cxn1->dbh->selectrow_array($sql);
|
||||
my (undef, $cxn2_addr) = $cxn2->dbh->selectrow_array($sql);
|
||||
|
||||
# If they both have gcomm://, then they are both the first
|
||||
# node of a cluster, so they can't be in the same one.
|
||||
return if $addr eq 'gcomm://' && $cxn2_addr eq 'gcomm://';
|
||||
|
||||
if ( $addr eq 'gcomm://' ) {
|
||||
$addr = $self->_find_full_gcomm_addr($cxn1->dbh);
|
||||
}
|
||||
elsif ( $cxn2_addr eq 'gcomm://' ) {
|
||||
$cxn2_addr = $self->_find_full_gcomm_addr($cxn2->dbh);
|
||||
}
|
||||
|
||||
# Meanwhile, if they have the same address, then
|
||||
# they are definitely part of the same cluster
|
||||
return 1 if lc($addr) eq lc($cxn2_addr);
|
||||
|
||||
# However, this still leaves us with the issue that
|
||||
# the cluster addresses could look like this:
|
||||
# node1 -> node2, node2 -> node1,
|
||||
# or
|
||||
# node1 -> node2 addr,
|
||||
# node2 -> node3 addr,
|
||||
# node3 -> node1 addr,
|
||||
# TODO No clue what to do here
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub is_master_of {
|
||||
my ($self, $cxn1, $cxn2) = @_;
|
||||
|
||||
my $cxn2_dbh = $cxn2->dbh;
|
||||
my $sql = q{SHOW SLAVE STATUS};
|
||||
PTDEBUG && _d($sql);
|
||||
local $cxn2_dbh->{FetchHashKeyName} = 'NAME_lc';
|
||||
my $slave_status = $cxn2_dbh->selectrow_hashref($sql);
|
||||
return unless ref($slave_status) eq 'HASH';
|
||||
|
||||
my $port = $cxn1->dsn->{P};
|
||||
return unless $slave_status->{master_port} eq $port;
|
||||
return 1 if $cxn1->dsn->{h} eq $slave_status->{master_host};
|
||||
|
||||
# They might be the same host but written in a different format
|
||||
my $host = scalar gethostbyname($cxn1->dsn->{h});
|
||||
my $master_host = scalar gethostbyname($slave_status->{master_host});
|
||||
return 1 if $master_host eq $host;
|
||||
return;
|
||||
}
|
||||
|
||||
sub _find_full_gcomm_addr {
|
||||
my ($self, $dbh) = @_;
|
||||
|
||||
my $sql = q{SHOW VARIABLES LIKE 'wsrep_provider_options'};
|
||||
PTDEBUG && _d($sql);
|
||||
my (undef, $provider_opts) = $dbh->selectrow_array($sql);
|
||||
my ($prov_addr) = $provider_opts =~ m{\Qgmcast.listen_addr\E\s*=\s*tcp://([^:]+:[0-9]+)\s*;}i;
|
||||
my $full_gcomm = "gcomm://$prov_addr";
|
||||
PTDEBUG && _d("gcomm address: ", $full_gcomm);
|
||||
return $full_gcomm;
|
||||
return ($cluster1 || '') eq ($cluster2 || '');
|
||||
}
|
||||
|
||||
sub _d {
|
||||
@@ -129,3 +85,6 @@ sub _d {
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End Percona::XtraDB::Cluster package
|
||||
# ###########################################################################
|
||||
|
128
lib/Sandbox.pm
128
lib/Sandbox.pm
@@ -53,6 +53,20 @@ my %port_for = (
|
||||
master4 => 2901,
|
||||
master5 => 2902,
|
||||
master6 => 2903,
|
||||
node1 => 12345, # pxc...
|
||||
node2 => 12346,
|
||||
node3 => 12347,
|
||||
node4 => 2900,
|
||||
node5 => 2901,
|
||||
node6 => 2902,
|
||||
cmaster => 12349, # master -> cluster
|
||||
cslave1 => 12348, # cluster -> slave
|
||||
);
|
||||
|
||||
my %server_type = (
|
||||
master => 1,
|
||||
slave => 1,
|
||||
node => 1,
|
||||
);
|
||||
|
||||
my $test_dbs = qr/^(?:mysql|information_schema|sakila|performance_schema|percona_test)$/;
|
||||
@@ -315,9 +329,9 @@ sub ok {
|
||||
|
||||
# Dings a heartbeat on the master, and waits until the slave catches up fully.
|
||||
sub wait_for_slaves {
|
||||
my $self = shift;
|
||||
my ($self, $slave) = @_;
|
||||
my $master_dbh = $self->get_dbh_for('master');
|
||||
my $slave2_dbh = $self->get_dbh_for('slave2');
|
||||
my $slave2_dbh = $self->get_dbh_for($slave || 'slave2');
|
||||
my ($ping) = $master_dbh->selectrow_array("SELECT MD5(RAND())");
|
||||
$master_dbh->do("UPDATE percona_test.sentinel SET ping='$ping' WHERE id=1");
|
||||
PerconaTest::wait_until(
|
||||
@@ -329,14 +343,6 @@ sub wait_for_slaves {
|
||||
);
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
# Verifies that master, slave1, and slave2 have a faithful copy of the mysql and
|
||||
# sakila databases. The reference data is inserted into percona_test.checksums
|
||||
# by util/checksum-test-dataset when sandbox/test-env starts the environment.
|
||||
@@ -438,62 +444,84 @@ sub set_as_slave {
|
||||
}
|
||||
|
||||
sub start_sandbox {
|
||||
my ($self, $mode, $server, $master_server) = @_;
|
||||
my ($self, %args) = @_;
|
||||
my @required_args = qw(type server);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
};
|
||||
my ($type, $server) = @args{@required_args};
|
||||
my $env = $args{env} || '';
|
||||
|
||||
die "Invalid server type: $type" unless $server_type{$type};
|
||||
_check_server($server);
|
||||
my $port = $port_for{$server};
|
||||
my $master_port = $master_server ? $port_for{$master_server} : '';
|
||||
my $out = `$trunk/sandbox/start-sandbox $mode $port $master_port`;
|
||||
|
||||
if ( $type eq 'master') {
|
||||
my $out = `$env $trunk/sandbox/start-sandbox $type $port >/dev/null`;
|
||||
die $out if $CHILD_ERROR;
|
||||
return $out;
|
||||
}
|
||||
elsif ( $type eq 'slave' ) {
|
||||
die "I need a slave arg" unless $args{master};
|
||||
_check_server($args{master});
|
||||
my $master_port = $port_for{$args{master}};
|
||||
|
||||
my $out = `$env $trunk/sandbox/start-sandbox $type $port $master_port >/dev/null`;
|
||||
die $out if $CHILD_ERROR;
|
||||
}
|
||||
elsif ( $type eq 'node' ) {
|
||||
my $first_node = $args{first_node} ? $port_for{$args{first_node}} : '';
|
||||
my $out = `$env $trunk/sandbox/start-sandbox cluster $port $first_node >/dev/null`;
|
||||
die $out if $CHILD_ERROR;
|
||||
}
|
||||
|
||||
my $dbh = $self->get_dbh_for($server, $args{cxn_opts});
|
||||
my $dsn = $self->dsn_for($server);
|
||||
|
||||
return $dbh, $dsn;
|
||||
}
|
||||
|
||||
sub stop_sandbox {
|
||||
my ($self, @sandboxes) = @_;
|
||||
my @ports = @port_for{@sandboxes};
|
||||
my $out = `$trunk/sandbox/stop-sandbox @ports`;
|
||||
my $out = `$trunk/sandbox/stop-sandbox @ports >/dev/null`;
|
||||
die $out if $CHILD_ERROR;
|
||||
return $out;
|
||||
}
|
||||
|
||||
sub start_cluster {
|
||||
my ($self, %args) = @_;
|
||||
my $cluster_size = $args{cluster_size} || 3;
|
||||
my @required_args = qw(nodes);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
};
|
||||
my ($nodes) = @args{@required_args};
|
||||
|
||||
my $out = '';
|
||||
|
||||
my ($node1, @nodes) = map {
|
||||
my $node_name = "node$_";
|
||||
$node_name = "_$node_name" while exists $port_for{$node_name};
|
||||
$port_for{$node_name} = $self->_get_unused_port();
|
||||
$node_name
|
||||
} 1..$cluster_size;
|
||||
|
||||
local $ENV{CLUSTER_NAME} = $args{cluster_name} if $args{cluster_name};
|
||||
$self->start_sandbox("cluster", $node1);
|
||||
for my $node ( @nodes ) {
|
||||
$self->start_sandbox("cluster", $node, $node1);
|
||||
foreach my $node ( @$nodes ) {
|
||||
_check_server($node);
|
||||
}
|
||||
|
||||
return ($node1, @nodes);
|
||||
}
|
||||
Test::More::diag("Starting cluster with @$nodes");
|
||||
my %connect;
|
||||
|
||||
# Lifted from Nginx::Test on CPAN
|
||||
sub _get_unused_port {
|
||||
my $port = 50000 + int (rand() * 5000);
|
||||
my $first_node = shift @$nodes;
|
||||
my ($dbh, $dsn) = $self->start_sandbox(
|
||||
type => "node",
|
||||
server => $first_node,
|
||||
env => $args{env},
|
||||
);
|
||||
$connect{$first_node} = { dbh => $dbh, dsn => $dsn };
|
||||
|
||||
while ($port++ < 64000) {
|
||||
my $sock = IO::Socket::INET->new (
|
||||
Listen => 5,
|
||||
LocalAddr => '127.0.0.1',
|
||||
LocalPort => $port,
|
||||
Proto => 'tcp',
|
||||
ReuseAddr => 1
|
||||
) or next;
|
||||
|
||||
$sock->close;
|
||||
return $port;
|
||||
foreach my $node ( @$nodes ) {
|
||||
my ($dbh, $dsn) = $self->start_sandbox(
|
||||
server => $node,
|
||||
type => "node",
|
||||
first_node => $first_node,
|
||||
env => $args{env},
|
||||
);
|
||||
$connect{$node} = { dbh => $dbh, dsn => $dsn };
|
||||
}
|
||||
|
||||
die "Cannot find an open port";
|
||||
return \%connect;
|
||||
}
|
||||
|
||||
sub port_for {
|
||||
@@ -501,6 +529,14 @@ sub port_for {
|
||||
return $port_for{$server};
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
|
@@ -11,7 +11,6 @@ use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use Test::More;
|
||||
use Data::Dumper;
|
||||
use File::Spec::Functions;
|
||||
|
||||
# Hostnames make testing less accurate. Tests need to see
|
||||
# that such-and-such happened on specific slave hosts, but
|
||||
@@ -28,21 +27,29 @@ require VersionParser;
|
||||
|
||||
my $dp = new DSNParser(opts=>$dsn_opts);
|
||||
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
|
||||
my $master_dbh = $sb->get_dbh_for('master');
|
||||
my $node1 = $sb->get_dbh_for('node1');
|
||||
my $node2 = $sb->get_dbh_for('node2');
|
||||
my $node3 = $sb->get_dbh_for('node3');
|
||||
|
||||
my $db_flavor = VersionParser->new($master_dbh)->flavor();
|
||||
my $db_flavor = VersionParser->new($node1)->flavor();
|
||||
|
||||
if ( !$master_dbh ) {
|
||||
plan skip_all => 'Cannot connect to sandbox master';
|
||||
}
|
||||
elsif ( $db_flavor !~ /XtraDB Cluster/ ) {
|
||||
if ( $db_flavor !~ /XtraDB Cluster/ ) {
|
||||
plan skip_all => "PXC tests";
|
||||
}
|
||||
elsif ( !$node1 ) {
|
||||
plan skip_all => 'Cannot connect to cluster node1';
|
||||
}
|
||||
elsif ( !$node2 ) {
|
||||
plan skip_all => 'Cannot connect to cluster node2';
|
||||
}
|
||||
elsif ( !$node3 ) {
|
||||
plan skip_all => 'Cannot connect to cluster node3';
|
||||
}
|
||||
|
||||
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
|
||||
# so we need to specify --lock-wait-timeout=3 else the tool will die.
|
||||
my $master_dsn = 'h=127.1,P=12345,u=msandbox,p=msandbox';
|
||||
my @args = ($master_dsn, qw(--lock-wait-timeout 3));
|
||||
my $node1_dsn = $sb->dsn_for('node1');
|
||||
my @args = ($node1_dsn, qw(--lock-wait-timeout 3));
|
||||
my $output;
|
||||
my $exit_status;
|
||||
my $sample = "t/pt-table-checksum/samples/";
|
||||
@@ -52,191 +59,545 @@ my $sample = "t/pt-table-checksum/samples/";
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/1062563
|
||||
# #############################################################################
|
||||
|
||||
# Recreate the bug_1062563 database and its ptc_pxc table through $dbh, then
# insert one row per value in @vals while sql_log_bin is disabled.
#   $dbh  - database handle on which every statement is executed
#   @vals - optional values for column i; when empty, the single value 1 is used
sub make_dbh_differ {
|
||||
my ($dbh, @vals) = @_;
|
||||
# No values given: fall back to inserting a single row with i=1.
@vals = (@vals ? @vals : 1);
|
||||
# Make them differ...
|
||||
$dbh->do("DROP DATABASE IF EXISTS bug_1062563");
|
||||
$dbh->do("CREATE DATABASE bug_1062563");
|
||||
$dbh->do("CREATE TABLE bug_1062563.ptc_pxc (i int)");
|
||||
# #############################################################################
|
||||
# Check just a cluster
|
||||
# #############################################################################
|
||||
|
||||
# Now make this node different from the rest
|
||||
# The inserts run with sql_log_bin=0 and the setting is re-enabled afterwards,
# so only the rows inserted here are affected by it.
$dbh->do("set sql_log_bin=0");
|
||||
$dbh->do("INSERT INTO bug_1062563.ptc_pxc (i) VALUES ($_)") for @vals;
|
||||
$dbh->do("set sql_log_bin=1");
|
||||
}
|
||||
# This DSN table has node2 and node3 (12346 and 12347) but not node1 (12345)
|
||||
# because it was originally created for traditional setups which require only
|
||||
# slave DSNs, but the DSN table for a PXC setup can/should contain DSNs for
|
||||
# all nodes so the user can run ptc on any node and find all the others.
|
||||
$sb->load_file('node1', "$sample/dsn-table.sql");
|
||||
$node1->do(qq/INSERT INTO dsns.dsns VALUES (1, 1, '$node1_dsn')/);
|
||||
|
||||
diag("Creating a 5-node PXC cluster...");
|
||||
my @nodes = $sb->start_cluster(cluster_size => 5);
|
||||
diag("Nodes: ", Dumper( { map { $_ => $sb->port_for($_) } @nodes } ));
|
||||
|
||||
my $node2 = $nodes[1];
|
||||
my $node2_dbh = $sb->get_dbh_for($node2);
|
||||
|
||||
my $node2_slave = "master3";
|
||||
|
||||
diag("Creating a slave for $node2...");
|
||||
{
|
||||
local $ENV{BINLOG_FORMAT} = 'ROW';
|
||||
diag($sb->start_sandbox("slave", $node2_slave, $node2));
|
||||
}
|
||||
my $node_slave_dbh = $sb->get_dbh_for($node2_slave);
|
||||
|
||||
make_dbh_differ($node2_dbh);
|
||||
|
||||
# And make its slave differ as well
|
||||
PerconaTest::wait_for_table($sb->get_dbh_for($nodes[-1]), "bug_1062563.ptc_pxc");
|
||||
PerconaTest::wait_for_table($node_slave_dbh, "bug_1062563.ptc_pxc");
|
||||
$node_slave_dbh->do("INSERT INTO bug_1062563.ptc_pxc (i) VALUES ($_)") for 3, 4;
|
||||
|
||||
my $dsns_table_sql = catfile(qw(t lib samples MasterSlave dsn_table.sql));
|
||||
$sb->load_file($node2, $dsns_table_sql, undef, no_wait => 1);
|
||||
$node2_dbh->do("DELETE FROM dsn_t.dsns"); # Delete 12346
|
||||
my $sth = $node2_dbh->prepare("INSERT INTO dsn_t.dsns VALUES (null, null, ?)");
|
||||
for my $dsn ( map { $sb->dsn_for($_) } @nodes[0,2..$#nodes], $node2_slave ) {
|
||||
$sth->execute($dsn);
|
||||
}
|
||||
|
||||
my $node2_dsn = $sb->dsn_for($node2);
|
||||
# First a little test to make sure the tool detects and bails out
|
||||
# if no other cluster nodes are detected, in which case the user
|
||||
# probably didn't specify --recursion-method dsn.
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(
|
||||
$node2_dsn, qw(--lock-wait-timeout 3),
|
||||
qw(-d bug_1062563),
|
||||
'--recursion-method', "dsn=D=dsn_t,t=dsns"
|
||||
) },
|
||||
sub { pt_table_checksum::main(@args) },
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"Bug 1062563: Detects diffs between PXC nodes"
|
||||
) or diag($output);
|
||||
|
||||
my @cluster_nodes = $output =~ /(because it is a cluster node)/g;
|
||||
is(
|
||||
scalar(@cluster_nodes),
|
||||
4,
|
||||
"Skips all the cluster nodes in the dsns table"
|
||||
) or diag($output);
|
||||
|
||||
# Now try with just the slave
|
||||
|
||||
$node2_dbh->do("DELETE FROM dsn_t.dsns");
|
||||
$sth->execute($sb->dsn_for($node2_slave));
|
||||
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(
|
||||
$node2_dsn, qw(--lock-wait-timeout 3),
|
||||
qw(--chunk-size 1),
|
||||
qw(-d bug_1062563),
|
||||
'--recursion-method', "dsn=D=dsn_t,t=dsns"
|
||||
) },
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"Bug 1062563: Detects diffs on slaves where the master is a PXC node"
|
||||
) or diag($output);
|
||||
|
||||
$sth->finish();
|
||||
diag("Stopping the PXC cluster and the slave...");
|
||||
$sb->stop_sandbox($node2_slave, @nodes);
|
||||
|
||||
# Now checking that cluster -> cluster works
|
||||
|
||||
diag("Creating two 3-node clusters...");
|
||||
my @cluster1 = $sb->start_cluster(cluster_size => 3, cluster_name => "pt_test_cluster_1");
|
||||
my @cluster2 = $sb->start_cluster(cluster_size => 3, cluster_name => "pt_test_cluster_2");
|
||||
diag("Cluster 1: ", Dumper( { map { $_ => $sb->port_for($_) } @cluster1 } ));
|
||||
diag("Cluster 2: ", Dumper( { map { $_ => $sb->port_for($_) } @cluster2 } ));
|
||||
|
||||
$sb->set_as_slave($cluster2[0], $cluster1[0]);
|
||||
|
||||
my $cluster1_dbh = $sb->get_dbh_for($cluster1[0]);
|
||||
my $cluster2_dbh = $sb->get_dbh_for($cluster2[0]);
|
||||
make_dbh_differ($cluster1_dbh);
|
||||
|
||||
# And make its slave differ as well
|
||||
PerconaTest::wait_for_table($sb->get_dbh_for($cluster2[-1]), "bug_1062563.ptc_pxc");
|
||||
PerconaTest::wait_for_table($sb->get_dbh_for($cluster1[-1]), "bug_1062563.ptc_pxc");
|
||||
PerconaTest::wait_for_table($cluster2_dbh, "bug_1062563.ptc_pxc");
|
||||
$cluster2_dbh->do("INSERT INTO bug_1062563.ptc_pxc (i) VALUES ($_)") for 3, 4;
|
||||
|
||||
$dsns_table_sql = catfile(qw(t lib samples MasterSlave dsn_table.sql));
|
||||
$sb->load_file($cluster1[0], $dsns_table_sql, undef, no_wait => 1);
|
||||
$cluster1_dbh->do("DELETE FROM dsn_t.dsns"); # Delete 12346
|
||||
$sth = $cluster1_dbh->prepare("INSERT INTO dsn_t.dsns VALUES (null, null, ?)");
|
||||
for my $dsn ( map { $sb->dsn_for($_) } @cluster1[1..$#cluster1], $cluster2[0] ) {
|
||||
$sth->execute($dsn);
|
||||
}
|
||||
$sth->finish();
|
||||
|
||||
my $cluster1_dsn = $sb->dsn_for($cluster1[0]);
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(
|
||||
$cluster1_dsn, qw(--lock-wait-timeout 3),
|
||||
qw(-d bug_1062563),
|
||||
'--recursion-method', "dsn=D=dsn_t,t=dsns"
|
||||
) },
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"Bug 1062563: Detects diffs between PXC nodes when cluster -> cluster"
|
||||
) or diag($output);
|
||||
|
||||
like(
|
||||
$output,
|
||||
qr/is a cluster node, but doesn't belong to the same cluster as/, #'
|
||||
"Shows a warning when cluster -> cluster"
|
||||
qr/h=127.1,P=12345 is a cluster node but no other nodes/,
|
||||
"Dies if no other nodes are found"
|
||||
);
|
||||
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(@args,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns")
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'errors'),
|
||||
0,
|
||||
"No diffs: no errors"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'skipped'),
|
||||
0,
|
||||
"No diffs: no skips"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
0,
|
||||
"No diffs: no diffs"
|
||||
);
|
||||
|
||||
# Now really test checksumming a cluster. To create a diff we have to disable
|
||||
# the binlog. Although PXC doesn't need or use the binlog to communicate
|
||||
# (it has its own broadcast-based protocol implemented via the Galera lib)
|
||||
# it still respects sql_log_bin, so we can make a change on one node without
|
||||
# affecting the others.
|
||||
$sb->load_file('node1', "$sample/a-z.sql");
|
||||
$node2->do("set sql_log_bin=0");
|
||||
$node2->do("update test.t set c='zebra' where c='z'");
|
||||
$node2->do("set sql_log_bin=1");
|
||||
|
||||
my ($row) = $node2->selectrow_array("select c from test.t order by c desc limit 1");
|
||||
is(
|
||||
$row,
|
||||
"zebra",
|
||||
"Node2 is changed"
|
||||
);
|
||||
|
||||
($row) = $node1->selectrow_array("select c from test.t order by c desc limit 1");
|
||||
is(
|
||||
$row,
|
||||
"z",
|
||||
"Node1 not changed"
|
||||
);
|
||||
|
||||
($row) = $node3->selectrow_array("select c from test.t order by c desc limit 1");
|
||||
is(
|
||||
$row,
|
||||
"z",
|
||||
"Node3 not changed"
|
||||
);
|
||||
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(@args,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns")
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'errors'),
|
||||
0,
|
||||
"1 diff: no errors"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'skipped'),
|
||||
0,
|
||||
"1 diff: no skips"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"1 diff: 1 diff"
|
||||
) or diag($output);
|
||||
|
||||
diag("Starting master1...");
|
||||
$sb->start_sandbox("master", "master1");
|
||||
diag("Setting it as master of a node in the first cluster");
|
||||
$sb->set_as_slave($cluster1[0], "master1");
|
||||
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
|
||||
like(
|
||||
$output,
|
||||
qr/^\S+\s+ # ts
|
||||
0\s+ # errors
|
||||
1\s+ # diffs
|
||||
26\s+ # rows
|
||||
\d+\s+ # chunks
|
||||
0\s+ # skipped
|
||||
\S+\s+ # time
|
||||
test.t$ # table
|
||||
/xm,
|
||||
"1 diff: it's in test.t"
|
||||
);
|
||||
|
||||
my $master1_dbh = $sb->get_dbh_for("master1");
|
||||
make_dbh_differ($master1_dbh, 10..50);
|
||||
# #############################################################################
|
||||
# cluster, node1 -> slave, run on node1
|
||||
# #############################################################################
|
||||
|
||||
my $master1_dsn = $sb->dsn_for("master1");
|
||||
my ($slave_dbh, $slave_dsn) = $sb->start_sandbox(
|
||||
server => 'cslave1',
|
||||
type => 'slave',
|
||||
master => 'node1',
|
||||
env => q/BINLOG_FORMAT="ROW"/,
|
||||
);
|
||||
|
||||
# Add the slave to the DSN table.
|
||||
$node1->do(qq/INSERT INTO dsns.dsns VALUES (4, 3, '$slave_dsn')/);
|
||||
|
||||
# Fix what we changed earlier on node2 so the cluster is consistent.
|
||||
$node2->do("set sql_log_bin=0");
|
||||
$node2->do("update test.t set c='z' where c='zebra'");
|
||||
$node2->do("set sql_log_bin=1");
|
||||
|
||||
# Wait for the slave to apply the binlogs from node1 (its master).
|
||||
# Then change it so it's not consistent.
|
||||
PerconaTest::wait_for_table($slave_dbh, 'test.t');
|
||||
$sb->wait_for_slaves('cslave1');
|
||||
$slave_dbh->do("update test.t set c='zebra' where c='z'");
|
||||
|
||||
# Another quick test first: the tool should complain about the slave's
|
||||
# binlog format but only the slave's, not the cluster nodes:
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/1080385
|
||||
# Cluster nodes default to ROW format because that's what Galera
|
||||
# works best with, even though it doesn't really use binlogs.
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(
|
||||
$master1_dsn, qw(--lock-wait-timeout 3),
|
||||
qw(-d bug_1062563),
|
||||
) },
|
||||
sub { pt_table_checksum::main(@args,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns")
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
like(
|
||||
$output,
|
||||
qr/replica h=127.1,P=12348 has binlog_format ROW/,
|
||||
"--check-binlog-format warns about slave's binlog format"
|
||||
);
|
||||
|
||||
# Now really test that diffs on the slave are detected.
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(@args,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
||||
qw(--no-check-binlog-format)),
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"Bug 1062563: Detects diffs when master -> cluster"
|
||||
"Detects diffs on slave of cluster node1"
|
||||
) or diag($output);
|
||||
|
||||
$slave_dbh->disconnect;
|
||||
$sb->stop_sandbox('cslave1');
|
||||
|
||||
# #############################################################################
|
||||
# cluster, node2 -> slave, run on node1
|
||||
#
|
||||
# Does not work because we only set binlog_format=STATEMENT on node1 which
|
||||
# does not affect other nodes, so node2 gets checksum queries in STATEMENT
|
||||
# format, executes them, but then logs the results in ROW format (since ROW
|
||||
# format is the default for cluster nodes) which doesn't work on the slave
|
||||
# (i.e. the slave doesn't execute the query). So any diffs on the slave are
|
||||
# not detected.
|
||||
# #############################################################################
|
||||
|
||||
($slave_dbh, $slave_dsn) = $sb->start_sandbox(
|
||||
server => 'cslave1',
|
||||
type => 'slave',
|
||||
master => 'node2',
|
||||
env => q/BINLOG_FORMAT="ROW"/,
|
||||
);
|
||||
|
||||
# Wait for the slave to apply the binlogs from node2 (its master).
|
||||
# Then change it so it's not consistent.
|
||||
PerconaTest::wait_for_table($slave_dbh, 'test.t');
|
||||
$sb->wait_for_slaves('cslave1');
|
||||
$slave_dbh->do("update test.t set c='zebra' where c='z'");
|
||||
|
||||
($row) = $slave_dbh->selectrow_array("select c from test.t order by c desc limit 1");
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'rows'),
|
||||
41,
|
||||
"Bug 1062563: Correct number of rows for master -> cluster"
|
||||
$row,
|
||||
"zebra",
|
||||
"Slave is changed"
|
||||
);
|
||||
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(@args,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
||||
qw(--no-check-binlog-format -d test)),
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
0,
|
||||
"Limitation: does not detect diffs on slave of cluster node2"
|
||||
) or diag($output);
|
||||
|
||||
$slave_dbh->disconnect;
|
||||
$sb->stop_sandbox('cslave1');
|
||||
|
||||
# Restore the original DSN table.
|
||||
$node1->do(qq/DELETE FROM dsns.dsns WHERE id=4/);
|
||||
|
||||
# #############################################################################
|
||||
# master -> node1 in cluster, run on master
|
||||
# #############################################################################
|
||||
|
||||
my ($master_dbh, $master_dsn) = $sb->start_sandbox(
|
||||
server => 'cmaster',
|
||||
type => 'master',
|
||||
env => q/BINLOG_FORMAT="ROW"/,
|
||||
);
|
||||
|
||||
# CAREFUL: The master and the cluster are different, so we must load dbs on
|
||||
# the master then flush the logs, else node1 will apply the master's binlogs
|
||||
# and blow up because it already had these dbs.
|
||||
|
||||
# Remember: this DSN table only has node2 and node3 (12346 and 12347) which is
|
||||
# sufficient for this test.
|
||||
$sb->load_file('cmaster', "$sample/dsn-table.sql");
|
||||
|
||||
# We have to load a-z-cluster.sql else the pk ids won't match because nodes use
|
||||
# auto-inc offsets but the master doesn't.
|
||||
$sb->load_file('cmaster', "$sample/a-z-cluster.sql");
|
||||
|
||||
$master_dbh->do("FLUSH LOGS");
|
||||
$master_dbh->do("RESET MASTER");
|
||||
|
||||
$sb->set_as_slave('node1', 'cmaster');
|
||||
|
||||
# Notice: no --recursion-method=dsn yet. Since node1 is a traditional slave
|
||||
# of the master, ptc should auto-detect it, which we'll test later by making
|
||||
# the slave differ.
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main($master_dsn,
|
||||
qw(-d test))
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'errors'),
|
||||
0,
|
||||
"master->cluster no diffs: no errors"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'skipped'),
|
||||
0,
|
||||
"master->cluster no diffs: no skips"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
0,
|
||||
"master->cluster no diffs: no diffs"
|
||||
) or diag($output);
|
||||
|
||||
# Make a diff on node1. If ptc is really auto-detecting node1, then it
|
||||
# should report this diff.
|
||||
$node1->do("set sql_log_bin=0");
|
||||
$node1->do("update test.t set c='zebra' where c='z'");
|
||||
$node1->do("set sql_log_bin=1");
|
||||
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main($master_dsn,
|
||||
qw(-d test))
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'errors'),
|
||||
0,
|
||||
"master->cluster 1 diff: no errors"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'skipped'),
|
||||
0,
|
||||
"master->cluster 1 diff: no skips"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"master->cluster 1 diff: 1 diff"
|
||||
) or diag($output);
|
||||
|
||||
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
|
||||
like(
|
||||
$output,
|
||||
qr/^\S+\s+ # ts
|
||||
0\s+ # errors
|
||||
1\s+ # diffs
|
||||
26\s+ # rows
|
||||
\d+\s+ # chunks
|
||||
0\s+ # skipped
|
||||
\S+\s+ # time
|
||||
test.t$ # table
|
||||
/xm,
|
||||
"master->cluster 1 diff: it's in test.t"
|
||||
);
|
||||
|
||||
# Use the DSN table to check for diffs on node2 and node3. This works
|
||||
# because the diff is on node1 and node1 is the direct slave of the master,
|
||||
# so the checksum query will replicate from the master in STATEMENT format,
|
||||
# node1 will execute it, find the diff, then broadcast that result to all
|
||||
# other nodes. -- Remember: the DSN table on the master has node2 and node3.
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main($master_dsn,
|
||||
'--recursion-method', "dsn=$master_dsn,D=dsns,t=dsns",
|
||||
qw(-d test))
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'errors'),
|
||||
0,
|
||||
"...check other nodes: no errors"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'skipped'),
|
||||
0,
|
||||
"...check other nodes: no skips"
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"...check other nodes: 1 diff"
|
||||
) or diag($output);
|
||||
|
||||
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
|
||||
like(
|
||||
$output,
|
||||
qr/^\S+\s+ # ts
|
||||
0\s+ # errors
|
||||
1\s+ # diffs
|
||||
26\s+ # rows
|
||||
\d+\s+ # chunks
|
||||
0\s+ # skipped
|
||||
\S+\s+ # time
|
||||
test.t$ # table
|
||||
/xm,
|
||||
"...check other nodes: it's in test.t"
|
||||
);
|
||||
|
||||
like(
|
||||
$output,
|
||||
qr/is a cluster node, but .*? is not. This is not currently supported/,
|
||||
"Shows a warning when master -> cluster"
|
||||
qr/the direct replica of h=127.1,P=12349 was not found or specified/,
|
||||
"Warns that direct replica of the master isn't found or specified",
|
||||
);
|
||||
|
||||
# Use the other DSN table with all three nodes. Now the tool should
|
||||
# give a more specific warning than that ^.
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main($master_dsn,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
||||
qw(-d test))
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
1,
|
||||
"...check all nodes: 1 diff"
|
||||
) or diag($output);
|
||||
|
||||
diag("Stopping both clusters and master1...");
|
||||
$sb->stop_sandbox(@cluster1, @cluster2, "master1");
|
||||
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
|
||||
like(
|
||||
$output,
|
||||
qr/^\S+\s+ # ts
|
||||
0\s+ # errors
|
||||
1\s+ # diffs
|
||||
26\s+ # rows
|
||||
\d+\s+ # chunks
|
||||
0\s+ # skipped
|
||||
\S+\s+ # time
|
||||
test.t$ # table
|
||||
/xm,
|
||||
"...check all nodes: it's in test.t"
|
||||
);
|
||||
|
||||
like(
|
||||
$output,
|
||||
qr/Diffs will only be detected if the cluster is consistent with h=127.1,P=12345 because h=127.1,P=12349/,
|
||||
"Warns that diffs only detected if cluster consistent with direct replica",
|
||||
);
|
||||
|
||||
# Restore node1 so the cluster is consistent, but then make node2 differ.
|
||||
# ptc should NOT detect this diff because the checksum query will replicate
|
||||
# to node1, node1 isn't different, so it broadcasts the result in ROW format
|
||||
# that all is ok, which node2 gets and thus falsely reports. This is why
|
||||
# those ^ warnings exist.
|
||||
$node1->do("set sql_log_bin=0");
|
||||
$node1->do("update test.t set c='z' where c='zebra'");
|
||||
$node1->do("set sql_log_bin=1");
|
||||
|
||||
$node2->do("set sql_log_bin=0");
|
||||
$node2->do("update test.t set c='zebra' where c='z'");
|
||||
$node2->do("set sql_log_bin=1");
|
||||
|
||||
($row) = $node2->selectrow_array("select c from test.t order by c desc limit 1");
|
||||
is(
|
||||
$row,
|
||||
"zebra",
|
||||
"Node2 is changed again"
|
||||
);
|
||||
|
||||
($row) = $node1->selectrow_array("select c from test.t order by c desc limit 1");
|
||||
is(
|
||||
$row,
|
||||
"z",
|
||||
"Node1 not changed again"
|
||||
);
|
||||
|
||||
($row) = $node3->selectrow_array("select c from test.t order by c desc limit 1");
|
||||
is(
|
||||
$row,
|
||||
"z",
|
||||
"Node3 not changed again"
|
||||
);
|
||||
|
||||
# Use the other DSN table with all three nodes, but it won't matter because
|
||||
# node1 is going to broadcast the false-positive that there are no diffs.
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main($master_dsn,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
||||
qw(-d test))
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
is(
|
||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||
0,
|
||||
"Limitation: diff not on direct replica not detected"
|
||||
) or diag($output);
|
||||
|
||||
# ###########################################################################
|
||||
# Be sure to stop the slave on node1, else further tests will die with:
|
||||
# Failed to execute -e "change master to master_host='127.0.0.1',
|
||||
# master_user='msandbox', master_password='msandbox', master_port=12349"
|
||||
# on node1: ERROR 1198 (HY000) at line 1: This operation cannot be performed
|
||||
# with a running slave; run STOP SLAVE first
|
||||
# ###########################################################################
|
||||
$master_dbh->disconnect;
|
||||
$sb->stop_sandbox('cmaster');
|
||||
$node1->do("STOP SLAVE");
|
||||
$node1->do("RESET SLAVE");
|
||||
|
||||
# #############################################################################
|
||||
# cluster -> cluster
|
||||
#
|
||||
# This is not supported. The link between the two clusters is probably
|
||||
# a traditional MySQL replication setup in ROW format, so any checksum
|
||||
# results will be lost across it.
|
||||
# #############################################################################
|
||||
|
||||
my $c = $sb->start_cluster(
|
||||
nodes => [qw(node4 node5 node6)],
|
||||
env => q/CLUSTER_NAME="cluster2"/,
|
||||
);
|
||||
|
||||
# Load the same db just in case this does work (it shouldn't), then there
|
||||
# will be normal results instead of an error because the db is missing.
|
||||
$sb->load_file('node4', "$sample/a-z.sql");
|
||||
|
||||
# Add node4 in the cluster2 to the DSN table.
|
||||
$node1->do(qq/INSERT INTO dsns.dsns VALUES (5, null, '$c->{node4}->{dsn}')/);
|
||||
|
||||
$output = output(
|
||||
sub { pt_table_checksum::main(@args,
|
||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
||||
qw(-d test))
|
||||
},
|
||||
stderr => 1,
|
||||
);
|
||||
|
||||
like(
|
||||
$output,
|
||||
qr/h=127.1,P=12345 is in cluster pt_sandbox_cluster/,
|
||||
"Detects that node1 is in pt_sandbox_cluster"
|
||||
);
|
||||
|
||||
like(
|
||||
$output,
|
||||
qr/h=127.1,P=2900 is in cluster cluster2/,
|
||||
"Detects that node4 is in cluster2"
|
||||
);
|
||||
|
||||
unlike(
|
||||
$output,
|
||||
qr/test/,
|
||||
"Different clusters, no results"
|
||||
);
|
||||
|
||||
$sb->stop_sandbox(qw(node4 node5 node6));
|
||||
|
||||
# Restore the DSN table in case there are more tests.
|
||||
$node1->do(qq/DELETE FROM dsns.dsns WHERE id=5/);
|
||||
|
||||
# #############################################################################
|
||||
# Done.
|
||||
# #############################################################################
|
||||
$sb->wipe_clean($master_dbh);
|
||||
$sb->wipe_clean($node1);
|
||||
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
|
||||
done_testing;
|
||||
|
10
t/pt-table-checksum/samples/a-z-cluster.sql
Normal file
10
t/pt-table-checksum/samples/a-z-cluster.sql
Normal file
@@ -0,0 +1,10 @@
|
||||
drop database if exists test;
|
||||
create database test;
|
||||
use test;
|
||||
|
||||
create table t (
|
||||
id int auto_increment primary key,
|
||||
c varchar(16) not null
|
||||
) engine=innodb;
|
||||
|
||||
INSERT INTO `t` VALUES (1,'a'),(4,'b'),(7,'c'),(10,'d'),(13,'e'),(16,'f'),(19,'g'),(22,'h'),(25,'i'),(28,'j'),(31,'k'),(34,'l'),(37,'m'),(40,'n'),(43,'o'),(46,'p'),(49,'q'),(52,'r'),(55,'s'),(58,'t'),(61,'u'),(64,'v'),(67,'w'),(70,'x'),(73,'y'),(76,'z');
|
Reference in New Issue
Block a user