Fix and tests for 1062563 and 1063912: ptc+PXC bugs

This commit is contained in:
Brian Fraser fraserb@gmail.com
2012-10-17 12:58:48 -03:00
parent 6771202be0
commit 1875066868
5 changed files with 516 additions and 8 deletions

View File

@@ -8586,10 +8586,39 @@ sub main {
);
PTDEBUG && _d(scalar @$slaves, 'slaves found');
if ( $master_cxn->is_cluster_node() && !@$slaves ) {
die $master_cxn->name() . " is a cluster node but no other nodes "
. "or regular replicas were found. Use --recursion-method=dsn "
. "to specify the other nodes in the cluster.\n";
if ( $master_cxn->is_cluster_node() ) {
if ( !@$slaves ) {
die $master_cxn->name() . " is a cluster node but no other nodes "
. "or regular replicas were found. Use --recursion-method=dsn "
. "to specify the other nodes in the cluster.\n";
}
else {
my $err = '';
for my $slave (@$slaves) {
if ( $slave->is_cluster_node()
&& !$slave->same_cluster($master_cxn) ) {
$err .= $slave->name() . " is a cluster node, but doesn't "
. "belong to the same cluster as " . $master_cxn->name()
. ". This is not currently supported; You can try "
. "using --recursion-method=dsn to specify all nodes "
. "in the slave cluster.\n"
}
}
warn $err if $err;
}
}
elsif ( @$slaves ) {
my $err = '';
for my $slave (@$slaves) {
if ( $slave->is_cluster_node() ) {
$err .= $slave->name() . " is a cluster node, but "
. $master_cxn->name() . " is not. This is not currently "
. "supported; You can try to specify "
. "all nodes in the cluster with "
. "--recursion-method=dsn if you want them checksummed.\n"
}
}
warn $err if $err;
}
if ( $o->get('check-slave-lag') ) {
@@ -8611,7 +8640,8 @@ sub main {
# to appear should be sufficient.
@$slave_lag_cxns = grep {
my $slave_cxn = $_;
if ( $slave_cxn->is_cluster_node() ) {
if ( $slave_cxn->is_cluster_node()
&& $master_cxn->same_cluster($slave_cxn) ) {
warn "Not checking replica lag on " . $slave_cxn->name()
. " because it is a cluster node.\n";
0;

View File

@@ -210,6 +210,77 @@ sub is_cluster_node {
return $self->{is_cluster_node};
}
# Decide whether $self and $cxn look like members of the same cluster.
#
# Arguments: $cxn - another connection object (must offer is_cluster_node,
#                   is_master_of and dbh, exactly like $self).
# Returns:   1 when the two nodes are considered to share a cluster,
#            nothing (bare return) otherwise.
sub same_cluster {
   my ($self, $cxn) = @_;

   # Both ends have to be cluster nodes for the question to make sense.
   return if !$self->is_cluster_node() || !$cxn->is_cluster_node();

   # If either one replicates from the other, they are not treated as
   # belonging to the same cluster.
   return if $self->is_master_of($cxn);
   return if $cxn->is_master_of($self);

   # Different cluster names: different clusters.
   my $name_sql = q{SHOW VARIABLES LIKE 'wsrep_cluster_name'};
   PTDEBUG && _d($name_sql);
   my $own_name   = ($self->dbh->selectrow_array($name_sql))[1];
   my $other_name = ($cxn->dbh->selectrow_array($name_sql))[1];
   return if $own_name ne $other_name;

   # Equal names are not proof of anything, since nothing stops two
   # unrelated clusters from sharing a name; look at the addresses too.
   my $addr_sql = q{SHOW VARIABLES LIKE 'wsrep_cluster_address'};
   PTDEBUG && _d($addr_sql);
   my $own_addr   = ($self->dbh->selectrow_array($addr_sql))[1];
   my $other_addr = ($cxn->dbh->selectrow_array($addr_sql))[1];

   # A bare gcomm:// marks the first node of a cluster.  Two first nodes
   # cannot be part of one and the same cluster.
   my $bare_gcomm     = 'gcomm://';
   my $own_is_first   = $own_addr eq $bare_gcomm;
   my $other_is_first = $other_addr eq $bare_gcomm;
   return if $own_is_first && $other_is_first;

   # Replace the bare address of a first node with its full gcomm address
   # so that it can be compared with the address of the other node.
   if ( $own_is_first ) {
      $own_addr = $self->_find_full_gcomm_addr($self->dbh);
   }
   elsif ( $other_is_first ) {
      $other_addr = $self->_find_full_gcomm_addr($cxn->dbh);
   }

   # Identical addresses: certainly the same cluster.
   return 1 if $own_addr eq $other_addr;

   # TODO: the addresses may also form a chain or ring, e.g.
   #    node1 -> node2, node2 -> node1
   # or
   #    node1 -> node2 addr, node2 -> node3 addr, node3 -> node1 addr
   # There is no known way to tell those apart from unrelated clusters
   # here, so the nodes are assumed to belong together.
   return 1;
}
# Check whether $cxn replicates from $self, judging by SHOW SLAVE STATUS
# on $cxn.
#
# Arguments: $cxn - connection object of the presumed replica; its dbh is
#                   queried and its FetchHashKeyName attribute is set to
#                   NAME_lc for the duration of this call only.
# Returns:   1 when the replica's master_host and master_port both equal
#            the h and P parts of $self's DSN, 0 when they do not, and
#            nothing (bare return) when $cxn reports no slave status.
sub is_master_of {
   my ($self, $cxn) = @_;

   my $cxn_dbh = $cxn->dbh;
   # Lowercase column names, so the keys read below do not depend on the
   # case the server reports them in.
   local $cxn_dbh->{FetchHashKeyName} = 'NAME_lc';
   my $slave_status = $cxn_dbh->selectrow_hashref(q{SHOW SLAVE STATUS});
   return unless ref($slave_status) eq 'HASH';

   my $dsn      = $self->dsn;
   my %expected = (
      master_host => $dsn->{h},
      master_port => $dsn->{P},
   );

   # Every field must be known on both sides and identical.  An undefined
   # value never counts as a match: comparing it with eq would raise an
   # "uninitialized value" warning and could even report a match when both
   # sides are undefined.
   for my $field ( sort keys %expected ) {
      my $want = $expected{$field};
      my $have = $slave_status->{$field};
      return 0 unless defined $want && defined $have && $want eq $have;
   }

   return 1;
}
# Build a gcomm:// address for the node behind $dbh from the
# gmcast.listen_addr entry of its wsrep_provider_options variable.
#
# Arguments: $dbh - database handle of the node to inspect.
# Returns:   "gcomm://host:port" when the listen address is found,
#            otherwise the bare string "gcomm://".
sub _find_full_gcomm_addr {
   my ($self, $dbh) = @_;

   my $sql = q{SHOW VARIABLES LIKE 'wsrep_provider_options'};
   PTDEBUG && _d($sql);
   my (undef, $provider_opts) = $dbh->selectrow_array($sql);

   # The variable may be missing and the option may be absent from it.
   # Fall back to an empty host:port instead of interpolating undef, which
   # would raise an "uninitialized value" warning (fatal when the enclosing
   # file makes warnings fatal).
   my $prov_addr;
   if ( defined $provider_opts ) {
      ($prov_addr) = $provider_opts =~ m{\Qgmcast.listen_addr\E\s*=\s*tcp://([^:]+:[0-9]+)\s*;};
   }
   $prov_addr = '' unless defined $prov_addr;

   my $full_gcomm = "gcomm://$prov_addr";
   PTDEBUG && _d("gcomm address: ", $full_gcomm);
   return $full_gcomm;
}
sub DESTROY {
my ($self) = @_;
if ( $self->{dbh}

View File

@@ -39,6 +39,7 @@ $Data::Dumper::Quotekeys = 0;
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use constant PTDEVDEBUG => $ENV{PTDEVDEBUG} || 0;
use IO::Socket::INET;
$Sandbox::Percona::Toolkit::VERSION = "2.1.4";
my $trunk = $ENV{PERCONA_TOOLKIT_BRANCH};
@@ -129,7 +130,7 @@ sub get_dbh_for {
}
sub load_file {
my ( $self, $server, $file, $use_db ) = @_;
my ( $self, $server, $file, $use_db, %args ) = @_;
_check_server($server);
$file = "$trunk/$file";
if ( !-f $file ) {
@@ -144,7 +145,7 @@ sub load_file {
if ( $? >> 8 ) {
die "Failed to execute $file on $server: $out";
}
$self->wait_for_slaves();
$self->wait_for_slaves() unless $args{no_wait};
}
sub _use_for {
@@ -412,6 +413,85 @@ sub is_cluster_node {
: 0;
}
# Point the sandbox $server at $master_server and start replication.
#
# Arguments: $server        - name of the sandbox that becomes the replica.
#            $master_server - name of the sandbox to replicate from; its
#                             port is looked up in %port_for.
#            @extras        - further "key=value" clauses appended to the
#                             CHANGE MASTER TO statement.
# Both statements are run through $self->use() with -e.
sub set_as_slave {
   my ($self, $server, $master_server, @extras) = @_;

   PTDEBUG && _d("Setting $server as slave of $master_server");

   my $master_port = $port_for{$master_server};
   my @clauses     = (
      qq{change master to master_host='127.0.0.1'},
      qq{master_user='msandbox'},
      qq{master_password='msandbox'},
      qq{master_port=$master_port},
   );
   my $change_master = join ", ", @clauses, @extras;

   foreach my $statement ( $change_master, "start slave" ) {
      my $out = $self->use($server, qq{-e "$statement"});
      PTDEBUG && _d($out);
   }
}
# Run the start-sandbox script for $server.
#
# Arguments: $mode          - first argument handed to start-sandbox
#                             (callers in this file pass "master", "slave"
#                             or "cluster").
#            $server        - sandbox name; its port comes from %port_for.
#            $master_server - optional sandbox name whose port is passed as
#                             the third script argument.
# Returns the combined stdout/stderr of the script; dies with that output
# when the script exits unsuccessfully.
sub start_sandbox {
   my ($self, $mode, $server, $master_server) = @_;

   my $port        = $port_for{$server};
   my $master_port = '';
   $master_port    = $port_for{$master_server} if $master_server;

   my $command = "$trunk/sandbox/start-sandbox $mode $port $master_port 2>&1";
   my $out     = `$command`;
   die $out if $CHILD_ERROR;

   return $out;
}
# Run the stop-sandbox script for every sandbox named in @sandboxes.
#
# The names are translated to ports through %port_for and passed to the
# script in one invocation.  Returns the combined stdout/stderr of the
# script; dies with that output when the script exits unsuccessfully.
sub stop_sandbox {
   my ($self, @sandboxes) = @_;

   my @ports   = map { $port_for{$_} } @sandboxes;
   my $command = "$trunk/sandbox/stop-sandbox @ports 2>&1";

   my $out = `$command`;
   die $out if $CHILD_ERROR;

   return $out;
}
# Start a fresh cluster of sandboxes and return the names of its nodes.
#
# Named arguments:
#    cluster_size - number of nodes to start (default 3).
#    cluster_name - optional; exported as $ENV{CLUSTER_NAME} while the
#                   sandboxes are being started.
# Returns the list of generated node names, first node first.  The port of
# each node is recorded in %port_for under that name.
sub start_cluster {
   my ($self, %args) = @_;

   my $cluster_size = $args{cluster_size} || 3;

   # _get_unused_port() only probes a port and releases it again, and no
   # sandbox is listening yet while names and ports are being assigned.
   # Remember every port already handed out so that two nodes can never be
   # given the same one.
   my %port_taken = map { $_ => 1 } grep { defined } values %port_for;

   my ($node1, @nodes) = map {
      # Prefix with underscores until the name is not in use yet.
      my $node_name = "node$_";
      $node_name    = "_$node_name" while exists $port_for{$node_name};

      my $port = $self->_get_unused_port();
      $port    = $self->_get_unused_port() while $port_taken{$port};

      $port_taken{$port}    = 1;
      $port_for{$node_name} = $port;
      $node_name
   } 1..$cluster_size;

   local $ENV{CLUSTER_NAME} = $args{cluster_name} if $args{cluster_name};

   # The first node is started on its own; every other node is started
   # with the first node as its third start_sandbox argument.
   $self->start_sandbox("cluster", $node1);
   for my $node ( @nodes ) {
      $self->start_sandbox("cluster", $node, $node1);
   }

   return ($node1, @nodes);
}
# Lifted from Nginx::Test on CPAN
# Find a TCP port on 127.0.0.1 that can currently be bound.
#
# Starts just above a random port in the 50000-54999 range and walks upwards
# to 64000, returning the first port a listening socket can be opened on.
# The probe socket is closed again before returning.  Dies when no port in
# that range can be bound.
sub _get_unused_port {
   my $start = 50000 + int (rand() * 5000);

   for my $candidate ( $start + 1 .. 64000 ) {
      my $probe = IO::Socket::INET->new (
         Listen    => 5,
         LocalAddr => '127.0.0.1',
         LocalPort => $candidate,
         Proto     => 'tcp',
         ReuseAddr => 1
      ) or next;

      $probe->close;
      return $candidate;
   }

   die "Cannot find an open port";
}
# Return the port recorded in %port_for for the sandbox named $server
# (undef when the name is unknown).
sub port_for {
   my $self     = shift;
   my ($server) = @_;
   return $port_for{$server};
}
1;
}
# ###########################################################################

View File

@@ -9,7 +9,7 @@ BEGIN {
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More tests => 19;
use Test::More;
use Sandbox;
use OptionParser;
@@ -251,9 +251,145 @@ is_deeply(
@ARGV = ();
$o->get_opts();
diag("Starting master1");
$sb->start_sandbox("master", "master1");
$cxn = make_cxn( dsn_string => $sb->dsn_for("master1") );
$cxn->connect();
ok(
!$cxn->is_cluster_node(),
"is_cluster_node works correctly for non-nodes"
);
use VersionParser;
my $db_flavor = VersionParser->new($master_dbh)->flavor();
SKIP: {
skip "PXC-only test", 1
unless $db_flavor =~ /XtraDB Cluster/;
diag("Starting a 1-node PXC");
my ($node) = $sb->start_cluster(cluster_size => 1);
my $cxn1 = make_cxn( dsn_string => $sb->dsn_for($node) );
$cxn1->connect();
ok(
$cxn1->is_cluster_node(),
"is_cluster_node works correctly for cluster nodes"
);
diag("Setting node as a slave of master1");
$sb->set_as_slave($node, "master1");
ok(
$cxn->is_master_of($cxn1),
"->is_master_of works correctly for master -> cluster"
);
ok(
!$cxn1->is_master_of($cxn),
"...and the inverse returns the expected result"
);
ok(
!$cxn->same_cluster($cxn1),
"->same_cluster works for master -> cluster"
);
diag("Restarting the cluster");
$sb->stop_sandbox($node);
($node) = $sb->start_cluster(cluster_size => 1);
$cxn1 = make_cxn( dsn_string => $sb->dsn_for($node) );
$cxn1->connect();
diag("Setting master1 as a slave of the node");
$sb->set_as_slave("master1", $node);
ok(
$cxn1->is_master_of($cxn),
"->is_master_of works correctly for cluster -> master"
);
ok(
!$cxn->is_master_of($cxn1),
"...and the inverse returns the expected result"
);
ok(
!$cxn1->same_cluster($cxn),
"->same_cluster works for cluster -> master"
);
diag("Starting a 2-node PXC");
my ($node2, $node3) = $sb->start_cluster(cluster_size => 2);
my $cxn2 = make_cxn( dsn_string => $sb->dsn_for($node2) );
$cxn2->connect();
my $cxn3 = make_cxn( dsn_string => $sb->dsn_for($node3) );
$cxn3->connect();
ok(
$cxn2->is_cluster_node(),
"is_cluster_node correctly finds that this node is part of a cluster"
);
ok(
!$cxn1->same_cluster($cxn2),
"and same_cluster correctly finds that they don't belong to the same cluster, even when they have the same cluster name"
);
ok(
$cxn2->same_cluster($cxn3),
"...but does find that they are in the same cluster, even if one is node1"
);
TODO: {
local $::TODO = "Should detected that (cluster1.node1) (cluster2.node2) come from different clusters, but doesn't";
ok(
!$cxn1->same_cluster($cxn3),
"...same_cluster works correctly when they have the same cluster names"
);
}
diag("Making the second cluster a slave of the first");
$sb->set_as_slave($node2, $node);
ok($cxn1->is_master_of($cxn2), "is_master_of works");
ok(
!$cxn1->same_cluster($cxn2),
"...same_cluster works correctly when they are cluster1.node1.master -> cluster2.node1.slave"
);
diag($sb->stop_sandbox($node2, $node3));
diag("Starting a 3-node cluster");
my $node4;
($node2, $node3, $node4) = $sb->start_cluster(
cluster_size => 3,
cluster_name => "pt_cxn_test",
);
$cxn2 = make_cxn( dsn_string => $sb->dsn_for($node2) );
$cxn2->connect();
$cxn3 = make_cxn( dsn_string => $sb->dsn_for($node3) );
$cxn3->connect();
my $cxn4 = make_cxn( dsn_string => $sb->dsn_for($node4) );
$cxn4->connect();
ok(
!$cxn1->same_cluster($cxn2),
"...same_cluster works correctly when they have different cluster names & the are both gcomm"
);
ok(
!$cxn1->same_cluster($cxn3),
"same_cluster detects that (cluster1.node1) (cluster2.node2) come from different clusters if they have different cluster_names"
);
ok(
$cxn2->same_cluster($cxn3),
"sanity check: but still finds that nodes in the same cluster belong together"
);
diag($sb->stop_sandbox($node, $node2, $node3, $node4));
}
diag($sb->stop_sandbox("master1"));
# #############################################################################
# Done.
# #############################################################################
# Close the master handle if one was opened, then verify the sandbox.
$master_dbh->disconnect() if $master_dbh;
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
# The semicolon matters: without it Perl parses "done_testing exit" as
# done_testing(exit), so exit runs first and the test plan is never emitted.
done_testing;
exit;

View File

@@ -191,6 +191,197 @@ like(
"Bug 1016131: ptc should skip tables where all columns are excluded"
);
# #############################################################################
# pt-table-checksum v2.1.4 doesn't detect diffs on Percona XtraDB Cluster nodes
# https://bugs.launchpad.net/percona-toolkit/+bug/1062563
# #############################################################################
use File::Spec::Functions;
my $db_flavor = VersionParser->new($master_dbh)->flavor();
SKIP: {
skip "PXC-only tests", 8
unless $db_flavor =~ /XtraDB Cluster/;
diag("Creating a 5-node PXC cluster...");
my @nodes = $sb->start_cluster(cluster_size => 5);
diag("Nodes: ", Dumper( { map { $_ => $sb->port_for($_) } @nodes } ));
my $node2 = $nodes[1];
my $node2_dbh = $sb->get_dbh_for($node2);
my $node2_slave = "master3";
diag("Creating a slave for $node2...");
{
local $ENV{BINLOG_FORMAT} = 'ROW';
$sb->start_sandbox("slave", $node2_slave, $node2);
}
my $node_slave_dbh = $sb->get_dbh_for($node2_slave);
make_dbh_differ($node2_dbh);
# And make its slave differ as well
PerconaTest::wait_for_table($node_slave_dbh, "bug_1062563.ptc_pxc");
$node_slave_dbh->do("INSERT INTO bug_1062563.ptc_pxc (i) VALUES ($_)") for 3, 4;
my $dsns_table_sql = catfile(qw(t lib samples MasterSlave dsn_table.sql));
$sb->load_file($node2, $dsns_table_sql, undef, no_wait => 1);
my $sth = $node2_dbh->prepare("INSERT INTO dsn_t.dsns VALUES (null, null, ?)");
for my $dsn ( map { $sb->dsn_for($_) } @nodes[0,2..$#nodes], $node2_slave ) {
$sth->execute($dsn);
}
my $node2_dsn = $sb->dsn_for($node2);
$output = output(
sub { pt_table_checksum::main(
$node2_dsn, qw(--lock-wait-timeout 3),
qw(-d bug_1062563),
'--recursion-method', "dsn=D=dsn_t,t=dsns"
) },
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"Bug 1062563: Detects diffs between PXC nodes"
);
my @cluster_nodes = $output =~ /(because it is a cluster node)/g;
is(
scalar(@cluster_nodes),
4,
"Skips all the cluster nodes in the dsns table"
) or diag($output);
# Now try with just the slave
$node2_dbh->do("DELETE FROM dsn_t.dsns");
$sth->execute($sb->dsn_for($node2_slave));
$output = output(
sub { pt_table_checksum::main(
$node2_dsn, qw(--lock-wait-timeout 3),
qw(--chunk-size 1),
qw(-d bug_1062563),
'--recursion-method', "dsn=D=dsn_t,t=dsns"
) },
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"Bug 1062563: Detects diffs on slaves where the master is a PXC node"
) or diag($output);
$sth->finish();
diag("Stopping the PXC cluster and the slave...");
$sb->stop_sandbox($node2_slave, @nodes);
# Now checking that cluster -> cluster works
diag("Creating two 3-node clusters...");
my @cluster1 = $sb->start_cluster(cluster_size => 3, cluster_name => "pt_test_cluster_1");
my @cluster2 = $sb->start_cluster(cluster_size => 3, cluster_name => "pt_test_cluster_2");
diag("Cluster 1: ", Dumper( { map { $_ => $sb->port_for($_) } @cluster1 } ));
diag("Cluster 2: ", Dumper( { map { $_ => $sb->port_for($_) } @cluster2 } ));
$sb->set_as_slave($cluster2[0], $cluster1[0]);
my $cluster1_dbh = $sb->get_dbh_for($cluster1[0]);
my $cluster2_dbh = $sb->get_dbh_for($cluster2[0]);
make_dbh_differ($cluster1_dbh);
# And make its slave differ as well
PerconaTest::wait_for_table($cluster2_dbh, "bug_1062563.ptc_pxc");
$cluster2_dbh->do("INSERT INTO bug_1062563.ptc_pxc (i) VALUES ($_)") for 3, 4;
$dsns_table_sql = catfile(qw(t lib samples MasterSlave dsn_table.sql));
$sb->load_file($cluster1[0], $dsns_table_sql, undef, no_wait => 1);
$sth = $cluster1_dbh->prepare("INSERT INTO dsn_t.dsns VALUES (null, null, ?)");
for my $dsn ( map { $sb->dsn_for($_) } @cluster1[1..$#cluster1], $cluster2[0] ) {
$sth->execute($dsn);
}
$sth->finish();
my $cluster1_dsn = $sb->dsn_for($cluster1[0]);
$output = output(
sub { pt_table_checksum::main(
$cluster1_dsn, qw(--lock-wait-timeout 3),
qw(-d bug_1062563),
'--recursion-method', "dsn=D=dsn_t,t=dsns"
) },
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"Bug 1062563: Detects diffs between PXC nodes when cluster -> cluster"
);
like(
$output,
qr/is a cluster node, but doesn't belong to the same cluster as/, #'
"Shows a warning when cluster -> cluster"
) or diag($output);
diag("Starting master1...");
$sb->start_sandbox("master", "master1");
diag("Setting it as master of a node in the first cluster");
$sb->set_as_slave($cluster1[0], "master1");
my $master1_dbh = $sb->get_dbh_for("master1");
make_dbh_differ($master1_dbh, 10..50);
my $master1_dsn = $sb->dsn_for("master1");
$output = output(
sub { pt_table_checksum::main(
$master1_dsn, qw(--lock-wait-timeout 3),
qw(-d bug_1062563),
) },
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"Bug 1062563: Detects diffs when master -> cluster"
) or diag($output);
is(
PerconaTest::count_checksum_results($output, 'rows'),
41,
"Bug 1062563: Correct number of rows for master -> cluster"
) or diag($output);
like(
$output,
qr/is a cluster node, but .*? is not. This is not currently supported/,
"Shows a warning when master -> cluster"
) or diag($output);
diag("Stopping both clusters and master1...");
$sb->stop_sandbox(@cluster1, @cluster2, "master1");
}
# Recreate bug_1062563.ptc_pxc through $dbh and fill it with rows that are
# inserted while sql_log_bin is switched off for the session.
#
# Arguments: $dbh  - database handle of the server to modify.
#            @vals - values to insert into column i (default: just 1).
sub make_dbh_differ {
   my ($dbh, @vals) = @_;
   @vals = (1) unless @vals;

   # Start from a clean database and table.
   for my $ddl (
      "DROP DATABASE IF EXISTS bug_1062563",
      "CREATE DATABASE bug_1062563",
      "CREATE TABLE bug_1062563.ptc_pxc (i int)",
   ) {
      $dbh->do($ddl);
   }

   # Insert with sql_log_bin off, so this server ends up different from
   # the rest.
   $dbh->do("set sql_log_bin=0");
   foreach my $val ( @vals ) {
      $dbh->do("INSERT INTO bug_1062563.ptc_pxc (i) VALUES ($val)");
   }
   $dbh->do("set sql_log_bin=1");
}
# #############################################################################
# Done.
# #############################################################################