mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-11 21:51:21 +00:00
WIP ptc: Autodetect cluster nodes & recurse to slaves
This commit is contained in:
@@ -3426,7 +3426,6 @@ sub find_cluster_nodes {
|
|||||||
my $dp = $args{DSNParser};
|
my $dp = $args{DSNParser};
|
||||||
my $make_cxn = $args{make_cxn};
|
my $make_cxn = $args{make_cxn};
|
||||||
|
|
||||||
|
|
||||||
my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'};
|
my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'};
|
||||||
PTDEBUG && _d($sql);
|
PTDEBUG && _d($sql);
|
||||||
my (undef, $addresses) = $dbh->selectrow_array($sql);
|
my (undef, $addresses) = $dbh->selectrow_array($sql);
|
||||||
@@ -3442,16 +3441,18 @@ sub find_cluster_nodes {
|
|||||||
my $spec = "h=$host"
|
my $spec = "h=$host"
|
||||||
. ($port ? ",P=$port" : "");
|
. ($port ? ",P=$port" : "");
|
||||||
my $node_dsn = $dp->parse($spec, $dsn);
|
my $node_dsn = $dp->parse($spec, $dsn);
|
||||||
my $node_dbh = eval {
|
my $node_dbh = eval { $dp->get_dbh(
|
||||||
$dp->get_dbh(
|
$dp->get_cxn_params($node_dsn), { AutoCommit => 1 }) };
|
||||||
$dp->get_cxn_params($node_dsn), { AutoCommit => 1 });
|
|
||||||
PTDEBUG && _d('Connected to', $dp->as_string($node_dsn));
|
|
||||||
};
|
|
||||||
if ( $EVAL_ERROR ) {
|
if ( $EVAL_ERROR ) {
|
||||||
print STDERR "Cannot connect to ", $dp->as_string($node_dsn),
|
print STDERR "Cannot connect to ", $dp->as_string($node_dsn),
|
||||||
", discovered through $sql: $EVAL_ERROR\n";
|
", discovered through $sql: $EVAL_ERROR\n";
|
||||||
|
if ( !$port && $dsn->{P} != 3306 ) {
|
||||||
|
$address .= ":3306";
|
||||||
|
redo;
|
||||||
|
}
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
PTDEBUG && _d('Connected to', $dp->as_string($node_dsn));
|
||||||
$node_dbh->disconnect();
|
$node_dbh->disconnect();
|
||||||
|
|
||||||
push @nodes, $make_cxn->(dsn => $node_dsn);
|
push @nodes, $make_cxn->(dsn => $node_dsn);
|
||||||
@@ -3468,7 +3469,7 @@ sub remove_duplicate_cxns {
|
|||||||
my @unique_cxns;
|
my @unique_cxns;
|
||||||
CXN:
|
CXN:
|
||||||
foreach my $cxn ( @cxns ) {
|
foreach my $cxn ( @cxns ) {
|
||||||
if ( !$self->cluster_node($cxn) ) {
|
if ( !$self->is_cluster_node($cxn) ) {
|
||||||
push @unique_cxns, $cxn;
|
push @unique_cxns, $cxn;
|
||||||
next CXN;
|
next CXN;
|
||||||
}
|
}
|
||||||
@@ -3476,7 +3477,7 @@ sub remove_duplicate_cxns {
|
|||||||
my $dbh = $cxn->dbh();
|
my $dbh = $cxn->dbh();
|
||||||
my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'};
|
my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'};
|
||||||
PTDEBUG && _d($dbh, $sql);
|
PTDEBUG && _d($dbh, $sql);
|
||||||
my (undef, $receive_addr) = $dbh->selectrow_array();
|
my (undef, $receive_addr) = $dbh->selectrow_array($sql);
|
||||||
|
|
||||||
if ( !$receive_addr ) {
|
if ( !$receive_addr ) {
|
||||||
PTDEBUG && _d(q{Query returned nothing, assuming that it's },
|
PTDEBUG && _d(q{Query returned nothing, assuming that it's },
|
||||||
@@ -3492,8 +3493,6 @@ sub remove_duplicate_cxns {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
warn "<@cxns>";
|
|
||||||
warn "<@unique_cxns>";
|
|
||||||
return @unique_cxns;
|
return @unique_cxns;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -8767,14 +8766,57 @@ sub main {
|
|||||||
make_cxn => $make_cxn_cluster,
|
make_cxn => $make_cxn_cluster,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if ( $o->get('recursion-method') !~ /^dsn/i ) {
|
||||||
|
my %seen;
|
||||||
|
my @new_slaves;
|
||||||
|
for my $slave ( @$slaves ) {
|
||||||
|
next unless $cluster->is_cluster_node($slave);
|
||||||
|
my @nodes = $cluster->find_cluster_nodes(
|
||||||
|
dbh => $slave->dbh(),
|
||||||
|
dsn => $slave->dsn(),
|
||||||
|
make_cxn => $make_cxn_cluster,
|
||||||
|
DSNParser => $dp,
|
||||||
|
);
|
||||||
|
@nodes = grep { !$seen{$dp->as_string($_->dsn)}++ }
|
||||||
|
grep { !$cluster->same_node($slave, $_) } @nodes;
|
||||||
|
push @new_slaves, @nodes;
|
||||||
|
|
||||||
|
foreach my $node (@nodes) {
|
||||||
|
my $node_slaves = $ms->get_slaves(
|
||||||
|
dbh => $node->dbh(),
|
||||||
|
dsn => $node->dsn(),
|
||||||
|
make_cxn => $make_cxn_cluster,
|
||||||
|
);
|
||||||
|
push @new_slaves, @$node_slaves;
|
||||||
|
}
|
||||||
|
($master_cxn, @new_slaves) =
|
||||||
|
$cluster->remove_duplicate_cxns($master_cxn, @new_slaves);
|
||||||
|
}
|
||||||
|
push @$slaves, @new_slaves;
|
||||||
|
}
|
||||||
|
|
||||||
if ( $cluster_name_for{$master_cxn} ) {
|
if ( $cluster_name_for{$master_cxn} ) {
|
||||||
push @$slaves, $cluster->find_cluster_nodes(
|
if ( $o->get('recursion-method') !~ /^dsn/i ) {
|
||||||
|
my @nodes = $cluster->find_cluster_nodes(
|
||||||
dbh => $master_dbh,
|
dbh => $master_dbh,
|
||||||
dsn => $master_dsn,
|
dsn => $master_dsn,
|
||||||
make_cxn => $make_cxn_cluster,
|
make_cxn => $make_cxn_cluster,
|
||||||
DSNParser => $dp,
|
DSNParser => $dp,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@nodes = grep { !$cluster->same_node($master_cxn, $_) } @nodes;
|
||||||
|
push @$slaves, @nodes;
|
||||||
|
|
||||||
|
foreach my $node (@nodes) {
|
||||||
|
my $node_slaves = $ms->get_slaves(
|
||||||
|
dbh => $node->dbh(),
|
||||||
|
dsn => $node->dsn(),
|
||||||
|
make_cxn => $make_cxn_cluster,
|
||||||
|
);
|
||||||
|
push @$slaves, @$node_slaves;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
my @pruned_slaves;
|
my @pruned_slaves;
|
||||||
($master_cxn, @pruned_slaves) =
|
($master_cxn, @pruned_slaves) =
|
||||||
$cluster->remove_duplicate_cxns($master_cxn, @$slaves);
|
$cluster->remove_duplicate_cxns($master_cxn, @$slaves);
|
||||||
|
@@ -64,6 +64,8 @@ sub same_node {
|
|||||||
}
|
}
|
||||||
|
|
||||||
# TODO: Check that the PXC version supports wsrep_incoming_addresses
|
# TODO: Check that the PXC version supports wsrep_incoming_addresses
|
||||||
|
# Not really necessary, actually. But in case it's needed,
|
||||||
|
# wsrep_provider_version =~ /[0-9]+\.[0-9]+\(r([0-9]+)\)/ && $1 >= 137
|
||||||
sub find_cluster_nodes {
|
sub find_cluster_nodes {
|
||||||
my ($self, %args) = @_;
|
my ($self, %args) = @_;
|
||||||
|
|
||||||
@@ -72,6 +74,9 @@ sub find_cluster_nodes {
|
|||||||
my $dp = $args{DSNParser};
|
my $dp = $args{DSNParser};
|
||||||
my $make_cxn = $args{make_cxn};
|
my $make_cxn = $args{make_cxn};
|
||||||
|
|
||||||
|
# Ostensibly the caller should've done this already, but
|
||||||
|
# useful for safety.
|
||||||
|
$dp->fill_in_dsn($dbh, $dsn);
|
||||||
|
|
||||||
my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'};
|
my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'};
|
||||||
PTDEBUG && _d($sql);
|
PTDEBUG && _d($sql);
|
||||||
@@ -88,16 +93,24 @@ sub find_cluster_nodes {
|
|||||||
my $spec = "h=$host"
|
my $spec = "h=$host"
|
||||||
. ($port ? ",P=$port" : "");
|
. ($port ? ",P=$port" : "");
|
||||||
my $node_dsn = $dp->parse($spec, $dsn);
|
my $node_dsn = $dp->parse($spec, $dsn);
|
||||||
my $node_dbh = eval {
|
my $node_dbh = eval { $dp->get_dbh(
|
||||||
$dp->get_dbh(
|
$dp->get_cxn_params($node_dsn), { AutoCommit => 1 }) };
|
||||||
$dp->get_cxn_params($node_dsn), { AutoCommit => 1 });
|
|
||||||
PTDEBUG && _d('Connected to', $dp->as_string($node_dsn));
|
|
||||||
};
|
|
||||||
if ( $EVAL_ERROR ) {
|
if ( $EVAL_ERROR ) {
|
||||||
print STDERR "Cannot connect to ", $dp->as_string($node_dsn),
|
print STDERR "Cannot connect to ", $dp->as_string($node_dsn),
|
||||||
", discovered through $sql: $EVAL_ERROR\n";
|
", discovered through $sql: $EVAL_ERROR\n";
|
||||||
|
# This is a bit strange, so an explanation is called for.
|
||||||
|
# If there wasn't a port, that means that this bug
|
||||||
|
# https://bugs.launchpad.net/percona-toolkit/+bug/1082406
|
||||||
|
# isn't fixed on this version of PXC. We tried using the
|
||||||
|
# master's port, but that didn't work. So try again, using
|
||||||
|
# the default port.
|
||||||
|
if ( !$port && $dsn->{P} != 3306 ) {
|
||||||
|
$address .= ":3306";
|
||||||
|
redo;
|
||||||
|
}
|
||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
PTDEBUG && _d('Connected to', $dp->as_string($node_dsn));
|
||||||
$node_dbh->disconnect();
|
$node_dbh->disconnect();
|
||||||
|
|
||||||
push @nodes, $make_cxn->(dsn => $node_dsn);
|
push @nodes, $make_cxn->(dsn => $node_dsn);
|
||||||
@@ -125,7 +138,7 @@ sub remove_duplicate_cxns {
|
|||||||
CXN:
|
CXN:
|
||||||
foreach my $cxn ( @cxns ) {
|
foreach my $cxn ( @cxns ) {
|
||||||
# If not a cluster node, assume that it's unique
|
# If not a cluster node, assume that it's unique
|
||||||
if ( !$self->cluster_node($cxn) ) {
|
if ( !$self->is_cluster_node($cxn) ) {
|
||||||
push @unique_cxns, $cxn;
|
push @unique_cxns, $cxn;
|
||||||
next CXN;
|
next CXN;
|
||||||
}
|
}
|
||||||
@@ -134,7 +147,7 @@ sub remove_duplicate_cxns {
|
|||||||
my $dbh = $cxn->dbh();
|
my $dbh = $cxn->dbh();
|
||||||
my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'};
|
my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'};
|
||||||
PTDEBUG && _d($dbh, $sql);
|
PTDEBUG && _d($dbh, $sql);
|
||||||
my (undef, $receive_addr) = $dbh->selectrow_array();
|
my (undef, $receive_addr) = $dbh->selectrow_array($sql);
|
||||||
|
|
||||||
if ( !$receive_addr ) {
|
if ( !$receive_addr ) {
|
||||||
PTDEBUG && _d(q{Query returned nothing, assuming that it's },
|
PTDEBUG && _d(q{Query returned nothing, assuming that it's },
|
||||||
@@ -150,8 +163,6 @@ sub remove_duplicate_cxns {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
warn "<@cxns>";
|
|
||||||
warn "<@unique_cxns>";
|
|
||||||
return @unique_cxns;
|
return @unique_cxns;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -31,7 +31,7 @@ binlog_format = ROW
|
|||||||
wsrep_provider = LIBGALERA
|
wsrep_provider = LIBGALERA
|
||||||
wsrep_cluster_address = CLUSTER_AD
|
wsrep_cluster_address = CLUSTER_AD
|
||||||
wsrep_sst_receive_address = ADDR:RECEIVE_PRT
|
wsrep_sst_receive_address = ADDR:RECEIVE_PRT
|
||||||
wsrep_node_incoming_address= ADDR
|
wsrep_node_incoming_address= ADDR:PORT
|
||||||
wsrep_slave_threads = 2
|
wsrep_slave_threads = 2
|
||||||
wsrep_cluster_name = CLUSTER_NAME
|
wsrep_cluster_name = CLUSTER_NAME
|
||||||
wsrep_provider_options = "gmcast.listen_addr=tcp://ADDR:LISTEN_PRT;"
|
wsrep_provider_options = "gmcast.listen_addr=tcp://ADDR:LISTEN_PRT;"
|
||||||
|
@@ -272,6 +272,7 @@ is(
|
|||||||
"Slave is changed"
|
"Slave is changed"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
for my
|
||||||
$output = output(
|
$output = output(
|
||||||
sub { pt_table_checksum::main(@args,
|
sub { pt_table_checksum::main(@args,
|
||||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
||||||
@@ -445,24 +446,31 @@ like(
|
|||||||
"Warns that direct replica of the master isn't found or specified",
|
"Warns that direct replica of the master isn't found or specified",
|
||||||
);
|
);
|
||||||
|
|
||||||
|
# Originally, these tested a dsn table with all nodes.
|
||||||
# Use the other DSN table with all three nodes. Now the tool should
|
# Use the other DSN table with all three nodes. Now the tool should
|
||||||
# give a more specific warning than that ^.
|
# give a more specific warning than that ^.
|
||||||
$output = output(
|
for my $args (
|
||||||
|
["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"],
|
||||||
|
["autodetecting everything"]
|
||||||
|
)
|
||||||
|
{
|
||||||
|
my $test = shift @$args;
|
||||||
|
$output = output(
|
||||||
sub { pt_table_checksum::main($master_dsn,
|
sub { pt_table_checksum::main($master_dsn,
|
||||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
@$args,
|
||||||
qw(-d test))
|
qw(-d test))
|
||||||
},
|
},
|
||||||
stderr => 1,
|
stderr => 1,
|
||||||
);
|
);
|
||||||
|
|
||||||
is(
|
is(
|
||||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||||
1,
|
1,
|
||||||
"...check all nodes: 1 diff"
|
"...check all nodes: 1 diff ($test)"
|
||||||
) or diag($output);
|
) or diag($output);
|
||||||
|
|
||||||
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
|
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
|
||||||
like(
|
like(
|
||||||
$output,
|
$output,
|
||||||
qr/^\S+\s+ # ts
|
qr/^\S+\s+ # ts
|
||||||
0\s+ # errors
|
0\s+ # errors
|
||||||
@@ -473,64 +481,66 @@ like(
|
|||||||
\S+\s+ # time
|
\S+\s+ # time
|
||||||
test.t$ # table
|
test.t$ # table
|
||||||
/xm,
|
/xm,
|
||||||
"...check all nodes: it's in test.t"
|
"...check all nodes: it's in test.t ($test)"
|
||||||
);
|
);
|
||||||
|
|
||||||
like(
|
like(
|
||||||
$output,
|
$output,
|
||||||
qr/Diffs will only be detected if the cluster is consistent with h=127.1,P=12345 because h=127.1,P=12349/,
|
qr/Diffs will only be detected if the cluster is consistent with h=127.1,P=12345 because h=127.1,P=12349/,
|
||||||
"Warns that diffs only detected if cluster consistent with direct replica",
|
"Warns that diffs only detected if cluster consistent with direct replica ($test)",
|
||||||
);
|
);
|
||||||
|
|
||||||
# Restore node1 so the cluster is consistent, but then make node2 differ.
|
# Restore node1 so the cluster is consistent, but then make node2 differ.
|
||||||
# ptc should NOT detect this diff because the checksum query will replicate
|
# ptc should NOT detect this diff because the checksum query will replicate
|
||||||
# to node1, node1 isn't different, so it broadcasts the result in ROW format
|
# to node1, node1 isn't different, so it broadcasts the result in ROW format
|
||||||
# that all is ok, which node2 gets and thus false reports. This is why
|
# that all is ok, which node2 gets and thus false reports. This is why
|
||||||
# those ^ warnings exist.
|
# those ^ warnings exist.
|
||||||
$node1->do("set sql_log_bin=0");
|
$node1->do("set sql_log_bin=0");
|
||||||
$node1->do("update test.t set c='z' where c='zebra'");
|
$node1->do("update test.t set c='z' where c='zebra'");
|
||||||
$node1->do("set sql_log_bin=1");
|
$node1->do("set sql_log_bin=1");
|
||||||
|
|
||||||
$node2->do("set sql_log_bin=0");
|
$node2->do("set sql_log_bin=0");
|
||||||
$node2->do("update test.t set c='zebra' where c='z'");
|
$node2->do("update test.t set c='zebra' where c='z'");
|
||||||
$node2->do("set sql_log_bin=1");
|
$node2->do("set sql_log_bin=1");
|
||||||
|
|
||||||
($row) = $node2->selectrow_array("select c from test.t order by c desc limit 1");
|
($row) = $node2->selectrow_array("select c from test.t order by c desc limit 1");
|
||||||
is(
|
is(
|
||||||
$row,
|
$row,
|
||||||
"zebra",
|
"zebra",
|
||||||
"Node2 is changed again"
|
"Node2 is changed again ($test)"
|
||||||
);
|
);
|
||||||
|
|
||||||
($row) = $node1->selectrow_array("select c from test.t order by c desc limit 1");
|
($row) = $node1->selectrow_array("select c from test.t order by c desc limit 1");
|
||||||
is(
|
is(
|
||||||
$row,
|
$row,
|
||||||
"z",
|
"z",
|
||||||
"Node1 not changed again"
|
"Node1 not changed again ($test)"
|
||||||
);
|
);
|
||||||
|
|
||||||
($row) = $node3->selectrow_array("select c from test.t order by c desc limit 1");
|
($row) = $node3->selectrow_array("select c from test.t order by c desc limit 1");
|
||||||
is(
|
is(
|
||||||
$row,
|
$row,
|
||||||
"z",
|
"z",
|
||||||
"Node3 not changed again"
|
"Node3 not changed again ($test)"
|
||||||
);
|
);
|
||||||
|
|
||||||
# the other DSN table with all three nodes, but it won't matter because
|
# the other DSN table with all three nodes, but it won't matter because
|
||||||
# node1 is going to broadcast the false-positive that there are no diffs.
|
# node1 is going to broadcast the false-positive that there are no diffs.
|
||||||
$output = output(
|
$output = output(
|
||||||
sub { pt_table_checksum::main($master_dsn,
|
sub { pt_table_checksum::main($master_dsn,
|
||||||
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
|
@$args,
|
||||||
qw(-d test))
|
qw(-d test))
|
||||||
},
|
},
|
||||||
stderr => 1,
|
stderr => 1,
|
||||||
);
|
);
|
||||||
|
|
||||||
is(
|
is(
|
||||||
PerconaTest::count_checksum_results($output, 'diffs'),
|
PerconaTest::count_checksum_results($output, 'diffs'),
|
||||||
0,
|
0,
|
||||||
"Limitation: diff not on direct replica not detected"
|
"Limitation: diff not on direct replica not detected ($test)"
|
||||||
) or diag($output);
|
) or diag($output);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
# ###########################################################################
|
# ###########################################################################
|
||||||
# Be sure to stop the slave on node1, else further test will die with:
|
# Be sure to stop the slave on node1, else further test will die with:
|
||||||
|
Reference in New Issue
Block a user