WIP Autodetect nodes & recurse to find new slaves

This commit is contained in:
Brian Fraser fraserb@gmail.com
2012-11-25 18:51:06 -03:00
parent 41d7700aa5
commit 79a5c39cec
3 changed files with 357 additions and 228 deletions

View File

@@ -3425,6 +3425,7 @@ sub find_cluster_nodes {
my $dsn = $args{dsn};
my $dp = $args{DSNParser};
my $make_cxn = $args{make_cxn};
my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'};
PTDEBUG && _d($sql);
@@ -3463,37 +3464,29 @@ sub find_cluster_nodes {
sub remove_duplicate_cxns {
my ($self, @cxns) = @_;
my %addresses;
my ($self, %args) = @_;
my @cxns = @{$args{cxns}};
my $seen_ids = $args{seen_ids};
PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns);
my @trimmed_cxns;
my @unique_cxns;
CXN:
foreach my $cxn ( @cxns ) {
if ( !$self->is_cluster_node($cxn) ) {
push @unique_cxns, $cxn;
next CXN;
}
for my $cxn ( @cxns ) {
my $dbh = $cxn->dbh();
my $sql = q{SELECT @@SERVER_ID};
PTDEBUG && _d($sql);
my ($id) = $dbh->selectrow_array($sql);
PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id);
my $dbh = $cxn->dbh();
my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'};
PTDEBUG && _d($dbh, $sql);
my (undef, $receive_addr) = $dbh->selectrow_array($sql);
if ( !$receive_addr ) {
PTDEBUG && _d(q{Query returned nothing, assuming that it's },
q{not a duplicate});
push @unique_cxns, $cxn;
}
elsif ( $addresses{$receive_addr}++ ) {
PTDEBUG && _d('Removing ', $cxn->name, 'from slaves',
'because we already have a node from this address');
if ( ! $seen_ids->{$id}++ ) {
push @trimmed_cxns, $cxn
}
else {
push @unique_cxns, $cxn;
PTDEBUG && _d("Removing ", $cxn->name,
", ID $id, because we've already seen it");
}
}
return @unique_cxns;
return @trimmed_cxns;
}
sub same_cluster {
@@ -3507,6 +3500,56 @@ sub same_cluster {
return ($cluster1 || '') eq ($cluster2 || '');
}
# Recursively discovers cluster nodes, and the replicas of those nodes,
# starting from a list of already-known nodes.
#
# Named arguments:
#   * nodes        arrayref of cxn objects to start from; each must
#                  provide dbh() and dsn().  Missing or empty: nothing
#                  is searched and an empty list is returned.
#   * MasterSlave  object whose get_slaves() returns an arrayref of cxns
#   * DSNParser    passed through to find_cluster_nodes()
#   * make_cxn     passed through to find_cluster_nodes() and get_slaves()
#   * seen_ids     hashref handed to remove_duplicate_cxns() on every
#                  call.  Optional: when omitted, a fresh hashref is
#                  created here and shared by all recursion levels.
#
# Returns a flat list: the new nodes found at this level, then the new
# replicas of those nodes, then whatever the recursive call on the
# replicas that are themselves cluster nodes returns.
sub autodetect_nodes {
   my ($self, %args) = @_;
   my $ms       = $args{MasterSlave};
   my $dp       = $args{DSNParser};
   my $make_cxn = $args{make_cxn};
   my $nodes    = $args{nodes};

   # One hashref must be shared by every remove_duplicate_cxns() call in
   # the whole recursion; otherwise nothing stops the same nodes from
   # being rediscovered at each level and the recursion need not end.
   my $seen_ids = $args{seen_ids} || {};

   # Guard the dereference: @$nodes on an undefined value dies under
   # strict refs.  An empty list is also what ends the recursion.
   return unless $nodes && @$nodes;

   # Step 1: ask every known node which other cluster nodes it sees.
   my @new_nodes;
   for my $node ( @$nodes ) {
      my @nodes = $self->find_cluster_nodes(
         dbh       => $node->dbh(),
         dsn       => $node->dsn(),
         make_cxn  => $make_cxn,
         DSNParser => $dp,
      );
      push @new_nodes, @nodes;
   }

   @new_nodes = $self->remove_duplicate_cxns(
      cxns     => \@new_nodes,
      seen_ids => $seen_ids,
   );

   # Step 2: collect the replicas of each newly found node.
   my @new_slaves;
   foreach my $node ( @new_nodes ) {
      my $node_slaves = $ms->get_slaves(
         dbh      => $node->dbh(),
         dsn      => $node->dsn(),
         make_cxn => $make_cxn,
      );
      push @new_slaves, @$node_slaves;
   }

   @new_slaves = $self->remove_duplicate_cxns(
      cxns     => \@new_slaves,
      seen_ids => $seen_ids,
   );

   # Step 3: replicas that are cluster nodes themselves seed the next
   # round of discovery.
   my @new_slave_nodes = grep { $self->is_cluster_node($_) } @new_slaves;

   return @new_nodes, @new_slaves,
      $self->autodetect_nodes(
         %args,
         nodes    => \@new_slave_nodes,
         seen_ids => $seen_ids,
      );
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
@@ -8766,63 +8809,29 @@ sub main {
make_cxn => $make_cxn_cluster,
);
if ( $o->get('recursion-method') !~ /^dsn/i ) {
my %seen;
my @new_slaves;
for my $slave ( @$slaves ) {
next unless $cluster->is_cluster_node($slave);
my @nodes = $cluster->find_cluster_nodes(
dbh => $slave->dbh(),
dsn => $slave->dsn(),
make_cxn => $make_cxn_cluster,
DSNParser => $dp,
);
@nodes = grep { !$seen{$dp->as_string($_->dsn)}++ }
grep { !$cluster->same_node($slave, $_) } @nodes;
push @new_slaves, @nodes;
foreach my $node (@nodes) {
my $node_slaves = $ms->get_slaves(
dbh => $node->dbh(),
dsn => $node->dsn(),
make_cxn => $make_cxn_cluster,
);
push @new_slaves, @$node_slaves;
}
($master_cxn, @new_slaves) =
$cluster->remove_duplicate_cxns($master_cxn, @new_slaves);
}
push @$slaves, @new_slaves;
my %seen_ids;
for my $cxn ($master_cxn, @$slaves) {
my $dbh = $cxn->dbh();
my $sql = q{SELECT @@SERVER_ID};
PTDEBUG && _d($cxn, $dbh, $sql);
my ($id) = $dbh->selectrow_array($sql);
$seen_ids{$id}++;
}
if ( $cluster_name_for{$master_cxn} ) {
if ( $o->get('recursion-method') !~ /^dsn/i ) {
my @nodes = $cluster->find_cluster_nodes(
dbh => $master_dbh,
dsn => $master_dsn,
make_cxn => $make_cxn_cluster,
DSNParser => $dp,
);
@nodes = grep { !$cluster->same_node($master_cxn, $_) } @nodes;
push @$slaves, @nodes;
foreach my $node (@nodes) {
my $node_slaves = $ms->get_slaves(
dbh => $node->dbh(),
dsn => $node->dsn(),
make_cxn => $make_cxn_cluster,
);
push @$slaves, @$node_slaves;
}
}
my @pruned_slaves;
($master_cxn, @pruned_slaves) =
$cluster->remove_duplicate_cxns($master_cxn, @$slaves);
$slaves = \@pruned_slaves;
my $dsn = grep { /^dsn/i } @{$o->get('recursion-method')};
if ( !$dsn && $o->get('autodetect-nodes') ) {
my @known_nodes = grep { $cluster_name_for{$_} } $master_cxn, @$slaves;
push @$slaves, $cluster->autodetect_nodes(
nodes => \@known_nodes,
MasterSlave => $ms,
DSNParser => $dp,
make_cxn => $make_cxn_cluster,
seen_ids => \%seen_ids,
);
}
($master_cxn, @$slaves) = remove_duplicate_cxns( $master_cxn, @$slaves);
PTDEBUG && _d(scalar @$slaves, 'slaves found');
# https://bugs.launchpad.net/percona-toolkit/+bug/938068
@@ -8917,7 +8926,7 @@ sub main {
warn "Diffs will only be detected if the cluster is "
. "consistent with " . $direct_slave->name . " because "
. $master_cxn->name . " is a traditional replication master "
. " but these replicas are cluster nodes:\n"
. "but these replicas are cluster nodes:\n"
. join("\n", map { ' ' . $_->name } @nodes) . "\n"
. "For more information, please read the Percona XtraDB "
. "Cluster section of the tool's documentation.\n";
@@ -9835,6 +9844,31 @@ sub main {
# ############################################################################
# Subroutines
# ############################################################################
# Filters a list of cxn objects down to one per server ID.
#
# Each cxn's dbh is asked for @@SERVER_ID; the first cxn seen with a
# given ID is kept and later cxns reporting the same ID are dropped.
# The relative order of the kept cxns is unchanged.
#
# Arguments: a list of cxn objects providing dbh() and name().
# Returns:   the list of kept cxn objects.
sub remove_duplicate_cxns {
   my (@cxns) = @_;
   PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns);

   my %seen;
   my @trimmed_cxns;

   CXN:
   foreach my $candidate ( @cxns ) {
      my $handle = $candidate->dbh();
      my $query  = q{SELECT @@SERVER_ID};
      PTDEBUG && _d($query);
      my ($id) = $handle->selectrow_array($query);
      PTDEBUG && _d('Server ID for ', $candidate->name, ': ', $id);

      # Post-increment: true only from the second sighting of an ID on.
      if ( $seen{$id}++ ) {
         PTDEBUG && _d("Removing ", $candidate->name,
            ", ID $id, because we've already seen it");
         next CXN;
      }

      push @trimmed_cxns, $candidate;
   }

   return @trimmed_cxns;
}
sub ts {
my ($msg) = @_;
my ($s, $m, $h, $d, $M) = localtime;
@@ -11063,6 +11097,12 @@ group: Connection
Prompt for a password when connecting to MySQL.
=item --[no]autodetect-nodes
default: yes
Try to automatically find other cluster nodes. TODO TODO.
=item --[no]check-binlog-format
default: yes

View File

@@ -76,7 +76,8 @@ sub find_cluster_nodes {
# Ostensibly the caller should've done this already, but
# useful for safety.
$dp->fill_in_dsn($dbh, $dsn);
# TODO this fails with a strange error.
#$dp->fill_in_dsn($dbh, $dsn);
my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'};
PTDEBUG && _d($sql);
@@ -131,39 +132,29 @@ sub find_cluster_nodes {
# So try to detect and remove those.
sub remove_duplicate_cxns {
my ($self, @cxns) = @_;
my %addresses;
my ($self, %args) = @_;
my @cxns = @{$args{cxns}};
my $seen_ids = $args{seen_ids};
PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns);
my @trimmed_cxns;
my @unique_cxns;
CXN:
foreach my $cxn ( @cxns ) {
# If not a cluster node, assume that it's unique
if ( !$self->is_cluster_node($cxn) ) {
push @unique_cxns, $cxn;
next CXN;
}
for my $cxn ( @cxns ) {
my $dbh = $cxn->dbh();
my $sql = q{SELECT @@SERVER_ID};
PTDEBUG && _d($sql);
my ($id) = $dbh->selectrow_array($sql);
PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id);
# Otherwise, check that it only shows up once.
my $dbh = $cxn->dbh();
my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'};
PTDEBUG && _d($dbh, $sql);
my (undef, $receive_addr) = $dbh->selectrow_array($sql);
if ( !$receive_addr ) {
PTDEBUG && _d(q{Query returned nothing, assuming that it's },
q{not a duplicate});
push @unique_cxns, $cxn;
}
elsif ( $addresses{$receive_addr}++ ) {
PTDEBUG && _d('Removing ', $cxn->name, 'from slaves',
'because we already have a node from this address');
if ( ! $seen_ids->{$id}++ ) {
push @trimmed_cxns, $cxn
}
else {
push @unique_cxns, $cxn;
PTDEBUG && _d("Removing ", $cxn->name,
", ID $id, because we've already seen it");
}
}
return @unique_cxns;
return @trimmed_cxns;
}
sub same_cluster {
@@ -178,6 +169,56 @@ sub same_cluster {
return ($cluster1 || '') eq ($cluster2 || '');
}
# Recursively discovers cluster nodes, and the replicas of those nodes,
# starting from a list of already-known nodes.
#
# Named arguments:
#   * nodes        arrayref of cxn objects to start from; each must
#                  provide dbh() and dsn().  Missing or empty: nothing
#                  is searched and an empty list is returned.
#   * MasterSlave  object whose get_slaves() returns an arrayref of cxns
#   * DSNParser    passed through to find_cluster_nodes()
#   * make_cxn     passed through to find_cluster_nodes() and get_slaves()
#   * seen_ids     hashref handed to remove_duplicate_cxns() on every
#                  call.  Optional: when omitted, a fresh hashref is
#                  created here and shared by all recursion levels.
#
# Returns a flat list: the new nodes found at this level, then the new
# replicas of those nodes, then whatever the recursive call on the
# replicas that are themselves cluster nodes returns.
sub autodetect_nodes {
   my ($self, %args) = @_;
   my $ms       = $args{MasterSlave};
   my $dp       = $args{DSNParser};
   my $make_cxn = $args{make_cxn};
   my $nodes    = $args{nodes};

   # One hashref must be shared by every remove_duplicate_cxns() call in
   # the whole recursion; otherwise nothing stops the same nodes from
   # being rediscovered at each level and the recursion need not end.
   my $seen_ids = $args{seen_ids} || {};

   # Guard the dereference: @$nodes on an undefined value dies under
   # strict refs.  An empty list is also what ends the recursion.
   return unless $nodes && @$nodes;

   # Step 1: ask every known node which other cluster nodes it sees.
   my @new_nodes;
   for my $node ( @$nodes ) {
      my @nodes = $self->find_cluster_nodes(
         dbh       => $node->dbh(),
         dsn       => $node->dsn(),
         make_cxn  => $make_cxn,
         DSNParser => $dp,
      );
      push @new_nodes, @nodes;
   }

   @new_nodes = $self->remove_duplicate_cxns(
      cxns     => \@new_nodes,
      seen_ids => $seen_ids,
   );

   # Step 2: collect the replicas of each newly found node.
   my @new_slaves;
   foreach my $node ( @new_nodes ) {
      my $node_slaves = $ms->get_slaves(
         dbh      => $node->dbh(),
         dsn      => $node->dsn(),
         make_cxn => $make_cxn,
      );
      push @new_slaves, @$node_slaves;
   }

   @new_slaves = $self->remove_duplicate_cxns(
      cxns     => \@new_slaves,
      seen_ids => $seen_ids,
   );

   # Step 3: replicas that are cluster nodes themselves seed the next
   # round of discovery.
   my @new_slave_nodes = grep { $self->is_cluster_node($_) } @new_slaves;

   return @new_nodes, @new_slaves,
      $self->autodetect_nodes(
         %args,
         nodes    => \@new_slave_nodes,
         seen_ids => $seen_ids,
      );
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }

View File

@@ -25,6 +25,8 @@ require "$trunk/bin/pt-table-checksum";
# Do this after requiring ptc, since it uses Mo
require VersionParser;
my $ip = qr/\Q127.1\E|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/;
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $node1 = $sb->get_dbh_for('node1');
@@ -74,40 +76,47 @@ $node1->do(qq/INSERT INTO dsns.dsns VALUES (1, 1, '$node1_dsn')/);
# if no other cluster nodes are detected, in which case the user
# probably didn't specify --recursion-method dsn.
$output = output(
sub { pt_table_checksum::main(@args) },
sub { pt_table_checksum::main(@args, qw(--no-autodetect-nodes)) },
stderr => 1,
);
like(
$output,
qr/h=127.1,P=12345 is a cluster node but no other nodes/,
qr/h=127(?:\Q.0.0\E)?.1,P=12345 is a cluster node but no other nodes/,
"Dies if no other nodes are found"
);
$output = output(
sub { pt_table_checksum::main(@args,
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns")
},
stderr => 1,
);
for my $args (
["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"],
["autodetecting everything"]
)
{
my $test = shift @$args;
$output = output(
sub { pt_table_checksum::main(@args,
@$args)
},
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'errors'),
0,
"No diffs: no errors"
);
is(
PerconaTest::count_checksum_results($output, 'errors'),
0,
"No diffs: no errors ($test)"
);
is(
PerconaTest::count_checksum_results($output, 'skipped'),
0,
"No diffs: no skips"
);
is(
PerconaTest::count_checksum_results($output, 'skipped'),
0,
"No diffs: no skips ($test)"
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
0,
"No diffs: no diffs"
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
0,
"No diffs: no diffs ($test)"
);
}
# Now really test checksumming a cluster. To create a diff we have to disable
# the binlog. Although PXC doesn't need or use the binlog to communicate
@@ -140,45 +149,53 @@ is(
"Node3 not changed"
);
$output = output(
sub { pt_table_checksum::main(@args,
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns")
},
stderr => 1,
);
for my $args (
["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"],
["autodetecting everything"]
)
{
my $test = shift @$args;
is(
PerconaTest::count_checksum_results($output, 'errors'),
0,
"1 diff: no errors"
);
$output = output(
sub { pt_table_checksum::main(@args,
@$args)
},
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'skipped'),
0,
"1 diff: no skips"
);
is(
PerconaTest::count_checksum_results($output, 'errors'),
0,
"1 diff: no errors ($test)"
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"1 diff: 1 diff"
) or diag($output);
is(
PerconaTest::count_checksum_results($output, 'skipped'),
0,
"1 diff: no skips ($test)"
);
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
like(
$output,
qr/^\S+\s+ # ts
0\s+ # errors
1\s+ # diffs
26\s+ # rows
\d+\s+ # chunks
0\s+ # skipped
\S+\s+ # time
test.t$ # table
/xm,
"1 diff: it's in test.t"
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"1 diff: 1 diff ($test)"
) or diag($output);
# 11-17T13:02:54 0 1 26 1 0 0.021 test.t
like(
$output,
qr/^\S+\s+ # ts
0\s+ # errors
1\s+ # diffs
26\s+ # rows
\d+\s+ # chunks
0\s+ # skipped
\S+\s+ # time
test.t$ # table
/xm,
"1 diff: it's in test.t ($test)"
);
}
# #############################################################################
# cluster, node1 -> slave, run on node1
@@ -210,34 +227,42 @@ $slave_dbh->do("update test.t set c='zebra' where c='z'");
# https://bugs.launchpad.net/percona-toolkit/+bug/1080385
# Cluster nodes default to ROW format because that's what Galera
# works best with, even though it doesn't really use binlogs.
$output = output(
sub { pt_table_checksum::main(@args,
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns")
},
stderr => 1,
);
for my $args (
["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"],
["autodetecting everything"]
)
{
my $test = shift @$args;
$output = output(
sub { pt_table_checksum::main(@args,
@$args)
},
stderr => 1,
);
like(
$output,
qr/replica h=127.1,P=12348 has binlog_format ROW/,
"--check-binlog-format warns about slave's binlog format"
);
like(
$output,
qr/replica h=127(?:\Q.0.0\E)?\.1,P=12348 has binlog_format ROW/,
"--check-binlog-format warns about slave's binlog format ($test)"
);
# Now really test that diffs on the slave are detected.
$output = output(
sub { pt_table_checksum::main(@args,
@$args,
qw(--no-check-binlog-format)),
},
stderr => 1,
);
# Now really test that diffs on the slave are detected.
$output = output(
sub { pt_table_checksum::main(@args,
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
qw(--no-check-binlog-format)),
},
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"Detects diffs on slave of cluster node1"
) or diag($output);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
1,
"Detects diffs on slave of cluster node1 ($test)"
) or diag($output);
}
$slave_dbh->disconnect;
$sb->stop_sandbox('cslave1');
@@ -272,21 +297,28 @@ is(
"Slave is changed"
);
for my
$output = output(
sub { pt_table_checksum::main(@args,
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
qw(--no-check-binlog-format -d test)),
},
stderr => 1,
);
for my $args (
["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"],
["autodetecting everything"]
)
{
my $test = shift @$args;
is(
PerconaTest::count_checksum_results($output, 'diffs'),
0,
"Limitation: does not detect diffs on slave of cluster node2"
) or diag($output);
$output = output(
sub { pt_table_checksum::main(@args,
@$args,
qw(--no-check-binlog-format -d test)),
},
stderr => 1,
);
is(
PerconaTest::count_checksum_results($output, 'diffs'),
0,
"Limitation: does not detect diffs on slave of cluster node2 ($test)"
) or diag($output);
}
$slave_dbh->disconnect;
$sb->stop_sandbox('cslave1');
@@ -442,19 +474,27 @@ like(
like(
$output,
qr/the direct replica of h=127.1,P=12349 was not found or specified/,
qr/the direct replica of h=$ip,P=12349 was not found or specified/,
"Warns that direct replica of the master isn't found or specified",
);
# Originally, these tested a dsn table with all nodes.
# Use the other DSN table with all three nodes. Now the tool should
# give a more specific warning than that ^.
# Originally, these tested a dsn table with all nodes; now we hijack
# those tests to also try the autodetection
for my $args (
["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"],
["autodetecting everything"]
)
{
my $test = shift @$args;
# Make a diff on node1. If ptc is really auto-detecting node1, then it
# should report this diff.
$node1->do("set sql_log_bin=0");
$node1->do("update test.t set c='zebra' where c='z'");
$node1->do("set sql_log_bin=1");
$output = output(
sub { pt_table_checksum::main($master_dsn,
@$args,
@@ -486,7 +526,7 @@ for my $args (
like(
$output,
qr/Diffs will only be detected if the cluster is consistent with h=127.1,P=12345 because h=127.1,P=12349/,
qr/Diffs will only be detected if the cluster is consistent with h=$ip,P=12345 because h=$ip,P=12349/,
"Warns that diffs only detected if cluster consistent with direct replica ($test)",
);
@@ -574,32 +614,40 @@ $sb->load_file('node4', "$sample/a-z.sql");
# Add node4 in the cluster2 to the DSN table.
$node1->do(qq/INSERT INTO dsns.dsns VALUES (5, null, '$c->{node4}->{dsn}')/);
$output = output(
sub { pt_table_checksum::main(@args,
'--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns",
qw(-d test))
},
stderr => 1,
);
for my $args (
["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"],
["autodetecting everything"]
)
{
my $test = shift @$args;
like(
$output,
qr/h=127.1,P=12345 is in cluster pt_sandbox_cluster/,
"Detects that node1 is in pt_sandbox_cluster"
);
$output = output(
sub { pt_table_checksum::main(@args,
@$args,
qw(-d test))
},
stderr => 1,
);
like(
$output,
qr/h=127.1,P=2900 is in cluster cluster2/,
"Detects that node4 is in cluster2"
);
like(
$output,
qr/h=127(?:\Q.0.0\E)?.1,P=12345 is in cluster pt_sandbox_cluster/,
"Detects that node1 is in pt_sandbox_cluster ($test)"
);
unlike(
$output,
qr/test/,
"Different clusters, no results"
);
like(
$output,
qr/h=127(?:\Q.0.0\E)?.1,P=2900 is in cluster cluster2/,
"Detects that node4 is in cluster2 ($test)"
);
unlike(
$output,
qr/test/,
"Different clusters, no results ($test)"
);
}
$sb->stop_sandbox(qw(node4 node5 node6));
# Restore the DSN table in case there are more tests.