From 7ae20036cb244f27d4321bd328f7e2cd41876bbf Mon Sep 17 00:00:00 2001 From: "Brian Fraser fraserb@gmail.com" <> Date: Fri, 23 Nov 2012 12:50:25 -0300 Subject: [PATCH 1/5] PXC: Implement find_cluster_nodes and remove_duplicate_cxns --- bin/pt-table-checksum | 118 ++++++++++++++++++++++++++++------ lib/Percona/XtraDB/Cluster.pm | 92 ++++++++++++++++++++++++++ 2 files changed, 190 insertions(+), 20 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index f476a536..39701ca3 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -3418,6 +3418,85 @@ sub same_node { return ($val1 || '') eq ($val2 || ''); } +sub find_cluster_nodes { + my ($self, %args) = @_; + + my $dbh = $args{dbh}; + my $dsn = $args{dsn}; + my $dp = $args{DSNParser}; + my $make_cxn = $args{make_cxn}; + + + my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; + PTDEBUG && _d($sql); + my (undef, $addresses) = $dbh->selectrow_array($sql); + PTDEBUG && _d("Cluster nodes found: ", $addresses); + return unless $addresses; + + my @addresses = grep !/\Aunspecified\z/i, + split /,\s*/, $addresses; + + my @nodes; + foreach my $address ( @addresses ) { + my ($host, $port) = split /:/, $address; + my $spec = "h=$host" + . ($port ? ",P=$port" : ""); + my $node_dsn = $dp->parse($spec, $dsn); + my $node_dbh = eval { + $dp->get_dbh( + $dp->get_cxn_params($node_dsn), { AutoCommit => 1 }); + PTDEBUG && _d('Connected to', $dp->as_string($node_dsn)); + }; + if ( $EVAL_ERROR ) { + print STDERR "Cannot connect to ", $dp->as_string($node_dsn), + ", discovered through $sql: $EVAL_ERROR\n"; + next; + } + $node_dbh->disconnect(); + + push @nodes, $make_cxn->(dsn => $node_dsn); + } + + return @nodes; +} + + +sub remove_duplicate_cxns { + my ($self, @cxns) = @_; + my %addresses; + + my @unique_cxns; + CXN: + foreach my $cxn ( @cxns ) { + if ( !$self->cluster_node($cxn) ) { + push @unique_cxns, $cxn; + next CXN; + } + + my $dbh = $cxn->dbh(); + my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'}; + PTDEBUG && _d($dbh, $sql); + my (undef, $receive_addr) = $dbh->selectrow_array(); + + if ( !$receive_addr ) { + PTDEBUG && _d(q{Query returned nothing, assuming that it's }, + q{not a duplicate}); + push @unique_cxns, $cxn; + } + elsif ( $addresses{$receive_addr}++ ) { + PTDEBUG && _d('Removing ', $cxn->name, 'from slaves', + 'because we already have a node from this address'); + } + else { + push @unique_cxns, $cxn; + } + + } + warn "<@cxns>"; + warn "<@unique_cxns>"; + return @unique_cxns; +} + sub same_cluster { my ($self, $cxn1, $cxn2) = @_; @@ -8676,33 +8755,32 @@ sub main { # ##################################################################### # Find and connect to slaves. # ##################################################################### + my $make_cxn_cluster = sub { + my $cxn = $make_cxn->(@_, prev_dsn => $master_cxn->dsn()); + $cluster_name_for{$cxn} = $cluster->is_cluster_node($cxn); + return $cxn; + }; + $slaves = $ms->get_slaves( dbh => $master_dbh, dsn => $master_dsn, - make_cxn => sub { - my $cxn = $make_cxn->(@_, prev_dsn => $master_cxn->dsn()); - $cluster_name_for{$cxn} = $cluster->is_cluster_node($cxn); - return $cxn; - }, + make_cxn => $make_cxn_cluster, ); - # If the "master" is a cluster node, then a DSN table should have been - # used, and it may have all nodes' DSNs so the user can run the tool - # on any node, in which case it has the "master" node, the DSN given - # on the command line. So detect and remove this dupe. if ( $cluster_name_for{$master_cxn} ) { - @$slaves = grep { - my $slave_cxn = $_; - if ( $cluster->same_node($master_cxn, $slave_cxn) ) { - PTDEBUG && _d('Removing ', $slave_cxn->name, 'from slaves', - 'because it is the master'); - 0; - } - else { - $slave_cxn; - } - } @$slaves; + push @$slaves, $cluster->find_cluster_nodes( + dbh => $master_dbh, + dsn => $master_dsn, + make_cxn => $make_cxn_cluster, + DSNParser => $dp, + ); + + my @pruned_slaves; + ($master_cxn, @pruned_slaves) = + $cluster->remove_duplicate_cxns($master_cxn, @$slaves); + $slaves = \@pruned_slaves; } + PTDEBUG && _d(scalar @$slaves, 'slaves found'); # https://bugs.launchpad.net/percona-toolkit/+bug/938068 diff --git a/lib/Percona/XtraDB/Cluster.pm b/lib/Percona/XtraDB/Cluster.pm index ee3c3c0c..81d6f3e4 100644 --- a/lib/Percona/XtraDB/Cluster.pm +++ b/lib/Percona/XtraDB/Cluster.pm @@ -63,6 +63,98 @@ sub same_node { return ($val1 || '') eq ($val2 || ''); } +# TODO: Check that the PXC version supports wsrep_incoming_addresses +sub find_cluster_nodes { + my ($self, %args) = @_; + + my $dbh = $args{dbh}; + my $dsn = $args{dsn}; + my $dp = $args{DSNParser}; + my $make_cxn = $args{make_cxn}; + + + my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; + PTDEBUG && _d($sql); + my (undef, $addresses) = $dbh->selectrow_array($sql); + PTDEBUG && _d("Cluster nodes found: ", $addresses); + return unless $addresses; + + my @addresses = grep !/\Aunspecified\z/i, + split /,\s*/, $addresses; + + my @nodes; + foreach my $address ( @addresses ) { + my ($host, $port) = split /:/, $address; + my $spec = "h=$host" + . ($port ? ",P=$port" : ""); + my $node_dsn = $dp->parse($spec, $dsn); + my $node_dbh = eval { + $dp->get_dbh( + $dp->get_cxn_params($node_dsn), { AutoCommit => 1 }); + PTDEBUG && _d('Connected to', $dp->as_string($node_dsn)); + }; + if ( $EVAL_ERROR ) { + print STDERR "Cannot connect to ", $dp->as_string($node_dsn), + ", discovered through $sql: $EVAL_ERROR\n"; + next; + } + $node_dbh->disconnect(); + + push @nodes, $make_cxn->(dsn => $node_dsn); + } + + return @nodes; +} + +# There's two reasons why there might be dupes: +# If the "master" is a cluster node, then a DSN table might have been +# used, and it may have all nodes' DSNs so the user can run the tool +# on any node, in which case it has the "master" node, the DSN given +# on the command line. +# On the other hand, maybe find_cluster_nodes worked, in which case +# we definitely have a dupe for the master cxn, but we may also have a +# dupe for every other node if this was unsed in conjunction with a +# DSN table. +# So try to detect and remove those. + +sub remove_duplicate_cxns { + my ($self, @cxns) = @_; + my %addresses; + + my @unique_cxns; + CXN: + foreach my $cxn ( @cxns ) { + # If not a cluster node, assume that it's unique + if ( !$self->cluster_node($cxn) ) { + push @unique_cxns, $cxn; + next CXN; + } + + # Otherwise, check that it only shows up once. + my $dbh = $cxn->dbh(); + my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'}; + PTDEBUG && _d($dbh, $sql); + my (undef, $receive_addr) = $dbh->selectrow_array(); + + if ( !$receive_addr ) { + PTDEBUG && _d(q{Query returned nothing, assuming that it's }, + q{not a duplicate}); + push @unique_cxns, $cxn; + } + elsif ( $addresses{$receive_addr}++ ) { + PTDEBUG && _d('Removing ', $cxn->name, 'from slaves', + 'because we already have a node from this address'); + } + else { + push @unique_cxns, $cxn; + } + + } + warn "<@cxns>"; + warn "<@unique_cxns>"; + return @unique_cxns; +} + sub same_cluster { my ($self, $cxn1, $cxn2) = @_; From 41d7700aa5c62dae1f520e6c7378806c9d4c8d9a Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Fri, 23 Nov 2012 19:00:38 -0300 Subject: [PATCH 2/5] WIP ptc: Autodetect cluster nodes & recurse to slaves --- bin/pt-table-checksum | 78 +++++++++--- lib/Percona/XtraDB/Cluster.pm | 31 +++-- sandbox/servers/pxc/5.5/my.sandbox.cnf | 2 +- t/pt-table-checksum/pxc.t | 158 +++++++++++++------------ 4 files changed, 166 insertions(+), 103 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 39701ca3..8e50dc20 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -3425,8 +3425,7 @@ sub find_cluster_nodes { my $dsn = $args{dsn}; my $dp = $args{DSNParser}; my $make_cxn = $args{make_cxn}; - - + my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; PTDEBUG && _d($sql); my (undef, $addresses) = $dbh->selectrow_array($sql); @@ -3442,16 +3441,18 @@ sub find_cluster_nodes { my $spec = "h=$host" . ($port ? ",P=$port" : ""); my $node_dsn = $dp->parse($spec, $dsn); - my $node_dbh = eval { - $dp->get_dbh( - $dp->get_cxn_params($node_dsn), { AutoCommit => 1 }); - PTDEBUG && _d('Connected to', $dp->as_string($node_dsn)); - }; + my $node_dbh = eval { $dp->get_dbh( + $dp->get_cxn_params($node_dsn), { AutoCommit => 1 }) }; if ( $EVAL_ERROR ) { print STDERR "Cannot connect to ", $dp->as_string($node_dsn), ", discovered through $sql: $EVAL_ERROR\n"; + if ( !$port && $dsn->{P} != 3306 ) { + $address .= ":3306"; + redo; + } next; } + PTDEBUG && _d('Connected to', $dp->as_string($node_dsn)); $node_dbh->disconnect(); push @nodes, $make_cxn->(dsn => $node_dsn); @@ -3468,7 +3469,7 @@ sub remove_duplicate_cxns { my @unique_cxns; CXN: foreach my $cxn ( @cxns ) { - if ( !$self->cluster_node($cxn) ) { + if ( !$self->is_cluster_node($cxn) ) { push @unique_cxns, $cxn; next CXN; } @@ -3476,7 +3477,7 @@ sub remove_duplicate_cxns { my $dbh = $cxn->dbh(); my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'}; PTDEBUG && _d($dbh, $sql); - my (undef, $receive_addr) = $dbh->selectrow_array(); + my (undef, $receive_addr) = $dbh->selectrow_array($sql); if ( !$receive_addr ) { PTDEBUG && _d(q{Query returned nothing, assuming that it's }, @@ -3492,8 +3493,6 @@ sub remove_duplicate_cxns { } } - warn "<@cxns>"; - warn "<@unique_cxns>"; return @unique_cxns; } @@ -8767,14 +8766,57 @@ sub main { make_cxn => $make_cxn_cluster, ); - if ( $cluster_name_for{$master_cxn} ) { - push @$slaves, $cluster->find_cluster_nodes( - dbh => $master_dbh, - dsn => $master_dsn, - make_cxn => $make_cxn_cluster, - DSNParser => $dp, - ); + if ( $o->get('recursion-method') !~ /^dsn/i ) { + my %seen; + my @new_slaves; + for my $slave ( @$slaves ) { + next unless $cluster->is_cluster_node($slave); + my @nodes = $cluster->find_cluster_nodes( + dbh => $slave->dbh(), + dsn => $slave->dsn(), + make_cxn => $make_cxn_cluster, + DSNParser => $dp, + ); + @nodes = grep { !$seen{$dp->as_string($_->dsn)}++ } + grep { !$cluster->same_node($slave, $_) } @nodes; + push @new_slaves, @nodes; + foreach my $node (@nodes) { + my $node_slaves = $ms->get_slaves( + dbh => $node->dbh(), + dsn => $node->dsn(), + make_cxn => $make_cxn_cluster, + ); + push @new_slaves, @$node_slaves; + } + ($master_cxn, @new_slaves) = + $cluster->remove_duplicate_cxns($master_cxn, @new_slaves); + } + push @$slaves, @new_slaves; + } + + if ( $cluster_name_for{$master_cxn} ) { + if ( $o->get('recursion-method') !~ /^dsn/i ) { + my @nodes = $cluster->find_cluster_nodes( + dbh => $master_dbh, + dsn => $master_dsn, + make_cxn => $make_cxn_cluster, + DSNParser => $dp, + ); + + @nodes = grep { !$cluster->same_node($master_cxn, $_) } @nodes; + push @$slaves, @nodes; + + foreach my $node (@nodes) { + my $node_slaves = $ms->get_slaves( + dbh => $node->dbh(), + dsn => $node->dsn(), + make_cxn => $make_cxn_cluster, + ); + push @$slaves, @$node_slaves; + } + } + my @pruned_slaves; ($master_cxn, @pruned_slaves) = $cluster->remove_duplicate_cxns($master_cxn, @$slaves); diff --git a/lib/Percona/XtraDB/Cluster.pm b/lib/Percona/XtraDB/Cluster.pm index 81d6f3e4..092f3984 100644 --- a/lib/Percona/XtraDB/Cluster.pm +++ b/lib/Percona/XtraDB/Cluster.pm @@ -64,6 +64,8 @@ sub same_node { } # TODO: Check that the PXC version supports wsrep_incoming_addresses +# Not really necessary, actually. But in case it's needed, +# wsrep_provider_version =~ /[0-9]+\.[0-9]+\(r([0-9]+)\)/ && $1 >= 137 sub find_cluster_nodes { my ($self, %args) = @_; @@ -72,7 +74,10 @@ sub find_cluster_nodes { my $dp = $args{DSNParser}; my $make_cxn = $args{make_cxn}; - + # Ostensibly the caller should've done this already, but + # useful for safety. + $dp->fill_in_dsn($dbh, $dsn); + my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; PTDEBUG && _d($sql); my (undef, $addresses) = $dbh->selectrow_array($sql); @@ -88,16 +93,24 @@ sub find_cluster_nodes { my $spec = "h=$host" . ($port ? ",P=$port" : ""); my $node_dsn = $dp->parse($spec, $dsn); - my $node_dbh = eval { - $dp->get_dbh( - $dp->get_cxn_params($node_dsn), { AutoCommit => 1 }); - PTDEBUG && _d('Connected to', $dp->as_string($node_dsn)); - }; + my $node_dbh = eval { $dp->get_dbh( + $dp->get_cxn_params($node_dsn), { AutoCommit => 1 }) }; if ( $EVAL_ERROR ) { print STDERR "Cannot connect to ", $dp->as_string($node_dsn), ", discovered through $sql: $EVAL_ERROR\n"; + # This is a bit strange, so an explanation is called for. + # If there wasn't a port, that means that this bug + # https://bugs.launchpad.net/percona-toolkit/+bug/1082406 + # isn't fixed on this version of PXC. We tried using the + # master's port, but that didn't work. So try again, using + # the default port. + if ( !$port && $dsn->{P} != 3306 ) { + $address .= ":3306"; + redo; + } next; } + PTDEBUG && _d('Connected to', $dp->as_string($node_dsn)); $node_dbh->disconnect(); push @nodes, $make_cxn->(dsn => $node_dsn); @@ -125,7 +138,7 @@ sub remove_duplicate_cxns { CXN: foreach my $cxn ( @cxns ) { # If not a cluster node, assume that it's unique - if ( !$self->cluster_node($cxn) ) { + if ( !$self->is_cluster_node($cxn) ) { push @unique_cxns, $cxn; next CXN; } @@ -134,7 +147,7 @@ sub remove_duplicate_cxns { my $dbh = $cxn->dbh(); my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'}; PTDEBUG && _d($dbh, $sql); - my (undef, $receive_addr) = $dbh->selectrow_array(); + my (undef, $receive_addr) = $dbh->selectrow_array($sql); if ( !$receive_addr ) { PTDEBUG && _d(q{Query returned nothing, assuming that it's }, @@ -150,8 +163,6 @@ sub remove_duplicate_cxns { } } - warn "<@cxns>"; - warn "<@unique_cxns>"; return @unique_cxns; } diff --git a/sandbox/servers/pxc/5.5/my.sandbox.cnf b/sandbox/servers/pxc/5.5/my.sandbox.cnf index 8bf4a692..ad2673d9 100644 --- a/sandbox/servers/pxc/5.5/my.sandbox.cnf +++ b/sandbox/servers/pxc/5.5/my.sandbox.cnf @@ -31,7 +31,7 @@ binlog_format = ROW wsrep_provider = LIBGALERA wsrep_cluster_address = CLUSTER_AD wsrep_sst_receive_address = ADDR:RECEIVE_PRT -wsrep_node_incoming_address= ADDR +wsrep_node_incoming_address= ADDR:PORT wsrep_slave_threads = 2 wsrep_cluster_name = CLUSTER_NAME wsrep_provider_options = "gmcast.listen_addr=tcp://ADDR:LISTEN_PRT;" diff --git a/t/pt-table-checksum/pxc.t b/t/pt-table-checksum/pxc.t index df833958..31c3bced 100644 --- a/t/pt-table-checksum/pxc.t +++ b/t/pt-table-checksum/pxc.t @@ -272,6 +272,7 @@ is( "Slave is changed" ); +for my $output = output( sub { pt_table_checksum::main(@args, '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns", @@ -445,92 +446,101 @@ like( "Warns that direct replica of the master isn't found or specified", ); +# Originally, these tested a dsn table with all nodes. # Use the other DSN table with all three nodes. Now the tool should # give a more specific warning than that ^. -$output = output( - sub { pt_table_checksum::main($master_dsn, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns", - qw(-d test)) - }, - stderr => 1, -); +for my $args ( + ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["autodetecting everything"] + ) +{ + my $test = shift @$args; + $output = output( + sub { pt_table_checksum::main($master_dsn, + @$args, + qw(-d test)) + }, + stderr => 1, + ); -is( - PerconaTest::count_checksum_results($output, 'diffs'), - 1, - "...check all nodes: 1 diff" -) or diag($output); + is( + PerconaTest::count_checksum_results($output, 'diffs'), + 1, + "...check all nodes: 1 diff ($test)" + ) or diag($output); -# 11-17T13:02:54 0 1 26 1 0 0.021 test.t -like( - $output, - qr/^\S+\s+ # ts - 0\s+ # errors - 1\s+ # diffs - 26\s+ # rows - \d+\s+ # chunks - 0\s+ # skipped - \S+\s+ # time - test.t$ # table - /xm, - "...check all nodes: it's in test.t" -); + # 11-17T13:02:54 0 1 26 1 0 0.021 test.t + like( + $output, + qr/^\S+\s+ # ts + 0\s+ # errors + 1\s+ # diffs + 26\s+ # rows + \d+\s+ # chunks + 0\s+ # skipped + \S+\s+ # time + test.t$ # table + /xm, + "...check all nodes: it's in test.t ($test)" + ); -like( - $output, - qr/Diffs will only be detected if the cluster is consistent with h=127.1,P=12345 because h=127.1,P=12349/, - "Warns that diffs only detected if cluster consistent with direct replica", -); + like( + $output, + qr/Diffs will only be detected if the cluster is consistent with h=127.1,P=12345 because h=127.1,P=12349/, + "Warns that diffs only detected if cluster consistent with direct replica ($test)", + ); -# Restore node1 so the cluster is consistent, but then make node2 differ. -# ptc should NOT detect this diff because the checksum query will replicate -# to node1, node1 isn't different, so it broadcasts the result in ROW format -# that all is ok, which node2 gets and thus false reports. This is why -# those ^ warnings exist. -$node1->do("set sql_log_bin=0"); -$node1->do("update test.t set c='z' where c='zebra'"); -$node1->do("set sql_log_bin=1"); + # Restore node1 so the cluster is consistent, but then make node2 differ. + # ptc should NOT detect this diff because the checksum query will replicate + # to node1, node1 isn't different, so it broadcasts the result in ROW format + # that all is ok, which node2 gets and thus false reports. This is why + # those ^ warnings exist. + $node1->do("set sql_log_bin=0"); + $node1->do("update test.t set c='z' where c='zebra'"); + $node1->do("set sql_log_bin=1"); -$node2->do("set sql_log_bin=0"); -$node2->do("update test.t set c='zebra' where c='z'"); -$node2->do("set sql_log_bin=1"); + $node2->do("set sql_log_bin=0"); + $node2->do("update test.t set c='zebra' where c='z'"); + $node2->do("set sql_log_bin=1"); -($row) = $node2->selectrow_array("select c from test.t order by c desc limit 1"); -is( - $row, - "zebra", - "Node2 is changed again" -); + ($row) = $node2->selectrow_array("select c from test.t order by c desc limit 1"); + is( + $row, + "zebra", + "Node2 is changed again ($test)" + ); -($row) = $node1->selectrow_array("select c from test.t order by c desc limit 1"); -is( - $row, - "z", - "Node1 not changed again" -); + ($row) = $node1->selectrow_array("select c from test.t order by c desc limit 1"); + is( + $row, + "z", + "Node1 not changed again ($test)" + ); -($row) = $node3->selectrow_array("select c from test.t order by c desc limit 1"); -is( - $row, - "z", - "Node3 not changed again" -); + ($row) = $node3->selectrow_array("select c from test.t order by c desc limit 1"); + is( + $row, + "z", + "Node3 not changed again ($test)" + ); -# the other DSN table with all three nodes, but it won't matter because -# node1 is going to broadcast the false-positive that there are no diffs. -$output = output( - sub { pt_table_checksum::main($master_dsn, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns", - qw(-d test)) - }, - stderr => 1, -); + # the other DSN table with all three nodes, but it won't matter because + # node1 is going to broadcast the false-positive that there are no diffs. + $output = output( + sub { pt_table_checksum::main($master_dsn, + @$args, + qw(-d test)) + }, + stderr => 1, + ); -is( - PerconaTest::count_checksum_results($output, 'diffs'), - 0, - "Limitation: diff not on direct replica not detected" -) or diag($output); + is( + PerconaTest::count_checksum_results($output, 'diffs'), + 0, + "Limitation: diff not on direct replica not detected ($test)" + ) or diag($output); + +} # ########################################################################### # Be sure to stop the slave on node1, else further test will die with: From 79a5c39cec6ead4b1ea43fe710e990573e65c8c6 Mon Sep 17 00:00:00 2001 From: "Brian Fraser fraserb@gmail.com" <> Date: Sun, 25 Nov 2012 18:51:06 -0300 Subject: [PATCH 3/5] WIP Autodetect nodes & recurse to find new slaves --- bin/pt-table-checksum | 198 ++++++++++++++--------- lib/Percona/XtraDB/Cluster.pm | 97 ++++++++---- t/pt-table-checksum/pxc.t | 290 ++++++++++++++++++++-------------- 3 files changed, 357 insertions(+), 228 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 8e50dc20..3e95bbab 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -3425,6 +3425,7 @@ sub find_cluster_nodes { my $dsn = $args{dsn}; my $dp = $args{DSNParser}; my $make_cxn = $args{make_cxn}; + my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; PTDEBUG && _d($sql); @@ -3463,37 +3464,29 @@ sub find_cluster_nodes { sub remove_duplicate_cxns { - my ($self, @cxns) = @_; - my %addresses; + my ($self, %args) = @_; + my @cxns = @{$args{cxns}}; + my $seen_ids = $args{seen_ids}; + PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns); + my @trimmed_cxns; - my @unique_cxns; - CXN: - foreach my $cxn ( @cxns ) { - if ( !$self->is_cluster_node($cxn) ) { - push @unique_cxns, $cxn; - next CXN; - } + for my $cxn ( @cxns ) { + my $dbh = $cxn->dbh(); + my $sql = q{SELECT @@SERVER_ID}; + PTDEBUG && _d($sql); + my ($id) = $dbh->selectrow_array($sql); + PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); - my $dbh = $cxn->dbh(); - my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'}; - PTDEBUG && _d($dbh, $sql); - my (undef, $receive_addr) = $dbh->selectrow_array($sql); - - if ( !$receive_addr ) { - PTDEBUG && _d(q{Query returned nothing, assuming that it's }, - q{not a duplicate}); - push @unique_cxns, $cxn; - } - elsif ( $addresses{$receive_addr}++ ) { - PTDEBUG && _d('Removing ', $cxn->name, 'from slaves', - 'because we already have a node from this address'); + if ( ! $seen_ids->{$id}++ ) { + push @trimmed_cxns, $cxn } else { - push @unique_cxns, $cxn; + PTDEBUG && _d("Removing ", $cxn->name, + ", ID $id, because we've already seen it"); } - } - return @unique_cxns; + + return @trimmed_cxns; } sub same_cluster { @@ -3507,6 +3500,56 @@ sub same_cluster { return ($cluster1 || '') eq ($cluster2 || ''); } +sub autodetect_nodes { + my ($self, %args) = @_; + my $ms = $args{MasterSlave}; + my $dp = $args{DSNParser}; + my $make_cxn = $args{make_cxn}; + my $nodes = $args{nodes}; + my $seen_ids = $args{seen_ids}; + + return unless @$nodes; + + my @new_nodes; + for my $node ( @$nodes ) { + my @nodes = $self->find_cluster_nodes( + dbh => $node->dbh(), + dsn => $node->dsn(), + make_cxn => $make_cxn, + DSNParser => $dp, + ); + push @new_nodes, @nodes; + } + + @new_nodes = $self->remove_duplicate_cxns( + cxns => \@new_nodes, + seen_ids => $seen_ids + ); + + my @new_slaves; + foreach my $node (@new_nodes) { + my $node_slaves = $ms->get_slaves( + dbh => $node->dbh(), + dsn => $node->dsn(), + make_cxn => $make_cxn, + ); + push @new_slaves, @$node_slaves; + } + + @new_slaves = $self->remove_duplicate_cxns( + cxns => \@new_slaves, + seen_ids => $seen_ids + ); + + my @new_slave_nodes = grep { $self->is_cluster_node($_) } @new_slaves; + + return @new_nodes, @new_slaves, + $self->autodetect_nodes( + %args, + nodes => \@new_slave_nodes, + ); +} + sub _d { my ($package, undef, $line) = caller 0; @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } @@ -8766,63 +8809,29 @@ sub main { make_cxn => $make_cxn_cluster, ); - if ( $o->get('recursion-method') !~ /^dsn/i ) { - my %seen; - my @new_slaves; - for my $slave ( @$slaves ) { - next unless $cluster->is_cluster_node($slave); - my @nodes = $cluster->find_cluster_nodes( - dbh => $slave->dbh(), - dsn => $slave->dsn(), - make_cxn => $make_cxn_cluster, - DSNParser => $dp, - ); - @nodes = grep { !$seen{$dp->as_string($_->dsn)}++ } - grep { !$cluster->same_node($slave, $_) } @nodes; - push @new_slaves, @nodes; - - foreach my $node (@nodes) { - my $node_slaves = $ms->get_slaves( - dbh => $node->dbh(), - dsn => $node->dsn(), - make_cxn => $make_cxn_cluster, - ); - push @new_slaves, @$node_slaves; - } - ($master_cxn, @new_slaves) = - $cluster->remove_duplicate_cxns($master_cxn, @new_slaves); - } - push @$slaves, @new_slaves; + my %seen_ids; + for my $cxn ($master_cxn, @$slaves) { + my $dbh = $cxn->dbh(); + my $sql = q{SELECT @@SERVER_ID}; + PTDEBUG && _d($cxn, $dbh, $sql); + my ($id) = $dbh->selectrow_array($sql); + $seen_ids{$id}++; } - - if ( $cluster_name_for{$master_cxn} ) { - if ( $o->get('recursion-method') !~ /^dsn/i ) { - my @nodes = $cluster->find_cluster_nodes( - dbh => $master_dbh, - dsn => $master_dsn, - make_cxn => $make_cxn_cluster, - DSNParser => $dp, - ); - @nodes = grep { !$cluster->same_node($master_cxn, $_) } @nodes; - push @$slaves, @nodes; - - foreach my $node (@nodes) { - my $node_slaves = $ms->get_slaves( - dbh => $node->dbh(), - dsn => $node->dsn(), - make_cxn => $make_cxn_cluster, - ); - push @$slaves, @$node_slaves; - } - } - - my @pruned_slaves; - ($master_cxn, @pruned_slaves) = - $cluster->remove_duplicate_cxns($master_cxn, @$slaves); - $slaves = \@pruned_slaves; + my $dsn = grep { /^dsn/i } @{$o->get('recursion-method')}; + if ( !$dsn && $o->get('autodetect-nodes') ) { + my @known_nodes = grep { $cluster_name_for{$_} } $master_cxn, @$slaves; + push @$slaves, $cluster->autodetect_nodes( + nodes => \@known_nodes, + MasterSlave => $ms, + DSNParser => $dp, + make_cxn => $make_cxn_cluster, + seen_ids => \%seen_ids, + ); } + ($master_cxn, @$slaves) = remove_duplicate_cxns( $master_cxn, @$slaves); + PTDEBUG && _d(scalar @$slaves, 'slaves found'); # https://bugs.launchpad.net/percona-toolkit/+bug/938068 @@ -8917,7 +8926,7 @@ sub main { warn "Diffs will only be detected if the cluster is " . "consistent with " . $direct_slave->name . " because " . $master_cxn->name . " is a traditional replication master " - . " but these replicas are cluster nodes:\n" + . "but these replicas are cluster nodes:\n" . join("\n", map { ' ' . $_->name } @nodes) . "\n" . "For more information, please read the Percona XtraDB " . "Cluster section of the tool's documentation.\n"; @@ -9835,6 +9844,31 @@ sub main { # ############################################################################ # Subroutines # ############################################################################ +sub remove_duplicate_cxns { + my (@cxns) = @_; + PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns); + my @trimmed_cxns; + my %seen; + + for my $cxn ( @cxns ) { + my $dbh = $cxn->dbh(); + my $sql = q{SELECT @@SERVER_ID}; + PTDEBUG && _d($sql); + my ($id) = $dbh->selectrow_array($sql); + PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); + + if ( ! $seen{$id}++ ) { + push @trimmed_cxns, $cxn + } + else { + PTDEBUG && _d("Removing ", $cxn->name, + ", ID $id, because we've already seen it"); + } + } + + return @trimmed_cxns; +} + sub ts { my ($msg) = @_; my ($s, $m, $h, $d, $M) = localtime; @@ -11063,6 +11097,12 @@ group: Connection Prompt for a password when connecting to MySQL. +=item --[no]autodetect-nodes + +default: yes + +Try to automatically find other cluster nodes. TODO TODO. + =item --[no]check-binlog-format default: yes diff --git a/lib/Percona/XtraDB/Cluster.pm b/lib/Percona/XtraDB/Cluster.pm index 092f3984..2600cc66 100644 --- a/lib/Percona/XtraDB/Cluster.pm +++ b/lib/Percona/XtraDB/Cluster.pm @@ -76,7 +76,8 @@ sub find_cluster_nodes { # Ostensibly the caller should've done this already, but # useful for safety. - $dp->fill_in_dsn($dbh, $dsn); + # TODO this fails with a strange error. + #$dp->fill_in_dsn($dbh, $dsn); my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; PTDEBUG && _d($sql); @@ -131,39 +132,29 @@ sub find_cluster_nodes { # So try to detect and remove those. sub remove_duplicate_cxns { - my ($self, @cxns) = @_; - my %addresses; + my ($self, %args) = @_; + my @cxns = @{$args{cxns}}; + my $seen_ids = $args{seen_ids}; + PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns); + my @trimmed_cxns; - my @unique_cxns; - CXN: - foreach my $cxn ( @cxns ) { - # If not a cluster node, assume that it's unique - if ( !$self->is_cluster_node($cxn) ) { - push @unique_cxns, $cxn; - next CXN; - } + for my $cxn ( @cxns ) { + my $dbh = $cxn->dbh(); + my $sql = q{SELECT @@SERVER_ID}; + PTDEBUG && _d($sql); + my ($id) = $dbh->selectrow_array($sql); + PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); - # Otherwise, check that it only shows up once. - my $dbh = $cxn->dbh(); - my $sql = q{SHOW VARIABLES LIKE 'wsrep_sst_receive_address'}; - PTDEBUG && _d($dbh, $sql); - my (undef, $receive_addr) = $dbh->selectrow_array($sql); - - if ( !$receive_addr ) { - PTDEBUG && _d(q{Query returned nothing, assuming that it's }, - q{not a duplicate}); - push @unique_cxns, $cxn; - } - elsif ( $addresses{$receive_addr}++ ) { - PTDEBUG && _d('Removing ', $cxn->name, 'from slaves', - 'because we already have a node from this address'); + if ( ! $seen_ids->{$id}++ ) { + push @trimmed_cxns, $cxn } else { - push @unique_cxns, $cxn; + PTDEBUG && _d("Removing ", $cxn->name, + ", ID $id, because we've already seen it"); } - } - return @unique_cxns; + + return @trimmed_cxns; } sub same_cluster { @@ -178,6 +169,56 @@ sub same_cluster { return ($cluster1 || '') eq ($cluster2 || ''); } +sub autodetect_nodes { + my ($self, %args) = @_; + my $ms = $args{MasterSlave}; + my $dp = $args{DSNParser}; + my $make_cxn = $args{make_cxn}; + my $nodes = $args{nodes}; + my $seen_ids = $args{seen_ids}; + + return unless @$nodes; + + my @new_nodes; + for my $node ( @$nodes ) { + my @nodes = $self->find_cluster_nodes( + dbh => $node->dbh(), + dsn => $node->dsn(), + make_cxn => $make_cxn, + DSNParser => $dp, + ); + push @new_nodes, @nodes; + } + + @new_nodes = $self->remove_duplicate_cxns( + cxns => \@new_nodes, + seen_ids => $seen_ids + ); + + my @new_slaves; + foreach my $node (@new_nodes) { + my $node_slaves = $ms->get_slaves( + dbh => $node->dbh(), + dsn => $node->dsn(), + make_cxn => $make_cxn, + ); + push @new_slaves, @$node_slaves; + } + + @new_slaves = $self->remove_duplicate_cxns( + cxns => \@new_slaves, + seen_ids => $seen_ids + ); + + my @new_slave_nodes = grep { $self->is_cluster_node($_) } @new_slaves; + + return @new_nodes, @new_slaves, + $self->autodetect_nodes( + %args, + nodes => \@new_slave_nodes, + ); +} + sub _d { my ($package, undef, $line) = caller 0; @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } diff --git a/t/pt-table-checksum/pxc.t b/t/pt-table-checksum/pxc.t index 31c3bced..0c1f39a1 100644 --- a/t/pt-table-checksum/pxc.t +++ b/t/pt-table-checksum/pxc.t @@ -25,6 +25,8 @@ require "$trunk/bin/pt-table-checksum"; # Do this after requiring ptc, since it uses Mo require VersionParser; +my $ip = qr/\Q127.1\E|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/; + my $dp = new DSNParser(opts=>$dsn_opts); my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); my $node1 = $sb->get_dbh_for('node1'); @@ -74,40 +76,47 @@ $node1->do(qq/INSERT INTO dsns.dsns VALUES (1, 1, '$node1_dsn')/); # if no other cluster nodes are detected, in which case the user # probably didn't specifying --recursion-method dsn. $output = output( - sub { pt_table_checksum::main(@args) }, + sub { pt_table_checksum::main(@args, qw(--no-autodetect-nodes)) }, stderr => 1, ); like( $output, - qr/h=127.1,P=12345 is a cluster node but no other nodes/, + qr/h=127(?:\Q.0.0\E)?.1,P=12345 is a cluster node but no other nodes/, "Dies if no other nodes are found" ); -$output = output( - sub { pt_table_checksum::main(@args, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns") - }, - stderr => 1, -); +for my $args ( + ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["autodetecting everything"] + ) +{ + my $test = shift @$args; + $output = output( + sub { pt_table_checksum::main(@args, + @$args) + }, + stderr => 1, + ); -is( - PerconaTest::count_checksum_results($output, 'errors'), - 0, - "No diffs: no errors" -); + is( + PerconaTest::count_checksum_results($output, 'errors'), + 0, + "No diffs: no errors ($test)" + ); -is( - PerconaTest::count_checksum_results($output, 'skipped'), - 0, - "No diffs: no skips" -); + is( + PerconaTest::count_checksum_results($output, 'skipped'), + 0, + "No diffs: no skips ($test)" + ); -is( - PerconaTest::count_checksum_results($output, 'diffs'), - 0, - "No diffs: no diffs" -); + is( + PerconaTest::count_checksum_results($output, 'diffs'), + 0, + "No diffs: no diffs ($test)" + ); +} # Now really test checksumming a cluster. To create a diff we have to disable # the binlog. Although PXC doesn't need or use the binlog to communicate @@ -140,45 +149,53 @@ is( "Node3 not changed" ); -$output = output( - sub { pt_table_checksum::main(@args, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns") - }, - stderr => 1, -); +for my $args ( + ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["autodetecting everything"] + ) +{ + my $test = shift @$args; -is( - PerconaTest::count_checksum_results($output, 'errors'), - 0, - "1 diff: no errors" -); + $output = output( + sub { pt_table_checksum::main(@args, + @$args) + }, + stderr => 1, + ); -is( - PerconaTest::count_checksum_results($output, 'skipped'), - 0, - "1 diff: no skips" -); + is( + PerconaTest::count_checksum_results($output, 'errors'), + 0, + "1 diff: no errors ($test)" + ); -is( - PerconaTest::count_checksum_results($output, 'diffs'), - 1, - "1 diff: 1 diff" -) or diag($output); + is( + PerconaTest::count_checksum_results($output, 'skipped'), + 0, + "1 diff: no skips ($test)" + ); -# 11-17T13:02:54 0 1 26 1 0 0.021 test.t -like( - $output, - qr/^\S+\s+ # ts - 0\s+ # errors - 1\s+ # diffs - 26\s+ # rows - \d+\s+ # chunks - 0\s+ # skipped - \S+\s+ # time - test.t$ # table - /xm, - "1 diff: it's in test.t" -); + is( + PerconaTest::count_checksum_results($output, 'diffs'), + 1, + "1 diff: 1 diff ($test)" + ) or diag($output); + + # 11-17T13:02:54 0 1 26 1 0 0.021 test.t + like( + $output, + qr/^\S+\s+ # ts + 0\s+ # errors + 1\s+ # diffs + 26\s+ # rows + \d+\s+ # chunks + 0\s+ # skipped + \S+\s+ # time + test.t$ # table + /xm, + "1 diff: it's in test.t ($test)" + ); +} # ############################################################################# # cluster, node1 -> slave, run on node1 @@ -210,34 +227,42 @@ $slave_dbh->do("update test.t set c='zebra' where c='z'"); # https://bugs.launchpad.net/percona-toolkit/+bug/1080385 # Cluster nodes default to ROW format because that's what Galeara # works best with, even though it doesn't really use binlogs. -$output = output( - sub { pt_table_checksum::main(@args, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns") - }, - stderr => 1, -); +for my $args ( + ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["autodetecting everything"] + ) +{ + my $test = shift @$args; + $output = output( + sub { pt_table_checksum::main(@args, + @$args) + }, + stderr => 1, + ); -like( - $output, - qr/replica h=127.1,P=12348 has binlog_format ROW/, - "--check-binlog-format warns about slave's binlog format" -); + like( + $output, + qr/replica h=127(?:\Q.0.0\E)?\.1,P=12348 has binlog_format ROW/, + "--check-binlog-format warns about slave's binlog format ($test)" + ); + + # Now really test that diffs on the slave are detected. + $output = output( + sub { pt_table_checksum::main(@args, + @$args, + qw(--no-check-binlog-format)), + }, + stderr => 1, + ); -# Now really test that diffs on the slave are detected. -$output = output( - sub { pt_table_checksum::main(@args, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns", - qw(--no-check-binlog-format)), - }, - stderr => 1, -); - -is( - PerconaTest::count_checksum_results($output, 'diffs'), - 1, - "Detects diffs on slave of cluster node1" -) or diag($output); + is( + PerconaTest::count_checksum_results($output, 'diffs'), + 1, + "Detects diffs on slave of cluster node1 ($test)" + ) or diag($output); +} + $slave_dbh->disconnect; $sb->stop_sandbox('cslave1'); @@ -272,21 +297,28 @@ is( "Slave is changed" ); -for my -$output = output( - sub { pt_table_checksum::main(@args, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns", - qw(--no-check-binlog-format -d test)), - }, - stderr => 1, -); +for my $args ( + ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["autodetecting everything"] + ) +{ + my $test = shift @$args; -is( - PerconaTest::count_checksum_results($output, 'diffs'), - 0, - "Limitation: does not detect diffs on slave of cluster node2" -) or diag($output); + $output = output( + sub { pt_table_checksum::main(@args, + @$args, + qw(--no-check-binlog-format -d test)), + }, + stderr => 1, + ); + is( + PerconaTest::count_checksum_results($output, 'diffs'), + 0, + "Limitation: does not detect diffs on slave of cluster node2 ($test)" + ) or diag($output); +} + $slave_dbh->disconnect; $sb->stop_sandbox('cslave1'); @@ -442,19 +474,27 @@ like( like( $output, - qr/the direct replica of h=127.1,P=12349 was not found or specified/, + qr/the direct replica of h=$ip,P=12349 was not found or specified/, "Warns that direct replica of the master isn't found or specified", ); -# Originally, these tested a dsn table with all nodes. # Use the other DSN table with all three nodes. Now the tool should # give a more specific warning than that ^. +# Originally, these tested a dsn table with all nodes; now we hijack +# those tests to also try the autodetection for my $args ( ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], ["autodetecting everything"] ) { my $test = shift @$args; + + # Make a diff on node1. If ptc is really auto-detecting node1, then it + # should report this diff. + $node1->do("set sql_log_bin=0"); + $node1->do("update test.t set c='zebra' where c='z'"); + $node1->do("set sql_log_bin=1"); + $output = output( sub { pt_table_checksum::main($master_dsn, @$args, @@ -486,7 +526,7 @@ for my $args ( like( $output, - qr/Diffs will only be detected if the cluster is consistent with h=127.1,P=12345 because h=127.1,P=12349/, + qr/Diffs will only be detected if the cluster is consistent with h=$ip,P=12345 because h=$ip,P=12349/, "Warns that diffs only detected if cluster consistent with direct replica ($test)", ); @@ -574,32 +614,40 @@ $sb->load_file('node4', "$sample/a-z.sql"); # Add node4 in the cluster2 to the DSN table. $node1->do(qq/INSERT INTO dsns.dsns VALUES (5, null, '$c->{node4}->{dsn}')/); -$output = output( - sub { pt_table_checksum::main(@args, - '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns", - qw(-d test)) - }, - stderr => 1, -); +for my $args ( + ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["autodetecting everything"] + ) +{ + my $test = shift @$args; -like( - $output, - qr/h=127.1,P=12345 is in cluster pt_sandbox_cluster/, - "Detects that node1 is in pt_sandbox_cluster" -); + $output = output( + sub { pt_table_checksum::main(@args, + @$args, + qw(-d test)) + }, + stderr => 1, + ); -like( - $output, - qr/h=127.1,P=2900 is in cluster cluster2/, - "Detects that node4 is in cluster2" -); + like( + $output, + qr/h=127(?:\Q.0.0\E)?.1,P=12345 is in cluster pt_sandbox_cluster/, + "Detects that node1 is in pt_sandbox_cluster ($test)" + ); -unlike( - $output, - qr/test/, - "Different clusters, no results" -); + like( + $output, + qr/h=127(?:\Q.0.0\E)?.1,P=2900 is in cluster cluster2/, + "Detects that node4 is in cluster2 ($test)" + ); + unlike( + $output, + qr/test/, + "Different clusters, no results ($test)" + ); +} + $sb->stop_sandbox(qw(node4 node5 node6)); # Restore the DSN table in case there are more tests. From 823914826a2b4376c3dfb30e4c75fa1bfe2d28d6 Mon Sep 17 00:00:00 2001 From: "Brian Fraser fraserb@gmail.com" <> Date: Sun, 25 Nov 2012 19:12:17 -0300 Subject: [PATCH 4/5] Removed three autodetect tests that were rightfully failing -- I thought the tests were for cluster1 -> cluster2, but they were for cluster1, unrelated cluster2 --- t/pt-table-checksum/pxc.t | 52 +++++++++++++++++---------------------- 1 file changed, 22 insertions(+), 30 deletions(-) diff --git a/t/pt-table-checksum/pxc.t b/t/pt-table-checksum/pxc.t index 0c1f39a1..d14088f8 100644 --- a/t/pt-table-checksum/pxc.t +++ b/t/pt-table-checksum/pxc.t @@ -614,39 +614,31 @@ $sb->load_file('node4', "$sample/a-z.sql"); # Add node4 in the cluster2 to the DSN table. $node1->do(qq/INSERT INTO dsns.dsns VALUES (5, null, '$c->{node4}->{dsn}')/); -for my $args ( - ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], - ["autodetecting everything"] - ) -{ - my $test = shift @$args; +$output = output( + sub { pt_table_checksum::main(@args, + '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns", + qw(-d test)) + }, + stderr => 1, +); - $output = output( - sub { pt_table_checksum::main(@args, - @$args, - qw(-d test)) - }, - stderr => 1, - ); +like( + $output, + qr/h=127(?:\Q.0.0\E)?.1,P=12345 is in cluster pt_sandbox_cluster/, + "Detects that node1 is in pt_sandbox_cluster" +); - like( - $output, - qr/h=127(?:\Q.0.0\E)?.1,P=12345 is in cluster pt_sandbox_cluster/, - "Detects that node1 is in pt_sandbox_cluster ($test)" - ); +like( + $output, + qr/h=127(?:\Q.0.0\E)?.1,P=2900 is in cluster cluster2/, + "Detects that node4 is in cluster2" +); - like( - $output, - qr/h=127(?:\Q.0.0\E)?.1,P=2900 is in cluster cluster2/, - "Detects that node4 is in cluster2 ($test)" - ); - - unlike( - $output, - qr/test/, - "Different clusters, no results ($test)" - ); -} +unlike( + $output, + qr/test/, + "Different clusters, no results" +); $sb->stop_sandbox(qw(node4 node5 node6)); From db0a29561cc3be02463d0394957d4901d6322f4f Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 9 Apr 2013 10:21:15 -0300 Subject: [PATCH 5/5] Fixes per Daniel's review --- bin/pt-table-checksum | 162 +++++++++++++++++----------------- lib/Cxn.pm | 36 ++++++++ lib/MasterSlave.pm | 3 +- lib/Percona/XtraDB/Cluster.pm | 81 ++++++----------- sandbox/start-sandbox | 3 +- t/pt-table-checksum/pxc.t | 34 +++---- 6 files changed, 170 insertions(+), 149 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 3e95bbab..f15e7a76 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -3341,6 +3341,32 @@ sub name { return $self->{hostname} || $self->{dsn_name} || 'unknown host'; } +sub remove_duplicate_cxns { + my ($self, %args) = @_; + my @cxns = @{$args{cxns}}; + my $seen_ids = $args{seen_ids} || {}; + PTDEBUG && _d("Removing duplicates from ", join(" ", map { $_->name } @cxns)); + my @trimmed_cxns; + + for my $cxn ( @cxns ) { + my $dbh = $cxn->dbh(); + my $sql = q{SELECT @@server_id}; + PTDEBUG && _d($sql); + my ($id) = $dbh->selectrow_array($sql); + PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); + + if ( ! $seen_ids->{$id}++ ) { + push @trimmed_cxns, $cxn + } + else { + PTDEBUG && _d("Removing ", $cxn->name, + ", ID ", $id, ", because we've already seen it"); + } + } + + return \@trimmed_cxns; +} + sub DESTROY { my ($self) = @_; if ( $self->{dbh} @@ -3385,6 +3411,8 @@ use constant PTDEBUG => $ENV{PTDEBUG} || 0; use Mo; use Data::Dumper; +{ local $EVAL_ERROR; eval { require Cxn } }; + sub get_cluster_name { my ($self, $cxn) = @_; my $sql = "SHOW VARIABLES LIKE 'wsrep\_cluster\_name'"; @@ -3427,13 +3455,13 @@ sub find_cluster_nodes { my $make_cxn = $args{make_cxn}; - my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; + my $sql = q{SHOW STATUS LIKE 'wsrep\_incoming\_addresses'}; PTDEBUG && _d($sql); my (undef, $addresses) = $dbh->selectrow_array($sql); PTDEBUG && _d("Cluster nodes found: ", $addresses); return unless $addresses; - my @addresses = grep !/\Aunspecified\z/i, + my @addresses = grep { !/\Aunspecified\z/i } split /,\s*/, $addresses; my @nodes; @@ -3459,34 +3487,14 @@ sub find_cluster_nodes { push @nodes, $make_cxn->(dsn => $node_dsn); } - return @nodes; + return \@nodes; } - sub remove_duplicate_cxns { my ($self, %args) = @_; my @cxns = @{$args{cxns}}; my $seen_ids = $args{seen_ids}; - PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns); - my @trimmed_cxns; - - for my $cxn ( @cxns ) { - my $dbh = $cxn->dbh(); - my $sql = q{SELECT @@SERVER_ID}; - PTDEBUG && _d($sql); - my ($id) = $dbh->selectrow_array($sql); - PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); - - if ( ! $seen_ids->{$id}++ ) { - push @trimmed_cxns, $cxn - } - else { - PTDEBUG && _d("Removing ", $cxn->name, - ", ID $id, because we've already seen it"); - } - } - - return @trimmed_cxns; + return Cxn->remove_duplicate_cxns(%args); } sub same_cluster { @@ -3508,46 +3516,49 @@ sub autodetect_nodes { my $nodes = $args{nodes}; my $seen_ids = $args{seen_ids}; - return unless @$nodes; + my $new_nodes = []; - my @new_nodes; + return $new_nodes unless @$nodes; + for my $node ( @$nodes ) { - my @nodes = $self->find_cluster_nodes( + my $nodes_found = $self->find_cluster_nodes( dbh => $node->dbh(), dsn => $node->dsn(), make_cxn => $make_cxn, DSNParser => $dp, ); - push @new_nodes, @nodes; + push @$new_nodes, @$nodes_found; } - @new_nodes = $self->remove_duplicate_cxns( - cxns => \@new_nodes, + $new_nodes = $self->remove_duplicate_cxns( + cxns => $new_nodes, seen_ids => $seen_ids ); - my @new_slaves; - foreach my $node (@new_nodes) { + my $new_slaves = []; + foreach my $node (@$new_nodes) { my $node_slaves = $ms->get_slaves( dbh => $node->dbh(), dsn => $node->dsn(), make_cxn => $make_cxn, ); - push @new_slaves, @$node_slaves; + push @$new_slaves, @$node_slaves; } - @new_slaves = $self->remove_duplicate_cxns( - cxns => \@new_slaves, + $new_slaves = $self->remove_duplicate_cxns( + cxns => $new_slaves, seen_ids => $seen_ids ); - my @new_slave_nodes = grep { $self->is_cluster_node($_) } @new_slaves; - - return @new_nodes, @new_slaves, - $self->autodetect_nodes( - %args, - nodes => \@new_slave_nodes, - ); + my @new_slave_nodes = grep { $self->is_cluster_node($_) } @$new_slaves; + + my $slaves_of_slaves = $self->autodetect_nodes( + %args, + nodes => \@new_slave_nodes, + ); + + my @autodetected_nodes = ( @$new_nodes, @$new_slaves, @$slaves_of_slaves ); + return \@autodetected_nodes; } sub _d { @@ -4617,6 +4628,8 @@ sub get_slaves { my $dp = $self->{DSNParser}; my $methods = $self->_resolve_recursion_methods($args{dsn}); + return $slaves unless @$methods; + if ( grep { m/processlist|hosts/i } @$methods ) { my @required_args = qw(dbh dsn); foreach my $arg ( @required_args ) { @@ -8591,11 +8604,22 @@ sub main { } } + my $autodiscover_cluster; + my $recursion_method = []; + foreach my $method ( @{$o->get('recursion-method')} ) { + if ( $method eq 'cluster' ) { + $autodiscover_cluster = 1; + } + else { + push @$recursion_method, $method + } + } + $o->set('recursion-method', $recursion_method); eval { MasterSlave::check_recursion_method($o->get('recursion-method')); }; if ( $EVAL_ERROR ) { - $o->save_error("Invalid --recursion-method: $EVAL_ERROR") + $o->save_error($EVAL_ERROR) } Pingback::validate_options($o); @@ -8812,25 +8836,28 @@ sub main { my %seen_ids; for my $cxn ($master_cxn, @$slaves) { my $dbh = $cxn->dbh(); - my $sql = q{SELECT @@SERVER_ID}; + my $sql = q{SELECT @@server_id}; PTDEBUG && _d($cxn, $dbh, $sql); my ($id) = $dbh->selectrow_array($sql); $seen_ids{$id}++; } - my $dsn = grep { /^dsn/i } @{$o->get('recursion-method')}; - if ( !$dsn && $o->get('autodetect-nodes') ) { + if ( $autodiscover_cluster ) { my @known_nodes = grep { $cluster_name_for{$_} } $master_cxn, @$slaves; - push @$slaves, $cluster->autodetect_nodes( + my $new_cxns = $cluster->autodetect_nodes( nodes => \@known_nodes, MasterSlave => $ms, DSNParser => $dp, make_cxn => $make_cxn_cluster, seen_ids => \%seen_ids, ); + push @$slaves, @$new_cxns; } - ($master_cxn, @$slaves) = remove_duplicate_cxns( $master_cxn, @$slaves); + my $trimmed_nodes = Cxn->remove_duplicate_cxns( + cxns => [ $master_cxn, @$slaves ], + ); + ($master_cxn, @$slaves) = @$trimmed_nodes; PTDEBUG && _d(scalar @$slaves, 'slaves found'); @@ -9844,31 +9871,6 @@ sub main { # ############################################################################ # Subroutines # ############################################################################ -sub remove_duplicate_cxns { - my (@cxns) = @_; - PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns); - my @trimmed_cxns; - my %seen; - - for my $cxn ( @cxns ) { - my $dbh = $cxn->dbh(); - my $sql = q{SELECT @@SERVER_ID}; - PTDEBUG && _d($sql); - my ($id) = $dbh->selectrow_array($sql); - PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); - - if ( ! $seen{$id}++ ) { - push @trimmed_cxns, $cxn - } - else { - PTDEBUG && _d("Removing ", $cxn->name, - ", ID $id, because we've already seen it"); - } - } - - return @trimmed_cxns; -} - sub ts { my ($msg) = @_; my ($s, $m, $h, $d, $M) = localtime; @@ -11097,12 +11099,6 @@ group: Connection Prompt for a password when connecting to MySQL. -=item --[no]autodetect-nodes - -default: yes - -Try to automatically find other cluster nodes. TODO TODO. - =item --[no]check-binlog-format default: yes @@ -11584,6 +11580,7 @@ Possible methods are: =========== ================== processlist SHOW PROCESSLIST hosts SHOW SLAVE HOSTS + cluster SHOW STATUS LIKE 'wsrep\_incoming\_addresses' dsn=DSN DSNs from a table none Do not find slaves @@ -11594,6 +11591,13 @@ the C method becomes the default because it works better in this case. The C method requires replicas to be configured with C, C, etc. +The C method requires a cluster based on Galera 23.7.3 or newer, +such as Percona XtraDB Cluster versions 5.5.29 and above. This will +autodiscover nodes in a cluster using C. +Note that you can combine C with C and C to +have the tool autodiscover all cluster nodes and traditional slaves, +but this is considered highly experimental. + The C method is special: rather than automatically discovering replicas, this method specifies a table with replica DSNs. The tool will only connect to these replicas. This method works best when replicas do not use the same diff --git a/lib/Cxn.pm b/lib/Cxn.pm index df7f6ce4..8a75ecda 100644 --- a/lib/Cxn.pm +++ b/lib/Cxn.pm @@ -195,6 +195,42 @@ sub name { return $self->{hostname} || $self->{dsn_name} || 'unknown host'; } +# There's two reasons why there might be dupes: +# If the "master" is a cluster node, then a DSN table might have been +# used, and it may have all nodes' DSNs so the user can run the tool +# on any node, in which case it has the "master" node, the DSN given +# on the command line. +# On the other hand, maybe find_cluster_nodes worked, in which case +# we definitely have a dupe for the master cxn, but we may also have a +# dupe for every other node if this was unsed in conjunction with a +# DSN table. +# So try to detect and remove those. +sub remove_duplicate_cxns { + my ($self, %args) = @_; + my @cxns = @{$args{cxns}}; + my $seen_ids = $args{seen_ids} || {}; + PTDEBUG && _d("Removing duplicates from ", join(" ", map { $_->name } @cxns)); + my @trimmed_cxns; + + for my $cxn ( @cxns ) { + my $dbh = $cxn->dbh(); + my $sql = q{SELECT @@server_id}; + PTDEBUG && _d($sql); + my ($id) = $dbh->selectrow_array($sql); + PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); + + if ( ! $seen_ids->{$id}++ ) { + push @trimmed_cxns, $cxn + } + else { + PTDEBUG && _d("Removing ", $cxn->name, + ", ID ", $id, ", because we've already seen it"); + } + } + + return \@trimmed_cxns; +} + sub DESTROY { my ($self) = @_; if ( $self->{dbh} diff --git a/lib/MasterSlave.pm b/lib/MasterSlave.pm index a60f63c4..3049552d 100644 --- a/lib/MasterSlave.pm +++ b/lib/MasterSlave.pm @@ -73,6 +73,8 @@ sub get_slaves { my $dp = $self->{DSNParser}; my $methods = $self->_resolve_recursion_methods($args{dsn}); + return $slaves unless @$methods; + if ( grep { m/processlist|hosts/i } @$methods ) { my @required_args = qw(dbh dsn); foreach my $arg ( @required_args ) { @@ -114,7 +116,6 @@ sub _resolve_recursion_methods { my ($self, $dsn) = @_; my $o = $self->{OptionParser}; if ( $o->got('recursion-method') ) { - # Use whatever the user explicitly gave on the command line. return $o->get('recursion-method'); } elsif ( $dsn && ($dsn->{P} || 3306) != 3306 ) { diff --git a/lib/Percona/XtraDB/Cluster.pm b/lib/Percona/XtraDB/Cluster.pm index 2600cc66..68c67670 100644 --- a/lib/Percona/XtraDB/Cluster.pm +++ b/lib/Percona/XtraDB/Cluster.pm @@ -30,6 +30,8 @@ use constant PTDEBUG => $ENV{PTDEBUG} || 0; use Mo; use Data::Dumper; +{ local $EVAL_ERROR; eval { require Cxn } }; + sub get_cluster_name { my ($self, $cxn) = @_; my $sql = "SHOW VARIABLES LIKE 'wsrep\_cluster\_name'"; @@ -79,13 +81,13 @@ sub find_cluster_nodes { # TODO this fails with a strange error. #$dp->fill_in_dsn($dbh, $dsn); - my $sql = q{SHOW STATUS LIKE 'wsrep_incoming_addresses'}; + my $sql = q{SHOW STATUS LIKE 'wsrep\_incoming\_addresses'}; PTDEBUG && _d($sql); my (undef, $addresses) = $dbh->selectrow_array($sql); PTDEBUG && _d("Cluster nodes found: ", $addresses); return unless $addresses; - my @addresses = grep !/\Aunspecified\z/i, + my @addresses = grep { !/\Aunspecified\z/i } split /,\s*/, $addresses; my @nodes; @@ -117,44 +119,14 @@ sub find_cluster_nodes { push @nodes, $make_cxn->(dsn => $node_dsn); } - return @nodes; + return \@nodes; } -# There's two reasons why there might be dupes: -# If the "master" is a cluster node, then a DSN table might have been -# used, and it may have all nodes' DSNs so the user can run the tool -# on any node, in which case it has the "master" node, the DSN given -# on the command line. -# On the other hand, maybe find_cluster_nodes worked, in which case -# we definitely have a dupe for the master cxn, but we may also have a -# dupe for every other node if this was unsed in conjunction with a -# DSN table. -# So try to detect and remove those. - sub remove_duplicate_cxns { my ($self, %args) = @_; my @cxns = @{$args{cxns}}; my $seen_ids = $args{seen_ids}; - PTDEBUG && _d("Removing duplicates from ", join " ", map { $_->name } @cxns); - my @trimmed_cxns; - - for my $cxn ( @cxns ) { - my $dbh = $cxn->dbh(); - my $sql = q{SELECT @@SERVER_ID}; - PTDEBUG && _d($sql); - my ($id) = $dbh->selectrow_array($sql); - PTDEBUG && _d('Server ID for ', $cxn->name, ': ', $id); - - if ( ! $seen_ids->{$id}++ ) { - push @trimmed_cxns, $cxn - } - else { - PTDEBUG && _d("Removing ", $cxn->name, - ", ID $id, because we've already seen it"); - } - } - - return @trimmed_cxns; + return Cxn->remove_duplicate_cxns(%args); } sub same_cluster { @@ -177,46 +149,51 @@ sub autodetect_nodes { my $nodes = $args{nodes}; my $seen_ids = $args{seen_ids}; - return unless @$nodes; + my $new_nodes = []; - my @new_nodes; + return $new_nodes unless @$nodes; + for my $node ( @$nodes ) { - my @nodes = $self->find_cluster_nodes( + my $nodes_found = $self->find_cluster_nodes( dbh => $node->dbh(), dsn => $node->dsn(), make_cxn => $make_cxn, DSNParser => $dp, ); - push @new_nodes, @nodes; + push @$new_nodes, @$nodes_found; } - @new_nodes = $self->remove_duplicate_cxns( - cxns => \@new_nodes, + $new_nodes = $self->remove_duplicate_cxns( + cxns => $new_nodes, seen_ids => $seen_ids ); - my @new_slaves; - foreach my $node (@new_nodes) { + my $new_slaves = []; + foreach my $node (@$new_nodes) { my $node_slaves = $ms->get_slaves( dbh => $node->dbh(), dsn => $node->dsn(), make_cxn => $make_cxn, ); - push @new_slaves, @$node_slaves; + push @$new_slaves, @$node_slaves; } - @new_slaves = $self->remove_duplicate_cxns( - cxns => \@new_slaves, + $new_slaves = $self->remove_duplicate_cxns( + cxns => $new_slaves, seen_ids => $seen_ids ); - my @new_slave_nodes = grep { $self->is_cluster_node($_) } @new_slaves; - - return @new_nodes, @new_slaves, - $self->autodetect_nodes( - %args, - nodes => \@new_slave_nodes, - ); + # If some of the new slaves is a cluster node, autodetect new nodes + # from there too. + my @new_slave_nodes = grep { $self->is_cluster_node($_) } @$new_slaves; + + my $slaves_of_slaves = $self->autodetect_nodes( + %args, + nodes => \@new_slave_nodes, + ); + + my @autodetected_nodes = ( @$new_nodes, @$new_slaves, @$slaves_of_slaves ); + return \@autodetected_nodes; } sub _d { diff --git a/sandbox/start-sandbox b/sandbox/start-sandbox index 3b775e24..ae4e87c8 100755 --- a/sandbox/start-sandbox +++ b/sandbox/start-sandbox @@ -195,7 +195,8 @@ elif [ -x "$PERCONA_TOOLKIT_SANDBOX/libexec/mysqld" ]; then else die "Cannot find executable mysqld in $PERCONA_TOOLKIT_SANDBOX/bin, $PERCONA_TOOLKIT_SANDBOX/sbin or $PERCONA_TOOLKIT_SANDBOX/libexec." fi -version=`$PERCONA_TOOLKIT_SANDBOX/$mysqld -V 2>/dev/null | awk '{print $3}' | cut -d. -f 1,2`; +version="5.5" +#`$PERCONA_TOOLKIT_SANDBOX/$mysqld -V 2>/dev/null | awk '{print $3}' | cut -d. -f 1,2`; if [ ! -d "$PERCONA_TOOLKIT_BRANCH/sandbox/servers/$version" ]; then die "$PERCONA_TOOLKIT_BRANCH/sandbox/servers/$version does not exist." fi diff --git a/t/pt-table-checksum/pxc.t b/t/pt-table-checksum/pxc.t index d14088f8..58bcac34 100644 --- a/t/pt-table-checksum/pxc.t +++ b/t/pt-table-checksum/pxc.t @@ -76,7 +76,7 @@ $node1->do(qq/INSERT INTO dsns.dsns VALUES (1, 1, '$node1_dsn')/); # if no other cluster nodes are detected, in which case the user # probably didn't specifying --recursion-method dsn. $output = output( - sub { pt_table_checksum::main(@args, qw(--no-autodetect-nodes)) }, + sub { pt_table_checksum::main(@args) }, stderr => 1, ); @@ -87,8 +87,8 @@ like( ); for my $args ( - ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], - ["autodetecting everything"] + ["using recusion-method=dsn", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["using recursion-method=cluster", '--recursion-method', 'cluster'] ) { my $test = shift @$args; @@ -150,8 +150,8 @@ is( ); for my $args ( - ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], - ["autodetecting everything"] + ["using recusion-method=dsn", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["using recursion-method=cluster", '--recursion-method', 'cluster'] ) { my $test = shift @$args; @@ -216,11 +216,6 @@ $node2->do("set sql_log_bin=0"); $node2->do("update test.t set c='z' where c='zebra'"); $node2->do("set sql_log_bin=1"); -# Wait for the slave to apply the binlogs from node1 (its master). -# Then change it so it's not consistent. -PerconaTest::wait_for_table($slave_dbh, 'test.t'); -$sb->wait_for_slaves('cslave1'); -$slave_dbh->do("update test.t set c='zebra' where c='z'"); # Another quick test first: the tool should complain about the slave's # binlog format but only the slave's, not the cluster nodes: @@ -228,11 +223,18 @@ $slave_dbh->do("update test.t set c='zebra' where c='z'"); # Cluster nodes default to ROW format because that's what Galeara # works best with, even though it doesn't really use binlogs. for my $args ( - ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], - ["autodetecting everything"] + ["using recusion-method=dsn", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["using recursion-method=cluster,hosts", '--recursion-method', 'cluster,hosts'] ) { my $test = shift @$args; + + # Wait for the slave to apply the binlogs from node1 (its master). + # Then change it so it's not consistent. + PerconaTest::wait_for_table($slave_dbh, 'test.t'); + $sb->wait_for_slaves('cslave1'); + $slave_dbh->do("update test.t set c='zebra' where c='z'"); + $output = output( sub { pt_table_checksum::main(@args, @$args) @@ -298,8 +300,8 @@ is( ); for my $args ( - ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], - ["autodetecting everything"] + ["using recusion-method=dsn", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["using recursion-method=cluster,hosts", '--recursion-method', 'cluster,hosts'] ) { my $test = shift @$args; @@ -483,8 +485,8 @@ like( # Originally, these tested a dsn table with all nodes; now we hijack # those tests to also try the autodetection for my $args ( - ["using recusion-method", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], - ["autodetecting everything"] + ["using recusion-method=dsn", '--recursion-method', "dsn=$node1_dsn,D=dsns,t=dsns"], + ["using recursion-method=cluster,hosts", '--recursion-method', 'cluster,hosts'] ) { my $test = shift @$args;