mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-24 21:35:00 +00:00
PT-2168 pt-osc shouldn't fail while unable to monitor a replica node (#676)
* PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Proof of concept - Fixed regular expression in lib/TableParser.pm mistakenly changed in the tool's code * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Added basic test case for PT-2168 - Added more details for replica lag information - Disconnecting replica if lag is not checked. This prevents "Too many connections" error * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Implemented option --wait-lost-replicas for pt-osc, added test case * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Added more tests for situations where connection to the replica can fail * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Removed extra checks for wait_no_die variable - Added test cases for SQL queries that pt-osc sends to replicas * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Allow to reload dsns table while waiting for missed replica if --recursion-method is dsn - Fixed logic in replica rediscovery, so it works with replicas on the same host but with different ports - Renamed option wait-lost-replicas to fail-on-stopped-replication, so it is in line with pt-table-checksum - Adjusted tests - Removed debug code for PT-1760 - Added test case for PT-1760 - Added exception for variable Open_tables_with_triggers in lib/bash/collect.sh due to failed test in Percona Server 8.0.34+ - Updated pt-stalk * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Updated modules - Fixed typo in t/pt-table-sync/bidirectional.t - Removed trailing whitespaces in lib/MasterSlave.pm * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Help for option --fail-on-stopped-replication * PT-2168 - PT-OSC shouldn't fail while unable to monitor a replica node - Added check for availability of the simple_rewrite_plugin in t/pt-online-schema-change/pt-2168.t * PT-2168 
- PT-OSC shouldn't fail while unable to monitor a replica node - Added link to the simple_rewrite_plugin source code - Removed tests for code that runs only in the beginning of pt-osc action, so should not be affected by the option fail-on-stopped-replication
This commit is contained in:
@@ -2734,6 +2734,7 @@ sub get_slaves {
|
||||
push @$slaves, $make_cxn->(dsn => $slave_dsn, dbh => $dbh, parent => $parent);
|
||||
return;
|
||||
},
|
||||
wait_no_die => $args{'wait_no_die'},
|
||||
}
|
||||
);
|
||||
} elsif ( $methods->[0] =~ m/^dsn=/i ) {
|
||||
@@ -2741,6 +2742,7 @@ sub get_slaves {
|
||||
$slaves = $self->get_cxn_from_dsn_table(
|
||||
%args,
|
||||
dsn_table_dsn => $dsn_table_dsn,
|
||||
wait_no_die => $args{'wait_no_die'},
|
||||
);
|
||||
}
|
||||
elsif ( $methods->[0] =~ m/none/i ) {
|
||||
@@ -2796,6 +2798,20 @@ sub recurse_to_slaves {
|
||||
|
||||
my $dbh = $args->{dbh};
|
||||
|
||||
my $get_dbh = sub {
|
||||
eval {
|
||||
$dbh = $dp->get_dbh(
|
||||
$dp->get_cxn_params($slave_dsn), { AutoCommit => 1 }
|
||||
);
|
||||
PTDEBUG && _d('Connected to', $dp->as_string($slave_dsn));
|
||||
};
|
||||
if ( $EVAL_ERROR ) {
|
||||
print STDERR "Cannot connect to ", $dp->as_string($slave_dsn), ": ", $EVAL_ERROR, "\n"
|
||||
or die "Cannot print: $OS_ERROR";
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
DBH: {
|
||||
if ( !defined $dbh ) {
|
||||
foreach my $known_slave ( @{$args->{slaves}} ) {
|
||||
@@ -2805,23 +2821,29 @@ sub recurse_to_slaves {
|
||||
last DBH;
|
||||
}
|
||||
}
|
||||
|
||||
eval {
|
||||
$dbh = $dp->get_dbh(
|
||||
$dp->get_cxn_params($slave_dsn), { AutoCommit => 1 });
|
||||
PTDEBUG && _d('Connected to', $dp->as_string($slave_dsn));
|
||||
};
|
||||
if ( $EVAL_ERROR ) {
|
||||
print STDERR "Cannot connect to ", $dp->as_string($slave_dsn), ": ", $EVAL_ERROR, "\n"
|
||||
or die "Cannot print: $OS_ERROR";
|
||||
return;
|
||||
}
|
||||
$get_dbh->();
|
||||
}
|
||||
}
|
||||
|
||||
my $sql = 'SELECT @@SERVER_ID';
|
||||
PTDEBUG && _d($sql);
|
||||
my ($id) = $dbh->selectrow_array($sql);
|
||||
my $id = undef;
|
||||
do {
|
||||
eval {
|
||||
($id) = $dbh->selectrow_array($sql);
|
||||
};
|
||||
if ( $EVAL_ERROR ) {
|
||||
if ( $args->{wait_no_die} ) {
|
||||
print STDERR "Error getting server id: ", $EVAL_ERROR,
|
||||
"\nRetrying query for server ", $slave_dsn->{h}, ":", $slave_dsn->{P}, "\n";
|
||||
sleep 1;
|
||||
$dbh->disconnect();
|
||||
$get_dbh->();
|
||||
} else {
|
||||
die $EVAL_ERROR;
|
||||
}
|
||||
}
|
||||
} until ($id);
|
||||
PTDEBUG && _d('Working on server ID', $id);
|
||||
my $master_thinks_i_am = $dsn->{server_id};
|
||||
if ( !defined $id
|
||||
@@ -3443,18 +3465,39 @@ sub get_cxn_from_dsn_table {
|
||||
. "or a database-qualified table (t)";
|
||||
}
|
||||
|
||||
my $done = 0;
|
||||
my $dsn_tbl_cxn = $make_cxn->(dsn => $dsn);
|
||||
my $dbh = $dsn_tbl_cxn->connect();
|
||||
my $sql = "SELECT dsn FROM $dsn_table ORDER BY id";
|
||||
PTDEBUG && _d($sql);
|
||||
my $dsn_strings = $dbh->selectcol_arrayref($sql);
|
||||
my @cxn;
|
||||
if ( $dsn_strings ) {
|
||||
foreach my $dsn_string ( @$dsn_strings ) {
|
||||
PTDEBUG && _d('DSN from DSN table:', $dsn_string);
|
||||
push @cxn, $make_cxn->(dsn_string => $dsn_string);
|
||||
use Data::Dumper;
|
||||
DSN:
|
||||
do {
|
||||
@cxn = ();
|
||||
my $dsn_strings = $dbh->selectcol_arrayref($sql);
|
||||
if ( $dsn_strings ) {
|
||||
foreach my $dsn_string ( @$dsn_strings ) {
|
||||
PTDEBUG && _d('DSN from DSN table:', $dsn_string);
|
||||
if ($args{wait_no_die}) {
|
||||
my $lcxn;
|
||||
eval {
|
||||
$lcxn = $make_cxn->(dsn_string => $dsn_string);
|
||||
};
|
||||
if ( $EVAL_ERROR && ($dsn_tbl_cxn->lost_connection($EVAL_ERROR)
|
||||
|| $EVAL_ERROR =~ m/Can't connect to MySQL server/)) {
|
||||
PTDEBUG && _d("Server is not accessible, waiting when it is online again");
|
||||
sleep(1);
|
||||
goto DSN;
|
||||
}
|
||||
push @cxn, $lcxn;
|
||||
} else {
|
||||
push @cxn, $make_cxn->(dsn_string => $dsn_string);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
$done = 1;
|
||||
} until $done;
|
||||
return \@cxn;
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user