Recheck the list of slaves in the table specified with DSN

This makes it possible to add or remove monitored machines while the online schema change (OSC) is running.

This contains work by:
- Daniël van Eeden <daniel.vaneeden@booking.com>
- Mikhail Izioumtchenko <mikhail.izioumtchenko@booking.com>
This commit is contained in:
Daniël van Eeden
2016-07-26 14:33:46 +02:00
parent aa227353b2
commit 9596848343
2 changed files with 97 additions and 18 deletions

View File

@@ -3381,7 +3381,6 @@ sub check_table {
$self->{check_table_error} = $e;
return 0;
}
if ( !$row->[0] || $row->[0] ne $tbl ) {
PTDEBUG && _d('Table does not exist');
return 0;
@@ -4838,6 +4837,24 @@ sub wait {
my $worst; # most lagging slave
my $pr_callback;
my $pr_first_report;
# Refresh the list of slaves to watch, so that hosts can be added or removed
# while wait() is running.
#
# Arguments:
#   $self - the waiter object.  Reads $self->{slaves} (arrayref of connection
#           objects that respond to name()) and $self->{get_slaves_cb}
#           (optional coderef that returns a fresh arrayref of such objects).
# Returns:
#   The arrayref of slaves to watch.  Without a get_slaves_cb this is the
#   current $self->{slaves}, unchanged.
# Side effects:
#   When the sorted set of slave names differs from the previous one,
#   $self->{slaves} is replaced and a notice is printed.
my $pr_refresh_slave_list = sub {
   my ($self) = @_;
   my ($slaves, $refresher) = ($self->{slaves}, $self->{get_slaves_cb});
   # No callback means the list is fixed.  Return the current list instead of
   # nothing: callers assign the result to $slaves and dereference it, and an
   # undef here would make them treat every watched slave as removed.
   return $slaves if ( not defined $refresher );
   my $before = join ' ', sort map {$_->name()} @$slaves;
   $slaves = $refresher->();
   my $after = join ' ', sort map {$_->name()} @$slaves;
   # Only swap in the fresh list when the set of names actually changed;
   # otherwise the existing connection objects are kept.
   if ($before ne $after) {
      $self->{slaves} = $slaves;
      printf "Slave set to watch has changed\n Was: %s\n Now: %s\n",
         $before, $after;
   }
   return($self->{slaves});
};
$slaves = $pr_refresh_slave_list->($self);
if ( $pr ) {
$pr_callback = sub {
my ($fraction, $elapsed, $remaining, $eta, $completed) = @_;
@@ -4865,6 +4882,14 @@ sub wait {
my @lagged_slaves = map { {cxn=>$_, lag=>undef} } @$slaves;
while ( $oktorun->() && @lagged_slaves ) {
PTDEBUG && _d('Checking slave lag');
$slaves = $pr_refresh_slave_list->($self);
my $watched = 0;
@lagged_slaves = grep {
my $slave_name = $_->{cxn}->name();
grep {$slave_name eq $_->name()} @{$slaves // []}
} @lagged_slaves;
for my $i ( 0..$#lagged_slaves ) {
my $lag = $get_lag->($lagged_slaves[$i]->{cxn});
PTDEBUG && _d($lagged_slaves[$i]->{cxn}->name(),
@@ -5708,23 +5733,26 @@ sub _find_best_index {
my $tbl_struct = $tbl->{tbl_struct};
my $indexes = $tbl_struct->{keys};
my $best_index;
my $want_index = $args{chunk_index};
if ( $want_index ) {
PTDEBUG && _d('User wants to use index', $want_index);
if ( !exists $indexes->{$want_index} ) {
PTDEBUG && _d('Cannot use user index because it does not exist');
$want_index = undef;
} else {
$best_index = $want_index;
}
}
if ( !$want_index && $args{mysql_index} ) {
if ( !$best_index && !$want_index && $args{mysql_index} ) {
PTDEBUG && _d('MySQL wants to use index', $args{mysql_index});
$want_index = $args{mysql_index};
}
my $best_index;
my @possible_indexes;
if ( $want_index ) {
if ( !$best_index && $want_index ) {
if ( $indexes->{$want_index}->{is_unique} ) {
PTDEBUG && _d('Will use wanted index');
$best_index = $want_index;
@@ -5734,7 +5762,8 @@ sub _find_best_index {
push @possible_indexes, $want_index;
}
}
else {
if (!$best_index) {
PTDEBUG && _d('Auto-selecting best index');
foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
@@ -8220,8 +8249,11 @@ sub main {
# think that can be less confusing. Also, the $set_on_connect variable can be
# inlined into this subroutine. Many of our tools have a get_dbh() subroutine
# and it might be good to just make a convention of it.
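# Note: pass errok => 1 in the arguments to tolerate a connection error; the
# error is then printed and the Cxn object is returned even though connect() failed.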
my $make_cxn = sub {
my (%args) = @_;
my $errok = $args{errok};
delete($args{errok});
my $cxn = Cxn->new(
%args,
DSNParser => $dp,
@@ -8230,7 +8262,12 @@ sub main {
);
eval { $cxn->connect() }; # connect or die trying
if ( $EVAL_ERROR ) {
die "Cannot connect to MySQL: $EVAL_ERROR\n";
if ($errok) {
printf "IGNORING CONNECTION ERROR: %s\n",
$EVAL_ERROR;
} else {
die "Cannot connect to MySQL: $EVAL_ERROR\n";
}
}
return $cxn;
};
@@ -8369,13 +8406,21 @@ sub main {
Quoter => $q,
);
$slaves = $ms->get_slaves(
dbh => $cxn->dbh(),
dsn => $cxn->dsn(),
make_cxn => sub {
return $make_cxn->(@_, prev_dsn => $cxn->dsn());
},
);
# Callback that asks $ms for the current list of slaves of the server behind
# $cxn.  A true first argument makes slave connection errors fatal; otherwise
# errok is passed to $make_cxn so a failed connection is tolerated.
my $get_slaves_cb = sub {
   my ($intolerant) = @_;
   my $tolerate_errors = (not $intolerant);
   # Connection factory handed to get_slaves(): forwards whatever arguments it
   # receives and appends the previous DSN and the error-tolerance flag.
   my $slave_cxn_factory = sub {
      return $make_cxn->(
         @_,
         prev_dsn => $cxn->dsn(),
         errok    => $tolerate_errors,
      );
   };
   return $ms->get_slaves(
      dbh      => $cxn->dbh(),
      dsn      => $cxn->dsn(),
      make_cxn => $slave_cxn_factory,
   );
};
### first ever call only: do not tolerate connection errors
$slaves = $get_slaves_cb->('intolerant');
PTDEBUG && _d(scalar @$slaves, 'slaves found');
if ( scalar @$slaves ) {
print "Found " . scalar(@$slaves) . " slaves:\n";
@@ -8399,6 +8444,7 @@ sub main {
prev_dsn => $cxn->dsn(),
);
$slave_lag_cxns = [ $cxn ];
$get_slaves_cb = undef;
}
else {
PTDEBUG && _d('Will check slave lag on all slaves');
@@ -8507,11 +8553,12 @@ sub main {
}
$replica_lag = new ReplicaLagWaiter(
slaves => $slave_lag_cxns,
max_lag => $o->get('max-lag'),
oktorun => sub { return $oktorun },
get_lag => $get_lag,
sleep => $sleep,
slaves => $slave_lag_cxns,
get_slaves_cb => $get_slaves_cb,
max_lag => $o->get('max-lag'),
oktorun => sub { return $oktorun },
get_lag => $get_lag,
sleep => $sleep,
);
my $get_status;
@@ -11966,6 +12013,9 @@ replication lag, insert the values C<h=10.10.1.16> and C<h=10.10.1.17> into the
table. Currently, the DSNs are ordered by id, but id and parent_id are otherwise
ignored.
You can change the list of hosts while OSC is executing:
if you change the contents of the DSN table, OSC re-reads it each time it checks replica lag, so the change takes effect at the next lag check.
=item --slave-user
type: string