Handle losing replica connection in check_slave_tables(). Save the eval error from TableParser::check_table() in check_table_error.

This commit is contained in:
Daniel Nichter
2015-06-24 18:20:02 -07:00
parent 921a2fe7ff
commit 7764955209
2 changed files with 81 additions and 41 deletions

View File

@@ -4505,6 +4505,8 @@ sub check_table {
my $db_tbl = $q->quote($db, $tbl); my $db_tbl = $q->quote($db, $tbl);
PTDEBUG && _d('Checking', $db_tbl); PTDEBUG && _d('Checking', $db_tbl);
$self->{check_table_error} = undef;
my $sql = "SHOW TABLES FROM " . $q->quote($db) my $sql = "SHOW TABLES FROM " . $q->quote($db)
. ' LIKE ' . $q->literal_like($tbl); . ' LIKE ' . $q->literal_like($tbl);
PTDEBUG && _d($sql); PTDEBUG && _d($sql);
@@ -4512,8 +4514,9 @@ sub check_table {
eval { eval {
$row = $dbh->selectrow_arrayref($sql); $row = $dbh->selectrow_arrayref($sql);
}; };
if ( $EVAL_ERROR ) { if ( my $e = $EVAL_ERROR ) {
PTDEBUG && _d($EVAL_ERROR); PTDEBUG && _d($e);
$self->{check_table_error} = $e;
return 0; return 0;
} }
if ( !$row->[0] || $row->[0] ne $tbl ) { if ( !$row->[0] || $row->[0] ne $tbl ) {
@@ -9641,6 +9644,7 @@ sub main {
dbh => $master_dbh, dbh => $master_dbh,
repl_table => $repl_table, repl_table => $repl_table,
slaves => $slaves, slaves => $slaves,
have_time => $have_time,
OptionParser => $o, OptionParser => $o,
TableParser => $tp, TableParser => $tp,
Quoter => $q, Quoter => $q,
@@ -9896,7 +9900,9 @@ sub main {
db => $tbl->{db}, db => $tbl->{db},
tbl => $tbl->{tbl}, tbl => $tbl->{tbl},
checksum_cols => $tbl->{checksum_cols}, checksum_cols => $tbl->{checksum_cols},
have_time => $have_time,
TableParser => $tp, TableParser => $tp,
OptionParser => $o,
); );
}; };
if ( $EVAL_ERROR ) { if ( $EVAL_ERROR ) {
@@ -10829,12 +10835,12 @@ sub filter_tables_replicate_check_only {
sub check_repl_table { sub check_repl_table {
my ( %args ) = @_; my ( %args ) = @_;
my @required_args = qw(dbh repl_table slaves my @required_args = qw(dbh repl_table slaves have_time
OptionParser TableParser Quoter); OptionParser TableParser Quoter);
foreach my $arg ( @required_args ) { foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg}; die "I need a $arg argument" unless $args{$arg};
} }
my ($dbh, $repl_table, $slaves, $o, $tp, $q) = @args{@required_args}; my ($dbh, $repl_table, $slaves, $have_time, $o, $tp, $q) = @args{@required_args};
PTDEBUG && _d('Checking --replicate table', $repl_table); PTDEBUG && _d('Checking --replicate table', $repl_table);
@@ -10954,7 +10960,9 @@ sub check_repl_table {
db => $db, db => $db,
tbl => $tbl, tbl => $tbl,
checksum_cols => $tbl_struct->{cols}, checksum_cols => $tbl_struct->{cols},
have_time => $have_time,
TableParser => $tp, TableParser => $tp,
OptionParser => $o,
); );
}; };
if ( $EVAL_ERROR ) { if ( $EVAL_ERROR ) {
@@ -11032,49 +11040,78 @@ sub check_repl_table {
# a nonexistent column. # a nonexistent column.
sub check_slave_tables { sub check_slave_tables {
my (%args) = @_; my (%args) = @_;
my @required_args = qw(slaves db tbl checksum_cols TableParser); my @required_args = qw(slaves db tbl checksum_cols have_time TableParser OptionParser);
foreach my $arg ( @required_args ) { foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg}; die "I need a $arg argument" unless $args{$arg};
} }
my ($slaves, $db, $tbl, $checksum_cols, $tp) = @args{@required_args}; my ($slaves, $db, $tbl, $checksum_cols, $have_time, $tp, $o) = @args{@required_args};
my @problems; my @problems;
SLAVE: SLAVE:
foreach my $slave ( @$slaves ) { foreach my $slave ( @$slaves ) {
my $slave_has_table = $tp->check_table( my $slave_has_table = 0;
dbh => $slave->dbh, my $have_warned = 0;
db => $db, while ( $oktorun && $have_time->() ) {
tbl => $tbl, eval {
); # TableParser::check_table() does not die on error; it sets
if ( !$slave_has_table ) { # check_table_error and returns 0.
push @problems, "Table $db.$tbl does not exist on replica " $slave_has_table = $tp->check_table(
. $slave->name; dbh => $slave->dbh,
next SLAVE; db => $db,
} tbl => $tbl,
);
die $tp->{check_table_error} if defined $tp->{check_table_error};
if ( !$slave_has_table ) {
push @problems, "Table $db.$tbl does not exist on replica "
. $slave->name;
}
else {
# TableParser::get_create_table() will die on error.
my $slave_tbl_struct = $tp->parse(
$tp->get_create_table($slave->dbh, $db, $tbl)
);
my @slave_missing_cols;
foreach my $col ( @$checksum_cols ) {
if ( !$slave_tbl_struct->{is_col}->{$col} ) {
push @slave_missing_cols, $col;
}
}
if ( @slave_missing_cols ) {
push @problems, "Table $db.$tbl on replica " . $slave->name
. " is missing these columns: "
. join(", ", @slave_missing_cols);
}
}
};
if ( my $e = $EVAL_ERROR ) {
PTDEBUG && _d($e);
if ( !$slave->lost_connection($e) ) {
push @problems, "Error checking table $db.$tbl on replica "
. $slave->name . ": $e";
next SLAVE;
}
my $slave_tbl_struct = eval { # Lost connection to slave. Reconnect and try again.
$tp->parse( eval { $slave->connect() };
$tp->get_create_table($slave->dbh, $db, $tbl) if ( $EVAL_ERROR ) {
); PTDEBUG && _d('Failed to connect to slave', $slave->name(),
}; ':', $EVAL_ERROR);
if ( $EVAL_ERROR ) { if ( !$have_warned && $o->get('quiet') < 2 ) {
push @problems, "Error parsing table $db.$tbl on replica " my $msg = "Trying to connect to replica "
. $slave->name . ": $EVAL_ERROR"; . $slave->name() . " to check $db.$tbl...\n";
next SLAVE; warn ts($msg);
} $have_warned = 1;
}
sleep 2; # wait between failed reconnect attempts
}
next; # try again
} # eval error
my @slave_missing_cols; # No error, so we successfully queried this slave.
foreach my $col ( @$checksum_cols ) { next SLAVE;
if ( !$slave_tbl_struct->{is_col}->{$col} ) {
push @slave_missing_cols, $col; } # while oktorun && have_time
} } # foreach slave
}
if ( @slave_missing_cols ) {
push @problems, "Table $db.$tbl on replica " . $slave->name
. " is missing these columns: "
. join(", ", @slave_missing_cols);
}
}
die join("\n", @problems) . "\n" if @problems; die join("\n", @problems) . "\n" if @problems;
@@ -12625,7 +12662,7 @@ disabled by specifying C<--no-check-replication-filters>.
pt-table-checksum checks that the L<"--replicate"> table exists on all pt-table-checksum checks that the L<"--replicate"> table exists on all
replicas, else checksumming can break replication when updates to the table replicas, else checksumming can break replication when updates to the table
on the master replicate to a replica that doesn't have the table. This on the master replicate to a replica that doesn't have the table. This
check cannot be disabled, and the tool wait forever until the table check cannot be disabled, and the tool waits forever until the table
exists on all replicas, printing L<"--progress"> messages while it waits. exists on all replicas, printing L<"--progress"> messages while it waits.
=item 3. Single chunk size =item 3. Single chunk size

View File

@@ -306,6 +306,8 @@ sub check_table {
my $db_tbl = $q->quote($db, $tbl); my $db_tbl = $q->quote($db, $tbl);
PTDEBUG && _d('Checking', $db_tbl); PTDEBUG && _d('Checking', $db_tbl);
$self->{check_table_error} = undef;
my $sql = "SHOW TABLES FROM " . $q->quote($db) my $sql = "SHOW TABLES FROM " . $q->quote($db)
. ' LIKE ' . $q->literal_like($tbl); . ' LIKE ' . $q->literal_like($tbl);
PTDEBUG && _d($sql); PTDEBUG && _d($sql);
@@ -313,8 +315,9 @@ sub check_table {
eval { eval {
$row = $dbh->selectrow_arrayref($sql); $row = $dbh->selectrow_arrayref($sql);
}; };
if ( $EVAL_ERROR ) { if ( my $e = $EVAL_ERROR ) {
PTDEBUG && _d($EVAL_ERROR); PTDEBUG && _d($e);
$self->{check_table_error} = $e;
return 0; return 0;
} }
if ( !$row->[0] || $row->[0] ne $tbl ) { if ( !$row->[0] || $row->[0] ne $tbl ) {