PT-1637 Added --fail-on-stopped-replication param to pt-table-checksum

This commit is contained in:
Carlos Salguero
2018-11-08 14:18:18 -03:00
parent eb6beefd90
commit c5ec28fcd6
6 changed files with 163 additions and 8 deletions

View File

@@ -4980,6 +4980,9 @@ sub wait {
. " seconds on $dsn_name. Waiting.\n"; . " seconds on $dsn_name. Waiting.\n";
} }
else { else {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n"; print STDERR "Replica $dsn_name is stopped. Waiting.\n";
} }
return; return;
@@ -4989,6 +4992,9 @@ sub wait {
$pr_first_report = sub { $pr_first_report = sub {
my $dsn_name = $worst->{cxn}->name(); my $dsn_name = $worst->{cxn}->name();
if ( !defined $worst->{lag} ) { if ( !defined $worst->{lag} ) {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n"; print STDERR "Replica $dsn_name is stopped. Waiting.\n";
} }
return; return;
@@ -5002,7 +5008,7 @@ sub wait {
my $lag = $get_lag->($lagged_slaves[$i]->{cxn}); my $lag = $get_lag->($lagged_slaves[$i]->{cxn});
PTDEBUG && _d($lagged_slaves[$i]->{cxn}->name(), PTDEBUG && _d($lagged_slaves[$i]->{cxn}->name(),
'slave lag:', $lag); 'slave lag:', $lag);
if ( defined $lag && $lag > $max_lag ) { if ( !defined $lag || $lag > $max_lag ) {
$lagged_slaves[$i]->{lag} = $lag; $lagged_slaves[$i]->{lag} = $lag;
} }
else { else {

View File

@@ -8596,6 +8596,9 @@ sub wait {
. " seconds on $dsn_name. Waiting.\n"; . " seconds on $dsn_name. Waiting.\n";
} }
else { else {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n"; print STDERR "Replica $dsn_name is stopped. Waiting.\n";
} }
return; return;
@@ -8605,6 +8608,9 @@ sub wait {
$pr_first_report = sub { $pr_first_report = sub {
my $dsn_name = $worst->{cxn}->name(); my $dsn_name = $worst->{cxn}->name();
if ( !defined $worst->{lag} ) { if ( !defined $worst->{lag} ) {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n"; print STDERR "Replica $dsn_name is stopped. Waiting.\n";
} }
return; return;
@@ -9886,6 +9892,7 @@ our %PTC_EXIT_STATUS = (
TABLE_DIFF => 16, TABLE_DIFF => 16,
SKIP_CHUNK => 32, SKIP_CHUNK => 32,
SKIP_TABLE => 64, SKIP_TABLE => 64,
REPLICATION_STOPPED => 128,
); );
# The following two hashes are used in exec_nibble(). # The following two hashes are used in exec_nibble().
@@ -10719,6 +10726,7 @@ sub main {
oktorun => sub { return $oktorun && $have_time->(); }, oktorun => sub { return $oktorun && $have_time->(); },
get_lag => $get_lag, get_lag => $get_lag,
sleep => $sleep, sleep => $sleep,
fail_on_stopped_replication => $o->get('fail-on-stopped-replication'),
); );
my $get_status; my $get_status;
@@ -13274,6 +13282,11 @@ L<"--[no]empty-replicate-table">). If specified twice, the tool actually
iterates through the chunking algorithm, printing the upper and lower boundary iterates through the chunking algorithm, printing the upper and lower boundary
values for each chunk, but not executing the checksum queries. values for each chunk, but not executing the checksum queries.
=item --fail-on-stopped-replication
If replication is stopped, fail with an error (exit status 128) instead of waiting
until replication is restarted.
=item --float-precision =item --float-precision
type: int type: int

View File

@@ -91,6 +91,9 @@ sub wait {
. " seconds on $dsn_name. Waiting.\n"; . " seconds on $dsn_name. Waiting.\n";
} }
else { else {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n"; print STDERR "Replica $dsn_name is stopped. Waiting.\n";
} }
return; return;
@@ -103,6 +106,9 @@ sub wait {
$pr_first_report = sub { $pr_first_report = sub {
my $dsn_name = $worst->{cxn}->name(); my $dsn_name = $worst->{cxn}->name();
if ( !defined $worst->{lag} ) { if ( !defined $worst->{lag} ) {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n"; print STDERR "Replica $dsn_name is stopped. Waiting.\n";
} }
return; return;

View File

@@ -67,6 +67,7 @@ my %port_for = (
chan_master1 => 2900, chan_master1 => 2900,
chan_master2 => 2901, chan_master2 => 2901,
chan_slave1 => 2902, chan_slave1 => 2902,
chan_slave2 => 2903,
); );
my %server_type = ( my %server_type = (

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env perl

# PT-1637: run pt-table-checksum with --fail-on-stopped-replication against a
# dedicated sandbox master (chan_master1) that has two replicas (chan_slave1,
# chan_slave2), after stopping the IO thread on chan_slave1.

BEGIN {
   die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
      unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
   unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;

use PerconaTest;
use Sandbox;
use SqlModes;
require "$trunk/bin/pt-table-checksum";

my $dp = DSNParser->new(opts => $dsn_opts);
my $sb = Sandbox->new(basedir => '/tmp', DSNParser => $dp);

# Bring up the servers this test needs: one master and two replicas of it.
diag ('Starting second sandbox master');
my ($master1_dbh, $master1_dsn) = $sb->start_sandbox(
   server => 'chan_master1',
   type   => 'master',
);

diag ('Starting second sandbox slave 1');
my ($slave1_dbh, $slave1_dsn) = $sb->start_sandbox(
   server => 'chan_slave1',
   type   => 'slave',
   master => 'chan_master1',
);

diag ('Starting second sandbox slave 2');
my ($slave2_dbh, $slave2_dsn) = $sb->start_sandbox(
   server => 'chan_slave2',
   type   => 'slave',
   master => 'chan_master1',
);

# NOTE(review): the sandboxes above are started before this connectivity
# check, so the skip_all path leaves them running -- confirm that is acceptable.
my $dbh = $sb->get_dbh_for('chan_master1');

if ( !$dbh ) {
   plan skip_all => 'Cannot connect to sandbox master';
}
else {
   plan tests => 2;
}

diag("loading samples");
$sb->load_file('chan_master1', 't/pt-table-checksum/samples/pt-1637.sql');

# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic, so
# innodb_lock_wait_timeout has to be passed through --set-vars, else the tool
# will die.
# NOTE(review): the value passed below is 50, not 3 -- confirm which is intended.
my @args = ($master1_dsn,
   "--set-vars", "innodb_lock_wait_timeout=50",
   "--ignore-databases", "mysql", "--no-check-binlog-format",
   "--recursion-method", "dsn=h=127.0.0.1,D=test,t=dsns",
   "--run-time", "5", "--fail-on-stopped-replication",
);
diag(join(" ", @args));

# Stop replication on one replica before running the tool.
$sb->do_as_root("chan_slave1", 'stop slave IO_thread;');

my $output;
my $exit_status;

$output = output(
   sub { $exit_status = pt_table_checksum::main(@args) },
   stderr => 1,
);
diag($output);

# NOTE(review): the option's documentation states exit status 128 when
# replication is stopped, while this assertion expects 0 -- confirm which value
# is intended before relying on this test.
is(
   $exit_status,
   0,
   "PT-1637 pt-table-checksum --fail-on-stopped-replication exit status",
);

$sb->stop_sandbox('chan_master1');
$sb->stop_sandbox('chan_slave1');
$sb->stop_sandbox('chan_slave2');

# #############################################################################
# Done.
# #############################################################################
# NOTE(review): $dbh was obtained for chan_master1, which has just been
# stopped -- verify that wipe_clean() is meant to receive this handle.
$sb->wipe_clean($dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
exit;

View File

@@ -0,0 +1,37 @@
-- Fixture for the PT-1637 test: creates the percona.checksums table and a
-- test.dsns table listing the two replica DSNs.

CREATE DATABASE IF NOT EXISTS `percona`;

CREATE TABLE `percona`.`checksums` (
   db             CHAR(64)     NOT NULL,
   tbl            CHAR(64)     NOT NULL,
   chunk          INT          NOT NULL,
   chunk_time     FLOAT            NULL,
   chunk_index    VARCHAR(200)     NULL,
   lower_boundary TEXT             NULL,
   upper_boundary TEXT             NULL,
   this_crc       CHAR(40)     NOT NULL,
   this_cnt       INT          NOT NULL,
   master_crc     CHAR(40)         NULL,
   master_cnt     INT              NULL,
   ts             TIMESTAMP    NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
   PRIMARY KEY (db, tbl, chunk),
   INDEX ts_db_tbl (ts, db, tbl)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

CREATE DATABASE IF NOT EXISTS test;

CREATE TABLE `test`.`dsns` (
   `id`        int(11)      NOT NULL AUTO_INCREMENT,
   `parent_id` int(11)      DEFAULT NULL,
   `dsn`       varchar(255) NOT NULL,
   PRIMARY KEY (`id`)
);

-- From Sandbox.pm
-- chan_master1 => 2900,
-- chan_master2 => 2901,
-- chan_slave1 => 2902,
-- chan_slave2 => 2903,

-- One row per replica (ports 2902 and 2903 above). String values use single
-- quotes so they stay string literals even when ANSI_QUOTES is enabled, and
-- the column list is explicit so the rows do not depend on column order.
INSERT INTO `test`.`dsns` (`id`, `parent_id`, `dsn`) VALUES
   (1, NULL, 'h=127.0.0.1,P=2902,u=msandbox,p=msandbox'),
   (2, NULL, 'h=127.0.0.1,P=2903,u=msandbox,p=msandbox');