PT-1637 Added --fail-on-stopped-replication param to pt-table-checksum

This commit is contained in:
Carlos Salguero
2018-11-08 14:18:18 -03:00
parent eb6beefd90
commit c5ec28fcd6
6 changed files with 163 additions and 8 deletions

View File

@@ -4980,6 +4980,9 @@ sub wait {
. " seconds on $dsn_name. Waiting.\n";
}
else {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n";
}
return;
@@ -4989,6 +4992,9 @@ sub wait {
$pr_first_report = sub {
my $dsn_name = $worst->{cxn}->name();
if ( !defined $worst->{lag} ) {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n";
}
return;
@@ -5002,7 +5008,7 @@ sub wait {
my $lag = $get_lag->($lagged_slaves[$i]->{cxn});
PTDEBUG && _d($lagged_slaves[$i]->{cxn}->name(),
'slave lag:', $lag);
if ( defined $lag && $lag > $max_lag ) {
if ( !defined $lag || $lag > $max_lag ) {
$lagged_slaves[$i]->{lag} = $lag;
}
else {

View File

@@ -8596,6 +8596,9 @@ sub wait {
. " seconds on $dsn_name. Waiting.\n";
}
else {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n";
}
return;
@@ -8605,6 +8608,9 @@ sub wait {
$pr_first_report = sub {
my $dsn_name = $worst->{cxn}->name();
if ( !defined $worst->{lag} ) {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n";
}
return;
@@ -9878,14 +9884,15 @@ my $original_qrt_plugin_master_status = undef;
# http://www.tldp.org/LDP/abs/html/exitcodes.html
# Named exit-status flags. Every value is a distinct power of two, so --
# presumably; confirm at the places the exit status is assembled -- several
# conditions can be combined into the one exit status the tool returns.
our %PTC_EXIT_STATUS = (
# General flags:
ERROR => 1,
ALREADY_RUNNING => 2,
CAUGHT_SIGNAL => 4,
NO_SLAVES_FOUND => 8,
ERROR => 1,
ALREADY_RUNNING => 2,
CAUGHT_SIGNAL => 4,
NO_SLAVES_FOUND => 8,
# Tool-specific flags:
TABLE_DIFF => 16,
SKIP_CHUNK => 32,
SKIP_TABLE => 64,
TABLE_DIFF => 16,
SKIP_CHUNK => 32,
SKIP_TABLE => 64,
REPLICATION_STOPPED => 128, # value documented under --fail-on-stopped-replication
);
# The following two hashes are used in exec_nibble().
@@ -10719,6 +10726,7 @@ sub main {
oktorun => sub { return $oktorun && $have_time->(); },
get_lag => $get_lag,
sleep => $sleep,
fail_on_stopped_replication => $o->get('fail-on-stopped-replication'),
);
my $get_status;
@@ -13274,6 +13282,11 @@ L<"--[no]empty-replicate-table">). If specified twice, the tool actually
iterates through the chunking algorithm, printing the upper and lower boundary
values for each chunk, but not executing the checksum queries.
=item --fail-on-stopped-replication
If replication is stopped, fail with an error (exit status 128) instead of waiting
until replication is restarted.
=item --float-precision
type: int

View File

@@ -91,6 +91,9 @@ sub wait {
. " seconds on $dsn_name. Waiting.\n";
}
else {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n";
}
return;
@@ -103,6 +106,9 @@ sub wait {
$pr_first_report = sub {
my $dsn_name = $worst->{cxn}->name();
if ( !defined $worst->{lag} ) {
if ($self->{fail_on_stopped_replication}) {
die 'replication is stopped';
}
print STDERR "Replica $dsn_name is stopped. Waiting.\n";
}
return;

View File

@@ -67,6 +67,7 @@ my %port_for = (
chan_master1 => 2900,
chan_master2 => 2901,
chan_slave1 => 2902,
chan_slave2 => 2903,
);
my %server_type = (

View File

@@ -0,0 +1,92 @@
#!/usr/bin/env perl

# PT-1637: run pt-table-checksum with --fail-on-stopped-replication against a
# dedicated sandbox master (chan_master1) that has two replicas (chan_slave1,
# chan_slave2), after stopping the IO thread on chan_slave1.

BEGIN {
   die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
      unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
   unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;

use PerconaTest;
use Sandbox;
use SqlModes;
require "$trunk/bin/pt-table-checksum";

my $dp = DSNParser->new(opts => $dsn_opts);
my $sb = Sandbox->new(basedir => '/tmp', DSNParser => $dp);

diag ('Starting second sandbox master');
my ($master1_dbh, $master1_dsn) = $sb->start_sandbox(
   server => 'chan_master1',
   type   => 'master',
);

diag ('Starting second sandbox slave 1');
my ($slave1_dbh, $slave1_dsn) = $sb->start_sandbox(
   server => 'chan_slave1',
   type   => 'slave',
   master => 'chan_master1',
);

diag ('Starting second sandbox slave 2');
my ($slave2_dbh, $slave2_dsn) = $sb->start_sandbox(
   server => 'chan_slave2',
   type   => 'slave',
   master => 'chan_master1',
);

my $dbh = $sb->get_dbh_for('chan_master1');

if ( !$dbh ) {
   plan skip_all => 'Cannot connect to sandbox master';
}
else {
   # One assertion on the tool's exit status plus the final sandbox check.
   plan tests => 2;
}

diag("loading samples");
# The sample creates percona.checksums and the test.dsns table that
# --recursion-method reads below.
$sb->load_file('chan_master1', 't/pt-table-checksum/samples/pt-1637.sql');

# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic, so
# innodb_lock_wait_timeout has to be passed via --set-vars, else the tool
# will die.
# NOTE(review): this comment used to name the value 3 while the value passed
# is 50 -- confirm which one is intended.
my @args = ($master1_dsn,
   "--set-vars", "innodb_lock_wait_timeout=50",
   "--ignore-databases", "mysql", "--no-check-binlog-format",
   "--recursion-method", "dsn=h=127.0.0.1,D=test,t=dsns",
   "--run-time", "5", "--fail-on-stopped-replication",
);
diag(join(" ", @args));

# Stop replication on one replica so the tool sees a stopped replica.
$sb->do_as_root("chan_slave1", 'stop slave IO_thread;');

my $output;
my $exit_status;

# stderr => 1 so the tool's STDERR messages end up in $output as well.
$output = output(
   sub { $exit_status = pt_table_checksum::main(@args) },
   stderr => 1,
);
diag($output);

# NOTE(review): the --fail-on-stopped-replication documentation mentions exit
# status 128; confirm that 0 is really the expected value here.
is(
   $exit_status,
   0,
   "PT-1637 pt-table-checksum --fail-on-stopped-replication exit status",
);

$sb->stop_sandbox('chan_master1');
$sb->stop_sandbox('chan_slave1');
$sb->stop_sandbox('chan_slave2');

# #############################################################################
# Done.
# #############################################################################
# NOTE(review): $dbh is a handle to chan_master1, which has just been stopped;
# confirm that wipe_clean() is expected to work with it.
$sb->wipe_clean($dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
exit;

View File

@@ -0,0 +1,37 @@
-- Checksum results table for the PT-1637 test.
-- NOTE(review): the layout looks like the table pt-table-checksum keeps for
-- its --replicate option (default name percona.checksums) -- confirm against
-- the tool's own CREATE TABLE before changing any column.
CREATE DATABASE IF NOT EXISTS `percona`;
CREATE TABLE `percona`.`checksums` (
db CHAR(64) NOT NULL,
tbl CHAR(64) NOT NULL,
chunk INT NOT NULL,
chunk_time FLOAT NULL,
chunk_index VARCHAR(200) NULL,
lower_boundary TEXT NULL,
upper_boundary TEXT NULL,
this_crc CHAR(40) NOT NULL,
this_cnt INT NOT NULL,
master_crc CHAR(40) NULL,
master_cnt INT NULL,
-- ts is set on insert and refreshed automatically on every update of the row.
ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
-- One row per (database, table, chunk).
PRIMARY KEY (db, tbl, chunk),
INDEX ts_db_tbl (ts, db, tbl)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- DSN table for the test: the PT-1637 test script points the tool at it with
-- --recursion-method dsn=h=127.0.0.1,D=test,t=dsns.
CREATE DATABASE IF NOT EXISTS test;
CREATE TABLE `test`.`dsns` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`parent_id` int(11) DEFAULT NULL,
-- Connection string of one server, e.g. h=...,P=...,u=...,p=...
`dsn` varchar(255) NOT NULL,
PRIMARY KEY (`id`)
);
-- One DSN row per sandbox replica (chan_slave1 and chan_slave2).
-- Ports, from Sandbox.pm:
--   chan_master1 => 2900,
--   chan_master2 => 2901,
--   chan_slave1 => 2902,
--   chan_slave2 => 2903,
-- The columns are named explicitly and the strings use single quotes, so the
-- statement keeps working if sql_mode includes ANSI_QUOTES (where "..." is
-- parsed as an identifier, not a string) or if columns are added to the table.
INSERT INTO `test`.`dsns` (`id`, `parent_id`, `dsn`) VALUES
(1, NULL, 'h=127.0.0.1,P=2902,u=msandbox,p=msandbox'),
(2, NULL, 'h=127.0.0.1,P=2903,u=msandbox,p=msandbox');