Fix for 1009510 and 1039569: pt-table-checksum doesn't check that tables exist on all replicas / shouldn't die if it can't create db/table

This commit is contained in:
Brian Fraser
2012-08-23 03:14:14 -03:00
parent 5ddef54fc8
commit c444384c6c
5 changed files with 334 additions and 44 deletions

View File

@@ -3321,16 +3321,20 @@ $Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
local $EVAL_ERROR;
eval {
require Quoter;
};
sub new {
my ( $class, %args ) = @_;
my @required_args = qw(Quoter);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my $self = { %args };
$self->{Quoter} ||= Quoter->new();
return bless $self, $class;
}
sub Quoter { shift->{Quoter} }
sub get_create_table {
my ( $self, $dbh, $db, $tbl ) = @_;
die "I need a dbh parameter" unless $dbh;
@@ -3576,6 +3580,73 @@ sub check_table {
return 1;
}
sub check_table_against_slave {
my ($self, %args) = @_;
my @required_args = qw(tbl_name slave_tbl slave_name master_tbl);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my ($tbl_name, $slave_tbl, $slave_name, $master_tbl) = @args{@required_args};
my $columns_in_slave = { map { $_ => 1 } @{$slave_tbl->{cols}} };
my $columns_in_master = { map { $_ => 1 } @{$master_tbl->{cols}} };
my @extra_columns_in_slave;
for my $column (keys %$columns_in_slave) {
if ( $columns_in_master->{$column} ) {
$columns_in_master->{$column}++
}
else {
push @extra_columns_in_slave, $column;
}
}
my @not_in_slave = grep { $columns_in_master->{$_} != 2 }
keys %$columns_in_master;
die "Table $tbl_name in $slave_name differs from master: "
. "Columns @not_in_slave are missing\n "
if @not_in_slave;
print STDERR "Table $tbl_name in $slave_name has more columns than "
. "its master: @extra_columns_in_slave\n"
if @extra_columns_in_slave;
return 1;
}
sub check_table_against_slaves {
my ($self, %args) = @_;
my @required_args = qw(tbl_struct db table slaves);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my ($tbl_struct, $db, $tbl, $slaves) = @args{@required_args};
for my $slave_cxn (@$slaves) {
my $slave_has_table = $self->check_table(
dbh => $slave_cxn->dbh(),
db => $db,
tbl => $tbl,
);
die "Table $db.$tbl doesn't exist in slave " . $slave_cxn->name()
unless $slave_has_table;
my $ddl = $self->get_create_table( $slave_cxn->dbh, $db, $tbl );
my $slave_tbl = $self->parse( $ddl );
$self->check_table_against_slave(
tbl_name => "$db.$tbl",
master_tbl => $tbl_struct,
slave_tbl => $slave_tbl,
slave_name => $slave_cxn->name(),
);
}
}
sub get_engine {
my ( $self, $ddl, $opts ) = @_;
my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
@@ -8801,6 +8872,15 @@ sub main {
: $o->get('chunk-size');
$tbl->{chunk_size} = $chunk_size;
if ( $o->get('check-replicate-table-columns') ) {
$tp->check_table_against_slaves(
tbl_struct => $tbl->{tbl_struct},
db => $tbl->{db},
table => $tbl->{tbl},
slaves => $slaves,
) if $slaves;
}
# Make a nibble iterator for this table. This should only fail
# if the table has no indexes and is too large to checksum in
# one chunk.
@@ -9204,19 +9284,26 @@ sub check_repl_table {
$sql = "CREATE DATABASE IF NOT EXISTS "
. $q->quote($db)
. " /* pt-table-checksum */";
# local $@;
local $EVAL_ERROR;
PTDEBUG && _d($sql);
eval {
$dbh->do($sql);
};
if ( $EVAL_ERROR ) {
if ( @db_exists ) {
die "Error executing $sql: $EVAL_ERROR\n"
. "The database exists on the master, but replication will "
. "break if it does not also exist on all replicas. If the "
. "--replicate database and table already exist on the master "
. "and all replicas, or if you created them manually, then "
. "specify --no-create-replicate-table to avoid this error."
my $e = $EVAL_ERROR;
for my $slave_cxn (@$slaves) {
my $slave_dbh = $slave_cxn->dbh();
my $check_sql = "SHOW DATABASES LIKE '$db'";
PTDEBUG && _d($check_sql);
my @db_exists_in_slave = $slave_dbh->selectrow_array($check_sql);
die "Error executing $sql: $e\n"
. "The $db database exists on the master, but doesn't in slave "
. $slave_cxn->name() . "; This can cause replication to fail. "
. "If you understand the risks, you can specify "
. "--no-create-replicate-table to avoid this error."
unless @db_exists_in_slave;
}
}
else {
die "--replicate database $db does not exist and it cannot be "
@@ -9238,32 +9325,39 @@ sub check_repl_table {
# Always create the table, unless --no-create-replicate-table
# was passed in; see https://bugs.launchpad.net/percona-toolkit/+bug/950294
if ( !$tbl_exists ) {
if ( !$o->get('create-replicate-table') ) {
if ( !$tbl_exists && !$o->get('create-replicate-table') ) {
die "--replicate table $repl_table does not exist and "
. "--no-create-replicate-table was specified. "
. "You need to create the table.\n";
}
}
else {
PTDEBUG && _d('--replicate table', $repl_table, 'already exists');
PTDEBUG && $tbl_exists && _d('--replicate table', $repl_table, 'already exists');
# We used to check the table privs here, but:
# https://bugs.launchpad.net/percona-toolkit/+bug/916168
}
if ( $o->get('create-replicate-table') ) {
local $@;
local $EVAL_ERROR;
eval {
create_repl_table(%args);
};
if ( $EVAL_ERROR ) {
if ( $tbl_exists ) {
die "Error executing $sql: $EVAL_ERROR\n"
. "The table exists on the master, but replication will "
. "break if it does not also exist on all replicas. If the "
. "--replicate database and table already exist on the master "
. "and all replicas, or if you created them manually, then "
. "specify --no-create-replicate-table to avoid this error."
my $e = $EVAL_ERROR;
my $ddl = $tp->get_create_table( $dbh, $db, $tbl );
my $tbl_struct = $tp->parse( $ddl );
eval {
$tp->check_table_against_slaves(
tbl_struct => $tbl_struct,
db => $db,
table => $tbl,
slaves => $slaves,
) if $slaves;
};
die "Error executing $sql: $e\n"
. $EVAL_ERROR
. "\nThis can cause replication to fail. "
. "If you understand the risks, you can specify "
. "--no-create-replicate-table to avoid this error."
if $EVAL_ERROR;
}
else {
die "--replicate table $tbl does not exist and it cannot be "
@@ -9988,6 +10082,14 @@ consumes time. Making chunks too small will cause the overhead to become
relatively larger. It is therefore recommended that you not make chunks too
small, because the tool may take a very long time to complete if you do.
=item --[no]check-replicate-table-columns
default: yes; group: Safety
Checks that tables in slaves don't differ from the table in master before
checksumming. Tables with missing columns will be skipped with an error;
Tables with extra columns will generate a warning.
=item --[no]check-replication-filters
default: yes; group: Safety

View File

@@ -38,16 +38,20 @@ $Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
local $EVAL_ERROR;
eval {
require Quoter;
};
sub new {
my ( $class, %args ) = @_;
my @required_args = qw(Quoter);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my $self = { %args };
$self->{Quoter} ||= Quoter->new();
return bless $self, $class;
}
sub Quoter { shift->{Quoter} }
sub get_create_table {
my ( $self, $dbh, $db, $tbl ) = @_;
die "I need a dbh parameter" unless $dbh;
@@ -362,6 +366,84 @@ sub check_table {
return 1;
}
# Required args:
# * slave_tbl hashref:
# * master_tbl hashref: table struct to check against
# * slave_name scalar: name of the slave, for error messages
# Returns: bool
# Can die: yes
# check_table_against_slave() checks the given table struct against
# a slave dbh; It should only be called after checking that the table
# exists in the slave (through check_table()). It will die if the table
# in the slave doesn't have any of the columns in the master, and warn
# if there are any extra columns in the slave.
sub check_table_against_slave {
my ($self, %args) = @_;
my @required_args = qw(tbl_name slave_tbl slave_name master_tbl);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my ($tbl_name, $slave_tbl, $slave_name, $master_tbl) = @args{@required_args};
my $columns_in_slave = { map { $_ => 1 } @{$slave_tbl->{cols}} };
my $columns_in_master = { map { $_ => 1 } @{$master_tbl->{cols}} };
my @extra_columns_in_slave;
for my $column (keys %$columns_in_slave) {
if ( $columns_in_master->{$column} ) {
$columns_in_master->{$column}++
}
else {
push @extra_columns_in_slave, $column;
}
}
my @not_in_slave = grep { $columns_in_master->{$_} != 2 }
keys %$columns_in_master;
die "Table $tbl_name in $slave_name differs from master: "
. "Columns @not_in_slave are missing\n "
if @not_in_slave;
print STDERR "Table $tbl_name in $slave_name has more columns than "
. "its master: @extra_columns_in_slave\n"
if @extra_columns_in_slave;
return 1;
}
sub check_table_against_slaves {
my ($self, %args) = @_;
my @required_args = qw(tbl_struct db table slaves);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless $args{$arg};
}
my ($tbl_struct, $db, $tbl, $slaves) = @args{@required_args};
for my $slave_cxn (@$slaves) {
my $slave_has_table = $self->check_table(
dbh => $slave_cxn->dbh(),
db => $db,
tbl => $tbl,
);
die "Table $db.$tbl doesn't exist in slave " . $slave_cxn->name()
unless $slave_has_table;
my $ddl = $self->get_create_table( $slave_cxn->dbh, $db, $tbl );
my $slave_tbl = $self->parse( $ddl );
$self->check_table_against_slave(
tbl_name => "$db.$tbl",
master_tbl => $tbl_struct,
slave_tbl => $slave_tbl,
slave_name => $slave_cxn->name(),
);
}
}
sub get_engine {
my ( $self, $ddl, $opts ) = @_;
my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;

View File

@@ -9,7 +9,7 @@ BEGIN {
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More tests => 40;
use Test::More;
use TableParser;
use Quoter;
@@ -910,9 +910,78 @@ is_deeply(
'Index with comma in its name (issue 388)'
);
# #############################################################################
# pt-table-checksum doesn't check that tables exist on all replicas
# https://bugs.launchpad.net/percona-toolkit/+bug/1009510
# #############################################################################
my $master_dbh = $sb->get_dbh_for('master');
my $slave_dbh = $sb->get_dbh_for('slave1');
SKIP: {
skip "Cannot connect to sandbox master", 2 unless $master_dbh;
skip "Cannot connect to sandbox slave1", 2 unless $slave_dbh;
$master_dbh->do("DROP DATABASE IF EXISTS test");
$master_dbh->do("CREATE DATABASE test");
$sb->wait_for_slaves();
$slave_dbh->do("CREATE TABLE test.bug_1009510 ( i int, b int )");
$master_dbh->do("CREATE TABLE IF NOT EXISTS test.bug_1009510 ( i int )");
my $master_tbl = get_tbl_struct( $tp, $master_dbh, "test", "bug_1009510" );
$master_tbl->{db} = "test";
$master_tbl->{tbl} = "bug_1009510";
my $slave_tbl = get_tbl_struct( $tp, $slave_dbh, "test", "bug_1009510" );
my ($output) = full_output(sub {
$tp->check_table_against_slave(
tbl_name => "bug_1009510.test",
master_tbl => $master_tbl,
slave_tbl => $slave_tbl,
slave_name => 'slave1',
);
});
like(
$output,
qr/has more columns than its master: b/,
"check_table_against_slave warns about extra columns"
);
$slave_dbh->do("DROP TABLE test.bug_1009510");
$slave_dbh->do("CREATE TABLE test.bug_1009510 ( b int )");
$slave_tbl = get_tbl_struct( $tp, $slave_dbh, "test", "bug_1009510" );
local $EVAL_ERROR;
eval {
$tp->check_table_against_slave(
tbl_name => "bug_1009510.test",
master_tbl => $master_tbl,
slave_tbl => $slave_tbl,
slave_name => 'slave1',
);
};
like(
$EVAL_ERROR,
qr/differs from master: Columns i/,
"check_table_against_slave dies if the slave table has missing columns"
);
}
sub get_tbl_struct {
my ($tp, $dbh, $db, $tbl) = @_;
my $ddl = $tp->get_create_table( $dbh, $db, $tbl );
return $tp->parse( $ddl );
}
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($dbh) if $dbh;
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
exit;
done_testing;

View File

@@ -41,9 +41,6 @@ elsif ( !$slave1_dbh ) {
elsif ( !@{$master_dbh->selectall_arrayref("show databases like 'sakila'")} ) {
plan skip_all => 'sakila database is not loaded';
}
else {
plan tests => 38;
}
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
# so we need to specify --lock-wait-timeout=3 else the tool will die.
@@ -476,9 +473,48 @@ is(
"Bug 821675 (dot): 0 errors"
);
# #############################################################################
# pt-table-checksum doesn't check that tables exist on all replicas
# https://bugs.launchpad.net/percona-toolkit/+bug/1009510
# #############################################################################
$master_dbh->do("DROP DATABASE IF EXISTS bug_1009510");
$master_dbh->do("CREATE DATABASE bug_1009510");
$sb->wait_for_slaves();
$slave1_dbh->do("CREATE TABLE bug_1009510.bug_1009510 ( i int, b int )");
$master_dbh->do("CREATE TABLE IF NOT EXISTS bug_1009510.bug_1009510 ( i int )");
($output) = full_output(
sub { pt_table_checksum::main(@args,
qw(-t bug_1009510.bug_1009510)) },
);
like(
$output,
qr/has more columns than its master: b/,
"Bug 1009510: ptc warns about extra columns"
);
$slave1_dbh->do("DROP TABLE bug_1009510.bug_1009510");
$slave1_dbh->do("CREATE TABLE bug_1009510.bug_1009510 ( b int )");
($output) = full_output(
sub { $exit_status = pt_table_checksum::main(@args,
qw(-t bug_1009510.bug_1009510)) },
);
like(
$output,
qr/differs from master: Columns i/,
"Bug 1009510: ptc dies if the slave table has missing columns"
);
$sb->wipe_clean($master_dbh);
$sb->wipe_clean($slave1_dbh);
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($master_dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
exit;
done_testing;

View File

@@ -132,16 +132,17 @@ like(
"Read-only user (bug 987694): checksummed rows"
);
($output) = full_output(
sub { $exit_status = pt_table_checksum::main(@args,
($output, $exit_status) = full_output(
sub { pt_table_checksum::main(@args,
"$master_dsn,u=ro_checksum_user,p=msandbox",
qw(--recursion-method none)
) }
);
like($output,
qr/\QThe database exists on the master, but replication will break/,
"Error if db exists on the master, can't CREATE DATABASE, and --no-create-replicate-table was not specified",
is(
$exit_status,
0,
"No error if db exists on the master, can't CREATE DATABASE, --no-create-replicate-table was not specified, but the database does exist in all slaves"
);
diag(qx{/tmp/12345/use -u root -e 'DROP TABLE `percona`.`checksums`'});