From c444384c6c2b68b464255281066651a72d205fe0 Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Thu, 23 Aug 2012 03:14:14 -0300 Subject: [PATCH] Fix for 1009510 and 1039569: pt-table-checksum doesn't check that tables exist on all replicas / shouldn't die if it can't create db/table --- bin/pt-table-checksum | 160 ++++++++++++++++++++++++++++------- lib/TableParser.pm | 90 +++++++++++++++++++- t/lib/TableParser.t | 73 +++++++++++++++- t/pt-table-checksum/basics.t | 44 +++++++++- t/pt-table-checksum/privs.t | 11 +-- 5 files changed, 334 insertions(+), 44 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 7e78d9a2..1d033351 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -3321,16 +3321,20 @@ $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; $Data::Dumper::Quotekeys = 0; +local $EVAL_ERROR; +eval { + require Quoter; +}; + sub new { my ( $class, %args ) = @_; - my @required_args = qw(Quoter); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } my $self = { %args }; + $self->{Quoter} ||= Quoter->new(); return bless $self, $class; } +sub Quoter { shift->{Quoter} } + sub get_create_table { my ( $self, $dbh, $db, $tbl ) = @_; die "I need a dbh parameter" unless $dbh; @@ -3576,6 +3580,73 @@ sub check_table { return 1; } + +sub check_table_against_slave { + my ($self, %args) = @_; + + my @required_args = qw(tbl_name slave_tbl slave_name master_tbl); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($tbl_name, $slave_tbl, $slave_name, $master_tbl) = @args{@required_args}; + + my $columns_in_slave = { map { $_ => 1 } @{$slave_tbl->{cols}} }; + my $columns_in_master = { map { $_ => 1 } @{$master_tbl->{cols}} }; + + my @extra_columns_in_slave; + for my $column (keys %$columns_in_slave) { + if ( $columns_in_master->{$column} ) { + $columns_in_master->{$column}++ + } + else { + push @extra_columns_in_slave, $column; + } + } + + my @not_in_slave = grep { $columns_in_master->{$_} != 2 } + keys %$columns_in_master; + + die "Table $tbl_name in $slave_name differs from master: " + . "Columns @not_in_slave are missing\n " + if @not_in_slave; + + print STDERR "Table $tbl_name in $slave_name has more columns than " + . "its master: @extra_columns_in_slave\n" + if @extra_columns_in_slave; + + return 1; +} + +sub check_table_against_slaves { + my ($self, %args) = @_; + + my @required_args = qw(tbl_struct db table slaves); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + + my ($tbl_struct, $db, $tbl, $slaves) = @args{@required_args}; + + for my $slave_cxn (@$slaves) { + my $slave_has_table = $self->check_table( + dbh => $slave_cxn->dbh(), + db => $db, + tbl => $tbl, + ); + die "Table $db.$tbl doesn't exist in slave " . $slave_cxn->name() + unless $slave_has_table; + + my $ddl = $self->get_create_table( $slave_cxn->dbh, $db, $tbl ); + my $slave_tbl = $self->parse( $ddl ); + $self->check_table_against_slave( + tbl_name => "$db.$tbl", + master_tbl => $tbl_struct, + slave_tbl => $slave_tbl, + slave_name => $slave_cxn->name(), + ); + } +} + sub get_engine { my ( $self, $ddl, $opts ) = @_; my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; @@ -8801,6 +8872,15 @@ sub main { : $o->get('chunk-size'); $tbl->{chunk_size} = $chunk_size; + if ( $o->get('check-replicate-table-columns') ) { + $tp->check_table_against_slaves( + tbl_struct => $tbl->{tbl_struct}, + db => $tbl->{db}, + table => $tbl->{tbl}, + slaves => $slaves, + ) if $slaves; + } + # Make a nibble iterator for this table. This should only fail # if the table has no indexes and is too large to checksum in # one chunk. @@ -9204,19 +9284,26 @@ sub check_repl_table { $sql = "CREATE DATABASE IF NOT EXISTS " . $q->quote($db) . " /* pt-table-checksum */"; -# local $@; + local $EVAL_ERROR; PTDEBUG && _d($sql); eval { $dbh->do($sql); }; if ( $EVAL_ERROR ) { if ( @db_exists ) { - die "Error executing $sql: $EVAL_ERROR\n" - . "The database exists on the master, but replication will " - . "break if it does not also exist on all replicas. If the " - . "--replicate database and table already exist on the master " - . "and all replicas, or if you created them manually, then " - . "specify --no-create-replicate-table to avoid this error." + my $e = $EVAL_ERROR; + for my $slave_cxn (@$slaves) { + my $slave_dbh = $slave_cxn->dbh(); + my $check_sql = "SHOW DATABASES LIKE '$db'"; + PTDEBUG && _d($check_sql); + my @db_exists_in_slave = $slave_dbh->selectrow_array($check_sql); + die "Error executing $sql: $e\n" + . "The $db database exists on the master, but doesn't in slave " + . $slave_cxn->name() . "; This can cause replication to fail. " + . "If you understand the risks, you can specify " + . "--no-create-replicate-table to avoid this error." + unless @db_exists_in_slave; + } } else { die "--replicate database $db does not exist and it cannot be " @@ -9238,32 +9325,39 @@ sub check_repl_table { # Always create the table, unless --no-create-replicate-table # was passed in; see https://bugs.launchpad.net/percona-toolkit/+bug/950294 - if ( !$tbl_exists ) { - if ( !$o->get('create-replicate-table') ) { - die "--replicate table $repl_table does not exist and " - . "--no-create-replicate-table was specified. " - . "You need to create the table.\n"; - } - } - else { - PTDEBUG && _d('--replicate table', $repl_table, 'already exists'); - # We used to check the table privs here, but: - # https://bugs.launchpad.net/percona-toolkit/+bug/916168 + if ( !$tbl_exists && !$o->get('create-replicate-table') ) { + die "--replicate table $repl_table does not exist and " + . "--no-create-replicate-table was specified. " + . "You need to create the table.\n"; } + PTDEBUG && $tbl_exists && _d('--replicate table', $repl_table, 'already exists'); + # We used to check the table privs here, but: + # https://bugs.launchpad.net/percona-toolkit/+bug/916168 if ( $o->get('create-replicate-table') ) { - local $@; + local $EVAL_ERROR; eval { create_repl_table(%args); }; if ( $EVAL_ERROR ) { if ( $tbl_exists ) { - die "Error executing $sql: $EVAL_ERROR\n" - . "The table exists on the master, but replication will " - . "break if it does not also exist on all replicas. If the " - . "--replicate database and table already exist on the master " - . "and all replicas, or if you created them manually, then " - . "specify --no-create-replicate-table to avoid this error." + my $e = $EVAL_ERROR; + my $ddl = $tp->get_create_table( $dbh, $db, $tbl ); + my $tbl_struct = $tp->parse( $ddl ); + eval { + $tp->check_table_against_slaves( + tbl_struct => $tbl_struct, + db => $db, + table => $tbl, + slaves => $slaves, + ) if $slaves; + }; + die "Error executing $sql: $e\n" + . $EVAL_ERROR + . "\nThis can cause replication to fail. " + . "If you understand the risks, you can specify " + . "--no-create-replicate-table to avoid this error." + if $EVAL_ERROR; } else { die "--replicate table $tbl does not exist and it cannot be " @@ -9988,6 +10082,14 @@ consumes time. Making chunks too small will cause the overhead to become relatively larger. It is therefore recommended that you not make chunks too small, because the tool may take a very long time to complete if you do. +=item --[no]check-replicate-table-columns + +default: yes; group: Safety + +Checks that tables in slaves don't differ from the table in master before +checksumming. Tables with missing columns will be skipped with an error; +Tables with extra columns will generate a warning. + =item --[no]check-replication-filters default: yes; group: Safety diff --git a/lib/TableParser.pm b/lib/TableParser.pm index 9d90de3b..99ed9559 100644 --- a/lib/TableParser.pm +++ b/lib/TableParser.pm @@ -38,16 +38,20 @@ $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; $Data::Dumper::Quotekeys = 0; +local $EVAL_ERROR; +eval { + require Quoter; +}; + sub new { my ( $class, %args ) = @_; - my @required_args = qw(Quoter); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } my $self = { %args }; + $self->{Quoter} ||= Quoter->new(); return bless $self, $class; } +sub Quoter { shift->{Quoter} } + sub get_create_table { my ( $self, $dbh, $db, $tbl ) = @_; die "I need a dbh parameter" unless $dbh; @@ -362,6 +366,84 @@ sub check_table { return 1; } +# Required args: +# * slave_tbl hashref: +# * master_tbl hashref: table struct to check against +# * slave_name scalar: name of the slave, for error messages +# Returns: bool +# Can die: yes +# check_table_against_slave() checks the given table struct against +# a slave dbh; It should only be called after checking that the table +# exists in the slave (through check_table()). It will die if the table +# in the slave doesn't have any of the columns in the master, and warn +# if there are any extra columns in the slave. + +sub check_table_against_slave { + my ($self, %args) = @_; + + my @required_args = qw(tbl_name slave_tbl slave_name master_tbl); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($tbl_name, $slave_tbl, $slave_name, $master_tbl) = @args{@required_args}; + + my $columns_in_slave = { map { $_ => 1 } @{$slave_tbl->{cols}} }; + my $columns_in_master = { map { $_ => 1 } @{$master_tbl->{cols}} }; + + my @extra_columns_in_slave; + for my $column (keys %$columns_in_slave) { + if ( $columns_in_master->{$column} ) { + $columns_in_master->{$column}++ + } + else { + push @extra_columns_in_slave, $column; + } + } + + my @not_in_slave = grep { $columns_in_master->{$_} != 2 } + keys %$columns_in_master; + + die "Table $tbl_name in $slave_name differs from master: " + . "Columns @not_in_slave are missing\n " + if @not_in_slave; + + print STDERR "Table $tbl_name in $slave_name has more columns than " + . "its master: @extra_columns_in_slave\n" + if @extra_columns_in_slave; + + return 1; +} + +sub check_table_against_slaves { + my ($self, %args) = @_; + + my @required_args = qw(tbl_struct db table slaves); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + + my ($tbl_struct, $db, $tbl, $slaves) = @args{@required_args}; + + for my $slave_cxn (@$slaves) { + my $slave_has_table = $self->check_table( + dbh => $slave_cxn->dbh(), + db => $db, + tbl => $tbl, + ); + die "Table $db.$tbl doesn't exist in slave " . $slave_cxn->name() + unless $slave_has_table; + + my $ddl = $self->get_create_table( $slave_cxn->dbh, $db, $tbl ); + my $slave_tbl = $self->parse( $ddl ); + $self->check_table_against_slave( + tbl_name => "$db.$tbl", + master_tbl => $tbl_struct, + slave_tbl => $slave_tbl, + slave_name => $slave_cxn->name(), + ); + } +} + sub get_engine { my ( $self, $ddl, $opts ) = @_; my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; diff --git a/t/lib/TableParser.t b/t/lib/TableParser.t index 0cbf25d1..a2b64c54 100644 --- a/t/lib/TableParser.t +++ b/t/lib/TableParser.t @@ -9,7 +9,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 40; +use Test::More; use TableParser; use Quoter; @@ -910,9 +910,78 @@ is_deeply( 'Index with comma in its name (issue 388)' ); +# ############################################################################# +# pt-table-checksum doesn't check that tables exist on all replicas +# https://bugs.launchpad.net/percona-toolkit/+bug/1009510 +# ############################################################################# + +my $master_dbh = $sb->get_dbh_for('master'); +my $slave_dbh = $sb->get_dbh_for('slave1'); +SKIP: { + skip "Cannot connect to sandbox master", 2 unless $master_dbh; + skip "Cannot connect to sandbox slave1", 2 unless $slave_dbh; + + $master_dbh->do("DROP DATABASE IF EXISTS test"); + $master_dbh->do("CREATE DATABASE test"); + + $sb->wait_for_slaves(); + + $slave_dbh->do("CREATE TABLE test.bug_1009510 ( i int, b int )"); + $master_dbh->do("CREATE TABLE IF NOT EXISTS test.bug_1009510 ( i int )"); + + my $master_tbl = get_tbl_struct( $tp, $master_dbh, "test", "bug_1009510" ); + $master_tbl->{db} = "test"; + $master_tbl->{tbl} = "bug_1009510"; + + my $slave_tbl = get_tbl_struct( $tp, $slave_dbh, "test", "bug_1009510" ); + + my ($output) = full_output(sub { + $tp->check_table_against_slave( + tbl_name => "bug_1009510.test", + master_tbl => $master_tbl, + slave_tbl => $slave_tbl, + slave_name => 'slave1', + ); + }); + + like( + $output, + qr/has more columns than its master: b/, + "check_table_against_slave warns about extra columns" + ); + + $slave_dbh->do("DROP TABLE test.bug_1009510"); + $slave_dbh->do("CREATE TABLE test.bug_1009510 ( b int )"); + + $slave_tbl = get_tbl_struct( $tp, $slave_dbh, "test", "bug_1009510" ); + + local $EVAL_ERROR; + eval { + $tp->check_table_against_slave( + tbl_name => "bug_1009510.test", + master_tbl => $master_tbl, + slave_tbl => $slave_tbl, + slave_name => 'slave1', + ); + }; + + like( + $EVAL_ERROR, + qr/differs from master: Columns i/, + "check_table_against_slave dies if the slave table has missing columns" + ); +} + +sub get_tbl_struct { + my ($tp, $dbh, $db, $tbl) = @_; + my $ddl = $tp->get_create_table( $dbh, $db, $tbl ); + return $tp->parse( $ddl ); +} + # ############################################################################# # Done. # ############################################################################# $sb->wipe_clean($dbh) if $dbh; ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); -exit; + +done_testing; diff --git a/t/pt-table-checksum/basics.t b/t/pt-table-checksum/basics.t index 585d981c..f72bfa8c 100644 --- a/t/pt-table-checksum/basics.t +++ b/t/pt-table-checksum/basics.t @@ -41,9 +41,6 @@ elsif ( !$slave1_dbh ) { elsif ( !@{$master_dbh->selectall_arrayref("show databases like 'sakila'")} ) { plan skip_all => 'sakila database is not loaded'; } -else { - plan tests => 38; -} # The sandbox servers run with lock_wait_timeout=3 and it's not dynamic # so we need to specify --lock-wait-timeout=3 else the tool will die. @@ -476,9 +473,48 @@ is( "Bug 821675 (dot): 0 errors" ); +# ############################################################################# +# pt-table-checksum doesn't check that tables exist on all replicas +# https://bugs.launchpad.net/percona-toolkit/+bug/1009510 +# ############################################################################# + +$master_dbh->do("DROP DATABASE IF EXISTS bug_1009510"); +$master_dbh->do("CREATE DATABASE bug_1009510"); + +$sb->wait_for_slaves(); +$slave1_dbh->do("CREATE TABLE bug_1009510.bug_1009510 ( i int, b int )"); +$master_dbh->do("CREATE TABLE IF NOT EXISTS bug_1009510.bug_1009510 ( i int )"); +($output) = full_output( + sub { pt_table_checksum::main(@args, + qw(-t bug_1009510.bug_1009510)) }, +); + +like( + $output, + qr/has more columns than its master: b/, + "Bug 1009510: ptc warns about extra columns" +); + +$slave1_dbh->do("DROP TABLE bug_1009510.bug_1009510"); +$slave1_dbh->do("CREATE TABLE bug_1009510.bug_1009510 ( b int )"); + +($output) = full_output( + sub { $exit_status = pt_table_checksum::main(@args, + qw(-t bug_1009510.bug_1009510)) }, +); + +like( + $output, + qr/differs from master: Columns i/, + "Bug 1009510: ptc dies if the slave table has missing columns" +); +$sb->wipe_clean($master_dbh); +$sb->wipe_clean($slave1_dbh); + # ############################################################################# # Done. # ############################################################################# $sb->wipe_clean($master_dbh); ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); -exit; + +done_testing; diff --git a/t/pt-table-checksum/privs.t b/t/pt-table-checksum/privs.t index 36dd23b7..270ac575 100644 --- a/t/pt-table-checksum/privs.t +++ b/t/pt-table-checksum/privs.t @@ -132,16 +132,17 @@ like( "Read-only user (bug 987694): checksummed rows" ); -($output) = full_output( - sub { $exit_status = pt_table_checksum::main(@args, +($output, $exit_status) = full_output( + sub { pt_table_checksum::main(@args, "$master_dsn,u=ro_checksum_user,p=msandbox", qw(--recursion-method none) ) } ); -like($output, - qr/\QThe database exists on the master, but replication will break/, - "Error if db exists on the master, can't CREATE DATABASE, and --no-create-replicate-table was not specified", +is( + $exit_status, + 0, + "No error if db exists on the master, can't CREATE DATABASE, --no-create-replicate-table was not specified, but the database does exist in all slaves" ); diag(qx{/tmp/12345/use -u root -e 'DROP TABLE `percona`.`checksums`'});