merge branch daniel

This commit is contained in:
Kenny Gryp
2014-05-29 00:18:57 +02:00
5 changed files with 174 additions and 143 deletions

View File

@@ -4982,8 +4982,8 @@ sub watch_server {
$start_sql .= " UNTIL RELAY_LOG_FILE = '$file', RELAY_LOG_POS = $pos";
}
my $start = $dbh->prepare($start_sql);
my $stop = $dbh->prepare('STOP SLAVE');
my $start = $dbh->prepare($start_sql);
my $stop = $dbh->prepare('STOP SLAVE');
# ########################################################################
# Detect if GTID is enabled. Skipping an event is done differently.
@@ -4991,27 +4991,25 @@ sub watch_server {
# When MySQL 5.6.5 or higher is used and gtid is enabled, skipping a
# transaction is not possible with SQL_SLAVE_SKIP_COUNTER
my $skip_event;
my $gtid_mode;
my $have_gtid = 0;
if ( VersionParser->new($dbh) >= '5.6.5' ) {
my $row = $dbh->selectrow_arrayref('SELECT @@GLOBAL.gtid_mode');
$gtid_mode = $row->[0];
} else {
$gtid_mode="N/A";
my $row = $dbh->selectrow_arrayref('SELECT @@GLOBAL.gtid_mode');
PTDEBUG && _d('@@GLOBAL.gtid_mode:', $row->[0]);
if ( $row && $row->[0] eq 'ON' ) {
$have_gtid = 1;
}
}
PTDEBUG && _d('GTID is ' . ($gtid_mode eq 'ON'
? 'enabled'
: 'disabled'));
PTDEBUG && _d('Have GTID:', $have_gtid);
# If GTID is enabled, slave_parallel_workers should be == 0.
# it's currently not possible to know what GTID event the failed trx is
if ( $gtid_mode eq 'ON') {
my $threads = $dbh->selectrow_hashref('SELECT
@@GLOBAL.slave_parallel_workers AS threads');
# It's currently not possible to know what GTID event the failed trx is.
if ( $have_gtid ) {
my $threads = $dbh->selectrow_hashref(
'SELECT @@GLOBAL.slave_parallel_workers AS threads');
if ( $threads->{threads} > 0 ) {
die("Error: GTID is enabled, and slave_parallel_workers="
. $threads->{threads}
. ". It is impossible to skip transactions properly.\n");
die "Cannot skip transactions properly because GTID is enabled "
. "and slave_parallel_workers > 0. See 'GLOBAL TRANSACTION IDS' "
. "in the tool's documentation.\n";
}
}
@@ -5023,9 +5021,7 @@ sub watch_server {
[ qr/Could not parse relay log event entry/ => 'refetch_relay_log' ],
[ qr/Incorrect key file for table/ => 'repair_table' ],
# This must be the last one. It's a catch-all rule: skip and restart.
[ qr/./ => ($gtid_mode eq 'ON'
? 'skip_gtid'
: 'skip') ],
[ qr/./ => ($have_gtid ? 'skip_gtid' : 'skip') ],
);
# ########################################################################
@@ -5054,6 +5050,14 @@ sub watch_server {
},
skip_gtid => sub {
my ( $stat, $dbh ) = @_;
# Get master_uuid from SHOW SLAVE STATUS if a UUID is not specified
# with --master-uuid.
my $gtid_uuid = $o->get('master-uuid');
if ( !$gtid_uuid ) {
$gtid_uuid = $stat->{master_uuid};
die "No master_uuid" unless $gtid_uuid; # shouldn't happen
}
# We need the highest transaction in the executed_gtid_set,
# and then we need to increase it by 1 (the one we want to skip).
@@ -5062,54 +5066,38 @@ sub watch_server {
# - it skips the next transaction from the master_uuid
# (when a slaveB is replicating from slaveA,
# the master_uuid is its own master, slaveA)
my $gtid_exec = $stat->{executed_gtid_set};
# default behavior is to take the master_uuid from SHOW SLAVE STATUS
# or use --skip-gtid-uuid specified uuid.
my $gtid_uuid;
if ( $o->get('skip-gtid-uuid') eq 'master' ) {
$gtid_uuid = $stat->{master_uuid};
} else {
$gtid_uuid = $o->get('skip-gtid-uuid');
}
$gtid_exec =~ /$gtid_uuid([0-9-:]*)/;
my $gtid_exec_ids = $1;
$gtid_exec_ids =~ s/:[0-9]-/:/g;
my ($gtid_exec_ids) = ($stat->{executed_gtid_set} || '') =~ m/$gtid_uuid([0-9-:]*)/;
$gtid_exec_ids =~ s/:[0-9]-/:/g;
die "No executed GTIDs" unless $gtid_exec_ids;
my @gtid_exec_ranges = split(/:/, $gtid_exec_ids);
delete $gtid_exec_ranges[0]; # undef the first value,it's always empty
delete $gtid_exec_ranges[0]; # undef the first value, it's always empty
# get the highest id by sorting the array, removing the undef value
# Get the highest id by sorting the array, removing the undef value.
my @gtid_exec_sorted = sort { $a <=> $b }
grep { defined($_) } @gtid_exec_ranges;
my $gtid_exec_last = $gtid_exec_sorted[-1];
PTDEBUG && _d("GTID: master_uuid:$gtid_uuid,\n"
. "GTID: executed_gtid_set:$gtid_exec,\n"
. "GTID: gtid max for master_uuid:" . $gtid_exec_sorted[-1] . "\n"
. "GTID: last executed gtid:'$gtid_uuid:$gtid_exec_last'");
PTDEBUG && _d("\n",
"GTID: master_uuid:", $gtid_uuid, "\n",
"GTID: executed_gtid_set:", $gtid_exec_ids, "\n",
"GTID: max for master_uuid:", $gtid_exec_sorted[-1], "\n",
"GTID: last executed gtid:", $gtid_uuid, ":", $gtid_exec_last);
# Set the session's next GTID, then write an empty transaction.
my $skipped=0;
until ( $skipped == $o->get('skip-count') ) {
$skipped++;
my $gtid_next=$gtid_exec_last + $skipped;
PTDEBUG && _d("GTID: Skipping " . $gtid_uuid . ":" . $gtid_next);
my $gtid_set_next = $dbh->prepare("SET GTID_NEXT='"
. $gtid_uuid . ":" . $gtid_next . "'");
$gtid_set_next->execute();
my $skipped = 0;
while ( $skipped++ < $o->get('skip-count') ) {
my $gtid_next = $gtid_exec_last + $skipped;
my $sql = "SET GTID_NEXT='$gtid_uuid:$gtid_next'";
PTDEBUG && _d($sql);
my $sth = $dbh->prepare($sql);
$sth->execute();
$dbh->begin_work();
$dbh->commit();
}
# Set the session back to the automatically generated GTID_NEXT.
my $gtid_automatic = $dbh->prepare("SET GTID_NEXT='AUTOMATIC'");
$gtid_automatic->execute();
$dbh->do("SET GTID_NEXT='AUTOMATIC'");
},
repair_table => sub {
my ( $stat, $dbh ) = @_;
@@ -5399,25 +5387,23 @@ sleep time, whichever is less.
=head1 GLOBAL TRANSACTION IDS
pt-slave-restart supports Global Transaction IDs, which has been introduced in
MySQL in 5.6.5.
It's important to keep in mind that:
As of Percona Toolkit 2.2.8, pt-slave-restart supports Global Transaction IDs
introduced in MySQL 5.6.5. It's important to keep in mind that:
=over
=item *
pt-slave-restart will not skip transactions when multiple replication threads
are being used (slave_parallel_workers>0). pt-slave-restart does not know what
the GTID event is of the failed transaction of a specific slave thread.
are being used (slave_parallel_workers > 0). pt-slave-restart does not know
what the GTID event is of the failed transaction of a specific slave thread.
=item *
The default behavior is to skip the next transaction from the slave's master.
Writes can originate on different servers, each with their own unique UUID.
Writes can originate on different servers, each with their own UUID.
See L<"--skip-gtid-uuid">.
See L<"--master-uuid">.
=back
@@ -5675,20 +5661,23 @@ type: int; default: 1
Number of statements to skip when restarting the slave.
=item --skip-gtid-uuid
=item --master-uuid
type: string; default: master
type: string
When using GTID, an empty transaction should be created in order to skip it.
If writes are coming from different nodes in the replication tree above, it is
not possible to know which event from which UUID to skip.
By default, the UUID from the slave's master is being used to skip.
(C<SHOW GLOBAL STATUS Master_UUID> column).
By default, transactions from the slave's master (C<'Master_UUID'> from
C<SHOW SLAVE STATUS>) are skipped.
Example: Master -> Slave1 -> Slave2. When skipping events from 'Slave2', and
writes originated from 'Master', --skip-gtid-uuid should be specified with the
'Master' it's UUID.
For example, with
master1 -> slave1 -> slave2
When skipping events on slave2 that were written to master1, you must specify
the UUID of master1, else the tool will use the UUID of slave1 by default.
See L<"GLOBAL TRANSACTION IDS">.

View File

@@ -379,6 +379,7 @@ sub verify_test_data {
# Diff the two sets of checksums: host to master (ref).
my @diffs;
foreach my $c ( @checksums ) {
next unless $c->{checksum};
if ( $c->{checksum} ne $ref->{$c->{table}}->{checksum} ) {
push @diffs, $c->{table};
}

View File

@@ -315,13 +315,16 @@ case $opt in
fi
if [ $? -eq 0 -a "$MYSQL_VERSION" '>' "4.1" ]; then
echo -n "Loading sakila database... "
./load-sakila-db 12345 "${2:-""}"
exit_status=$((exit_status | $?))
if [ $exit_status -ne 0 ]; then
echo "FAILED"
else
echo "OK"
SAKILA=${SAKILA:-1}
if [ $SAKILA -eq 1 ]; then
echo -n "Loading sakila database... "
./load-sakila-db 12345 "${2:-""}"
exit_status=$((exit_status | $?))
if [ $exit_status -ne 0 ]; then
echo "FAILED"
else
echo "OK"
fi
fi
# Create percona_test db and checksum all the tables.

View File

@@ -16,69 +16,119 @@ use Sandbox;
require "$trunk/bin/pt-slave-restart";
if ( $sandbox_version lt '5.6' ) {
plan skip_all => 'MySQL Version ' . $sandbox_version
. ' < 5.6, GTID is not available, skipping tests';
plan skip_all => "Requires MySQL 5.6";
}
diag("Stopping/reconfiguring/restarting sandboxes 12345, 12346 and 12347");
diag(`$trunk/sandbox/test-env stop >/dev/null`);
diag(`GTID=1 $trunk/sandbox/test-env start >/dev/null`);
diag(`SAKILA=0 GTID=1 $trunk/sandbox/test-env restart`);
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $master_dbh = $sb->get_dbh_for('master');
my $slave_dbh = $sb->get_dbh_for('slave1');
my $slave2_dbh = $sb->get_dbh_for('slave2');
my $master_dbh = $sb->get_dbh_for('master');
my $slave1_dbh = $sb->get_dbh_for('slave1');
my $slave2_dbh = $sb->get_dbh_for('slave2');
if ( !$master_dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
elsif ( !$slave_dbh ) {
elsif ( !$slave1_dbh ) {
plan skip_all => 'Cannot connect to sandbox slave1';
}
elsif ( !$slave2_dbh ) {
plan skip_all => 'Cannot connect to sandbox slave2';
}
my $slave1_dsn = $sb->dsn_for("slave1");
my $slave2_dsn = $sb->dsn_for("slave2");
my $pid_file = "/tmp/pt-slave-restart-test-$PID.pid";
my $log_file = "/tmp/pt-slave-restart-test-$PID.log";
my $cmd = "$trunk/bin/pt-slave-restart --daemonize --run-time 5 --max-sleep .25 --pid $pid_file --log $log_file";
# Launch pt-slave-restart using the shared $cmd prefix followed by the
# caller-supplied extra command-line text (options and/or a DSN).
#
# Returns nothing if a previously started instance could not be stopped;
# otherwise returns whatever PerconaTest::wait_for_files() returns for the
# PID file.
sub start {
    my $extra_args = shift;

    # Never run two instances at once: give up if the old one won't stop.
    return unless stop();

    system("$cmd $extra_args");

    # NOTE(review): presumably this blocks until the PID file shows up,
    # i.e. until the daemon has finished starting -- confirm in PerconaTest.
    return PerconaTest::wait_for_files($pid_file);
}
# Stop the pt-slave-restart instance used by this test, if one is running.
#
# Returns 1 when no instance is running (either none was running to begin
# with, or it went away after being asked to stop), 0 when an instance is
# still running afterwards.
#
# No prototype is declared on purpose: this sub is called from start()
# before its definition is compiled, and an empty prototype there only
# produces a "called too early to check prototype" warning without
# validating anything.
sub stop {
    return 1 if !is_running();

    # Ask the tool to stop, in the background so the test does not block.
    diag(`$trunk/bin/pt-slave-restart --stop -q >/dev/null 2>&1 &`);

    # Wait for the PID file to disappear.
    # NOTE(review): 0.3 and 2 look like poll interval and max wait in
    # seconds -- confirm against PerconaTest::wait_until.
    wait_until(sub { !-f $pid_file }, 0.3, 2);

    # Remove the sentinel file so that a later start() is not affected by it
    # (presumably it is what --stop creates -- confirm in the tool's docs).
    diag(`rm -f /tmp/pt-slave-restart-sentinel`);

    return is_running() ? 0 : 1;
}
# Report whether the pt-slave-restart instance started with $cmd is running.
#
# The process table is searched for the exact $cmd string.  When no matching
# process is found but the PID file still exists, the stale PID file is
# removed as a side effect.
#
# Returns 1 if a matching process is found, 0 otherwise.
sub is_running {
    my $ps_match = `ps -eaf | grep -v grep | grep '$cmd'`;
    chomp $ps_match;

    # A process matching the command line is present.
    return 1 if $ps_match;

    # Not running: clean up a PID file that was left behind, if any.
    diag(`rm -f $pid_file`) if -f $pid_file;
    return 0;
}
# Wait until SHOW SLAVE STATUS on the given handle reports a non-zero
# last_sql_errno, i.e. until the SQL thread has hit an error.
#
#   $dbh - database handle of the slave to watch
#
# Returns the result of wait_until(), called with its default timing.
sub wait_repl_broke {
    my ( $conn ) = @_;

    return wait_until(
        sub {
            my $status = $conn->selectrow_hashref('show slave status');
            # True as soon as an SQL error number is set.
            return $status->{last_sql_errno};
        }
    );
}
# Wait until SHOW SLAVE STATUS on the given handle reports last_sql_errno
# equal to 0, i.e. until the SQL thread no longer shows an error.
#
#   $dbh - database handle of the slave to watch
#
# Returns the result of wait_until().
# NOTE(review): 0.30 and 5 look like poll interval and max wait in seconds
# (callers describe this as waiting "up to 5s") -- confirm against
# PerconaTest::wait_until.
sub wait_repl_ok {
    my ( $conn ) = @_;

    return wait_until(
        sub {
            my $status = $conn->selectrow_hashref('show slave status');
            return $status->{last_sql_errno} == 0;
        },
        0.30,
        5,
    );
}
# #############################################################################
# basic test to see if restart works
# Basic test to see if restart works with GTID.
# #############################################################################
$master_dbh->do('DROP DATABASE IF EXISTS test');
$master_dbh->do('CREATE DATABASE test');
$master_dbh->do('CREATE TABLE test.t (a INT)');
$sb->wait_for_slaves;
# Bust replication
$slave_dbh->do('DROP TABLE test.t');
$slave1_dbh->do('DROP TABLE test.t');
$master_dbh->do('INSERT INTO test.t SELECT 1');
wait_until(
sub {
my $row = $slave_dbh->selectrow_hashref('show slave status');
return $row->{last_sql_errno};
}
);
wait_repl_broke($slave1_dbh) or die "Failed to break replication";
my $r = $slave_dbh->selectrow_hashref('show slave status');
my $r = $slave1_dbh->selectrow_hashref('show slave status');
like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slave: Replication broke');
# Start an instance
diag(`$trunk/bin/pt-slave-restart --max-sleep .25 -h 127.0.0.1 -P 12346 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`);
sleep 1;
# Start pt-slave-restart and wait up to 5s for it to fix replication
# (it should take < 1s but tests can be really slow sometimes).
start("$slave1_dsn") or die "Failed to start pt-slave-restart";
wait_repl_ok($slave1_dbh);
$r = $slave_dbh->selectrow_hashref('show slave status');
like($r->{last_errno}, qr/^0$/, 'slave: event is not skipped successfully');
# Check if replication is fixed.
$r = $slave1_dbh->selectrow_hashref('show slave status');
like(
$r->{last_errno},
qr/^0$/,
'Event is skipped',
) or BAIL_OUT("Replication is broken");
diag(`$trunk/bin/pt-slave-restart --stop -q`);
sleep 1;
my $output = `ps -eaf | grep pt-slave-restart | grep -v grep`;
unlike($output, qr/pt-slave-restart --max/, 'slave: stopped pt-slave-restart successfully');
diag(`rm -f /tmp/pt-slave-re*`);
# Stop pt-slave-restart.
stop() or die "Failed to stop pt-slave-restart";
# #############################################################################
# test the slave of the master
# Test the slave of the master.
# #############################################################################
$master_dbh->do('DROP DATABASE IF EXISTS test');
$master_dbh->do('CREATE DATABASE test');
$master_dbh->do('CREATE TABLE test.t (a INT)');
@@ -87,12 +137,7 @@ $sb->wait_for_slaves;
# Bust replication
$slave2_dbh->do('DROP TABLE test.t');
$master_dbh->do('INSERT INTO test.t SELECT 1');
wait_until(
sub {
my $row = $slave2_dbh->selectrow_hashref('show slave status');
return $row->{last_sql_errno};
}
);
wait_repl_broke($slave2_dbh) or die "Failed to break replication";
# fetch the master uuid, which is the machine we need to skip an event from
$r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid');
@@ -102,22 +147,22 @@ $r = $slave2_dbh->selectrow_hashref('show slave status');
like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslave: Replication broke');
# Start an instance
diag(`$trunk/bin/pt-slave-restart --skip-gtid-uuid=$uuid --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`);
sleep 1;
start("--master-uuid=$uuid $slave2_dsn") or die;
wait_repl_ok($slave2_dbh);
$r = $slave2_dbh->selectrow_hashref('show slave status');
like($r->{last_errno}, qr/^0$/, 'slaveofslave: event is not skipped successfully');
like(
$r->{last_errno},
qr/^0$/,
'Skips event from master on slave2'
) or BAIL_OUT("Replication is broken");
diag(`$trunk/bin/pt-slave-restart --stop -q`);
sleep 1;
$output = `ps -eaf | grep pt-slave-restart | grep -v grep`;
unlike($output, qr/pt-slave-restart --max/, 'slaveofslave: stopped pt-slave-restart successfully');
diag(`rm -f /tmp/pt-slave-re*`);
stop() or die "Failed to stop pt-slave-restart";
# #############################################################################
# test skipping 2 events in a row.
# Test skipping 2 events in a row.
# #############################################################################
$master_dbh->do('DROP DATABASE IF EXISTS test');
$master_dbh->do('CREATE DATABASE test');
$master_dbh->do('CREATE TABLE test.t (a INT)');
@@ -127,12 +172,7 @@ $sb->wait_for_slaves;
$slave2_dbh->do('DROP TABLE test.t');
$master_dbh->do('INSERT INTO test.t SELECT 1');
$master_dbh->do('INSERT INTO test.t SELECT 1');
wait_until(
sub {
my $row = $slave2_dbh->selectrow_hashref('show slave status');
return $row->{last_sql_errno};
}
);
wait_repl_broke($slave2_dbh) or die "Failed to break replication";
# fetch the master uuid, which is the machine we need to skip an event from
$r = $master_dbh->selectrow_hashref('select @@GLOBAL.server_uuid as uuid');
@@ -142,25 +182,22 @@ $r = $slave2_dbh->selectrow_hashref('show slave status');
like($r->{last_error}, qr/Table 'test.t' doesn't exist'/, 'slaveofslaveskip2: Replication broke');
# Start an instance
diag(`$trunk/bin/pt-slave-restart --skip-count=2 --skip-gtid-uuid=$uuid --max-sleep .25 -h 127.0.0.1 -P 12347 -u msandbox -p msandbox --daemonize --pid /tmp/pt-slave-restart.pid --log /tmp/pt-slave-restart.log`);
sleep 1;
start("--skip-count=2 --master-uuid=$uuid $slave2_dsn") or die;
wait_repl_ok($slave2_dbh);
$r = $slave2_dbh->selectrow_hashref('show slave status');
like($r->{last_errno}, qr/^0$/, 'slaveofslaveskip2: event is not skipped successfully');
like(
$r->{last_errno},
qr/^0$/,
'Skips multiple events'
) or BAIL_OUT("Replication is broken");
diag(`$trunk/bin/pt-slave-restart --stop -q`);
sleep 1;
$output = `ps -eaf | grep pt-slave-restart | grep -v grep`;
unlike($output, qr/pt-slave-restart --max/, 'slaveofslaveskip2: stopped pt-slave-restart successfully');
diag(`rm -f /tmp/pt-slave-re*`);
stop() or die "Failed to stop pt-slave-restart";
# #############################################################################
# Done.
# #############################################################################
diag(`rm -f /tmp/pt-slave-re*`);
diag(`$trunk/sandbox/test-env stop >/dev/null`);
diag(`$trunk/sandbox/test-env start >/dev/null`);
diag(`rm -f $pid_file $log_file >/dev/null`);
diag(`$trunk/sandbox/test-env restart`);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;

View File

@@ -61,6 +61,7 @@ my $sql = "CHECKSUM TABLES "
. join(", ", map { "sakila.$_" } @tables_in_sakila);
my @checksums = @{$dbh->selectall_arrayref($sql, {Slice => {} })};
foreach my $c ( @checksums ) {
next unless $c->{Checksum};
$dbh->do("INSERT INTO percona_test.checksums(db_tbl, checksum)
VALUES('$c->{Table}', $c->{Checksum})");
}