Merge pull request #992 from percona/PT-1423_LP_1682929_pt-table-checksum_First_chunk_takes_too_long

PT-1423 - LP #1682929: pt-table-checksum: First chunk takes too long
Authored by Sveta Smirnova on 2025-08-26 19:43:16 +03:00, committed by GitHub
2 changed files with 117 additions and 4 deletions


@@ -8749,7 +8749,7 @@ sub main {
}
# Explicit --chunk-size disable auto chunk sizing.
-$o->set('chunk-time', 0) if $o->got('chunk-size');
+$o->set('chunk-time', 0) if $o->got('chunk-size') && !$o->got('chunk-time');
if (!$o->get('swap-tables') && !$o->get('drop-triggers')) {
PTDEBUG && _d('Enabling no-drop-new-table since no-swap-tables & no-drop-triggers were specified');
$o->set('drop-new-table', 0);
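The effect of this one-line change can be illustrated with a minimal stand-alone sketch (not part of this commit). The hash-backed got()/set() helpers below are hypothetical stand-ins for the OptionParser methods of the same name that the tool's $o object provides:

#!/usr/bin/env perl
use strict;
use warnings;

# Hypothetical stand-in for the tool's option object: %given records which
# options were passed on the command line, %value records their values.
my %given = ( 'chunk-size' => 1,   'chunk-time' => 1 );    # user passed both
my %value = ( 'chunk-size' => 100, 'chunk-time' => 0.5 );

sub got { return $given{ $_[0] } }
sub set { $value{ $_[0] } = $_[1]; return }

# Old behavior: an explicit --chunk-size always zeroed chunk-time, silently
# discarding an explicit --chunk-time and freezing the chunk size.
# set('chunk-time', 0) if got('chunk-size');

# New behavior: chunk-time is only zeroed when the user did not ask for it.
set('chunk-time', 0) if got('chunk-size') && !got('chunk-time');

printf "chunk-size=%s chunk-time=%s\n",
    $value{'chunk-size'}, $value{'chunk-time'};
# Prints "chunk-size=100 chunk-time=0.5": dynamic chunk sizing stays enabled.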
@@ -13093,8 +13093,9 @@ This option can override the default behavior, which is to adjust chunk size
dynamically to try to make chunks run in exactly L<"--chunk-time"> seconds.
When this option isn't set explicitly, its default value is used as a starting
point, but after that, the tool ignores this option's value. If you set this
-option explicitly, however, then it disables the dynamic adjustment behavior and
-tries to make all chunks exactly the specified number of rows.
+option explicitly, and do not set L<"--chunk-time"> explicitly, then it disables
+the dynamic adjustment behavior and tries to make all chunks exactly the specified
+number of rows.

=item --chunk-size-limit
@@ -13131,7 +13132,13 @@ changes in server load, the tool adapts quickly.
If this option is set to zero, the chunk size doesn't auto-adjust, so query
times will vary, but query chunk sizes will not. Another way to do the same
thing is to specify a value for L<"--chunk-size"> explicitly, instead of leaving
-it at the default.
+it at the default, and omit the option L<"--chunk-time">.
+
+If both L<"--chunk-size"> and L<"--chunk-time"> are set explicitly, the initial
+chunk size will be as specified by L<"--chunk-size">, but later chunks will be
+adjusted so that each subsequent query takes this amount of time (in seconds)
+to execute. Use this combination if the tool spends too much time copying the
+first chunk with the default L<"--chunk-size">.

=item --config
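
The combined behavior documented above, where an explicit L<"--chunk-size"> seeds the first chunk and L<"--chunk-time"> then re-targets the following ones, amounts to a simple feedback loop. The sketch below is a simplified model rather than the tool's actual implementation (which smooths the rate over recent chunks); copy_chunk() is a hypothetical stub:

#!/usr/bin/env perl
use strict;
use warnings;
use Time::HiRes qw(time sleep);

# Hypothetical stub for the real chunk copy: pretend each row costs 1 ms.
sub copy_chunk {
    my ($rows) = @_;
    sleep( $rows * 0.001 );
    return;
}

my $chunk_size = 100;    # --chunk-size: used verbatim for the first chunk
my $chunk_time = 0.5;    # --chunk-time: target seconds per chunk

for my $chunk ( 1 .. 5 ) {
    my $start = time();
    copy_chunk($chunk_size);
    my $elapsed = time() - $start;

    my $rate = $chunk_size / ( $elapsed || 0.001 );    # rows per second
    printf "chunk %d: %d rows in %.3fs (%.0f rows/s)\n",
        $chunk, $chunk_size, $elapsed, $rate;

    # Re-target the next chunk so it should take about $chunk_time seconds.
    $chunk_size = int( $rate * $chunk_time ) || 1;
}

Starting from a small explicit chunk size keeps the first query cheap, while the rate-based re-targeting lets later chunks grow until each one takes roughly L<"--chunk-time"> seconds.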


@@ -0,0 +1,106 @@
#!/usr/bin/env perl
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use Data::Dumper;
use PerconaTest;
use Sandbox;
require "$trunk/bin/pt-online-schema-change";
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $source_dbh = $sb->get_dbh_for('source');
if ( !$source_dbh ) {
plan skip_all => 'Cannot connect to sandbox source';
}
# The sandbox servers run with lock_wait_timeout=3 and it's not dynamic
# so we need to specify --set-vars innodb_lock_wait_timeout=3 else the
# tool will die.
my $source_dsn = 'h=127.1,P=12345,u=msandbox,p=msandbox';
my @args = (qw(--set-vars innodb_lock_wait_timeout=3));
my $sample = "t/pt-online-schema-change/samples/";
my $plugin = "$trunk/$sample/plugins";
my $output;
my $exit_status;
$sb->load_file('source', "$sample/basic_no_fks.sql");
# Should be big enough that we can watch how the number of chunks changes with different options
my $num_rows = 5000;
diag("Loading $num_rows into the table. This might take some time.");
diag(`util/mysql_random_data_load --host=127.0.0.1 --port=12345 --user=msandbox --password=msandbox pt_osc t $num_rows`);
($output, $exit_status) = full_output(
sub { pt_online_schema_change::main(@args,
"$source_dsn,D=pt_osc,t=t",
'--progress', 'iterations,1',
"--alter", "FORCE",
'--chunk-size=100',
'--execute') },
stderr => 1,
);
my $chunks = () = $output =~ /Copying `pt_osc`.`t`:\s+\d+% \d\d:\d\d remain/gi;
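# With $num_rows = 5000 and a fixed chunk size of 100, the copy runs in roughly
# 50 nibbles (about one progress line per chunk), so the progress-line count
# should not shrink the way it would if chunks were being enlarged dynamically.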
is(
$chunks,
51,
'Chunk size was not modified with default --chunk-time'
) or diag($chunks, $output);
($output, $exit_status) = full_output(
sub { pt_online_schema_change::main(@args,
"$source_dsn,D=pt_osc,t=t",
'--progress', 'iterations,1',
"--alter", "FORCE",
'--chunk-size=100',
'--chunk-time=0',
'--execute') },
stderr => 1,
);
$chunks = () = $output =~ /Copying `pt_osc`.`t`:\s+\d+% \d\d:\d\d remain/gi;
is(
$chunks,
51,
'Chunk size was not modified with --chunk-time=0'
) or diag($chunks, $output);
($output, $exit_status) = full_output(
sub { pt_online_schema_change::main(@args,
"$source_dsn,D=pt_osc,t=t",
'--progress', 'iterations,1',
"--alter", "FORCE",
'--chunk-size=100',
'--chunk-time=0.5',
'--execute') },
stderr => 1,
);
$chunks = () = $output =~ /Copying `pt_osc`.`t`:\s+\d+% \d\d:\d\d remain/gi;
is(
$chunks,
2,
'Chunk size was adjusted to fit specified --chunk-time'
) or diag($chunks, $output);
# #############################################################################
# Done.
# #############################################################################
$sb->wipe_clean($source_dbh);
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;