From 255b2764357dd477738811662b9410bd6553c04a Mon Sep 17 00:00:00 2001
From: Daniel Nichter
Date: Wed, 21 Dec 2011 21:13:19 -0700
Subject: [PATCH] Make pt-table-sync use new NibbleIterator-only TableSyncer, and also use Cxn.

---
 bin/pt-table-sync               | 6671 ++++++++++++++-----------------
 t/pt-table-sync/basics.t        |   44 +-
 t/pt-table-sync/option_sanity.t |    6 +-
 3 files changed, 2901 insertions(+), 3820 deletions(-)

diff --git a/bin/pt-table-sync b/bin/pt-table-sync
index 08be61f7..0f81b465 100755
--- a/bin/pt-table-sync
+++ b/bin/pt-table-sync
@@ -959,7 +959,7 @@ sub _parse_size {
       $opt->{value} = ($pre || '') . $num;
    }
    else {
-      $self->save_error("Invalid size for --$opt->{long}");
+      $self->save_error("Invalid size for --$opt->{long}: $val");  # include the rejected value in the error
    }
    return;
 }
@@ -1243,12 +1243,14 @@ sub parse_options {
 sub as_string {
    my ( $self, $dsn, $props ) = @_;
    return $dsn unless ref $dsn;
-   my %allowed = $props ? map { $_=>1 } @$props : ();
+   my @keys = $props ? @$props : sort keys %$dsn;  # caller's order when $props is given, else sorted DSN keys
    return join(',',
-      map  { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_})  }
-      grep { defined $dsn->{$_} && $self->{opts}->{$_} }
-      grep { !$props || $allowed{$_}                   }
-      sort keys %$dsn );
+      map  { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) }  # the 'p' value is never printed, only '...'
+      grep {
+         exists $self->{opts}->{$_}  # keep only parts present in this parser's opts
+         && exists $dsn->{$_}        # exists-check first so the DSN hash is not autovivified
+         && defined $dsn->{$_}
+      } @keys);
 }
 
 sub usage {
@@ -1466,6 +1468,177 @@ sub _d {
 # End DSNParser package
 # ###########################################################################
 
+# ###########################################################################
+# Cxn package
+# This package is a copy without comments from the original. The original
+# with comments and its test file can be found in the Bazaar repository at,
+# lib/Cxn.pm
+# t/lib/Cxn.t
+# See https://launchpad.net/percona-toolkit for more information.
+# ###########################################################################
+{
+package Cxn;
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
+
+use constant PERCONA_TOOLKIT_TEST_USE_DSN_NAMES => $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} || 0;
+
+sub new {  # Build the connection wrapper from a DSN; no connection is opened here (see connect())
+   my ( $class, %args ) = @_;
+   my @required_args = qw(DSNParser OptionParser);
+   foreach my $arg ( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   };
+   my ($dp, $o) = @args{@required_args};
+
+   my $dsn_defaults = $dp->parse_options($o);
+   my $prev_dsn     = $args{prev_dsn};
+   my $dsn          = $args{dsn};
+   if ( !$dsn ) {  # no parsed DSN given: parse dsn_string, defaulting to h=<default host or localhost>
+      $args{dsn_string} ||= 'h=' . ($dsn_defaults->{h} || 'localhost');
+
+      $dsn = $dp->parse(
+         $args{dsn_string}, $prev_dsn, $dsn_defaults);
+   }
+   elsif ( $prev_dsn ) {
+      $dsn = $dp->copy($prev_dsn, $dsn);
+   }
+
+   my $self = {
+      dsn          => $dsn,
+      dbh          => $args{dbh},
+      dsn_name     => $dp->as_string($dsn, [qw(h P S)]),
+      hostname     => '',
+      set          => $args{set},
+      aux          => $args{aux},
+      dbh_opts     => $args{dbh_opts} || {AutoCommit => 1},
+      dbh_set      => 0,
+      OptionParser => $o,
+      DSNParser    => $dp,
+   };
+
+   return bless $self, $class;
+}
+
+sub connect {  # (Re)connect the main dbh and, when aux is set, the aux dbh; returns the main dbh
+   my ( $self ) = @_;
+   my $dsn = $self->{dsn};
+   my $dp  = $self->{DSNParser};
+   my $o   = $self->{OptionParser};
+
+   my $dbh = $self->{dbh};
+   if ( !$dbh || !$dbh->ping() ) {
+      if ( $o->get('ask-pass') && !$self->{asked_for_pass} ) {  # prompt at most once per object
+         $dsn->{p} = OptionParser::prompt_noecho("Enter MySQL password: ");
+         $self->{asked_for_pass} = 1;
+      }
+      $dbh = $dp->get_dbh($dp->get_cxn_params($dsn), $self->{dbh_opts});
+   }
+   MKDEBUG && _d($dbh, 'Connected dbh to', $self->{dsn_name});  # hostname is not known until set_dbh()
+
+   if ( $self->{aux} && (!$self->{aux_dbh} || !$self->{aux_dbh}->ping()) ) {
+      my $aux_dbh = $dp->get_dbh($dp->get_cxn_params($dsn), {AutoCommit => 1});
+      MKDEBUG && _d($aux_dbh, 'Connected aux dbh to', $self->{dsn_name});
+      $aux_dbh->{FetchHashKeyName} = 'NAME_lc';  # configure the aux handle itself; set_dbh() handles $dbh
+      $self->{aux_dbh} = $aux_dbh;
+   }
+
+   return $self->set_dbh($dbh);
+}
+
+sub disconnect {  # Disconnect whichever of dbh/aux_dbh are set
+   my ($self) = @_;
+   if ( $self->{dbh} ) {
+      MKDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{dsn_name});
+      $self->{dbh}->disconnect();
+   }
+   if ( $self->{aux_dbh} ) {
+      MKDEBUG && _d('Disconnecting aux dbh', $self->{aux_dbh});
+      $self->{aux_dbh}->disconnect();
+   }
+   return;
+}
+
+sub set_dbh {  # Adopt $dbh: lowercase hash keys, record @@hostname, run the optional 'set' callback
+   my ($self, $dbh) = @_;
+
+   if ( $self->{dbh} && $self->{dbh} == $dbh && $self->{dbh_set} ) {  # same handle already configured
+      MKDEBUG && _d($dbh, 'Already set dbh');
+      return $dbh;
+   }
+
+   MKDEBUG && _d($dbh, 'Setting dbh');
+
+   $dbh->{FetchHashKeyName} = 'NAME_lc';
+
+   my $sql = 'SELECT @@hostname, @@server_id';
+   MKDEBUG && _d($dbh, $sql);
+   my ($hostname, $server_id) = $dbh->selectrow_array($sql);
+   MKDEBUG && _d($dbh, 'hostname:', $hostname, $server_id);
+   if ( $hostname ) {
+      $self->{hostname} = $hostname;
+   }
+
+   if ( my $set = $self->{set}) {
+      $set->($dbh);
+   }
+
+   $self->{dbh}     = $dbh;
+   $self->{dbh_set} = 1;
+   return $dbh;
+}
+
+sub dbh {
+   my ($self) = @_;
+   return $self->{dbh};
+}
+
+sub aux_dbh {
+   my ($self) = @_;
+   return $self->{aux_dbh};
+}
+
+sub dsn {
+   my ($self) = @_;
+   return $self->{dsn};
+}
+
+sub name {  # Server hostname if known, else the DSN string; the test env var forces the DSN string
+   my ($self) = @_;
+   return $self->{dsn_name} if PERCONA_TOOLKIT_TEST_USE_DSN_NAMES;
+   return $self->{hostname} || $self->{dsn_name} || 'unknown host';
+}
+
+sub DESTROY {  # Same cleanup as disconnect(), run when the object is freed
+   my ($self) = @_;
+   if ( $self->{dbh} ) {
+      MKDEBUG && _d('Disconnecting dbh', $self->{dbh}, $self->{dsn_name});
+      $self->{dbh}->disconnect();
+   }
+   if ( $self->{aux_dbh} ) {
+      MKDEBUG && _d('Disconnecting aux dbh', $self->{aux_dbh});
+      $self->{aux_dbh}->disconnect();
+   }
+   return;
+}
+
+sub _d {
+   my ($package, undef, $line) = caller 0;
+   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
+        map { defined $_ ? $_ : 'undef' }
+        @_;
+   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
+}
+
+1;
+}
+# ###########################################################################
+# End Cxn package
+# ###########################################################################
+
 # ###########################################################################
 # VersionParser package
 # This package is a copy without comments from the original. The original
@@ -1549,125 +1722,6 @@ sub _d {
 # End VersionParser package
 # ###########################################################################
 
-# ###########################################################################
-# TableSyncStream package
-# This package is a copy without comments from the original. The original
-# with comments and its test file can be found in the Bazaar repository at,
-# lib/TableSyncStream.pm
-# t/lib/TableSyncStream.t
-# See https://launchpad.net/percona-toolkit for more information.
-# ###########################################################################
-{
-package TableSyncStream;
-
-use strict;
-use warnings FATAL => 'all';
-use English qw(-no_match_vars);
-use constant MKDEBUG => $ENV{MKDEBUG} || 0;
-
-sub new {
-   my ( $class, %args ) = @_;
-   foreach my $arg ( qw(Quoter) ) {
-      die "I need a $arg argument" unless $args{$arg};
-   }
-   my $self = { %args };
-   return bless $self, $class;
-}
-
-sub name {
-   return 'Stream';
-}
-
-sub can_sync {
-   return 1; # We can sync anything.
-}
-
-sub prepare_to_sync {
-   my ( $self, %args ) = @_;
-   my @required_args = qw(cols ChangeHandler);
-   foreach my $arg ( @required_args ) {
-      die "I need a $arg argument" unless $args{$arg};
-   }
-   $self->{cols}            = $args{cols};
-   $self->{buffer_in_mysql} = $args{buffer_in_mysql};
-   $self->{ChangeHandler}   = $args{ChangeHandler};
-
-   $self->{done} = 0;
-
-   return;
-}
-
-sub uses_checksum {
-   return 0; # We don't need checksum queries.
-}
-
-sub set_checksum_queries {
-   return; # This shouldn't be called, but just in case.
-}
-
-sub prepare_sync_cycle {
-   my ( $self, $host ) = @_;
-   return;
-}
-
-sub get_sql {
-   my ( $self, %args ) = @_;
-   return "SELECT "
-      . ($self->{buffer_in_mysql} ? 'SQL_BUFFER_RESULT ' : '')
-      . join(', ', map { $self->{Quoter}->quote($_) } @{$self->{cols}})
-      . ' FROM ' . $self->{Quoter}->quote(@args{qw(database table)})
-      . ' WHERE ' . ( $args{where} || '1=1' );
-}
-
-sub same_row {
-   my ( $self, %args ) = @_;
-   return;
-}
-
-sub not_in_right {
-   my ( $self, %args ) = @_;
-   $self->{ChangeHandler}->change('INSERT', $args{lr}, $self->key_cols());
-}
-
-sub not_in_left {
-   my ( $self, %args ) = @_;
-   $self->{ChangeHandler}->change('DELETE', $args{rr}, $self->key_cols());
-}
-
-sub done_with_rows {
-   my ( $self ) = @_;
-   $self->{done} = 1;
-}
-
-sub done {
-   my ( $self ) = @_;
-   return $self->{done};
-}
-
-sub key_cols {
-   my ( $self ) = @_;
-   return $self->{cols};
-}
-
-sub pending_changes {
-   my ( $self ) = @_;
-   return;
-}
-
-sub _d {
-   my ($package, undef, $line) = caller 0;
-   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
-        map { defined $_ ? $_ : 'undef' }
-        @_;
-   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
-}
-
-1;
-}
-# ###########################################################################
-# End TableSyncStream package
-# ###########################################################################
-
 # ###########################################################################
 # TableParser package
 # This package is a copy without comments from the original. The original
@@ -1699,19 +1753,56 @@ sub new {
    return bless $self, $class;
 }
 
+sub get_create_table {  # Return the SHOW CREATE TABLE/VIEW text for db.tbl, or nothing if that query fails
+   my ( $self, $dbh, $db, $tbl ) = @_;
+   die "I need a dbh parameter" unless $dbh;
+   die "I need a db parameter" unless $db;
+   die "I need a tbl parameter" unless $tbl;
+   my $q = $self->{Quoter};
+
+   my $sql = '/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, '
+           . q{@@SQL_MODE := REPLACE(REPLACE(@@SQL_MODE, 'ANSI_QUOTES', ''), ',,', ','), }
+           . '@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, '
+           . '@@SQL_QUOTE_SHOW_CREATE := 1 */';
+   MKDEBUG && _d($sql);
+   eval { $dbh->do($sql); };  # best effort: a failure here is only logged
+   MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
+
+   $sql = 'USE ' . $q->quote($db);
+   MKDEBUG && _d($dbh, $sql);
+   $dbh->do($sql);
+
+   $sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl);
+   MKDEBUG && _d($sql);
+   my $href;
+   eval { $href = $dbh->selectrow_hashref($sql); };
+   if ( $EVAL_ERROR ) {
+      MKDEBUG && _d($EVAL_ERROR);
+      return;  # NOTE(review): returns before the SQL_MODE/quote restore below runs
+   }
+
+   $sql = '/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, '
+        . '@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */';
+   MKDEBUG && _d($sql);
+   $dbh->do($sql);
+
+   my ($key) = grep { m/create table/i } keys %$href;  # result column name tells table from view
+   if ( $key ) {
+      MKDEBUG && _d('This table is a base table');
+      $href->{$key}  =~ s/\b[ ]{2,}/ /g;
+      $href->{$key} .= "\n";
+   }
+   else {
+      MKDEBUG && _d('This table is a view');
+      ($key) = grep { m/create view/i } keys %$href;
+   }
+
+   return $href->{$key};
+}
+
 sub parse {
    my ( $self, $ddl, $opts ) = @_;
    return unless $ddl;
-   if ( ref $ddl eq 'ARRAY' ) {
-      if ( lc $ddl->[0] eq 'table' ) {
-         $ddl = $ddl->[1];
-      }
-      else {
-         return {
-            engine => 'VIEW',
-         };
-      }
-   }
 
    if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
       die "Cannot parse table definition; is ANSI quoting "
@@ -2018,41 +2109,31 @@ sub remove_auto_increment {
    return $ddl;
 }
 
-sub remove_secondary_indexes {
-   my ( $self, $ddl ) = @_;
-   my $sec_indexes_ddl;
-   my $tbl_struct = $self->parse($ddl);
-
-   if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) {
-      my $clustered_key = $tbl_struct->{clustered_key};
-      $clustered_key  ||= '';
-
-      my @sec_indexes   = map {
-         my $key_def = $_->{ddl};
-         $key_def =~ s/([\(\)])/\\$1/g;
-         $ddl =~ s/\s+$key_def//i;
-
-         my $key_ddl = "ADD $_->{ddl}";
-         $key_ddl   .= ',' unless $key_ddl =~ m/,$/;
-         $key_ddl;
-      }
-      grep { $_->{name} ne $clustered_key }
-      values %{$tbl_struct->{keys}};
-      MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes));
-
-      if ( @sec_indexes ) {
-         $sec_indexes_ddl = join(' ', @sec_indexes);
-         $sec_indexes_ddl =~ s/,$//;
-      }
-
-      $ddl =~ s/,(\n\) )/$1/s;
+sub get_table_status {  # Return SHOW TABLE STATUS rows for $db as hashrefs with lowercased keys
+   my ( $self, $dbh, $db, $like ) = @_;
+   my $q = $self->{Quoter};
+   my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db);
+   my @params;
+   if ( $like ) {
+      $sql .= ' LIKE ?';
+      push @params, $like;
    }
-   else {
-      MKDEBUG && _d('Not removing secondary indexes from',
-         $tbl_struct->{engine}, 'table');
+   MKDEBUG && _d($sql, @params);
+   my $sth = $dbh->prepare($sql);
+   eval { $sth->execute(@params); };
+   if ($EVAL_ERROR) {
+      MKDEBUG && _d($EVAL_ERROR);
+      return;
    }
-
-   return $ddl, $sec_indexes_ddl, $tbl_struct;
+   my @tables = @{$sth->fetchall_arrayref({})};
+   @tables = map {
+      my %tbl; # Make a copy with lowercased keys
+      @tbl{ map { lc $_ } keys %$_ } = values %$_;
+      $tbl{engine} ||= $tbl{type} || $tbl{comment};
+      delete $tbl{type};
+      \%tbl;
+   } @tables;
+   return @tables;
 }
 
 sub _d {
@@ -2560,6 +2641,658 @@ sub _d {
 # End MySQLDump package
 # ###########################################################################
 
+# ###########################################################################
+# NibbleIterator package
+# This package is a copy without comments from the original. The original
+# with comments and its test file can be found in the Bazaar repository at,
+# lib/NibbleIterator.pm
+# t/lib/NibbleIterator.t
+# See https://launchpad.net/percona-toolkit for more information.
+# ###########################################################################
+{
+package NibbleIterator;
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
+
+use Data::Dumper;
+$Data::Dumper::Indent    = 1;
+$Data::Dumper::Sortkeys  = 1;
+$Data::Dumper::Quotekeys = 0;
+
+sub new {  # Build the nibble and boundary SQL; uses whole-table ("one nibble") mode for small tables
+   my ( $class, %args ) = @_;
+   my @required_args = qw(Cxn tbl chunk_size OptionParser Quoter TableNibbler TableParser);
+   foreach my $arg ( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args};
+
+   my $where = $o->get('where');
+   my ($row_est, $mysql_index) = get_row_estimate(%args, where => $where);
+   MKDEBUG && _d($row_est, 'estimated rows, MySQL chose index', $mysql_index);
+   my $chunk_size_limit = $o->has('chunk-size-limit')
+                        ? $o->get('chunk-size-limit')
+                        : 1;
+   my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}  # caller may pass one_nibble => 0 to forbid it
+                  ? $row_est <= $chunk_size * $chunk_size_limit
+                  : 0;
+   MKDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
+
+   if ( $args{resume}
+        && !defined $args{resume}->{lower_boundary}
+        && !defined $args{resume}->{upper_boundary} ) {
+      MKDEBUG && _d('Resuming from one nibble table');
+      $one_nibble = 1;
+   }
+
+   my $index = _find_best_index(%args, mysql_index => $mysql_index);
+   if ( !$index && !$one_nibble ) {
+      die "There is no good index and the table is oversized.";
+   }
+   my ($index_cols, $order_by);
+   if ( $index ) {
+      $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
+      $order_by   = join(', ', map {$q->quote($_)} @{$index_cols});
+   }
+
+   my $tbl_struct = $tbl->{tbl_struct};
+   my $ignore_col = $o->get('ignore-columns') || {};
+   my $all_cols   = $o->get('columns') || $tbl_struct->{cols};
+   my @cols       = grep { !$ignore_col->{$_} } @$all_cols;
+   my $self;
+   if ( $one_nibble ) {
+      my $cols = ($args{select} ? $args{select}
+                                : join(', ', map { $q->quote($_) } @cols));
+      my $from = $q->quote(@{$tbl}{qw(db tbl)});
+
+      my $nibble_sql
+         = ($args{dml} ? "$args{dml} " : "SELECT ")
+         . $cols
+         . " FROM $from"
+         . ($where ? " WHERE $where" : '')  # no boundary clause in this mode, so --where starts the WHERE
+         . " /*checksum table*/";
+      MKDEBUG && _d('One nibble statement:', $nibble_sql);
+
+      my $explain_nibble_sql
+         = "EXPLAIN SELECT $cols FROM $from"
+         . ($where ? " WHERE $where" : '')  # same fix as nibble_sql: WHERE, not AND
+         . " /*explain checksum table*/";
+      MKDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql);
+
+      $self = {
+         %args,
+         one_nibble         => 1,
+         limit              => 0,
+         nibble_sql         => $nibble_sql,
+         explain_nibble_sql => $explain_nibble_sql,
+         sql                => {
+            columns  => $cols,
+            from     => $from,
+            where    => $where,
+            order_by => $order_by,
+         },
+      };
+   }
+   else {
+      my $asc = $args{TableNibbler}->generate_asc_stmt(
+         %args,
+         tbl_struct => $tbl->{tbl_struct},
+         index      => $index,
+         cols       => \@cols,
+         asc_only   => 1,
+      );
+      MKDEBUG && _d('Ascend params:', Dumper($asc));
+
+      my $from     = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
+
+      my $first_lb_sql
+         = "SELECT /*!40001 SQL_NO_CACHE */ "
+         . join(', ', map { $q->quote($_) } @{$asc->{scols}})
+         . " FROM $from"
+         . ($where ? " WHERE $where" : '')
+         . " ORDER BY $order_by"
+         . " LIMIT 1"
+         . " /*first lower boundary*/";
+      MKDEBUG && _d('First lower boundary statement:', $first_lb_sql);
+
+      my $resume_lb_sql;
+      if ( $args{resume} ) {
+         $resume_lb_sql
+            = "SELECT /*!40001 SQL_NO_CACHE */ "
+            . join(', ', map { $q->quote($_) } @{$asc->{scols}})
+            . " FROM $from"
+            . " WHERE " . $asc->{boundaries}->{'>'}
+            . ($where ? " AND ($where)" : '')
+            . " ORDER BY $order_by"
+            . " LIMIT 1"
+            . " /*resume lower boundary*/";
+         MKDEBUG && _d('Resume lower boundary statement:', $resume_lb_sql);
+      }
+
+      my $last_ub_sql
+         = "SELECT /*!40001 SQL_NO_CACHE */ "
+         . join(', ', map { $q->quote($_) } @{$asc->{scols}})
+         . " FROM $from"
+         . ($where ? " WHERE $where" : '')
+         . " ORDER BY "
+         . join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC'
+         . " LIMIT 1"
+         . " /*last upper boundary*/";
+      MKDEBUG && _d('Last upper boundary statement:', $last_ub_sql);
+
+      my $ub_sql
+         = "SELECT /*!40001 SQL_NO_CACHE */ "
+         . join(', ', map { $q->quote($_) } @{$asc->{scols}})
+         . " FROM $from"
+         . " WHERE " . $asc->{boundaries}->{'>='}
+         . ($where ? " AND ($where)" : '')
+         . " ORDER BY $order_by"
+         . " LIMIT ?, 2"
+         . " /*next chunk boundary*/";
+      MKDEBUG && _d('Upper boundary statement:', $ub_sql);
+
+      my $nibble_sql
+         = ($args{dml} ? "$args{dml} " : "SELECT ")
+         . ($args{select} ? $args{select}
+                          : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
+         . " FROM $from"
+         . " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
+         . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
+         . ($where ? " AND ($where)" : '')
+         . ($args{order_by} ? " ORDER BY $order_by" : "")
+         . " /*checksum chunk*/";
+      MKDEBUG && _d('Nibble statement:', $nibble_sql);
+
+      my $explain_nibble_sql
+         = "EXPLAIN SELECT "
+         . ($args{select} ? $args{select}
+                          : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
+         . " FROM $from"
+         . " WHERE " . $asc->{boundaries}->{'>='}  # lower boundary
+         . " AND "   . $asc->{boundaries}->{'<='}  # upper boundary
+         . ($where ? " AND ($where)" : '')
+         . ($args{order_by} ? " ORDER BY $order_by" : "")
+         . " /*explain checksum chunk*/";
+      MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
+
+      my $limit = $chunk_size - 1;  # stored as the LIMIT offset used by ub_sql, hence chunk size - 1
+      MKDEBUG && _d('Initial chunk size (LIMIT):', $limit);
+
+      $self = {
+         %args,
+         limit              => $limit,
+         first_lb_sql       => $first_lb_sql,
+         last_ub_sql        => $last_ub_sql,
+         ub_sql             => $ub_sql,
+         nibble_sql         => $nibble_sql,
+         explain_ub_sql     => "EXPLAIN $ub_sql",
+         explain_nibble_sql => $explain_nibble_sql,
+         resume_lb_sql      => $resume_lb_sql,
+         sql                => {
+            columns    => $asc->{scols},
+            from       => $from,
+            where      => $where,
+            boundaries => $asc->{boundaries},
+            order_by   => $order_by,
+         },
+      };
+   }
+
+   $self->{index}              = $index;
+   $self->{row_est}            = $row_est;
+   $self->{nibbleno}           = 0;
+   $self->{have_rows}          = 0;
+   $self->{rowno}              = 0;
+   $self->{oktonibble}         = 1;
+   $self->{no_more_boundaries} = 0;
+
+   return bless $self, $class;
+}
+
+sub next {  # Return the next row, executing the next nibble when needed; returns nothing when done
+   my ($self) = @_;
+
+   if ( !$self->{oktonibble} ) {
+      MKDEBUG && _d('Not ok to nibble');
+      return;
+   }
+
+   my %callback_args = (
+      Cxn            => $self->{Cxn},
+      tbl            => $self->{tbl},
+      NibbleIterator => $self,
+   );
+
+   if ($self->{nibbleno} == 0) {  # first call: prepare statements and fetch the table bounds
+      $self->_prepare_sths();
+      $self->_get_bounds();
+      if ( my $callback = $self->{callbacks}->{init} ) {
+         $self->{oktonibble} = $callback->(%callback_args);
+         MKDEBUG && _d('init callback returned', $self->{oktonibble});
+         if ( !$self->{oktonibble} ) {
+            $self->{no_more_boundaries} = 1;
+            return;
+         }
+      }
+   }
+
+   NIBBLE:
+   while ( $self->{have_rows} || $self->_next_boundaries() ) {
+      if ( !$self->{have_rows} ) {
+         $self->{nibbleno}++;
+         MKDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
+            join(', ', (@{$self->{lower}}, @{$self->{upper}})));
+         if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
+            $self->{have_rows} = $callback->(%callback_args);
+         }
+         else {
+            $self->{nibble_sth}->execute(@{$self->{lower}}, @{$self->{upper}});
+            $self->{have_rows} = $self->{nibble_sth}->rows();
+         }
+         MKDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
+      }
+
+      if ( $self->{have_rows} ) {
+         my $row = $self->{fetch_hashref}
+                 ? $self->{nibble_sth}->fetchrow_hashref()
+                 : $self->{nibble_sth}->fetchrow_arrayref();
+         if ( $row ) {
+            $self->{rowno}++;
+            MKDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
+            return $self->{fetch_hashref} ? $row : [ @$row ];  # array rows are copied before being returned
+         }
+      }
+
+      MKDEBUG && _d('No rows in nibble or nibble skipped');
+      if ( my $callback = $self->{callbacks}->{after_nibble} ) {
+         $callback->(%callback_args);
+      }
+      $self->{rowno}     = 0;
+      $self->{have_rows} = 0;
+   }
+
+   MKDEBUG && _d('Done nibbling');
+   if ( my $callback = $self->{callbacks}->{done} ) {
+      $callback->(%callback_args);
+   }
+
+   return;
+}
+
+sub nibble_number {
+   my ($self) = @_;
+   return $self->{nibbleno};
+}
+
+sub set_nibble_number {
+   my ($self, $n) = @_;
+   die "I need a number" unless $n;
+   $self->{nibbleno} = $n;
+   MKDEBUG && _d('Set new nibble number:', $n);
+   return;
+}
+
+sub nibble_index {
+   my ($self) = @_;
+   return $self->{index};
+}
+
+sub statements {  # Statement handles; these keys are only set by _prepare_sths() on the first next()
+   my ($self) = @_;
+   return {
+      nibble                 => $self->{nibble_sth},
+      explain_nibble         => $self->{explain_nibble_sth},
+      upper_boundary         => $self->{ub_sth},
+      explain_upper_boundary => $self->{explain_ub_sth},
+   }
+}
+
+sub boundaries {
+   my ($self) = @_;
+   return {
+      first_lower => $self->{first_lower},
+      lower       => $self->{lower},
+      upper       => $self->{upper},
+      next_lower  => $self->{next_lower},
+      last_upper  => $self->{last_upper},
+   };
+}
+
+sub set_boundary {  # Override one boundary with an arrayref of values; first_lower cannot be set
+   my ($self, $boundary, $values) = @_;
+   die "I need a boundary parameter"
+      unless $boundary;
+   die "Invalid boundary: $boundary"
+      unless $boundary =~ m/^(?:lower|upper|next_lower|last_upper)$/;
+   die "I need a values arrayref parameter"
+      unless $values && ref $values eq 'ARRAY';
+   $self->{$boundary} = $values;
+   MKDEBUG && _d('Set new', $boundary, 'boundary:', Dumper($values));
+   return;
+}
+
+sub one_nibble {
+   my ($self) = @_;
+   return $self->{one_nibble};
+}
+
+sub chunk_size {
+   my ($self) = @_;
+   return $self->{limit} + 1;
+}
+
+sub set_chunk_size {  # No-op in one-nibble mode; otherwise stores $limit - 1 as the LIMIT offset
+   my ($self, $limit) = @_;
+   return if $self->{one_nibble};
+   die "Chunk size must be > 0" unless $limit;
+   $self->{limit} = $limit - 1;
+   MKDEBUG && _d('Set new chunk size (LIMIT):', $limit);
+   return;
+}
+
+sub sql {
+   my ($self) = @_;
+   return $self->{sql};
+}
+
+sub more_boundaries {
+   my ($self) = @_;
+   return !$self->{no_more_boundaries};
+}
+
+sub no_more_rows {
+   my ($self) = @_;
+   $self->{have_rows} = 0;
+   return;
+}
+
+sub row_estimate {
+   my ($self) = @_;
+   return $self->{row_est};
+}
+
+sub _find_best_index {  # Prefer a wanted/unique index; otherwise the candidate with the highest cardinality
+   my (%args) = @_;
+   my @required_args = qw(Cxn tbl TableParser);
+   my ($cxn, $tbl, $tp) = @args{@required_args};
+   my $tbl_struct = $tbl->{tbl_struct};
+   my $indexes    = $tbl_struct->{keys};
+
+   my $want_index = $args{chunk_index};
+   if ( $want_index ) {
+      MKDEBUG && _d('User wants to use index', $want_index);
+      if ( !exists $indexes->{$want_index} ) {
+         MKDEBUG && _d('Cannot use user index because it does not exist');
+         $want_index = undef;
+      }
+   }
+
+   if ( !$want_index && $args{mysql_index} ) {
+      MKDEBUG && _d('MySQL wants to use index', $args{mysql_index});
+      $want_index = $args{mysql_index};
+   }
+
+   my $best_index;
+   my @possible_indexes;
+   if ( $want_index ) {
+      if ( $indexes->{$want_index}->{is_unique} ) {
+         MKDEBUG && _d('Will use wanted index');
+         $best_index = $want_index;
+      }
+      else {
+         MKDEBUG && _d('Wanted index is a possible index');
+         push @possible_indexes, $want_index;
+      }
+   }
+   else {
+      MKDEBUG && _d('Auto-selecting best index');
+      foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
+         if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
+            $best_index = $index;
+            last;
+         }
+         else {
+            push @possible_indexes, $index;
+         }
+      }
+   }
+
+   if ( !$best_index && @possible_indexes ) {
+      MKDEBUG && _d('No PRIMARY or unique indexes;',
+         'will use index with highest cardinality');
+      foreach my $index ( @possible_indexes ) {
+         $indexes->{$index}->{cardinality} = _get_index_cardinality(
+            %args,
+            index => $index,
+         );
+      }
+      @possible_indexes = sort {
+         my $cmp
+            = $indexes->{$b}->{cardinality} <=> $indexes->{$a}->{cardinality};  # descending; was $b <=> $b (always 0)
+         if ( $cmp == 0 ) {
+            $cmp = scalar @{$indexes->{$b}->{cols}}
+               <=> scalar @{$indexes->{$a}->{cols}};
+         }
+         $cmp;
+      } @possible_indexes;
+      $best_index = $possible_indexes[0];
+   }
+
+   MKDEBUG && _d('Best index:', $best_index);
+   return $best_index;
+}
+
+sub _get_index_cardinality {  # Product of the cardinality values SHOW INDEXES returns for $index
+   my (%args) = @_;
+   my @required_args = qw(Cxn tbl index Quoter);
+   my ($cxn, $tbl, $index, $q) = @args{@required_args};
+
+   my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
+           . " WHERE Key_name = '$index'";
+   MKDEBUG && _d($sql);
+   my $cardinality = 1;
+   my $rows = $cxn->dbh()->selectall_hashref($sql, 'key_name');  # NOTE(review): keyed by key_name, so rows of one index share a key
+   foreach my $row ( values %$rows ) {
+      $cardinality *= $row->{cardinality} if $row->{cardinality};
+   }
+   MKDEBUG && _d('Index', $index, 'cardinality:', $cardinality);
+   return $cardinality;
+}
+
+sub get_row_estimate {  # With a WHERE: (EXPLAIN rows, EXPLAIN key); without: rows from tbl_status only
+   my (%args) = @_;
+   my @required_args = qw(Cxn tbl OptionParser TableParser Quoter);
+   my ($cxn, $tbl, $o, $tp, $q) = @args{@required_args};
+
+   if ( $args{where} ) {
+      MKDEBUG && _d('WHERE clause, using explain plan for row estimate');
+      my $table = $q->quote(@{$tbl}{qw(db tbl)});
+      my $sql   = "EXPLAIN SELECT * FROM $table WHERE $args{where}";
+      MKDEBUG && _d($sql);
+      my $expl = $cxn->dbh()->selectrow_hashref($sql);
+      MKDEBUG && _d(Dumper($expl));
+      return ($expl->{rows} || 0), $expl->{key};
+   }
+   else {
+      MKDEBUG && _d('No WHERE clause, using table status for row estimate');
+      return $tbl->{tbl_status}->{rows} || 0;
+   }
+}
+
+sub _prepare_sths {
+   my ($self) = @_;
+   MKDEBUG && _d('Preparing statement handles');
+
+   my $dbh = $self->{Cxn}->dbh();
+
+   $self->{nibble_sth}         = $dbh->prepare($self->{nibble_sql});
+   $self->{explain_nibble_sth} = $dbh->prepare($self->{explain_nibble_sql});
+
+   if ( !$self->{one_nibble} ) {  # boundary statements exist only in chunked mode
+      $self->{ub_sth}         = $dbh->prepare($self->{ub_sql});
+      $self->{explain_ub_sth} = $dbh->prepare($self->{explain_ub_sql});
+   }
+
+   return;
+}
+
+sub _get_bounds {  # Fetch first lower / last upper boundaries and pick the starting (or resume) lower boundary
+   my ($self) = @_;
+
+   if ( $self->{one_nibble} ) {
+      if ( $self->{resume} ) {
+         $self->{no_more_boundaries} = 1;
+      }
+      return;
+   }
+
+   my $dbh = $self->{Cxn}->dbh();
+
+   $self->{first_lower} = $dbh->selectrow_arrayref($self->{first_lb_sql});
+   MKDEBUG && _d('First lower boundary:', Dumper($self->{first_lower}));
+
+   if ( my $nibble = $self->{resume} ) {
+      if ( defined $nibble->{lower_boundary}
+           && defined $nibble->{upper_boundary} ) {
+         my $sth = $dbh->prepare($self->{resume_lb_sql});
+         my @ub  = split ',', $nibble->{upper_boundary};
+         MKDEBUG && _d($sth->{Statement}, 'params:', @ub);
+         $sth->execute(@ub);
+         $self->{next_lower} = $sth->fetchrow_arrayref();
+         $sth->finish();
+      }
+   }
+   else {
+      $self->{next_lower} = $self->{first_lower};
+   }
+   MKDEBUG && _d('Next lower boundary:', Dumper($self->{next_lower}));
+
+   if ( !$self->{next_lower} ) {
+      MKDEBUG && _d('At end of table, or no more boundaries to resume');
+      $self->{no_more_boundaries} = 1;
+   }
+
+   $self->{last_upper} = $dbh->selectrow_arrayref($self->{last_ub_sql});
+   MKDEBUG && _d('Last upper boundary:', Dumper($self->{last_upper}));
+
+   return;
+}
+
+sub _next_boundaries {  # Advance lower/upper to the next nibble; true to continue, nothing to stop
+   my ($self) = @_;
+
+   if ( $self->{no_more_boundaries} ) {
+      MKDEBUG && _d('No more boundaries');
+      return; # stop nibbling
+   }
+
+   if ( $self->{one_nibble} ) {
+      $self->{lower} = $self->{upper} = [];
+      $self->{no_more_boundaries} = 1; # for next call
+      return 1; # continue nibbling
+   }
+
+   if ( $self->identical_boundaries($self->{lower}, $self->{next_lower}) ) {
+      MKDEBUG && _d('Infinite loop detected');
+      my $tbl     = $self->{tbl};
+      my $index   = $tbl->{tbl_struct}->{keys}->{$self->{index}};
+      my $n_cols  = scalar @{$index->{cols}};
+      my $chunkno = $self->{nibbleno};
+      die "Possible infinite loop detected! "
+         . "The lower boundary for chunk $chunkno is "
+         . "<" . join(', ', @{$self->{lower}}) . "> and the lower "
+         . "boundary for chunk " . ($chunkno + 1) . " is also "
+         . "<" . join(', ', @{$self->{next_lower}}) . ">. "
+         . "This usually happens when using a non-unique single "
+         . "column index. The current chunk index for table "
+         . "$tbl->{db}.$tbl->{tbl} is $self->{index} which is"
+         . ($index->{is_unique} ? '' : ' not') . " unique and covers "
+         . ($n_cols > 1 ? "$n_cols columns" : "1 column") . ".\n";
+   }
+   $self->{lower} = $self->{next_lower};
+
+   if ( my $callback = $self->{callbacks}->{next_boundaries} ) {
+      my $oktonibble = $callback->(
+         Cxn            => $self->{Cxn},
+         tbl            => $self->{tbl},
+         NibbleIterator => $self,
+      );
+      MKDEBUG && _d('next_boundaries callback returned', $oktonibble);
+      if ( !$oktonibble ) {
+         $self->{no_more_boundaries} = 1;
+         return; # stop nibbling
+      }
+   }
+
+   MKDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
+      join(', ', @{$self->{lower}}), $self->{limit});
+   $self->{ub_sth}->execute(@{$self->{lower}}, $self->{limit});
+   my $boundary = $self->{ub_sth}->fetchall_arrayref();
+   MKDEBUG && _d('Next boundary:', Dumper($boundary));
+   if ( $boundary && @$boundary ) {
+      $self->{upper} = $boundary->[0]; # this nibble
+      if ( $boundary->[1] ) {
+         $self->{next_lower} = $boundary->[1]; # next nibble
+      }
+      else {
+         $self->{no_more_boundaries} = 1; # for next call
+         MKDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
+      }
+   }
+   else {
+      $self->{no_more_boundaries} = 1; # for next call
+      $self->{upper} = $self->{last_upper};
+      MKDEBUG && _d('Last upper boundary:', Dumper($self->{upper}));
+   }
+   $self->{ub_sth}->finish();
+
+   return 1; # continue nibbling
+}
+
+sub identical_boundaries {  # True when both boundaries are unset, or hold equal values position by position
+   my ($self, $b1, $b2) = @_;
+
+   return 0 if ($b1 && !$b2) || (!$b1 && $b2);
+
+   return 1 if !$b1 && !$b2;
+
+   die "Boundaries have different numbers of values"
+      if scalar @$b1 != scalar @$b2; # shouldn't happen
+   my $n_vals = scalar @$b1;
+   for my $i ( 0..($n_vals-1) ) {
+      return 0 if (defined $b1->[$i] ? $b1->[$i] : '') ne (defined $b2->[$i] ? $b2->[$i] : ''); # diff; undef (NULL) compared as '' so fatal warnings cannot fire
+   }
+   return 1;
+}
+
+sub DESTROY {  # Finish every statement handle stored under a *_sth key
+   my ( $self ) = @_;
+   foreach my $key ( keys %$self ) {
+      if ( $key =~ m/_sth$/ ) {
+         MKDEBUG && _d('Finish', $key);
+         $self->{$key}->finish();
+      }
+   }
+   return;
+}
+
+sub _d {
+   my ($package, undef, $line) = caller 0;
+   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
+        map { defined $_ ? $_ : 'undef' }
+        @_;
+   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
+}
+
+1;
+}
+# ###########################################################################
+# End NibbleIterator package
+# ###########################################################################
+
 # ###########################################################################
 # ChangeHandler package
 # This package is a copy without comments from the original. The original
@@ -2883,2142 +3616,6 @@ sub _d {
 # End ChangeHandler package
 # ###########################################################################
 
-# ###########################################################################
-# TableChunker package
-# This package is a copy without comments from the original. The original
-# with comments and its test file can be found in the Bazaar repository at,
-# lib/TableChunker.pm
-# t/lib/TableChunker.t
-# See https://launchpad.net/percona-toolkit for more information.
-# ########################################################################### -{ -package TableChunker; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -use POSIX qw(floor ceil); -use List::Util qw(min max); -use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; -$Data::Dumper::Quotekeys = 0; - -sub new { - my ( $class, %args ) = @_; - foreach my $arg ( qw(Quoter MySQLDump) ) { - die "I need a $arg argument" unless $args{$arg}; - } - - my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year); - my %real_types = map { $_ => 1 } qw(decimal double float); - - my $self = { - %args, - int_types => \%int_types, - real_types => \%real_types, - EPOCH => '1970-01-01', - }; - - return bless $self, $class; -} - -sub find_chunk_columns { - my ( $self, %args ) = @_; - foreach my $arg ( qw(tbl_struct) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $tbl_struct = $args{tbl_struct}; - - my @possible_indexes; - foreach my $index ( values %{ $tbl_struct->{keys} } ) { - - next unless $index->{type} eq 'BTREE'; - - next if grep { defined } @{$index->{col_prefixes}}; - - if ( $args{exact} ) { - next unless $index->{is_unique} && @{$index->{cols}} == 1; - } - - push @possible_indexes, $index; - } - MKDEBUG && _d('Possible chunk indexes in order:', - join(', ', map { $_->{name} } @possible_indexes)); - - my $can_chunk_exact = 0; - my @candidate_cols; - foreach my $index ( @possible_indexes ) { - my $col = $index->{cols}->[0]; - - my $col_type = $tbl_struct->{type_for}->{$col}; - next unless $self->{int_types}->{$col_type} - || $self->{real_types}->{$col_type} - || $col_type =~ m/char/; - - push @candidate_cols, { column => $col, index => $index->{name} }; - } - - $can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols; - - if ( MKDEBUG ) { - my $chunk_type = $args{exact} ? 
'Exact' : 'Inexact'; - _d($chunk_type, 'chunkable:', - join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols)); - } - - my @result; - MKDEBUG && _d('Ordering columns by order in tbl, PK first'); - if ( $tbl_struct->{keys}->{PRIMARY} ) { - my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0]; - @result = grep { $_->{column} eq $pk_first_col } @candidate_cols; - @candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols; - } - my $i = 0; - my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}}; - push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} } - @candidate_cols; - - if ( MKDEBUG ) { - _d('Chunkable columns:', - join(', ', map { "$_->{column} on $_->{index}" } @result)); - _d('Can chunk exactly:', $can_chunk_exact); - } - - return ($can_chunk_exact, @result); -} - -sub calculate_chunks { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - MKDEBUG && _d('Calculate chunks for', - join(", ", map {"$_=".(defined $args{$_} ? 
$args{$_} : "undef")} - qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact) - )); - - if ( !$args{rows_in_range} ) { - MKDEBUG && _d("Empty table"); - return '1=1'; - } - - if ( $args{rows_in_range} < $args{chunk_size} ) { - MKDEBUG && _d("Chunk size larger than rows in range"); - return '1=1'; - } - - my $q = $self->{Quoter}; - my $dbh = $args{dbh}; - my $chunk_col = $args{chunk_col}; - my $tbl_struct = $args{tbl_struct}; - my $col_type = $tbl_struct->{type_for}->{$chunk_col}; - MKDEBUG && _d('chunk col type:', $col_type); - - my %chunker; - if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) { - %chunker = $self->_chunk_numeric(%args); - } - elsif ( $col_type =~ m/char/ ) { - %chunker = $self->_chunk_char(%args); - } - else { - die "Cannot chunk $col_type columns"; - } - MKDEBUG && _d("Chunker:", Dumper(\%chunker)); - my ($col, $start_point, $end_point, $interval, $range_func) - = @chunker{qw(col start_point end_point interval range_func)}; - - my @chunks; - if ( $start_point < $end_point ) { - - push @chunks, "$col = 0" if $chunker{have_zero_chunk}; - - my ($beg, $end); - my $iter = 0; - for ( my $i = $start_point; $i < $end_point; $i += $interval ) { - ($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point); - - if ( $iter++ == 0 ) { - push @chunks, - ($chunker{have_zero_chunk} ? "$col > 0 AND " : "") - ."$col < " . $q->quote_val($end); - } - else { - push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end); - } - } - - my $chunk_range = lc $args{chunk_range} || 'open'; - my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}}; - pop @chunks; - if ( @chunks ) { - push @chunks, "$col >= " . $q->quote_val($beg) - . ($chunk_range eq 'openclosed' - ? " AND $col <= " . $q->quote_val($args{max}) : ""); - } - else { - push @chunks, $nullable ? 
"$col IS NOT NULL" : '1=1'; - } - if ( $nullable ) { - push @chunks, "$col IS NULL"; - } - } - else { - MKDEBUG && _d('No chunks; using single chunk 1=1'); - push @chunks, '1=1'; - } - - return @chunks; -} - -sub _chunk_numeric { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $q = $self->{Quoter}; - my $db_tbl = $q->quote($args{db}, $args{tbl}); - my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}}; - - my $range_func; - if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) { - $range_func = 'range_num'; - } - elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) { - $range_func = "range_$col_type"; - } - elsif ( $col_type eq 'datetime' ) { - $range_func = 'range_datetime'; - } - - my ($start_point, $end_point); - eval { - $start_point = $self->value_to_number( - value => $args{min}, - column_type => $col_type, - dbh => $args{dbh}, - ); - $end_point = $self->value_to_number( - value => $args{max}, - column_type => $col_type, - dbh => $args{dbh}, - ); - }; - if ( $EVAL_ERROR ) { - if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) { - die $EVAL_ERROR; - } - else { - die "Error calculating chunk start and end points for table " - . "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` " - . "with min/max values " - . join('/', - map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max)) - . ":\n\n" - . $EVAL_ERROR - . "\nVerify that the min and max values are valid for the column. " - . "If they are valid, this error could be caused by a bug in the " - . 
"tool."; - } - } - - if ( !defined $start_point ) { - MKDEBUG && _d('Start point is undefined'); - $start_point = 0; - } - if ( !defined $end_point || $end_point < $start_point ) { - MKDEBUG && _d('End point is undefined or before start point'); - $end_point = 0; - } - MKDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point); - - my $have_zero_chunk = 0; - if ( $args{zero_chunk} ) { - if ( $start_point != $end_point && $start_point >= 0 ) { - MKDEBUG && _d('Zero chunking'); - my $nonzero_val = $self->get_nonzero_value( - %args, - db_tbl => $db_tbl, - col => $args{chunk_col}, - col_type => $col_type, - val => $args{min} - ); - $start_point = $self->value_to_number( - value => $nonzero_val, - column_type => $col_type, - dbh => $args{dbh}, - ); - $have_zero_chunk = 1; - } - else { - MKDEBUG && _d("Cannot zero chunk"); - } - } - MKDEBUG && _d("Using chunk range:", $start_point, "to", $end_point); - - my $interval = $args{chunk_size} - * ($end_point - $start_point) - / $args{rows_in_range}; - if ( $self->{int_types}->{$col_type} ) { - $interval = ceil($interval); - } - $interval ||= $args{chunk_size}; - if ( $args{exact} ) { - $interval = $args{chunk_size}; - } - MKDEBUG && _d('Chunk interval:', $interval, 'units'); - - return ( - col => $q->quote($args{chunk_col}), - start_point => $start_point, - end_point => $end_point, - interval => $interval, - range_func => $range_func, - have_zero_chunk => $have_zero_chunk, - ); -} - -sub _chunk_char { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $q = $self->{Quoter}; - my $db_tbl = $q->quote($args{db}, $args{tbl}); - my $dbh = $args{dbh}; - my $chunk_col = $args{chunk_col}; - my $row; - my $sql; - - $sql = "SELECT MIN($chunk_col), MAX($chunk_col) FROM $db_tbl " - . 
"ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - $row = $dbh->selectrow_arrayref($sql); - my ($min_col, $max_col) = ($row->[0], $row->[1]); - - $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) AS max_col_ord"; - MKDEBUG && _d($dbh, $sql); - my $ord_sth = $dbh->prepare($sql); # avoid quoting issues - $ord_sth->execute($min_col, $max_col); - $row = $ord_sth->fetchrow_arrayref(); - my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]); - MKDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord); - - my $base; - my @chars; - MKDEBUG && _d("Table charset:", $args{tbl_struct}->{charset}); - if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) { - my @sorted_latin1_chars = ( - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, - 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161, - 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 215, 216, 222, 223, 247, 255); - - my ($first_char, $last_char); - for my $i ( 0..$#sorted_latin1_chars ) { - $first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord; - } - for my $i ( $first_char..$#sorted_latin1_chars ) { - $last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord; - }; - - @chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char]; - $base = scalar @chars; - } - else { - - my $tmp_tbl = '__maatkit_char_chunking_map'; - my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl); - $sql = "DROP TABLE IF EXISTS $tmp_db_tbl"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - my $col_def = $args{tbl_struct}->{defs}->{$chunk_col}; - $sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) " - . 
"ENGINE=MEMORY"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - - $sql = "INSERT INTO $tmp_db_tbl VALUE (CHAR(?))"; - MKDEBUG && _d($dbh, $sql); - my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues - for my $char_code ( $min_col_ord..$max_col_ord ) { - $ins_char_sth->execute($char_code); - } - - $sql = "SELECT `$chunk_col` FROM $tmp_db_tbl " - . "WHERE `$chunk_col` BETWEEN ? AND ? " - . "ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - my $sel_char_sth = $dbh->prepare($sql); - $sel_char_sth->execute($min_col, $max_col); - - @chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() }; - $base = scalar @chars; - - $sql = "DROP TABLE $tmp_db_tbl"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - } - MKDEBUG && _d("Base", $base, "chars:", @chars); - - - $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - $row = $dbh->selectrow_arrayref($sql); - my $max_col_len = $row->[0]; - MKDEBUG && _d("Max column value:", $max_col, $max_col_len); - my $n_values; - for my $n_chars ( 1..$max_col_len ) { - $n_values = $base**$n_chars; - if ( $n_values >= $args{chunk_size} ) { - MKDEBUG && _d($n_chars, "chars in base", $base, "expresses", - $n_values, "values"); - last; - } - } - - my $n_chunks = $args{rows_in_range} / $args{chunk_size}; - my $interval = floor($n_values / $n_chunks) || 1; - - my $range_func = sub { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $start_char = $self->base_count( - count_to => $start, - base => $base, - symbols => \@chars, - ); - my $end_char = $self->base_count( - count_to => min($max, $start + $interval), - base => $base, - symbols => \@chars, - ); - return $start_char, $end_char; - }; - - return ( - col => $q->quote($chunk_col), - start_point => 0, - end_point => $n_values, - interval => $interval, - range_func => $range_func, - ); -} - -sub get_first_chunkable_column { - my ( $self, %args ) = @_; - foreach my $arg ( qw(tbl_struct) ) { - die "I need a $arg 
argument" unless $args{$arg}; - } - - my ($exact, @cols) = $self->find_chunk_columns(%args); - my $col = $cols[0]->{column}; - my $idx = $cols[0]->{index}; - - my $wanted_col = $args{chunk_column}; - my $wanted_idx = $args{chunk_index}; - MKDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx); - - if ( $wanted_col && $wanted_idx ) { - foreach my $chunkable_col ( @cols ) { - if ( $wanted_col eq $chunkable_col->{column} - && $wanted_idx eq $chunkable_col->{index} ) { - $col = $wanted_col; - $idx = $wanted_idx; - last; - } - } - } - elsif ( $wanted_col ) { - foreach my $chunkable_col ( @cols ) { - if ( $wanted_col eq $chunkable_col->{column} ) { - $col = $wanted_col; - $idx = $chunkable_col->{index}; - last; - } - } - } - elsif ( $wanted_idx ) { - foreach my $chunkable_col ( @cols ) { - if ( $wanted_idx eq $chunkable_col->{index} ) { - $col = $chunkable_col->{column}; - $idx = $wanted_idx; - last; - } - } - } - - MKDEBUG && _d('First chunkable col/index:', $col, $idx); - return $col, $idx; -} - -sub size_to_rows { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args}; - my $q = $self->{Quoter}; - my $du = $self->{MySQLDump}; - - my ($n_rows, $avg_row_length); - - my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/; - if ( $suffix ) { # Convert to bytes. - $chunk_size = $suffix eq 'k' ? $num * 1_024 - : $suffix eq 'M' ? $num * 1_024 * 1_024 - : $num * 1_024 * 1_024 * 1_024; - } - elsif ( $num ) { - $n_rows = $num; - } - else { - die "Invalid chunk size $chunk_size; must be an integer " - . "with optional suffix kMG"; - } - - if ( $suffix || $args{avg_row_length} ) { - my ($status) = $du->get_table_status($dbh, $q, $db, $tbl); - $avg_row_length = $status->{avg_row_length}; - if ( !defined $n_rows ) { - $n_rows = $avg_row_length ? 
ceil($chunk_size / $avg_row_length) : undef; - } - } - - return $n_rows, $avg_row_length; -} - -sub get_range_statistics { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl chunk_col tbl_struct); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db, $tbl, $col) = @args{@required_args}; - my $where = $args{where}; - my $q = $self->{Quoter}; - - my $col_type = $args{tbl_struct}->{type_for}->{$col}; - my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col}; - - my $db_tbl = $q->quote($db, $tbl); - $col = $q->quote($col); - - my ($min, $max); - eval { - my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl" - . ($args{index_hint} ? " $args{index_hint}" : "") - . ($where ? " WHERE ($where)" : ''); - MKDEBUG && _d($dbh, $sql); - ($min, $max) = $dbh->selectrow_array($sql); - MKDEBUG && _d("Actual end points:", $min, $max); - - ($min, $max) = $self->get_valid_end_points( - %args, - dbh => $dbh, - db_tbl => $db_tbl, - col => $col, - col_type => $col_type, - min => $min, - max => $max, - ); - MKDEBUG && _d("Valid end points:", $min, $max); - }; - if ( $EVAL_ERROR ) { - die "Error getting min and max values for table $db_tbl " - . "on column $col: $EVAL_ERROR"; - } - - my $sql = "EXPLAIN SELECT * FROM $db_tbl" - . ($args{index_hint} ? " $args{index_hint}" : "") - . ($where ? " WHERE $where" : ''); - MKDEBUG && _d($sql); - my $expl = $dbh->selectrow_hashref($sql); - - return ( - min => $min, - max => $max, - rows_in_range => $expl->{rows}, - ); -} - -sub inject_chunks { - my ( $self, %args ) = @_; - foreach my $arg ( qw(database table chunks chunk_num query) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - MKDEBUG && _d('Injecting chunk', $args{chunk_num}); - my $query = $args{query}; - my $comment = sprintf("/*%s.%s:%d/%d*/", - $args{database}, $args{table}, - $args{chunk_num} + 1, scalar @{$args{chunks}}); - $query =~ s!/\*PROGRESS_COMMENT\*/!$comment!; - my $where = "WHERE (" . 
$args{chunks}->[$args{chunk_num}] . ')'; - if ( $args{where} && grep { $_ } @{$args{where}} ) { - $where .= " AND (" - . join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} ) - . ")"; - } - my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)}); - my $index_hint = $args{index_hint} || ''; - - MKDEBUG && _d('Parameters:', - Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint})); - $query =~ s!/\*WHERE\*/! $where!; - $query =~ s!/\*DB_TBL\*/!$db_tbl!; - $query =~ s!/\*INDEX_HINT\*/! $index_hint!; - $query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!; - - return $query; -} - - -sub value_to_number { - my ( $self, %args ) = @_; - my @required_args = qw(column_type dbh); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $val = $args{value}; - my ($col_type, $dbh) = @args{@required_args}; - MKDEBUG && _d('Converting MySQL', $col_type, $val); - - return unless defined $val; # value is NULL - - my %mysql_conv_func_for = ( - timestamp => 'UNIX_TIMESTAMP', - date => 'TO_DAYS', - time => 'TIME_TO_SEC', - datetime => 'TO_DAYS', - ); - - my $num; - if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) { - $num = $val; - } - elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) { - my $func = $mysql_conv_func_for{$col_type}; - my $sql = "SELECT $func(?)"; - MKDEBUG && _d($dbh, $sql, $val); - my $sth = $dbh->prepare($sql); - $sth->execute($val); - ($num) = $sth->fetchrow_array(); - } - elsif ( $col_type eq 'datetime' ) { - $num = $self->timestampdiff($dbh, $val); - } - else { - die "I don't know how to chunk $col_type\n"; - } - MKDEBUG && _d('Converts to', $num); - return $num; -} - -sub range_num { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $end = min($max, $start + $interval); - - - $start = sprintf('%.17f', $start) if $start =~ /e/; - $end = sprintf('%.17f', $end) if $end =~ /e/; - - $start =~ s/\.(\d{5}).*$/.$1/; - $end =~ s/\.(\d{5}).*$/.$1/; - - if ( $end > 
$start ) { - return ( $start, $end ); - } - else { - die "Chunk size is too small: $end !> $start\n"; - } -} - -sub range_time { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub range_date { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub range_datetime { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), " - . "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub range_timestamp { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub timestampdiff { - my ( $self, $dbh, $time ) = @_; - my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) " - . "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400"; - MKDEBUG && _d($sql); - my ( $diff ) = $dbh->selectrow_array($sql); - $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)"; - MKDEBUG && _d($sql); - my ( $check ) = $dbh->selectrow_array($sql); - die <<" EOF" - Incorrect datetime math: given $time, calculated $diff but checked to $check. - This could be due to a version of MySQL that overflows on large interval - values to DATE_ADD(), or the given datetime is not a valid date. If not, - please report this as a bug. 
- EOF - unless $check eq $time; - return $diff; -} - - - - -sub get_valid_end_points { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col col_type); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; - my ($real_min, $real_max) = @args{qw(min max)}; - - my $err_fmt = "Error finding a valid %s value for table $db_tbl on " - . "column $col. The real %s value %s is invalid and " - . "no other valid values were found. Verify that the table " - . "has at least one valid value for this column" - . ($args{where} ? " where $args{where}." : "."); - - my $valid_min = $real_min; - if ( defined $valid_min ) { - MKDEBUG && _d("Validating min end point:", $real_min); - $valid_min = $self->_get_valid_end_point( - %args, - val => $real_min, - endpoint => 'min', - ); - die sprintf($err_fmt, 'minimum', 'minimum', - (defined $real_min ? $real_min : "NULL")) - unless defined $valid_min; - } - - my $valid_max = $real_max; - if ( defined $valid_max ) { - MKDEBUG && _d("Validating max end point:", $real_min); - $valid_max = $self->_get_valid_end_point( - %args, - val => $real_max, - endpoint => 'max', - ); - die sprintf($err_fmt, 'maximum', 'maximum', - (defined $real_max ? $real_max : "NULL")) - unless defined $valid_max; - } - - return $valid_min, $valid_max; -} - -sub _get_valid_end_point { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col col_type); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; - my $val = $args{val}; - - return $val unless defined $val; - - my $validate = $col_type =~ m/time|date/ ? 
\&_validate_temporal_value - : undef; - - if ( !$validate ) { - MKDEBUG && _d("No validator for", $col_type, "values"); - return $val; - } - - return $val if defined $validate->($dbh, $val); - - MKDEBUG && _d("Value is invalid, getting first valid value"); - $val = $self->get_first_valid_value( - %args, - val => $val, - validate => $validate, - ); - - return $val; -} - -sub get_first_valid_value { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col validate endpoint); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args}; - my $tries = defined $args{tries} ? $args{tries} : 5; - my $val = $args{val}; - - return unless defined $val; - - my $cmp = $endpoint =~ m/min/i ? '>' - : $endpoint =~ m/max/i ? '<' - : die "Invalid endpoint arg: $endpoint"; - my $sql = "SELECT $col FROM $db_tbl " - . ($args{index_hint} ? "$args{index_hint} " : "") - . "WHERE $col $cmp ? AND $col IS NOT NULL " - . ($args{where} ? "AND ($args{where}) " : "") - . 
"ORDER BY $col LIMIT 1"; - MKDEBUG && _d($dbh, $sql); - my $sth = $dbh->prepare($sql); - - my $last_val = $val; - while ( $tries-- ) { - $sth->execute($last_val); - my ($next_val) = $sth->fetchrow_array(); - MKDEBUG && _d('Next value:', $next_val, '; tries left:', $tries); - if ( !defined $next_val ) { - MKDEBUG && _d('No more rows in table'); - last; - } - if ( defined $validate->($dbh, $next_val) ) { - MKDEBUG && _d('First valid value:', $next_val); - $sth->finish(); - return $next_val; - } - $last_val = $next_val; - } - $sth->finish(); - $val = undef; # no valid value found - - return $val; -} - -sub _validate_temporal_value { - my ( $dbh, $val ) = @_; - my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))"; - my $res; - eval { - MKDEBUG && _d($dbh, $sql, $val); - my $sth = $dbh->prepare($sql); - $sth->execute($val, $val, $val, $val); - ($res) = $sth->fetchrow_array(); - $sth->finish(); - }; - if ( $EVAL_ERROR ) { - MKDEBUG && _d($EVAL_ERROR); - } - return $res; -} - -sub get_nonzero_value { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col col_type); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; - my $tries = defined $args{tries} ? $args{tries} : 5; - my $val = $args{val}; - - my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value - : sub { return $_[1]; }; - - if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry - MKDEBUG && _d('Discarding zero value:', $val); - my $sql = "SELECT $col FROM $db_tbl " - . ($args{index_hint} ? "$args{index_hint} " : "") - . "WHERE $col > ? AND $col IS NOT NULL " - . ($args{where} ? "AND ($args{where}) " : '') - . 
"ORDER BY $col LIMIT 1"; - MKDEBUG && _d($sql); - my $sth = $dbh->prepare($sql); - - my $last_val = $val; - while ( $tries-- ) { - $sth->execute($last_val); - my ($next_val) = $sth->fetchrow_array(); - if ( $is_nonzero->($dbh, $next_val) ) { - MKDEBUG && _d('First non-zero value:', $next_val); - $sth->finish(); - return $next_val; - } - $last_val = $next_val; - } - $sth->finish(); - $val = undef; # no non-zero value found - } - - return $val; -} - -sub base_count { - my ( $self, %args ) = @_; - my @required_args = qw(count_to base symbols); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my ($n, $base, $symbols) = @args{@required_args}; - - return $symbols->[0] if $n == 0; - - my $highest_power = floor(log($n)/log($base)); - if ( $highest_power == 0 ){ - return $symbols->[$n]; - } - - my @base_powers; - for my $power ( 0..$highest_power ) { - push @base_powers, ($base**$power) || 1; - } - - my @base_multiples; - foreach my $base_power ( reverse @base_powers ) { - my $multiples = floor($n / $base_power); - push @base_multiples, $multiples; - $n -= $multiples * $base_power; - } - - return join('', map { $symbols->[$_] } @base_multiples); -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End TableChunker package -# ########################################################################### - -# ########################################################################### -# TableChecksum package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/TableChecksum.pm -# t/lib/TableChecksum.t -# See https://launchpad.net/percona-toolkit for more information. 
-# ########################################################################### -{ -package TableChecksum; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -use List::Util qw(max); - -our %ALGOS = ( - CHECKSUM => { pref => 0, hash => 0 }, - BIT_XOR => { pref => 2, hash => 1 }, - ACCUM => { pref => 3, hash => 1 }, -); - -sub new { - my ( $class, %args ) = @_; - foreach my $arg ( qw(Quoter VersionParser) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $self = { %args }; - return bless $self, $class; -} - -sub crc32 { - my ( $self, $string ) = @_; - my $poly = 0xEDB88320; - my $crc = 0xFFFFFFFF; - foreach my $char ( split(//, $string) ) { - my $comp = ($crc ^ ord($char)) & 0xFF; - for ( 1 .. 8 ) { - $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1; - } - $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp; - } - return $crc ^ 0xFFFFFFFF; -} - -sub get_crc_wid { - my ( $self, $dbh, $func ) = @_; - my $crc_wid = 16; - if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) { - eval { - my ($val) = $dbh->selectrow_array("SELECT $func('a')"); - $crc_wid = max(16, length($val)); - }; - } - return $crc_wid; -} - -sub get_crc_type { - my ( $self, $dbh, $func ) = @_; - my $type = ''; - my $length = 0; - my $sql = "SELECT $func('a')"; - my $sth = $dbh->prepare($sql); - eval { - $sth->execute(); - $type = $sth->{mysql_type_name}->[0]; - $length = $sth->{mysql_length}->[0]; - MKDEBUG && _d($sql, $type, $length); - if ( $type eq 'bigint' && $length < 20 ) { - $type = 'int'; - } - }; - $sth->finish; - MKDEBUG && _d('crc_type:', $type, 'length:', $length); - return ($type, $length); -} - -sub best_algorithm { - my ( $self, %args ) = @_; - my ( $alg, $dbh ) = @args{ qw(algorithm dbh) }; - my $vp = $self->{VersionParser}; - my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS; - die "Invalid checksum algorithm $alg" - if $alg && !$ALGOS{$alg}; - - if ( - $args{where} || 
$args{chunk} # CHECKSUM does whole table - || $args{replicate} # CHECKSUM can't do INSERT.. SELECT - || !$vp->version_ge($dbh, '4.1.1')) # CHECKSUM doesn't exist - { - MKDEBUG && _d('Cannot use CHECKSUM algorithm'); - @choices = grep { $_ ne 'CHECKSUM' } @choices; - } - - if ( !$vp->version_ge($dbh, '4.1.1') ) { - MKDEBUG && _d('Cannot use BIT_XOR algorithm because MySQL < 4.1.1'); - @choices = grep { $_ ne 'BIT_XOR' } @choices; - } - - if ( $alg && grep { $_ eq $alg } @choices ) { - MKDEBUG && _d('User requested', $alg, 'algorithm'); - return $alg; - } - - if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) { - MKDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired'); - @choices = grep { $_ ne 'CHECKSUM' } @choices; - } - - MKDEBUG && _d('Algorithms, in order:', @choices); - return $choices[0]; -} - -sub is_hash_algorithm { - my ( $self, $algorithm ) = @_; - return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash}; -} - -sub choose_hash_func { - my ( $self, %args ) = @_; - my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1); - if ( $args{function} ) { - unshift @funcs, $args{function}; - } - my ($result, $error); - do { - my $func; - eval { - $func = shift(@funcs); - my $sql = "SELECT $func('test-string')"; - MKDEBUG && _d($sql); - $args{dbh}->do($sql); - $result = $func; - }; - if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) { - $error .= qq{$func cannot be used because "$1"\n}; - MKDEBUG && _d($func, 'cannot be used because', $1); - } - } while ( @funcs && !$result ); - - die $error unless $result; - MKDEBUG && _d('Chosen hash func:', $result); - return $result; -} - -sub optimize_xor { - my ( $self, %args ) = @_; - my ($dbh, $func) = @args{qw(dbh function)}; - - die "$func never needs the BIT_XOR optimization" - if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i; - - my $opt_slice = 0; - my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0]; - my $sliced = ''; - my $start = 1; - my $crc_wid = length($unsliced) < 16 ? 
16 : length($unsliced); - - do { # Try different positions till sliced result equals non-sliced. - MKDEBUG && _d('Trying slice', $opt_slice); - $dbh->do('SET @crc := "", @cnt := 0'); - my $slices = $self->make_xor_slices( - query => "\@crc := $func('a')", - crc_wid => $crc_wid, - opt_slice => $opt_slice, - ); - - my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x"; - $sliced = ($dbh->selectrow_array($sql))[0]; - if ( $sliced ne $unsliced ) { - MKDEBUG && _d('Slice', $opt_slice, 'does not work'); - $start += 16; - ++$opt_slice; - } - } while ( $start < $crc_wid && $sliced ne $unsliced ); - - if ( $sliced eq $unsliced ) { - MKDEBUG && _d('Slice', $opt_slice, 'works'); - return $opt_slice; - } - else { - MKDEBUG && _d('No slice works'); - return undef; - } -} - -sub make_xor_slices { - my ( $self, %args ) = @_; - foreach my $arg ( qw(query crc_wid) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)}; - - my @slices; - for ( my $start = 1; $start <= $crc_wid; $start += 16 ) { - my $len = $crc_wid - $start + 1; - if ( $len > 16 ) { - $len = 16; - } - push @slices, - "LPAD(CONV(BIT_XOR(" - . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))" - . ", 10, 16), $len, '0')"; - } - - if ( defined $opt_slice && $opt_slice < @slices ) { - $slices[$opt_slice] =~ s/\@crc/\@crc := $query/; - } - else { - map { s/\@crc/$query/ } @slices; - } - - return join(', ', @slices); -} - -sub make_row_checksum { - my ( $self, %args ) = @_; - my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) }; - my $q = $self->{Quoter}; - - my $sep = $args{sep} || '#'; - $sep =~ s/'//g; - $sep ||= '#'; - - my $ignorecols = $args{ignorecols} || {}; - - my %cols = map { lc($_) => 1 } - grep { !exists $ignorecols->{$_} } - ($args{cols} ? 
@{$args{cols}} : @{$tbl_struct->{cols}}); - my %seen; - my @cols = - map { - my $type = $tbl_struct->{type_for}->{$_}; - my $result = $q->quote($_); - if ( $type eq 'timestamp' ) { - $result .= ' + 0'; - } - elsif ( $args{float_precision} && $type =~ m/float|double/ ) { - $result = "ROUND($result, $args{float_precision})"; - } - elsif ( $args{trim} && $type =~ m/varchar/ ) { - $result = "TRIM($result)"; - } - $result; - } - grep { - $cols{$_} && !$seen{$_}++ - } - @{$tbl_struct->{cols}}; - - my $query; - if ( !$args{no_cols} ) { - $query = join(', ', - map { - my $col = $_; - if ( $col =~ m/\+ 0/ ) { - my ($real_col) = /^(\S+)/; - $col .= " AS $real_col"; - } - elsif ( $col =~ m/TRIM/ ) { - my ($real_col) = m/TRIM\(([^\)]+)\)/; - $col .= " AS $real_col"; - } - $col; - } @cols) - . ', '; - } - - if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) { - my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}}; - if ( @nulls ) { - my $bitmap = "CONCAT(" - . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls) - . ")"; - push @cols, $bitmap; - } - - $query .= @cols > 1 - ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))' - : "$func($cols[0])"; - } - else { - my $fnv_func = uc $func; - $query .= "$fnv_func(" . join(', ', @cols) . ')'; - } - - return $query; -} - -sub make_checksum_query { - my ( $self, %args ) = @_; - my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type); - foreach my $arg( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ( $db, $tbl, $tbl_struct, $algorithm, - $crc_wid, $crc_type) = @args{@required_args}; - my $func = $args{function}; - my $q = $self->{Quoter}; - my $result; - - die "Invalid or missing checksum algorithm" - unless $algorithm && $ALGOS{$algorithm}; - - if ( $algorithm eq 'CHECKSUM' ) { - return "CHECKSUM TABLE " . 
$q->quote($db, $tbl); - } - - my $expr = $self->make_row_checksum(%args, no_cols=>1); - - if ( $algorithm eq 'BIT_XOR' ) { - if ( $crc_type =~ m/int$/ ) { - $result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc "; - } - else { - my $slices = $self->make_xor_slices( query => $expr, %args ); - $result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc "; - } - } - else { - if ( $crc_type =~ m/int$/ ) { - $result = "COALESCE(RIGHT(MAX(" - . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), " - . "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))" - . "), $crc_wid), 0) AS crc "; - } - else { - $result = "COALESCE(RIGHT(MAX(" - . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), " - . "$func(CONCAT(\@crc, $expr)))" - . "), $crc_wid), 0) AS crc "; - } - } - if ( $args{replicate} ) { - $result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} " - . "(db, tbl, chunk, boundaries, this_cnt, this_crc) " - . "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result"; - } - else { - $result = "SELECT " - . ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '') - . "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result"; - } - return $result . "FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/"; -} - -sub find_replication_differences { - my ( $self, $dbh, $table ) = @_; - - (my $sql = <<" EOF") =~ s/\s+/ /gm; - SELECT db, tbl, chunk, boundaries, - COALESCE(this_cnt-master_cnt, 0) AS cnt_diff, - COALESCE( - this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc), - 0 - ) AS crc_diff, - this_cnt, master_cnt, this_crc, master_crc - FROM $table - WHERE master_cnt <> this_cnt OR master_crc <> this_crc - OR ISNULL(master_crc) <> ISNULL(this_crc) - EOF - - MKDEBUG && _d($sql); - my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} }); - return @$diffs; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? 
$_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End TableChecksum package -# ########################################################################### - -# ########################################################################### -# TableSyncChunk package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/TableSyncChunk.pm -# t/lib/TableSyncChunk.t -# See https://launchpad.net/percona-toolkit for more information. -# ########################################################################### -{ -package TableSyncChunk; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; -$Data::Dumper::Quotekeys = 0; - -sub new { - my ( $class, %args ) = @_; - foreach my $arg ( qw(TableChunker Quoter) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $self = { %args }; - return bless $self, $class; -} - -sub name { - return 'Chunk'; -} - -sub set_callback { - my ( $self, $callback, $code ) = @_; - $self->{$callback} = $code; - return; -} - -sub can_sync { - my ( $self, %args ) = @_; - foreach my $arg ( qw(tbl_struct) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - - my ($exact, @chunkable_cols) = $self->{TableChunker}->find_chunk_columns( - %args, - exact => 1, - ); - return unless $exact; - - my $colno; - if ( $args{chunk_col} || $args{chunk_index} ) { - MKDEBUG && _d('Checking requested col', $args{chunk_col}, - 'and/or index', $args{chunk_index}); - for my $i ( 0..$#chunkable_cols ) { - if ( $args{chunk_col} ) { - next unless $chunkable_cols[$i]->{column} eq $args{chunk_col}; - } - if ( $args{chunk_index} ) { - next unless $chunkable_cols[$i]->{index} 
eq $args{chunk_index}; - } - $colno = $i; - last; - } - - if ( !$colno ) { - MKDEBUG && _d('Cannot chunk on column', $args{chunk_col}, - 'and/or using index', $args{chunk_index}); - return; - } - } - else { - $colno = 0; # First, best chunkable column/index. - } - - MKDEBUG && _d('Can chunk on column', $chunkable_cols[$colno]->{column}, - 'using index', $chunkable_cols[$colno]->{index}); - return ( - 1, - chunk_col => $chunkable_cols[$colno]->{column}, - chunk_index => $chunkable_cols[$colno]->{index}, - ), -} - -sub prepare_to_sync { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct cols chunk_col - chunk_size crc_col ChangeHandler); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $chunker = $self->{TableChunker}; - - $self->{chunk_col} = $args{chunk_col}; - $self->{crc_col} = $args{crc_col}; - $self->{index_hint} = $args{index_hint}; - $self->{buffer_in_mysql} = $args{buffer_in_mysql}; - $self->{ChangeHandler} = $args{ChangeHandler}; - - $self->{ChangeHandler}->fetch_back($args{dbh}); - - push @{$args{cols}}, $args{chunk_col}; - - my @chunks; - my %range_params = $chunker->get_range_statistics(%args); - if ( !grep { !defined $range_params{$_} } qw(min max rows_in_range) ) { - ($args{chunk_size}) = $chunker->size_to_rows(%args); - @chunks = $chunker->calculate_chunks(%args, %range_params); - } - else { - MKDEBUG && _d('No range statistics; using single chunk 1=1'); - @chunks = '1=1'; - } - - $self->{chunks} = \@chunks; - $self->{chunk_num} = 0; - $self->{state} = 0; - - return; -} - -sub uses_checksum { - return 1; -} - -sub set_checksum_queries { - my ( $self, $chunk_sql, $row_sql ) = @_; - die "I need a chunk_sql argument" unless $chunk_sql; - die "I need a row_sql argument" unless $row_sql; - $self->{chunk_sql} = $chunk_sql; - $self->{row_sql} = $row_sql; - return; -} - -sub prepare_sync_cycle { - my ( $self, $host ) = @_; - my $sql = 'SET @crc := "", @cnt := 0'; - MKDEBUG && 
_d($sql); - $host->{dbh}->do($sql); - return; -} - -sub get_sql { - my ( $self, %args ) = @_; - if ( $self->{state} ) { # select rows in a chunk - my $q = $self->{Quoter}; - return 'SELECT /*rows in chunk*/ ' - . ($self->{buffer_in_mysql} ? 'SQL_BUFFER_RESULT ' : '') - . $self->{row_sql} . " AS $self->{crc_col}" - . ' FROM ' . $self->{Quoter}->quote(@args{qw(database table)}) - . ' '. ($self->{index_hint} || '') - . ' WHERE (' . $self->{chunks}->[$self->{chunk_num}] . ')' - . ($args{where} ? " AND ($args{where})" : '') - . ' ORDER BY ' . join(', ', map {$q->quote($_) } @{$self->key_cols()}); - } - else { # select a chunk of rows - return $self->{TableChunker}->inject_chunks( - database => $args{database}, - table => $args{table}, - chunks => $self->{chunks}, - chunk_num => $self->{chunk_num}, - query => $self->{chunk_sql}, - index_hint => $self->{index_hint}, - where => [ $args{where} ], - ); - } -} - -sub same_row { - my ( $self, %args ) = @_; - my ($lr, $rr) = @args{qw(lr rr)}; - - if ( $self->{state} ) { # checksumming rows - if ( $lr->{$self->{crc_col}} ne $rr->{$self->{crc_col}} ) { - my $action = 'UPDATE'; - my $auth_row = $lr; - my $change_dbh; - - if ( $self->{same_row} ) { - ($action, $auth_row, $change_dbh) = $self->{same_row}->(%args); - } - - $self->{ChangeHandler}->change( - $action, # Execute the action - $auth_row, # with these row values - $self->key_cols(), # identified by these key cols - $change_dbh, # on this dbh - ); - } - } - elsif ( $lr->{cnt} != $rr->{cnt} || $lr->{crc} ne $rr->{crc} ) { - MKDEBUG && _d('Rows:', Dumper($lr, $rr)); - MKDEBUG && _d('Will examine this chunk before moving to next'); - $self->{state} = 1; # Must examine this chunk row-by-row - } -} - -sub not_in_right { - my ( $self, %args ) = @_; - die "Called not_in_right in state 0" unless $self->{state}; - - my $action = 'INSERT'; - my $auth_row = $args{lr}; - my $change_dbh; - - if ( $self->{not_in_right} ) { - ($action, $auth_row, $change_dbh) = 
$self->{not_in_right}->(%args); - } - - $self->{ChangeHandler}->change( - $action, # Execute the action - $auth_row, # with these row values - $self->key_cols(), # identified by these key cols - $change_dbh, # on this dbh - ); - return; -} - -sub not_in_left { - my ( $self, %args ) = @_; - die "Called not_in_left in state 0" unless $self->{state}; - - my $action = 'DELETE'; - my $auth_row = $args{rr}; - my $change_dbh; - - if ( $self->{not_in_left} ) { - ($action, $auth_row, $change_dbh) = $self->{not_in_left}->(%args); - } - - $self->{ChangeHandler}->change( - $action, # Execute the action - $auth_row, # with these row values - $self->key_cols(), # identified by these key cols - $change_dbh, # on this dbh - ); - return; -} - -sub done_with_rows { - my ( $self ) = @_; - if ( $self->{state} == 1 ) { - $self->{state} = 2; - MKDEBUG && _d('Setting state =', $self->{state}); - } - else { - $self->{state} = 0; - $self->{chunk_num}++; - MKDEBUG && _d('Setting state =', $self->{state}, - 'chunk_num =', $self->{chunk_num}); - } - return; -} - -sub done { - my ( $self ) = @_; - MKDEBUG && _d('Done with', $self->{chunk_num}, 'of', - scalar(@{$self->{chunks}}), 'chunks'); - MKDEBUG && $self->{state} && _d('Chunk differs; must examine rows'); - return $self->{state} == 0 - && $self->{chunk_num} >= scalar(@{$self->{chunks}}) -} - -sub pending_changes { - my ( $self ) = @_; - if ( $self->{state} ) { - MKDEBUG && _d('There are pending changes'); - return 1; - } - else { - MKDEBUG && _d('No pending changes'); - return 0; - } -} - -sub key_cols { - my ( $self ) = @_; - my @cols; - if ( $self->{state} == 0 ) { - @cols = qw(chunk_num); - } - else { - @cols = $self->{chunk_col}; - } - MKDEBUG && _d('State', $self->{state},',', 'key cols', join(', ', @cols)); - return \@cols; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? 
$_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End TableSyncChunk package -# ########################################################################### - -# ########################################################################### -# TableSyncNibble package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/TableSyncNibble.pm -# t/lib/TableSyncNibble.t -# See https://launchpad.net/percona-toolkit for more information. -# ########################################################################### -{ -package TableSyncNibble; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; -$Data::Dumper::Quotekeys = 0; - -sub new { - my ( $class, %args ) = @_; - foreach my $arg ( qw(TableNibbler TableChunker TableParser Quoter) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $self = { %args }; - return bless $self, $class; -} - -sub name { - return 'Nibble'; -} - -sub can_sync { - my ( $self, %args ) = @_; - foreach my $arg ( qw(tbl_struct) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - - my $nibble_index = $self->{TableParser}->find_best_index($args{tbl_struct}); - if ( $nibble_index ) { - MKDEBUG && _d('Best nibble index:', Dumper($nibble_index)); - if ( !$args{tbl_struct}->{keys}->{$nibble_index}->{is_unique} ) { - MKDEBUG && _d('Best nibble index is not unique'); - return; - } - if ( $args{chunk_index} && $args{chunk_index} ne $nibble_index ) { - MKDEBUG && _d('Best nibble index is not requested index', - $args{chunk_index}); - return; - } - } - else { - MKDEBUG && _d('No best nibble index returned'); - return; - } - - my $small_table = 0; - if ( 
$args{src} && $args{src}->{dbh} ) { - my $dbh = $args{src}->{dbh}; - my $db = $args{src}->{db}; - my $tbl = $args{src}->{tbl}; - my $table_status; - eval { - my $sql = "SHOW TABLE STATUS FROM `$db` LIKE " - . $self->{Quoter}->literal_like($tbl); - MKDEBUG && _d($sql); - $table_status = $dbh->selectrow_hashref($sql); - }; - MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); - if ( $table_status ) { - my $n_rows = defined $table_status->{Rows} ? $table_status->{Rows} - : defined $table_status->{rows} ? $table_status->{rows} - : undef; - $small_table = 1 if defined $n_rows && $n_rows <= 100; - } - } - MKDEBUG && _d('Small table:', $small_table); - - MKDEBUG && _d('Can nibble using index', $nibble_index); - return ( - 1, - chunk_index => $nibble_index, - key_cols => $args{tbl_struct}->{keys}->{$nibble_index}->{cols}, - small_table => $small_table, - ); -} - -sub prepare_to_sync { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_index key_cols chunk_size - crc_col ChangeHandler); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - - $self->{dbh} = $args{dbh}; - $self->{tbl_struct} = $args{tbl_struct}; - $self->{crc_col} = $args{crc_col}; - $self->{index_hint} = $args{index_hint}; - $self->{key_cols} = $args{key_cols}; - ($self->{chunk_size}) = $self->{TableChunker}->size_to_rows(%args); - $self->{buffer_in_mysql} = $args{buffer_in_mysql}; - $self->{small_table} = $args{small_table}; - $self->{ChangeHandler} = $args{ChangeHandler}; - - $self->{ChangeHandler}->fetch_back($args{dbh}); - - my %seen; - my @ucols = grep { !$seen{$_}++ } @{$args{cols}}, @{$args{key_cols}}; - $args{cols} = \@ucols; - - $self->{sel_stmt} = $self->{TableNibbler}->generate_asc_stmt( - %args, - index => $args{chunk_index}, # expects an index arg, not chunk_index - asc_only => 1, - ); - - $self->{nibble} = 0; - $self->{cached_row} = undef; - $self->{cached_nibble} = undef; - $self->{cached_boundaries} = undef; - 
$self->{state} = 0; - - return; -} - -sub uses_checksum { - return 1; -} - -sub set_checksum_queries { - my ( $self, $nibble_sql, $row_sql ) = @_; - die "I need a nibble_sql argument" unless $nibble_sql; - die "I need a row_sql argument" unless $row_sql; - $self->{nibble_sql} = $nibble_sql; - $self->{row_sql} = $row_sql; - return; -} - -sub prepare_sync_cycle { - my ( $self, $host ) = @_; - my $sql = 'SET @crc := "", @cnt := 0'; - MKDEBUG && _d($sql); - $host->{dbh}->do($sql); - return; -} - -sub get_sql { - my ( $self, %args ) = @_; - if ( $self->{state} ) { - my $q = $self->{Quoter}; - return 'SELECT /*rows in nibble*/ ' - . ($self->{buffer_in_mysql} ? 'SQL_BUFFER_RESULT ' : '') - . $self->{row_sql} . " AS $self->{crc_col}" - . ' FROM ' . $q->quote(@args{qw(database table)}) - . ' ' . ($self->{index_hint} ? $self->{index_hint} : '') - . ' WHERE (' . $self->__get_boundaries(%args) . ')' - . ($args{where} ? " AND ($args{where})" : '') - . ' ORDER BY ' . join(', ', map {$q->quote($_) } @{$self->key_cols()}); - } - else { - my $where = $self->__get_boundaries(%args); - return $self->{TableChunker}->inject_chunks( - database => $args{database}, - table => $args{table}, - chunks => [ $where ], - chunk_num => 0, - query => $self->{nibble_sql}, - index_hint => $self->{index_hint}, - where => [ $args{where} ], - ); - } -} - -sub __get_boundaries { - my ( $self, %args ) = @_; - my $q = $self->{Quoter}; - my $s = $self->{sel_stmt}; - - my $lb; # Lower boundary part of WHERE - my $ub; # Upper boundary part of WHERE - my $row; # Next upper boundary row or cached_row - - if ( $self->{cached_boundaries} ) { - MKDEBUG && _d('Using cached boundaries'); - return $self->{cached_boundaries}; - } - - if ( $self->{cached_row} && $self->{cached_nibble} == $self->{nibble} ) { - MKDEBUG && _d('Using cached row for boundaries'); - $row = $self->{cached_row}; - } - else { - MKDEBUG && _d('Getting next upper boundary row'); - my $sql; - ($sql, $lb) = $self->__make_boundary_sql(%args); # $lb 
from outer scope! - - if ( $self->{nibble} == 0 && !$self->{small_table} ) { - my $explain_index = $self->__get_explain_index($sql); - if ( lc($explain_index || '') ne lc($s->{index}) ) { - die 'Cannot nibble table '.$q->quote($args{database}, $args{table}) - . " because MySQL chose " - . ($explain_index ? "the `$explain_index`" : 'no') . ' index' - . " instead of the `$s->{index}` index"; - } - } - - $row = $self->{dbh}->selectrow_hashref($sql); - MKDEBUG && _d($row ? 'Got a row' : "Didn't get a row"); - } - - if ( $row ) { - my $i = 0; - $ub = $s->{boundaries}->{'<='}; - $ub =~ s/\?/$q->quote_val($row->{$s->{scols}->[$i]}, $self->{tbl_struct}->{is_numeric}->{$s->{scols}->[$i++]} || 0)/eg; - } - else { - MKDEBUG && _d('No upper boundary'); - $ub = '1=1'; - } - - my $where = $lb ? "($lb AND $ub)" : $ub; - - $self->{cached_row} = $row; - $self->{cached_nibble} = $self->{nibble}; - $self->{cached_boundaries} = $where; - - MKDEBUG && _d('WHERE clause:', $where); - return $where; -} - -sub __make_boundary_sql { - my ( $self, %args ) = @_; - my $lb; - my $q = $self->{Quoter}; - my $s = $self->{sel_stmt}; - my $sql = "SELECT /*nibble boundary $self->{nibble}*/ " - . join(',', map { $q->quote($_) } @{$s->{cols}}) - . " FROM " . $q->quote($args{database}, $args{table}) - . ' ' . ($self->{index_hint} || '') - . ($args{where} ? " WHERE ($args{where})" : ""); - - if ( $self->{nibble} ) { - my $tmp = $self->{cached_row}; - my $i = 0; - $lb = $s->{boundaries}->{'>'}; - $lb =~ s/\?/$q->quote_val($tmp->{$s->{scols}->[$i]}, $self->{tbl_struct}->{is_numeric}->{$s->{scols}->[$i++]} || 0)/eg; - $sql .= $args{where} ? " AND $lb" : " WHERE $lb"; - } - $sql .= " ORDER BY " . join(',', map { $q->quote($_) } @{$self->{key_cols}}) - . ' LIMIT ' . ($self->{chunk_size} - 1) . 
', 1'; - MKDEBUG && _d('Lower boundary:', $lb); - MKDEBUG && _d('Next boundary sql:', $sql); - return $sql, $lb; -} - -sub __get_explain_index { - my ( $self, $sql ) = @_; - return unless $sql; - my $explain; - eval { - $explain = $self->{dbh}->selectall_arrayref("EXPLAIN $sql",{Slice => {}}); - }; - if ( $EVAL_ERROR ) { - MKDEBUG && _d($EVAL_ERROR); - return; - } - MKDEBUG && _d('EXPLAIN key:', $explain->[0]->{key}); - return $explain->[0]->{key}; -} - -sub same_row { - my ( $self, %args ) = @_; - my ($lr, $rr) = @args{qw(lr rr)}; - if ( $self->{state} ) { - if ( $lr->{$self->{crc_col}} ne $rr->{$self->{crc_col}} ) { - $self->{ChangeHandler}->change('UPDATE', $lr, $self->key_cols()); - } - } - elsif ( $lr->{cnt} != $rr->{cnt} || $lr->{crc} ne $rr->{crc} ) { - MKDEBUG && _d('Rows:', Dumper($lr, $rr)); - MKDEBUG && _d('Will examine this nibble before moving to next'); - $self->{state} = 1; # Must examine this nibble row-by-row - } -} - -sub not_in_right { - my ( $self, %args ) = @_; - die "Called not_in_right in state 0" unless $self->{state}; - $self->{ChangeHandler}->change('INSERT', $args{lr}, $self->key_cols()); -} - -sub not_in_left { - my ( $self, %args ) = @_; - die "Called not_in_left in state 0" unless $self->{state}; - $self->{ChangeHandler}->change('DELETE', $args{rr}, $self->key_cols()); -} - -sub done_with_rows { - my ( $self ) = @_; - if ( $self->{state} == 1 ) { - $self->{state} = 2; - MKDEBUG && _d('Setting state =', $self->{state}); - } - else { - $self->{state} = 0; - $self->{nibble}++; - delete $self->{cached_boundaries}; - MKDEBUG && _d('Setting state =', $self->{state}, - ', nibble =', $self->{nibble}); - } -} - -sub done { - my ( $self ) = @_; - MKDEBUG && _d('Done with nibble', $self->{nibble}); - MKDEBUG && $self->{state} && _d('Nibble differs; must examine rows'); - return $self->{state} == 0 && $self->{nibble} && !$self->{cached_row}; -} - -sub pending_changes { - my ( $self ) = @_; - if ( $self->{state} ) { - MKDEBUG && _d('There are 
pending changes'); - return 1; - } - else { - MKDEBUG && _d('No pending changes'); - return 0; - } -} - -sub key_cols { - my ( $self ) = @_; - my @cols; - if ( $self->{state} == 0 ) { - @cols = qw(chunk_num); - } - else { - @cols = @{$self->{key_cols}}; - } - MKDEBUG && _d('State', $self->{state},',', 'key cols', join(', ', @cols)); - return \@cols; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End TableSyncNibble package -# ########################################################################### - -# ########################################################################### -# TableSyncGroupBy package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/TableSyncGroupBy.pm -# t/lib/TableSyncGroupBy.t -# See https://launchpad.net/percona-toolkit for more information. -# ########################################################################### -{ -package TableSyncGroupBy; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -sub new { - my ( $class, %args ) = @_; - foreach my $arg ( qw(Quoter) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $self = { %args }; - return bless $self, $class; -} - -sub name { - return 'GroupBy'; -} - -sub can_sync { - return 1; # We can sync anything. 
-} - -sub prepare_to_sync { - my ( $self, %args ) = @_; - my @required_args = qw(tbl_struct cols ChangeHandler); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - - $self->{cols} = $args{cols}; - $self->{buffer_in_mysql} = $args{buffer_in_mysql}; - $self->{ChangeHandler} = $args{ChangeHandler}; - - $self->{count_col} = '__maatkit_count'; - while ( $args{tbl_struct}->{is_col}->{$self->{count_col}} ) { - $self->{count_col} = "_$self->{count_col}"; - } - MKDEBUG && _d('COUNT column will be named', $self->{count_col}); - - $self->{done} = 0; - - return; -} - -sub uses_checksum { - return 0; # We don't need checksum queries. -} - -sub set_checksum_queries { - return; # This shouldn't be called, but just in case. -} - -sub prepare_sync_cycle { - my ( $self, $host ) = @_; - return; -} - -sub get_sql { - my ( $self, %args ) = @_; - my $cols = join(', ', map { $self->{Quoter}->quote($_) } @{$self->{cols}}); - return "SELECT" - . ($self->{buffer_in_mysql} ? ' SQL_BUFFER_RESULT' : '') - . " $cols, COUNT(*) AS $self->{count_col}" - . ' FROM ' . $self->{Quoter}->quote(@args{qw(database table)}) - . ' WHERE ' . ( $args{where} || '1=1' ) - . " GROUP BY $cols ORDER BY $cols"; -} - -sub same_row { - my ( $self, %args ) = @_; - my ($lr, $rr) = @args{qw(lr rr)}; - my $cc = $self->{count_col}; - my $lc = $lr->{$cc}; - my $rc = $rr->{$cc}; - my $diff = abs($lc - $rc); - return unless $diff; - $lr = { %$lr }; - delete $lr->{$cc}; - $rr = { %$rr }; - delete $rr->{$cc}; - foreach my $i ( 1 .. $diff ) { - if ( $lc > $rc ) { - $self->{ChangeHandler}->change('INSERT', $lr, $self->key_cols()); - } - else { - $self->{ChangeHandler}->change('DELETE', $rr, $self->key_cols()); - } - } -} - -sub not_in_right { - my ( $self, %args ) = @_; - my $lr = $args{lr}; - $lr = { %$lr }; - my $cnt = delete $lr->{$self->{count_col}}; - foreach my $i ( 1 .. 
$cnt ) { - $self->{ChangeHandler}->change('INSERT', $lr, $self->key_cols()); - } -} - -sub not_in_left { - my ( $self, %args ) = @_; - my $rr = $args{rr}; - $rr = { %$rr }; - my $cnt = delete $rr->{$self->{count_col}}; - foreach my $i ( 1 .. $cnt ) { - $self->{ChangeHandler}->change('DELETE', $rr, $self->key_cols()); - } -} - -sub done_with_rows { - my ( $self ) = @_; - $self->{done} = 1; -} - -sub done { - my ( $self ) = @_; - return $self->{done}; -} - -sub key_cols { - my ( $self ) = @_; - return $self->{cols}; -} - -sub pending_changes { - my ( $self ) = @_; - return; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End TableSyncGroupBy package -# ########################################################################### - # ########################################################################### # TableSyncer package # This package is a copy without comments from the original. 
The original @@ -5042,7 +3639,8 @@ $Data::Dumper::Quotekeys = 0; sub new { my ( $class, %args ) = @_; - my @required_args = qw(MasterSlave Quoter VersionParser TableChecksum Retry); + my @required_args = qw(MasterSlave OptionParser Quoter TableParser + TableNibbler RowChecksum RowDiff Retry); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless defined $args{$arg}; } @@ -5050,306 +3648,308 @@ sub new { return bless $self, $class; } -sub get_best_plugin { - my ( $self, %args ) = @_; - foreach my $arg ( qw(plugins tbl_struct) ) { - die "I need a $arg argument" unless $args{$arg}; - } - MKDEBUG && _d('Getting best plugin'); - foreach my $plugin ( @{$args{plugins}} ) { - MKDEBUG && _d('Trying plugin', $plugin->name); - my ($can_sync, %plugin_args) = $plugin->can_sync(%args); - if ( $can_sync ) { - MKDEBUG && _d('Can sync with', $plugin->name, Dumper(\%plugin_args)); - return $plugin, %plugin_args; - } - } - MKDEBUG && _d('No plugin can sync the table'); - return; -} - sub sync_table { my ( $self, %args ) = @_; - my @required_args = qw(plugins src dst tbl_struct cols chunk_size - RowDiff ChangeHandler); + my @required_args = qw(src dst RowSyncer ChangeHandler); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - MKDEBUG && _d('Syncing table with args:', - map { "$_: " . 
Dumper($args{$_}) } - qw(plugins src dst tbl_struct cols chunk_size)); + my ($src, $dst, $row_syncer, $changer) = @args{@required_args}; + my $changing_src = $args{changing_src}; - my ($plugins, $src, $dst, $tbl_struct, $cols, $chunk_size, $rd, $ch) - = @args{@required_args}; - my $dp = $self->{DSNParser}; - $args{trace} = 1 unless defined $args{trace}; + my $o = $self->{OptionParser}; + my $q = $self->{Quoter}; + my $row_diff = $self->{RowDiff}; + my $row_checksum = $self->{RowChecksum}; - if ( $args{bidirectional} && $args{ChangeHandler}->{queue} ) { - die "Queueing does not work with bidirectional syncing"; + foreach my $host ( $src, $dst ) { + $host->{Cxn}->dbh()->do("USE " . $q->quote($host->{tbl}->{db})); } - $args{index_hint} = 1 unless defined $args{index_hint}; - $args{lock} ||= 0; - $args{wait} ||= 0; - $args{transaction} ||= 0; - $args{timeout_ok} ||= 0; - - my $q = $self->{Quoter}; - my $vp = $self->{VersionParser}; - - my ($plugin, %plugin_args) = $self->get_best_plugin(%args); - die "No plugin can sync $src->{db}.$src->{tbl}" unless $plugin; - - my $crc_col = '__crc'; - while ( $tbl_struct->{is_col}->{$crc_col} ) { - $crc_col = "_$crc_col"; # Prepend more _ until not a column. - } - MKDEBUG && _d('CRC column:', $crc_col); - - my $index_hint; - my $hint = ($vp->version_ge($src->{dbh}, '4.0.9') - && $vp->version_ge($dst->{dbh}, '4.0.9') ? 'FORCE' : 'USE') - . ' INDEX'; - if ( $args{chunk_index} ) { - MKDEBUG && _d('Using given chunk index for index hint'); - $index_hint = "$hint (" . $q->quote($args{chunk_index}) . ")"; - } - elsif ( $plugin_args{chunk_index} && $args{index_hint} ) { - MKDEBUG && _d('Using chunk index chosen by plugin for index hint'); - $index_hint = "$hint (" . $q->quote($plugin_args{chunk_index}) . 
")"; - } - MKDEBUG && _d('Index hint:', $index_hint); - - eval { - $plugin->prepare_to_sync( - %args, - %plugin_args, - dbh => $src->{dbh}, - db => $src->{db}, - tbl => $src->{tbl}, - crc_col => $crc_col, - index_hint => $index_hint, - ); - }; - if ( $EVAL_ERROR ) { - die 'Failed to prepare TableSync', $plugin->name, ' plugin: ', - $EVAL_ERROR; + my $trace; + if ( !defined $args{trace} || $args{trace} ) { + chomp(my $hostname = `hostname`); + $trace = "src_host:" . $src->{Cxn}->name() + . " src_tbl:" . join('.', @{$src->{tbl}}{qw(db tbl)}) + . " dst_host:" . $dst->{Cxn}->name() + . " dst_tbl:" . join('.', @{$dst->{tbl}}{qw(db tbl)}) + . " changing_src:" . ($changing_src ? "yes" : "no") + . " " . join(" ", map { "$_:" . ($o->get($_) ? "yes" : "no") } + qw(lock transaction replicate bidirectional)) + . " pid:$PID " + . ($ENV{USER} ? "user:$ENV{USER} " : "") + . ($hostname ? "host:$hostname" : ""); + MKDEBUG && _d("Binlog trace message:", $trace); } - if ( $plugin->uses_checksum() ) { - eval { - my ($chunk_sql, $row_sql) = $self->make_checksum_queries(%args); - $plugin->set_checksum_queries($chunk_sql, $row_sql); - }; - if ( $EVAL_ERROR ) { - die "Failed to make checksum queries: $EVAL_ERROR"; - } - } + my %crc_args = $row_checksum->get_crc_args(dbh => $src->{Cxn}->dbh()); + my $chunk_cols = $row_checksum->make_chunk_checksum( + dbh => $src->{Cxn}->dbh(), + tbl => $src->{tbl}, + %crc_args + ); - if ( $args{dry_run} ) { - return $ch->get_changes(), ALGORITHM => $plugin->name; - } - - - eval { - $src->{dbh}->do("USE `$src->{db}`"); - $dst->{dbh}->do("USE `$dst->{db}`"); - }; - if ( $EVAL_ERROR ) { - die "Failed to USE database on source or destination: $EVAL_ERROR"; - } - - MKDEBUG && _d('left dbh', $src->{dbh}); - MKDEBUG && _d('right dbh', $dst->{dbh}); - - chomp(my $hostname = `hostname`); - my $trace_msg - = $args{trace} ? "src_db:$src->{db} src_tbl:$src->{tbl} " - . ($dp && $src->{dsn} ? "src_dsn:".$dp->as_string($src->{dsn}) : "") - . 
" dst_db:$dst->{db} dst_tbl:$dst->{tbl} " - . ($dp && $dst->{dsn} ? "dst_dsn:".$dp->as_string($dst->{dsn}) : "") - . " " . join(" ", map { "$_:" . ($args{$_} || 0) } - qw(lock transaction changing_src replicate bidirectional)) - . " pid:$PID " - . ($ENV{USER} ? "user:$ENV{USER} " : "") - . ($hostname ? "host:$hostname" : "") - : ""; - MKDEBUG && _d("Binlog trace message:", $trace_msg); - - $self->lock_and_wait(%args, lock_level => 2); # per-table lock - - my $callback = $args{callback}; - my $cycle = 0; - while ( !$plugin->done() ) { - - MKDEBUG && _d('Beginning sync cycle', $cycle); - my $src_sql = $plugin->get_sql( - database => $src->{db}, - table => $src->{tbl}, - where => $args{where}, - ); - my $dst_sql = $plugin->get_sql( - database => $dst->{db}, - table => $dst->{tbl}, - where => $args{where}, - ); - - if ( $args{transaction} ) { - if ( $args{bidirectional} ) { - $src_sql .= ' FOR UPDATE'; - $dst_sql .= ' FOR UPDATE'; + if ( !defined $src->{sql_lock} || !defined $dst->{dst_lock} ) { + if ( $o->get('transaction') ) { + if ( $o->get('bidirectional') ) { + $src->{sql_lock} = 'FOR UPDATE'; + $dst->{sql_lock} = 'FOR UPDATE'; } - elsif ( $args{changing_src} ) { - $src_sql .= ' FOR UPDATE'; - $dst_sql .= ' LOCK IN SHARE MODE'; + elsif ( $changing_src ) { + $src->{sql_lock} = 'FOR UPDATE'; + $dst->{sql_lock} = 'LOCK IN SHARE MODE'; } else { - $src_sql .= ' LOCK IN SHARE MODE'; - $dst_sql .= ' FOR UPDATE'; + $src->{sql_lock} = 'LOCK IN SHARE MODE'; + $dst->{sql_lock} = 'FOR UPDATE'; } } - MKDEBUG && _d('src:', $src_sql); - MKDEBUG && _d('dst:', $dst_sql); - - $callback->($src_sql, $dst_sql) if $callback; - - $plugin->prepare_sync_cycle($src); - $plugin->prepare_sync_cycle($dst); - - my $src_sth = $src->{dbh}->prepare($src_sql); - my $dst_sth = $dst->{dbh}->prepare($dst_sql); - if ( $args{buffer_to_client} ) { - $src_sth->{mysql_use_result} = 1; - $dst_sth->{mysql_use_result} = 1; + else { + $src->{sql_lock} = ''; + $dst->{sql_lock} = ''; } + MKDEBUG && _d('src sql 
lock:', $src->{sql_lock}); + MKDEBUG && _d('dst sql lock:', $dst->{sql_lock}); + } - my $executed_src = 0; - if ( !$cycle || !$plugin->pending_changes() ) { - $executed_src - = $self->lock_and_wait(%args, src_sth => $src_sth, lock_level => 1); - } + my $user_where = $o->get('where'); - $src_sth->execute() unless $executed_src; - $dst_sth->execute(); + my ($src_nibble_iter, $dst_nibble_iter); + foreach my $host ($src, $dst) { + my $callbacks = { + init => sub { + my (%args) = @_; + my $cxn = $args{Cxn}; + my $tbl = $args{tbl}; + my $nibble_iter = $args{NibbleIterator}; + my $sths = $nibble_iter->statements(); + my $oktonibble = 1; - $rd->compare_sets( - left_sth => $src_sth, - right_sth => $dst_sth, - left_dbh => $src->{dbh}, - right_dbh => $dst->{dbh}, - syncer => $plugin, - tbl_struct => $tbl_struct, + if ( $o->get('explain') ) { + print "--\n" + . "-- " + . ($cxn->{is_source} ? "Source" : "Destination") + . " " . $cxn->name() + . " " . "$tbl->{db}.$tbl->{tbl}\n" + . "--\n\n"; + my $statements = $nibble_iter->statements(); + foreach my $sth ( sort keys %$statements ) { + next if $sth =~ m/^explain/; + if ( $statements->{$sth} ) { + print $statements->{$sth}->{Statement}, "\n\n"; + } + } + + if ( $o->get('explain') < 2 ) { + $oktonibble = 0; # don't nibble table; next table + } + } + else { + if ( $o->get('buffer-to-client') ) { + $host->{sth}->{mysql_use_result} = 1; + } + + $self->lock_and_wait( + lock_level => 2, + host => $host, + src => $src, + changing_src => $changing_src, + ); + } + + return $oktonibble; + }, + exec_nibble => sub { + my (%args) = @_; + my $nibble_iter = $args{NibbleIterator}; + my $sths = $nibble_iter->statements(); + my $boundary = $nibble_iter->boundaries(); + + if ( $o->get('explain') > 1 ) { + my $lb_quoted = join(',', @{$boundary->{lower} || []}); + my $ub_quoted = join(',', @{$boundary->{upper} || []}); + my $chunk = $nibble_iter->nibble_number(); + printf "%d %s %s\n", + $chunk, + (defined $lb_quoted ? 
$lb_quoted : '1=1'), + (defined $ub_quoted ? $ub_quoted : '1=1'); + if ( !$nibble_iter->more_boundaries() ) { + print "\n"; # blank line between this table and the next table + } + return 0; # next boundary + } + + $self->lock_and_wait( + %args, + lock_level => 1, + host => $host, + src => $src, + changing_src => $changing_src, + ); + + MKDEBUG && _d('nibble', $args{Cxn}->name()); + $sths->{nibble}->execute(@{$boundary->{lower}}, @{$boundary->{upper}}); + return $sths->{nibble}->rows(); + }, + }; + + my $nibble_iter = new NibbleIterator( + Cxn => $host->{Cxn}, + tbl => $host->{tbl}, + chunk_size => $o->get('chunk-size'), + chunk_index => $o->get('chunk-index'), + select => $chunk_cols, + callbacks => $callbacks, + fetch_hashref => 1, + one_nibble => $args{one_nibble}, + OptionParser => $self->{OptionParser}, + Quoter => $self->{Quoter}, + TableNibbler => $self->{TableNibbler}, + TableParser => $self->{TableParser}, + RowChecksum => $self->{RowChecksum}, ); - $ch->process_rows(1, $trace_msg); - MKDEBUG && _d('Finished sync cycle', $cycle); - $cycle++; + if ( $host->{Cxn}->{is_source} ) { + $src_nibble_iter = $nibble_iter; + } + else { + $dst_nibble_iter = $nibble_iter; + } } - $ch->process_rows(0, $trace_msg); + my $index = $src_nibble_iter->nibble_index(); + my $key_cols = $index ? $src->{tbl}->{tbl_struct}->{keys}->{$index}->{cols} + : $src->{tbl}->{tbl_struct}->{cols}; + $row_syncer->set_key_cols($key_cols); - $self->unlock(%args, lock_level => 2); - - return $ch->get_changes(), ALGORITHM => $plugin->name; -} - -sub make_checksum_queries { - my ( $self, %args ) = @_; - my @required_args = qw(src dst tbl_struct); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; + my $crc_col = 'crc'; + while ( $src->{tbl}->{tbl_struct}->{is_col}->{$crc_col} ) { + $crc_col = "_$crc_col"; # Prepend more _ until not a column. 
} - my ($src, $dst, $tbl_struct) = @args{@required_args}; - my $checksum = $self->{TableChecksum}; + $row_syncer->set_crc_col($crc_col); + MKDEBUG && _d('CRC column:', $crc_col); - my $src_algo = $checksum->best_algorithm( - algorithm => 'BIT_XOR', - dbh => $src->{dbh}, - where => 1, - chunk => 1, - count => 1, + my $rows_sql; + my $row_cols = $row_checksum->make_row_checksum( + dbh => $src->{Cxn}->dbh(), + tbl => $src->{tbl}, + %crc_args, ); - my $dst_algo = $checksum->best_algorithm( - algorithm => 'BIT_XOR', - dbh => $dst->{dbh}, - where => 1, - chunk => 1, - count => 1, - ); - if ( $src_algo ne $dst_algo ) { - die "Source and destination checksum algorithms are different: ", - "$src_algo on source, $dst_algo on destination" + my $sql_clause = $src_nibble_iter->sql(); + foreach my $host ($src, $dst) { + if ( $src_nibble_iter->one_nibble() ) { + $rows_sql + = 'SELECT /*rows in nibble*/ ' + . ($self->{buffer_in_mysql} ? 'SQL_BUFFER_RESULT ' : '') + . "$row_cols AS $crc_col" + . " FROM " . $q->quote(@{$host->{tbl}}{qw(db tbl)}) + . " WHERE 1=1 " + . ($user_where ? " AND ($user_where)" : '') + . ($sql_clause->{order_by} ? " ORDER BY " . $sql_clause->{order_by} + : ""); + } + else { + $rows_sql + = 'SELECT /*rows in nibble*/ ' + . ($self->{buffer_in_mysql} ? 'SQL_BUFFER_RESULT ' : '') + . "$row_cols AS $crc_col" + . " FROM " . $q->quote(@{$host->{tbl}}{qw(db tbl)}) + . " WHERE " . $sql_clause->{boundaries}->{'>='} # lower boundary + . " AND " . $sql_clause->{boundaries}->{'<='} # upper boundary + . ($user_where ? " AND ($user_where)" : '') + . " ORDER BY " . 
$sql_clause->{order_by}; + } + $host->{rows_sth} = $host->{Cxn}->dbh()->prepare($rows_sql); } - MKDEBUG && _d('Chosen algo:', $src_algo); - my $src_func = $checksum->choose_hash_func(dbh => $src->{dbh}, %args); - my $dst_func = $checksum->choose_hash_func(dbh => $dst->{dbh}, %args); - if ( $src_func ne $dst_func ) { - die "Source and destination hash functions are different: ", - "$src_func on source, $dst_func on destination"; + while ( $src_nibble_iter->more_boundaries() + || $dst_nibble_iter->more_boundaries() ) { + + my $src_chunk = $src_nibble_iter->next(); + my $dst_chunk = $dst_nibble_iter->next(); + + if ( ($src_chunk->{cnt} || 0) != ($dst_chunk->{cnt} || 0) + || ($src_chunk->{crc} || '') ne ($dst_chunk->{crc} || '') ) { + MKDEBUG && _d("Chunks differ"); + my $boundary = $src_nibble_iter->boundaries(); + foreach my $host ($src, $dst) { + MKDEBUG && _d($host->{Cxn}->name(), $host->{rows_sth}->{Statement}, + 'params:', @{$boundary->{lower}}, @{$boundary->{upper}}); + $host->{rows_sth}->execute( + @{$boundary->{lower}}, @{$boundary->{upper}}); + } + $row_diff->compare_sets( + left_dbh => $src->{Cxn}->dbh(), + left_sth => $src->{rows_sth}, + right_dbh => $dst->{Cxn}->dbh(), + right_sth => $dst->{rows_sth}, + tbl_struct => $src->{tbl}->{tbl_struct}, + syncer => $row_syncer, + ); + $changer->process_rows(1, $trace); + foreach my $host ($src, $dst) { + $host->{rows_sth}->finish(); + } + } + + $src_nibble_iter->no_more_rows(); + $dst_nibble_iter->no_more_rows(); + + my $changes_dbh = $changing_src ? 
$src->{Cxn}->dbh() + : $dst->{Cxn}->dbh(); + $changes_dbh->commit() unless $changes_dbh->{AutoCommit}; } - MKDEBUG && _d('Chosen hash func:', $src_func); + $changer->process_rows(0, $trace); - my $crc_wid = $checksum->get_crc_wid($src->{dbh}, $src_func); - my ($crc_type) = $checksum->get_crc_type($src->{dbh}, $src_func); - my $opt_slice; - if ( $src_algo eq 'BIT_XOR' && $crc_type !~ m/int$/ ) { - $opt_slice = $checksum->optimize_xor( - dbh => $src->{dbh}, - function => $src_func + foreach my $host ($src, $dst) { + $self->unlock( + lock_level => 2, + host => $host, + OptionParser => $o, ); } - my $chunk_sql = $checksum->make_checksum_query( - %args, - db => $src->{db}, - tbl => $src->{tbl}, - algorithm => $src_algo, - function => $src_func, - crc_wid => $crc_wid, - crc_type => $crc_type, - opt_slice => $opt_slice, - replicate => undef, # replicate means something different to this sub - ); # than what we use it for; do not pass it! - MKDEBUG && _d('Chunk sql:', $chunk_sql); - my $row_sql = $checksum->make_row_checksum( - %args, - function => $src_func, - ); - MKDEBUG && _d('Row sql:', $row_sql); - return $chunk_sql, $row_sql; + return $changer->get_changes(); } sub lock_table { - my ( $self, $dbh, $where, $db_tbl, $mode ) = @_; - my $query = "LOCK TABLES $db_tbl $mode"; - MKDEBUG && _d($query); - $dbh->do($query); - MKDEBUG && _d('Acquired table lock on', $where, 'in', $mode, 'mode'); + my ( $self, %args ) = @_; + my @required_args = qw(host mode); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($host, $mode) = @args{@required_args}; + my $q = $self->{Quoter}; + my $sql = "LOCK TABLES " + . $q->quote(@{$host->{tbl}}{qw(db tbl)}) + . 
" $mode"; + MKDEBUG && _d($host->{Cxn}->name(), $sql); + $host->{Cxn}->dbh()->do($sql); + return; } sub unlock { my ( $self, %args ) = @_; - - foreach my $arg ( qw(src dst lock transaction lock_level) ) { + my @required_args = qw(lock_level host); + foreach my $arg ( @required_args ) { die "I need a $arg argument" unless defined $args{$arg}; } - my $src = $args{src}; - my $dst = $args{dst}; + my ($lock_level, $host) = @args{@required_args}; + my $o = $self->{OptionParser}; - return unless $args{lock} && $args{lock} <= $args{lock_level}; + my $lock = $o->get('lock'); + return unless $lock && $lock <= $lock_level; + MKDEBUG && _d('Unlocking level', $lock); - foreach my $dbh ( $src->{dbh}, $dst->{dbh} ) { - if ( $args{transaction} ) { - MKDEBUG && _d('Committing', $dbh); - $dbh->commit(); - } - else { - my $sql = 'UNLOCK TABLES'; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - } + if ( $o->get('transaction') ) { + MKDEBUG && _d('Committing', $host->{Cxn}->name()); + $host->{Cxn}->dbh()->commit(); + } + else { + my $sql = 'UNLOCK TABLES'; + MKDEBUG && _d($host->{Cxn}->name(), $sql); + $host->{Cxn}->dbh()->do($sql); } return; @@ -5357,61 +3957,77 @@ sub unlock { sub lock_and_wait { my ( $self, %args ) = @_; - my $result = 0; - - foreach my $arg ( qw(src dst lock lock_level) ) { + my @required_args = qw(lock_level host src); + foreach my $arg ( @required_args ) { die "I need a $arg argument" unless defined $args{$arg}; } - my $src = $args{src}; - my $dst = $args{dst}; + my ($lock_level, $host, $src) = @args{@required_args}; + my $o = $self->{OptionParser}; - return unless $args{lock} && $args{lock} == $args{lock_level}; - MKDEBUG && _d('lock and wait, lock level', $args{lock}); + my $lock = $o->get('lock'); + return unless $lock && $lock == $lock_level; - foreach my $dbh ( $src->{dbh}, $dst->{dbh} ) { - if ( $args{transaction} ) { - MKDEBUG && _d('Committing', $dbh); - $dbh->commit(); - } - else { - my $sql = 'UNLOCK TABLES'; - MKDEBUG && _d($dbh, $sql); - 
$dbh->do($sql); - } - } - - if ( $args{lock} == 3 ) { - my $sql = 'FLUSH TABLES WITH READ LOCK'; - MKDEBUG && _d($src->{dbh}, $sql); - $src->{dbh}->do($sql); + if ( $o->get('transaction') ) { + MKDEBUG && _d('Committing', $host->{Cxn}->name()); + $host->{Cxn}->dbh()->commit(); } else { - if ( $args{transaction} ) { - if ( $args{src_sth} ) { - MKDEBUG && _d('Executing statement on source to lock rows'); - - my $sql = "START TRANSACTION /*!40108 WITH CONSISTENT SNAPSHOT */"; - MKDEBUG && _d($src->{dbh}, $sql); - $src->{dbh}->do($sql); - - $args{src_sth}->execute(); - $result = 1; - } - } - else { - $self->lock_table($src->{dbh}, 'source', - $self->{Quoter}->quote($src->{db}, $src->{tbl}), - $args{changing_src} ? 'WRITE' : 'READ'); - } + my $sql = 'UNLOCK TABLES'; + MKDEBUG && _d($host->{Cxn}->name(), $sql); + $host->{Cxn}->dbh()->do($sql); } + return $host->{Cxn}->{is_source} ? $self->_lock_src(%args) + : $self->_lock_dst(%args); +} + +sub _lock_src { + my ( $self, %args ) = @_; + my @required_args = qw(lock_level host src); + my ($lock_level, $host, $src) = @args{@required_args}; + + my $o = $self->{OptionParser}; + my $lock = $o->get('lock'); + MKDEBUG && _d('Locking', $host->{Cxn}->name(), 'level', $lock); + + if ( $lock == 3 ) { + my $sql = 'FLUSH TABLES WITH READ LOCK'; + MKDEBUG && _d($host->{Cxn}->name(), $sql); + $host->{Cxn}->dbh()->do($sql); + } + else { + if ( $o->get('transaction') ) { + my $sql = "START TRANSACTION /*!40108 WITH CONSISTENT SNAPSHOT */"; + MKDEBUG && _d($host->{Cxn}->name(), $sql); + $host->{Cxn}->dbh()->do($sql); + } + else { + $self->lock_table( + host => $host, + mode => $args{changing_src} ? 
'WRITE' : 'READ', + ); + } + } + return; +} + +sub _lock_dst { + my ( $self, %args ) = @_; + my @required_args = qw(lock_level host src); + my ($lock_level, $host, $src) = @args{@required_args}; + + my $o = $self->{OptionParser}; + my $lock = $o->get('lock'); + MKDEBUG && _d('Locking', $host->{Cxn}->name(), 'level', $lock); + eval { - if ( my $timeout = $args{wait} ) { - my $wait = $args{wait_retry_args}->{wait} || 10; + if ( my $timeout = $o->get('wait') ) { + my $ms = $self->{MasterSlave}; + my $wait; my $tries = $args{wait_retry_args}->{tries} || 3; $self->{Retry}->retry( - wait => sub { sleep $wait; }, tries => $tries, + wait => sub { sleep 5; }, try => sub { my ( %args ) = @_; @@ -5419,12 +4035,18 @@ sub lock_and_wait { warn "Retrying MASTER_POS_WAIT() for --wait $timeout..."; } - my $ms = $self->{MasterSlave}; - my $wait = $ms->wait_for_master( - master_status => $ms->get_master_status($src->{misc_dbh}), - slave_dbh => $dst->{dbh}, + $wait = $ms->wait_for_master( + master_status => $ms->get_master_status($src->{Cxn}->aux_dbh()), + slave_dbh => $host->{Cxn}->dbh(), timeout => $timeout, ); + if ( defined $wait->{result} && $wait->{result} != -1 ) { + return; # slave caught up + } + die; # call fail + }, + fail => sub { + my (%args) = @_; if ( !defined $wait->{result} ) { my $msg; if ( $wait->{waited} ) { @@ -5439,20 +4061,14 @@ sub lock_and_wait { $msg .= " Sleeping $wait seconds then retrying " . ($tries - $args{tryno}) . " more times."; } - warn $msg; - return; + warn "$msg\n"; + return 1; # call wait, call try } elsif ( $wait->{result} == -1 ) { - die "Slave did not catch up to its master after waiting " - . "$timeout seconds with MASTER_POS_WAIT. Try inceasing " - . "the --wait time, or disable this feature by specifying " - . "--wait 0."; - } - else { - return $result; # slave caught up + return 0; # call final_fail } }, - on_failure => sub { + final_fail => sub { die "Slave did not catch up to its master after $tries attempts " . 
"of waiting $timeout seconds with MASTER_POS_WAIT. " . "Check that the slave is running, increase the --wait " @@ -5466,32 +4082,29 @@ sub lock_and_wait { '(syncing via replication or sync-to-master)'); } else { - if ( $args{lock} == 3 ) { + if ( $lock == 3 ) { my $sql = 'FLUSH TABLES WITH READ LOCK'; - MKDEBUG && _d($dst->{dbh}, ',', $sql); - $dst->{dbh}->do($sql); + MKDEBUG && _d($host->{Cxn}->name(), $sql); + $host->{Cxn}->dbh()->do($sql); } - elsif ( !$args{transaction} ) { - $self->lock_table($dst->{dbh}, 'dest', - $self->{Quoter}->quote($dst->{db}, $dst->{tbl}), - $args{execute} ? 'WRITE' : 'READ'); + elsif ( !$o->get('transaction') ) { + $self->lock_table( + host => $host, + mode => 'READ', # $args{execute} ? 'WRITE' : 'READ') + ); } } }; if ( $EVAL_ERROR ) { - if ( $args{src_sth}->{Active} ) { - $args{src_sth}->finish(); - } - foreach my $dbh ( $src->{dbh}, $dst->{dbh}, $src->{misc_dbh} ) { - next unless $dbh; - MKDEBUG && _d('Caught error, unlocking/committing on', $dbh); + foreach my $dbh ( $host->{Cxn}->dbh(), $src->{Cxn}->dbh() ) { + MKDEBUG && _d('Caught error, unlocking/committing', $dbh); $dbh->do('UNLOCK TABLES'); $dbh->commit() unless $dbh->{AutoCommit}; } die $EVAL_ERROR; } - return $result; + return; } sub have_all_privs { @@ -5518,6 +4131,7 @@ sub have_all_privs { return 0; } + sub _d { my ($package, undef, $line) = caller 0; @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } @@ -5565,23 +4179,21 @@ sub generate_asc_stmt { die "I need a $arg argument" unless defined $args{$arg}; } my ($tbl_struct, $index) = @args{@required_args}; - my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}}; + my @cols = $args{cols} ? 
@{$args{cols}} : @{$tbl_struct->{cols}}; my $q = $self->{Quoter}; die "Index '$index' does not exist in table" unless exists $tbl_struct->{keys}->{$index}; + MKDEBUG && _d('Will ascend index', $index); my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; - my @asc_slice; - - @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; - MKDEBUG && _d('Will ascend index', $index); - MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); if ( $args{asc_first} ) { @asc_cols = $asc_cols[0]; MKDEBUG && _d('Ascending only first column'); } + MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); + my @asc_slice; my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols }; foreach my $col ( @asc_cols ) { if ( !exists $col_posn{$col} ) { @@ -5816,6 +4428,54 @@ sub new { return bless $self, $class; } +sub get_slaves { + my ($self, %args) = @_; + my @required_args = qw(make_cxn OptionParser DSNParser Quoter); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($make_cxn, $o, $dp) = @args{@required_args}; + + my $slaves = []; + my $method = $o->get('recursion-method'); + MKDEBUG && _d('Slave recursion method:', $method); + if ( !$method || $method =~ m/proocesslist|hosts/i ) { + my @required_args = qw(dbh dsn); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($dbh, $dsn) = @args{@required_args}; + $self->recurse_to_slaves( + { dbh => $dbh, + dsn => $dsn, + dsn_parser => $dp, + recurse => $o->get('recurse'), + method => $o->get('recursion-method'), + callback => sub { + my ( $dsn, $dbh, $level, $parent ) = @_; + return unless $level; + MKDEBUG && _d('Found slave:', $dp->as_string($dsn)); + push @$slaves, $make_cxn->(dsn => $dsn, dbh => $dbh); + return; + }, + } + ); + } + elsif ( $method =~ m/^dsn=/i ) { + my ($dsn_table_dsn) = $method =~ m/^dsn=(.+)/i; + $slaves = $self->get_cxn_from_dsn_table( + %args, + dsn_table_dsn => $dsn_table_dsn, + ); + } + else { + die "Invalid 
--recusion-method: $method. Valid values are: " + . "dsn=DSN, hosts, or processlist.\n"; + } + + return $slaves; +} + sub recurse_to_slaves { my ( $self, $args, $level ) = @_; $level ||= 0; @@ -6392,6 +5052,43 @@ sub reset_known_replication_threads { return; } +sub get_cxn_from_dsn_table { + my ($self, %args) = @_; + my @required_args = qw(dsn_table_dsn make_cxn DSNParser Quoter); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($dsn_table_dsn, $make_cxn, $dp, $q) = @args{@required_args}; + MKDEBUG && _d('DSN table DSN:', $dsn_table_dsn); + + my $dsn = $dp->parse($dsn_table_dsn); + my $dsn_table; + if ( $dsn->{D} && $dsn->{t} ) { + $dsn_table = $q->quote($dsn->{D}, $dsn->{t}); + } + elsif ( $dsn->{t} && $dsn->{t} =~ m/\./ ) { + $dsn_table = $q->quote($q->split_unquote($dsn->{t})); + } + else { + die "DSN table DSN does not specify a database (D) " + . "or a database-qualified table (t)"; + } + + my $dsn_tbl_cxn = $make_cxn->(dsn => $dsn); + my $dbh = $dsn_tbl_cxn->connect(); + my $sql = "SELECT dsn FROM $dsn_table ORDER BY id"; + MKDEBUG && _d($sql); + my $dsn_strings = $dbh->selectcol_arrayref($sql); + my @cxn; + if ( $dsn_strings ) { + foreach my $dsn_string ( @$dsn_strings ) { + MKDEBUG && _d('DSN from DSN table:', $dsn_string); + push @cxn, $make_cxn->(dsn_string => $dsn_string); + } + } + return \@cxn; +} + sub _d { my ($package, undef, $line) = caller 0; @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } @@ -6593,292 +5290,444 @@ sub _d { # ########################################################################### # ########################################################################### -# SchemaIterator r7141 -# Don't update this package! +# SchemaIterator package +# This package is a copy without comments from the original. 
The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/SchemaIterator.pm +# t/lib/SchemaIterator.t +# See https://launchpad.net/percona-toolkit for more information. # ########################################################################### +{ package SchemaIterator; use strict; use warnings FATAL => 'all'; - use English qw(-no_match_vars); +use constant MKDEBUG => $ENV{MKDEBUG} || 0; + use Data::Dumper; $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; $Data::Dumper::Quotekeys = 0; -use constant MKDEBUG => $ENV{MKDEBUG} || 0; +my $open_comment = qr{/\*!\d{5} }; +my $tbl_name = qr{ + CREATE\s+ + (?:TEMPORARY\s+)? + TABLE\s+ + (?:IF NOT EXISTS\s+)? + ([^\(]+) +}x; + sub new { my ( $class, %args ) = @_; - foreach my $arg ( qw(Quoter) ) { + my @required_args = qw(OptionParser Quoter); + foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } + + my ($file_itr, $dbh) = @args{qw(file_itr dbh)}; + die "I need either a dbh or file_itr argument" + if (!$dbh && !$file_itr) || ($dbh && $file_itr); + + my %resume; + if ( my $table = $args{resume} ) { + MKDEBUG && _d('Will resume from or after', $table); + my ($db, $tbl) = $args{Quoter}->split_unquote($table); + die "Resume table must be database-qualified: $table" + unless $db && $tbl; + $resume{db} = $db; + $resume{tbl} = $tbl; + } + my $self = { %args, - filter => undef, - dbs => [], + resume => \%resume, + filters => _make_filters(%args), }; + return bless $self, $class; } -sub make_filter { - my ( $self, $o ) = @_; - my @lines = ( - 'sub {', - ' my ( $dbh, $db, $tbl ) = @_;', - ' my $engine = undef;', - ); - - - my @permit_dbs = _make_filter('unless', '$db', $o->get('databases')) - if $o->has('databases'); - my @reject_dbs = _make_filter('if', '$db', $o->get('ignore-databases')) - if $o->has('ignore-databases'); - my @dbs_regex; - if ( $o->has('databases-regex') && (my $p = $o->get('databases-regex')) ) { - push @dbs_regex, " return 0 
unless \$db && (\$db =~ m/$p/o);"; - } - my @reject_dbs_regex; - if ( $o->has('ignore-databases-regex') - && (my $p = $o->get('ignore-databases-regex')) ) { - push @reject_dbs_regex, " return 0 if \$db && (\$db =~ m/$p/o);"; - } - if ( @permit_dbs || @reject_dbs || @dbs_regex || @reject_dbs_regex ) { - push @lines, - ' if ( $db ) {', - (@permit_dbs ? @permit_dbs : ()), - (@reject_dbs ? @reject_dbs : ()), - (@dbs_regex ? @dbs_regex : ()), - (@reject_dbs_regex ? @reject_dbs_regex : ()), - ' }'; - } - - if ( $o->has('tables') || $o->has('ignore-tables') - || $o->has('ignore-tables-regex') ) { - - my $have_qtbl = 0; - my $have_only_qtbls = 0; - my %qtbls; - - my @permit_tbls; - my @permit_qtbls; - my %permit_qtbls; - if ( $o->get('tables') ) { - my %tbls; - map { - if ( $_ =~ m/\./ ) { - $permit_qtbls{$_} = 1; - } - else { - $tbls{$_} = 1; - } - } keys %{ $o->get('tables') }; - @permit_tbls = _make_filter('unless', '$tbl', \%tbls); - @permit_qtbls = _make_filter('unless', '$qtbl', \%permit_qtbls); - - if ( @permit_qtbls ) { - push @lines, - ' my $qtbl = ($db ? "$db." : "") . ($tbl ? $tbl : "");'; - $have_qtbl = 1; - } - } - - my @reject_tbls; - my @reject_qtbls; - my %reject_qtbls; - if ( $o->get('ignore-tables') ) { - my %tbls; - map { - if ( $_ =~ m/\./ ) { - $reject_qtbls{$_} = 1; - } - else { - $tbls{$_} = 1; - } - } keys %{ $o->get('ignore-tables') }; - @reject_tbls= _make_filter('if', '$tbl', \%tbls); - @reject_qtbls = _make_filter('if', '$qtbl', \%reject_qtbls); - - if ( @reject_qtbls && !$have_qtbl ) { - push @lines, - ' my $qtbl = ($db ? "$db." : "") . ($tbl ? $tbl : "");'; - } - } - - if ( keys %permit_qtbls && !@permit_dbs ) { - my $dbs = {}; - map { - my ($db, undef) = split(/\./, $_); - $dbs->{$db} = 1; - } keys %permit_qtbls; - MKDEBUG && _d('Adding restriction "--databases', - (join(',', keys %$dbs) . 
'"')); - if ( keys %$dbs ) { - $o->set('databases', $dbs); - return $self->make_filter($o); - } - } - - my @tbls_regex; - if ( $o->has('tables-regex') && (my $p = $o->get('tables-regex')) ) { - push @tbls_regex, " return 0 unless \$tbl && (\$tbl =~ m/$p/o);"; - } - my @reject_tbls_regex; - if ( $o->has('ignore-tables-regex') - && (my $p = $o->get('ignore-tables-regex')) ) { - push @reject_tbls_regex, - " return 0 if \$tbl && (\$tbl =~ m/$p/o);"; - } - - my @get_eng; - my @permit_engs; - my @reject_engs; - if ( ($o->has('engines') && $o->get('engines')) - || ($o->has('ignore-engines') && $o->get('ignore-engines')) ) { - push @get_eng, - ' my $sql = "SHOW TABLE STATUS "', - ' . ($db ? "FROM `$db`" : "")', - ' . " LIKE \'$tbl\'";', - ' MKDEBUG && _d($sql);', - ' eval {', - ' $engine = $dbh->selectrow_hashref($sql)->{engine};', - ' };', - ' MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);', - ' MKDEBUG && _d($tbl, "uses engine", $engine);', - ' $engine = lc $engine if $engine;', - @permit_engs - = _make_filter('unless', '$engine', $o->get('engines'), 1); - @reject_engs - = _make_filter('if', '$engine', $o->get('ignore-engines'), 1) - } - - if ( @permit_tbls || @permit_qtbls || @reject_tbls || @tbls_regex - || @reject_tbls_regex || @permit_engs || @reject_engs ) { - push @lines, - ' if ( $tbl ) {', - (@permit_tbls ? @permit_tbls : ()), - (@reject_tbls ? @reject_tbls : ()), - (@tbls_regex ? @tbls_regex : ()), - (@reject_tbls_regex ? @reject_tbls_regex : ()), - (@permit_qtbls ? @permit_qtbls : ()), - (@reject_qtbls ? @reject_qtbls : ()), - (@get_eng ? @get_eng : ()), - (@permit_engs ? @permit_engs : ()), - (@reject_engs ? 
@reject_engs : ()), - ' }'; - } - } - - push @lines, - ' MKDEBUG && _d(\'Passes filters:\', $db, $tbl, $engine, $dbh);', - ' return 1;', '}'; - - my $code = join("\n", @lines); - MKDEBUG && _d('filter sub:', $code); - my $filter_sub= eval $code - or die "Error compiling subroutine code:\n$code\n$EVAL_ERROR"; - - return $filter_sub; -} - -sub set_filter { - my ( $self, $filter_sub ) = @_; - $self->{filter} = $filter_sub; - MKDEBUG && _d('Set filter sub'); - return; -} - -sub get_db_itr { - my ( $self, %args ) = @_; - my @required_args = qw(dbh); +sub _make_filters { + my ( %args ) = @_; + my @required_args = qw(OptionParser Quoter); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($dbh) = @args{@required_args}; + my ($o, $q) = @args{@required_args}; - my $filter = $self->{filter}; - my @dbs; - eval { + my %filters; + + + my @simple_filters = qw( + databases tables engines + ignore-databases ignore-tables ignore-engines); + FILTER: + foreach my $filter ( @simple_filters ) { + if ( $o->has($filter) ) { + my $objs = $o->get($filter); + next FILTER unless $objs && scalar keys %$objs; + my $is_table = $filter =~ m/table/ ? 
1 : 0; + foreach my $obj ( keys %$objs ) { + die "Undefined value for --$filter" unless $obj; + $obj = lc $obj; + if ( $is_table ) { + my ($db, $tbl) = $q->split_unquote($obj); + $db ||= '*'; + MKDEBUG && _d('Filter', $filter, 'value:', $db, $tbl); + $filters{$filter}->{$tbl} = $db; + } + else { # database + MKDEBUG && _d('Filter', $filter, 'value:', $obj); + $filters{$filter}->{$obj} = 1; + } + } + } + } + + my @regex_filters = qw( + databases-regex tables-regex + ignore-databases-regex ignore-tables-regex); + REGEX_FILTER: + foreach my $filter ( @regex_filters ) { + if ( $o->has($filter) ) { + my $pat = $o->get($filter); + next REGEX_FILTER unless $pat; + $filters{$filter} = qr/$pat/; + MKDEBUG && _d('Filter', $filter, 'value:', $filters{$filter}); + } + } + + MKDEBUG && _d('Schema object filters:', Dumper(\%filters)); + return \%filters; +} + +sub next { + my ( $self ) = @_; + + if ( !$self->{initialized} ) { + $self->{initialized} = 1; + if ( $self->{resume}->{tbl} + && !$self->table_is_allowed(@{$self->{resume}}{qw(db tbl)}) ) { + MKDEBUG && _d('Will resume after', + join('.', @{$self->{resume}}{qw(db tbl)})); + $self->{resume}->{after} = 1; + } + } + + my $schema_obj; + if ( $self->{file_itr} ) { + $schema_obj= $self->_iterate_files(); + } + else { # dbh + $schema_obj= $self->_iterate_dbh(); + } + + if ( $schema_obj ) { + if ( $schema_obj->{ddl} && $self->{TableParser} ) { + $schema_obj->{tbl_struct} + = $self->{TableParser}->parse($schema_obj->{ddl}); + } + + delete $schema_obj->{ddl} unless $self->{keep_ddl}; + delete $schema_obj->{tbl_status} unless $self->{keep_tbl_status}; + + if ( my $schema = $self->{Schema} ) { + $schema->add_schema_object($schema_obj); + } + MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl}); + } + + return $schema_obj; +} + +sub _iterate_files { + my ( $self ) = @_; + + if ( !$self->{fh} ) { + my ($fh, $file) = $self->{file_itr}->(); + if ( !$fh ) { + MKDEBUG && _d('No more files to iterate'); + return; + } 
+ $self->{fh} = $fh; + $self->{file} = $file; + } + my $fh = $self->{fh}; + MKDEBUG && _d('Getting next schema object from', $self->{file}); + + local $INPUT_RECORD_SEPARATOR = ''; + CHUNK: + while (defined(my $chunk = <$fh>)) { + if ($chunk =~ m/Database: (\S+)/) { + my $db = $1; # XXX + $db =~ s/^`//; # strip leading ` + $db =~ s/`$//; # and trailing ` + if ( $self->database_is_allowed($db) + && $self->_resume_from_database($db) ) { + $self->{db} = $db; + } + } + elsif ($self->{db} && $chunk =~ m/CREATE TABLE/) { + if ($chunk =~ m/DROP VIEW IF EXISTS/) { + MKDEBUG && _d('Table is a VIEW, skipping'); + next CHUNK; + } + + my ($tbl) = $chunk =~ m/$tbl_name/; + $tbl =~ s/^\s*`//; + $tbl =~ s/`\s*$//; + if ( $self->_resume_from_table($tbl) + && $self->table_is_allowed($self->{db}, $tbl) ) { + my ($ddl) = $chunk =~ m/^(?:$open_comment)?(CREATE TABLE.+?;)$/ms; + if ( !$ddl ) { + warn "Failed to parse CREATE TABLE from\n" . $chunk; + next CHUNK; + } + $ddl =~ s/ \*\/;\Z/;/; # remove end of version comment + + my ($engine) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; + + if ( !$engine || $self->engine_is_allowed($engine) ) { + return { + db => $self->{db}, + tbl => $tbl, + ddl => $ddl, + }; + } + } + } + } # CHUNK + + MKDEBUG && _d('No more schema objects in', $self->{file}); + close $self->{fh}; + $self->{fh} = undef; + + return $self->_iterate_files(); +} + +sub _iterate_dbh { + my ( $self ) = @_; + my $q = $self->{Quoter}; + my $dbh = $self->{dbh}; + MKDEBUG && _d('Getting next schema object from dbh', $dbh); + + if ( !defined $self->{dbs} ) { my $sql = 'SHOW DATABASES'; MKDEBUG && _d($sql); - @dbs = grep { - my $ok = $filter ? 
$filter->($dbh, $_, undef) : 1; - $ok = 0 if $_ =~ m/information_schema|performance_schema|lost\+found/; - $ok; - } @{ $dbh->selectcol_arrayref($sql) }; + my @dbs = grep { $self->database_is_allowed($_) } + @{$dbh->selectcol_arrayref($sql)}; MKDEBUG && _d('Found', scalar @dbs, 'databases'); - }; - - MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); - my $iterator = sub { - return shift @dbs; - }; - - if (wantarray) { - return ($iterator, scalar @dbs); + $self->{dbs} = \@dbs; } - else { - return $iterator; - } -} -sub get_tbl_itr { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; + if ( !$self->{db} ) { + do { + $self->{db} = shift @{$self->{dbs}}; + } until $self->_resume_from_database($self->{db}); + MKDEBUG && _d('Next database:', $self->{db}); + return unless $self->{db}; } - my ($dbh, $db, $views) = @args{@required_args, 'views'}; - my $filter = $self->{filter}; - my @tbls; - if ( $db ) { - eval { - my $sql = 'SHOW /*!50002 FULL*/ TABLES FROM ' - . $self->{Quoter}->quote($db); + if ( !defined $self->{tbls} ) { + my $sql = 'SHOW /*!50002 FULL*/ TABLES FROM ' . $q->quote($self->{db}); + MKDEBUG && _d($sql); + my @tbls = map { + $_->[0]; # (tbl, type) + } + grep { + my ($tbl, $type) = @$_; + (!$type || ($type ne 'VIEW')) + && $self->_resume_from_table($tbl) + && $self->table_is_allowed($self->{db}, $tbl); + } + @{$dbh->selectall_arrayref($sql)}; + MKDEBUG && _d('Found', scalar @tbls, 'tables in database', $self->{db}); + $self->{tbls} = \@tbls; + } + + while ( my $tbl = shift @{$self->{tbls}} ) { + my $tbl_status; + if ( $self->{filters}->{'engines'} + || $self->{filters}->{'ignore-engines'} + || $self->{keep_tbl_status} ) + { + my $sql = "SHOW TABLE STATUS FROM " . $q->quote($self->{db}) + . 
" LIKE \'$tbl\'"; MKDEBUG && _d($sql); - @tbls = map { - $_->[0] + $tbl_status = $dbh->selectrow_hashref($sql); + MKDEBUG && _d(Dumper($tbl_status)); + } + + if ( !$tbl_status + || $self->engine_is_allowed($tbl_status->{engine}) ) { + my $ddl; + if ( my $tp = $self->{TableParser} ) { + $ddl = $tp->get_create_table($dbh, $self->{db}, $tbl); } - grep { - my ($tbl, $type) = @$_; - my $ok = $filter ? $filter->($dbh, $db, $tbl) : 1; - if ( !$views ) { - $ok = 0 if ($type || '') eq 'VIEW'; - } - $ok; - } - @{ $dbh->selectall_arrayref($sql) }; - MKDEBUG && _d('Found', scalar @tbls, 'tables in', $db); - }; - MKDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); - } - else { - MKDEBUG && _d('No db given so no tables'); + + return { + db => $self->{db}, + tbl => $tbl, + ddl => $ddl, + tbl_status => $tbl_status, + }; + } } - my $iterator = sub { - return shift @tbls; - }; + MKDEBUG && _d('No more tables in database', $self->{db}); + $self->{db} = undef; + $self->{tbls} = undef; - if ( wantarray ) { - return ($iterator, scalar @tbls); - } - else { - return $iterator; - } + return $self->_iterate_dbh(); } -sub _make_filter { - my ( $cond, $var_name, $objs, $lc ) = @_; - my @lines; - if ( scalar keys %$objs ) { - my $test = join(' || ', - map { "$var_name eq '" . ($lc ? 
lc $_ : $_) ."'" } keys %$objs); - push @lines, " return 0 $cond $var_name && ($test);", +sub database_is_allowed { + my ( $self, $db ) = @_; + die "I need a db argument" unless $db; + + $db = lc $db; + + my $filter = $self->{filters}; + + if ( $db =~ m/information_schema|performance_schema|lost\+found/ ) { + MKDEBUG && _d('Database', $db, 'is a system database, ignoring'); + return 0; } - return @lines; + + if ( $self->{filters}->{'ignore-databases'}->{$db} ) { + MKDEBUG && _d('Database', $db, 'is in --ignore-databases list'); + return 0; + } + + if ( $filter->{'ignore-databases-regex'} + && $db =~ $filter->{'ignore-databases-regex'} ) { + MKDEBUG && _d('Database', $db, 'matches --ignore-databases-regex'); + return 0; + } + + if ( $filter->{'databases'} + && !$filter->{'databases'}->{$db} ) { + MKDEBUG && _d('Database', $db, 'is not in --databases list, ignoring'); + return 0; + } + + if ( $filter->{'databases-regex'} + && $db !~ $filter->{'databases-regex'} ) { + MKDEBUG && _d('Database', $db, 'does not match --databases-regex, ignoring'); + return 0; + } + + return 1; +} + +sub table_is_allowed { + my ( $self, $db, $tbl ) = @_; + die "I need a db argument" unless $db; + die "I need a tbl argument" unless $tbl; + + $db = lc $db; + $tbl = lc $tbl; + + my $filter = $self->{filters}; + + if ( $db eq 'mysql' && ($tbl eq 'general_log' || $tbl eq 'slow_log') ) { + return 0; + } + + if ( $filter->{'ignore-tables'}->{$tbl} + && ($filter->{'ignore-tables'}->{$tbl} eq '*' + || $filter->{'ignore-tables'}->{$tbl} eq $db) ) { + MKDEBUG && _d('Table', $tbl, 'is in --ignore-tables list'); + return 0; + } + + if ( $filter->{'ignore-tables-regex'} + && $tbl =~ $filter->{'ignore-tables-regex'} ) { + MKDEBUG && _d('Table', $tbl, 'matches --ignore-tables-regex'); + return 0; + } + + if ( $filter->{'tables'} + && !$filter->{'tables'}->{$tbl} ) { + MKDEBUG && _d('Table', $tbl, 'is not in --tables list, ignoring'); + return 0; + } + + if ( $filter->{'tables-regex'} + && $tbl !~ 
$filter->{'tables-regex'} ) { + MKDEBUG && _d('Table', $tbl, 'does not match --tables-regex, ignoring'); + return 0; + } + + if ( $filter->{'tables'} + && $filter->{'tables'}->{$tbl} + && $filter->{'tables'}->{$tbl} ne '*' + && $filter->{'tables'}->{$tbl} ne $db ) { + MKDEBUG && _d('Table', $tbl, 'is only allowed in database', + $filter->{'tables'}->{$tbl}); + return 0; + } + + return 1; +} + +sub engine_is_allowed { + my ( $self, $engine ) = @_; + die "I need an engine argument" unless $engine; + + $engine = lc $engine; + + my $filter = $self->{filters}; + + if ( $filter->{'ignore-engines'}->{$engine} ) { + MKDEBUG && _d('Engine', $engine, 'is in --ignore-databases list'); + return 0; + } + + if ( $filter->{'engines'} + && !$filter->{'engines'}->{$engine} ) { + MKDEBUG && _d('Engine', $engine, 'is not in --engines list, ignoring'); + return 0; + } + + return 1; +} + +sub _resume_from_database { + my ($self, $db) = @_; + + return 1 unless $self->{resume}->{db}; + + if ( $db eq $self->{resume}->{db} ) { + MKDEBUG && _d('At resume db', $db); + delete $self->{resume}->{db}; + return 1; + } + + return 0; +} + +sub _resume_from_table { + my ($self, $tbl) = @_; + + return 1 unless $self->{resume}->{tbl}; + + if ( $tbl eq $self->{resume}->{tbl} ) { + if ( !$self->{resume}->{after} ) { + MKDEBUG && _d('Resuming from table', $tbl); + delete $self->{resume}->{tbl}; + return 1; + } + else { + MKDEBUG && _d('Resuming after table', $tbl); + delete $self->{resume}->{tbl}; + } + } + + return 0; } sub _d { @@ -6890,7 +5739,7 @@ sub _d { } 1; - +} # ########################################################################### # End SchemaIterator package # ########################################################################### @@ -7177,48 +6026,42 @@ sub new { sub retry { my ( $self, %args ) = @_; - my @required_args = qw(try wait); + my @required_args = qw(try fail final_fail); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; }; - my ($try, 
$wait) = @args{@required_args}; + my ($try, $fail, $final_fail) = @args{@required_args}; + my $wait = $args{wait} || sub { sleep 1; }; my $tries = $args{tries} || 3; + my $last_error; my $tryno = 0; + TRY: while ( ++$tryno <= $tries ) { - MKDEBUG && _d("Retry", $tryno, "of", $tries); + MKDEBUG && _d("Try", $tryno, "of", $tries); my $result; eval { $result = $try->(tryno=>$tryno); }; + if ( $EVAL_ERROR ) { + MKDEBUG && _d("Try code failed:", $EVAL_ERROR); + $last_error = $EVAL_ERROR; - if ( defined $result ) { - MKDEBUG && _d("Try code succeeded"); - if ( my $on_success = $args{on_success} ) { - MKDEBUG && _d("Calling on_success code"); - $on_success->(tryno=>$tryno, result=>$result); + if ( $tryno < $tries ) { # more retries + my $retry = $fail->(tryno=>$tryno, error=>$last_error); + last TRY unless $retry; + MKDEBUG && _d("Calling wait code"); + $wait->(tryno=>$tryno); } + } + else { + MKDEBUG && _d("Try code succeeded"); return $result; } - - if ( $EVAL_ERROR ) { - MKDEBUG && _d("Try code died:", $EVAL_ERROR); - die $EVAL_ERROR unless $args{retry_on_die}; - } - - if ( $tryno < $tries ) { - MKDEBUG && _d("Try code failed, calling wait code"); - $wait->(tryno=>$tryno); - } } - MKDEBUG && _d("Try code did not succeed"); - if ( my $on_failure = $args{on_failure} ) { - MKDEBUG && _d("Calling on_failure code"); - $on_failure->(); - } - - return; + MKDEBUG && _d('Try code did not succeed'); + return $final_fail->(error=>$last_error); } sub _d { @@ -7235,6 +6078,694 @@ sub _d { # End Retry package # ########################################################################### +# ########################################################################### +# RowChecksum package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/RowChecksum.pm +# t/lib/RowChecksum.t +# See https://launchpad.net/percona-toolkit for more information. 
# ###########################################################################
{
# RowChecksum builds the SQL expressions used to checksum single rows and
# whole chunks of rows, and probes the server (through a dbh) to decide
# which hash function and checksum strategy can be used.
package RowChecksum;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;

use List::Util qw(max);
use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# Constructor.
# Required args: OptionParser, Quoter (both must be defined).
# All args are stored as-is in the object.
sub new {
   my ( $class, %args ) = @_;
   foreach my $arg ( qw(OptionParser Quoter) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my $self = { %args };
   return bless $self, $class;
}

# Returns the SQL expression that checksums one row of the table.
# Required args:
#   tbl     - hashref with a tbl_struct key
# Optional args:
#   func    - hash function name; defaults to uc of the --function option
#   no_cols - true to omit the leading list of selected columns
# Other args are passed through to get_checksum_columns().
sub make_row_checksum {
   my ( $self, %args ) = @_;
   my @required_args = qw(tbl);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl) = @args{@required_args};

   my $o          = $self->{OptionParser};
   my $q          = $self->{Quoter};
   my $tbl_struct = $tbl->{tbl_struct};
   my $func       = $args{func} || uc($o->get('function'));
   my $cols       = $self->get_checksum_columns(%args);

   my $query;
   if ( !$args{no_cols} ) {
      # Columns wrapped in "+ 0" or TRIM() are aliased back to the bare
      # column expression so the result set keeps the real column names.
      $query = join(', ',
                  map {
                     my $col = $_;
                     if ( $col =~ m/\+ 0/ ) {
                        my ($real_col) = /^(\S+)/;
                        $col .= " AS $real_col";
                     }
                     elsif ( $col =~ m/TRIM/ ) {
                        my ($real_col) = m/TRIM\(([^\)]+)\)/;
                        $col .= " AS $real_col";
                     }
                     $col;
                  } @{$cols->{select}})
             . ', ';
   }

   if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
      my $sep = ($o->has('separator') && $o->get('separator')) || '#';
      $sep    =~ s/'//g;   # the separator is embedded in a single-quoted SQL string
      $sep  ||= '#';

      # Append a bitmap of ISNULL() flags for the nullable columns that
      # are being checksummed.
      my @nulls = grep { $cols->{allowed}->{$_} } @{$tbl_struct->{null_cols}};
      if ( @nulls ) {
         my $bitmap = "CONCAT("
            . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
            . ")";
         push @{$cols->{select}}, $bitmap;
      }

      $query .= @{$cols->{select}} > 1
              ? "$func(CONCAT_WS('$sep', " . join(', ', @{$cols->{select}}) . '))'
              : "$func($cols->{select}->[0])";
   }
   else {
      # The FNV functions are called with the column list directly,
      # without CONCAT_WS or the NULL bitmap.
      my $fnv_func = uc $func;
      $query .= "$fnv_func(" . join(', ', @{$cols->{select}}) . ')';
   }

   MKDEBUG && _d('Row checksum:', $query);
   return $query;
}

# Returns the SELECT list ("COUNT(*) AS cnt, ... AS crc") that checksums a
# chunk of rows.
# Required args:
#   tbl - hashref with a tbl_struct key
# Also required: either dbh, or all of func, crc_width and crc_type.
sub make_chunk_checksum {
   my ( $self, %args ) = @_;
   my @required_args = qw(tbl);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   if ( !$args{dbh} && !($args{func} && $args{crc_width} && $args{crc_type}) ) {
      die "I need a dbh argument"
   }
   my ($tbl) = @args{@required_args};
   my $o     = $self->{OptionParser};
   my $q     = $self->{Quoter};

   my %crc_args = $self->get_crc_args(%args);
   MKDEBUG && _d("Checksum strat:", Dumper(\%crc_args));

   my $row_checksum = $self->make_row_checksum(
      %args,
      %crc_args,
      no_cols => 1
   );
   my $crc;
   if ( $crc_args{crc_type} =~ m/int$/ ) {
      # Integer checksums are XORed directly.
      $crc = "COALESCE(LOWER(CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), "
           . "10, 16)), 0)";
   }
   else {
      # Other checksums are XORed in slices of up to 16 characters.
      my $slices = $self->_make_xor_slices(
         row_checksum => $row_checksum,
         %crc_args,
      );
      $crc = "COALESCE(LOWER(CONCAT($slices)), 0)";
   }

   my $select = "COUNT(*) AS cnt, $crc AS crc";
   MKDEBUG && _d('Chunk checksum:', $select);
   return $select;
}

# Returns a hashref describing the columns to checksum:
#   select  - arrayref of quoted (and possibly wrapped) column expressions,
#             in tbl_struct->{cols} order
#   allowed - hashref of lowercased column names that passed the
#             --columns / --ignore-columns options
# Required args:
#   tbl - hashref with a tbl_struct key
sub get_checksum_columns {
   my ($self, %args) = @_;
   my @required_args = qw(tbl);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($tbl) = @args{@required_args};
   my $o     = $self->{OptionParser};
   my $q     = $self->{Quoter};

   my $trim            = $o->get('trim');
   my $float_precision = $o->get('float-precision');

   my $tbl_struct = $tbl->{tbl_struct};
   my $ignore_col = $o->get('ignore-columns') || {};
   my $all_cols   = $o->get('columns') || $tbl_struct->{cols};
   my %cols       = map { lc($_) => 1 } grep { !$ignore_col->{$_} } @$all_cols;
   my %seen;
   my @cols =
      map {
         my $type   = $tbl_struct->{type_for}->{$_};
         my $result = $q->quote($_);
         if ( $type eq 'timestamp' ) {
            $result .= ' + 0';
         }
         elsif ( $float_precision && $type =~ m/float|double/ ) {
            $result = "ROUND($result, $float_precision)";
         }
         elsif ( $trim && $type =~ m/varchar/ ) {
            $result = "TRIM($result)";
         }
         $result;
      }
      grep {
         $cols{$_} && !$seen{$_}++   # allowed, and not already selected
      }
      @{$tbl_struct->{cols}};

   return {
      select  => \@cols,
      allowed => \%cols,
   };
}

# Returns a list of key/value pairs (func, crc_width, crc_type, opt_slice).
# Each of func, crc_width and crc_type is taken from %args when given (and
# true), otherwise determined by the corresponding _get_* method.
# opt_slice is only computed when a dbh is given and crc_type does not end
# in "int"; otherwise it is undef.
sub get_crc_args {
   my ($self, %args) = @_;
   my $func      = $args{func}     || $self->_get_hash_func(%args);
   my $crc_width = $args{crc_width}|| $self->_get_crc_width(%args, func=>$func);
   my $crc_type  = $args{crc_type} || $self->_get_crc_type(%args, func=>$func);
   my $opt_slice;
   if ( $args{dbh} && $crc_type !~ m/int$/ ) {
      $opt_slice = $self->_optimize_xor(%args, func=>$func);
   }

   return (
      func      => $func,
      crc_width => $crc_width,
      crc_type  => $crc_type,
      opt_slice => $opt_slice,
   );
}

# Returns the name of the first hash function that the server accepts.
# The --function option value, if any, is tried first, then CRC32,
# FNV1A_64, FNV_64, MD5 and SHA1.  A function is accepted when
# "SELECT func('test-string')" executes without dying.  Dies with the
# collected reasons if no function works.
# Required args:
#   dbh - handle whose do() dies on error
sub _get_hash_func {
   my ( $self, %args ) = @_;
   my @required_args = qw(dbh);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh) = @args{@required_args};
   my $o     = $self->{OptionParser};
   my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);

   if ( my $func = $o->get('function') ) {
      unshift @funcs, $func;
   }

   my $error = '';
   FUNC:
   foreach my $func ( @funcs ) {
      eval {
         my $sql = "SELECT $func('test-string')";
         MKDEBUG && _d($sql);
         $dbh->do($sql);
      };
      if ( my $e = $EVAL_ERROR ) {
         # Prefer the server's message from a "... failed: MSG at FILE
         # line N" style error; fall back to the whole error text.
         my ($reason) = $e =~ m/failed: (.*?) at \S+ line/;
         if ( !defined $reason ) {
            ($reason = $e) =~ s/\s+\z//;
         }
         $error .= qq{$func cannot be used because "$reason"\n};
         MKDEBUG && _d($func, 'cannot be used because', $reason);
         next FUNC;   # this function failed, so try the next candidate
      }
      MKDEBUG && _d('Chosen hash func:', $func);
      return $func;
   }
   die $error || 'No hash functions (CRC32, MD5, etc.) are available';
}

# Returns the checksum width: 16 for the FNV functions, otherwise the
# length of "SELECT func('a')" but never less than 16.  If the query dies
# the error is ignored and 16 is returned.
# Required args: dbh, func
sub _get_crc_width {
   my ( $self, %args ) = @_;
   my @required_args = qw(dbh func);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh, $func) = @args{@required_args};

   my $crc_width = 16;
   if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
      eval {
         my ($val) = $dbh->selectrow_array("SELECT $func('a')");
         $crc_width = max(16, length($val));
      };
   }
   return $crc_width;
}

# Returns the result type name of "SELECT func('a')" as reported by the
# statement handle's mysql_type_name attribute.  A bigint whose reported
# mysql_length is less than 20 is returned as 'int'.  Errors while
# executing are ignored, in which case the type found so far ('' by
# default) is returned.
# Required args: dbh, func
sub _get_crc_type {
   my ( $self, %args ) = @_;
   my @required_args = qw(dbh func);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh, $func) = @args{@required_args};

   my $type   = '';
   my $length = 0;
   my $sql    = "SELECT $func('a')";
   my $sth    = $dbh->prepare($sql);
   eval {
      $sth->execute();
      $type   = $sth->{mysql_type_name}->[0];
      $length = $sth->{mysql_length}->[0];
      MKDEBUG && _d($sql, $type, $length);
      if ( $type eq 'bigint' && $length < 20 ) {
         $type = 'int';
      }
   };
   $sth->finish;
   MKDEBUG && _d('crc_type:', $type, 'length:', $length);
   return $type;
}

# Finds the slice index in which the "@crc := func(...)" assignment can be
# placed so that the sliced expression reproduces the unsliced checksum of
# 'a'.  Returns that index, or undef if no slice works.  Dies for
# FNV1A_64, FNV_64 and CRC32.
# Required args: dbh, func
sub _optimize_xor {
   my ( $self, %args ) = @_;
   my @required_args = qw(dbh func);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh, $func) = @args{@required_args};

   die "$func never needs BIT_XOR optimization"
      if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;

   my $opt_slice = 0;
   my $unsliced  = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
   my $sliced    = '';
   my $start     = 1;
   my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);

   do { # Try different positions till sliced result equals non-sliced.
      MKDEBUG && _d('Trying slice', $opt_slice);
      $dbh->do('SET @crc := "", @cnt := 0');
      my $slices = $self->_make_xor_slices(
         row_checksum => "\@crc := $func('a')",
         crc_width    => $crc_width,
         opt_slice    => $opt_slice,
      );

      my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
      $sliced = ($dbh->selectrow_array($sql))[0];
      if ( $sliced ne $unsliced ) {
         MKDEBUG && _d('Slice', $opt_slice, 'does not work');
         $start += 16;
         ++$opt_slice;
      }
   } while ( $start < $crc_width && $sliced ne $unsliced );

   if ( $sliced eq $unsliced ) {
      MKDEBUG && _d('Slice', $opt_slice, 'works');
      return $opt_slice;
   }
   else {
      MKDEBUG && _d('No slice works');
      return undef;
   }
}

# Returns a comma-separated list of SQL expressions, one per slice of up
# to 16 characters of the checksum, each XORing its slice with BIT_XOR and
# left-padding the hex result back to the slice length.
# Required args:
#   row_checksum - SQL expression producing the row checksum
#   crc_width    - width of the checksum in characters
# Optional args:
#   opt_slice    - index of the slice in which "@crc := row_checksum" is
#                  assigned; the other slices then read @crc.  When not
#                  given (or out of range) every slice embeds row_checksum.
sub _make_xor_slices {
   my ( $self, %args ) = @_;
   my @required_args = qw(row_checksum crc_width);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($row_checksum, $crc_width) = @args{@required_args};
   my ($opt_slice) = $args{opt_slice};

   my @slices;
   for ( my $start = 1; $start <= $crc_width; $start += 16 ) {
      my $len = $crc_width - $start + 1;
      if ( $len > 16 ) {
         $len = 16;
      }
      push @slices,
         "LPAD(CONV(BIT_XOR("
         . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
         . ", 10, 16), $len, '0')";
   }

   if ( defined $opt_slice && $opt_slice < @slices ) {
      $slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
   }
   else {
      s/\@crc/$row_checksum/ for @slices;
   }

   return join(', ', @slices);
}

# Returns an arrayref of hashrefs, one per row of repl_table whose
# this_cnt/this_crc values differ from master_cnt/master_crc (including
# NULL vs. non-NULL crc).
# Required args:
#   dbh        - handle to query
#   repl_table - table name, interpolated into the SQL as given
# Optional args:
#   where      - extra condition ANDed to the WHERE clause, interpolated
#                into the SQL as given
sub find_replication_differences {
   my ($self, %args) = @_;
   my @required_args = qw(dbh repl_table);
   foreach my $arg( @required_args ) {
      die "I need a $arg argument" unless $args{$arg};
   }
   my ($dbh, $repl_table) = @args{@required_args};

   my $sql
      = "SELECT CONCAT(db, '.', tbl) AS `table`, "
      . "chunk, chunk_index, lower_boundary, upper_boundary, "
      . "COALESCE(this_cnt-master_cnt, 0) AS cnt_diff, "
      . "COALESCE("
      .   "this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc), 0"
      . ") AS crc_diff, this_cnt, master_cnt, this_crc, master_crc "
      . "FROM $repl_table "
      . "WHERE (master_cnt <> this_cnt OR master_crc <> this_crc "
      .        "OR ISNULL(master_crc) <> ISNULL(this_crc))"
      . ($args{where} ? " AND ($args{where})" : "");
   MKDEBUG && _d($sql);
   my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
   return $diffs;
}

# Prints a debug line to STDERR, prefixed with package, line and PID.
# Undefined args are printed as "undef"; embedded newlines get a "# " prefix.
sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End RowChecksum package
# ###########################################################################

# ###########################################################################
# RowSyncer package
# This package is a copy without comments from the original.  The original
# with comments and its test file can be found in the Bazaar repository at,
#   lib/RowSyncer.pm
#   t/lib/RowSyncer.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
# RowSyncer turns row comparison results into UPDATE, INSERT and DELETE
# changes on its ChangeHandler.
package RowSyncer;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# Constructor.  ChangeHandler is required and must be defined.  The
# checksum column defaults to 'crc' unless a crc_col arg overrides it.
sub new {
   my ( $class, %args ) = @_;
   my ($arg) = grep { !defined $args{$_} } qw(ChangeHandler);
   die "I need a $arg argument" if defined $arg;
   return bless { crc_col => 'crc', %args }, $class;
}

# Sets the name of the column that holds each row's checksum.
sub set_crc_col {
   my ($self, $crc_col) = @_;
   $self->{crc_col} = $crc_col;
   return;
}

# Sets the key columns that are passed along with every change.
sub set_key_cols {
   my ($self, $key_cols) = @_;
   $self->{key_cols} = $key_cols;
   return;
}

# Returns whatever was last given to set_key_cols().
sub key_cols {
   my ($self) = @_;
   return $self->{key_cols};
}

# Called with a left row (lr) and right row (rr) for the same key:
# requests an UPDATE with the left row when their checksum columns differ.
sub same_row {
   my ($self, %args) = @_;
   my $left    = $args{lr};
   my $right   = $args{rr};
   my $crc_col = $self->{crc_col};
   return if $left->{$crc_col} eq $right->{$crc_col};
   $self->{ChangeHandler}->change('UPDATE', $left, $self->key_cols());
   return;
}

# Called with a left row (lr) that has no right counterpart: requests an
# INSERT of that row.
sub not_in_right {
   my ( $self, %args ) = @_;
   my $handler = $self->{ChangeHandler};
   $handler->change('INSERT', $args{lr}, $self->key_cols());
   return;
}

# Called with a right row (rr) that has no left counterpart: requests a
# DELETE of that row.
sub not_in_left {
   my ( $self, %args ) = @_;
   my $handler = $self->{ChangeHandler};
   $handler->change('DELETE', $args{rr}, $self->key_cols());
   return;
}

# Nothing to do when the rows are exhausted.
sub done_with_rows {
   return;
}

# Prints a debug line to STDERR, prefixed with package, line and PID.
sub _d {
   my ($package, undef, $line) = caller 0;
   my @parts = map { defined $_ ? $_ : 'undef' } @_;
   s/\n/\n# /g for @parts;
   print STDERR "# $package:$line $PID ", join(' ', @parts), "\n";
}

1;
}
# ###########################################################################
# End RowSyncer package
# ###########################################################################

# ###########################################################################
# RowSyncerBidirectional package
# This package is a copy without comments from the original.
# The original
# with comments and its test file can be found in the Bazaar repository at,
#   lib/RowSyncerBidirectional.pm
#   t/lib/RowSyncerBidirectional.t
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
{
# RowSyncerBidirectional resolves rows that differ between a left and a
# right server by comparing a "conflict column" and applying the winning
# row to the other side through its ChangeHandler.
package RowSyncerBidirectional;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# Results of cmp_conflict_col().
use constant UPDATE_LEFT      => -1;
use constant UPDATE_RIGHT     =>  1;
use constant UPDATE_NEITHER   =>  0;  # neither value equals/matches
use constant FAILED_THRESHOLD =>  2;  # failed to exceed threshold

# Constructor.  OptionParser and ChangeHandler are required and must be
# defined.  The checksum column defaults to 'crc' unless a crc_col arg
# overrides it.
sub new {
   my ( $class, %args ) = @_;
   my @required_args = qw(OptionParser ChangeHandler);
   foreach my $arg ( @required_args ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my $self = {
      crc_col => 'crc',
      %args,
   };
   return bless $self, $class;
}

# Sets the name of the column that holds each row's checksum.
sub set_crc_col {
   my ($self, $crc_col) = @_;
   $self->{crc_col} = $crc_col;
   return;
}

# Sets the key columns that are passed along with every change.
sub set_key_cols {
   my ($self, $key_cols) = @_;
   $self->{key_cols} = $key_cols;
   return;
}

# Returns whatever was last given to set_key_cols().
sub key_cols {
   my ($self) = @_;
   return $self->{key_cols};
}

# Plain function (not a method).  Compares the two conflict column values.
# Args (positional):
#   left_val, right_val - the values to compare; undef/empty is treated as
#                         '' for string and 0 for numeric comparisons
#   cmp - 'newest', 'oldest' (string comparison), 'greatest', 'least'
#         (numeric comparison), 'equals' or 'matches' (against val)
#   val - value or pattern for 'equals' / 'matches'
#   thr - optional threshold; for newest/oldest it is converted with
#         Transformers::time_to_secs and applied to the difference of the
#         values converted with Transformers::any_unix_timestamp; for
#         greatest/least it is applied to the numeric difference
# Returns 1 when the left value wins, -1 when the right value wins, 0 when
# neither does, or FAILED_THRESHOLD when the values differ by less than
# thr.  Dies on an unknown cmp.
sub cmp_conflict_col {
   my ( $left_val, $right_val, $cmp, $val, $thr ) = @_;
   MKDEBUG && _d('Compare', @_);
   my $res;
   if ( $cmp eq 'newest' || $cmp eq 'oldest' ) {
      $res = $cmp eq 'newest' ? ($left_val  || '') cmp ($right_val || '')
           :                    ($right_val || '') cmp ($left_val  || '');

      if ( $thr ) {
         $thr     = Transformers::time_to_secs($thr);
         my $lts  = Transformers::any_unix_timestamp($left_val);
         my $rts  = Transformers::any_unix_timestamp($right_val);
         my $diff = abs($lts - $rts);
         MKDEBUG && _d('Check threshold, lts rts thr abs-diff:',
            $lts, $rts, $thr, $diff);
         if ( $diff < $thr ) {
            MKDEBUG && _d("Failed threshold");
            return FAILED_THRESHOLD;
         }
      }
   }
   elsif ( $cmp eq 'greatest' || $cmp eq 'least' ) {
      # Normalize once: undef or '' would be a fatal "isn't numeric"
      # warning in the arithmetic below.
      my $left_num  = $left_val  || 0;
      my $right_num = $right_val || 0;

      $res = $cmp eq 'greatest' ? ($left_num > $right_num ? 1 : -1)
           :                      ($left_num < $right_num ? 1 : -1);
      $res = 0 if $left_num == $right_num;
      if ( $thr ) {
         my $diff = abs($left_num - $right_num);
         MKDEBUG && _d('Check threshold, abs-diff:', $diff);
         if ( $diff < $thr ) {
            MKDEBUG && _d("Failed threshold");
            return FAILED_THRESHOLD;
         }
      }
   }
   elsif ( $cmp eq 'equals' ) {
      $res = ($left_val  || '') eq $val ?  1
           : ($right_val || '') eq $val ? -1
           :                               0;
   }
   elsif ( $cmp eq 'matches' ) {
      $res = ($left_val  || '') =~ m/$val/ ?  1
           : ($right_val || '') =~ m/$val/ ? -1
           :                                  0;
   }
   else {
      die "Invalid comparison: $cmp";
   }

   return $res;
}

# Resolves a conflict between the left row (lr) and right row (rr) using
# the --conflict-column, --conflict-comparison, --conflict-value and
# --conflict-threshold options.  The winning row is applied as an UPDATE
# on the other side's dbh (left_dbh / right_dbh args).  When the conflict
# cannot be resolved, --conflict-error decides whether to warn, die or
# ignore, and no change is made.
# Returns the result of ChangeHandler->change(), or nothing when the
# conflict was not resolved.
sub same_row {
   my ($self, %args) = @_;
   my ($lr, $rr, $syncer) = @args{qw(lr rr syncer)};

   my $ch         = $self->{ChangeHandler};
   my $action     = 'UPDATE';
   my $auth_row   = $lr;
   my $change_dbh;
   my $err;

   my $o   = $self->{OptionParser};
   my $col = $o->get('conflict-column');
   my $cmp = $o->get('conflict-comparison');
   my $val = $o->get('conflict-value');
   my $thr = $o->get('conflict-threshold');

   my $left_val  = $lr->{$col} || '';
   my $right_val = $rr->{$col} || '';
   MKDEBUG && _d('left',  $col, 'value:', $left_val);
   MKDEBUG && _d('right', $col, 'value:', $right_val);

   my $res = cmp_conflict_col($left_val, $right_val, $cmp, $val, $thr);
   if ( $res == UPDATE_LEFT ) {
      MKDEBUG && _d("right dbh $args{right_dbh} $cmp; "
         . "update left dbh $args{left_dbh}");
      $ch->set_src('right', $args{right_dbh});
      $auth_row   = $args{rr};
      $change_dbh = $args{left_dbh};
   }
   elsif ( $res == UPDATE_RIGHT ) {
      MKDEBUG && _d("left dbh $args{left_dbh} $cmp; "
         . "update right dbh $args{right_dbh}");
      $ch->set_src('left', $args{left_dbh});
      $auth_row   = $args{lr};
      $change_dbh = $args{right_dbh};
   }
   elsif ( $res == UPDATE_NEITHER ) {
      if ( $cmp eq 'equals' || $cmp eq 'matches' ) {
         $err = "neither `$col` value $cmp $val";
      }
      else {
         $err = "`$col` values are the same";
      }
   }
   elsif ( $res == FAILED_THRESHOLD ) {
      $err = "`$col` values do not differ by the threshold, $thr.";
   }
   else {
      die "cmp_conflict_col() returned an invalid result: $res.";
   }

   if ( $err ) {
      $action   = undef;  # skip change in case we just warn
      my $where = $ch->make_where_clause($lr, $self->key_cols());
      $err      = "# Cannot resolve conflict WHERE $where: $err\n";

      my $print_err = $o->get('conflict-error');
      if ( $print_err =~ m/warn/i ) {
         warn $err;
      }
      elsif ( $print_err =~ m/die/i ) {
         die $err;
      }
      elsif ( $print_err =~ m/ignore/i ) {
         MKDEBUG && _d("Conflict error:", $err);
      }
      else {
         die "Invalid --conflict-error: $print_err";
      }
      return;
   }

   return $ch->change(
      $action,            # Execute the action
      $auth_row,          # with these row values
      $self->key_cols(),  # identified by these key cols
      $change_dbh,        # on this dbh
   );
}

# The left row (lr) has no right counterpart: INSERT it on right_dbh,
# with the left side set as the ChangeHandler's source.
sub not_in_right {
   my ( $self, %args ) = @_;
   $self->{ChangeHandler}->set_src('left', $args{left_dbh});
   return $self->{ChangeHandler}->change(
      'INSERT',           # Execute the action
      $args{lr},          # with these row values
      $self->key_cols(),  # identified by these key cols
      $args{right_dbh},   # on this dbh
   );
}

# The right row (rr) has no left counterpart: INSERT it on left_dbh,
# with the right side set as the ChangeHandler's source.
sub not_in_left {
   my ( $self, %args ) = @_;
   $self->{ChangeHandler}->set_src('right', $args{right_dbh});
   return $self->{ChangeHandler}->change(
      'INSERT',           # Execute the action
      $args{rr},          # with these row values
      $self->key_cols(),  # identified by these key cols
      $args{left_dbh},    # on this dbh
   );
}

# Nothing to do when the rows are exhausted.
sub done_with_rows {
   return;
}

# Prints a debug line to STDERR, prefixed with package, line and PID.
# Undefined args are printed as "undef"; embedded newlines get a "# " prefix.
sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End RowSyncerBidirectional package
# ###########################################################################

# ###########################################################################
# This is a combination of modules and programs in one -- a runnable module.
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last @@ -7247,7 +6778,8 @@ package pt_table_sync; use English qw(-no_match_vars); use List::Util qw(sum max min); -use POSIX qw(ceil); + +use Data::Dumper; Transformers->import(qw(time_to_secs any_unix_timestamp)); @@ -7255,13 +6787,9 @@ use constant MKDEBUG => $ENV{MKDEBUG} || 0; $OUTPUT_AUTOFLUSH = 1; -my %dsn_for; - sub main { - @ARGV = @_; # set global ARGV for this package - - # Reset global vars else tests will have weird results. - %dsn_for = (); + @ARGV = @_; # set global ARGV for this package + my $exit_status = 0; # 1: internal error, 2: tables differed, 3: both # ######################################################################## # Get configuration information. @@ -7279,9 +6807,6 @@ sub main { if ( $o->get('wait') ) { $o->set('lock', 1) unless $o->got('lock'); } - if ( $o->get('dry-run') ) { - $o->set('verbose', 1); - } # There's a conflict of interests: we added 't' and 'D' parts to dp, # and there are -t and -D options (--tables, --databases), so parse_options() @@ -7293,13 +6818,13 @@ sub main { $dsn_defaults->{t} = undef; my @dsns; - while ( my $arg = shift(@ARGV) ) { - my $dsn = $dp->parse($arg, $dsns[0], $dsn_defaults); - die "You specified a t part, but not a D part in $arg" + while ( my $dsn_string = shift(@ARGV) ) { + my $dsn = $dp->parse($dsn_string, $dsns[0], $dsn_defaults); + die "You specified a t part, but not a D part in $dsn_string" if ($dsn->{t} && !$dsn->{D}); if ( $dsn->{D} && !$dsn->{t} ) { - die "You specified a database but not a table in $arg. Are you " - . "trying to sync only tables in the '$dsn->{D}' database? " + die "You specified a database but not a table in $dsn_string. " + . "Are you trying to sync only tables in the $dsn->{D} database? " . "If so, use '--databases $dsn->{D}' instead.\n"; } push @dsns, $dsn; @@ -7347,17 +6872,9 @@ sub main { # Override --algorithms becuase only TableSyncChunk works with # bidirectional syncing. 
- $o->set('algorithms', 'Chunk'); $o->set('buffer-to-client', 0); } - if ( $o->get('explain-hosts') ) { - foreach my $host ( @dsns ) { - print "# DSN: ", $dp->as_string($host), "\n"; - } - return 0; - } - $o->usage_or_errors(); # ######################################################################## @@ -7372,103 +6889,164 @@ sub main { $daemon->make_PID_file(); } + # ######################################################################## + # Connect to hosts. + # ######################################################################## + my $q = new Quoter(); + my $vp = new VersionParser(); + my $ms = new MasterSlave(VersionParser => $vp); + + my $set_on_connect = sub { + my ($dbh) = @_; + + my $sql; + if ( !$o->get('bin-log') ) { + $sql = "/*!32316 SET SQL_LOG_BIN=0 */"; + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + } + if ( !$o->get('unique-checks') ) { + $sql = "/*!40014 SET UNIQUE_CHECKS=0 */"; + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + } + if ( !$o->get('foreign-key-checks') ) { + $sql = "/*!40014 SET FOREIGN_KEY_CHECKS=0 */"; + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + } + + # Disable auto-increment on zero (bug #1919897). + $sql = '/*!40101 SET @@SQL_MODE := CONCAT(@@SQL_MODE, ' + . '",NO_AUTO_VALUE_ON_ZERO")*/'; + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + + # Ensure statement-based replication. + # http://code.google.com/p/maatkit/issues/detail?id=95 + $sql = '/*!50105 SET @@binlog_format="STATEMENT"*/'; + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + + if ( $o->get('transaction') ) { + my $sql = "SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ"; + eval { + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + }; + die "Failed to $sql: $EVAL_ERROR" if $EVAL_ERROR; + } + }; + + # Do not call "new Cxn(" directly; use this sub so that set_on_connect + # is applied to every cxn. + # TODO: maybe this stuff only needs to be set on master cxn? 
+ my $make_cxn = sub { + my (%args) = @_; + my $cxn = new Cxn( + %args, + DSNParser => $dp, + OptionParser => $o, + set => $set_on_connect, + aux => 1, + dbh_opts => { AutoCommit => 0, }, + ); + return $cxn; + }; + + # dsn[0] is expected to be the master (i.e. the source). So if + # --sync-to-master, then dsn[0] is a slave. Find its master and + # make the master dsn[0] and the slave dsn[1]. + my @cxns; + my $table; + if ( $o->get('sync-to-master') ) { + my $slave_cxn = $make_cxn->(dsn => shift @dsns); + if ( $slave_cxn->dsn()->{t} ) { + $table = $q->join_quote( + $slave_cxn->dsn()->{D}, $slave_cxn->dsn()->{t}); + } + $slave_cxn->connect(); + + MKDEBUG && _d('Getting master of', $slave_cxn->name()); + my $master_dsn = $ms->get_master_dsn( + $slave_cxn->dbh(), $slave_cxn->dsn(), $dp); + die "Cannot determine master of " . $slave_cxn->name() + unless $master_dsn; + my $master_cxn = $make_cxn->(dsn => $master_dsn); + $master_cxn->connect(); + if ( $o->get('check-master') ) { + $ms->is_master_of($master_cxn->dbh(), $slave_cxn->dbh()); + } + + push @cxns, $master_cxn, $slave_cxn; + } + else { + my $src_cxn = $make_cxn->(dsn => shift @dsns); + push @cxns, $src_cxn; + if ( $src_cxn->dsn()->{t} ) { + $table = $q->join_quote( + $src_cxn->dsn()->{D}, $src_cxn->dsn()->{t}); + } + + foreach my $dsn ( @dsns ) { + my $dsn_cxn = $make_cxn->(dsn => $dsn); + push @cxns, $dsn_cxn; + } + } + + $cxns[0]->{is_source} = 1; + + if ( $o->get('explain-hosts') ) { + foreach my $cxn ( @cxns ) { + print "# ", ($cxn->{is_source} ? "Source" : "Destination"), ": ", + $cxn->name(), "\n"; + } + return 0; + } + + $cxns[0]->connect(); + # ######################################################################## # Do the work. 
# ######################################################################## - my $q = new Quoter(); - my $tp = new TableParser( Quoter => $q ); - my $vp = new VersionParser(); - my $ms = new MasterSlave(VersionParser => $vp); - my $du = new MySQLDump( cache => 0 ); - my $rt = new Retry(); - my $chunker = new TableChunker( Quoter => $q, MySQLDump => $du ); - my $nibbler = new TableNibbler( Quoter => $q, TableParser => $tp ); - my $checksum = new TableChecksum( Quoter => $q, VersionParser => $vp ); - my $syncer = new TableSyncer( - Quoter => $q, - VersionParser => $vp, + my $tp = new TableParser( Quoter => $q ); + my $du = new MySQLDump( cache => 0 ); + my $rt = new Retry(); + my $tn = new TableNibbler(TableParser => $tp, Quoter => $q); + my $rc = new RowChecksum(OptionParser => $o, Quoter => $q); + my $rd = new RowDiff(dbh=>$cxns[0]->aux_dbh()); + + my $table_syncer = new TableSyncer( MasterSlave => $ms, - TableChecksum => $checksum, - DSNParser => $dp, + OptionParser => $o, + Quoter => $q, + TableParser => $tp, + TableNibbler => $tn, + RowChecksum => $rc, + RowDiff => $rd, Retry => $rt, - ); - my %modules = ( + ); + + my %args = ( + cxns => \@cxns, + TableSyncer => $table_syncer, OptionParser => $o, DSNParser => $dp, MySQLDump => $du, TableParser => $tp, Quoter => $q, VersionParser => $vp, - TableChunker => $chunker, - TableNibbler => $nibbler, - TableChecksum => $checksum, + TableNibbler => $tn, + RowChecksum => $rc, MasterSlave => $ms, - TableSyncer => $syncer, + TableSyncer => $table_syncer, + make_cxn => $make_cxn, ); - # Create the sync plugins. - my $plugins = []; - my %have_plugin = get_plugins(); - foreach my $algo ( split(',', $o->get('algorithms')) ) { - my $plugin_name = $have_plugin{lc $algo}; - if ( !$plugin_name ) { - die "The $algo algorithm is not available. Available algorithms: " - . 
join(", ", sort keys %have_plugin); - } - MKDEBUG && _d('Loading', $plugin_name); - my $plugin; - eval { - $plugin = $plugin_name->new(%modules); - }; - die "Error loading $plugin_name for $algo algorithm: $EVAL_ERROR" - if $EVAL_ERROR; - push @$plugins, $plugin; - } - - # Create callbacks for bidirectional syncing. Currently, this only - # works with TableSyncChunk, so that should be the only plugin because - # --algorithms was overriden earlier. - if ( $o->get('bidirectional') ) { - set_bidirectional_callbacks( - plugin => $plugins->[0], - %modules, - ); - } - - my $exit_status = 0; # 1: internal error, 2: tables differed, 3: both - - # dsn[0] is expected to be the master (i.e. the source). So if - # --sync-to-master, then dsn[0] is a slave. Find its master and - # make the master dsn[0] and the slave dsn[1]. - if ( $o->get('sync-to-master') ) { - MKDEBUG && _d('Getting master of', $dp->as_string($dsns[0])); - $dsns[0]->{dbh} = get_cxn($dsns[0], %modules); - my $master = $ms->get_master_dsn($dsns[0]->{dbh}, $dsns[0], $dp) - or die "Can't determine master of " . $dp->as_string($dsns[0]); - unshift @dsns, $master; # dsn[0]=master, dsn[1]=slave - $dsns[0]->{dbh} = get_cxn($dsns[0], %modules); - if ( $o->get('check-master') ) { - $ms->is_master_of($dsns[0]->{dbh}, $dsns[1]->{dbh}); - } - } - - my %args = ( - dsns => \@dsns, - plugins => $plugins, - %modules, - ); - - if ( $o->get('dry-run') ) { - print "# NOTE: --dry-run does not show if data needs to be synced because it\n" - . "# does not access, compare or sync data. --dry-run only shows\n" - . 
"# the work that would be done.\n"; - - } - if ( $o->get('lock-and-rename') ) { $exit_status = lock_and_rename(%args); } - elsif ( $dsns[0]->{t} ) { + elsif ( $cxns[0]->{dsn}->{t} ) { $exit_status = sync_one_table(%args); } elsif ( $o->get('replicate') ) { @@ -7492,8 +7070,7 @@ sub main { # %args - Arguments # # Required Arguments: -# dsns - Arrayref of DSNs -# plugins - Arrayref of TableSync* objects +# cxns - Arrayref of DSNs # OptionParser - object # DSNParser - object # Quoter - object @@ -7502,34 +7079,34 @@ sub main { # Exit status sub lock_and_rename { my ( %args ) = @_; - my @required_args = qw(dsns plugins OptionParser DSNParser Quoter + my @required_args = qw(cxns OptionParser DSNParser Quoter VersionParser); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my $dsns = $args{dsns}; + my $cxns = $args{cxns}; my $o = $args{OptionParser}; my $dp = $args{DSNParser}; my $q = $args{Quoter}; MKDEBUG && _d('Locking and syncing ONE TABLE with rename'); my $src = { - dsn => $dsns->[0], - dbh => $dsns->[0]->{dbh} || get_cxn($dsns->[0], %args), - misc_dbh => get_cxn($dsns->[0], %args), - db => $dsns->[0]->{D}, - tbl => $dsns->[0]->{t}, + dsn => $cxns->[0], + dbh => $cxns->[0]->{dbh} || get_cxn($cxns->[0], %args), + misc_dbh => get_cxn($cxns->[0], %args), + db => $cxns->[0]->{D}, + tbl => $cxns->[0]->{t}, }; my $dst = { - dsn => $dsns->[1], - dbh => $dsns->[1]->{dbh} || get_cxn($dsns->[1], %args), - misc_dbh => get_cxn($dsns->[1], %args), - db => $dsns->[1]->{D}, - tbl => $dsns->[1]->{t}, + dsn => $cxns->[1], + dbh => $cxns->[1]->{dbh} || get_cxn($cxns->[1], %args), + misc_dbh => get_cxn($cxns->[1], %args), + db => $cxns->[1]->{D}, + tbl => $cxns->[1]->{t}, }; if ( $o->get('verbose') ) { - print_header("# Lock and rename " . $dp->as_string($src->{dsn})); + print "# Lock and rename " . 
$dp->as_string($src->{dsn}); } # We don't use lock_server() here because it does the usual stuff wrt @@ -7572,7 +7149,7 @@ sub lock_and_rename { # Sub: sync_one_table # Sync one table between one source host and multiple destination hosts. -# The first DSN in $args{dsns} specifies the source host, database (D), +# The first DSN in $args{cxns} specifies the source host, database (D), # and table (t). The other DSNs are the destination hosts. If a destination # DSN does not specify a database or table, the source database or table # are used as defaults. Else, the destination-specific database or table @@ -7582,8 +7159,7 @@ sub lock_and_rename { # %args - Arguments # # Required Arguments: -# dsns - Arrayref of DSNs -# plugins - Arrayref of TableSync* objects +# cxns - Arrayref of DSNs # OptionParser - object # DSNParser - object # Quoter - object @@ -7593,60 +7169,77 @@ sub lock_and_rename { # Exit status sub sync_one_table { my ( %args ) = @_; - my @required_args = qw(dsns plugins OptionParser DSNParser Quoter - VersionParser); + my @required_args = qw(cxns OptionParser DSNParser Quoter + TableSyncer VersionParser); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my @dsns = @{$args{dsns}}; - my $o = $args{OptionParser}; - my $dp = $args{DSNParser}; + my @cxns = @{$args{cxns}}; + my $o = $args{OptionParser}; + my $dp = $args{DSNParser}; + my $table_syncer = $args{TableSyncer}; MKDEBUG && _d('DSN has t part; syncing ONE TABLE between servers'); my $src = { - dsn => $dsns[0], - dbh => $dsns[0]->{dbh} || get_cxn($dsns[0], %args), - misc_dbh => get_cxn($dsns[0], %args), - db => $dsns[0]->{D}, - tbl => $dsns[0]->{t}, + Cxn => $cxns[0], + tbl => { + db => $cxns[0]->dsn()->{D}, + tbl => $cxns[0]->dsn()->{t}, + }, }; + $table_syncer->lock_and_wait( + lock_level => 3, + host => $src, + src => $src, + ); + my $exit_status = 0; - foreach my $dsn ( @dsns[1 .. $#dsns] ) { + foreach my $cxn ( @cxns[1 .. 
$#cxns] ) { my $dst = { - dsn => $dsn, - dbh => $dsn->{dbh} || get_cxn($dsn, %args), - misc_dbh => get_cxn($dsn, %args), - db => $dsn->{D} || $src->{db}, - tbl => $dsn->{t} || $src->{tbl}, + Cxn => $cxn, + tbl => { + db => $cxn->dsn()->{D} || $src->{db}, + tbl => $cxn->dsn()->{t} || $src->{tbl}, + }, }; if ( $o->get('verbose') ) { - print_header("# Syncing " . $dp->as_string($dsn) - . ($o->get('dry-run') - ? ' in dry-run mode, without accessing or comparing data' - : '')); + print "# Syncing " . $cxn->name() + . ($o->get('explain') + ? ' in explain mode, without accessing or comparing data' + : '') . "\n"; } - lock_server(src => $src, dst => $dst, %args); + $cxn->connect(); - $exit_status |= sync_a_table( - src => $src, - dst => $dst, - %args, + $table_syncer->lock_and_wait( + lock_level => 3, + host => $dst, + src => $src, ); - unlock_server(src => $src, dst => $dst, %args); - disconnect($dst); + $exit_status |= sync_a_table( + %args, + src => $src, + dst => $dst, + ); + + $table_syncer->unlock(lock_level => 3, host => $dst); + $cxn->disconnect(); } - disconnect($src); + $table_syncer->unlock( + lock_level => 3, + host => $src, + ); + return $exit_status; } # Sub: sync_via_replication # Sync multiple destination hosts to one source host via replication. -# The first DSN in $args{dsns} specifies the source host. +# The first DSN in $args{cxns} specifies the source host. # If --sync-to-master is specified, then the source host is a master # and there is only one destination host which is its slave. 
# Else, destination hosts are auto-discovered with @@ -7656,13 +7249,12 @@ sub sync_one_table { # %args - Arguments # # Required Arguments: -# dsns - Arrayref of DSNs -# plugins - Arrayref of TableSync* objects +# cxns - Arrayref of DSNs # OptionParser - object # DSNParser - object # Quoter - object # VersionParser - object -# TableChecksum - object +# RowChecksum - object # MasterSlave - object # # Returns: @@ -7672,29 +7264,29 @@ sub sync_one_table { # sub sync_via_replication { my ( %args ) = @_; - my @required_args = qw(dsns plugins OptionParser DSNParser Quoter - VersionParser TableChecksum MasterSlave); + my @required_args = qw(cxns OptionParser DSNParser Quoter + VersionParser RowChecksum MasterSlave TableSyncer + make_cxn); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my $dsns = $args{dsns}; - my $o = $args{OptionParser}; - my $dp = $args{DSNParser}; - my $q = $args{Quoter}; - my $checksum = $args{TableChecksum}; - my $ms = $args{MasterSlave}; + my $cxns = $args{cxns}; + my $o = $args{OptionParser}; + my $dp = $args{DSNParser}; + my $q = $args{Quoter}; + my $checksum = $args{RowChecksum}; + my $ms = $args{MasterSlave}; + my $table_syncer = $args{TableSyncer}; + my $make_cxn = $args{make_cxn}; MKDEBUG && _d('Syncing via replication'); my $src = { - dsn => $dsns->[0], - dbh => $dsns->[0]->{dbh} || get_cxn($dsns->[0], %args), - misc_dbh => get_cxn($dsns->[0], %args), - db => undef, # set later - tbl => undef, # set later + Cxn => $cxns->[0], + tbl => undef, # set later }; # Filters for --databases and --tables. We have to do these manually - # since we don't use MySQLFind for --replicate. + # since we don't use SchemaIterator for --replicate. my $databases = $o->get('databases'); my $tables = $o->get('tables'); @@ -7704,19 +7296,18 @@ sub sync_via_replication { # differences on the slave and sync them. 
if ( $o->get('sync-to-master') ) { my $dst = { - dsn => $dsns->[1], - dbh => $dsns->[1]->{dbh} || get_cxn($dsns->[1], %args), - misc_dbh => get_cxn($dsns->[1], %args), - db => undef, # set later - tbl => undef, # set later + Cxn => $cxns->[1], + tbl => undef, # set later }; + $dst->{Cxn}->connect(); + # First, check that the master (source) has no discrepancies itself, # and ignore tables that do. my %skip_table; map { $skip_table{$_->{db}}->{$_->{tbl}}++ } $checksum->find_replication_differences( - $src->{dbh}, $o->get('replicate')); + $src->{Cxn}->dbh(), $o->get('replicate')); # Now check the slave for differences and sync them if necessary. my @diffs = filter_diffs( @@ -7724,37 +7315,49 @@ sub sync_via_replication { $databases, $tables, $checksum->find_replication_differences( - $dst->{dbh}, $o->get('replicate')) + $dst->{Cxn}->dbh(), $o->get('replicate')) ); if ( $o->get('verbose') ) { - print_header("# Syncing via replication " . $dp->as_string($dst->{dsn}) - . ($o->get('dry-run') ? - ' in dry-run mode, without accessing or comparing data' : '')); + print "# Syncing via replication " . $dst->{Cxn}->name() + . ($o->get('explain') ? + ' in explain mode, without accessing or comparing data' : '') + . 
"\n"; } if ( @diffs ) { - lock_server(src => $src, dst => $dst, %args); + $table_syncer->lock_and_wait( + lock_level => 3, + host => $src, + src => $src, + ); + $table_syncer->lock_and_wait( + lock_level => 3, + host => $dst, + src => $src, + ); foreach my $diff ( @diffs ) { - $src->{db} = $dst->{db} = $diff->{db}; - $src->{tbl} = $dst->{tbl} = $diff->{tbl}; + print Dumper($diff); + $src->{tbl} = $diff->{tbl}; + $dst->{tbl} = $diff->{tbl}; $exit_status |= sync_a_table( - src => $src, - dst => $dst, - where => $diff->{boundaries}, %args, + src => $src, + dst => $dst, + boundaries => $diff->{boundaries}, ); } - unlock_server(src => $src, dst => $dst, %args); + $table_syncer->unlock(lock_level => 3, host => $dst); + $table_syncer->unlock(lock_level => 3, host => $src); } else { MKDEBUG && _d('No checksum differences'); } - disconnect($dst); + $dst->disconnect(); } # sync-to-master # The DSN is the master. Connect to each slave, find differences, @@ -7762,8 +7365,8 @@ sub sync_via_replication { else { my %skip_table; $ms->recurse_to_slaves( - { dbh => $src->{dbh}, - dsn => $src->{dsn}, + { dbh => $src->{Cxn}->dbh(), + dsn => $src->{Cxn}->dsn(), dsn_parser => $dp, recurse => 1, callback => sub { @@ -7785,39 +7388,46 @@ sub sync_via_replication { ); if ( $o->get('verbose') ) { - print_header("# Syncing via replication " + print "# Syncing via replication " . $dp->as_string($dsn) - . ($o->get('dry-run') - ? ' in dry-run mode, without ' + . ($o->get('explain') + ? ' in explain mode, without ' . 'accessing or comparing data' - : '')); + : '') . 
"\n"; } if ( @diffs ) { my $dst = { - dsn => $dsn, - dbh => $dbh, - misc_dbh => get_cxn($dsn, %args), - db => undef, # set later - tbl => undef, # set later + Cxn => $make_cxn->(dsn => $dsn, dbh => $dbh), + tbl => undef, # set later }; - lock_server(src => $src, dst => $dst, %args); + $table_syncer->lock_and_wait( + lock_level => 3, + host => $src, + src => $src, + ); + $table_syncer->lock_and_wait( + lock_level => 3, + host => $dst, + src => $src, + ); foreach my $diff ( @diffs ) { - $src->{db} = $dst->{db} = $diff->{db}; - $src->{tbl} = $dst->{tbl} = $diff->{tbl}; + print Dumper($diff); + $src->{tbl} = $diff->{tbl}; + $dst->{tbl} = $diff->{tbl}; $exit_status |= sync_a_table( - src => $src, - dst => $dst, - where => $diff->{boundaries}, %args, + src => $src, + dst => $dst, + boundaries => $diff->{boundaries}, ); - } + } - unlock_server(src => $src, dst => $dst, %args); - disconnect($dst); + $table_syncer->unlock(lock_level => 3, host => $dst); + $table_syncer->unlock(lock_level => 3, host => $src); } else { MKDEBUG && _d('No checksum differences'); @@ -7831,13 +7441,12 @@ sub sync_via_replication { ); } # DSN is master - disconnect($src); return $exit_status; } # Sub: sync_all # Sync every table between one source host and multiple destination hosts. -# The first DSN in $args{dsns} specifies the source host. The other DSNs +# The first DSN in $args{cxns} specifies the source host. The other DSNs # are the destination hosts. Unlike , the database and # table names must be the same on the source and destination hosts. 
# @@ -7845,8 +7454,7 @@ sub sync_via_replication { # %args - Arguments # # Required Arguments: -# dsns - Arrayref of DSNs -# plugins - Arrayref of TableSync* objects +# cxns - Arrayref of DSNs # OptionParser - object # DSNParser - object # Quoter - object @@ -7858,173 +7466,79 @@ sub sync_via_replication { # Exit status sub sync_all { my ( %args ) = @_; - my @required_args = qw(dsns plugins OptionParser DSNParser Quoter - VersionParser TableParser MySQLDump); + my @required_args = qw(cxns OptionParser DSNParser Quoter + TableSyncer VersionParser TableParser MySQLDump); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my @dsns = @{$args{dsns}}; - my $o = $args{OptionParser}; - my $dp = $args{DSNParser}; + my @cxns = @{$args{cxns}}; + my $o = $args{OptionParser}; + my $dp = $args{DSNParser}; + my $q = $args{Quoter}; + my $tp = $args{TableParser}; + my $table_syncer = $args{TableSyncer}; MKDEBUG && _d('Syncing all dbs and tbls'); my $src = { - dsn => $dsns[0], - dbh => $dsns[0]->{dbh} || get_cxn($dsns[0], %args), - misc_dbh => get_cxn($dsns[0], %args), - db => undef, # set later - tbl => undef, # set later + Cxn => $cxns[0], + tbl => undef, # set later }; - my $si = new SchemaIterator( - Quoter => $args{Quoter}, + my $schema_iter = new SchemaIterator( + dbh => $src->{Cxn}->dbh(), + keep_tbl_status => 1, + OptionParser => $o, + TableParser => $tp, + Quoter => $q, ); - $si->set_filter($si->make_filter($o)); - - # Make a list of all dbs.tbls on the source. It's more efficient this - # way because it avoids open/closing a dbh for each tbl and dsn, unless - # we pre-opened the dsn. It would also cause confusing verbose output. 
- my @dbs_tbls; - my $next_db = $si->get_db_itr(dbh => $src->{dbh}); - while ( my $db = $next_db->() ) { - MKDEBUG && _d('Getting tables from', $db); - my $next_tbl = $si->get_tbl_itr( - dbh => $src->{dbh}, - db => $db, - views => 0, - ); - while ( my $tbl = $next_tbl->() ) { - MKDEBUG && _d('Got table', $tbl); - push @dbs_tbls, { db => $db, tbl => $tbl }; - } - } my $exit_status = 0; - foreach my $dsn ( @dsns[1 .. $#dsns] ) { - if ( $o->get('verbose') ) { - print_header("# Syncing " . $dp->as_string($dsn) - . ($o->get('dry-run') - ? ' in dry-run mode, without accessing or comparing data' : '')); - } + TABLE: + while ( my $tbl = $schema_iter->next() ) { + MKDEBUG && _d('Syncing table', join('.', @{$tbl}{qw(db tbl)})); + $src->{tbl} = $tbl; - my $dst = { - dsn => $dsn, - dbh => $dsn->{dbh} || get_cxn($dsn, %args), - misc_dbh => get_cxn($dsn, %args), - db => undef, # set later - tbl => undef, # set later - }; + DEST: + foreach my $cxn ( @cxns[1 .. $#cxns] ) { + if ( $o->get('verbose') ) { + print "# Syncing " . $cxn->name() + . ($o->get('explain') + ? ' in explain mode, without accessing or comparing data' + : '') . "\n"; + } - lock_server(src => $src, dst => $dst, %args); + my $dst = { + Cxn => $cxn, + tbl => $tbl, + }; - foreach my $db_tbl ( @dbs_tbls ) { - $src->{db} = $dst->{db} = $db_tbl->{db}; - $src->{tbl} = $dst->{tbl} = $db_tbl->{tbl}; + $cxn->connect(); + + $table_syncer->lock_and_wait( + lock_level => 3, + host => $dst, + src => $src, + ); $exit_status |= sync_a_table( src => $src, dst => $dst, %args, ); + + $table_syncer->unlock(lock_level => 3, host => $dst); + $cxn->disconnect(); } - - unlock_server(src => $src, dst => $dst, %args); - disconnect($dst); } - disconnect($src); + $table_syncer->unlock( + lock_level => 3, + host => $src, + ); + return $exit_status; } -# Sub: lock_server -# Lock a host with FLUSH TABLES WITH READ LOCK. This implements -# --lock 3 by calling . 
-# -# Parameters: -# %args - Arguments -# -# Required Arguments: -# src - Hashref with source host information -# dst - Hashref with destination host information -# OptionParser - object -# DSNParser - object -# TableSyncer - object -sub lock_server { - my ( %args ) = @_; - foreach my $arg ( qw(src dst OptionParser DSNParser TableSyncer) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $o = $args{OptionParser}; - - return unless $o->get('lock') && $o->get('lock') == 3; - - eval { - $args{TableSyncer}->lock_and_wait( - %args, - lock => 3, - lock_level => 3, - replicate => $o->get('replicate'), - timeout_ok => $o->get('timeout-ok'), - transaction => $o->get('transaction'), - wait => $o->get('wait'), - ); - }; - if ( $EVAL_ERROR ) { - die "Failed to lock server: $EVAL_ERROR"; - } - return; -} - -# Sub: unlock_server -# Unlock a host with UNLOCK TABLES. This implements -# --lock 3 by calling . -# -# Parameters: -# %args - Arguments -# -# Required Arguments: -# src - Hashref with source host information -# dst - Hashref with destination host information -# OptionParser - object -# DSNParser - object -# TableSyncer - object -sub unlock_server { - my ( %args ) = @_; - my @required_args = qw(src dst OptionParser DSNParser TableSyncer); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($src, $dst, $o) = @args{@required_args}; - - return unless $o->get('lock') && $o->get('lock') == 3; - - eval { - # Open connections as needed. 
- $src->{dbh} ||= get_cxn($src->{dsn}, %args); - $dst->{dbh} ||= get_cxn($dst->{dsn}, %args); - $src->{misc_dbh} ||= get_cxn($src->{dsn}, %args); - $args{TableSyncer}->unlock( - src_dbh => $src->{dbh}, - src_db => '', - src_tbl => '', - dst_dbh => $dst->{dbh}, - dst_db => '', - dst_tbl => '', - misc_dbh => $src->{misc_dbh}, - replicate => $o->get('replicate') || 0, - timeout_ok => $o->get('timeout-ok') || 0, - transaction => $o->get('transaction') || 0, - wait => $o->get('wait') || 0, - lock => 3, - lock_level => 3, - ); - }; - if ( $EVAL_ERROR ) { - die "Failed to unlock server: $EVAL_ERROR"; - } - return; -} - # Sub: sync_a_table # Sync the destination host table to the source host table. This sub # is not called directly but indirectly via the other sync_* subs. @@ -8039,7 +7553,6 @@ sub unlock_server { # Required Arguments: # src - Hashref with source host information # dst - Hashref with destination host information -# plugins - Arrayref of TableSync* objects # OptionParser - object # Quoter - object # TableParser - object @@ -8050,66 +7563,71 @@ sub unlock_server { # Exit status sub sync_a_table { my ( %args ) = @_; - my @required_args = qw(src dst plugins OptionParser Quoter TableParser + my @required_args = qw(src dst OptionParser Quoter TableParser MySQLDump TableSyncer); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($src, $dst, undef, $o, $q, $tp, $du, $syncer) = @args{@required_args}; + my ($src, $dst, $o, $q, $tp, $du, $syncer) = @args{@required_args}; my ($start_ts, $end_ts); my $exit_status = 0; my %status; eval { - $start_ts = get_server_time($src->{dbh}) if $o->get('verbose'); + $start_ts = get_server_time($src->{Cxn}->dbh()) if $o->get('verbose'); # This will either die if there's a problem or return the tbl struct. 
my $tbl_struct = ok_to_sync($src, $dst, %args); + $src->{tbl}->{tbl_struct} = $tbl_struct; + $dst->{tbl}->{tbl_struct} = $tbl_struct; + + if ( !$src->{tbl_status} ) { + my $sql = "SHOW TABLE STATUS FROM " . $q->quote($src->{tbl}->{db}) + . " LIKE \'$src->{tbl}->{tbl}\'"; + MKDEBUG && _d($src->{Cxn}->name(), $sql); + my $tbl_status = $src->{Cxn}->dbh()->selectrow_hashref($sql); + MKDEBUG && _d(Dumper($tbl_status)); + $src->{tbl}->{tbl_status} = $tbl_status; + $dst->{tbl}->{tbl_status} = $tbl_status; + } # If the table is InnoDB, prefer to sync it with transactions, unless # the user explicitly said not to. my $use_txn = $o->got('transaction') ? $o->get('transaction') : $tbl_struct->{engine} eq 'InnoDB' ? 1 : 0; + $o->set('transaction', $use_txn); # Turn off AutoCommit if we're using transactions. - $src->{dbh}->{AutoCommit} = !$use_txn; - $src->{misc_dbh}->{AutoCommit} = !$use_txn; - $dst->{dbh}->{AutoCommit} = !$use_txn; - $dst->{misc_dbh}->{AutoCommit} = !$use_txn; + $src->{Cxn}->dbh()->{AutoCommit} = !$use_txn; + $dst->{Cxn}->dbh()->{AutoCommit} = !$use_txn; + $src->{Cxn}->aux_dbh()->{AutoCommit} = !$use_txn; + $dst->{Cxn}->aux_dbh()->{AutoCommit} = !$use_txn; # Determine which columns to compare. - my $ignore_columns = $o->get('ignore-columns'); - my @compare_columns = grep { - !$ignore_columns->{lc $_}; - } @{$o->get('columns') || $tbl_struct->{cols}}; + #my $ignore_columns = $o->get('ignore-columns'); + #my @compare_columns = grep { + # !$ignore_columns->{lc $_}; + #} @{$o->get('columns') || $tbl_struct->{cols}}; # Make sure conflict col is in compare cols else conflicting # rows won't have the col for --conflict-comparison. - if ( my $conflict_col = $o->get('conflict-column') ) { - push @compare_columns, $conflict_col - unless grep { $_ eq $conflict_col } @compare_columns; - } - - # --print --verbose --verbose is the magic formula for having - # all src/dst sql printed so we can see the chunk/row sql. 
- my $callback; - if ( $o->get('print') && $o->get('verbose') >= 2 ) { - $callback = \&print_sql; - } + #if ( my $conflict_col = $o->get('conflict-column') ) { + # push @compare_columns, $conflict_col + # unless grep { $_ eq $conflict_col } @compare_columns; + #} # get_change_dbh() may die if, for example, the destination is # not a slave. Perhaps its work should be part of can_sync()? - my $change_dbh = get_change_dbh(tbl_struct => $tbl_struct, %args); - my $actions = make_action_subs(change_dbh => $change_dbh, %args); + my $change_cxn = get_change_cxn(tbl_struct => $tbl_struct, %args); + my $actions = make_action_subs(change_cxn => $change_cxn, %args); - my $rd = new RowDiff(dbh => $src->{misc_dbh}); my $ch = new ChangeHandler( - left_db => $src->{db}, - left_tbl => $src->{tbl}, - right_db => $dst->{db}, - right_tbl => $dst->{tbl}, - tbl_struct => $tbl_struct, + left_db => $src->{tbl}->{db}, + left_tbl => $src->{tbl}->{tbl}, + right_db => $dst->{tbl}->{db}, + right_tbl => $dst->{tbl}->{tbl}, + tbl_struct => $src->{tbl_struct}, hex_blob => $o->get('hex-blob'), queue => $o->get('buffer-to-client') ? 1 : 0, replace => $o->get('replace') @@ -8120,61 +7638,52 @@ sub sync_a_table { Quoter => $args{Quoter}, ); + $ch->fetch_back($src->{Cxn}->dbh()); + + my $row_syncer_module + = $o->get('bidirectional') ? 
'RowSyncerBidirectional' + : 'RowSyncer'; + + my $row_syncer = $row_syncer_module->new( + OptionParser => $o, + ChangeHandler => $ch, + ); + %status = $syncer->sync_table( %args, - tbl_struct => $tbl_struct, - cols => \@compare_columns, - chunk_size => $o->get('chunk-size'), - RowDiff => $rd, - ChangeHandler => $ch, - transaction => $use_txn, - callback => $callback, - where => $args{where} || $o->get('where'), - bidirectional => $o->get('bidirectional'), - buffer_in_mysql => $o->get('buffer-in-mysql'), - buffer_to_client => $o->get('buffer-to-client'), - changing_src => $o->get('replicate') - || $o->get('sync-to-master') - || $o->get('bidirectional') - || 0, - float_precision => $o->get('float-precision'), - index_hint => $o->get('index-hint'), - chunk_index => $o->get('chunk-index'), - chunk_col => $o->get('chunk-column'), - zero_chunk => $o->get('zero-chunk'), - lock => $o->get('lock'), - replace => $o->get('replace'), - replicate => $o->get('replicate'), - dry_run => $o->get('dry-run'), - timeout_ok => $o->get('timeout-ok'), - trim => $o->get('trim'), - wait => $o->get('wait'), - function => $o->get('function'), + src => $src, + dst => $dst, + ChangeHandler => $ch, + RowSyncer => $row_syncer, + boundaries => $args{boundaries}, + changing_src => $o->get('replicate') + || $o->get('sync-to-master') + || $o->get('bidirectional') + || 0, ); if ( sum(@status{@ChangeHandler::ACTIONS}) ) { $exit_status |= 2; } }; - if ( $EVAL_ERROR ) { - print_err($EVAL_ERROR, $dst->{db}, $dst->{tbl}, $dst->{dsn}->{h}); + print_err($EVAL_ERROR, $dst->{tbl}->{db}, $dst->{tbl}->{tbl}, $dst->{Cxn}->name()); $exit_status |= 1; } # Print this last so that the exit status is its final result. 
if ( $o->get('verbose') ) { - $end_ts = get_server_time($src->{dbh}) || ""; + $end_ts = get_server_time($src->{Cxn}->dbh()) || ""; print_results( - map { $_ || '0' } @status{@ChangeHandler::ACTIONS, 'ALGORITHM'}, + map { $_ || '0' } @status{@ChangeHandler::ACTIONS}, $start_ts, $end_ts, - $exit_status, $src->{db}, $src->{tbl}); + $exit_status, @{$src->{tbl}}{qw(db tbl)}); } return $exit_status; } -# Sub: get_change_dbh +# Sub: get_change_cxn # Return the dbh to write to for syncing changes. Write statements # are executed on the "change dbh". If --sync-to-master or --replicate # is specified, the source (master) dbh is the "change dbh". This means @@ -8199,7 +7708,7 @@ sub sync_a_table { # # See Also: # -sub get_change_dbh { +sub get_change_cxn { my ( %args ) = @_; my @required_args = qw(src dst tbl_struct OptionParser DSNParser MasterSlave); @@ -8208,7 +7717,8 @@ sub get_change_dbh { } my ($src, $dst, $tbl_struct, $o, $dp, $ms) = @args{@required_args}; - my $change_dbh = $dst->{dbh}; # The default case: making changes on dst. + # The default case: making changes on dst. + my $change_cxn = $dst->{Cxn}; if ( $o->get('sync-to-master') || $o->get('replicate') ) { # Is it possible to make changes on the master (i.e. the source)? @@ -8217,8 +7727,8 @@ sub get_change_dbh { MKDEBUG && _d("This table's replace-ability:", $can_replace); die "Can't make changes on the master because no unique index exists" unless $can_replace; - $change_dbh = $src->{dbh}; # The alternate case. - MKDEBUG && _d('Will make changes on source', $change_dbh); + $change_cxn = $src->{Cxn}; # The alternate case. + MKDEBUG && _d('Will make changes on source', $change_cxn->name()); } elsif ( $o->get('check-slave') ) { # Is it safe to change data on the destination? Only if it's *not* @@ -8227,23 +7737,23 @@ sub get_change_dbh { # logging is disabled, or 2) the check is bypassed. 
By the way, just # because the server is a slave doesn't mean it's not also the master # of the master (master-master replication). - my $slave_status = $ms->get_slave_status($dst->{dbh}); - my (undef, $log_bin) = $dst->{dbh}->selectrow_array( + my $slave_status = $ms->get_slave_status($dst->{Cxn}->dbh()); + my (undef, $log_bin) = $dst->{Cxn}->dbh()->selectrow_array( 'SHOW VARIABLES LIKE "log_bin"'); - my ($sql_log_bin) = $dst->{dbh}->selectrow_array( + my ($sql_log_bin) = $dst->{Cxn}->dbh()->selectrow_array( 'SELECT @@SQL_LOG_BIN'); MKDEBUG && _d('Variables on destination:', 'log_bin=', (defined $log_bin ? $log_bin : 'NULL'), ' @@SQL_LOG_BIN=', (defined $sql_log_bin ? $sql_log_bin : 'NULL')); if ( $slave_status && $sql_log_bin && ($log_bin || 'OFF') eq 'ON' ) { - die "Can't make changes on ", $dp->as_string($dst->{dsn}), + die "Can't make changes on ", $dst->{Cxn}->name(), " because it's a slave. See the documentation section", " 'REPLICATION SAFETY' for solutions to this problem."; } - MKDEBUG && _d('Will make changes on destination', $change_dbh); + MKDEBUG && _d('Will make changes on destination', $change_cxn->name()); } - return $change_dbh; + return $change_cxn; } # Sub: make_action_subs @@ -8254,26 +7764,26 @@ sub get_change_dbh { # %args - Arguments # # Required Arguments: -# change_dbh - dbh returned by +# change_cxn - dbh returned by # OptionParser - object # # Returns: # Arrayref of callbacks (coderefs) sub make_action_subs { my ( %args ) = @_; - my @required_args = qw(change_dbh OptionParser); + my @required_args = qw(change_cxn OptionParser); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($change_dbh, $o) = @args{@required_args}; + my ($change_cxn, $o) = @args{@required_args}; my @actions; if ( $o->get('execute') ) { push @actions, sub { my ( $sql, $dbh ) = @_; # Use $dbh if given. It's from a bidirectional callback. 
- $dbh ||= $change_dbh; - MKDEBUG && _d('Execute on dbh', $dbh, $sql); + $dbh ||= $change_cxn->dbh(); + MKDEBUG && _d('Executing change on', $dbh, $sql); $dbh->do($sql); }; } @@ -8284,7 +7794,7 @@ sub make_action_subs { my ( $sql, $dbh ) = @_; # Append /*host:port*/ to the sql, if possible, so the user # can see on which host it was/would be ran. - my $dsn = $dsn_for{$dbh} if $dbh; + my $dsn = ""; # TODO if ( $dsn ) { my $h = $dsn->{h} || $dsn->{S} || ''; my $p = $dsn->{P} || ''; @@ -8320,85 +7830,6 @@ sub print_err { print STDERR $msg, "\n"; } -# Sub: get_cxn -# Connect to host specified by DSN. -# -# Parameters: -# $dsn - Host DSN -# %args - Arguments -# -# Required Arguments: -# OptionaParser - object -# DSNParser - object -# -# Returns: -# dbh -sub get_cxn { - my ( $dsn, %args ) = @_; - my @required_args = qw(OptionParser DSNParser); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($o, $dp) = @args{@required_args}; - - if ( !$dsn->{p} && $o->get('ask-pass') ) { - # Just "F=file" is a valid DSN but fill_in_dsn() can't help us - # because we haven't connected yet. If h is not specified, - # then user is relying on F or .my.cnf/system defaults. - # http://code.google.com/p/maatkit/issues/detail?id=947 - my $host = $dsn->{h} ? $dsn->{h} - : "DSN ". $dp->as_string($dsn); - $dsn->{p} = OptionParser::prompt_noecho("Enter password for $host: "); - } - my $dbh = $dp->get_dbh( - $dp->get_cxn_params($dsn, {}) # get_cxn_params needs the 2nd arg - ); - - my $sql; - if ( !$o->get('bin-log') ) { - $sql = "/*!32316 SET SQL_LOG_BIN=0 */"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - } - if ( !$o->get('unique-checks') ) { - $sql = "/*!40014 SET UNIQUE_CHECKS=0 */"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - } - if ( !$o->get('foreign-key-checks') ) { - $sql = "/*!40014 SET FOREIGN_KEY_CHECKS=0 */"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - } - - # Disable auto-increment on zero (bug #1919897). 
- $sql = '/*!40101 SET @@SQL_MODE := CONCAT(@@SQL_MODE, ' - . '",NO_AUTO_VALUE_ON_ZERO")*/'; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - - # Ensure statement-based replication. - # http://code.google.com/p/maatkit/issues/detail?id=95 - $sql = '/*!50105 SET @@binlog_format="STATEMENT"*/'; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - - if ( $o->get('transaction') ) { - my $sql = "SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ"; - eval { - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - }; - die "Failed to $sql: $EVAL_ERROR" if $EVAL_ERROR; - } - - $dsn_for{$dbh} = $dsn; - - MKDEBUG && _d('Opened dbh', $dbh); - return $dbh; -} - - # Sub: ok_to_sync # Check that the destination host table can be synced to the source host # table. All sorts of sanity checks are performed to help ensure that @@ -8438,18 +7869,18 @@ sub ok_to_sync { my $src_tbl_ddl; eval { # FYI: get_create_table() does USE db but doesn't eval it. - $src->{dbh}->do("USE `$src->{db}`"); - $src_tbl_ddl = $du->get_create_table($src->{dbh}, $q, - $src->{db}, $src->{tbl}); + $src->{Cxn}->dbh()->do("USE `$src->{tbl}->{db}`"); + $src_tbl_ddl = $du->get_create_table( + $src->{Cxn}->dbh(), $q, @{$src->{tbl}}{qw(db tbl)}); }; die $EVAL_ERROR if $EVAL_ERROR; my $dst_tbl_ddl; eval { # FYI: get_create_table() does USE db but doesn't eval it. - $dst->{dbh}->do("USE `$dst->{db}`"); - $dst_tbl_ddl = $du->get_create_table($dst->{dbh}, $q, - $dst->{db}, $dst->{tbl}); + $dst->{Cxn}->dbh()->do("USE `$dst->{tbl}->{db}`"); + $dst_tbl_ddl = $du->get_create_table( + $dst->{Cxn}->dbh(), $q, @{$dst->{tbl}}{qw(db tbl)}); }; die $EVAL_ERROR if $EVAL_ERROR; @@ -8458,36 +7889,40 @@ sub ok_to_sync { # if ( $o->get('check-schema') && ($src_tbl_ddl ne $dst_tbl_ddl) ) { # die "Source and destination tables have different schemas"; # } - my $tbl_struct = $tp->parse($src_tbl_ddl); + my $tbl_struct = $tp->parse($src_tbl_ddl->[1]); # Check that the user has all the necessary privs on the tbls. 
if ( $o->get('check-privileges') ) { - MKDEBUG && _d('Checking privileges'); - if ( !$syncer->have_all_privs($src->{dbh}, $src->{db}, $src->{tbl}) ) { - my $user = get_current_user($src->{dbh}) || ""; - die "User $user does not have all necessary privileges on ", - $q->quote($src->{db}, $src->{tbl}); - } - if ( !$syncer->have_all_privs($dst->{dbh}, $dst->{db}, $dst->{tbl}) ) { - my $user = get_current_user($dst->{dbh}) || ""; - die "User $user does not have all necessary privileges on ", - $q->quote($dst->{db}, $dst->{tbl}); - } +# MKDEBUG && _d('Checking privileges'); +# if ( !$syncer->have_all_privs( +# $src->dbh->dbh(), @{$src->{tbl}}{qw(db tbl)) ) { +# my $user = get_current_user($src->{Cxn}->dbh()) || ""; +# die "User $user does not have all necessary privileges on ", +# $q->quote(@{$src->{tbl}}{qw(db tbl)); +# } +# if ( !$syncer->have_all_privs( +# $dst->dbh->dbh(), @{$dst->{tbl}}{qw(db tbl)) ) { +# my $user = get_current_user($dst->{Cxn}->dbh()) || ""; +# die "User $user does not have all necessary privileges on ", +# $q->quote(@{$dst->{tbl}}{qw(db tbl)); +# } } # Check that no triggers are defined on the dst tbl. if ( $o->get('check-triggers') ) { MKDEBUG && _d('Checking for triggers'); if ( !defined $dst->{supports_triggers} ) { - $dst->{supports_triggers} = $vp->version_ge($dst->{dbh}, '5.0.2'); + $dst->{supports_triggers} = $vp->version_ge( + $dst->{Cxn}->dbh(), '5.0.2'); } if ( $dst->{supports_triggers} - && $du->get_triggers($dst->{dbh}, $q, $dst->{db}, $dst->{tbl}) ) { + && $du->get_triggers( + $dst->{Cxn}->dbh(), $q, @{$dst->{tbl}}{qw(db tbl)}) ) { die "Triggers are defined on the table"; } else { MKDEBUG && _d('Destination does not support triggers', - $dp->as_string($dst->{dsn})); + $dst->{Cxn}->name()); } } @@ -8516,29 +7951,6 @@ sub filter_diffs { } @diffs; } - -# Sub: disconnect -# Disconnect host dbhs created by . 
To make sure all dbh -# are closed, pt-table-sync keeps track of the dbh it opens and this -# sub helps keep track of the dbh that are closed. -# -# Parameters: -# @hosts - Array of hashrefs with host information, one for each host -sub disconnect { - my ( @hosts ) = @_; - foreach my $host ( @hosts ) { - foreach my $thing ( qw(dbh misc_dbh) ) { - my $dbh = $host->{$thing}; - next unless $dbh; - delete $dsn_for{$dbh}; - $dbh->commit() unless $dbh->{AutoCommit}; - $dbh->disconnect(); - MKDEBUG && _d('Disconnected dbh', $dbh); - } - } - return; -} - # Sub: print_sql # Callback for if --print --verbose --verbose # is specified. The callback simply prints the SQL statements passed to @@ -8554,212 +7966,21 @@ sub print_sql { return; } -use constant UPDATE_LEFT => -1; -use constant UPDATE_RIGHT => 1; -use constant UPDATE_NEITHER => 0; # neither value equals/matches -use constant FAILED_THRESHOLD => 2; # failed to exceed threshold - -# Sub: cmd_conflict_col -# Compare --conflict-column values for --bidirectional. This sub is -# used as a callback in . -# -# Parameters: -# $left_val - Column value from left (usually the source host) -# $right_val - Column value from right (usually the destination host) -# $cmp - Type of conflict comparison, --conflict-comparison -# $val - Value for certain types of comparisons, --conflict-value -# $thr - Threshold for certain types of comparisons, -# --conflict-threshold -# -# Returns: -# One of the constants above, UPDATE_* or FAILED_THRESHOLD -sub cmp_conflict_col { - my ( $left_val, $right_val, $cmp, $val, $thr ) = @_; - MKDEBUG && _d('Compare', @_); - my $res; - if ( $cmp eq 'newest' || $cmp eq 'oldest' ) { - $res = $cmp eq 'newest' ? 
($left_val || '') cmp ($right_val || '') - : ($right_val || '') cmp ($left_val || ''); - - if ( $thr ) { - $thr = time_to_secs($thr); - my $lts = any_unix_timestamp($left_val); - my $rts = any_unix_timestamp($right_val); - my $diff = abs($lts - $rts); - MKDEBUG && _d('Check threshold, lts rts thr abs-diff:', - $lts, $rts, $thr, $diff); - if ( $diff < $thr ) { - MKDEBUG && _d("Failed threshold"); - return FAILED_THRESHOLD; - } - } - } - elsif ( $cmp eq 'greatest' || $cmp eq 'least' ) { - $res = $cmp eq 'greatest' ? (($left_val ||0) > ($right_val ||0) ? 1 : -1) - : (($left_val ||0) < ($right_val ||0) ? 1 : -1); - $res = 0 if ($left_val || 0) == ($right_val || 0); - if ( $thr ) { - my $diff = abs($left_val - $right_val); - MKDEBUG && _d('Check threshold, abs-diff:', $diff); - if ( $diff < $thr ) { - MKDEBUG && _d("Failed threshold"); - return FAILED_THRESHOLD; - } - } - } - elsif ( $cmp eq 'equals' ) { - $res = ($left_val || '') eq $val ? 1 - : ($right_val || '') eq $val ? -1 - : 0; - } - elsif ( $cmp eq 'matches' ) { - $res = ($left_val || '') =~ m/$val/ ? 1 - : ($right_val || '') =~ m/$val/ ? -1 - : 0; - } - else { - # Should happen; caller should have verified this. - die "Invalid comparison: $cmp"; - } - - return $res; -} - -# Sub: set_bidirectional_callbacks -# Set syncer plugin callbacks for --bidirectional. -# -# Parameters: -# %args - Arguments -# -# Required Arguments: -# plugin - TableSync* object -# OptionParser - object -sub set_bidirectional_callbacks { - my ( %args ) = @_; - foreach my $arg ( qw(plugin OptionParser) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $o = $args{OptionParser}; - my $plugin = $args{plugin}; - - my $col = $o->get('conflict-column'); - my $cmp = $o->get('conflict-comparison'); - my $val = $o->get('conflict-value'); - my $thr = $o->get('conflict-threshold'); - - # plugin and syncer are actually the same module. For clarity we - # name them differently. 
- - $plugin->set_callback('same_row', sub { - my ( %args ) = @_; - my ($lr, $rr, $syncer) = @args{qw(lr rr syncer)}; - my $ch = $syncer->{ChangeHandler}; - my $action = 'UPDATE'; - my $change_dbh; - my $auth_row; - my $err; - - my $left_val = $lr->{$col} || ''; - my $right_val = $rr->{$col} || ''; - MKDEBUG && _d('left', $col, 'value:', $left_val); - MKDEBUG && _d('right', $col, 'value:', $right_val); - - my $res = cmp_conflict_col($left_val, $right_val, $cmp, $val, $thr); - if ( $res == UPDATE_LEFT ) { - MKDEBUG && _d("right dbh $args{right_dbh} $cmp; " - . "update left dbh $args{left_dbh}"); - $ch->set_src('right', $args{right_dbh}); - $auth_row = $args{rr}; - $change_dbh = $args{left_dbh}; - } - elsif ( $res == UPDATE_RIGHT ) { - MKDEBUG && _d("left dbh $args{left_dbh} $cmp; " - . "update right dbh $args{right_dbh}"); - $ch->set_src('left', $args{left_dbh}); - $auth_row = $args{lr}; - $change_dbh = $args{right_dbh}; - } - elsif ( $res == UPDATE_NEITHER ) { - if ( $cmp eq 'equals' || $cmp eq 'matches' ) { - $err = "neither `$col` value $cmp $val"; - } - else { - $err = "`$col` values are the same" - } - } - elsif ( $res == FAILED_THRESHOLD ) { - $err = "`$col` values do not differ by the threhold, $thr." - } - else { - # Shouldn't happen. - die "cmp_conflict_col() returned an invalid result: $res." - } - - if ( $err ) { - $action = undef; # skip change in case we just warn - my $where = $ch->make_where_clause($lr, $syncer->key_cols()); - $err = "# Cannot resolve conflict WHERE $where: $err\n"; - - # die here is caught in sync_a_table(). We're deeply nested: - # sync_a_table > sync_table > compare_sets > syncer > here - $o->get('conflict-error') eq 'warn' ? 
warn $err : die $err; - } - - return $action, $auth_row, $change_dbh; - }); - - $plugin->set_callback('not_in_right', sub { - my ( %args ) = @_; - $args{syncer}->{ChangeHandler}->set_src('left', $args{left_dbh}); - return 'INSERT', $args{lr}, $args{right_dbh}; - }); - - $plugin->set_callback('not_in_left', sub { - my ( %args ) = @_; - $args{syncer}->{ChangeHandler}->set_src('right', $args{right_dbh}); - return 'INSERT', $args{rr}, $args{left_dbh}; - }); - - return; -} - -# Sub: get_plugins -# Get internal TableSync* plugins. -# -# Returns: -# Hash of available algoritms and the plugin/module names that -# implement them, like "chunk => TableSyncChunk". -sub get_plugins { - my ( %args ) = @_; - - my $file = __FILE__; - open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR"; - my $contents = do { local $/ = undef; <$fh> }; - close $fh; - - my %local_plugins = map { - my $package = $_; - my ($module, $algo) = $package =~ m/(TableSync(\w+))/; - lc $algo => $module; - } $contents =~ m/^package TableSync\w{3,};/gm; - - return %local_plugins; -} - { -# DELETE REPLACE INSERT UPDATE ALGORITHM START END EXIT DATABASE.TABLE -my $hdr = "# %6s %7s %6s %6s %-9s %-8s %-8s %-4s %s.%s\n"; +# DELETE REPLACE INSERT UPDATE START END EXIT DATABASE.TABLE +my $hdr = "# %6s %7s %6s %6s %-8s %-8s %-4s %s.%s\n"; sub print_header { my ( $title ) = @_; print "$title\n" if $title; printf $hdr, @ChangeHandler::ACTIONS, - qw(ALGORITHM START END EXIT DATABASE TABLE); + qw(START END EXIT DATABASE TABLE); return; } sub print_results { my ( @values ) = @_; + print_header(); printf $hdr, @values; return; } @@ -8886,7 +8107,7 @@ tools) and those created by bugs. With great power comes great responsibility! This tool changes data, so it is a good idea to back up your data. 
It is also very powerful, which means it is -very complex, so you should run it with the L<"--dry-run"> option to see what it +very complex, so you should run it with the L<"--explain"> option to see what it will do, until you're familiar with its operation. If you want to see which rows are different, without changing any data, use L<"--print"> instead of L<"--execute">. @@ -9072,72 +8293,6 @@ L<"--sync-to-master"> option so you don't change the data on the destination server. You will also need to specify C<--no-check-slave> to keep pt-table-sync from complaining that it is changing data on a slave. -=head1 ALGORITHMS - -pt-table-sync has a generic data-syncing framework which uses different -algorithms to find differences. The tool automatically chooses the best -algorithm for each table based on indexes, column types, and the algorithm -preferences specified by L<"--algorithms">. The following algorithms are -available, listed in their default order of preference: - -=over - -=item Chunk - -Finds an index whose first column is numeric (including date and time types), -and divides the column's range of values into chunks of approximately -L<"--chunk-size"> rows. Syncs a chunk at a time by checksumming the entire -chunk. If the chunk differs on the source and destination, checksums each -chunk's rows individually to find the rows that differ. - -It is efficient when the column has sufficient cardinality to make the chunks -end up about the right size. - -The initial per-chunk checksum is quite small and results in minimal network -traffic and memory consumption. If a chunk's rows must be examined, only the -primary key columns and a checksum are sent over the network, not the entire -row. If a row is found to be different, the entire row will be fetched, but not -before. - -=item Nibble - -Finds an index and ascends the index in fixed-size nibbles of L<"--chunk-size"> -rows, using a non-backtracking algorithm (see L for more on this -algorithm). 
It is very similar to L<"Chunk">, but instead of pre-calculating -the boundaries of each piece of the table based on index cardinality, it uses -C to define each nibble's upper limit, and the previous nibble's upper -limit to define the lower limit. - -It works in steps: one query finds the row that will define the next nibble's -upper boundary, and the next query checksums the entire nibble. If the nibble -differs between the source and destination, it examines the nibble row-by-row, -just as L<"Chunk"> does. - -=item GroupBy - -Selects the entire table grouped by all columns, with a COUNT(*) column added. -Compares all columns, and if they're the same, compares the COUNT(*) column's -value to determine how many rows to insert or delete into the destination. -Works on tables with no primary key or unique index. - -=item Stream - -Selects the entire table in one big stream and compares all columns. Selects -all columns. Much less efficient than the other algorithms, but works when -there is no suitable index for them to use. - -=item Future Plans - -Possibilities for future algorithms are TempTable (what I originally called -bottom-up in earlier versions of this tool), DrillDown (what I originally -called top-down), and GroupByPrefix (similar to how SqlYOG Job Agent works). -Each algorithm has strengths and weaknesses. If you'd like to implement your -favorite technique for finding differences between two sources of data on -possibly different servers, I'm willing to help. The algorithms adhere to a -simple interface that makes it pretty easy to write your own. - -=back - =head1 BIDIRECTIONAL SYNCING Bidirectional syncing is a new, experimental feature. To make it work @@ -9233,7 +8388,7 @@ when pt-table-sync finishes and exits. =head1 OPTIONS -Specify at least one of L<"--print">, L<"--execute">, or L<"--dry-run">. +Specify at least one of L<"--print">, L<"--execute">, or L<"--explain">. L<"--where"> and L<"--replicate"> are mutually exclusive. 
@@ -9242,16 +8397,6 @@ L<"SYNOPSIS"> and usage information for details. =over -=item --algorithms - -type: string; default: Chunk,Nibble,GroupBy,Stream - -Algorithm to use when comparing the tables, in order of preference. - -For each table, pt-table-sync will check if the table can be synced with -the given algorithms in the order that they're given. The first algorithm -that can sync the table is used. See L<"ALGORITHMS">. - =item --ask-pass Prompt for a password when connecting to MySQL. @@ -9284,8 +8429,7 @@ more memory on the MySQL server instead. You probably want to leave L<"--[no]buffer-to-client"> enabled too, because buffering into a temp table and then fetching it all into Perl's memory is -probably a silly thing to do. This option is most useful for the GroupBy and -Stream algorithms, which may fetch a lot of data from the server. +probably a silly thing to do. =item --[no]buffer-to-client @@ -9366,7 +8510,7 @@ Chunk the table using this index. =item --chunk-size -type: string; default: 1000 +type: size; default: 1000 Number of rows or data size per chunk. @@ -9437,8 +8581,9 @@ How to report unresolvable conflicts and conflict errors This option changes how the user is notified when a conflict cannot be resolved or causes some kind of error. Possible values are: - * warn: Print a warning to STDERR about the unresolvable conflict - * die: Die, stop syncing, and print a warning to STDERR + * ignore: Silently ignore the error, don't change the row + * warn: Print a warning to STDERR about the unresolvable conflict + * die: Die, stop syncing, and print a warning to STDERR This option only works with L<"--bidirectional">. See L<"BIDIRECTIONAL SYNCING"> for more information. @@ -9492,16 +8637,6 @@ short form: -F; type: string Only read mysql options from the given file. You must give an absolute pathname. -=item --dry-run - -Analyze, decide the sync algorithm to use, print and exit. - -Implies L<"--verbose"> so you can see the results. 
The results are in the same -output format that you'll see from actually running the tool, but there will be -zeros for rows affected. This is because the tool actually executes, but stops -before it compares any data and just returns zeros. The zeros do not mean there -are no changes to be made. - =item --engines short form: -e; type: hash @@ -9516,7 +8651,20 @@ This option makes pt-table-sync actually sync table data by executing all the queries that it created to resolve table differences. Therefore, B And unless you also specify L<"--verbose">, the changes will be made silently. If this is not what you want, see -L<"--print"> or L<"--dry-run">. +L<"--print"> or L<"--explain">. + +=item --explain + +cumulative: yes; default: 0; group: Output + +Print what would be done. + +Implies L<"--verbose"> so you can see the results. The results are in the same +output format that you'll see from actually running the tool, but there will be +zeros for rows affected. This is because the tool actually executes, but stops +before it compares any data and just returns zeros. The zeros do not mean there +are no changes to be made. + =item --explain-hosts @@ -9612,21 +8760,6 @@ Ignore this comma-separated list of tables. Table names may be qualified with the database name. -=item --[no]index-hint - -default: yes - -Add FORCE/USE INDEX hints to the chunk and row queries. - -By default C adds a FORCE/USE INDEX hint to each SQL statement -to coerce MySQL into using the index chosen by the sync algorithm or specified -by L<"--chunk-index">. This is usually a good thing, but in rare cases the -index may not be the best for the query so you can suppress the index hint -by specifying C<--no-index-hint> and let MySQL choose the index. - -This does not affect the queries printed by L<"--print">; it only affects the -chunk and row queries that C uses to select and compare rows. - =item --lock type: int @@ -9896,18 +9029,6 @@ type: string C clause to restrict syncing to part of the table. 
-=item --[no]zero-chunk - -default: yes - -Add a chunk for rows with zero or zero-equivalent values. The only has an -effect when L<"--chunk-size"> is specified. The purpose of the zero chunk -is to capture a potentially large number of zero values that would imbalance -the size of the first chunk. For example, if a lot of negative numbers were -inserted into an unsigned integer column causing them to be stored as zeros, -then these zero values are captured by the zero chunk instead of the first -chunk and all its non-zero values. - =back =head1 DSN OPTIONS diff --git a/t/pt-table-sync/basics.t b/t/pt-table-sync/basics.t index f4c74a19..534ca27f 100644 --- a/t/pt-table-sync/basics.t +++ b/t/pt-table-sync/basics.t @@ -82,46 +82,6 @@ is_deeply( 'Synced OK with no alg' ); -$sb->load_file('master', 't/pt-table-sync/samples/before.sql'); -$output = run('test1', 'test2', '--algorithms Stream --no-bin-log'); -is($output, "INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('1', 'en'); -INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('2', 'ca');", 'Basic Stream sync'); -is_deeply( - query_slave('select * from test.test2'), - [ { a => 1, b => 'en' }, { a => 2, b => 'ca' } ], - 'Synced OK with Stream' -); - -$sb->load_file('master', 't/pt-table-sync/samples/before.sql'); -$output = run('test1', 'test2', '--algorithms GroupBy --no-bin-log'); -is($output, "INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('1', 'en'); -INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('2', 'ca');", 'Basic GroupBy sync'); -is_deeply( - query_slave('select * from test.test2'), - [ { a => 1, b => 'en' }, { a => 2, b => 'ca' } ], - 'Synced OK with GroupBy' -); - -$sb->load_file('master', 't/pt-table-sync/samples/before.sql'); -$output = run('test1', 'test2', '--algorithms Chunk,GroupBy --no-bin-log'); -is($output, "INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('1', 'en'); -INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('2', 'ca');", 'Basic Chunk sync'); -is_deeply( - query_slave('select * from test.test2'), - [ { a => 1, 
b => 'en' }, { a => 2, b => 'ca' } ], - 'Synced OK with Chunk' -); - -$sb->load_file('master', 't/pt-table-sync/samples/before.sql'); -$output = run('test1', 'test2', '--algorithms Nibble --no-bin-log'); -is($output, "INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('1', 'en'); -INSERT INTO `test`.`test2`(`a`, `b`) VALUES ('2', 'ca');", 'Basic Nibble sync'); -is_deeply( - query_slave('select * from test.test2'), - [ { a => 1, b => 'en' }, { a => 2, b => 'ca' } ], - 'Synced OK with Nibble' -); - # Save original MKDEBUG env because we modify it below. my $dbg = $ENV{MKDEBUG}; @@ -156,12 +116,12 @@ like( ); like( $output, - qr/2 Chunk\s+\S+\s+\S+\s+2\s+test.test3/, + qr/2\s+\S+\s+\S+\s+2\s+test.test3/, 'Right number of rows to update', ); # Sync a table with Nibble and a chunksize in data size, not number of rows -$output = run('test3', 'test4', '--algorithms Nibble --chunk-size 1k --print --verbose --function MD5'); +$output = run('test3', 'test4', '--chunk-size 1k --print --no-bin-log --verbose --function MD5'); # If it lived, it's OK. ok($output, 'Synced with Nibble and data-size chunksize'); diff --git a/t/pt-table-sync/option_sanity.t b/t/pt-table-sync/option_sanity.t index 7f552360..c3a7d703 100644 --- a/t/pt-table-sync/option_sanity.t +++ b/t/pt-table-sync/option_sanity.t @@ -19,13 +19,13 @@ my $output; # ############################################################################# -# Issue 111: Make mk-table-sync require --print or --execute or --dry-run +# Issue 111: Make mk-table-sync require --print or --execute or --explain # ############################################################################# # This test reuses the test.message table created above for issue 22. 
$output = `$trunk/bin/pt-table-sync h=127.1,P=12345,u=msandbox,p=msandbox,D=test,t=messages P=12346`; -like($output, qr/Specify at least one of --print, --execute or --dry-run/, - 'Requires --print, --execute or --dry-run'); +like($output, qr/Specify at least one of --print, --execute or --explain/, + 'Requires --print, --execute or --explain'); # ############################################################################# # Don't let people try to restrict syncing with D=foo