mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-01 18:25:59 +00:00
256 lines
10 KiB
Perl
256 lines
10 KiB
Perl
# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd.
|
|
# Feedback and improvements are welcome.
|
|
#
|
|
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
|
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
|
# licenses.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
# ###########################################################################
|
|
# RowDiff package
|
|
# ###########################################################################
|
|
{
|
|
# Package: RowDiff
|
|
# RowDiff compares two sets of rows to find ones that are different.
|
|
package RowDiff;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
# Required args:
|
|
# * dbh obj: dbh used for collation-specific string comparisons
|
|
# Optional args:
|
|
# * same_row Callback when rows are identical
|
|
# * not_in_left Callback when right row is not in the left
|
|
# * not_in_right Callback when left row is not in the right
|
|
# * key_cmp Callback when a column value differs
|
|
# * done Callback that stops compare_sets() if it returns true
|
|
# * trf Callback to transform numeric values before comparison
|
|
sub new {
|
|
my ( $class, %args ) = @_;
|
|
die "I need a dbh" unless $args{dbh};
|
|
my $self = { %args };
|
|
return bless $self, $class;
|
|
}
|
|
|
|
# Arguments:
|
|
# * left_sth obj: sth
|
|
# * right_sth obj: sth
|
|
# * syncer obj: TableSync* module
|
|
# * tbl_struct hashref: table struct from TableParser::parser()
|
|
# Iterates through two sets of rows and finds differences. Calls various
|
|
# methods on the $syncer object when it finds differences, passing these
|
|
# args and hashrefs to the differing rows ($lr and $rr).
|
|
sub compare_sets {
|
|
my ( $self, %args ) = @_;
|
|
my @required_args = qw(left_sth right_sth syncer tbl_struct);
|
|
foreach my $arg ( @required_args ) {
|
|
die "I need a $arg argument" unless defined $args{$arg};
|
|
}
|
|
my $left_sth = $args{left_sth};
|
|
my $right_sth = $args{right_sth};
|
|
my $syncer = $args{syncer};
|
|
my $tbl_struct = $args{tbl_struct};
|
|
|
|
my ($lr, $rr); # Current row from the left/right sths.
|
|
$args{key_cols} = $syncer->key_cols(); # for key_cmp()
|
|
|
|
# We have to manually track if the left or right sth is done
|
|
# fetching rows because sth->{Active} is always true with
|
|
# DBD::mysql v3. And we cannot simply while ( $lr || $rr )
|
|
# because in the case where left and right have the same key,
|
|
# we do this:
|
|
# $lr = $rr = undef; # Fetch another row from each side.
|
|
# Unsetting both $lr and $rr there would cause while () to
|
|
# terminate. (And while ( $lr && $rr ) is not what we want
|
|
# either.) Furthermore, we need to avoid trying to fetch more
|
|
# rows if there are none to fetch because doing this would
|
|
# cause a DBI error ("fetch without execute"). That's why we
|
|
# make these checks:
|
|
# if ( !$lr && !$left_done )
|
|
# if ( !$rr && !$right_done )
|
|
# If you make changes here, be sure to test both RowDiff.t
|
|
# and RowDiff-custom.t. Look inside the later to see what
|
|
# is custom about it.
|
|
my $left_done = 0;
|
|
my $right_done = 0;
|
|
my $done = $self->{done};
|
|
|
|
do {
|
|
if ( !$lr && !$left_done ) {
|
|
PTDEBUG && _d('Fetching row from left');
|
|
eval { $lr = $left_sth->fetchrow_hashref(); };
|
|
PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
|
|
$left_done = !$lr || $EVAL_ERROR ? 1 : 0;
|
|
}
|
|
elsif ( PTDEBUG ) {
|
|
_d('Left still has rows');
|
|
}
|
|
|
|
if ( !$rr && !$right_done ) {
|
|
PTDEBUG && _d('Fetching row from right');
|
|
eval { $rr = $right_sth->fetchrow_hashref(); };
|
|
PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR);
|
|
$right_done = !$rr || $EVAL_ERROR ? 1 : 0;
|
|
}
|
|
elsif ( PTDEBUG ) {
|
|
_d('Right still has rows');
|
|
}
|
|
|
|
my $cmp;
|
|
if ( $lr && $rr ) {
|
|
$cmp = $self->key_cmp(%args, lr => $lr, rr => $rr);
|
|
PTDEBUG && _d('Key comparison on left and right:', $cmp);
|
|
}
|
|
if ( $lr || $rr ) {
|
|
# If the current row is the "same row" on both sides, meaning the two
|
|
# rows have the same key, check the contents of the row to see if
|
|
# they're the same.
|
|
if ( $lr && $rr && defined $cmp && $cmp == 0 ) {
|
|
PTDEBUG && _d('Left and right have the same key');
|
|
$syncer->same_row(%args, lr => $lr, rr => $rr);
|
|
$self->{same_row}->(%args, lr => $lr, rr => $rr)
|
|
if $self->{same_row};
|
|
$lr = $rr = undef; # Fetch another row from each side.
|
|
}
|
|
# The row in the left doesn't exist in the right.
|
|
elsif ( !$rr || ( defined $cmp && $cmp < 0 ) ) {
|
|
PTDEBUG && _d('Left is not in right');
|
|
$syncer->not_in_right(%args, lr => $lr, rr => $rr);
|
|
$self->{not_in_right}->(%args, lr => $lr, rr => $rr)
|
|
if $self->{not_in_right};
|
|
$lr = undef;
|
|
}
|
|
# Symmetric to the above.
|
|
else {
|
|
PTDEBUG && _d('Right is not in left');
|
|
$syncer->not_in_left(%args, lr => $lr, rr => $rr);
|
|
$self->{not_in_left}->(%args, lr => $lr, rr => $rr)
|
|
if $self->{not_in_left};
|
|
$rr = undef;
|
|
}
|
|
}
|
|
$left_done = $right_done = 1 if $done && $done->(%args);
|
|
} while ( !($left_done && $right_done) );
|
|
PTDEBUG && _d('No more rows');
|
|
$syncer->done_with_rows();
|
|
}
|
|
|
|
# Compare two rows to determine how they should be ordered. NULL sorts before
|
|
# defined values in MySQL, so I consider undef "less than." Numbers are easy to
|
|
# compare. Otherwise string comparison is tricky. This function must match
|
|
# MySQL exactly or the merge algorithm runs off the rails, so when in doubt I
|
|
# ask MySQL to compare strings for me. I can handle numbers and "normal" latin1
|
|
# characters without asking MySQL. See
|
|
# http://dev.mysql.com/doc/refman/5.0/en/charset-literal.html. $r1 and $r2 are
|
|
# row hashrefs. $key_cols is an arrayref of the key columns to compare. $tbl is the
|
|
# structure returned by TableParser. The result matches Perl's cmp or <=>
|
|
# operators:
|
|
# 1 cmp 0 => 1
|
|
# 1 cmp 1 => 0
|
|
# 1 cmp 2 => -1
|
|
# TODO: must generate the comparator function dynamically for speed, so we don't
|
|
# have to check the type of columns constantly
|
|
sub key_cmp {
|
|
my ( $self, %args ) = @_;
|
|
my @required_args = qw(lr rr key_cols tbl_struct);
|
|
foreach my $arg ( @required_args ) {
|
|
die "I need a $arg argument" unless exists $args{$arg};
|
|
}
|
|
my ($lr, $rr, $key_cols, $tbl_struct) = @args{@required_args};
|
|
PTDEBUG && _d('Comparing keys using columns:', join(',', @$key_cols));
|
|
|
|
# Optional callbacks.
|
|
my $callback = $self->{key_cmp};
|
|
my $trf = $self->{trf};
|
|
|
|
foreach my $col ( @$key_cols ) {
|
|
my $l = $lr->{$col};
|
|
my $r = $rr->{$col};
|
|
if ( !defined $l || !defined $r ) {
|
|
PTDEBUG && _d($col, 'is not defined in both rows');
|
|
return defined $l ? 1 : defined $r ? -1 : 0;
|
|
}
|
|
else {
|
|
if ( $tbl_struct->{is_numeric}->{$col} ) { # Numeric column
|
|
PTDEBUG && _d($col, 'is numeric');
|
|
($l, $r) = $trf->($l, $r, $tbl_struct, $col) if $trf;
|
|
my $cmp = $l <=> $r;
|
|
if ( $cmp ) {
|
|
PTDEBUG && _d('Column', $col, 'differs:', $l, '!=', $r);
|
|
$callback->($col, $l, $r) if $callback;
|
|
return $cmp;
|
|
}
|
|
}
|
|
# Do case-sensitive cmp, expecting most will be eq. If that fails, try
|
|
# a case-insensitive cmp if possible; otherwise ask MySQL how to sort.
|
|
elsif ( $l ne $r ) {
|
|
my $cmp;
|
|
my $coll = $tbl_struct->{collation_for}->{$col};
|
|
if ( $coll && ( $coll ne 'latin1_swedish_ci'
|
|
|| $l =~ m/[^\040-\177]/ || $r =~ m/[^\040-\177]/) )
|
|
{
|
|
PTDEBUG && _d('Comparing', $col, 'via MySQL');
|
|
$cmp = $self->db_cmp($coll, $l, $r);
|
|
}
|
|
else {
|
|
PTDEBUG && _d('Comparing', $col, 'in lowercase');
|
|
$cmp = lc $l cmp lc $r;
|
|
}
|
|
if ( $cmp ) {
|
|
PTDEBUG && _d('Column', $col, 'differs:', $l, 'ne', $r);
|
|
$callback->($col, $l, $r) if $callback;
|
|
return $cmp;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
sub db_cmp {
|
|
my ( $self, $collation, $l, $r ) = @_;
|
|
if ( !$self->{sth}->{$collation} ) {
|
|
if ( !$self->{charset_for} ) {
|
|
PTDEBUG && _d('Fetching collations from MySQL');
|
|
my @collations = @{$self->{dbh}->selectall_arrayref(
|
|
'SHOW COLLATION', {Slice => { collation => 1, charset => 1 }})};
|
|
foreach my $collation ( @collations ) {
|
|
$self->{charset_for}->{$collation->{collation}}
|
|
= $collation->{charset};
|
|
}
|
|
}
|
|
my $sql = "SELECT STRCMP(_$self->{charset_for}->{$collation}? COLLATE $collation, "
|
|
. "_$self->{charset_for}->{$collation}? COLLATE $collation) AS res";
|
|
PTDEBUG && _d($sql);
|
|
$self->{sth}->{$collation} = $self->{dbh}->prepare($sql);
|
|
}
|
|
my $sth = $self->{sth}->{$collation};
|
|
$sth->execute($l, $r);
|
|
return $sth->fetchall_arrayref()->[0]->[0];
|
|
}
|
|
|
|
sub _d {
|
|
my ($package, undef, $line) = caller 0;
|
|
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
|
map { defined $_ ? $_ : 'undef' }
|
|
@_;
|
|
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
|
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End RowDiff package
|
|
# ###########################################################################
|