From 06b4928093e2f016061d5da6b722a426f4a91f73 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sun, 11 Sep 2011 12:36:27 -0600 Subject: [PATCH] First working but unfinished pt-table-checksum 2.0. --- bin/pt-table-checksum | 5826 ++++++++++++++--------------------------- 1 file changed, 2018 insertions(+), 3808 deletions(-) diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 0d55dfa2..e3aa10d4 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -9,15 +9,15 @@ use warnings FATAL => 'all'; use constant MKDEBUG => $ENV{MKDEBUG} || 0; # ########################################################################### -# TableParser package +# DSNParser package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, -# lib/TableParser.pm -# t/lib/TableParser.t +# lib/DSNParser.pm +# t/lib/DSNParser.t # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### { -package TableParser; +package DSNParser; use strict; use warnings FATAL => 'all'; @@ -25,757 +25,329 @@ use English qw(-no_match_vars); use constant MKDEBUG => $ENV{MKDEBUG} || 0; use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; +$Data::Dumper::Indent = 0; $Data::Dumper::Quotekeys = 0; +eval { + require DBI; +}; +my $have_dbi = $EVAL_ERROR ? 0 : 1; + sub new { my ( $class, %args ) = @_; - my @required_args = qw(Quoter); - foreach my $arg ( @required_args ) { + foreach my $arg ( qw(opts) ) { die "I need a $arg argument" unless $args{$arg}; } - my $self = { %args }; + my $self = { + opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD. + }; + foreach my $opt ( @{$args{opts}} ) { + if ( !$opt->{key} || !$opt->{desc} ) { + die "Invalid DSN option: ", Dumper($opt); + } + MKDEBUG && _d('DSN option:', + join(', ', + map { "$_=" . (defined $opt->{$_} ? 
($opt->{$_} || '') : 'undef') } + keys %$opt + ) + ); + $self->{opts}->{$opt->{key}} = { + dsn => $opt->{dsn}, + desc => $opt->{desc}, + copy => $opt->{copy} || 0, + }; + } return bless $self, $class; } +sub prop { + my ( $self, $prop, $value ) = @_; + if ( @_ > 2 ) { + MKDEBUG && _d('Setting', $prop, 'property'); + $self->{$prop} = $value; + } + return $self->{$prop}; +} + sub parse { - my ( $self, $ddl, $opts ) = @_; - return unless $ddl; - if ( ref $ddl eq 'ARRAY' ) { - if ( lc $ddl->[0] eq 'table' ) { - $ddl = $ddl->[1]; + my ( $self, $dsn, $prev, $defaults ) = @_; + if ( !$dsn ) { + MKDEBUG && _d('No DSN to parse'); + return; + } + MKDEBUG && _d('Parsing', $dsn); + $prev ||= {}; + $defaults ||= {}; + my %given_props; + my %final_props; + my $opts = $self->{opts}; + + foreach my $dsn_part ( split(/,/, $dsn) ) { + if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) { + $given_props{$prop_key} = $prop_val; } else { - return { - engine => 'VIEW', - }; + MKDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part); + $given_props{h} = $dsn_part; } } - if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) { - die "Cannot parse table definition; is ANSI quoting " - . 
"enabled or SQL_QUOTE_SHOW_CREATE disabled?"; - } - - my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/; - (undef, $name) = $self->{Quoter}->split_unquote($name) if $name; - - $ddl =~ s/(`[^`]+`)/\L$1/g; - - my $engine = $self->get_engine($ddl); - - my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; - my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; - MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); - - my %def_for; - @def_for{@cols} = @defs; - - my (@nums, @null); - my (%type_for, %is_nullable, %is_numeric, %is_autoinc); - foreach my $col ( @cols ) { - my $def = $def_for{$col}; - my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/; - die "Can't determine column type for $def" unless $type; - $type_for{$col} = $type; - if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) { - push @nums, $col; - $is_numeric{$col} = 1; - } - if ( $def !~ m/NOT NULL/ ) { - push @null, $col; - $is_nullable{$col} = 1; - } - $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0; - } - - my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable); - - my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/; - - return { - name => $name, - cols => \@cols, - col_posn => { map { $cols[$_] => $_ } 0..$#cols }, - is_col => { map { $_ => 1 } @cols }, - null_cols => \@null, - is_nullable => \%is_nullable, - is_autoinc => \%is_autoinc, - clustered_key => $clustered_key, - keys => $keys, - defs => \%def_for, - numeric_cols => \@nums, - is_numeric => \%is_numeric, - engine => $engine, - type_for => \%type_for, - charset => $charset, - }; -} - -sub sort_indexes { - my ( $self, $tbl ) = @_; - - my @indexes - = sort { - (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY')) - || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} ) - || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} ) - || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) ) - } - grep { - $tbl->{keys}->{$_}->{type} eq 'BTREE' - } - 
sort keys %{$tbl->{keys}}; - - MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes)); - return @indexes; -} - -sub find_best_index { - my ( $self, $tbl, $index ) = @_; - my $best; - if ( $index ) { - ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}}; - } - if ( !$best ) { - if ( $index ) { - die "Index '$index' does not exist in table"; - } - else { - ($best) = $self->sort_indexes($tbl); - } - } - MKDEBUG && _d('Best index found is', $best); - return $best; -} - -sub find_possible_keys { - my ( $self, $dbh, $database, $table, $quoter, $where ) = @_; - return () unless $where; - my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table) - . ' WHERE ' . $where; - MKDEBUG && _d($sql); - my $expl = $dbh->selectrow_hashref($sql); - $expl = { map { lc($_) => $expl->{$_} } keys %$expl }; - if ( $expl->{possible_keys} ) { - MKDEBUG && _d('possible_keys =', $expl->{possible_keys}); - my @candidates = split(',', $expl->{possible_keys}); - my %possible = map { $_ => 1 } @candidates; - if ( $expl->{key} ) { - MKDEBUG && _d('MySQL chose', $expl->{key}); - unshift @candidates, grep { $possible{$_} } split(',', $expl->{key}); - MKDEBUG && _d('Before deduping:', join(', ', @candidates)); - my %seen; - @candidates = grep { !$seen{$_}++ } @candidates; - } - MKDEBUG && _d('Final list:', join(', ', @candidates)); - return @candidates; - } - else { - MKDEBUG && _d('No keys in possible_keys'); - return (); - } -} - -sub check_table { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db, $tbl) = @args{@required_args}; - my $q = $self->{Quoter}; - my $db_tbl = $q->quote($db, $tbl); - MKDEBUG && _d('Checking', $db_tbl); - - my $sql = "SHOW TABLES FROM " . $q->quote($db) - . ' LIKE ' . 
$q->literal_like($tbl); - MKDEBUG && _d($sql); - my $row; - eval { - $row = $dbh->selectrow_arrayref($sql); - }; - if ( $EVAL_ERROR ) { - MKDEBUG && _d($EVAL_ERROR); - return 0; - } - if ( !$row->[0] || $row->[0] ne $tbl ) { - MKDEBUG && _d('Table does not exist'); - return 0; - } - - MKDEBUG && _d('Table exists; no privs to check'); - return 1 unless $args{all_privs}; - - $sql = "SHOW FULL COLUMNS FROM $db_tbl"; - MKDEBUG && _d($sql); - eval { - $row = $dbh->selectrow_hashref($sql); - }; - if ( $EVAL_ERROR ) { - MKDEBUG && _d($EVAL_ERROR); - return 0; - } - if ( !scalar keys %$row ) { - MKDEBUG && _d('Table has no columns:', Dumper($row)); - return 0; - } - my $privs = $row->{privileges} || $row->{Privileges}; - - $sql = "DELETE FROM $db_tbl LIMIT 0"; - MKDEBUG && _d($sql); - eval { - $dbh->do($sql); - }; - my $can_delete = $EVAL_ERROR ? 0 : 1; - - MKDEBUG && _d('User privs on', $db_tbl, ':', $privs, - ($can_delete ? 'delete' : '')); - - if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/ - && $can_delete) ) { - MKDEBUG && _d('User does not have all privs'); - return 0; - } - - MKDEBUG && _d('User has all privs'); - return 1; -} - -sub get_engine { - my ( $self, $ddl, $opts ) = @_; - my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; - MKDEBUG && _d('Storage engine:', $engine); - return $engine || undef; -} - -sub get_keys { - my ( $self, $ddl, $opts, $is_nullable ) = @_; - my $engine = $self->get_engine($ddl); - my $keys = {}; - my $clustered_key = undef; - - KEY: - foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { - - next KEY if $key =~ m/FOREIGN/; - - my $key_ddl = $key; - MKDEBUG && _d('Parsed key:', $key_ddl); - - if ( $engine !~ m/MEMORY|HEAP/ ) { - $key =~ s/USING HASH/USING BTREE/; - } - - my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? 
\((.+)\)/; - my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; - $type = $type || $special || 'BTREE'; - if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000' - && $engine =~ m/HEAP|MEMORY/i ) + foreach my $key ( keys %$opts ) { + MKDEBUG && _d('Finding value for', $key); + $final_props{$key} = $given_props{$key}; + if ( !defined $final_props{$key} + && defined $prev->{$key} && $opts->{$key}->{copy} ) { - $type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP + $final_props{$key} = $prev->{$key}; + MKDEBUG && _d('Copying value for', $key, 'from previous DSN'); } - - my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; - my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0; - my @cols; - my @col_prefixes; - foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) { - my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/; - push @cols, $name; - push @col_prefixes, $prefix; - } - $name =~ s/`//g; - - MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols)); - - $keys->{$name} = { - name => $name, - type => $type, - colnames => $cols, - cols => \@cols, - col_prefixes => \@col_prefixes, - is_unique => $unique, - is_nullable => scalar(grep { $is_nullable->{$_} } @cols), - is_col => { map { $_ => 1 } @cols }, - ddl => $key_ddl, - }; - - if ( $engine =~ m/InnoDB/i && !$clustered_key ) { - my $this_key = $keys->{$name}; - if ( $this_key->{name} eq 'PRIMARY' ) { - $clustered_key = 'PRIMARY'; - } - elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) { - $clustered_key = $this_key->{name}; - } - MKDEBUG && $clustered_key && _d('This key is the clustered key'); + if ( !defined $final_props{$key} ) { + $final_props{$key} = $defaults->{$key}; + MKDEBUG && _d('Copying value for', $key, 'from defaults'); } } - return $keys, $clustered_key; -} - -sub get_fks { - my ( $self, $ddl, $opts ) = @_; - my $q = $self->{Quoter}; - my $fks = {}; - - foreach my $fk ( - $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg ) - { - my ( $name ) = 
$fk =~ m/CONSTRAINT `(.*?)`/; - my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/; - my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/; - - my ($db, $tbl) = $q->split_unquote($parent, $opts->{database}); - my %parent_tbl = (tbl => $tbl); - $parent_tbl{db} = $db if $db; - - if ( $parent !~ m/\./ && $opts->{database} ) { - $parent = $q->quote($opts->{database}) . ".$parent"; + foreach my $key ( keys %given_props ) { + die "Unknown DSN option '$key' in '$dsn'. For more details, " + . "please use the --help option, or try 'perldoc $PROGRAM_NAME' " + . "for complete documentation." + unless exists $opts->{$key}; + } + if ( (my $required = $self->prop('required')) ) { + foreach my $key ( keys %$required ) { + die "Missing required DSN option '$key' in '$dsn'. For more details, " + . "please use the --help option, or try 'perldoc $PROGRAM_NAME' " + . "for complete documentation." + unless $final_props{$key}; } - - $fks->{$name} = { - name => $name, - colnames => $cols, - cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ], - parent_tbl => \%parent_tbl, - parent_tblname => $parent, - parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ], - parent_colnames=> $parent_cols, - ddl => $fk, - }; } - return $fks; + return \%final_props; } -sub remove_auto_increment { - my ( $self, $ddl ) = @_; - $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m; - return $ddl; +sub parse_options { + my ( $self, $o ) = @_; + die 'I need an OptionParser object' unless ref $o eq 'OptionParser'; + my $dsn_string + = join(',', + map { "$_=".$o->get($_); } + grep { $o->has($_) && $o->get($_) } + keys %{$self->{opts}} + ); + MKDEBUG && _d('DSN string made from options:', $dsn_string); + return $self->parse($dsn_string); } -sub remove_secondary_indexes { - my ( $self, $ddl ) = @_; - my $sec_indexes_ddl; - my $tbl_struct = $self->parse($ddl); +sub as_string { + my ( $self, $dsn, $props ) = @_; + return $dsn unless ref $dsn; + my %allowed = $props ? 
map { $_=>1 } @$props : (); + return join(',', + map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) } + grep { defined $dsn->{$_} && $self->{opts}->{$_} } + grep { !$props || $allowed{$_} } + sort keys %$dsn ); +} - if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) { - my $clustered_key = $tbl_struct->{clustered_key}; - $clustered_key ||= ''; +sub usage { + my ( $self ) = @_; + my $usage + = "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n" + . " KEY COPY MEANING\n" + . " === ==== =============================================\n"; + my %opts = %{$self->{opts}}; + foreach my $key ( sort keys %opts ) { + $usage .= " $key " + . ($opts{$key}->{copy} ? 'yes ' : 'no ') + . ($opts{$key}->{desc} || '[No description]') + . "\n"; + } + $usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n"; + return $usage; +} - my @sec_indexes = map { - my $key_def = $_->{ddl}; - $key_def =~ s/([\(\)])/\\$1/g; - $ddl =~ s/\s+$key_def//i; - - my $key_ddl = "ADD $_->{ddl}"; - $key_ddl .= ',' unless $key_ddl =~ m/,$/; - $key_ddl; - } - grep { $_->{name} ne $clustered_key } - values %{$tbl_struct->{keys}}; - MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes)); - - if ( @sec_indexes ) { - $sec_indexes_ddl = join(' ', @sec_indexes); - $sec_indexes_ddl =~ s/,$//; - } - - $ddl =~ s/,(\n\) )/$1/s; +sub get_cxn_params { + my ( $self, $info ) = @_; + my $dsn; + my %opts = %{$self->{opts}}; + my $driver = $self->prop('dbidriver') || ''; + if ( $driver eq 'Pg' ) { + $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';' + . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" } + grep { defined $info->{$_} } + qw(h P)); } else { - MKDEBUG && _d('Not removing secondary indexes from', - $tbl_struct->{engine}, 'table'); + $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';' + . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" } + grep { defined $info->{$_} } + qw(F h P S A)) + . 
';mysql_read_default_group=client'; } - - return $ddl, $sec_indexes_ddl, $tbl_struct; + MKDEBUG && _d($dsn); + return ($dsn, $info->{u}, $info->{p}); } -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; +sub fill_in_dsn { + my ( $self, $dbh, $dsn ) = @_; + my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name'); + my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()'); + $user =~ s/@.*//; + $dsn->{h} ||= $vars->{hostname}->{Value}; + $dsn->{S} ||= $vars->{'socket'}->{Value}; + $dsn->{P} ||= $vars->{port}->{Value}; + $dsn->{u} ||= $user; + $dsn->{D} ||= $db; } -1; -} -# ########################################################################### -# End TableParser package -# ########################################################################### - -# ########################################################################### -# TableChecksum package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/TableChecksum.pm -# t/lib/TableChecksum.t -# See https://launchpad.net/percona-toolkit for more information. 
-# ########################################################################### -{ -package TableChecksum; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -use List::Util qw(max); - -our %ALGOS = ( - CHECKSUM => { pref => 0, hash => 0 }, - BIT_XOR => { pref => 2, hash => 1 }, - ACCUM => { pref => 3, hash => 1 }, -); - -sub new { - my ( $class, %args ) = @_; - foreach my $arg ( qw(Quoter VersionParser) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $self = { %args }; - return bless $self, $class; -} - -sub crc32 { - my ( $self, $string ) = @_; - my $poly = 0xEDB88320; - my $crc = 0xFFFFFFFF; - foreach my $char ( split(//, $string) ) { - my $comp = ($crc ^ ord($char)) & 0xFF; - for ( 1 .. 8 ) { - $comp = $comp & 1 ? $poly ^ ($comp >> 1) : $comp >> 1; - } - $crc = (($crc >> 8) & 0x00FFFFFF) ^ $comp; - } - return $crc ^ 0xFFFFFFFF; -} - -sub get_crc_wid { - my ( $self, $dbh, $func ) = @_; - my $crc_wid = 16; - if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) { - eval { - my ($val) = $dbh->selectrow_array("SELECT $func('a')"); - $crc_wid = max(16, length($val)); - }; - } - return $crc_wid; -} - -sub get_crc_type { - my ( $self, $dbh, $func ) = @_; - my $type = ''; - my $length = 0; - my $sql = "SELECT $func('a')"; - my $sth = $dbh->prepare($sql); - eval { - $sth->execute(); - $type = $sth->{mysql_type_name}->[0]; - $length = $sth->{mysql_length}->[0]; - MKDEBUG && _d($sql, $type, $length); - if ( $type eq 'bigint' && $length < 20 ) { - $type = 'int'; - } +sub get_dbh { + my ( $self, $cxn_string, $user, $pass, $opts ) = @_; + $opts ||= {}; + my $defaults = { + AutoCommit => 0, + RaiseError => 1, + PrintError => 0, + ShowErrorStatement => 1, + mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 
1 : 0), }; - $sth->finish; - MKDEBUG && _d('crc_type:', $type, 'length:', $length); - return ($type, $length); -} + @{$defaults}{ keys %$opts } = values %$opts; -sub best_algorithm { - my ( $self, %args ) = @_; - my ( $alg, $dbh ) = @args{ qw(algorithm dbh) }; - my $vp = $self->{VersionParser}; - my @choices = sort { $ALGOS{$a}->{pref} <=> $ALGOS{$b}->{pref} } keys %ALGOS; - die "Invalid checksum algorithm $alg" - if $alg && !$ALGOS{$alg}; - - if ( - $args{where} || $args{chunk} # CHECKSUM does whole table - || $args{replicate} # CHECKSUM can't do INSERT.. SELECT - || !$vp->version_ge($dbh, '4.1.1')) # CHECKSUM doesn't exist - { - MKDEBUG && _d('Cannot use CHECKSUM algorithm'); - @choices = grep { $_ ne 'CHECKSUM' } @choices; + if ( $opts->{mysql_use_result} ) { + $defaults->{mysql_use_result} = 1; } - if ( !$vp->version_ge($dbh, '4.1.1') ) { - MKDEBUG && _d('Cannot use BIT_XOR algorithm because MySQL < 4.1.1'); - @choices = grep { $_ ne 'BIT_XOR' } @choices; + if ( !$have_dbi ) { + die "Cannot connect to MySQL because the Perl DBI module is not " + . "installed or not found. Run 'perl -MDBI' to see the directories " + . "that Perl searches for DBI. If DBI is not installed, try:\n" + . " Debian/Ubuntu apt-get install libdbi-perl\n" + . " RHEL/CentOS yum install perl-DBI\n" + . 
" OpenSolaris pgk install pkg:/SUNWpmdbi\n"; + } - if ( $alg && grep { $_ eq $alg } @choices ) { - MKDEBUG && _d('User requested', $alg, 'algorithm'); - return $alg; - } + my $dbh; + my $tries = 2; + while ( !$dbh && $tries-- ) { + MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass, + join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults )); - if ( $args{count} && grep { $_ ne 'CHECKSUM' } @choices ) { - MKDEBUG && _d('Not using CHECKSUM algorithm because COUNT desired'); - @choices = grep { $_ ne 'CHECKSUM' } @choices; - } - - MKDEBUG && _d('Algorithms, in order:', @choices); - return $choices[0]; -} - -sub is_hash_algorithm { - my ( $self, $algorithm ) = @_; - return $ALGOS{$algorithm} && $ALGOS{$algorithm}->{hash}; -} - -sub choose_hash_func { - my ( $self, %args ) = @_; - my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1); - if ( $args{function} ) { - unshift @funcs, $args{function}; - } - my ($result, $error); - do { - my $func; eval { - $func = shift(@funcs); - my $sql = "SELECT $func('test-string')"; - MKDEBUG && _d($sql); - $args{dbh}->do($sql); - $result = $func; + $dbh = DBI->connect($cxn_string, $user, $pass, $defaults); + + if ( $cxn_string =~ m/mysql/i ) { + my $sql; + + $sql = 'SELECT @@SQL_MODE'; + MKDEBUG && _d($dbh, $sql); + my ($sql_mode) = $dbh->selectrow_array($sql); + + $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1' + . '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO' + . ($sql_mode ? ",$sql_mode" : '') + . '\'*/'; + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + + if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) { + $sql = "/*!40101 SET NAMES $charset*/"; + MKDEBUG && _d($dbh, ':', $sql); + $dbh->do($sql); + MKDEBUG && _d('Enabling charset for STDOUT'); + if ( $charset eq 'utf8' ) { + binmode(STDOUT, ':utf8') + or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR"; + } + else { + binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR"; + } + } + + if ( $self->prop('set-vars') ) { + $sql = "SET " . 
$self->prop('set-vars'); + MKDEBUG && _d($dbh, ':', $sql); + $dbh->do($sql); + } + } }; - if ( $EVAL_ERROR && $EVAL_ERROR =~ m/failed: (.*?) at \S+ line/ ) { - $error .= qq{$func cannot be used because "$1"\n}; - MKDEBUG && _d($func, 'cannot be used because', $1); - } - } while ( @funcs && !$result ); - - die $error unless $result; - MKDEBUG && _d('Chosen hash func:', $result); - return $result; -} - -sub optimize_xor { - my ( $self, %args ) = @_; - my ($dbh, $func) = @args{qw(dbh function)}; - - die "$func never needs the BIT_XOR optimization" - if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i; - - my $opt_slice = 0; - my $unsliced = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0]; - my $sliced = ''; - my $start = 1; - my $crc_wid = length($unsliced) < 16 ? 16 : length($unsliced); - - do { # Try different positions till sliced result equals non-sliced. - MKDEBUG && _d('Trying slice', $opt_slice); - $dbh->do('SET @crc := "", @cnt := 0'); - my $slices = $self->make_xor_slices( - query => "\@crc := $func('a')", - crc_wid => $crc_wid, - opt_slice => $opt_slice, - ); - - my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x"; - $sliced = ($dbh->selectrow_array($sql))[0]; - if ( $sliced ne $unsliced ) { - MKDEBUG && _d('Slice', $opt_slice, 'does not work'); - $start += 16; - ++$opt_slice; - } - } while ( $start < $crc_wid && $sliced ne $unsliced ); - - if ( $sliced eq $unsliced ) { - MKDEBUG && _d('Slice', $opt_slice, 'works'); - return $opt_slice; - } - else { - MKDEBUG && _d('No slice works'); - return undef; - } -} - -sub make_xor_slices { - my ( $self, %args ) = @_; - foreach my $arg ( qw(query crc_wid) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my ( $query, $crc_wid, $opt_slice ) = @args{qw(query crc_wid opt_slice)}; - - my @slices; - for ( my $start = 1; $start <= $crc_wid; $start += 16 ) { - my $len = $crc_wid - $start + 1; - if ( $len > 16 ) { - $len = 16; - } - push @slices, - "LPAD(CONV(BIT_XOR(" - . 
"CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))" - . ", 10, 16), $len, '0')"; - } - - if ( defined $opt_slice && $opt_slice < @slices ) { - $slices[$opt_slice] =~ s/\@crc/\@crc := $query/; - } - else { - map { s/\@crc/$query/ } @slices; - } - - return join(', ', @slices); -} - -sub make_row_checksum { - my ( $self, %args ) = @_; - my ( $tbl_struct, $func ) = @args{ qw(tbl_struct function) }; - my $q = $self->{Quoter}; - - my $sep = $args{sep} || '#'; - $sep =~ s/'//g; - $sep ||= '#'; - - my $ignorecols = $args{ignorecols} || {}; - - my %cols = map { lc($_) => 1 } - grep { !exists $ignorecols->{$_} } - ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}}); - my %seen; - my @cols = - map { - my $type = $tbl_struct->{type_for}->{$_}; - my $result = $q->quote($_); - if ( $type eq 'timestamp' ) { - $result .= ' + 0'; + if ( !$dbh && $EVAL_ERROR ) { + MKDEBUG && _d($EVAL_ERROR); + if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) { + MKDEBUG && _d('Going to try again without utf8 support'); + delete $defaults->{mysql_enable_utf8}; } - elsif ( $args{float_precision} && $type =~ m/float|double/ ) { - $result = "ROUND($result, $args{float_precision})"; + elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) { + die "Cannot connect to MySQL because the Perl DBD::mysql module is " + . "not installed or not found. Run 'perl -MDBD::mysql' to see " + . "the directories that Perl searches for DBD::mysql. If " + . "DBD::mysql is not installed, try:\n" + . " Debian/Ubuntu apt-get install libdbd-mysql-perl\n" + . " RHEL/CentOS yum install perl-DBD-MySQL\n" + . 
" OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n"; } - elsif ( $args{trim} && $type =~ m/varchar/ ) { - $result = "TRIM($result)"; + if ( !$tries ) { + die $EVAL_ERROR; } - $result; } - grep { - $cols{$_} && !$seen{$_}++ - } - @{$tbl_struct->{cols}}; - - my $query; - if ( !$args{no_cols} ) { - $query = join(', ', - map { - my $col = $_; - if ( $col =~ m/\+ 0/ ) { - my ($real_col) = /^(\S+)/; - $col .= " AS $real_col"; - } - elsif ( $col =~ m/TRIM/ ) { - my ($real_col) = m/TRIM\(([^\)]+)\)/; - $col .= " AS $real_col"; - } - $col; - } @cols) - . ', '; } - if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) { - my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}}; - if ( @nulls ) { - my $bitmap = "CONCAT(" - . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls) - . ")"; - push @cols, $bitmap; - } + MKDEBUG && _d('DBH info: ', + $dbh, + Dumper($dbh->selectrow_hashref( + 'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')), + 'Connection info:', $dbh->{mysql_hostinfo}, + 'Character set info:', Dumper($dbh->selectall_arrayref( + 'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})), + '$DBD::mysql::VERSION:', $DBD::mysql::VERSION, + '$DBI::VERSION:', $DBI::VERSION, + ); - $query .= @cols > 1 - ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))' - : "$func($cols[0])"; - } - else { - my $fnv_func = uc $func; - $query .= "$fnv_func(" . join(', ', @cols) . 
')'; - } - - return $query; + return $dbh; } -sub make_checksum_query { - my ( $self, %args ) = @_; - my @required_args = qw(db tbl tbl_struct algorithm crc_wid crc_type); - foreach my $arg( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; +sub get_hostname { + my ( $self, $dbh ) = @_; + if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) { + return $host; } - my ( $db, $tbl, $tbl_struct, $algorithm, - $crc_wid, $crc_type) = @args{@required_args}; - my $func = $args{function}; - my $q = $self->{Quoter}; - my $result; + my ( $hostname, $one ) = $dbh->selectrow_array( + 'SELECT /*!50038 @@hostname, */ 1'); + return $hostname; +} - die "Invalid or missing checksum algorithm" - unless $algorithm && $ALGOS{$algorithm}; +sub disconnect { + my ( $self, $dbh ) = @_; + MKDEBUG && $self->print_active_handles($dbh); + $dbh->disconnect; +} - if ( $algorithm eq 'CHECKSUM' ) { - return "CHECKSUM TABLE " . $q->quote($db, $tbl); +sub print_active_handles { + my ( $self, $thing, $level ) = @_; + $level ||= 0; + printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level, + $thing, (($thing->{Type} || '') eq 'st' ? $thing->{Statement} || '' : '')) + or die "Cannot print: $OS_ERROR"; + foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) { + $self->print_active_handles( $handle, $level + 1 ); } +} - my $expr = $self->make_row_checksum(%args, no_cols=>1); - - if ( $algorithm eq 'BIT_XOR' ) { - if ( $crc_type =~ m/int$/ ) { - $result = "COALESCE(LOWER(CONV(BIT_XOR(CAST($expr AS UNSIGNED)), 10, 16)), 0) AS crc "; +sub copy { + my ( $self, $dsn_1, $dsn_2, %args ) = @_; + die 'I need a dsn_1 argument' unless $dsn_1; + die 'I need a dsn_2 argument' unless $dsn_2; + my %new_dsn = map { + my $key = $_; + my $val; + if ( $args{overwrite} ) { + $val = defined $dsn_1->{$key} ? 
$dsn_1->{$key} : $dsn_2->{$key}; } else { - my $slices = $self->make_xor_slices( query => $expr, %args ); - $result = "COALESCE(LOWER(CONCAT($slices)), 0) AS crc "; + $val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key}; } - } - else { - if ( $crc_type =~ m/int$/ ) { - $result = "COALESCE(RIGHT(MAX(" - . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), " - . "CONV(CAST($func(CONCAT(\@crc, $expr)) AS UNSIGNED), 10, 16))" - . "), $crc_wid), 0) AS crc "; - } - else { - $result = "COALESCE(RIGHT(MAX(" - . "\@crc := CONCAT(LPAD(\@cnt := \@cnt + 1, 16, '0'), " - . "$func(CONCAT(\@crc, $expr)))" - . "), $crc_wid), 0) AS crc "; - } - } - if ( $args{replicate} ) { - $result = "REPLACE /*PROGRESS_COMMENT*/ INTO $args{replicate} " - . "(db, tbl, chunk, boundaries, this_cnt, this_crc) " - . "SELECT ?, ?, /*CHUNK_NUM*/ ?, COUNT(*) AS cnt, $result"; - } - else { - $result = "SELECT " - . ($args{buffer} ? 'SQL_BUFFER_RESULT ' : '') - . "/*PROGRESS_COMMENT*//*CHUNK_NUM*/ COUNT(*) AS cnt, $result"; - } - return $result . 
"FROM /*DB_TBL*//*INDEX_HINT*//*WHERE*/"; -} - -sub find_replication_differences { - my ( $self, $dbh, $table ) = @_; - - (my $sql = <<" EOF") =~ s/\s+/ /gm; - SELECT db, tbl, chunk, boundaries, - COALESCE(this_cnt-master_cnt, 0) AS cnt_diff, - COALESCE( - this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc), - 0 - ) AS crc_diff, - this_cnt, master_cnt, this_crc, master_crc - FROM $table - WHERE master_cnt <> this_cnt OR master_crc <> this_crc - OR ISNULL(master_crc) <> ISNULL(this_crc) - EOF - - MKDEBUG && _d($sql); - my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} }); - return @$diffs; + $key => $val; + } keys %{$self->{opts}}; + return \%new_dsn; } sub _d { @@ -789,7 +361,7 @@ sub _d { 1; } # ########################################################################### -# End TableChecksum package +# End DSNParser package # ########################################################################### # ########################################################################### @@ -1818,359 +1390,80 @@ if ( MKDEBUG ) { # ########################################################################### # ########################################################################### -# DSNParser package +# Quoter package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, -# lib/DSNParser.pm -# t/lib/DSNParser.t +# lib/Quoter.pm +# t/lib/Quoter.t # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### { -package DSNParser; +package Quoter; use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); use constant MKDEBUG => $ENV{MKDEBUG} || 0; -use Data::Dumper; -$Data::Dumper::Indent = 0; -$Data::Dumper::Quotekeys = 0; - -eval { - require DBI; -}; -my $have_dbi = $EVAL_ERROR ? 
0 : 1; - sub new { my ( $class, %args ) = @_; - foreach my $arg ( qw(opts) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $self = { - opts => {} # h, P, u, etc. Should come from DSN OPTIONS section in POD. - }; - foreach my $opt ( @{$args{opts}} ) { - if ( !$opt->{key} || !$opt->{desc} ) { - die "Invalid DSN option: ", Dumper($opt); - } - MKDEBUG && _d('DSN option:', - join(', ', - map { "$_=" . (defined $opt->{$_} ? ($opt->{$_} || '') : 'undef') } - keys %$opt - ) - ); - $self->{opts}->{$opt->{key}} = { - dsn => $opt->{dsn}, - desc => $opt->{desc}, - copy => $opt->{copy} || 0, - }; - } - return bless $self, $class; + return bless {}, $class; } -sub prop { - my ( $self, $prop, $value ) = @_; - if ( @_ > 2 ) { - MKDEBUG && _d('Setting', $prop, 'property'); - $self->{$prop} = $value; +sub quote { + my ( $self, @vals ) = @_; + foreach my $val ( @vals ) { + $val =~ s/`/``/g; } - return $self->{$prop}; + return join('.', map { '`' . $_ . '`' } @vals); } -sub parse { - my ( $self, $dsn, $prev, $defaults ) = @_; - if ( !$dsn ) { - MKDEBUG && _d('No DSN to parse'); - return; - } - MKDEBUG && _d('Parsing', $dsn); - $prev ||= {}; - $defaults ||= {}; - my %given_props; - my %final_props; - my $opts = $self->{opts}; +sub quote_val { + my ( $self, $val ) = @_; - foreach my $dsn_part ( split(/,/, $dsn) ) { - if ( my ($prop_key, $prop_val) = $dsn_part =~ m/^(.)=(.*)$/ ) { - $given_props{$prop_key} = $prop_val; - } - else { - MKDEBUG && _d('Interpreting', $dsn_part, 'as h=', $dsn_part); - $given_props{h} = $dsn_part; - } - } + return 'NULL' unless defined $val; # undef = NULL + return "''" if $val eq ''; # blank string = '' + return $val if $val =~ m/^0x[0-9a-fA-F]+$/; # hex data - foreach my $key ( keys %$opts ) { - MKDEBUG && _d('Finding value for', $key); - $final_props{$key} = $given_props{$key}; - if ( !defined $final_props{$key} - && defined $prev->{$key} && $opts->{$key}->{copy} ) - { - $final_props{$key} = $prev->{$key}; - MKDEBUG && _d('Copying value 
for', $key, 'from previous DSN'); - } - if ( !defined $final_props{$key} ) { - $final_props{$key} = $defaults->{$key}; - MKDEBUG && _d('Copying value for', $key, 'from defaults'); - } - } - - foreach my $key ( keys %given_props ) { - die "Unknown DSN option '$key' in '$dsn'. For more details, " - . "please use the --help option, or try 'perldoc $PROGRAM_NAME' " - . "for complete documentation." - unless exists $opts->{$key}; - } - if ( (my $required = $self->prop('required')) ) { - foreach my $key ( keys %$required ) { - die "Missing required DSN option '$key' in '$dsn'. For more details, " - . "please use the --help option, or try 'perldoc $PROGRAM_NAME' " - . "for complete documentation." - unless $final_props{$key}; - } - } - - return \%final_props; + $val =~ s/(['\\])/\\$1/g; + return "'$val'"; } -sub parse_options { - my ( $self, $o ) = @_; - die 'I need an OptionParser object' unless ref $o eq 'OptionParser'; - my $dsn_string - = join(',', - map { "$_=".$o->get($_); } - grep { $o->has($_) && $o->get($_) } - keys %{$self->{opts}} - ); - MKDEBUG && _d('DSN string made from options:', $dsn_string); - return $self->parse($dsn_string); -} - -sub as_string { - my ( $self, $dsn, $props ) = @_; - return $dsn unless ref $dsn; - my %allowed = $props ? map { $_=>1 } @$props : (); - return join(',', - map { "$_=" . ($_ eq 'p' ? '...' : $dsn->{$_}) } - grep { defined $dsn->{$_} && $self->{opts}->{$_} } - grep { !$props || $allowed{$_} } - sort keys %$dsn ); -} - -sub usage { - my ( $self ) = @_; - my $usage - = "DSN syntax is key=value[,key=value...] Allowable DSN keys:\n\n" - . " KEY COPY MEANING\n" - . " === ==== =============================================\n"; - my %opts = %{$self->{opts}}; - foreach my $key ( sort keys %opts ) { - $usage .= " $key " - . ($opts{$key}->{copy} ? 'yes ' : 'no ') - . ($opts{$key}->{desc} || '[No description]') - . 
"\n"; +sub split_unquote { + my ( $self, $db_tbl, $default_db ) = @_; + $db_tbl =~ s/`//g; + my ( $db, $tbl ) = split(/[.]/, $db_tbl); + if ( !$tbl ) { + $tbl = $db; + $db = $default_db; } - $usage .= "\n If the DSN is a bareword, the word is treated as the 'h' key.\n"; - return $usage; + return ($db, $tbl); } -sub get_cxn_params { - my ( $self, $info ) = @_; - my $dsn; - my %opts = %{$self->{opts}}; - my $driver = $self->prop('dbidriver') || ''; - if ( $driver eq 'Pg' ) { - $dsn = 'DBI:Pg:dbname=' . ( $info->{D} || '' ) . ';' - . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" } - grep { defined $info->{$_} } - qw(h P)); +sub literal_like { + my ( $self, $like ) = @_; + return unless $like; + $like =~ s/([%_])/\\$1/g; + return "'$like'"; +} + +sub join_quote { + my ( $self, $default_db, $db_tbl ) = @_; + return unless $db_tbl; + my ($db, $tbl) = split(/[.]/, $db_tbl); + if ( !$tbl ) { + $tbl = $db; + $db = $default_db; } - else { - $dsn = 'DBI:mysql:' . ( $info->{D} || '' ) . ';' - . join(';', map { "$opts{$_}->{dsn}=$info->{$_}" } - grep { defined $info->{$_} } - qw(F h P S A)) - . ';mysql_read_default_group=client'; - } - MKDEBUG && _d($dsn); - return ($dsn, $info->{u}, $info->{p}); -} - -sub fill_in_dsn { - my ( $self, $dbh, $dsn ) = @_; - my $vars = $dbh->selectall_hashref('SHOW VARIABLES', 'Variable_name'); - my ($user, $db) = $dbh->selectrow_array('SELECT USER(), DATABASE()'); - $user =~ s/@.*//; - $dsn->{h} ||= $vars->{hostname}->{Value}; - $dsn->{S} ||= $vars->{'socket'}->{Value}; - $dsn->{P} ||= $vars->{port}->{Value}; - $dsn->{u} ||= $user; - $dsn->{D} ||= $db; -} - -sub get_dbh { - my ( $self, $cxn_string, $user, $pass, $opts ) = @_; - $opts ||= {}; - my $defaults = { - AutoCommit => 0, - RaiseError => 1, - PrintError => 0, - ShowErrorStatement => 1, - mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 
1 : 0), - }; - @{$defaults}{ keys %$opts } = values %$opts; - - if ( $opts->{mysql_use_result} ) { - $defaults->{mysql_use_result} = 1; - } - - if ( !$have_dbi ) { - die "Cannot connect to MySQL because the Perl DBI module is not " - . "installed or not found. Run 'perl -MDBI' to see the directories " - . "that Perl searches for DBI. If DBI is not installed, try:\n" - . " Debian/Ubuntu apt-get install libdbi-perl\n" - . " RHEL/CentOS yum install perl-DBI\n" - . " OpenSolaris pgk install pkg:/SUNWpmdbi\n"; - - } - - my $dbh; - my $tries = 2; - while ( !$dbh && $tries-- ) { - MKDEBUG && _d($cxn_string, ' ', $user, ' ', $pass, - join(', ', map { "$_=>$defaults->{$_}" } keys %$defaults )); - - eval { - $dbh = DBI->connect($cxn_string, $user, $pass, $defaults); - - if ( $cxn_string =~ m/mysql/i ) { - my $sql; - - $sql = 'SELECT @@SQL_MODE'; - MKDEBUG && _d($dbh, $sql); - my ($sql_mode) = $dbh->selectrow_array($sql); - - $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1' - . '/*!40101, @@SQL_MODE=\'NO_AUTO_VALUE_ON_ZERO' - . ($sql_mode ? ",$sql_mode" : '') - . '\'*/'; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - - if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) { - $sql = "/*!40101 SET NAMES $charset*/"; - MKDEBUG && _d($dbh, ':', $sql); - $dbh->do($sql); - MKDEBUG && _d('Enabling charset for STDOUT'); - if ( $charset eq 'utf8' ) { - binmode(STDOUT, ':utf8') - or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR"; - } - else { - binmode(STDOUT) or die "Can't binmode(STDOUT): $OS_ERROR"; - } - } - - if ( $self->prop('set-vars') ) { - $sql = "SET " . 
$self->prop('set-vars'); - MKDEBUG && _d($dbh, ':', $sql); - $dbh->do($sql); - } - } - }; - if ( !$dbh && $EVAL_ERROR ) { - MKDEBUG && _d($EVAL_ERROR); - if ( $EVAL_ERROR =~ m/not a compiled character set|character set utf8/ ) { - MKDEBUG && _d('Going to try again without utf8 support'); - delete $defaults->{mysql_enable_utf8}; - } - elsif ( $EVAL_ERROR =~ m/locate DBD\/mysql/i ) { - die "Cannot connect to MySQL because the Perl DBD::mysql module is " - . "not installed or not found. Run 'perl -MDBD::mysql' to see " - . "the directories that Perl searches for DBD::mysql. If " - . "DBD::mysql is not installed, try:\n" - . " Debian/Ubuntu apt-get install libdbd-mysql-perl\n" - . " RHEL/CentOS yum install perl-DBD-MySQL\n" - . " OpenSolaris pgk install pkg:/SUNWapu13dbd-mysql\n"; - } - if ( !$tries ) { - die $EVAL_ERROR; - } - } - } - - MKDEBUG && _d('DBH info: ', - $dbh, - Dumper($dbh->selectrow_hashref( - 'SELECT DATABASE(), CONNECTION_ID(), VERSION()/*!50038 , @@hostname*/')), - 'Connection info:', $dbh->{mysql_hostinfo}, - 'Character set info:', Dumper($dbh->selectall_arrayref( - 'SHOW VARIABLES LIKE "character_set%"', { Slice => {}})), - '$DBD::mysql::VERSION:', $DBD::mysql::VERSION, - '$DBI::VERSION:', $DBI::VERSION, - ); - - return $dbh; -} - -sub get_hostname { - my ( $self, $dbh ) = @_; - if ( my ($host) = ($dbh->{mysql_hostinfo} || '') =~ m/^(\w+) via/ ) { - return $host; - } - my ( $hostname, $one ) = $dbh->selectrow_array( - 'SELECT /*!50038 @@hostname, */ 1'); - return $hostname; -} - -sub disconnect { - my ( $self, $dbh ) = @_; - MKDEBUG && $self->print_active_handles($dbh); - $dbh->disconnect; -} - -sub print_active_handles { - my ( $self, $thing, $level ) = @_; - $level ||= 0; - printf("# Active %sh: %s %s %s\n", ($thing->{Type} || 'undef'), "\t" x $level, - $thing, (($thing->{Type} || '') eq 'st' ? 
$thing->{Statement} || '' : '')) - or die "Cannot print: $OS_ERROR"; - foreach my $handle ( grep {defined} @{ $thing->{ChildHandles} } ) { - $self->print_active_handles( $handle, $level + 1 ); - } -} - -sub copy { - my ( $self, $dsn_1, $dsn_2, %args ) = @_; - die 'I need a dsn_1 argument' unless $dsn_1; - die 'I need a dsn_2 argument' unless $dsn_2; - my %new_dsn = map { - my $key = $_; - my $val; - if ( $args{overwrite} ) { - $val = defined $dsn_1->{$key} ? $dsn_1->{$key} : $dsn_2->{$key}; - } - else { - $val = defined $dsn_2->{$key} ? $dsn_2->{$key} : $dsn_1->{$key}; - } - $key => $val; - } keys %{$self->{opts}}; - return \%new_dsn; -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; + $db = "`$db`" if $db && $db !~ m/^`/; + $tbl = "`$tbl`" if $tbl && $tbl !~ m/^`/; + return $db ? "$db.$tbl" : $tbl; } 1; } # ########################################################################### -# End DSNParser package +# End Quoter package # ########################################################################### # ########################################################################### @@ -2256,6 +1549,664 @@ sub _d { # End VersionParser package # ########################################################################### +# ########################################################################### +# TableParser package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/TableParser.pm +# t/lib/TableParser.t +# See https://launchpad.net/percona-toolkit for more information. 
+# ########################################################################### +{ +package TableParser; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use constant MKDEBUG => $ENV{MKDEBUG} || 0; + +use Data::Dumper; +$Data::Dumper::Indent = 1; +$Data::Dumper::Sortkeys = 1; +$Data::Dumper::Quotekeys = 0; + +sub new { + my ( $class, %args ) = @_; + my @required_args = qw(Quoter); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my $self = { %args }; + return bless $self, $class; +} + +sub parse { + my ( $self, $ddl, $opts ) = @_; + return unless $ddl; + if ( ref $ddl eq 'ARRAY' ) { + if ( lc $ddl->[0] eq 'table' ) { + $ddl = $ddl->[1]; + } + else { + return { + engine => 'VIEW', + }; + } + } + + if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) { + die "Cannot parse table definition; is ANSI quoting " + . "enabled or SQL_QUOTE_SHOW_CREATE disabled?"; + } + + my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/; + (undef, $name) = $self->{Quoter}->split_unquote($name) if $name; + + $ddl =~ s/(`[^`]+`)/\L$1/g; + + my $engine = $self->get_engine($ddl); + + my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; + my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; + MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); + + my %def_for; + @def_for{@cols} = @defs; + + my (@nums, @null); + my (%type_for, %is_nullable, %is_numeric, %is_autoinc); + foreach my $col ( @cols ) { + my $def = $def_for{$col}; + my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/; + die "Can't determine column type for $def" unless $type; + $type_for{$col} = $type; + if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) { + push @nums, $col; + $is_numeric{$col} = 1; + } + if ( $def !~ m/NOT NULL/ ) { + push @null, $col; + $is_nullable{$col} = 1; + } + $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 
1 : 0; + } + + my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable); + + my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/; + + return { + name => $name, + cols => \@cols, + col_posn => { map { $cols[$_] => $_ } 0..$#cols }, + is_col => { map { $_ => 1 } @cols }, + null_cols => \@null, + is_nullable => \%is_nullable, + is_autoinc => \%is_autoinc, + clustered_key => $clustered_key, + keys => $keys, + defs => \%def_for, + numeric_cols => \@nums, + is_numeric => \%is_numeric, + engine => $engine, + type_for => \%type_for, + charset => $charset, + }; +} + +sub sort_indexes { + my ( $self, $tbl ) = @_; + + my @indexes + = sort { + (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY')) + || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} ) + || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} ) + || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) ) + } + grep { + $tbl->{keys}->{$_}->{type} eq 'BTREE' + } + sort keys %{$tbl->{keys}}; + + MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes)); + return @indexes; +} + +sub find_best_index { + my ( $self, $tbl, $index ) = @_; + my $best; + if ( $index ) { + ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}}; + } + if ( !$best ) { + if ( $index ) { + die "Index '$index' does not exist in table"; + } + else { + ($best) = $self->sort_indexes($tbl); + } + } + MKDEBUG && _d('Best index found is', $best); + return $best; +} + +sub find_possible_keys { + my ( $self, $dbh, $database, $table, $quoter, $where ) = @_; + return () unless $where; + my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table) + . ' WHERE ' . 
$where; + MKDEBUG && _d($sql); + my $expl = $dbh->selectrow_hashref($sql); + $expl = { map { lc($_) => $expl->{$_} } keys %$expl }; + if ( $expl->{possible_keys} ) { + MKDEBUG && _d('possible_keys =', $expl->{possible_keys}); + my @candidates = split(',', $expl->{possible_keys}); + my %possible = map { $_ => 1 } @candidates; + if ( $expl->{key} ) { + MKDEBUG && _d('MySQL chose', $expl->{key}); + unshift @candidates, grep { $possible{$_} } split(',', $expl->{key}); + MKDEBUG && _d('Before deduping:', join(', ', @candidates)); + my %seen; + @candidates = grep { !$seen{$_}++ } @candidates; + } + MKDEBUG && _d('Final list:', join(', ', @candidates)); + return @candidates; + } + else { + MKDEBUG && _d('No keys in possible_keys'); + return (); + } +} + +sub check_table { + my ( $self, %args ) = @_; + my @required_args = qw(dbh db tbl); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my ($dbh, $db, $tbl) = @args{@required_args}; + my $q = $self->{Quoter}; + my $db_tbl = $q->quote($db, $tbl); + MKDEBUG && _d('Checking', $db_tbl); + + my $sql = "SHOW TABLES FROM " . $q->quote($db) + . ' LIKE ' . 
$q->literal_like($tbl); + MKDEBUG && _d($sql); + my $row; + eval { + $row = $dbh->selectrow_arrayref($sql); + }; + if ( $EVAL_ERROR ) { + MKDEBUG && _d($EVAL_ERROR); + return 0; + } + if ( !$row->[0] || $row->[0] ne $tbl ) { + MKDEBUG && _d('Table does not exist'); + return 0; + } + + MKDEBUG && _d('Table exists; no privs to check'); + return 1 unless $args{all_privs}; + + $sql = "SHOW FULL COLUMNS FROM $db_tbl"; + MKDEBUG && _d($sql); + eval { + $row = $dbh->selectrow_hashref($sql); + }; + if ( $EVAL_ERROR ) { + MKDEBUG && _d($EVAL_ERROR); + return 0; + } + if ( !scalar keys %$row ) { + MKDEBUG && _d('Table has no columns:', Dumper($row)); + return 0; + } + my $privs = $row->{privileges} || $row->{Privileges}; + + $sql = "DELETE FROM $db_tbl LIMIT 0"; + MKDEBUG && _d($sql); + eval { + $dbh->do($sql); + }; + my $can_delete = $EVAL_ERROR ? 0 : 1; + + MKDEBUG && _d('User privs on', $db_tbl, ':', $privs, + ($can_delete ? 'delete' : '')); + + if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/ + && $can_delete) ) { + MKDEBUG && _d('User does not have all privs'); + return 0; + } + + MKDEBUG && _d('User has all privs'); + return 1; +} + +sub get_engine { + my ( $self, $ddl, $opts ) = @_; + my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; + MKDEBUG && _d('Storage engine:', $engine); + return $engine || undef; +} + +sub get_keys { + my ( $self, $ddl, $opts, $is_nullable ) = @_; + my $engine = $self->get_engine($ddl); + my $keys = {}; + my $clustered_key = undef; + + KEY: + foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { + + next KEY if $key =~ m/FOREIGN/; + + my $key_ddl = $key; + MKDEBUG && _d('Parsed key:', $key_ddl); + + if ( $engine !~ m/MEMORY|HEAP/ ) { + $key =~ s/USING HASH/USING BTREE/; + } + + my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? 
\((.+)\)/; + my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; + $type = $type || $special || 'BTREE'; + if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000' + && $engine =~ m/HEAP|MEMORY/i ) + { + $type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP + } + + my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; + my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0; + my @cols; + my @col_prefixes; + foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) { + my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/; + push @cols, $name; + push @col_prefixes, $prefix; + } + $name =~ s/`//g; + + MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols)); + + $keys->{$name} = { + name => $name, + type => $type, + colnames => $cols, + cols => \@cols, + col_prefixes => \@col_prefixes, + is_unique => $unique, + is_nullable => scalar(grep { $is_nullable->{$_} } @cols), + is_col => { map { $_ => 1 } @cols }, + ddl => $key_ddl, + }; + + if ( $engine =~ m/InnoDB/i && !$clustered_key ) { + my $this_key = $keys->{$name}; + if ( $this_key->{name} eq 'PRIMARY' ) { + $clustered_key = 'PRIMARY'; + } + elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) { + $clustered_key = $this_key->{name}; + } + MKDEBUG && $clustered_key && _d('This key is the clustered key'); + } + } + + return $keys, $clustered_key; +} + +sub get_fks { + my ( $self, $ddl, $opts ) = @_; + my $q = $self->{Quoter}; + my $fks = {}; + + foreach my $fk ( + $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg ) + { + my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/; + my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/; + my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/; + + my ($db, $tbl) = $q->split_unquote($parent, $opts->{database}); + my %parent_tbl = (tbl => $tbl); + $parent_tbl{db} = $db if $db; + + if ( $parent !~ m/\./ && $opts->{database} ) { + $parent = $q->quote($opts->{database}) . 
".$parent"; + } + + $fks->{$name} = { + name => $name, + colnames => $cols, + cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ], + parent_tbl => \%parent_tbl, + parent_tblname => $parent, + parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ], + parent_colnames=> $parent_cols, + ddl => $fk, + }; + } + + return $fks; +} + +sub remove_auto_increment { + my ( $self, $ddl ) = @_; + $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m; + return $ddl; +} + +sub remove_secondary_indexes { + my ( $self, $ddl ) = @_; + my $sec_indexes_ddl; + my $tbl_struct = $self->parse($ddl); + + if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) { + my $clustered_key = $tbl_struct->{clustered_key}; + $clustered_key ||= ''; + + my @sec_indexes = map { + my $key_def = $_->{ddl}; + $key_def =~ s/([\(\)])/\\$1/g; + $ddl =~ s/\s+$key_def//i; + + my $key_ddl = "ADD $_->{ddl}"; + $key_ddl .= ',' unless $key_ddl =~ m/,$/; + $key_ddl; + } + grep { $_->{name} ne $clustered_key } + values %{$tbl_struct->{keys}}; + MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes)); + + if ( @sec_indexes ) { + $sec_indexes_ddl = join(' ', @sec_indexes); + $sec_indexes_ddl =~ s/,$//; + } + + $ddl =~ s/,(\n\) )/$1/s; + } + else { + MKDEBUG && _d('Not removing secondary indexes from', + $tbl_struct->{engine}, 'table'); + } + + return $ddl, $sec_indexes_ddl, $tbl_struct; +} + +sub _d { + my ($package, undef, $line) = caller 0; + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } + map { defined $_ ? $_ : 'undef' } + @_; + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; +} + +1; +} +# ########################################################################### +# End TableParser package +# ########################################################################### + +# ########################################################################### +# TableNibbler package +# This package is a copy without comments from the original. 
The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/TableNibbler.pm +# t/lib/TableNibbler.t +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### +{ +package TableNibbler; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use constant MKDEBUG => $ENV{MKDEBUG} || 0; + +sub new { + my ( $class, %args ) = @_; + my @required_args = qw(TableParser Quoter); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + my $self = { %args }; + return bless $self, $class; +} + +sub generate_asc_stmt { + my ( $self, %args ) = @_; + my @required_args = qw(tbl_struct index); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless defined $args{$arg}; + } + my ($tbl_struct, $index) = @args{@required_args}; + my @cols = $args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}}; + my $q = $self->{Quoter}; + + die "Index '$index' does not exist in table" + unless exists $tbl_struct->{keys}->{$index}; + MKDEBUG && _d('Will ascend index', $index); + + my @asc_cols = @{$tbl_struct->{keys}->{$index}->{cols}}; + if ( $args{asc_first} ) { + @asc_cols = $asc_cols[0]; + MKDEBUG && _d('Ascending only first column'); + } + MKDEBUG && _d('Will ascend columns', join(', ', @asc_cols)); + + my @asc_slice; + my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols }; + foreach my $col ( @asc_cols ) { + if ( !exists $col_posn{$col} ) { + push @cols, $col; + $col_posn{$col} = $#cols; + } + push @asc_slice, $col_posn{$col}; + } + MKDEBUG && _d('Will ascend, in ordinal position:', join(', ', @asc_slice)); + + my $asc_stmt = { + cols => \@cols, + index => $index, + where => '', + slice => [], + scols => [], + }; + + if ( @asc_slice ) { + my $cmp_where; + foreach my $cmp ( qw(< <= >= >) ) { + $cmp_where = $self->generate_cmp_where( + type => $cmp, + slice => \@asc_slice, + cols => \@cols, 
+ quoter => $q, + is_nullable => $tbl_struct->{is_nullable}, + ); + $asc_stmt->{boundaries}->{$cmp} = $cmp_where->{where}; + } + my $cmp = $args{asc_only} ? '>' : '>='; + $asc_stmt->{where} = $asc_stmt->{boundaries}->{$cmp}; + $asc_stmt->{slice} = $cmp_where->{slice}; + $asc_stmt->{scols} = $cmp_where->{scols}; + } + + return $asc_stmt; +} + +sub generate_cmp_where { + my ( $self, %args ) = @_; + foreach my $arg ( qw(type slice cols is_nullable) ) { + die "I need a $arg arg" unless defined $args{$arg}; + } + my @slice = @{$args{slice}}; + my @cols = @{$args{cols}}; + my $is_nullable = $args{is_nullable}; + my $type = $args{type}; + my $q = $self->{Quoter}; + + (my $cmp = $type) =~ s/=//; + + my @r_slice; # Resulting slice columns, by ordinal + my @r_scols; # Ditto, by name + + my @clauses; + foreach my $i ( 0 .. $#slice ) { + my @clause; + + foreach my $j ( 0 .. $i - 1 ) { + my $ord = $slice[$j]; + my $col = $cols[$ord]; + my $quo = $q->quote($col); + if ( $is_nullable->{$col} ) { + push @clause, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))"; + push @r_slice, $ord, $ord; + push @r_scols, $col, $col; + } + else { + push @clause, "$quo = ?"; + push @r_slice, $ord; + push @r_scols, $col; + } + } + + my $ord = $slice[$i]; + my $col = $cols[$ord]; + my $quo = $q->quote($col); + my $end = $i == $#slice; # Last clause of the whole group. + if ( $is_nullable->{$col} ) { + if ( $type =~ m/=/ && $end ) { + push @clause, "(? IS NULL OR $quo $type ?)"; + } + elsif ( $type =~ m/>/ ) { + push @clause, "((? IS NULL AND $quo IS NOT NULL) OR ($quo $cmp ?))"; + } + else { # If $type =~ m/ \@r_slice, + scols => \@r_scols, + where => $result, + }; + return $where; +} + +sub generate_del_stmt { + my ( $self, %args ) = @_; + + my $tbl = $args{tbl_struct}; + my @cols = $args{cols} ? 
@{$args{cols}} : (); + my $tp = $self->{TableParser}; + my $q = $self->{Quoter}; + + my @del_cols; + my @del_slice; + + my $index = $tp->find_best_index($tbl, $args{index}); + die "Cannot find an ascendable index in table" unless $index; + + if ( $index ) { + @del_cols = @{$tbl->{keys}->{$index}->{cols}}; + } + else { + @del_cols = @{$tbl->{cols}}; + } + MKDEBUG && _d('Columns needed for DELETE:', join(', ', @del_cols)); + + my %col_posn = do { my $i = 0; map { $_ => $i++ } @cols }; + foreach my $col ( @del_cols ) { + if ( !exists $col_posn{$col} ) { + push @cols, $col; + $col_posn{$col} = $#cols; + } + push @del_slice, $col_posn{$col}; + } + MKDEBUG && _d('Ordinals needed for DELETE:', join(', ', @del_slice)); + + my $del_stmt = { + cols => \@cols, + index => $index, + where => '', + slice => [], + scols => [], + }; + + my @clauses; + foreach my $i ( 0 .. $#del_slice ) { + my $ord = $del_slice[$i]; + my $col = $cols[$ord]; + my $quo = $q->quote($col); + if ( $tbl->{is_nullable}->{$col} ) { + push @clauses, "((? IS NULL AND $quo IS NULL) OR ($quo = ?))"; + push @{$del_stmt->{slice}}, $ord, $ord; + push @{$del_stmt->{scols}}, $col, $col; + } + else { + push @clauses, "$quo = ?"; + push @{$del_stmt->{slice}}, $ord; + push @{$del_stmt->{scols}}, $col; + } + } + + $del_stmt->{where} = '(' . join(' AND ', @clauses) . 
')'; + + return $del_stmt; +} + +sub generate_ins_stmt { + my ( $self, %args ) = @_; + foreach my $arg ( qw(ins_tbl sel_cols) ) { + die "I need a $arg argument" unless $args{$arg}; + } + my $ins_tbl = $args{ins_tbl}; + my @sel_cols = @{$args{sel_cols}}; + + die "You didn't specify any SELECT columns" unless @sel_cols; + + my @ins_cols; + my @ins_slice; + for my $i ( 0..$#sel_cols ) { + next unless $ins_tbl->{is_col}->{$sel_cols[$i]}; + push @ins_cols, $sel_cols[$i]; + push @ins_slice, $i; + } + + return { + cols => \@ins_cols, + slice => \@ins_slice, + }; +} + +sub _d { + my ($package, undef, $line) = caller 0; + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } + map { defined $_ ? $_ : 'undef' } + @_; + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; +} + +1; +} +# ########################################################################### +# End TableNibbler package +# ########################################################################### + # ########################################################################### # MySQLDump package # This package is a copy without comments from the original. The original @@ -2561,1013 +2512,6 @@ sub _d { # End MySQLDump package # ########################################################################### -# ########################################################################### -# TableChunker package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/TableChunker.pm -# t/lib/TableChunker.t -# See https://launchpad.net/percona-toolkit for more information. 
-# ########################################################################### -{ -package TableChunker; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -use POSIX qw(floor ceil); -use List::Util qw(min max); -use Data::Dumper; -$Data::Dumper::Indent = 1; -$Data::Dumper::Sortkeys = 1; -$Data::Dumper::Quotekeys = 0; - -sub new { - my ( $class, %args ) = @_; - foreach my $arg ( qw(Quoter MySQLDump) ) { - die "I need a $arg argument" unless $args{$arg}; - } - - my %int_types = map { $_ => 1 } qw(bigint date datetime int mediumint smallint time timestamp tinyint year); - my %real_types = map { $_ => 1 } qw(decimal double float); - - my $self = { - %args, - int_types => \%int_types, - real_types => \%real_types, - EPOCH => '1970-01-01', - }; - - return bless $self, $class; -} - -sub find_chunk_columns { - my ( $self, %args ) = @_; - foreach my $arg ( qw(tbl_struct) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $tbl_struct = $args{tbl_struct}; - - my @possible_indexes; - foreach my $index ( values %{ $tbl_struct->{keys} } ) { - - next unless $index->{type} eq 'BTREE'; - - next if grep { defined } @{$index->{col_prefixes}}; - - if ( $args{exact} ) { - next unless $index->{is_unique} && @{$index->{cols}} == 1; - } - - push @possible_indexes, $index; - } - MKDEBUG && _d('Possible chunk indexes in order:', - join(', ', map { $_->{name} } @possible_indexes)); - - my $can_chunk_exact = 0; - my @candidate_cols; - foreach my $index ( @possible_indexes ) { - my $col = $index->{cols}->[0]; - - my $col_type = $tbl_struct->{type_for}->{$col}; - next unless $self->{int_types}->{$col_type} - || $self->{real_types}->{$col_type} - || $col_type =~ m/char/; - - push @candidate_cols, { column => $col, index => $index->{name} }; - } - - $can_chunk_exact = 1 if $args{exact} && scalar @candidate_cols; - - if ( MKDEBUG ) { - my $chunk_type = $args{exact} ? 
'Exact' : 'Inexact'; - _d($chunk_type, 'chunkable:', - join(', ', map { "$_->{column} on $_->{index}" } @candidate_cols)); - } - - my @result; - MKDEBUG && _d('Ordering columns by order in tbl, PK first'); - if ( $tbl_struct->{keys}->{PRIMARY} ) { - my $pk_first_col = $tbl_struct->{keys}->{PRIMARY}->{cols}->[0]; - @result = grep { $_->{column} eq $pk_first_col } @candidate_cols; - @candidate_cols = grep { $_->{column} ne $pk_first_col } @candidate_cols; - } - my $i = 0; - my %col_pos = map { $_ => $i++ } @{$tbl_struct->{cols}}; - push @result, sort { $col_pos{$a->{column}} <=> $col_pos{$b->{column}} } - @candidate_cols; - - if ( MKDEBUG ) { - _d('Chunkable columns:', - join(', ', map { "$_->{column} on $_->{index}" } @result)); - _d('Can chunk exactly:', $can_chunk_exact); - } - - return ($can_chunk_exact, @result); -} - -sub calculate_chunks { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - MKDEBUG && _d('Calculate chunks for', - join(", ", map {"$_=".(defined $args{$_} ? 
$args{$_} : "undef")} - qw(db tbl chunk_col min max rows_in_range chunk_size zero_chunk exact) - )); - - if ( !$args{rows_in_range} ) { - MKDEBUG && _d("Empty table"); - return '1=1'; - } - - if ( $args{rows_in_range} < $args{chunk_size} ) { - MKDEBUG && _d("Chunk size larger than rows in range"); - return '1=1'; - } - - my $q = $self->{Quoter}; - my $dbh = $args{dbh}; - my $chunk_col = $args{chunk_col}; - my $tbl_struct = $args{tbl_struct}; - my $col_type = $tbl_struct->{type_for}->{$chunk_col}; - MKDEBUG && _d('chunk col type:', $col_type); - - my %chunker; - if ( $tbl_struct->{is_numeric}->{$chunk_col} || $col_type =~ /date|time/ ) { - %chunker = $self->_chunk_numeric(%args); - } - elsif ( $col_type =~ m/char/ ) { - %chunker = $self->_chunk_char(%args); - } - else { - die "Cannot chunk $col_type columns"; - } - MKDEBUG && _d("Chunker:", Dumper(\%chunker)); - my ($col, $start_point, $end_point, $interval, $range_func) - = @chunker{qw(col start_point end_point interval range_func)}; - - my @chunks; - if ( $start_point < $end_point ) { - - push @chunks, "$col = 0" if $chunker{have_zero_chunk}; - - my ($beg, $end); - my $iter = 0; - for ( my $i = $start_point; $i < $end_point; $i += $interval ) { - ($beg, $end) = $self->$range_func($dbh, $i, $interval, $end_point); - - if ( $iter++ == 0 ) { - push @chunks, - ($chunker{have_zero_chunk} ? "$col > 0 AND " : "") - ."$col < " . $q->quote_val($end); - } - else { - push @chunks, "$col >= " . $q->quote_val($beg) . " AND $col < " . $q->quote_val($end); - } - } - - my $chunk_range = lc $args{chunk_range} || 'open'; - my $nullable = $args{tbl_struct}->{is_nullable}->{$args{chunk_col}}; - pop @chunks; - if ( @chunks ) { - push @chunks, "$col >= " . $q->quote_val($beg) - . ($chunk_range eq 'openclosed' - ? " AND $col <= " . $q->quote_val($args{max}) : ""); - } - else { - push @chunks, $nullable ? 
"$col IS NOT NULL" : '1=1'; - } - if ( $nullable ) { - push @chunks, "$col IS NULL"; - } - } - else { - MKDEBUG && _d('No chunks; using single chunk 1=1'); - push @chunks, '1=1'; - } - - return @chunks; -} - -sub _chunk_numeric { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col rows_in_range chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $q = $self->{Quoter}; - my $db_tbl = $q->quote($args{db}, $args{tbl}); - my $col_type = $args{tbl_struct}->{type_for}->{$args{chunk_col}}; - - my $range_func; - if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) { - $range_func = 'range_num'; - } - elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) { - $range_func = "range_$col_type"; - } - elsif ( $col_type eq 'datetime' ) { - $range_func = 'range_datetime'; - } - - my ($start_point, $end_point); - eval { - $start_point = $self->value_to_number( - value => $args{min}, - column_type => $col_type, - dbh => $args{dbh}, - ); - $end_point = $self->value_to_number( - value => $args{max}, - column_type => $col_type, - dbh => $args{dbh}, - ); - }; - if ( $EVAL_ERROR ) { - if ( $EVAL_ERROR =~ m/don't know how to chunk/ ) { - die $EVAL_ERROR; - } - else { - die "Error calculating chunk start and end points for table " - . "`$args{tbl_struct}->{name}` on column `$args{chunk_col}` " - . "with min/max values " - . join('/', - map { defined $args{$_} ? $args{$_} : 'undef' } qw(min max)) - . ":\n\n" - . $EVAL_ERROR - . "\nVerify that the min and max values are valid for the column. " - . "If they are valid, this error could be caused by a bug in the " - . 
"tool."; - } - } - - if ( !defined $start_point ) { - MKDEBUG && _d('Start point is undefined'); - $start_point = 0; - } - if ( !defined $end_point || $end_point < $start_point ) { - MKDEBUG && _d('End point is undefined or before start point'); - $end_point = 0; - } - MKDEBUG && _d("Actual chunk range:", $start_point, "to", $end_point); - - my $have_zero_chunk = 0; - if ( $args{zero_chunk} ) { - if ( $start_point != $end_point && $start_point >= 0 ) { - MKDEBUG && _d('Zero chunking'); - my $nonzero_val = $self->get_nonzero_value( - %args, - db_tbl => $db_tbl, - col => $args{chunk_col}, - col_type => $col_type, - val => $args{min} - ); - $start_point = $self->value_to_number( - value => $nonzero_val, - column_type => $col_type, - dbh => $args{dbh}, - ); - $have_zero_chunk = 1; - } - else { - MKDEBUG && _d("Cannot zero chunk"); - } - } - MKDEBUG && _d("Using chunk range:", $start_point, "to", $end_point); - - my $interval = $args{chunk_size} - * ($end_point - $start_point) - / $args{rows_in_range}; - if ( $self->{int_types}->{$col_type} ) { - $interval = ceil($interval); - } - $interval ||= $args{chunk_size}; - if ( $args{exact} ) { - $interval = $args{chunk_size}; - } - MKDEBUG && _d('Chunk interval:', $interval, 'units'); - - return ( - col => $q->quote($args{chunk_col}), - start_point => $start_point, - end_point => $end_point, - interval => $interval, - range_func => $range_func, - have_zero_chunk => $have_zero_chunk, - ); -} - -sub _chunk_char { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl tbl_struct chunk_col min max rows_in_range chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $q = $self->{Quoter}; - my $db_tbl = $q->quote($args{db}, $args{tbl}); - my $dbh = $args{dbh}; - my $chunk_col = $args{chunk_col}; - my $row; - my $sql; - - my ($min_col, $max_col) = @{args}{qw(min max)}; - $sql = "SELECT ORD(?) AS min_col_ord, ORD(?) 
AS max_col_ord"; - MKDEBUG && _d($dbh, $sql); - my $ord_sth = $dbh->prepare($sql); # avoid quoting issues - $ord_sth->execute($min_col, $max_col); - $row = $ord_sth->fetchrow_arrayref(); - my ($min_col_ord, $max_col_ord) = ($row->[0], $row->[1]); - MKDEBUG && _d("Min/max col char code:", $min_col_ord, $max_col_ord); - - my $base; - my @chars; - MKDEBUG && _d("Table charset:", $args{tbl_struct}->{charset}); - if ( ($args{tbl_struct}->{charset} || "") eq "latin1" ) { - my @sorted_latin1_chars = ( - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, - 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, - 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, - 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, - 88, 89, 90, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 161, - 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, - 190, 191, 215, 216, 222, 223, 247, 255); - - my ($first_char, $last_char); - for my $i ( 0..$#sorted_latin1_chars ) { - $first_char = $i and last if $sorted_latin1_chars[$i] >= $min_col_ord; - } - for my $i ( $first_char..$#sorted_latin1_chars ) { - $last_char = $i and last if $sorted_latin1_chars[$i] >= $max_col_ord; - }; - - @chars = map { chr $_; } @sorted_latin1_chars[$first_char..$last_char]; - $base = scalar @chars; - } - else { - - my $tmp_tbl = '__maatkit_char_chunking_map'; - my $tmp_db_tbl = $q->quote($args{db}, $tmp_tbl); - $sql = "DROP TABLE IF EXISTS $tmp_db_tbl"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - my $col_def = $args{tbl_struct}->{defs}->{$chunk_col}; - $sql = "CREATE TEMPORARY TABLE $tmp_db_tbl ($col_def) " - . 
"ENGINE=MEMORY"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - - $sql = "INSERT INTO $tmp_db_tbl VALUE (CHAR(?))"; - MKDEBUG && _d($dbh, $sql); - my $ins_char_sth = $dbh->prepare($sql); # avoid quoting issues - for my $char_code ( $min_col_ord..$max_col_ord ) { - $ins_char_sth->execute($char_code); - } - - $sql = "SELECT `$chunk_col` FROM $tmp_db_tbl " - . "WHERE `$chunk_col` BETWEEN ? AND ? " - . "ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - my $sel_char_sth = $dbh->prepare($sql); - $sel_char_sth->execute($min_col, $max_col); - - @chars = map { $_->[0] } @{ $sel_char_sth->fetchall_arrayref() }; - $base = scalar @chars; - - $sql = "DROP TABLE $tmp_db_tbl"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - } - MKDEBUG && _d("Base", $base, "chars:", @chars); - - - $sql = "SELECT MAX(LENGTH($chunk_col)) FROM $db_tbl " - . ($args{where} ? "WHERE $args{where} " : "") - . "ORDER BY `$chunk_col`"; - MKDEBUG && _d($dbh, $sql); - $row = $dbh->selectrow_arrayref($sql); - my $max_col_len = $row->[0]; - MKDEBUG && _d("Max column value:", $max_col, $max_col_len); - my $n_values; - for my $n_chars ( 1..$max_col_len ) { - $n_values = $base**$n_chars; - if ( $n_values >= $args{chunk_size} ) { - MKDEBUG && _d($n_chars, "chars in base", $base, "expresses", - $n_values, "values"); - last; - } - } - - my $n_chunks = $args{rows_in_range} / $args{chunk_size}; - my $interval = floor($n_values / $n_chunks) || 1; - - my $range_func = sub { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $start_char = $self->base_count( - count_to => $start, - base => $base, - symbols => \@chars, - ); - my $end_char = $self->base_count( - count_to => min($max, $start + $interval), - base => $base, - symbols => \@chars, - ); - return $start_char, $end_char; - }; - - return ( - col => $q->quote($chunk_col), - start_point => 0, - end_point => $n_values, - interval => $interval, - range_func => $range_func, - ); -} - -sub get_first_chunkable_column { - my ( $self, %args ) = @_; - foreach my 
$arg ( qw(tbl_struct) ) { - die "I need a $arg argument" unless $args{$arg}; - } - - my ($exact, @cols) = $self->find_chunk_columns(%args); - my $col = $cols[0]->{column}; - my $idx = $cols[0]->{index}; - - my $wanted_col = $args{chunk_column}; - my $wanted_idx = $args{chunk_index}; - MKDEBUG && _d("Preferred chunk col/idx:", $wanted_col, $wanted_idx); - - if ( $wanted_col && $wanted_idx ) { - foreach my $chunkable_col ( @cols ) { - if ( $wanted_col eq $chunkable_col->{column} - && $wanted_idx eq $chunkable_col->{index} ) { - $col = $wanted_col; - $idx = $wanted_idx; - last; - } - } - } - elsif ( $wanted_col ) { - foreach my $chunkable_col ( @cols ) { - if ( $wanted_col eq $chunkable_col->{column} ) { - $col = $wanted_col; - $idx = $chunkable_col->{index}; - last; - } - } - } - elsif ( $wanted_idx ) { - foreach my $chunkable_col ( @cols ) { - if ( $wanted_idx eq $chunkable_col->{index} ) { - $col = $chunkable_col->{column}; - $idx = $wanted_idx; - last; - } - } - } - - MKDEBUG && _d('First chunkable col/index:', $col, $idx); - return $col, $idx; -} - -sub size_to_rows { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl chunk_size); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db, $tbl, $chunk_size) = @args{@required_args}; - my $q = $self->{Quoter}; - my $du = $self->{MySQLDump}; - - my ($n_rows, $avg_row_length); - - my ( $num, $suffix ) = $chunk_size =~ m/^(\d+)([MGk])?$/; - if ( $suffix ) { # Convert to bytes. - $chunk_size = $suffix eq 'k' ? $num * 1_024 - : $suffix eq 'M' ? $num * 1_024 * 1_024 - : $num * 1_024 * 1_024 * 1_024; - } - elsif ( $num ) { - $n_rows = $num; - } - else { - die "Invalid chunk size $chunk_size; must be an integer " - . "with optional suffix kMG"; - } - - if ( $suffix || $args{avg_row_length} ) { - my ($status) = $du->get_table_status($dbh, $q, $db, $tbl); - $avg_row_length = $status->{avg_row_length}; - if ( !defined $n_rows ) { - $n_rows = $avg_row_length ? 
ceil($chunk_size / $avg_row_length) : undef; - } - } - - return $n_rows, $avg_row_length; -} - -sub get_range_statistics { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db tbl chunk_col tbl_struct); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db, $tbl, $col) = @args{@required_args}; - my $where = $args{where}; - my $q = $self->{Quoter}; - - my $col_type = $args{tbl_struct}->{type_for}->{$col}; - my $col_is_numeric = $args{tbl_struct}->{is_numeric}->{$col}; - - my $db_tbl = $q->quote($db, $tbl); - $col = $q->quote($col); - - my ($min, $max); - eval { - my $sql = "SELECT MIN($col), MAX($col) FROM $db_tbl" - . ($args{index_hint} ? " $args{index_hint}" : "") - . ($where ? " WHERE ($where)" : ''); - MKDEBUG && _d($dbh, $sql); - ($min, $max) = $dbh->selectrow_array($sql); - MKDEBUG && _d("Actual end points:", $min, $max); - - ($min, $max) = $self->get_valid_end_points( - %args, - dbh => $dbh, - db_tbl => $db_tbl, - col => $col, - col_type => $col_type, - min => $min, - max => $max, - ); - MKDEBUG && _d("Valid end points:", $min, $max); - }; - if ( $EVAL_ERROR ) { - die "Error getting min and max values for table $db_tbl " - . "on column $col: $EVAL_ERROR"; - } - - my $sql = "EXPLAIN SELECT * FROM $db_tbl" - . ($args{index_hint} ? " $args{index_hint}" : "") - . ($where ? " WHERE $where" : ''); - MKDEBUG && _d($sql); - my $expl = $dbh->selectrow_hashref($sql); - - return ( - min => $min, - max => $max, - rows_in_range => $expl->{rows}, - ); -} - -sub inject_chunks { - my ( $self, %args ) = @_; - foreach my $arg ( qw(database table chunks chunk_num query) ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - MKDEBUG && _d('Injecting chunk', $args{chunk_num}); - my $query = $args{query}; - my $comment = sprintf("/*%s.%s:%d/%d*/", - $args{database}, $args{table}, - $args{chunk_num} + 1, scalar @{$args{chunks}}); - $query =~ s!/\*PROGRESS_COMMENT\*/!$comment!; - my $where = "WHERE (" . 
$args{chunks}->[$args{chunk_num}] . ')'; - if ( $args{where} && grep { $_ } @{$args{where}} ) { - $where .= " AND (" - . join(" AND ", map { "($_)" } grep { $_ } @{$args{where}} ) - . ")"; - } - my $db_tbl = $self->{Quoter}->quote(@args{qw(database table)}); - my $index_hint = $args{index_hint} || ''; - - MKDEBUG && _d('Parameters:', - Dumper({WHERE => $where, DB_TBL => $db_tbl, INDEX_HINT => $index_hint})); - $query =~ s!/\*WHERE\*/! $where!; - $query =~ s!/\*DB_TBL\*/!$db_tbl!; - $query =~ s!/\*INDEX_HINT\*/! $index_hint!; - $query =~ s!/\*CHUNK_NUM\*/! $args{chunk_num} AS chunk_num,!; - - return $query; -} - - -sub value_to_number { - my ( $self, %args ) = @_; - my @required_args = qw(column_type dbh); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my $val = $args{value}; - my ($col_type, $dbh) = @args{@required_args}; - MKDEBUG && _d('Converting MySQL', $col_type, $val); - - return unless defined $val; # value is NULL - - my %mysql_conv_func_for = ( - timestamp => 'UNIX_TIMESTAMP', - date => 'TO_DAYS', - time => 'TIME_TO_SEC', - datetime => 'TO_DAYS', - ); - - my $num; - if ( $col_type =~ m/(?:int|year|float|double|decimal)$/ ) { - $num = $val; - } - elsif ( $col_type =~ m/^(?:timestamp|date|time)$/ ) { - my $func = $mysql_conv_func_for{$col_type}; - my $sql = "SELECT $func(?)"; - MKDEBUG && _d($dbh, $sql, $val); - my $sth = $dbh->prepare($sql); - $sth->execute($val); - ($num) = $sth->fetchrow_array(); - } - elsif ( $col_type eq 'datetime' ) { - $num = $self->timestampdiff($dbh, $val); - } - else { - die "I don't know how to chunk $col_type\n"; - } - MKDEBUG && _d('Converts to', $num); - return $num; -} - -sub range_num { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $end = min($max, $start + $interval); - - - $start = sprintf('%.17f', $start) if $start =~ /e/; - $end = sprintf('%.17f', $end) if $end =~ /e/; - - $start =~ s/\.(\d{5}).*$/.$1/; - $end =~ s/\.(\d{5}).*$/.$1/; - - if ( $end > 
$start ) { - return ( $start, $end ); - } - else { - die "Chunk size is too small: $end !> $start\n"; - } -} - -sub range_time { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT SEC_TO_TIME($start), SEC_TO_TIME(LEAST($max, $start + $interval))"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub range_date { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT FROM_DAYS($start), FROM_DAYS(LEAST($max, $start + $interval))"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub range_datetime { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $start SECOND), " - . "DATE_ADD('$self->{EPOCH}', INTERVAL LEAST($max, $start + $interval) SECOND)"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub range_timestamp { - my ( $self, $dbh, $start, $interval, $max ) = @_; - my $sql = "SELECT FROM_UNIXTIME($start), FROM_UNIXTIME(LEAST($max, $start + $interval))"; - MKDEBUG && _d($sql); - return $dbh->selectrow_array($sql); -} - -sub timestampdiff { - my ( $self, $dbh, $time ) = @_; - my $sql = "SELECT (COALESCE(TO_DAYS('$time'), 0) * 86400 + TIME_TO_SEC('$time')) " - . "- TO_DAYS('$self->{EPOCH} 00:00:00') * 86400"; - MKDEBUG && _d($sql); - my ( $diff ) = $dbh->selectrow_array($sql); - $sql = "SELECT DATE_ADD('$self->{EPOCH}', INTERVAL $diff SECOND)"; - MKDEBUG && _d($sql); - my ( $check ) = $dbh->selectrow_array($sql); - die <<" EOF" - Incorrect datetime math: given $time, calculated $diff but checked to $check. - This could be due to a version of MySQL that overflows on large interval - values to DATE_ADD(), or the given datetime is not a valid date. If not, - please report this as a bug. 
- EOF - unless $check eq $time; - return $diff; -} - - - - -sub get_valid_end_points { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col col_type); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; - my ($real_min, $real_max) = @args{qw(min max)}; - - my $err_fmt = "Error finding a valid %s value for table $db_tbl on " - . "column $col. The real %s value %s is invalid and " - . "no other valid values were found. Verify that the table " - . "has at least one valid value for this column" - . ($args{where} ? " where $args{where}." : "."); - - my $valid_min = $real_min; - if ( defined $valid_min ) { - MKDEBUG && _d("Validating min end point:", $real_min); - $valid_min = $self->_get_valid_end_point( - %args, - val => $real_min, - endpoint => 'min', - ); - die sprintf($err_fmt, 'minimum', 'minimum', - (defined $real_min ? $real_min : "NULL")) - unless defined $valid_min; - } - - my $valid_max = $real_max; - if ( defined $valid_max ) { - MKDEBUG && _d("Validating max end point:", $real_min); - $valid_max = $self->_get_valid_end_point( - %args, - val => $real_max, - endpoint => 'max', - ); - die sprintf($err_fmt, 'maximum', 'maximum', - (defined $real_max ? $real_max : "NULL")) - unless defined $valid_max; - } - - return $valid_min, $valid_max; -} - -sub _get_valid_end_point { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col col_type); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; - my $val = $args{val}; - - return $val unless defined $val; - - my $validate = $col_type =~ m/time|date/ ? 
\&_validate_temporal_value - : undef; - - if ( !$validate ) { - MKDEBUG && _d("No validator for", $col_type, "values"); - return $val; - } - - return $val if defined $validate->($dbh, $val); - - MKDEBUG && _d("Value is invalid, getting first valid value"); - $val = $self->get_first_valid_value( - %args, - val => $val, - validate => $validate, - ); - - return $val; -} - -sub get_first_valid_value { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col validate endpoint); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $validate, $endpoint) = @args{@required_args}; - my $tries = defined $args{tries} ? $args{tries} : 5; - my $val = $args{val}; - - return unless defined $val; - - my $cmp = $endpoint =~ m/min/i ? '>' - : $endpoint =~ m/max/i ? '<' - : die "Invalid endpoint arg: $endpoint"; - my $sql = "SELECT $col FROM $db_tbl " - . ($args{index_hint} ? "$args{index_hint} " : "") - . "WHERE $col $cmp ? AND $col IS NOT NULL " - . ($args{where} ? "AND ($args{where}) " : "") - . 
"ORDER BY $col LIMIT 1"; - MKDEBUG && _d($dbh, $sql); - my $sth = $dbh->prepare($sql); - - my $last_val = $val; - while ( $tries-- ) { - $sth->execute($last_val); - my ($next_val) = $sth->fetchrow_array(); - MKDEBUG && _d('Next value:', $next_val, '; tries left:', $tries); - if ( !defined $next_val ) { - MKDEBUG && _d('No more rows in table'); - last; - } - if ( defined $validate->($dbh, $next_val) ) { - MKDEBUG && _d('First valid value:', $next_val); - $sth->finish(); - return $next_val; - } - $last_val = $next_val; - } - $sth->finish(); - $val = undef; # no valid value found - - return $val; -} - -sub _validate_temporal_value { - my ( $dbh, $val ) = @_; - my $sql = "SELECT IF(TIME_FORMAT(?,'%H:%i:%s')=?, TIME_TO_SEC(?), TO_DAYS(?))"; - my $res; - eval { - MKDEBUG && _d($dbh, $sql, $val); - my $sth = $dbh->prepare($sql); - $sth->execute($val, $val, $val, $val); - ($res) = $sth->fetchrow_array(); - $sth->finish(); - }; - if ( $EVAL_ERROR ) { - MKDEBUG && _d($EVAL_ERROR); - } - return $res; -} - -sub get_nonzero_value { - my ( $self, %args ) = @_; - my @required_args = qw(dbh db_tbl col col_type); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - my ($dbh, $db_tbl, $col, $col_type) = @args{@required_args}; - my $tries = defined $args{tries} ? $args{tries} : 5; - my $val = $args{val}; - - my $is_nonzero = $col_type =~ m/time|date/ ? \&_validate_temporal_value - : sub { return $_[1]; }; - - if ( !$is_nonzero->($dbh, $val) ) { # quasi-double-negative, sorry - MKDEBUG && _d('Discarding zero value:', $val); - my $sql = "SELECT $col FROM $db_tbl " - . ($args{index_hint} ? "$args{index_hint} " : "") - . "WHERE $col > ? AND $col IS NOT NULL " - . ($args{where} ? "AND ($args{where}) " : '') - . 
"ORDER BY $col LIMIT 1"; - MKDEBUG && _d($sql); - my $sth = $dbh->prepare($sql); - - my $last_val = $val; - while ( $tries-- ) { - $sth->execute($last_val); - my ($next_val) = $sth->fetchrow_array(); - if ( $is_nonzero->($dbh, $next_val) ) { - MKDEBUG && _d('First non-zero value:', $next_val); - $sth->finish(); - return $next_val; - } - $last_val = $next_val; - } - $sth->finish(); - $val = undef; # no non-zero value found - } - - return $val; -} - -sub base_count { - my ( $self, %args ) = @_; - my @required_args = qw(count_to base symbols); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless defined $args{$arg}; - } - my ($n, $base, $symbols) = @args{@required_args}; - - return $symbols->[0] if $n == 0; - - my $highest_power = floor(log($n)/log($base)); - if ( $highest_power == 0 ){ - return $symbols->[$n]; - } - - my @base_powers; - for my $power ( 0..$highest_power ) { - push @base_powers, ($base**$power) || 1; - } - - my @base_multiples; - foreach my $base_power ( reverse @base_powers ) { - my $multiples = floor($n / $base_power); - push @base_multiples, $multiples; - $n -= $multiples * $base_power; - } - - return join('', map { $symbols->[$_] } @base_multiples); -} - -sub _d { - my ($package, undef, $line) = caller 0; - @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } - map { defined $_ ? $_ : 'undef' } - @_; - print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; -} - -1; -} -# ########################################################################### -# End TableChunker package -# ########################################################################### - -# ########################################################################### -# Quoter package -# This package is a copy without comments from the original. The original -# with comments and its test file can be found in the Bazaar repository at, -# lib/Quoter.pm -# t/lib/Quoter.t -# See https://launchpad.net/percona-toolkit for more information. 
-# ########################################################################### -{ -package Quoter; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use constant MKDEBUG => $ENV{MKDEBUG} || 0; - -sub new { - my ( $class, %args ) = @_; - return bless {}, $class; -} - -sub quote { - my ( $self, @vals ) = @_; - foreach my $val ( @vals ) { - $val =~ s/`/``/g; - } - return join('.', map { '`' . $_ . '`' } @vals); -} - -sub quote_val { - my ( $self, $val ) = @_; - - return 'NULL' unless defined $val; # undef = NULL - return "''" if $val eq ''; # blank string = '' - return $val if $val =~ m/^0x[0-9a-fA-F]+$/; # hex data - - $val =~ s/(['\\])/\\$1/g; - return "'$val'"; -} - -sub split_unquote { - my ( $self, $db_tbl, $default_db ) = @_; - $db_tbl =~ s/`//g; - my ( $db, $tbl ) = split(/[.]/, $db_tbl); - if ( !$tbl ) { - $tbl = $db; - $db = $default_db; - } - return ($db, $tbl); -} - -sub literal_like { - my ( $self, $like ) = @_; - return unless $like; - $like =~ s/([%_])/\\$1/g; - return "'$like'"; -} - -sub join_quote { - my ( $self, $default_db, $db_tbl ) = @_; - return unless $db_tbl; - my ($db, $tbl) = split(/[.]/, $db_tbl); - if ( !$tbl ) { - $tbl = $db; - $db = $default_db; - } - $db = "`$db`" if $db && $db !~ m/^`/; - $tbl = "`$tbl`" if $tbl && $tbl !~ m/^`/; - return $db ? "$db.$tbl" : $tbl; -} - -1; -} -# ########################################################################### -# End Quoter package -# ########################################################################### - # ########################################################################### # MasterSlave package # This package is a copy without comments from the original. 
The original
@@ -4183,6 +3127,737 @@ sub _d {
 # End MasterSlave package
 # ###########################################################################
 
+# ###########################################################################
+# RowChecksum package
+# This package is a copy without comments from the original. The original
+# with comments and its test file can be found in the Bazaar repository at,
+# lib/RowChecksum.pm
+# t/lib/RowChecksum.t
+# See https://launchpad.net/percona-toolkit for more information.
+# ###########################################################################
+{
+package RowChecksum;
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
+
+use List::Util qw(max);
+
+# Constructor.  Requires OptionParser and Quoter objects; all named args are
+# stored on the object as given.
+sub new {
+   my ( $class, %args ) = @_;
+   foreach my $arg ( qw(OptionParser Quoter) ) {
+      die "I need a $arg argument" unless defined $args{$arg};
+   }
+   my $self = { %args };
+   return bless $self, $class;
+}
+
+# Returns the SQL expression that checksums a single row of $args{tbl}.
+# Required arg: tbl (hashref with a tbl_struct).  Optional args: func, sep,
+# cols, ignorecols, float_precision, trim, and no_cols (omit the leading
+# column list and return only the checksum expression).
+sub make_row_checksum {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(tbl);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($tbl) = @args{@required_args};
+
+   my $o          = $self->{OptionParser};
+   my $q          = $self->{Quoter};
+   my $tbl_struct = $tbl->{tbl_struct};
+   my $func       = $args{func} || uc($o->get('function'));
+
+   my $sep = $args{sep} || '#';
+   # The separator is embedded in a single-quoted SQL string below.
+   $sep =~ s/'//g;
+   $sep ||= '#';
+
+   my $ignorecols = $args{ignorecols} || {};
+
+   # Wanted columns, emitted in table order with per-type adjustments.
+   my %cols = map { lc($_) => 1 }
+              grep { !exists $ignorecols->{$_} }
+              ($args{cols} ? @{$args{cols}} : @{$tbl_struct->{cols}});
+   my %seen;
+   my @cols =
+      map {
+         my $type = $tbl_struct->{type_for}->{$_};
+         my $result = $q->quote($_);
+         if ( $type eq 'timestamp' ) {
+            $result .= ' + 0';
+         }
+         elsif ( $args{float_precision} && $type =~ m/float|double/ ) {
+            $result = "ROUND($result, $args{float_precision})";
+         }
+         elsif ( $args{trim} && $type =~ m/varchar/ ) {
+            $result = "TRIM($result)";
+         }
+         $result;
+      }
+      grep {
+         $cols{$_} && !$seen{$_}++
+      }
+      @{$tbl_struct->{cols}};
+
+   my $query;
+   if ( !$args{no_cols} ) {
+      $query = join(', ',
+                  map {
+                     my $col = $_;
+                     if ( $col =~ m/\+ 0/ ) {
+                        my ($real_col) = /^(\S+)/;
+                        $col .= " AS $real_col";
+                     }
+                     elsif ( $col =~ m/TRIM/ ) {
+                        my ($real_col) = m/TRIM\(([^\)]+)\)/;
+                        $col .= " AS $real_col";
+                     }
+                     $col;
+                  } @cols)
+             . ', ';
+   }
+
+   if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
+      # The hashed string also gets a bitmap of ISNULL() flags, one per
+      my @nulls = grep { $cols{$_} } @{$tbl_struct->{null_cols}};
+      if ( @nulls ) {
+         my $bitmap = "CONCAT("
+            . join(', ', map { 'ISNULL(' . $q->quote($_) . ')' } @nulls)
+            . ")";
+         push @cols, $bitmap;
+      }
+
+      $query .= @cols > 1
+              ? "$func(CONCAT_WS('$sep', " . join(', ', @cols) . '))'
+              : "$func($cols[0])";
+   }
+   else {
+      my $fnv_func = uc $func;
+      $query .= "$fnv_func(" . join(', ', @cols) . ')';
+   }
+
+   MKDEBUG && _d('Row checksum:', $query);
+   return $query;
+}
+
+# Returns the select list ("COUNT(*) AS cnt, <crc> AS crc") that checksums a
+# whole chunk of rows.  Required arg: tbl.  A dbh is required unless func,
+# crc_width and crc_type are all given.
+sub make_chunk_checksum {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(tbl);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   if ( !$args{dbh} && !($args{func} && $args{crc_width} && $args{crc_type}) ) {
+      die "I need a dbh argument";
+   }
+
+   my ($tbl) = @args{@required_args};
+   my $o     = $self->{OptionParser};
+   my $q     = $self->{Quoter};
+
+   my $func      = $args{func}     || $self->_get_hash_func(%args);
+   my $crc_width = $args{crc_width}|| $self->_get_crc_width(%args, func=>$func);
+   my $crc_type  = $args{crc_type} || $self->_get_crc_type(%args, func=>$func);
+   # NOTE(review): $opt_slice is only reported below; it is not passed to
+   # _make_xor_slices(), so --optimize-xor does not change the query yet.
+   my $opt_slice;
+   if ( $o->get('optimize-xor') ) {
+      if ( $crc_type !~ m/int$/ ) {
+         $opt_slice = $self->_optimize_xor(%args, func => $func);
+         warn "Cannot use --optimize-xor" unless defined $opt_slice;
+      }
+   }
+   MKDEBUG && _d("Checksum strat:", $func, $crc_width, $crc_type, $opt_slice);
+
+   my $row_checksum = $self->make_row_checksum(
+      %args,
+      func    => $func,
+      no_cols => 1
+   );
+   my $crc;
+   if ( $crc_type =~ m/int$/ ) {
+      $crc = "COALESCE(LOWER(CONV(BIT_XOR(CAST($row_checksum AS UNSIGNED)), "
+           . "10, 16)), 0)";
+   }
+   else {
+      my $slices = $self->_make_xor_slices(
+         row_checksum => $row_checksum,
+         crc_width    => $crc_width
+      );
+      $crc = "COALESCE(LOWER(CONCAT($slices)), 0)";
+   }
+
+   my $select = "COUNT(*) AS cnt, $crc AS crc";
+   MKDEBUG && _d('Chunk checksum:', $select);
+   return $select;
+}
+
+# Returns the first hash function that the server can actually execute,
+# trying --function (if set) before the built-in candidates.  Dies with the
+# collected reasons if none of them works.
+sub _get_hash_func {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(dbh);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($dbh) = @args{@required_args};
+   my $o     = $self->{OptionParser};
+   my @funcs = qw(CRC32 FNV1A_64 FNV_64 MD5 SHA1);
+
+   if ( my $func = $o->get('function') ) {
+      unshift @funcs, $func;
+   }
+
+   my $error;
+   foreach my $func ( @funcs ) {
+      eval {
+         my $sql = "SELECT $func('test-string')";
+         MKDEBUG && _d($sql);
+         $dbh->do($sql);
+      };
+      if ( my $err = $EVAL_ERROR ) {
+         # Prefer the short reason after "failed:"; else keep the whole error.
+         my ($reason) = $err =~ m/failed: (.*?) at \S+ line/;
+         $reason = $err unless defined $reason;
+         $error .= qq{$func cannot be used because "$reason"\n};
+         MKDEBUG && _d($func, 'cannot be used because', $reason);
+         next;  # try the next candidate instead of returning a broken one
+      }
+      MKDEBUG && _d('Chosen hash func:', $func);
+      return $func;
+   }
+   die $error || 'No hash functions (CRC32, MD5, etc.) are available';
+}
+
+# Returns the width of the function's result in characters, never less
+# than 16.  Errors from the probe query are ignored and 16 is returned.
+sub _get_crc_width {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(dbh func);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($dbh, $func) = @args{@required_args};
+
+   my $crc_width = 16;
+   if ( uc $func ne 'FNV_64' && uc $func ne 'FNV1A_64' ) {
+      eval {
+         my ($val) = $dbh->selectrow_array("SELECT $func('a')");
+         $crc_width = max(16, length($val));
+      };
+   }
+   return $crc_width;
+}
+
+# Returns the MySQL type name of the function's result; a bigint whose
+# length is below 20 is reported as int.  Returns '' if execute() fails.
+sub _get_crc_type {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(dbh func);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($dbh, $func) = @args{@required_args};
+
+   my $type   = '';
+   my $length = 0;
+   my $sql    = "SELECT $func('a')";
+   my $sth    = $dbh->prepare($sql);
+   eval {
+      $sth->execute();
+      $type   = $sth->{mysql_type_name}->[0];
+      $length = $sth->{mysql_length}->[0];
+      MKDEBUG && _d($sql, $type, $length);
+      if ( $type eq 'bigint' && $length < 20 ) {
+         $type = 'int';
+      }
+   };
+   $sth->finish;
+   MKDEBUG && _d('crc_type:', $type, 'length:', $length);
+   return $type;
+}
+
+# Finds which 16-character slice of the checksum can carry the "@crc :="
+# assignment so that the sliced result equals the unsliced one.  Returns the
+# slice index, or undef if no slice works.  Dies for CRC32/FNV functions.
+sub _optimize_xor {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(dbh func);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($dbh, $func) = @args{@required_args};
+
+   die "$func never needs BIT_XOR optimization"
+      if $func =~ m/^(?:FNV1A_64|FNV_64|CRC32)$/i;
+
+   my $opt_slice = 0;
+   my $unsliced  = uc $dbh->selectall_arrayref("SELECT $func('a')")->[0]->[0];
+   my $sliced    = '';
+   my $start     = 1;
+   my $crc_width = length($unsliced) < 16 ? 16 : length($unsliced);
+
+   do { # Try different positions till sliced result equals non-sliced.
+      MKDEBUG && _d('Trying slice', $opt_slice);
+      $dbh->do('SET @crc := "", @cnt := 0');
+      my $slices = $self->_make_xor_slices(
+         row_checksum => "\@crc := $func('a')",
+         crc_width    => $crc_width,
+         opt_slice    => $opt_slice,
+      );
+
+      my $sql = "SELECT CONCAT($slices) AS TEST FROM (SELECT NULL) AS x";
+      $sliced = ($dbh->selectrow_array($sql))[0];
+      if ( $sliced ne $unsliced ) {
+         MKDEBUG && _d('Slice', $opt_slice, 'does not work');
+         $start += 16;
+         ++$opt_slice;
+      }
+   } while ( $start < $crc_width && $sliced ne $unsliced );
+
+   if ( $sliced eq $unsliced ) {
+      MKDEBUG && _d('Slice', $opt_slice, 'works');
+      return $opt_slice;
+   }
+   else {
+      MKDEBUG && _d('No slice works');
+      return undef;
+   }
+}
+
+# Returns a comma-separated list of SQL expressions, one per 16-character
+# slice of the row checksum, each BIT_XOR'ed and converted back to hex.
+# If opt_slice is a valid index, only that slice evaluates row_checksum
+# (assigning it to @crc); the other slices read @crc.
+sub _make_xor_slices {
+   my ( $self, %args ) = @_;
+   my @required_args = qw(row_checksum crc_width);
+   foreach my $arg( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($row_checksum, $crc_width) = @args{@required_args};
+   my ($opt_slice) = $args{opt_slice};
+
+   my @slices;
+   for ( my $start = 1; $start <= $crc_width; $start += 16 ) {
+      my $len = $crc_width - $start + 1;
+      if ( $len > 16 ) {
+         $len = 16;
+      }
+      push @slices,
+         "LPAD(CONV(BIT_XOR("
+         . "CAST(CONV(SUBSTRING(\@crc, $start, $len), 16, 10) AS UNSIGNED))"
+         . ", 10, 16), $len, '0')";
+   }
+
+   if ( defined $opt_slice && $opt_slice < @slices ) {
+      $slices[$opt_slice] =~ s/\@crc/\@crc := $row_checksum/;
+   }
+   else {
+      s/\@crc/$row_checksum/ for @slices;
+   }
+
+   return join(', ', @slices);
+}
+
+# Returns a list of hashrefs, one per row of $table whose count or checksum
+# differs from the master's (including one-sided NULL checksums).
+sub find_replication_differences {
+   my ( $self, $dbh, $table ) = @_;
+
+   (my $sql = <<" EOF") =~ s/\s+/ /gm;
+      SELECT db, tbl, chunk, boundaries,
+         COALESCE(this_cnt-master_cnt, 0) AS cnt_diff,
+         COALESCE(
+            this_crc <> master_crc OR ISNULL(master_crc) <> ISNULL(this_crc),
+            0
+         ) AS crc_diff,
+         this_cnt, master_cnt, this_crc, master_crc
+      FROM $table
+      WHERE master_cnt <> this_cnt OR master_crc <> this_crc
+         OR ISNULL(master_crc) <> ISNULL(this_crc)
+ EOF
+
+   MKDEBUG && _d($sql);
+   my $diffs = $dbh->selectall_arrayref($sql, { Slice => {} });
+   return @$diffs;
+}
+
+# Prints debug messages to STDERR, prefixed with package, line and PID.
+sub _d {
+   my ($package, undef, $line) = caller 0;
+   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
+        map { defined $_ ? $_ : 'undef' }
+        @_;
+   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
+}
+
+1;
+}
+# ###########################################################################
+# End RowChecksum package
+# ###########################################################################
+
+# ###########################################################################
+# NibbleIterator package
+# This package is a copy without comments from the original. The original
+# with comments and its test file can be found in the Bazaar repository at,
+# lib/NibbleIterator.pm
+# t/lib/NibbleIterator.t
+# See https://launchpad.net/percona-toolkit for more information.
+# ###########################################################################
+{
+package NibbleIterator;
+
+use strict;
+use warnings FATAL => 'all';
+use English qw(-no_match_vars);
+use constant MKDEBUG => $ENV{MKDEBUG} || 0;
+
+use Data::Dumper;
+$Data::Dumper::Indent    = 1;
+$Data::Dumper::Sortkeys  = 1;
+$Data::Dumper::Quotekeys = 0;
+
+# NibbleIterator walks a table in index order, one "nibble" (a range of
+# rows bounded by index values) at a time.  next() returns one row per
+# call; boundaries are computed lazily with LIMIT ... OFFSET queries.
+
+# Build the iterator and all SQL statements it will use.
+# Required args: dbh, tbl (hashref with db, tbl, tbl_struct), OptionParser,
+# Quoter, TableNibbler, TableParser.
+# Optional args read here: where, select, dms, callbacks.
+# Dies if a required arg is missing or no index can be found for the table.
+sub new {
+   my ( $class, %args ) = @_;
+   my @required_args = qw(dbh tbl OptionParser Quoter TableNibbler TableParser);
+   foreach my $arg ( @required_args ) {
+      die "I need a $arg argument" unless $args{$arg};
+   }
+   my ($tbl, $o, $q) = @args{qw(tbl OptionParser Quoter)};
+
+   my $index = $args{TableParser}->find_best_index(
+      $tbl->{tbl_struct},
+      $o->get('chunk-index'),
+   );
+   die "No index to nibble table $tbl->{db}.$tbl->{tbl}" unless $index;
+   my $index_cols = $tbl->{tbl_struct}->{keys}->{$index}->{cols};
+
+   my $asc = $args{TableNibbler}->generate_asc_stmt(
+      %args,
+      tbl_struct => $tbl->{tbl_struct},
+      index      => $index,
+      asc_only   => 1,
+   );
+
+   my $from     = $q->quote(@{$tbl}{qw(db tbl)}) . " FORCE INDEX(`$index`)";
+   my $order_by = join(', ', map {$q->quote($_)} @{$index_cols});
+
+   my $first_lb_sql
+      = "SELECT /*!40001 SQL_NO_CACHE */ "
+      . join(', ', map { $q->quote($_) } @{$index_cols})
+      . " FROM $from "
+      . ($args{where} ? " WHERE $args{where}" : '')
+      . " ORDER BY $order_by "
+      . " LIMIT 1"
+      . " /*first lower boundary*/";
+   MKDEBUG && _d('First lower boundary statement:', $first_lb_sql);
+
+   my $last_ub_sql
+      = "SELECT /*!40001 SQL_NO_CACHE */ "
+      . join(', ', map { $q->quote($_) } @{$index_cols})
+      . " FROM $from "
+      . ($args{where} ? " WHERE $args{where}" : '')
+      . " ORDER BY $order_by DESC "
+      . " LIMIT 1"
+      . " /*last upper boundary*/";
+   MKDEBUG && _d('Last upper boundary statement:', $last_ub_sql);
+
+   # LIMIT 2 OFFSET (chunk-size - 1): the first row returned is this
+   # nibble's upper boundary, the second is the next nibble's lower boundary.
+   my $ub_sql
+      = "SELECT /*!40001 SQL_NO_CACHE */ "
+      . join(', ', map { $q->quote($_) } @{$index_cols})
+      . " FROM $from "
+      . " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
+      . ($args{where} ? " AND ($args{where})" : '')
+      . " ORDER BY $order_by "
+      . " LIMIT 2 OFFSET " . (($o->get('chunk-size') || 1) - 1)
+      . " /*upper boundary*/";
+   MKDEBUG && _d('Next upper boundary statement:', $ub_sql);
+
+   my $nibble_sql
+      = ($args{dms} ? "$args{dms} " : "SELECT ")
+      . ($args{select} ? $args{select}
+                       : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
+      . " FROM $from "
+      . " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
+      . " AND "   . $asc->{boundaries}->{'<='} # upper boundary
+      . ($args{where} ? " AND ($args{where})" : '')
+      . " ORDER BY $order_by"
+      . " /*nibble*/";
+   MKDEBUG && _d('Nibble statement:', $nibble_sql);
+
+   my $explain_nibble_sql
+      = "EXPLAIN SELECT "
+      . ($args{select} ? $args{select}
+                       : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
+      . " FROM $from "
+      . " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
+      . " AND "   . $asc->{boundaries}->{'<='} # upper boundary
+      . ($args{where} ? " AND ($args{where})" : '')
+      . " ORDER BY $order_by"
+      . " /*explain nibble*/";
+   MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
+
+   # The one-nibble statements have no boundary predicates, so a user
+   # --where must be introduced with WHERE, not AND.
+   my $one_nibble_sql
+      = ($args{dms} ? "$args{dms} " : "SELECT ")
+      . ($args{select} ? $args{select}
+                       : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
+      . " FROM $from "
+      . ($args{where} ? " WHERE $args{where}" : '')
+      . " ORDER BY $order_by"
+      . " /*one nibble*/";
+   MKDEBUG && _d('One nibble statement:', $one_nibble_sql);
+
+   my $explain_one_nibble_sql
+      = "EXPLAIN SELECT "
+      . ($args{select} ? $args{select}
+                       : join(', ', map { $q->quote($_) } @{$asc->{cols}}))
+      . " FROM $from "
+      . ($args{where} ? " WHERE $args{where}" : '')
+      . " ORDER BY $order_by"
+      . " /*explain one nibble*/";
+   MKDEBUG && _d('Explain one nibble statement:', $explain_one_nibble_sql);
+
+   my $self = {
+      %args,
+      index                  => $index,
+      first_lb_sql           => $first_lb_sql,
+      last_ub_sql            => $last_ub_sql,
+      ub_sql                 => $ub_sql,
+      nibble_sql             => $nibble_sql,
+      explain_nibble_sql     => $explain_nibble_sql,
+      one_nibble_sql         => $one_nibble_sql,
+      explain_one_nibble_sql => $explain_one_nibble_sql,
+      nibbleno               => 0,
+      have_rows              => 0,
+      rowno                  => 0,
+   };
+
+   return bless $self, $class;
+}
+
+# Return the next row (as a fresh arrayref) or nothing when all nibbles
+# are exhausted.  On the first call the statement handles and first/last
+# boundaries are set up and the optional init callback runs.  The optional
+# exec_nibble callback replaces the default execute and its return value
+# is used as the "have rows" flag; after_nibble runs when a nibble is
+# finished or skipped; done runs once iteration ends.
+sub next {
+   my ($self) = @_;
+
+   if ($self->{nibbleno} == 0) {
+      $self->_can_nibble_once();
+      $self->_prepare_sths();
+      $self->_get_bounds();
+      if ( my $callback = $self->{callbacks}->{init} ) {
+         $callback->();
+      }
+   }
+
+   BOUNDARY:
+   while ( $self->{have_rows} || $self->_next_boundaries() ) {
+      if ( !$self->{have_rows} ) {
+         $self->{nibbleno}++;
+         MKDEBUG && _d($self->{nibble_sth}->{Statement}, 'params:',
+            join(', ', (@{$self->{lb}}, @{$self->{ub}})));
+         if ( my $callback = $self->{callbacks}->{exec_nibble} ) {
+            $self->{have_rows} = $callback->(
+               dbh         => $self->{dbh},
+               tbl         => $self->{tbl},
+               sth         => $self->{nibble_sth},
+               lb          => $self->{lb},
+               ub          => $self->{ub},
+               nibbleno    => $self->{nibbleno},
+               explain_sth => $self->{explain_sth},
+            );
+         }
+         else {
+            $self->{nibble_sth}->execute(@{$self->{lb}}, @{$self->{ub}});
+            $self->{have_rows} = $self->{nibble_sth}->rows();
+         }
+      }
+
+      if ( $self->{have_rows} ) {
+         MKDEBUG && _d($self->{have_rows}, 'rows in nibble', $self->{nibbleno});
+         my $row = $self->{nibble_sth}->fetchrow_arrayref();
+         if ( $row ) {
+            $self->{rowno}++;
+            MKDEBUG && _d('Row', $self->{rowno}, 'in nibble',$self->{nibbleno});
+            # Copy the row: the array behind fetchrow_arrayref is reused.
+            return [ @$row ];
+         }
+      }
+
+      MKDEBUG && _d('No rows in nibble or nibble skipped');
+      if ( my $callback = $self->{callbacks}->{after_nibble} ) {
+         $callback->(
+            dbh         => $self->{dbh},
+            tbl         => $self->{tbl},
+            nibbleno    => $self->{nibbleno},
+            explain_sth => $self->{explain_sth},
+         );
+      }
+      $self->{rowno}     = 0;
+      $self->{have_rows} = 0;
+   }
+
+   MKDEBUG && _d('Done nibbling');
+   if ( my $callback = $self->{callbacks}->{done} ) {
+      $callback->(
+         dbh => $self->{dbh},
+         tbl => $self->{tbl},
+      );
+   }
+   return;
+}
+
+# Return the 1-based number of the current nibble (0 before the first).
+sub nibble_number {
+   my ($self) = @_;
+   return $self->{nibbleno};
+}
+
+# Decide from SHOW TABLE STATUS whether the whole table fits in a single
+# nibble (row count <= --chunk-size) and remember it in $self->{one_nibble}.
+# Returns 0 (leaving one_nibble unset) if the status query fails.
+sub _can_nibble_once {
+   my ($self) = @_;
+   my ($dbh, $tbl, $q) = @{$self}{qw(dbh tbl Quoter)};
+   my $table_status;
+   eval {
+      my $sql = "SHOW TABLE STATUS FROM " . $q->quote($tbl->{db})
+              . " LIKE " . $q->literal_like($tbl->{tbl});
+      MKDEBUG && _d($sql);
+      $table_status = $dbh->selectrow_hashref($sql);
+      MKDEBUG && _d('Table status:', Dumper($table_status));
+   };
+   if ( $EVAL_ERROR ) {
+      warn $EVAL_ERROR;
+      return 0;
+   }
+   # The column may come back as Rows or rows depending on the handle's
+   # FetchHashKeyName setting.
+   my $n_rows = defined $table_status->{Rows} ? $table_status->{Rows}
+              : defined $table_status->{rows} ? $table_status->{rows}
+              : 0;
+   my $chunk_size = $self->{OptionParser}->get('chunk-size') || 1;
+   $self->{one_nibble} = $n_rows <= $chunk_size ? 1 : 0;
+   MKDEBUG && _d('One nibble:', $self->{one_nibble} ? 'yes' : 'no');
+   return $self->{one_nibble};
+}
+
+# Prepare the statement handles needed for the chosen mode (one nibble
+# vs. boundary-driven nibbling).
+sub _prepare_sths {
+   my ($self) = @_;
+   MKDEBUG && _d('Preparing statement handles');
+   if ( $self->{one_nibble} ) {
+      $self->{nibble_sth}  = $self->{dbh}->prepare($self->{one_nibble_sql});
+      $self->{explain_sth} = $self->{dbh}->prepare($self->{explain_one_nibble_sql});
+   }
+   else {
+      $self->{ub_sth}      = $self->{dbh}->prepare($self->{ub_sql});
+      $self->{nibble_sth}  = $self->{dbh}->prepare($self->{nibble_sql});
+      $self->{explain_sth} = $self->{dbh}->prepare($self->{explain_nibble_sql});
+   }
+}
+
+# Fetch the first lower boundary and the last upper boundary of the table.
+# Not needed in one-nibble mode.
+sub _get_bounds {
+   my ($self) = @_;
+   return if $self->{one_nibble};
+
+   $self->{next_lb} = $self->{dbh}->selectrow_arrayref($self->{first_lb_sql});
+   MKDEBUG && _d('First lower boundary:', Dumper($self->{next_lb}));
+
+   $self->{last_ub} = $self->{dbh}->selectrow_arrayref($self->{last_ub_sql});
+   MKDEBUG && _d('Last upper boundary:', Dumper($self->{last_ub}));
+
+   return;
+}
+
+# Die if EXPLAIN reports an index other than the chunk index.
+sub _check_index_usage {
+   my ($self) = @_;
+   my ($dbh, $tbl) = @{$self}{qw(dbh tbl)};
+
+   my $explain;
+   eval {
+      # NOTE(review): the statement text is empty, so this query presumably
+      # always fails and the sub only warns; the EXPLAIN statement to run
+      # here still needs to be filled in -- confirm intended statement.
+      $explain = $dbh->selectall_arrayref("", {Slice => {}});
+   };
+   if ( $EVAL_ERROR ) {
+      warn "Cannot check if MySQL is using the chunk index: $EVAL_ERROR";
+      return;
+   }
+   my $explain_index = lc($explain->[0]->{key} || '');
+   MKDEBUG && _d('EXPLAIN index:', $explain_index);
+   # Compare case-insensitively: the EXPLAIN key is lower-cased above while
+   # the stored index name (e.g. PRIMARY) is not.
+   if ( $explain_index ne lc($self->{index}) ) {
+      die "Cannot nibble table $tbl->{db}.$tbl->{tbl} because MySQL chose "
+         . ($explain_index ? "the `$explain_index`" : 'no') . ' index'
+         . " instead of the chunk index `$self->{index}`";
+   }
+
+   return;
+}
+
+# Advance lb/ub to the next nibble.  Returns true if there is a nibble to
+# execute, nothing when the table is exhausted.
+sub _next_boundaries {
+   my ($self) = @_;
+
+   if ( $self->{no_more_boundaries} ) {
+      MKDEBUG && _d('No more boundaries');
+      return;
+   }
+
+   if ( $self->{one_nibble} ) {
+      $self->{lb} = $self->{ub} = [];
+      $self->{no_more_boundaries} = 1;  # for next call
+      return 1;
+   }
+
+   # The first-lower-boundary query returns no row when nothing matches
+   # (empty table or a --where that filters everything); there is nothing
+   # to nibble then, and dereferencing the undef boundary would die.
+   if ( !$self->{next_lb} ) {
+      MKDEBUG && _d('No lower boundary; nothing to nibble');
+      $self->{no_more_boundaries} = 1;
+      return;
+   }
+
+   $self->{lb} = $self->{next_lb};
+
+   MKDEBUG && _d($self->{ub_sth}->{Statement}, 'params:',
+      join(', ', @{$self->{lb}}));
+   $self->{ub_sth}->execute(@{$self->{lb}});
+   my $boundary = $self->{ub_sth}->fetchall_arrayref();
+   MKDEBUG && _d('Next boundary:', Dumper($boundary));
+   if ( $boundary && @$boundary ) {
+      $self->{ub} = $boundary->[0]; # this nibble
+      if ( $boundary->[1] ) {
+         $self->{next_lb} = $boundary->[1]; # next nibble
+      }
+      else {
+         $self->{no_more_boundaries} = 1;  # for next call
+         MKDEBUG && _d('Last upper boundary:', Dumper($boundary->[0]));
+      }
+   }
+   else {
+      $self->{no_more_boundaries} = 1;  # for next call
+      $self->{ub} = $self->{last_ub};
+      MKDEBUG && _d('Last upper boundary:', Dumper($self->{ub}));
+   }
+   $self->{ub_sth}->finish();
+
+   return 1; # have boundary
+}
+
+# Finish every statement handle stored under a key ending in _sth.
+sub DESTROY {
+   my ( $self ) = @_;
+   foreach my $key ( keys %$self ) {
+      # Skip slots that are unset or already torn down.
+      if ( $key =~ m/_sth$/ && $self->{$key} ) {
+         $self->{$key}->finish();
+      }
+   }
+   return;
+}
+
+# Debug printer: writes its arguments to STDERR prefixed with the calling
+# package, line and PID.
+sub _d {
+   my ($package, undef, $line) = caller 0;
+   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
+        map { defined $_ ? $_ : 'undef' }
+        @_;
+   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
+}
+
+1;
+}
+# ###########################################################################
+# End NibbleIterator package
+# ###########################################################################
+
 # ###########################################################################
 # Daemon package
 # This package is a copy without comments from the original. The original
@@ -4370,12 +4010,16 @@ sub _d {
 # ###########################################################################
 
 # ###########################################################################
-# SchemaIterator r7512
-# Don't update this package!
+# SchemaIterator package
+# This package is a copy without comments from the original. The original
+# with comments and its test file can be found in the Bazaar repository at,
+# lib/SchemaIterator.pm
+# t/lib/SchemaIterator.t
+# See https://launchpad.net/percona-toolkit for more information.
 # ###########################################################################
+{
 package SchemaIterator;
 
-{ # package scope
 use strict;
 use warnings FATAL => 'all';
 use English qw(-no_match_vars);
@@ -4472,16 +4116,29 @@ sub _make_filters {
 sub next_schema_object {
    my ( $self ) = @_;
 
-   my %schema_object;
+   my $schema_obj;
    if ( $self->{file_itr} ) {
-      %schema_object = $self->_iterate_files();
+      $schema_obj= $self->_iterate_files();
    }
    else { # dbh
-      %schema_object = $self->_iterate_dbh();
+      $schema_obj= $self->_iterate_dbh();
    }
 
-   MKDEBUG && _d('Next schema object:', Dumper(\%schema_object));
-   return %schema_object;
+   if ( $schema_obj ) {
+      if ( $schema_obj->{ddl} && $self->{TableParser} ) {
+         $schema_obj->{tbl_struct}
+            = $self->{TableParser}->parse($schema_obj->{ddl});
+      }
+
+      delete $schema_obj->{ddl} unless $self->{keep_ddl};
+
+      if ( my $schema = $self->{Schema} ) {
+         $schema->add_schema_object($schema_obj);
+      }
+      MKDEBUG && _d('Next schema object:', $schema_obj->{db},
$schema_obj->{tbl}); + } + + return $schema_obj; } sub _iterate_files { @@ -4530,11 +4187,11 @@ sub _iterate_files { my ($engine) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; if ( !$engine || $self->engine_is_allowed($engine) ) { - return ( + return { db => $self->{db}, tbl => $tbl, ddl => $ddl, - ); + }; } } } @@ -4602,11 +4259,11 @@ sub _iterate_dbh { $ddl = $du->get_create_table($dbh, $q, $self->{db}, $tbl)->[1]; } - return ( + return { db => $self->{db}, tbl => $tbl, ddl => $ddl, - ); + }; } } @@ -4733,9 +4390,8 @@ sub _d { print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; } -} # package scope 1; - +} # ########################################################################### # End SchemaIterator package # ########################################################################### @@ -4975,42 +4631,20 @@ sub _d { # ########################################################################### package pt_table_checksum; +use strict; +use warnings FATAL => 'all'; use English qw(-no_match_vars); -use List::Util qw(max maxstr); -use Time::HiRes qw(gettimeofday sleep); -use Data::Dumper; -$Data::Dumper::Indent = 0; -$Data::Dumper::Quotekeys = 0; - use constant MKDEBUG => $ENV{MKDEBUG} || 0; -$OUTPUT_AUTOFLUSH = 1; - -# Global variables. -my $checksum_table_data; -my ( $fetch_sth, $update_sth, $savesince_sth ); -my ( $crc_wid, $md5sum_fmt ); -my $already_checksummed; -# %tables_to_checksum has the following structure: -# database => [ -# { table }, -# ... -# ], -# ... -my %tables_to_checksum; +use Time::HiRes qw(sleep); +use Data::Dumper; +$Data::Dumper::Indent = 1; +$Data::Dumper::Sortkeys = 1; +$Data::Dumper::Quotekeys = 0; sub main { @ARGV = @_; # set global ARGV for this package - # Reset global vars else tests which run this tool as a module - # will have strange, overlapping results. 
- $checksum_table_data = undef; - ( $fetch_sth, $update_sth, $savesince_sth ) = (undef, undef, undef); - ( $crc_wid, $md5sum_fmt ) = (undef, undef); - $already_checksummed = undef; - %tables_to_checksum = (); - - my $q = new Quoter(); my $exit_status = 0; # ######################################################################## @@ -5025,129 +4659,17 @@ sub main { my $dp = $o->DSNParser(); $dp->prop('set-vars', $o->get('set-vars')); - # This list contains all the command-line arguments that can be overridden - # by a table that contains arguments for each table to be checksummed. - # The long form of each argument is given. The values are read from the - # POD by finding the magical token. - my %overridable_args; - { - my $para = $o->read_para_after( - __FILE__, qr/MAGIC_overridable_args/); - foreach my $arg ( $para =~ m/([\w-]+)/g ) { - die "Magical argument $arg mentioned in POD is not a " - . "command-line argument" unless $o->has($arg); - $overridable_args{$arg} = 1; - } - }; - - # Post-process command-line options and arguments. - if ( $o->get('replicate') ) { - # --replicate says that it disables these options. We don't - # check got() because these opts aren't used in do_tbl_replicate() - # or its caller so they're completely useless with --replicate. - $o->set('lock', undef); - $o->set('wait', undef); - $o->set('slave-lag', undef); - } - else { - $o->set('lock', 1) if $o->get('wait'); - $o->set('slave-lag', 1) if $o->get('lock'); - } - - if ( !@ARGV ) { - $o->save_error("No hosts specified."); - } - - my @hosts; - my $dsn_defaults = $dp->parse_options($o); - { - foreach my $arg ( unique(@ARGV) ) { - push @hosts, $dp->parse($arg, $hosts[0], $dsn_defaults); - } - } - - if ( $o->get('explain-hosts') ) { - foreach my $host ( @hosts ) { - print "Server $host->{h}:\n ", $dp->as_string($host), "\n"; - } - return 0; - } - - # Checksumming table data is the normal operation. 
But if we're only to - # compare schemas, then we can skip a lot of work, like selecting an algo, - # replication stuff, etc. - $checksum_table_data = $o->get('schema') ? 0 : 1; - - if ( $o->get('checksum') ) { - $o->set('count', 0); - } - - if ( $o->get('explain') ) { - @hosts = $hosts[0]; - } - - # --replicate auto-enables --throttle-method slavelag unless user - # set --throttle-method explicitly. - $o->set('throttle-method', 'slavelag') - if $o->get('replicate') && !$o->got('throttle-method'); - - # These options are only needed if a --chunk-size is specified. - if ( !$o->get('chunk-size') ) { - $o->set('chunk-size-limit', undef); - $o->set('unchunkable-tables', 1); - } - if ( !$o->get('help') ) { - if ( $o->get('replicate-check') && !$o->get('replicate') ) { - $o->save_error("--replicate-check requires --replicate."); - } - if ( $o->get('save-since') && !$o->get('arg-table') ) { - $o->save_error("--save-since requires --arg-table."); - } - elsif ( $o->get('replicate') && @hosts > 1 ) { - $o->save_error("You can only specify one host with --replicate."); + if ( !@ARGV ) { + $o->save_error("No host specified"); } - if ( $o->get('resume-replicate') && !$o->get('replicate') ) { - $o->save_error("--resume-replicate requires --replicate."); - } - if ( $o->get('resume') && $o->get('replicate') ) { - $o->save_error('--resume does not work with --replicate. ' - . 'Use --resume-replicate instead.'); + if ( ($o->get('replicate') || '') !~ m/[\w`]\.[\w`]/ ) { + $o->save_error('The --replicate table must be database-qualified'); } - if ( my $throttle_method = $o->get('throttle-method') ) { - $throttle_method = lc $throttle_method; - if ( !grep { $throttle_method eq $_ } qw(none slavelag) ) { - $o->save_error("Invalid --throttle-method: $throttle_method"); - } - } - - if ( $o->get('check-slave-lag') && $o->get('throttle-method') eq 'none') { - # User specified --check-slave-lag DSN and --throttle-method none. - # They probably meant just --check-slave-lag DSN. 
- $o->save_error('-throttle-method=none contradicts --check-slave-lag ' - . 'because --check-slave-lag implies --throttle-method=slavelag'); - } - if ( $o->get('throttle-method') ne 'none' && !$o->get('replicate') ) { - # User did --throttle-method (explicitly) without --replicate. - $o->save_error('--throttle-method ', $o->get('throttle-method'), - ' requires --replicate'); - } - - # Make sure --replicate has a db. - if ( my $replicate_table = $o->get('replicate') ) { - my ($db, $tbl) = $q->split_unquote($replicate_table); - if ( !$db ) { - $o->save_error('The --replicate table must be database-qualified'); - } - } - - if ( $o->get('chunk-size-limit') ) { - my $factor = $o->get('chunk-size-limit'); - if ( $factor < 0 # can't be negative - || ($factor > 0 && $factor < 1) ) # can't be less than 1 - { + if ( my $limit = $o->get('chunk-size-limit') ) { + if ( $limit < 0 || ($limit > 0 && $limit < 1) ) { $o->save_error('--chunk-size-limit must be >= 1 or 0 to disable'); } } @@ -5159,16 +4681,6 @@ sub main { $o->save_error("--progress $EVAL_ERROR"); } } - - if ( my $chunk_range = $o->get('chunk-range') ) { - $chunk_range = lc $chunk_range; - my $para = $o->read_para_after(__FILE__, qr/MAGIC_chunk_range/); - my @vals = $para =~ m/\s+([a-z]+)\s+[A-Z]+/g; - if ( !grep { $chunk_range eq $_} @vals ) { - $o->save_error("Invalid value for --chunk-range. " - . "Valid values are: " . join(", ", @vals)); - } - } } $o->usage_or_errors(); @@ -5186,27 +4698,77 @@ sub main { } # ######################################################################## - # Ready to work now. + # Connect to MySQL. 
# ######################################################################## - my $vp = new VersionParser(); - my $tp = new TableParser(Quoter => $q); - my $tc = new TableChecksum(Quoter=> $q, VersionParser => $vp); - my $ms = new MasterSlave(VersionParser => $vp); - my $du = new MySQLDump(); - my $ch = new TableChunker(Quoter => $q, MySQLDump => $du); - my %common_modules = ( - ch => $ch, - dp => $dp, - du => $du, - o => $o, - ms => $ms, - q => $q, - tc => $tc, - tp => $tp, - vp => $vp, + my $dsn_defaults = $dp->parse_options($o); + my $dsn = $dp->parse(shift @ARGV, undef, $dsn_defaults); + my $dbh = get_cxn( + dsn => $dsn, + DSNParser => $dp, + OptionParser => $o, ); - my $main_dbh = get_cxn($hosts[0], %common_modules); + # ######################################################################## + # Create common modules. + # ######################################################################## + my $q = new Quoter(); + my $vp = new VersionParser(); + my $tp = new TableParser(Quoter => $q); + my $tn = new TableNibbler(TableParser => $tp, Quoter => $q); + my $rc = new RowChecksum(Quoter=> $q, OptionParser => $o); + my $ms = new MasterSlave(VersionParser => $vp); + my $du = new MySQLDump(); + my $rr = new Retry(); + my %common_modules = ( + DSNParser => $dp, + MySQLDump => $du, + OptionParser => $o, + MasterSlave => $ms, + Quoter => $q, + RowChecksum => $rc, + TableParser => $tp, + TableNibbler => $tn, + VersionParser => $vp, + Retry => $rr, + Quoter => $q, + ); + + # ######################################################################## + # Check that the replication table exists, or possibly create it. 
+ # ######################################################################## + my $repl_table = $q->quote($q->split_unquote($o->get('replicate'))); + eval { + check_repl_table( + dbh => $dbh, + repl_table => $repl_table, + %common_modules, + ); + }; + if ($EVAL_ERROR) { + $dbh->disconnect(); + die $EVAL_ERROR; + } + + # ######################################################################## + # Set transaction isolation level. + # http://code.google.com/p/maatkit/issues/detail?id=720 + # ######################################################################## + my $sql = "SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ"; + eval { + MKDEBUG && _d($dbh, $sql); + $dbh->do($sql); + }; + if ( $EVAL_ERROR ) { + $dbh->disconnect(); + die "Failed to $sql: $EVAL_ERROR\n" + . "If the --replicate table is InnoDB and the default server " + . "transaction isolation level is not REPEATABLE-READ then " + . "checksumming may fail with errors like \"Binary logging not " + . "possible. Message: Transaction level 'READ-COMMITTED' in " + . "InnoDB is not safe for binlog mode 'STATEMENT'\". In that " + . "case you will need to manually set the transaction isolation " + . "level to REPEATABLE-READ."; + } # ######################################################################### # Prepare --throttle-method. @@ -5218,14 +4780,14 @@ sub main { MKDEBUG && _d('Using --check-slave-lag DSN for throttle'); # OptionParser can't auto-copy DSN vals from a cmd line DSN # to an opt DSN, so we copy them manually. 
- my $dsn = $dp->copy($hosts[0], $o->get('check-slave-lag')); + my $dsn = $dp->copy($dsn, $o->get('check-slave-lag')); push @slaves, { dsn=>$dsn, dbh=>get_cxn($dsn, %common_modules) }; } else { MKDEBUG && _d('Recursing to slaves for throttle'); $ms->recurse_to_slaves( - { dbh => $main_dbh, - dsn => $hosts[0], + { dbh => $dbh, + dsn => $dsn, dsn_parser => $dp, recurse => $o->get('recurse'), method => $o->get('recursion-method'), @@ -5243,46 +4805,15 @@ sub main { } } - # ######################################################################## - # Load --arg-table information. - # ######################################################################## - my %args_for; - if ( my $arg_tbl = $o->get('arg-table') ) { - my %col_in_argtable; - my $rows = $main_dbh->selectall_arrayref( - "SELECT * FROM $arg_tbl", { Slice => {} }); - foreach my $row ( @$rows ) { - die "Invalid entry in --arg-table: db and tbl must be set" - unless $row->{db} && $row->{tbl}; - $args_for{$row->{db}}->{$row->{tbl}} = { - map { $_ => $row->{$_} } - grep { $overridable_args{$_} && defined $row->{$_} } - keys %$row - }; - if ( !%col_in_argtable ) { # do only once - foreach my $key ( keys %$row ) { - next if $key =~ m/^(db|tbl|ts)$/; - die "Column $key (from $arg_tbl given by --arg-table) is not " - . "an overridable argument" unless $overridable_args{$key}; - $col_in_argtable{$key} = 1; - } - } - } - if ( $col_in_argtable{since} ) { - $savesince_sth = $main_dbh->prepare( - "UPDATE $arg_tbl SET since=COALESCE(?, NOW()) WHERE db=? AND tbl=?"); - } - } - # ######################################################################## # Check for replication filters. 
# ######################################################################## - if ( $o->get('replicate') && $o->get('check-replication-filters') ) { + if ( $o->get('check-replication-filters') ) { MKDEBUG && _d("Recursing to slaves to check for replication filters"); my @all_repl_filters; $ms->recurse_to_slaves( - { dbh => $main_dbh, - dsn => $hosts[0], + { dbh => $dbh, + dsn => $dsn, dsn_parser => $dp, recurse => undef, # check for filters anywhere method => $o->get('recursion-method'), @@ -5330,14 +4861,14 @@ sub main { ? \&save_inconsistent_tbls : \&print_inconsistent_tbls; $ms->recurse_to_slaves( - { dbh => $main_dbh, - dsn => $hosts[0], + { dbh => $dbh, + dsn => $dsn, dsn_parser => $dp, recurse => $o->get('replicate-check'), method => $o->get('recursion-method'), callback => sub { my ( $dsn, $dbh, $level, $parent ) = @_; - my @tbls = $tc->find_replication_differences( + my @tbls = $rc->find_replication_differences( $dbh, $o->get('replicate')); return unless @tbls; $exit_status = 1; @@ -5345,12 +4876,11 @@ sub main { # the inconsistent tables. # o dbh db tbl args_for $callback->( - dsn => $dsn, - dbh => $dbh, - level => $level, - parent => $parent, - tbls => \@tbls, - args_for => \%args_for, + dsn => $dsn, + dbh => $dbh, + level => $level, + parent => $parent, + tbls => \@tbls, %common_modules ); }, @@ -5360,935 +4890,232 @@ sub main { } # ######################################################################## - # Otherwise get ready to checksum table data, unless we have only to check - # schemas in which case we can skip all such work, knowing already that we - # will use CRC32. + # Checksum query statementn and sths to update the checksum table. # ######################################################################## - if ( $checksum_table_data ) { - # Verify that CONCAT_WS is compatible across all servers. On older - # versions of MySQL it skips both empty strings and NULL; on newer - # just NULL. 
- if ( $o->get('verify') && @hosts > 1 ) { - verify_checksum_compat(hosts=>\@hosts, %common_modules); - } + my $checksum_dms = "REPLACE INTO $repl_table " + . "(db, tbl, chunk, boundaries, this_cnt, this_crc) " + . "SELECT ?, ?, ?, ?,"; + my $fetch_sth = $dbh->prepare( + "SELECT this_crc, this_cnt FROM $repl_table " + . "WHERE db = ? AND tbl = ? AND chunk = ?"); + my $update_sth = $dbh->prepare( + "UPDATE $repl_table SET master_crc = ?, master_cnt = ? " + . "WHERE db = ? AND tbl = ? AND chunk = ?"); - ($fetch_sth, $update_sth) - = check_repl_table(dbh=>$main_dbh, %common_modules); - } - else { - $crc_wid = 16; # Wider than the widest CRC32. - } # ######################################################################## - # If resuming a previous run, figure out what the previous run finished. - # ######################################################################## - if ( $o->get('replicate') && $o->get('resume-replicate') ) { - $already_checksummed = read_repl_table( - dbh => $main_dbh, - host => $hosts[0]->{h}, - %common_modules, - ); - } - elsif ( $o->get('resume') ) { - $already_checksummed = parse_resume_file($o->get('resume')); - } + # Callbacks for the nibble iterator. + # ######################################################################## + my $callbacks = { + exec_nibble => sub { + my (%args) = @_; + my $tbl = $args{tbl}; + # First, check if the chunk is too large. + if ( $o->get('chunk-size-limit') + && is_oversize_chunk(%args, %common_modules) ) { + $tbl->{checksum_results}->{oversize_chunks}++; + $tbl->{checksum_results}->{exit_status} |= 1; + return 0; # next boundary + } + # Exec and time the chunk checksum query. If it fails, retry. 
+ return exec_nibble( + %args, + %common_modules, + ); + }, + after_nibble => sub { + my (%args) = @_; + return after_nibble(%args); + }, + done => sub { + my (%args) = @_; + return print_checksum_results(%args); + }, + }; # ######################################################################## - # Set transaction isolation level. - # http://code.google.com/p/maatkit/issues/detail?id=720 + # Checksum each table. # ######################################################################## - if ( $o->get('replicate') ) { - my $sql = "SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ"; - eval { - MKDEBUG && _d($main_dbh, $sql); - $main_dbh->do($sql); - }; - if ( $EVAL_ERROR ) { - die "Failed to $sql: $EVAL_ERROR\n" - . "If the --replicate table is InnoDB and the default server " - . "transaction isolation level is not REPEATABLE-READ then " - . "checksumming may fail with errors like \"Binary logging not " - . "possible. Message: Transaction level 'READ-COMMITTED' in " - . "InnoDB is not safe for binlog mode 'STATEMENT'\". In that " - . "case you will need to manually set the transaction isolation " - . "level to REPEATABLE-READ."; - } - } - - # ######################################################################## - # Iterate through databases and tables and do the checksums. - # ######################################################################## - - # Get table info for all hosts, all slaves, unless we're in the special - # "repl-re-check" mode in which case %tables_to_checksum has already the - # inconsistent tables that we need to re-checksum. - get_all_tbls_info( - dbh => $main_dbh, - args_for => \%args_for, + my $schema_iter = new SchemaIterator( + dbh => $dbh, + keep_ddl => 1, %common_modules, - ) unless ($o->get('replicate-check') && $o->get('recheck')); + ); - # Finally, checksum the tables. 
-   foreach my $database ( keys %tables_to_checksum ) {
-      my $tables = $tables_to_checksum{$database};
-      $exit_status |= checksum_tables(
-         dbh     => $main_dbh,
-         db      => $database,
-         tbls    => $tables,
-         hosts   => \@hosts,
-         slaves  => \@slaves,
+   # next_schema_object() returns one table hashref per call (undef when
+   # done), so it must drive a while loop; a foreach over a single call
+   # would visit only the first table.
+   TABLE:
+   while ( my $tbl = $schema_iter->next_schema_object() ) {
+      use_repl_db(
+         dbh        => $dbh,
+         tbl        => $tbl,
+         repl_table => $repl_table,
          %common_modules
       );
+
+      # Results, stats, and info related to checksuming this table can
+      # be saved here.  print_checksum_results() uses this info.
+      $tbl->{checksum_results}->{exit_status} = 0;
+
+      my $checksum_cols = $rc->make_chunk_checksum(dbh => $dbh, tbl => $tbl);
+      my $nibble_iter = new NibbleIterator(
+         dbh       => $dbh,
+         tbl       => $tbl,
+         dms       => $checksum_dms,
+         select    => $checksum_cols,
+         callbacks => $callbacks,
+         %common_modules,
+      );
+      eval {
+         NIBBLE:
+         while ( my $checksum = $nibble_iter->next() ) {
+            # Copy this server's crc/cnt for the chunk into the master_*
+            # columns of the checksum table.
+            my $chunkno = $nibble_iter->nibble_number();
+            $fetch_sth->execute(@{$tbl}{qw(db tbl)}, $chunkno);
+            my ($crc, $cnt) = $fetch_sth->fetchrow_array();
+            $update_sth->execute($crc, $cnt, @{$tbl}{qw(db tbl)}, $chunkno);
+         }
+      };
+      if ($EVAL_ERROR) {
+         warn "Error checksumming $tbl->{db}.$tbl->{tbl}: $EVAL_ERROR\n";
+         $tbl->{checksum_results}->{exit_status} |= 1;
+      }
+
+      $exit_status |= $tbl->{checksum_results}->{exit_status};
    }
 
+   $fetch_sth->finish();
+   $update_sth->finish();
+   $dbh->disconnect();
+
    return $exit_status;
 }
 
 # ############################################################################
 # Subroutines
 # ############################################################################
-
-sub get_all_tbls_info {
-   my ( %args ) = @_;
-   foreach my $arg ( qw(o dbh q tp du ch args_for) ) {
-      die "I need a $arg argument" unless $args{$arg};
-   }
-   my $dbh = $args{dbh};
-   MKDEBUG && _d('Getting all schema objects');
+# Open and return a DBI handle for a DSN.
+# Named args: dsn (hashref), DSNParser, OptionParser.  For callers that
+# still use the older positional form get_cxn($dsn, %modules), a leading
+# hashref is accepted and treated as the dsn argument.
+# Prompts for a password when --ask-pass is set.  The handle is configured
+# to return lowercase hash keys.
+sub get_cxn {
+   my @params = @_;
+   unshift @params, 'dsn' if ref($params[0]) eq 'HASH';
+   my %args = @params;
+   my ($dsn, $dp, $o) = @args{qw(dsn DSNParser OptionParser)};
 
-   my $si = new SchemaIterator(
-      dbh          => $dbh,
-      OptionParser => $args{o},
-      Quoter       => $args{q},
-   );
-   while ( my %schema_obj = $si->next_schema_object() ) {
-      my $final_o = get_final_opts(
-         %args,
-         %schema_obj,
-      );
-      save_tbl_to_checksum(
-         %args,
-         %schema_obj,
-         final_o => $final_o,
-      );
+   if ( $o->get('ask-pass') ) {
+      $dsn->{p} = OptionParser::prompt_noecho("Enter password: ");
    }
-
-   return;
+   my $dbh = $dp->get_dbh($dp->get_cxn_params($dsn));
+   $dbh->{FetchHashKeyName} = 'NAME_lc';
+   return $dbh;
 }
 
-sub save_tbl_to_checksum {
-   my ( %args ) = @_;
-   foreach my $arg ( qw(q ch du final_o tp dbh db tbl du tp ch vp) ) {
+sub exec_nibble {
+   my (%args) = @_;
+   my @required_args = qw(dbh tbl sth lb ub Retry);
+   foreach my $arg ( @required_args ) {
       die "I need a $arg argument" unless $args{$arg};
    }
-   my $du      = $args{du};
-   my $tp      = $args{tp};
-   my $ch      = $args{ch};
-   my $final_o = $args{final_o};
-   my $dbh     = $args{dbh};
-   my $db      = $args{db};
-   my $tbl     = $args{tbl};
-   my $q       = $args{q};
-   my $vp      = $args{vp};
+   my ($dbh, $tbl, $sth, $lb, $ub, $retry) = @args{@required_args};
 
-   # Skip the table in which checksums are stored.
-   return if ($final_o->get('replicate')
-      && $final_o->get('replicate') eq "$db.$tbl");
+   return $retry->retry(
+      tries        => 2,
+      wait         => sub { return; },
+      retry_on_die => 1,
+      try          => sub {
+         # Reset the BIT_XOR user vars.
+         my $sql = 'SET @crc := "", @cnt := 0 /*!50108 , '
+                 . '@@binlog_format := "STATEMENT"*/';
+         MKDEBUG && _d($sql);
+         $dbh->do($sql);
 
-   eval { # Catch errors caused by tables being dropped during work.
+         my $boundaries = @$lb || @$ub ? join(',', @$lb, @$ub) : '1=1';
 
-      # Parse the table and determine a column that's chunkable. This is
-      # used not only for chunking, but also for --since.
-      my $create = $du->get_create_table($dbh, $q, $db, $tbl);
-      my $struct = $tp->parse($create);
-
-      # If there's a --where clause and the user didn't specify a chunk index
-      # a chunk they want, then get MySQL's chosen index for the where clause
-      # and make it the preferred index.
- # http://code.google.com/p/maatkit/issues/detail?id=378 - if ( $final_o->get('where') - && !$final_o->get('chunk-column') - && !$final_o->get('chunk-index') ) - { - my ($mysql_chosen_index) = $tp->find_possible_keys( - $dbh, $db, $tbl, $q, $final_o->get('where')); - MKDEBUG && _d("Index chosen by MySQL for --where:", - $mysql_chosen_index); - $final_o->set('chunk-index', $mysql_chosen_index) - if $mysql_chosen_index; - } - - - # Get the first chunkable column and index, taking into account - # --chunk-column and --chunk-index. If either of those options - # is specified, get_first_chunkable_column() will try to satisfy - # the request but there's no guarantee either will be selected. - # http://code.google.com/p/maatkit/issues/detail?id=519 - my ($chunk_col, $chunk_index) = $ch->get_first_chunkable_column( - %args, - chunk_column => $final_o->get('chunk-column'), - chunk_index => $final_o->get('chunk-index'), - tbl_struct => $struct, - ); - - my $index_hint; - if ( $final_o->get('use-index') && $chunk_col ) { - my $hint = $vp->version_ge($dbh, '4.0.9') ? 'FORCE' : 'USE'; - $index_hint = "$hint INDEX (" . $q->quote($chunk_index) . ")"; - } - MKDEBUG && _d('Index hint:', $index_hint); - - my @chunks = '1=1'; # Default. - my $rows_per_chunk = undef; - my $maxval = undef; - if ( $final_o->get('chunk-size') ) { - ($rows_per_chunk) = $ch->size_to_rows( - dbh => $dbh, - db => $db, - tbl => $tbl, - chunk_size => $final_o->get('chunk-size'), + # Execute the REPLACE...SELECT checksum query. + MKDEBUG && _d($sth->{Statement}, 'params:', + @{$tbl}{qw(db tbl)}, + $args{nibbleno}, + $boundaries, + @$lb, + @$ub, + ); + $sth->execute( + @{$tbl}{qw(db tbl)}, + $args{nibbleno}, + $boundaries, + @$lb, + @$ub, ); - if ( $chunk_col ) { - # Calculate chunks for this table. 
- my %params = $ch->get_range_statistics( - dbh => $dbh, - db => $db, - tbl => $tbl, - chunk_col => $chunk_col, - tbl_struct => $struct, - where => $final_o->get('where'), - ); - if ( !grep { !defined $params{$_} } qw(min max rows_in_range) ) { - @chunks = $ch->calculate_chunks( - dbh => $dbh, - db => $db, - tbl => $tbl, - tbl_struct => $struct, - chunk_col => $chunk_col, - chunk_size => $rows_per_chunk, - zero_chunk => $final_o->get('zero-chunk'), - chunk_range => $final_o->get('chunk-range'), - where => $final_o->get('where'), - %params, - ); - $maxval = $params{max}; + # Check if checksum query caused any warnings. + my $sql_warn = 'SHOW WARNINGS'; + MKDEBUG && _d($sql_warn); + my $warnings = $dbh->selectall_arrayref($sql_warn, { Slice => {} } ); + foreach my $warning ( @$warnings ) { + if ( $warning->{message} + =~ m/Data truncated for column 'boundaries'/ ) { + _d('Warning: WHERE clause too large for boundaries column;', + 'pt-table-sync may fail'); } - } - } - - push @{ $tables_to_checksum{$db} }, { - struct => $struct, - create => $create, - database => $db, - table => $tbl, - column => $chunk_col, - chunk_index => $chunk_index, - chunk_size => $rows_per_chunk, - maxval => $maxval, - index => $index_hint, - chunks => \@chunks, - final_o => $final_o, - }; - }; - if ( $EVAL_ERROR ) { - print_err($final_o, $EVAL_ERROR, $db, $tbl); - } - - return; -} - -# Checksum the tables in the given database. -# A separate report for each database and its tables is printed. -sub checksum_tables { - my ( %args ) = @_; - foreach my $arg ( qw(tc du o q db dbh hosts tbls) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $tc = $args{tc}; - my $du = $args{du}; - my $o = $args{o}; - my $db = $args{db}; - my $dbh = $args{dbh}; - my $hosts = $args{hosts}; - my $tbls = $args{tbls}; - my $q = $args{q}; - - my ($hdr, $explain); - my $exit_status = 0; - - # NOTE: remember, you can't 'next TABLE' inside the eval{}. 
- # NOTE: remember to use the final_o embedded within each $table, not $o - foreach my $table ( @$tbls ) { - MKDEBUG && _d("Doing", $db, '.', $table->{table}); - MKDEBUG && _d("Table:", Dumper($table)); - my $final_o = $table->{final_o}; - - my $is_chunkable_table = 1; # table should be chunkable unless... - - # If there's a chunk size but no chunk index and unchunkable tables - # aren't allowed (they're not by default), then table may still be - # chunkable if it's small, i.e. total rows in table <= chunk size. - if ( $table->{chunk_size} - && !$table->{chunk_index} - && !$final_o->get('unchunkable-tables') ) - { - $is_chunkable_table = is_chunkable_table( - dbh => $dbh, - db => $db, - tbl => $table->{table}, - chunk_size => $table->{chunk_size}, - where => $final_o->{where}, - Quoter => $q, - ); - MKDEBUG && _d("Unchunkable table small enough to chunk:", - $is_chunkable_table ? 'yes' : 'no'); - } - - if ( !$is_chunkable_table ) { - $exit_status |= 1; - print "# cannot chunk $table->{database} $table->{table}\n"; - } - else { - eval { - my $do_table = 1; - - # Determine the checksum strategy for every table because it - # might change given various --arg-table opts for each table. - my $strat_ref; - my ( $strat, $crc_type, $func, $opt_slice ); - if ( $checksum_table_data && $do_table ) { - $strat_ref = determine_checksum_strat( - dbh => $dbh, - tc => $tc, - o => $final_o, - ); - ( $strat, $crc_wid, $crc_type, $func, $opt_slice ) - = @$strat_ref{ qw(strat crc_wid crc_type func opt_slice) }; - MKDEBUG && _d("Checksum strat:", Dumper($strat_ref)); + elsif ( ($warning->{code} || 0) == 1592 ) { + # Error: 1592 SQLSTATE: HY000 (ER_BINLOG_UNSAFE_STATEMENT) + # Message: Statement may not be safe to log in statement format. + # Ignore this warning because we have purposely set + # statement-based replication. + MKDEBUG && _d('Ignoring warning:', $warning->{message}); } else { - # --schema doesn't use a checksum strategy, but do_tbl() - # requires a strat arg. 
- $strat = '--schema'; - } - $md5sum_fmt = "%-${crc_wid}s %s.%s.%s.%d\n"; - - # Design and print header unless we are resuming in which case - # we should have already re-printed the partial output of the - # resume file in parse_resume_file(). This only has to be done - # once and done here because we need $crc_wid which is determined - # by the checksum strat above. - if ( !$hdr ) { - if ( $o->get('tab') ) { - $hdr = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n"; - $explain = "%s\t%s\t%s\n"; - } - else { - my $max_tbl = max(5, map { length($_->{table}) } @$tbls); - my $max_db = max(8, length($db)); - my $max_host = max(4, map { length($_->{h}) } @$hosts); - $hdr = "%-${max_db}s %-${max_tbl}s %5s " - . "%-${max_host}s %-6s %10s %${crc_wid}s %4s %4s %4s %4s\n"; - $explain = "%-${max_db}s %-${max_tbl}s %s\n"; - } - my @hdr_args = qw(DATABASE TABLE CHUNK HOST ENGINE - COUNT CHECKSUM TIME WAIT STAT LAG); - unless ( $o->get('quiet') - || $o->get('explain') - || $o->get('checksum') - || $o->get('resume') ) - { - printf($hdr, @hdr_args) - or die "Cannot print: $OS_ERROR"; - } - } - - # Clean out the replication table entry for this table. - # http://code.google.com/p/maatkit/issues/detail?id=304 - if ( (my $replicate_table = $final_o->get('replicate')) - && !$final_o->get('explain') ) { - use_repl_db(%args); # USE the proper replicate db - my $max_chunkno = scalar @{$table->{chunks}} - 1; - my $del_sql = "DELETE FROM $replicate_table " - . "WHERE db=? AND tbl=? 
AND chunk > ?"; - MKDEBUG && _d($dbh, $del_sql, $db, $table->{table},$max_chunkno); - $dbh->do($del_sql, {}, $db, $table->{table}, $max_chunkno); - } - - # If --since is given, figure out either - # 1) for temporal sinces, if the table has an update time and that - # time is newer than --since, then checksum the whole table, - # otherwise skip it; or - # 2) for "numerical" sinces, which column to use: either the - # specified column (--sincecolumn) or the auto-discovered one, - # whichever exists in the table, in that order. - # Then, if --savesince is given, save either 1) the current timestamp - # or 2) the resulting WHERE clause. - if ( $final_o->get('since') ) { - if ( is_temporal($final_o->get('since')) ) { - MKDEBUG && _d('--since is temporal'); - my ( $stat ) - = $du->get_table_status($dbh, $q, $db, $table->{table}); - my $time = $stat->{update_time}; - if ( $time && $time lt $final_o->get('since') ) { - MKDEBUG && _d("Skipping table because --since value", - $final_o->get('since'), "is newer than", $time); - $do_table = 0; - $table->{chunks} = []; - } - } - else { - MKDEBUG && _d('--since is numerical'); - # For numerical sinces, choose the column to apply --since to. - # It may not be the column the user said to use! If the user - # didn't specify a column that's good to chunk on, we'll use - # something else instead. - - # $table->{column} is the first chunkable column returned from - # the call to get_first_chunkable_column() in - # save_tbl_to_checksum(). - my ( $sincecol ) = - grep { $_ && $table->{struct}->{is_col}->{$_} } - ( $table->{column}, $final_o->get('since-column') ); - - if ( $sincecol ) { - MKDEBUG && _d('Column for numerical --since:', - $db, '.', $table->{table}, '.', $sincecol); - # This ends up being an additional WHERE clause. - $table->{since} = $q->quote($sincecol) - . '>=' . 
$q->quote_val($final_o->get('since')); - } - else { - MKDEBUG && _d('No column for numerical --since for', - $db, '.', $table->{table}); - } - } - } - - # ################################################################## - # The query is independent of the chunk, so I make it once for every - # one. - # ################################################################## - my $query; - if ( $checksum_table_data && $do_table ) { - $query = $tc->make_checksum_query( - db => $db, - tbl => $table->{table}, - tbl_struct => $table->{struct}, - algorithm => $strat, - function => $func, - crc_wid => $crc_wid, - crc_type => $crc_type, - opt_slice => $opt_slice, - cols => $final_o->get('columns'), - sep => $final_o->get('separator'), - replicate => $final_o->get('replicate'), - float_precision => $final_o->get('float-precision'), - trim => $final_o->get('trim'), - ignorecols => $final_o->get('ignore-columns'), - ); - } - else { # --schema - $query = undef; - } - - $exit_status |= checksum_chunks( - %args, - tbl => $table, - query => $query, - hdr => $hdr, - explain => $explain, - final_o => $final_o, - strat => $strat, - ); - - # Save the --since value if - # 1) it's temporal and the tbl had changed since --since; or - # 2) it's "numerical" and it had a chunkable or nibble-able - # column and it wasn't empty - # See issues 121 and 122. 
- if ( $final_o->get('save-since') && $savesince_sth ) { - if ( is_temporal($final_o->get('since')) ) { - MKDEBUG && _d( - "Saving temporal --since value: current timestamp for", - $db, '.', $table->{table}); - $savesince_sth->execute(undef, - $db, $table->{table}); - } - elsif ( defined $table->{maxval} ) { - MKDEBUG && _d("Saving numerical --since value:", - $table->{maxval}, "for", $db, '.', $table->{table}); - $savesince_sth->execute($table->{maxval}, - $db, $table->{table}); - } - else { - MKDEBUG && _d("Cannot save --since value:", - $table->{maxval}, "for", $db, '.', $table->{table}); - } - } - }; - if ( $EVAL_ERROR ) { - print_err($o, $EVAL_ERROR, $db, $table->{table}); - } - } # chunkable table - } - - return $exit_status; -} - -sub checksum_chunks { - my ( %args ) = @_; - foreach my $arg ( qw(dp final_o ms o q db tbl hosts hdr explain) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $dp = $args{dp}; - my $du = $args{du}; - my $final_o = $args{final_o}; - my $ms = $args{ms}; - my $o = $args{o}; - my $q = $args{q}; - my $db = $args{db}; - my $dbh = $args{dbh}; - my @hosts = @{$args{hosts}}; - my $tbl = $args{tbl}; - - my $retry = new Retry(); - - # ################################################################## - # This loop may seem suboptimal, because it causes a new child to be - # forked for each table, for each host, for each chunk. It also - # causes the program to parallelize only within the chunk; that is, - # no two child processes are running on different chunks at a time. - # This is by design. It lets me unlock the table on the master - # between chunks. - # ################################################################## - my $exit_status = 0; - my $num_chunks = scalar(@{$tbl->{chunks}}); - my $throttle_method = $o->get('throttle-method'); - MKDEBUG && _d('Checksumming', $num_chunks, 'chunks'); - CHUNK: - foreach my $chunk_num ( 0 .. 
$num_chunks - 1 ) { - - if ( $final_o->get('chunk-size-limit') - && $final_o->get('chunk-size') - && $tbl->{chunk_size} - && !$final_o->get('explain') ) - { - my $is_oversize_chunk = is_oversize_chunk( - %args, - db => $tbl->{database}, - tbl => $tbl->{table}, - chunk => $tbl->{chunks}->[$chunk_num], - chunk_size => $tbl->{chunk_size}, - index_hint => $tbl->{index}, - where => [$final_o->get('where'), $tbl->{since}], - limit => $final_o->get('chunk-size-limit'), - Quoter => $q, - ); - if ( $is_oversize_chunk ) { - $exit_status |= 1; - if ( !$final_o->get('quiet') ) { - if ( $final_o->get('checksum') ) { - printf($md5sum_fmt, 'NULL', '', - @{$tbl}{qw(database table)}, $chunk_num) - or die "Cannot print: $OS_ERROR"; - } - else { - printf($args{hdr}, - @{$tbl}{qw(database table)}, $chunk_num, - $hosts[0]->{h}, $tbl->{struct}->{engine}, 'OVERSIZE', - 'NULL', 'NULL', 'NULL', 'NULL', 'NULL') - or die "Cannot print: $OS_ERROR"; - } - } - next CHUNK; - } - } - - if ( $throttle_method eq 'slavelag' ) { - my $pr; - if ( $o->get('progress') ) { - $pr = new Progress( - jobsize => scalar @{$args{slaves}}, - spec => $o->get('progress'), - name => "Wait for slave(s) to catch up", - ); - } - wait_for_slaves( - slaves => $args{slaves}, - max_lag => $o->get('max-lag'), - check_interval => $o->get('check-interval'), - DSNParser => $dp, - MasterSlave => $ms, - progress => $pr, - ); - } - - if ( ($num_chunks > 1 || $final_o->get('single-chunk')) - && $checksum_table_data - && defined $final_o->get('probability') - && rand(100) >= $final_o->get('probability') ) { - MKDEBUG && _d('Skipping chunk because of --probability'); - next CHUNK; - } - - if ( $num_chunks > 1 - && $checksum_table_data - && $final_o->get('modulo') - && ($chunk_num % $final_o->get('modulo') != $final_o->get('offset'))) - { - MKDEBUG && _d('Skipping chunk', $chunk_num, 'because of --modulo'); - next CHUNK; - } - - my $chunk_start_time = gettimeofday(); - MKDEBUG && _d('Starting chunk', $chunk_num, 'at', 
$chunk_start_time); - - if ( $final_o->get('replicate') ) { - # We're in --replicate mode. - - # If resuming, check if this db.tbl.chunk.host can be skipped. - if ( $o->get('resume-replicate') ) { - if ( already_checksummed($tbl->{database}, - $tbl->{table}, - $chunk_num, - $hosts[0]->{h}) ) { - print "# already checksummed:" - . " $tbl->{database}" - . " $tbl->{table}" - . " $chunk_num " - . $hosts[0]->{h} - . "\n" - unless $o->get('quiet'); - next CHUNK; + # die doesn't permit extra line breaks so warn then die. + warn "\nChecksum query caused a warning:\n" + . join("\n", + map { "\t$_: " . $warning->{$_} || '' } + qw(level code message) + ) + . "\n\tquery: " . $sth->{Statement} . "\n\n"; + die; } } - $hosts[0]->{dbh} ||= $dbh; - - do_tbl_replicate( - $chunk_num, - %args, - host => $hosts[0], - retry => $retry, - ); - } - else { - # We're in "normal" mode. Lock table and get position on the master. - - if ( !$final_o->get('explain') ) { - if ( $final_o->get('lock') ) { - my $sql = "LOCK TABLES " - . $q->quote($db, $tbl->{table}) . " READ"; - MKDEBUG && _d($sql); - $dbh->do($sql); - } - if ( $final_o->get('wait') ) { - $tbl->{master_status} = $ms->get_master_status($dbh); - } - } - - my %children; - HOST: - foreach my $i ( 0 .. $#hosts ) { - my $is_master = $i == 0; # First host is assumed to be master. - my $host = $hosts[$i]; - - # Open a single connection for each host. Re-use the - # connection for the master/single host. - if ( $is_master ) { - $dbh->{InactiveDestroy} = 1; # Ensure that this is set. - $host->{dbh} ||= $dbh; - } - else { - $host->{dbh} ||= get_cxn($host, %args); - } - - # If resuming, check if this db.tbl.chunk.host can be skipped. - if ( $final_o->get('resume') ) { - next HOST if already_checksummed($tbl->{database}, - $tbl->{table}, - $chunk_num, - $host->{h}); - } - - # Fork, but only if there's more than one host. - my $pid = @hosts > 1 ? 
fork() : undef; - - if ( @hosts == 1 || (defined($pid) && $pid == 0) ) { - # Do the work (I'm a child, or there's only one host) - - eval { - do_tbl( - $chunk_num, - $is_master, - %args, - dbh => $host->{dbh}, - host => $host, - ); - }; - if ( $EVAL_ERROR ) { - print_err($o, $EVAL_ERROR, $db, $tbl->{table}, - $dp->as_string($host)); - exit(1) if @hosts > 1; # exit only if I'm a child - } - - exit(0) if @hosts > 1; # exit only if I'm a child - } - elsif ( @hosts > 1 && !defined($pid) ) { - die("Unable to fork!"); - } - - # I already exited if I'm a child, so I'm the parent. - $children{$host->{h}} = $pid if @hosts > 1; - } - - # Wait for the children to exit. - foreach my $host ( keys %children ) { - my $pid = waitpid($children{$host}, 0); - MKDEBUG && _d("Child", $pid, "exited with", $CHILD_ERROR); - $exit_status ||= $CHILD_ERROR >> 8; - } - if ( ($final_o->get('lock') && !$final_o->get('explain')) ) { - my $sql = "UNLOCK TABLES"; - MKDEBUG && _d($dbh, $sql); - $dbh->do($sql); - } - } - - my $chunk_stop_time = gettimeofday(); - MKDEBUG && _d('Finished chunk at', $chunk_stop_time); - - # --sleep between chunks. Don't sleep if this is the last/only chunk. - if ( $chunk_num < $num_chunks - 1 ) { - if ( $final_o->get('sleep') && !$final_o->get('explain') ) { - MKDEBUG && _d('Sleeping', $final_o->get('sleep')); - sleep($final_o->get('sleep')); - } - elsif ( $final_o->get('sleep-coef') && !$final_o->get('explain') ) { - my $sleep_time - = ($chunk_stop_time - $chunk_start_time) - * $final_o->get('sleep-coef'); - MKDEBUG && _d('Sleeping', $sleep_time); - if ( $sleep_time < 0 ) { - warn "Calculated invalid sleep time: " - . "$sleep_time = ($chunk_stop_time - $chunk_start_time) * " - . $final_o->get('sleep-coef') - . ". Sleep time set to 1 second instead."; - $sleep_time = 1; - } - sleep($sleep_time); - } - } - } # End foreach CHUNK - - return $exit_status; -} - -# Override the command-line arguments with those from --arg-table -# if necessary. 
Returns a cloned OptionParser object ($final_o). -# This clone is only a partial OptionParser object. -sub get_final_opts { - my ( %args ) = @_; - foreach my $arg ( qw(o dbh db tbl args_for) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $o = $args{o}; - my $dbh = $args{dbh}; - my $db = $args{db}; - my $tbl = $args{tbl}; - my $args_for = $args{args_for}; - - my $final_o = $o->clone(); - if ( my $override = $args_for->{$db}->{$tbl} ) { - map { $final_o->set($_, $override->{$_}); } keys %$override; - } - - # --since and --offset are potentially expressions that should be - # evaluated by the DB server. This has to be done after the override - # from the --arg-table table. - foreach my $opt ( qw(since offset) ) { - # Don't get MySQL to evaluate if it's temporal, as 2008-08-01 --> 1999 - my $val = $final_o->get($opt); - if ( $val && !is_temporal($val) ) { - $final_o->set($opt, eval_expr($opt, $val, $dbh)); - } - } - - return $final_o; -} - -sub is_temporal { - my ( $val ) = @_; - return $val && $val =~ m/^\d{4}-\d{2}-\d{2}(?:.[0-9:]+)?/; -} - -sub print_inconsistent_tbls { - my ( %args ) = @_; - foreach my $arg ( qw(o dp dsn tbls) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $o = $args{o}; - my $dp = $args{dp}; - my $dsn = $args{dsn}; - my $tbls = $args{tbls}; - - return if $o->get('quiet'); - - my @headers = qw(db tbl chunk cnt_diff crc_diff boundaries); - print "Differences on " . $dp->as_string($dsn, [qw(h P F)]) . 
"\n"; - my $max_db = max(5, map { length($_->{db}) } @$tbls); - my $max_tbl = max(5, map { length($_->{tbl}) } @$tbls); - my $fmt = "%-${max_db}s %-${max_tbl}s %5s %8s %8s %s\n"; - printf($fmt, map { uc } @headers) or die "Cannot print: $OS_ERROR"; - foreach my $tbl ( @$tbls ) { - printf($fmt, @{$tbl}{@headers}) or die "Cannot print: $OS_ERROR"; - } - print "\n" or die "Cannot print: $OS_ERROR"; - - return; -} - -sub save_inconsistent_tbls { - my ( %args ) = @_; - foreach my $arg ( qw(dbh tbls) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $dbh = $args{dbh}; - my $tbls = $args{tbls}; - - foreach my $tbl ( @$tbls ) { - MKDEBUG && _d("Will recheck", $tbl->{db}, '.', $tbl->{tbl}, - "(chunk:", $tbl->{boundaries}, ')'); - my $final_o = get_final_opts( - %args, - db => $tbl->{db}, - tbl => $tbl->{tbl}, - ); - my $chunks = [ $tbl->{boundaries} ]; - save_tbl_to_checksum( - %args, - db => $tbl->{db}, - tbl => $tbl->{tbl}, - final_o => $final_o, - ); - } - return; -} - -# The value may be an expression like 'NOW() - INTERVAL 7 DAY' -# and we should evaluate it. 
-sub eval_expr { - my ( $name, $val, $dbh ) = @_; - my $result = $val; - eval { - ($result) = $dbh->selectrow_array("SELECT $val"); - MKDEBUG && _d("option", $name, "evaluates to:", $result); - }; - if ( $EVAL_ERROR && MKDEBUG ) { - chomp $EVAL_ERROR; - _d("Error evaluating option", $name, $EVAL_ERROR); - } - return $result; -} - -sub determine_checksum_strat { - my ( %args ) = @_; - foreach my $arg ( qw(o dbh tc) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $o = $args{o}; - my $dbh = $args{dbh}; - my $tc = $args{tc}; - - my $ret = { # return vals in easy-to-swallow hash form - strat => undef, - crc_type => 'varchar', - crc_wid => 16, - func => undef, - opt_slice => undef, - }; - - $ret->{strat} = $tc->best_algorithm( - algorithm => $o->get('algorithm'), - dbh => $dbh, - where => $o->get('where') || $o->get('since'), - chunk => $o->get('chunk-size'), - replicate => $o->get('replicate'), - count => $o->get('count'), + return 0; + }, + on_failure => sub { + $tbl->{checksum_results}->{checksum_errors}++; + $tbl->{checksum_results}->{exit_status} |= 1; + warn $EVAL_ERROR; + }, ); - - if ( $o->get('algorithm') && $o->get('algorithm') ne $ret->{strat} ) { - warn "--algorithm=".$o->get('algorithm')." can't be used; " - . "falling back to $ret->{strat}\n"; - } - - # If using a cryptographic hash strategy, decide what hash function to use, - # and if using BIT_XOR whether and which slice to place the user variable in. - if ( $tc->is_hash_algorithm( $ret->{strat} ) ) { - $ret->{func} = $tc->choose_hash_func( - function => $o->get('function'), - dbh => $dbh, - ); - if ( $o->get('function') && $o->get('function') ne $ret->{func} ) { - warn "Checksum function ".$o->get('function')." cannot be used; " - . 
"using $ret->{func}\n"; - } - $ret->{crc_wid} = $tc->get_crc_wid($dbh, $ret->{func}); - ($ret->{crc_type}) = $tc->get_crc_type($dbh, $ret->{func}); - - if ( $o->get('optimize-xor') && $ret->{strat} eq 'BIT_XOR' ) { - if ( $ret->{crc_type} !~ m/int$/ ) { - $ret->{opt_slice} - = $tc->optimize_xor(dbh => $dbh, function => $ret->{func}); - if ( !defined $ret->{opt_slice} ) { - warn "Cannot use --optimize-xor, disabling"; - $o->set('optimize-xor', 0); - } - } - else { - # FNV_64 doesn't need the optimize_xor gizmo. - $o->get('optimize-xor', 0); - } - } - } - - return $ret; } -sub verify_checksum_compat { - my ( %args ) = @_; - foreach my $arg ( qw(o hosts) ) { +sub after_nibble { + my (%args) = @_; + MKDEBUG && _d('After nibble'); +# my $o = $args{OptionParser}; +# +# if ( $throttle_method eq 'slavelag' ) { +# my $pr; +# if ( $o->get('progress') ) { +# $pr = new Progress( +# jobsize => scalar @{$args{slaves}}, +# spec => $o->get('progress'), +# name => "Wait for slave(s) to catch up", +# ); +# } +# wait_for_slaves( +# slaves => $args{slaves}, +# max_lag => $o->get('max-lag'), +# check_interval => $o->get('check-interval'), +# DSNParser => $dp, +# MasterSlave => $ms, +# progress => $pr, +# ); +# } + + return; +}; + +sub print_checksum_results { + my (%args) = @_; + my @required_args = qw(tbl); + foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my $o = $args{o}; - my $hosts = $args{hosts}; + my ($tbl) = @args{@required_args}; + + print "$tbl->{db}.$tbl->{tbl} $tbl->{checksum_results}->{exit_status}\n"; - my @verify_sums; - foreach my $host ( @$hosts ) { - my $dbh = get_cxn($host, %args); - my $sql = "SELECT MD5(CONCAT_WS(',', '1', ''))"; - MKDEBUG && _d($dbh, $sql); - my $cks = $dbh->selectall_arrayref($sql)->[0]->[0]; - push @verify_sums, { - host => $host->{h}, - ver => $dbh->{mysql_serverinfo}, - sum => $cks, - }; - } - if ( unique(map { $_->{sum} } @verify_sums ) > 1 ) { - my $max = max(map { length($_->{h}) } @$hosts); - die 
"Not all servers have compatible versions. Some return different\n" - . "checksum values for the same query, and cannot be compared. This\n" - . "behavior changed in MySQL 4.0.14. Here is info on each host:\n\n" - . join("\n", - map { sprintf("%-${max}s %-32s %s", @{$_}{qw(host sum ver)}) } - { host => 'HOST', sum => 'CHECKSUM', ver => 'VERSION'}, - @verify_sums - ) - . "\n\nYou can disable this check with --no-verify.\n"; - } return; } @@ -6298,20 +5125,15 @@ sub verify_checksum_compat { # Returns fetch and update statement handles. sub check_repl_table { my ( %args ) = @_; - foreach my $arg ( qw(o dbh tp q) ) { + my @required_args = qw(dbh repl_table OptionParser TableParser Quoter); + foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my $o = $args{o}; - my $dbh = $args{dbh}; - my $tp = $args{tp}; - my $q = $args{q}; + my ($dbh, $repl_table, $o, $tp, $q) = @args{@required_args}; + MKDEBUG && _d('Checking --replicate table', $repl_table); + use_repl_db(%args); - my $replicate_table = $o->get('replicate'); - return unless $replicate_table; - - use_repl_db(%args); # USE the proper replicate db - - my ($db, $tbl) = $q->split_unquote($replicate_table); + my ($db, $tbl) = $q->split_unquote($repl_table); my $tbl_exists = $tp->check_table( dbh => $dbh, db => $db, @@ -6320,17 +5142,15 @@ sub check_repl_table { if ( !$tbl_exists ) { if ( $o->get('create-replicate-table') ) { create_repl_table(%args) - or die "--create-replicate-table failed to create " - . $replicate_table; } else { - die "--replicate table $replicate_table does not exist; " + die "--replicate table $repl_table does not exist; " . "read the documentation or use --create-replicate-table " - . "to create it."; + . "to create it.\n"; } } else { - MKDEBUG && _d('--replicate table', $replicate_table, 'already exists'); + MKDEBUG && _d('--replicate table', $repl_table, 'already exists'); # Check it again but this time check the privs. 
my $have_tbl_privs = $tp->check_table( dbh => $dbh, @@ -6338,25 +5158,18 @@ sub check_repl_table { tbl => $tbl, all_privs => 1, ); - die "User does not have all necessary privileges on $replicate_table" + die "User does not have all necessary privileges on $repl_table" unless $have_tbl_privs; } # Clean out the replicate table globally. if ( $o->get('empty-replicate-table') ) { - my $del_sql = "DELETE FROM $replicate_table"; + my $del_sql = "DELETE FROM $repl_table"; MKDEBUG && _d($dbh, $del_sql); $dbh->do($del_sql); } - my $fetch_sth = $dbh->prepare( - "SELECT this_crc, this_cnt FROM $replicate_table " - . "WHERE db = ? AND tbl = ? AND chunk = ?"); - my $update_sth = $dbh->prepare( - "UPDATE $replicate_table SET master_crc = ?, master_cnt = ? " - . "WHERE db = ? AND tbl = ? AND chunk = ?"); - - return ($fetch_sth, $update_sth); + return; } # This sub should be called before any work is done with the @@ -6369,23 +5182,18 @@ sub check_repl_table { # See http://code.google.com/p/maatkit/issues/detail?id=982 sub use_repl_db { my ( %args ) = @_; - my @required_args = qw(dbh o q); + my @required_args = qw(dbh repl_table OptionParser Quoter); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my ($dbh, $o, $q) = @args{@required_args}; + my ($dbh, $repl_table, $o, $q) = @args{@required_args}; - my $replicate_table = $o->get('replicate'); - return unless $replicate_table; - - # db and tbl from --replicate - my ($db, $tbl) = $q->split_unquote($replicate_table); - + my ($db, $tbl) = $q->split_unquote($repl_table); if ( my $tbl = $args{tbl} ) { # Caller is checksumming this table, USE its db unless # --replicate-database is in effect. $db = $o->get('replicate-database') ? $o->get('replicate-database') - : $tbl->{database}; + : $tbl->{db}; } else { # Caller is doing something just to the --replicate table. @@ -6396,7 +5204,7 @@ sub use_repl_db { eval { my $sql = "USE " . 
$q->quote($db); - MKDEBUG && _d($dbh, $sql); + MKDEBUG && _d($sql); $dbh->do($sql); }; if ( $EVAL_ERROR ) { @@ -6414,487 +5222,28 @@ sub use_repl_db { return; } -# Returns 1 on successful creation of the replicate table, -# or 0 on failure. sub create_repl_table { my ( %args ) = @_; - foreach my $arg ( qw(o dbh) ) { + my @required_args = qw(dbh repl_table OptionParser); + foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } - my $o = $args{o}; - my $dbh = $args{dbh}; - - my $replicate_table = $o->get('replicate'); - - my $sql = $o->read_para_after( - __FILE__, qr/MAGIC_create_replicate/); - $sql =~ s/CREATE TABLE checksum/CREATE TABLE $replicate_table/; + my ($dbh, $repl_table, $o) = @args{@required_args}; + MKDEBUG && _d('Creating --replicate table', $repl_table); + my $sql = $o->read_para_after(__FILE__, qr/MAGIC_create_replicate/); + $sql =~ s/CREATE TABLE checksum/CREATE TABLE $repl_table/; $sql =~ s/;$//; MKDEBUG && _d($dbh, $sql); eval { $dbh->do($sql); }; if ( $EVAL_ERROR ) { - MKDEBUG && _d('--create-replicate-table failed:', $EVAL_ERROR); - return 0; - } - - return 1; -} - -sub read_repl_table { - my ( %args ) = @_; - foreach my $arg ( qw(o dbh host) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $o = $args{o}; - my $dbh = $args{dbh}; - my $host = $args{host}; - - my $replicate_table = $o->get('replicate'); - die "Cannot read replicate table because --replicate was not specified" - unless $replicate_table; - - # Read checksums from replicate table. - my $already_checksummed; - my $checksums - = $dbh->selectall_arrayref("SELECT db, tbl, chunk FROM $replicate_table"); - - # Save each finished checksum. 
- foreach my $checksum ( @$checksums ) { - my ( $db, $tbl, $chunk ) = @$checksum[0..2]; - $already_checksummed->{$db}->{$tbl}->{$chunk}->{$host} = 1; - } - - return $already_checksummed; -} - -sub parse_resume_file { - my ( $resume_file ) = @_; - - open my $resume_fh, '<', $resume_file - or die "Cannot open resume file $resume_file: $OS_ERROR"; - - # The resume file, being the output from a previous run, should - # have the columns DATABASE TABLE CHUNK HOST ... (in that order). - # We only need those first 4 columns. We re-print every line of - # the resume file so the end result will be the whole, finished - # output: what the previous run got done plus what we are about - # to resume and finish. - my $already_checksummed; - while ( my $line = <$resume_fh> ) { - # Re-print every line. - print $line; - - # If the line is a checksum line, parse from it the db, tbl, - # checksum and host. - if ( $line =~ m/^\S+\s+\S+\s+\d+\s+/ ) { - my ( $db, $tbl, $chunk, $host ) = $line =~ m/(\S+)/g; - $already_checksummed->{$db}->{$tbl}->{$chunk}->{$host} = 1; - } - } - - close $resume_fh; - MKDEBUG && _d("Already checksummed:", Dumper($already_checksummed)); - - return $already_checksummed; -} - -sub already_checksummed { - my ( $d, $t, $c, $h ) = @_; # db, tbl, chunk num, host - if ( exists $already_checksummed->{$d}->{$t}->{$c}->{$h} ) { - MKDEBUG && _d("Skipping chunk because of --resume:", $d, $t, $c, $h); - return 1; - } - return 0; -} - -sub do_tbl_replicate { - my ( $chunk_num, %args ) = @_; - foreach my $arg ( qw(q host query tbl hdr explain final_o ch retry) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $ch = $args{ch}; - my $final_o = $args{final_o}; - my $q = $args{q}; - my $host = $args{host}; - my $hdr = $args{hdr}; - my $explain = $args{explain}; - my $tbl = $args{tbl}; - my $retry = $args{retry}; - - MKDEBUG && _d('Replicating chunk', $chunk_num, - 'of table', $tbl->{database}, '.', $tbl->{table}, - 'on', $host->{h}, ':', $host->{P}); - - my $dbh 
= $host->{dbh}; - my $sql; - - use_repl_db(%args); # USE the proper replicate db - - my $cnt = 'NULL'; - my $crc = 'NULL'; - my $beg = time(); - $sql = $ch->inject_chunks( - query => $args{query}, - database => $tbl->{database}, - table => $tbl->{table}, - chunks => $tbl->{chunks}, - chunk_num => $chunk_num, - where => [$final_o->get('where'), $tbl->{since}], - index_hint => $tbl->{index}, - ); - - if ( MKDEBUG && $chunk_num == 0 ) { - _d("SQL for inject chunk 0:", $sql); - } - - my $where = $tbl->{chunks}->[$chunk_num]; - if ( $final_o->get('explain') ) { - if ( $chunk_num == 0 ) { - printf($explain, @{$tbl}{qw(database table)}, $sql) - or die "Cannot print: $OS_ERROR"; - } - printf($explain, @{$tbl}{qw(database table)}, $where) - or die "Cannot print: $OS_ERROR"; - return; - } - - # Actually run the checksum query - $retry->retry( - tries => 2, - wait => sub { return; }, - retry_on_die => 1, - try => sub { - $dbh->do('SET @crc := "", @cnt := 0 /*!50108 , ' - . '@@binlog_format := "STATEMENT"*/'); - $dbh->do($sql, {}, @{$tbl}{qw(database table)}, $where); - return 1; - }, - on_failure => sub { - die $EVAL_ERROR; # caught in checksum_tables() - }, - ); - - # Catch any warnings thrown.... - my $sql_warn = 'SHOW WARNINGS'; - MKDEBUG && _d($sql_warn); - my $warnings = $dbh->selectall_arrayref($sql_warn, { Slice => {} } ); - foreach my $warning ( @$warnings ) { - if ( $warning->{message} =~ m/Data truncated for column 'boundaries'/ ) { - _d("Warning: WHERE clause too large for boundaries column; ", - "pt-table-sync may fail; value:", $where); - } - elsif ( ($warning->{code} || 0) == 1592 ) { - # Error: 1592 SQLSTATE: HY000 (ER_BINLOG_UNSAFE_STATEMENT) - # Message: Statement may not be safe to log in statement format. - # Ignore this warning because we have purposely set statement-based - # replication. - MKDEBUG && _d('Ignoring warning:', $warning->{message}); - } - else { - # die doesn't permit extra line breaks so warn then die. 
- warn "\nChecksum query caused a warning:\n" - . join("\n", - map { "\t$_: " . $warning->{$_} || '' } qw(level code message) - ) - . "\n\tquery: $sql\n\n"; - die; - } - } - - # Update the master_crc etc columns - $fetch_sth->execute(@{$tbl}{qw(database table)}, $chunk_num); - ( $crc, $cnt ) = $fetch_sth->fetchrow_array(); - $update_sth->execute($crc, $cnt, @{$tbl}{qw(database table)}, $chunk_num); - - my $end = time(); - $crc ||= 'NULL'; - if ( !$final_o->get('quiet') && !$final_o->get('explain') ) { - if ( $final_o->get('checksum') ) { - printf($md5sum_fmt, $crc, $host->{h}, - @{$tbl}{qw(database table)}, $chunk_num) - or die "Cannot print: $OS_ERROR"; - } - else { - printf($hdr, - @{$tbl}{qw(database table)}, $chunk_num, - $host->{h}, $tbl->{struct}->{engine}, $cnt, $crc, - $end - $beg, 'NULL', 'NULL', 'NULL') - or die "Cannot print: $OS_ERROR"; - } + die "--create-replicate-table failed: $EVAL_ERROR"; } return; } -sub do_tbl { - my ( $chunk_num, $is_master, %args ) = @_; - foreach my $arg ( qw(du final_o ms q tc dbh host tbl hdr explain strat) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $du = $args{du}; - my $final_o = $args{final_o}; - my $ms = $args{ms}; - my $tc = $args{tc}; - my $tp = $args{tp}; - my $q = $args{q}; - my $host = $args{host}; - my $tbl = $args{tbl}; - my $explain = $args{explain}; - my $hdr = $args{hdr}; - my $strat = $args{strat}; - - MKDEBUG && _d('Checksumming chunk', $chunk_num, - 'of table', $tbl->{database}, '.', $tbl->{table}, - 'on', $host->{h}, ':', $host->{P}, - 'using algorithm', $strat); - - my $dbh = $host->{dbh}; - $dbh->do("USE " . $q->quote($tbl->{database})); - - my $cnt = 'NULL'; - my $crc = 'NULL'; - my $sta = 'NULL'; - my $lag = 'NULL'; - - # Begin timing the checksum operation. - my $beg = time(); - - # I'm a slave. Wait to catch up to the master. Calculate slave lag. 
- if ( !$is_master && !$final_o->get('explain') ) { - if ( $final_o->get('wait') ) { - MKDEBUG && _d('Waiting to catch up to master for --wait'); - my $result = $ms->wait_for_master( - master_status => $tbl->{master_status}, - slave_dbh => $dbh, - timeout => $final_o->get('wait'), - ); - $sta = $result && defined $result->{result} - ? $result->{result} - : 'NULL'; - } - - if ( $final_o->get('slave-lag') ) { - MKDEBUG && _d('Getting slave lag for --slave-lag'); - my $res = $ms->get_slave_status($dbh); - $lag = $res && defined $res->{seconds_behind_master} - ? $res->{seconds_behind_master} - : 'NULL'; - } - } - - # Time the checksum operation and the wait-for-master operation separately. - my $mid = time(); - - # Check that table exists on slave. - my $have_table = 1; - if ( !$is_master || !$checksum_table_data ) { - $have_table = $tp->check_table( - dbh => $dbh, - db => $tbl->{database}, - tbl => $tbl->{table}, - ); - warn "$tbl->{database}.$tbl->{table} does not exist on slave" - . ($host->{h} ? " $host->{h}" : '') - . ($host->{P} ? ":$host->{P}" : '') - unless $have_table; - } - - if ( $have_table ) { - # Do the checksum operation. - if ( $checksum_table_data ) { - if ( $strat eq 'CHECKSUM' ) { - if ( $final_o->get('crc') ) { - $crc = do_checksum(%args); - } - if ( $final_o->get('count') ) { - $cnt = do_count($chunk_num, %args); - } - } - elsif ( $final_o->get('crc') ) { - ( $cnt, $crc ) = do_var_crc($chunk_num, %args); - $crc ||= 'NULL'; - } - else { - $cnt = do_count($chunk_num, %args); - } - } - else { # Checksum SHOW CREATE TABLE for --schema. 
- my $create - = $du->get_create_table($dbh, $q, $tbl->{database}, $tbl->{table}); - $create = $create->[1]; - $create = $tp->remove_auto_increment($create); - $crc = $tc->crc32($create); - } - } - - my $end = time(); - - if ( !$final_o->get('quiet') && !$final_o->get('explain') ) { - if ( $final_o->get('checksum') ) { - printf($md5sum_fmt, $crc, $host->{h}, - @{$tbl}{qw(database table)}, $chunk_num) - or die "Cannot print: $OS_ERROR"; - } - else { - printf($hdr, - @{$tbl}{qw(database table)}, $chunk_num, - $host->{h}, $tbl->{struct}->{engine}, $cnt, $crc, - $end - $mid, $mid - $beg, $sta, $lag) - or die "Cannot print: $OS_ERROR"; - } - } - - return; -} - -sub get_cxn { - my ( $dsn, %args ) = @_; - foreach my $arg ( qw(o dp) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $dp = $args{dp}; - my $o = $args{o}; - - if ( $o->get('ask-pass') && !defined $dsn->{p} ) { - $dsn->{p} = OptionParser::prompt_noecho("Enter password for $dsn->{h}: "); - } - - my $ac = $o->get('lock') ? 0 : 1; - my $dbh = $dp->get_dbh( - $dp->get_cxn_params($dsn), { AutoCommit => $ac }); - $dp->fill_in_dsn($dbh, $dsn); - $dbh->{InactiveDestroy} = 1; # Prevent destroying on fork. 
- $dbh->{FetchHashKeyName} = 'NAME_lc'; - return $dbh; -} - -sub do_var_crc { - my ( $chunk_num, %args ) = @_; - foreach my $arg ( qw(ch dbh query tbl explain final_o) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $final_o = $args{final_o}; - my $ch = $args{ch}; - my $tbl = $args{tbl}; - my $explain = $args{explain}; - my $dbh = $args{dbh}; - - MKDEBUG && _d("do_var_crc for", $tbl->{table}); - - my $sql = $ch->inject_chunks( - query => $args{query}, - database => $tbl->{database}, - table => $tbl->{table}, - chunks => $tbl->{chunks}, - chunk_num => $chunk_num, - where => [$final_o->get('where'), $tbl->{since}], - index_hint => $tbl->{index}, - ); - - if ( MKDEBUG && $chunk_num == 0 ) { - _d("SQL for chunk 0:", $sql); - } - - if ( $final_o->get('explain') ) { - if ( $chunk_num == 0 ) { - printf($explain, @{$tbl}{qw(database table)}, $sql) - or die "Cannot print: $OS_ERROR"; - } - printf($explain, @{$tbl}{qw(database table)},$tbl->{chunks}->[$chunk_num]) - or die "Cannot print: $OS_ERROR"; - return; - } - - $dbh->do('set @crc := "", @cnt := 0'); - my $res = $dbh->selectall_arrayref($sql, { Slice => {} })->[0]; - return ($res->{cnt}, $res->{crc}); -} - -sub do_checksum { - my ( %args ) = @_; - foreach my $arg ( qw(dbh query tbl explain final_o) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $dbh = $args{dbh}; - my $final_o = $args{final_o}; - my $tbl = $args{tbl}; - my $query = $args{query}; - my $explain = $args{explain}; - - MKDEBUG && _d("do_checksum for", $tbl->{table}); - - if ( $final_o->get('explain') ) { - printf($explain, @{$tbl}{qw(database table)}, $query) - or die "Cannot print: $OS_ERROR"; - } - else { - my $res = $dbh->selectrow_hashref($query); - if ( $res ) { - my ($key) = grep { m/checksum/i } keys %$res; - return defined $res->{$key} ? 
$res->{$key} : 'NULL'; - } - } - - return; -} - -sub do_count { - my ( $chunk_num, %args ) = @_; - foreach my $arg ( qw(q dbh tbl explain final_o) ) { - die "I need a $arg argument" unless $args{$arg}; - } - my $final_o = $args{final_o}; - my $tbl = $args{tbl}; - my $explain = $args{explain}; - my $dbh = $args{dbh}; - my $q = $args{q}; - - MKDEBUG && _d("do_count for", $tbl->{table}); - - my $sql = "SELECT COUNT(*) FROM " - . $q->quote(@{$tbl}{qw(database table)}); - if ( $final_o->get('where') || $final_o->get('since') ) { - my $where_since = ($final_o->get('where'), $final_o->get('since')); - $sql .= " WHERE (" - . join(" AND ", map { "($_)" } grep { $_ } @$where_since ) - . ")"; - } - if ( $final_o->get('explain') ) { - printf($explain, @{$tbl}{qw(database table)}, $sql) - or die "Cannot print: $OS_ERROR"; - } - else { - return $dbh->selectall_arrayref($sql)->[0]->[0]; - } - - return; -} - -sub unique { - my %seen; - grep { !$seen{$_}++ } @_; -} - -# Tries to extract the MySQL error message and print it -sub print_err { - my ( $o, $msg, $db, $tbl, $host ) = @_; - return if !defined $msg - # Honor --quiet in the (common?) event of dropped tables or deadlocks - or ($o->get('quiet') - && $EVAL_ERROR =~ m/: Table .*? doesn't exist|Deadlock found/); - $msg =~ s/^.*?failed: (.*?) at \S+ line (\d+).*$/$1 at line $2/s; - $msg =~ s/\s+/ /g; - if ( $db && $tbl ) { - $msg .= " while doing $db.$tbl"; - } - if ( $host ) { - $msg .= " on $host"; - } - print STDERR $msg, "\n"; -} - # Returns when Seconds_Behind_Master on all the given slaves # is < max_lag, waits check_interval seconds between checks # if a slave is lagging too much. @@ -6957,83 +5306,42 @@ sub wait_for_slaves { # %args - Arguments # # Required Arguments: -# * dbh - dbh -# * db - db name, not quoted -# * tbl - tbl name, not quoted -# * chunk_size - chunk size in number of rows -# * chunk - chunk, e.g. 
"`a` > 10" -# * limit - oversize if rows > factor * chunk_size -# * Quoter - object -# -# Optional Arguments: -# * where - Arrayref of WHERE clauses added to chunk -# * index_hint - FORCE INDEX clause +# * dbh - dbh +# * tbl - Tbl ref +# * sth - sth +# * lb - Lower boundary arrayref +# * ub - Upper boundary arrayref +# * OptionParser - # # Returns: # True if EXPLAIN rows is >= chunk_size * limit, else false sub is_oversize_chunk { my ( %args ) = @_; - my @required_args = qw(dbh db tbl chunk_size chunk limit Quoter); + my @required_args = qw(explain_sth lb ub OptionParser); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } + my ($expl_sth, $lb, $ub, $o) = @args{@required_args}; - my $where = [$args{chunk}, $args{where} ? @{$args{where}} : ()]; - my $expl; + my $expl_res; eval { - $expl = _explain(%args, where => $where); + MKDEBUG && _d($expl_sth->{Statement}); + $expl_sth->execute(@$lb, @$ub); + $expl_res = $expl_sth->fetchrow_hashref(); + $expl_sth->finish(); }; if ( $EVAL_ERROR ) { # This shouldn't happen in production but happens in testing because # we chunk tables that don't actually exist. - MKDEBUG && _d("Failed to EXPLAIN chunk:", $EVAL_ERROR); - return $args{chunk}; + warn "Failed to " . $expl_sth->{Statement} . ": $EVAL_ERROR"; + return 0; } - MKDEBUG && _d("Chunk", $args{chunk}, "covers", ($expl->{rows} || 0), "rows"); + MKDEBUG && _d('EXPLAIN result:', Dumper($expl_res)); - return ($expl->{rows} || 0) >= $args{chunk_size} * $args{limit} ? 1 : 0; + return ($expl_res->{rows} || 0) + >= $o->get('chunk-size') * $o->get('chunk-size-limit') ? 1 : 0; } -# Sub: is_chunkable_table -# Determine if the table is chunkable. 
-# -# Parameters: -# %args - Arguments -# -# Required Arguments: -# * dbh - dbh -# * db - db name, not quoted -# * tbl - tbl name, not quoted -# * chunk_size - chunk size in number of rows -# * Quoter - object -# -# Optional Arguments: -# * where - Arrayref of WHERE clauses added to chunk -# * index_hint - FORCE INDEX clause -# -# Returns: -# True if EXPLAIN rows is <= chunk_size, else false -sub is_chunkable_table { - my ( %args ) = @_; - my @required_args = qw(dbh db tbl chunk_size Quoter); - foreach my $arg ( @required_args ) { - die "I need a $arg argument" unless $args{$arg}; - } - - my $expl; - eval { - $expl = _explain(%args); - }; - if ( $EVAL_ERROR ) { - # This shouldn't happen in production but happens in testing because - # we chunk tables that don't actually exist. - MKDEBUG && _d("Failed to EXPLAIN table:", $EVAL_ERROR); - return; # errr on the side of caution: not chunkable if not explainable - } - MKDEBUG && _d("Table has", ($expl->{rows} || 0), "rows"); - - return ($expl->{rows} || 0) <= $args{chunk_size} ? 1 : 0; -} # Sub: _explain # EXPLAIN a chunk or table. @@ -7101,16 +5409,13 @@ pt-table-checksum - Perform an online replication consistency check, or checksum =head1 SYNOPSIS -Usage: pt-table-checksum [OPTION...] DSN [DSN...] +Usage: pt-table-checksum [OPTION...] DSN pt-table-checksum checksums MySQL tables efficiently on one or more hosts. Each host is specified as a DSN and missing values are inherited from the first host. If you specify multiple hosts, the first is assumed to be the master. -B Are you checksumming slaves against a master? Then be sure to learn -what L<"--replicate"> does. It is probably the option you want to use. 
- Checksum all slaves against the master: pt-table-checksum \ @@ -7125,12 +5430,6 @@ Checksum all slaves against the master: --replicat mydb.checksums \ --replicate-check 2 -Checksum all databases and tables on two servers and print the differences: - - pt-table-checksum h=host1,u=user h=host2 | pt-checksum-filter - -See L<"SPECIFYING HOSTS"> for more on the syntax of the host arguments. - =head1 RISKS The following section is included to inform users about the potential risks, @@ -7273,30 +5572,6 @@ C is the default checksum function to use, and should be enough for most cases. If you need stronger guarantees that your data is identical, you should use one of the other functions. -=head1 ALGORITHM SELECTION - -The L<"--algorithm"> option allows you to specify which algorithm you would -like to use, but it does not guarantee that pt-table-checksum will use this -algorithm. pt-table-checksum will ultimately select the best algorithm possible -given various factors such as the MySQL version and other command line options. - -The three basic algorithms in descending order of preference are CHECKSUM, -BIT_XOR and ACCUM. CHECKSUM cannot be used if any one of these criteria -is true: - - * --where is used - * --since is used - * --chunk-size is used - * --replicate is used - * --count is used - * MySQL version less than 4.1.1 - -The BIT_XOR algorithm also requires MySQL version 4.1.1 or later. - -After checking these criteria, if the requested L<"--algorithm"> remains then it -is used, otherwise the first remaining algorithm with the highest preference -is used. - =head1 CONSISTENT CHECKSUMS If you are using this tool to verify your slaves still have the same data as the @@ -7648,17 +5923,6 @@ Pause checksumming until the specified slave's lag is less than L<"--max-lag">. If this option is specified and L<"--throttle-method"> is set to C then L<"--throttle-method"> only checks this slave. 
-=item --checksum - -group: Output - -Print checksums and table names in the style of md5sum (disables -L<"--[no]count">). - -Makes the output behave more like the output of C<md5sum>. The checksum is -first on the line, followed by the host, database, table, and chunk number, -concatenated with dots. - =item --chunk-column type: string @@ -7685,44 +5949,9 @@ C clause. Be careful when using this option; a poor choice of index could cause bad performance. This is probably best to use when you are checksumming only a single table, not an entire server. -=item --chunk-range - -type: string; default: open - -Set which ends of the chunk range are open or closed. Possible values are -one of MAGIC_chunk_range: - - VALUE OPENS/CLOSES - ========== ====================== - open Both ends are open - openclosed Low end open, high end closed - -By default pt-table-checksum uses an open range of chunks like: - - `id` < '10' - `id` >= '10' AND < '20' - `id` >= '20' - -That range is open because the last chunk selects any row with id greater than -(or equal to) 20. An open range can be a problem in cases where a lot of new -rows are inserted with IDs greater than 20 while pt-table-checksumming is -running because the final open-ended chunk will select all the newly inserted -rows. (The less common case of inserting rows with IDs less than 10 would -require a C<closedopen> range but that is not currently implemented.) -Specifying C<openclosed> will cause the final chunk to be closed like: - - `id` >= '20' AND `id` <= N - -N is the C<MAX(`id`)> that pt-table-checksum used when it first chunked -the rows. Therefore, it will only chunk the range of rows that existed when -the tool started and not any newly inserted rows (unless those rows happen -to be inserted with IDs less than N). - -See also L<"--chunk-size-limit">. - =item --chunk-size -type: string +type: string; default: 1000 Approximate number of rows or size of data to checksum at a time. Allowable suffixes are k, M, G. Disallows C<--algorithm CHECKSUM>.
@@ -7788,25 +6017,6 @@ type: Array; group: Config Read this comma-separated list of config files; if specified, this must be the first option on the command line. -=item --[no]count - -Count rows in tables. This is built into ACCUM and BIT_XOR, but requires an -extra query for CHECKSUM. - -This is disabled by default to avoid an extra COUNT(*) query when -L<"--algorithm"> is CHECKSUM. If you have only MyISAM tables and live checksums -are enabled, both CHECKSUM and COUNT will be very fast, but otherwise you may -want to use one of the other algorithms. - -=item --[no]crc - -default: yes - -Do a CRC (checksum) of tables. - -Take the checksum of the rows as well as their count. This is enabled by -default. If you disable it, you'll just get COUNT(*) queries. - =item --create-replicate-table Create the replicate table given by L<"--replicate"> if it does not exist. @@ -8134,9 +6344,9 @@ If it doesn't find any slaves, the other methods will be tried. =item --replicate -type: string +type: string; default: percona_toolkit.checksums -Replicate checksums to slaves (disallows --algorithm CHECKSUM). +Replicate checksums to slaves. This option enables a completely different checksum strategy for a consistent, lock-free checksum across a master and its slaves. Instead of running the @@ -8694,7 +6904,7 @@ Replace C with the name of any tool. =head1 AUTHORS -Baron Schwartz +Baron Schwartz and Daniel Nichter =head1 ACKNOWLEDGMENTS