From 240062f847e3dc7e64e178b5562b34d80edfb2c2 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 26 Mar 2012 10:06:10 -0600 Subject: [PATCH 1/4] Add pt-fingerprint. --- bin/pt-fingerprint | 2101 +++++++++++++++++++++++++++++++++++++ t/pt-fingerprint/basics.t | 31 + 2 files changed, 2132 insertions(+) create mode 100755 bin/pt-fingerprint create mode 100644 t/pt-fingerprint/basics.t diff --git a/bin/pt-fingerprint b/bin/pt-fingerprint new file mode 100755 index 00000000..e91be727 --- /dev/null +++ b/bin/pt-fingerprint @@ -0,0 +1,2101 @@ +#!/usr/bin/env perl + +# This program is part of Percona Toolkit: http://www.percona.com/software/ +# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal +# notices and disclaimers. + +use strict; +use warnings FATAL => 'all'; +use constant MKDEBUG => $ENV{MKDEBUG} || 0; + +# ########################################################################### +# OptionParser package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/OptionParser.pm +# t/lib/OptionParser.t +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### +{ +package OptionParser; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use constant PTDEBUG => $ENV{PTDEBUG} || 0; + +use List::Util qw(max); +use Getopt::Long; + +my $POD_link_re = '[LC]<"?([^">]+)"?>'; + +sub new { + my ( $class, %args ) = @_; + my @required_args = qw(); + foreach my $arg ( @required_args ) { + die "I need a $arg argument" unless $args{$arg}; + } + + my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/; + $program_name ||= $PROGRAM_NAME; + my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.'; + + my %attributes = ( + 'type' => 1, + 'short form' => 1, + 'group' => 1, + 'default' => 1, + 'cumulative' => 1, + 'negatable' => 1, + ); + + my $self = { + head1 => 'OPTIONS', # These args are used internally + skip_rules => 0, # to instantiate another Option- + item => '--(.*)', # Parser obj that parses the + attributes => \%attributes, # DSN OPTIONS section. Tools + parse_attributes => \&_parse_attribs, # don't tinker with these args. + + %args, + + strict => 1, # disabled by a special rule + program_name => $program_name, + opts => {}, + got_opts => 0, + short_opts => {}, + defaults => {}, + groups => {}, + allowed_groups => {}, + errors => [], + rules => [], # desc of rules for --help + mutex => [], # rule: opts are mutually exclusive + atleast1 => [], # rule: at least one opt is required + disables => {}, # rule: opt disables other opts + defaults_to => {}, # rule: opt defaults to value of other opt + DSNParser => undef, + default_files => [ + "/etc/percona-toolkit/percona-toolkit.conf", + "/etc/percona-toolkit/$program_name.conf", + "$home/.percona-toolkit.conf", + "$home/.$program_name.conf", + ], + types => { + string => 's', # standard Getopt type + int => 'i', # standard Getopt type + float => 'f', # standard Getopt type + Hash => 'H', # hash, formed from a comma-separated list + hash => 'h', # hash as above, but only if a value is given + Array => 'A', # array, similar to Hash + array => 'a', # array, similar to hash + DSN => 'd', # DSN + size => 'z', # size with kMG suffix (powers of 2^10) + time => 'm', # time, with an optional suffix of s/h/m/d + }, + }; + + return bless $self, $class; +} + +sub get_specs { + my ( $self, $file ) = @_; + $file ||= $self->{file} || __FILE__; + my @specs = $self->_pod_to_specs($file); + $self->_parse_specs(@specs); + + open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR"; + my $contents = do { local $/ = undef; <$fh> }; + close $fh; + if ( $contents =~ m/^=head1 DSN OPTIONS/m ) { + PTDEBUG && _d('Parsing DSN OPTIONS'); + my $dsn_attribs = { + dsn => 1, + copy => 1, + }; + my $parse_dsn_attribs = sub { + my ( $self, $option, $attribs ) = @_; + map { + my $val = $attribs->{$_}; + if ( $val ) { + $val = $val eq 'yes' ? 1 + : $val eq 'no' ? 0 + : $val; + $attribs->{$_} = $val; + } + } keys %$attribs; + return { + key => $option, + %$attribs, + }; + }; + my $dsn_o = new OptionParser( + description => 'DSN OPTIONS', + head1 => 'DSN OPTIONS', + dsn => 0, # XXX don't infinitely recurse! + item => '\* (.)', # key opts are a single character + skip_rules => 1, # no rules before opts + attributes => $dsn_attribs, + parse_attributes => $parse_dsn_attribs, + ); + my @dsn_opts = map { + my $opts = { + key => $_->{spec}->{key}, + dsn => $_->{spec}->{dsn}, + copy => $_->{spec}->{copy}, + desc => $_->{desc}, + }; + $opts; + } $dsn_o->_pod_to_specs($file); + $self->{DSNParser} = DSNParser->new(opts => \@dsn_opts); + } + + if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) { + $self->{version} = $1; + PTDEBUG && _d($self->{version}); + } + + return; +} + +sub DSNParser { + my ( $self ) = @_; + return $self->{DSNParser}; +}; + +sub get_defaults_files { + my ( $self ) = @_; + return @{$self->{default_files}}; +} + +sub _pod_to_specs { + my ( $self, $file ) = @_; + $file ||= $self->{file} || __FILE__; + open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR"; + + my @specs = (); + my @rules = (); + my $para; + + local $INPUT_RECORD_SEPARATOR = ''; + while ( $para = <$fh> ) { + next unless $para =~ m/^=head1 $self->{head1}/; + last; + } + + while ( $para = <$fh> ) { + last if $para =~ m/^=over/; + next if $self->{skip_rules}; + chomp $para; + $para =~ s/\s+/ /g; + $para =~ s/$POD_link_re/$1/go; + PTDEBUG && _d('Option rule:', $para); + push @rules, $para; + } + + die "POD has no $self->{head1} section" unless $para; + + do { + if ( my ($option) = $para =~ m/^=item $self->{item}/ ) { + chomp $para; + PTDEBUG && _d($para); + my %attribs; + + $para = <$fh>; # read next paragraph, possibly attributes + + if ( $para =~ m/: / ) { # attributes + $para =~ s/\s+\Z//g; + %attribs = map { + my ( $attrib, $val) = split(/: /, $_); + die "Unrecognized attribute for --$option: $attrib" + unless $self->{attributes}->{$attrib}; + ($attrib, $val); + } split(/; /, $para); + if ( $attribs{'short form'} ) { + $attribs{'short form'} =~ s/-//; + } + $para = <$fh>; # read next paragraph, probably short help desc + } + else { + PTDEBUG && _d('Option has no attributes'); + } + + $para =~ s/\s+\Z//g; + $para =~ s/\s+/ /g; + $para =~ s/$POD_link_re/$1/go; + + $para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s; + PTDEBUG && _d('Short help:', $para); + + die "No description after option spec $option" if $para =~ m/^=item/; + + if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) { + $option = $base_option; + $attribs{'negatable'} = 1; + } + + push @specs, { + spec => $self->{parse_attributes}->($self, $option, \%attribs), + desc => $para + . (defined $attribs{default} ? " (default $attribs{default})" : ''), + group => ($attribs{'group'} ? $attribs{'group'} : 'default'), + }; + } + while ( $para = <$fh> ) { + last unless $para; + if ( $para =~ m/^=head1/ ) { + $para = undef; # Can't 'last' out of a do {} block. + last; + } + last if $para =~ m/^=item /; + } + } while ( $para ); + + die "No valid specs in $self->{head1}" unless @specs; + + close $fh; + return @specs, @rules; +} + +sub _parse_specs { + my ( $self, @specs ) = @_; + my %disables; # special rule that requires deferred checking + + foreach my $opt ( @specs ) { + if ( ref $opt ) { # It's an option spec, not a rule. + PTDEBUG && _d('Parsing opt spec:', + map { ($_, '=>', $opt->{$_}) } keys %$opt); + + my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/; + if ( !$long ) { + die "Cannot parse long option from spec $opt->{spec}"; + } + $opt->{long} = $long; + + die "Duplicate long option --$long" if exists $self->{opts}->{$long}; + $self->{opts}->{$long} = $opt; + + if ( length $long == 1 ) { + PTDEBUG && _d('Long opt', $long, 'looks like short opt'); + $self->{short_opts}->{$long} = $long; + } + + if ( $short ) { + die "Duplicate short option -$short" + if exists $self->{short_opts}->{$short}; + $self->{short_opts}->{$short} = $long; + $opt->{short} = $short; + } + else { + $opt->{short} = undef; + } + + $opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0; + $opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0; + $opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0; + + $opt->{group} ||= 'default'; + $self->{groups}->{ $opt->{group} }->{$long} = 1; + + $opt->{value} = undef; + $opt->{got} = 0; + + my ( $type ) = $opt->{spec} =~ m/=(.)/; + $opt->{type} = $type; + PTDEBUG && _d($long, 'type:', $type); + + + $opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ ); + + if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) { + $self->{defaults}->{$long} = defined $def ? $def : 1; + PTDEBUG && _d($long, 'default:', $def); + } + + if ( $long eq 'config' ) { + $self->{defaults}->{$long} = join(',', $self->get_defaults_files()); + } + + if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) { + $disables{$long} = $dis; + PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis); + } + + $self->{opts}->{$long} = $opt; + } + else { # It's an option rule, not a spec. + PTDEBUG && _d('Parsing rule:', $opt); + push @{$self->{rules}}, $opt; + my @participants = $self->_get_participants($opt); + my $rule_ok = 0; + + if ( $opt =~ m/mutually exclusive|one and only one/ ) { + $rule_ok = 1; + push @{$self->{mutex}}, \@participants; + PTDEBUG && _d(@participants, 'are mutually exclusive'); + } + if ( $opt =~ m/at least one|one and only one/ ) { + $rule_ok = 1; + push @{$self->{atleast1}}, \@participants; + PTDEBUG && _d(@participants, 'require at least one'); + } + if ( $opt =~ m/default to/ ) { + $rule_ok = 1; + $self->{defaults_to}->{$participants[0]} = $participants[1]; + PTDEBUG && _d($participants[0], 'defaults to', $participants[1]); + } + if ( $opt =~ m/restricted to option groups/ ) { + $rule_ok = 1; + my ($groups) = $opt =~ m/groups ([\w\s\,]+)/; + my @groups = split(',', $groups); + %{$self->{allowed_groups}->{$participants[0]}} = map { + s/\s+//; + $_ => 1; + } @groups; + } + if( $opt =~ m/accepts additional command-line arguments/ ) { + $rule_ok = 1; + $self->{strict} = 0; + PTDEBUG && _d("Strict mode disabled by rule"); + } + + die "Unrecognized option rule: $opt" unless $rule_ok; + } + } + + foreach my $long ( keys %disables ) { + my @participants = $self->_get_participants($disables{$long}); + $self->{disables}->{$long} = \@participants; + PTDEBUG && _d('Option', $long, 'disables', @participants); + } + + return; +} + +sub _get_participants { + my ( $self, $str ) = @_; + my @participants; + foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) { + die "Option --$long does not exist while processing rule $str" + unless exists $self->{opts}->{$long}; + push @participants, $long; + } + PTDEBUG && _d('Participants for', $str, ':', @participants); + return @participants; +} + +sub opts { + my ( $self ) = @_; + my %opts = %{$self->{opts}}; + return %opts; +} + +sub short_opts { + my ( $self ) = @_; + my %short_opts = %{$self->{short_opts}}; + return %short_opts; +} + +sub set_defaults { + my ( $self, %defaults ) = @_; + $self->{defaults} = {}; + foreach my $long ( keys %defaults ) { + die "Cannot set default for nonexistent option $long" + unless exists $self->{opts}->{$long}; + $self->{defaults}->{$long} = $defaults{$long}; + PTDEBUG && _d('Default val for', $long, ':', $defaults{$long}); + } + return; +} + +sub get_defaults { + my ( $self ) = @_; + return $self->{defaults}; +} + +sub get_groups { + my ( $self ) = @_; + return $self->{groups}; +} + +sub _set_option { + my ( $self, $opt, $val ) = @_; + my $long = exists $self->{opts}->{$opt} ? $opt + : exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt} + : die "Getopt::Long gave a nonexistent option: $opt"; + + $opt = $self->{opts}->{$long}; + if ( $opt->{is_cumulative} ) { + $opt->{value}++; + } + else { + $opt->{value} = $val; + } + $opt->{got} = 1; + PTDEBUG && _d('Got option', $long, '=', $val); +} + +sub get_opts { + my ( $self ) = @_; + + foreach my $long ( keys %{$self->{opts}} ) { + $self->{opts}->{$long}->{got} = 0; + $self->{opts}->{$long}->{value} + = exists $self->{defaults}->{$long} ? $self->{defaults}->{$long} + : $self->{opts}->{$long}->{is_cumulative} ? 0 + : undef; + } + $self->{got_opts} = 0; + + $self->{errors} = []; + + if ( @ARGV && $ARGV[0] eq "--config" ) { + shift @ARGV; + $self->_set_option('config', shift @ARGV); + } + if ( $self->has('config') ) { + my @extra_args; + foreach my $filename ( split(',', $self->get('config')) ) { + eval { + push @extra_args, $self->_read_config_file($filename); + }; + if ( $EVAL_ERROR ) { + if ( $self->got('config') ) { + die $EVAL_ERROR; + } + elsif ( PTDEBUG ) { + _d($EVAL_ERROR); + } + } + } + unshift @ARGV, @extra_args; + } + + Getopt::Long::Configure('no_ignore_case', 'bundling'); + GetOptions( + map { $_->{spec} => sub { $self->_set_option(@_); } } + grep { $_->{long} ne 'config' } # --config is handled specially above. + values %{$self->{opts}} + ) or $self->save_error('Error parsing options'); + + if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) { + if ( $self->{version} ) { + print $self->{version}, "\n"; + } + else { + print "Error parsing version. See the VERSION section of the tool's documentation.\n"; + } + exit 0; + } + + if ( @ARGV && $self->{strict} ) { + $self->save_error("Unrecognized command-line options @ARGV"); + } + + foreach my $mutex ( @{$self->{mutex}} ) { + my @set = grep { $self->{opts}->{$_}->{got} } @$mutex; + if ( @set > 1 ) { + my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" } + @{$mutex}[ 0 .. scalar(@$mutex) - 2] ) + . ' and --'.$self->{opts}->{$mutex->[-1]}->{long} + . ' are mutually exclusive.'; + $self->save_error($err); + } + } + + foreach my $required ( @{$self->{atleast1}} ) { + my @set = grep { $self->{opts}->{$_}->{got} } @$required; + if ( @set == 0 ) { + my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" } + @{$required}[ 0 .. scalar(@$required) - 2] ) + .' or --'.$self->{opts}->{$required->[-1]}->{long}; + $self->save_error("Specify at least one of $err"); + } + } + + $self->_check_opts( keys %{$self->{opts}} ); + $self->{got_opts} = 1; + return; +} + +sub _check_opts { + my ( $self, @long ) = @_; + my $long_last = scalar @long; + while ( @long ) { + foreach my $i ( 0..$#long ) { + my $long = $long[$i]; + next unless $long; + my $opt = $self->{opts}->{$long}; + if ( $opt->{got} ) { + if ( exists $self->{disables}->{$long} ) { + my @disable_opts = @{$self->{disables}->{$long}}; + map { $self->{opts}->{$_}->{value} = undef; } @disable_opts; + PTDEBUG && _d('Unset options', @disable_opts, + 'because', $long,'disables them'); + } + + if ( exists $self->{allowed_groups}->{$long} ) { + + my @restricted_groups = grep { + !exists $self->{allowed_groups}->{$long}->{$_} + } keys %{$self->{groups}}; + + my @restricted_opts; + foreach my $restricted_group ( @restricted_groups ) { + RESTRICTED_OPT: + foreach my $restricted_opt ( + keys %{$self->{groups}->{$restricted_group}} ) + { + next RESTRICTED_OPT if $restricted_opt eq $long; + push @restricted_opts, $restricted_opt + if $self->{opts}->{$restricted_opt}->{got}; + } + } + + if ( @restricted_opts ) { + my $err; + if ( @restricted_opts == 1 ) { + $err = "--$restricted_opts[0]"; + } + else { + $err = join(', ', + map { "--$self->{opts}->{$_}->{long}" } + grep { $_ } + @restricted_opts[0..scalar(@restricted_opts) - 2] + ) + . ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long}; + } + $self->save_error("--$long is not allowed with $err"); + } + } + + } + elsif ( $opt->{is_required} ) { + $self->save_error("Required option --$long must be specified"); + } + + $self->_validate_type($opt); + if ( $opt->{parsed} ) { + delete $long[$i]; + } + else { + PTDEBUG && _d('Temporarily failed to parse', $long); + } + } + + die "Failed to parse options, possibly due to circular dependencies" + if @long == $long_last; + $long_last = @long; + } + + return; +} + +sub _validate_type { + my ( $self, $opt ) = @_; + return unless $opt; + + if ( !$opt->{type} ) { + $opt->{parsed} = 1; + return; + } + + my $val = $opt->{value}; + + if ( $val && $opt->{type} eq 'm' ) { # type time + PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value'); + my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/; + if ( !$suffix ) { + my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/; + $suffix = $s || 's'; + PTDEBUG && _d('No suffix given; using', $suffix, 'for', + $opt->{long}, '(value:', $val, ')'); + } + if ( $suffix =~ m/[smhd]/ ) { + $val = $suffix eq 's' ? $num # Seconds + : $suffix eq 'm' ? $num * 60 # Minutes + : $suffix eq 'h' ? $num * 3600 # Hours + : $num * 86400; # Days + $opt->{value} = ($prefix || '') . $val; + PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val); + } + else { + $self->save_error("Invalid time suffix for --$opt->{long}"); + } + } + elsif ( $val && $opt->{type} eq 'd' ) { # type DSN + PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN'); + my $prev = {}; + my $from_key = $self->{defaults_to}->{ $opt->{long} }; + if ( $from_key ) { + PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN'); + if ( $self->{opts}->{$from_key}->{parsed} ) { + $prev = $self->{opts}->{$from_key}->{value}; + } + else { + PTDEBUG && _d('Cannot parse', $opt->{long}, 'until', + $from_key, 'parsed'); + return; + } + } + my $defaults = $self->{DSNParser}->parse_options($self); + $opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults); + } + elsif ( $val && $opt->{type} eq 'z' ) { # type size + PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value'); + $self->_parse_size($opt, $val); + } + elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) { + $opt->{value} = { map { $_ => 1 } split(/(?{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) { + $opt->{value} = [ split(/(?{long}, 'type', $opt->{type}, 'value', $val); + } + + $opt->{parsed} = 1; + return; +} + +sub get { + my ( $self, $opt ) = @_; + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); + die "Option $opt does not exist" + unless $long && exists $self->{opts}->{$long}; + return $self->{opts}->{$long}->{value}; +} + +sub got { + my ( $self, $opt ) = @_; + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); + die "Option $opt does not exist" + unless $long && exists $self->{opts}->{$long}; + return $self->{opts}->{$long}->{got}; +} + +sub has { + my ( $self, $opt ) = @_; + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); + return defined $long ? exists $self->{opts}->{$long} : 0; +} + +sub set { + my ( $self, $opt, $val ) = @_; + my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt); + die "Option $opt does not exist" + unless $long && exists $self->{opts}->{$long}; + $self->{opts}->{$long}->{value} = $val; + return; +} + +sub save_error { + my ( $self, $error ) = @_; + push @{$self->{errors}}, $error; + return; +} + +sub errors { + my ( $self ) = @_; + return $self->{errors}; +} + +sub usage { + my ( $self ) = @_; + warn "No usage string is set" unless $self->{usage}; # XXX + return "Usage: " . ($self->{usage} || '') . "\n"; +} + +sub descr { + my ( $self ) = @_; + warn "No description string is set" unless $self->{description}; # XXX + my $descr = ($self->{description} || $self->{program_name} || '') + . " For more details, please use the --help option, " + . "or try 'perldoc $PROGRAM_NAME' " + . "for complete documentation."; + $descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g) + unless $ENV{DONT_BREAK_LINES}; + $descr =~ s/ +$//mg; + return $descr; +} + +sub usage_or_errors { + my ( $self, $file, $return ) = @_; + $file ||= $self->{file} || __FILE__; + + if ( !$self->{description} || !$self->{usage} ) { + PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file); + my %synop = $self->_parse_synopsis($file); + $self->{description} ||= $synop{description}; + $self->{usage} ||= $synop{usage}; + PTDEBUG && _d("Description:", $self->{description}, + "\nUsage:", $self->{usage}); + } + + if ( $self->{opts}->{help}->{got} ) { + print $self->print_usage() or die "Cannot print usage: $OS_ERROR"; + exit 0 unless $return; + } + elsif ( scalar @{$self->{errors}} ) { + print $self->print_errors() or die "Cannot print errors: $OS_ERROR"; + exit 0 unless $return; + } + + return; +} + +sub print_errors { + my ( $self ) = @_; + my $usage = $self->usage() . "\n"; + if ( (my @errors = @{$self->{errors}}) ) { + $usage .= join("\n * ", 'Errors in command-line arguments:', @errors) + . "\n"; + } + return $usage . "\n" . $self->descr(); +} + +sub print_usage { + my ( $self ) = @_; + die "Run get_opts() before print_usage()" unless $self->{got_opts}; + my @opts = values %{$self->{opts}}; + + my $maxl = max( + map { + length($_->{long}) # option long name + + ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable + + ($_->{type} ? 2 : 0) # "=x" where x is the opt type + } + @opts); + + my $maxs = max(0, + map { + length($_) + + ($self->{opts}->{$_}->{is_negatable} ? 4 : 0) + + ($self->{opts}->{$_}->{type} ? 2 : 0) + } + values %{$self->{short_opts}}); + + my $lcol = max($maxl, ($maxs + 3)); + my $rcol = 80 - $lcol - 6; + my $rpad = ' ' x ( 80 - $rcol ); + + $maxs = max($lcol - 3, $maxs); + + my $usage = $self->descr() . "\n" . $self->usage(); + + my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}}; + push @groups, 'default'; + + foreach my $group ( reverse @groups ) { + $usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n"; + foreach my $opt ( + sort { $a->{long} cmp $b->{long} } + grep { $_->{group} eq $group } + @opts ) + { + my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long}; + my $short = $opt->{short}; + my $desc = $opt->{desc}; + + $long .= $opt->{type} ? "=$opt->{type}" : ""; + + if ( $opt->{type} && $opt->{type} eq 'm' ) { + my ($s) = $desc =~ m/\(suffix (.)\)/; + $s ||= 's'; + $desc =~ s/\s+\(suffix .\)//; + $desc .= ". Optional suffix s=seconds, m=minutes, h=hours, " + . "d=days; if no suffix, $s is used."; + } + $desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol})(?:\s+|$)/g); + $desc =~ s/ +$//mg; + if ( $short ) { + $usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc); + } + else { + $usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc); + } + } + } + + $usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n"; + + if ( (my @rules = @{$self->{rules}}) ) { + $usage .= "\nRules:\n\n"; + $usage .= join("\n", map { " $_" } @rules) . "\n"; + } + if ( $self->{DSNParser} ) { + $usage .= "\n" . $self->{DSNParser}->usage(); + } + $usage .= "\nOptions and values after processing arguments:\n\n"; + foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) { + my $val = $opt->{value}; + my $type = $opt->{type} || ''; + my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/; + $val = $bool ? ( $val ? 'TRUE' : 'FALSE' ) + : !defined $val ? '(No value)' + : $type eq 'd' ? $self->{DSNParser}->as_string($val) + : $type =~ m/H|h/ ? join(',', sort keys %$val) + : $type =~ m/A|a/ ? join(',', @$val) + : $val; + $usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val); + } + return $usage; +} + +sub prompt_noecho { + shift @_ if ref $_[0] eq __PACKAGE__; + my ( $prompt ) = @_; + local $OUTPUT_AUTOFLUSH = 1; + print $prompt + or die "Cannot print: $OS_ERROR"; + my $response; + eval { + require Term::ReadKey; + Term::ReadKey::ReadMode('noecho'); + chomp($response = ); + Term::ReadKey::ReadMode('normal'); + print "\n" + or die "Cannot print: $OS_ERROR"; + }; + if ( $EVAL_ERROR ) { + die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR"; + } + return $response; +} + +sub _read_config_file { + my ( $self, $filename ) = @_; + open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n"; + my @args; + my $prefix = '--'; + my $parse = 1; + + LINE: + while ( my $line = <$fh> ) { + chomp $line; + next LINE if $line =~ m/^\s*(?:\#|\;|$)/; + $line =~ s/\s+#.*$//g; + $line =~ s/^\s+|\s+$//g; + if ( $line eq '--' ) { + $prefix = ''; + $parse = 0; + next LINE; + } + if ( $parse + && (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/) + ) { + push @args, grep { defined $_ } ("$prefix$opt", $arg); + } + elsif ( $line =~ m/./ ) { + push @args, $line; + } + else { + die "Syntax error in file $filename at line $INPUT_LINE_NUMBER"; + } + } + close $fh; + return @args; +} + +sub read_para_after { + my ( $self, $file, $regex ) = @_; + open my $fh, "<", $file or die "Can't open $file: $OS_ERROR"; + local $INPUT_RECORD_SEPARATOR = ''; + my $para; + while ( $para = <$fh> ) { + next unless $para =~ m/^=pod$/m; + last; + } + while ( $para = <$fh> ) { + next unless $para =~ m/$regex/; + last; + } + $para = <$fh>; + chomp($para); + close $fh or die "Can't close $file: $OS_ERROR"; + return $para; +} + +sub clone { + my ( $self ) = @_; + + my %clone = map { + my $hashref = $self->{$_}; + my $val_copy = {}; + foreach my $key ( keys %$hashref ) { + my $ref = ref $hashref->{$key}; + $val_copy->{$key} = !$ref ? $hashref->{$key} + : $ref eq 'HASH' ? { %{$hashref->{$key}} } + : $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ] + : $hashref->{$key}; + } + $_ => $val_copy; + } qw(opts short_opts defaults); + + foreach my $scalar ( qw(got_opts) ) { + $clone{$scalar} = $self->{$scalar}; + } + + return bless \%clone; +} + +sub _parse_size { + my ( $self, $opt, $val ) = @_; + + if ( lc($val || '') eq 'null' ) { + PTDEBUG && _d('NULL size for', $opt->{long}); + $opt->{value} = 'null'; + return; + } + + my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824); + my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/; + if ( defined $num ) { + if ( $factor ) { + $num *= $factor_for{$factor}; + PTDEBUG && _d('Setting option', $opt->{y}, + 'to num', $num, '* factor', $factor); + } + $opt->{value} = ($pre || '') . $num; + } + else { + $self->save_error("Invalid size for --$opt->{long}: $val"); + } + return; +} + +sub _parse_attribs { + my ( $self, $option, $attribs ) = @_; + my $types = $self->{types}; + return $option + . ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' ) + . ($attribs->{'negatable'} ? '!' : '' ) + . ($attribs->{'cumulative'} ? '+' : '' ) + . ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' ); +} + +sub _parse_synopsis { + my ( $self, $file ) = @_; + $file ||= $self->{file} || __FILE__; + PTDEBUG && _d("Parsing SYNOPSIS in", $file); + + local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs + open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR"; + my $para; + 1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/; + die "$file does not contain a SYNOPSIS section" unless $para; + my @synop; + for ( 1..2 ) { # 1 for the usage, 2 for the description + my $para = <$fh>; + push @synop, $para; + } + close $fh; + PTDEBUG && _d("Raw SYNOPSIS text:", @synop); + my ($usage, $desc) = @synop; + die "The SYNOPSIS section in $file is not formatted properly" + unless $usage && $desc; + + $usage =~ s/^\s*Usage:\s+(.+)/$1/; + chomp $usage; + + $desc =~ s/\n/ /g; + $desc =~ s/\s{2,}/ /g; + $desc =~ s/\. ([A-Z][a-z])/. $1/g; + $desc =~ s/\s+$//; + + return ( + description => $desc, + usage => $usage, + ); +}; + +sub _d { + my ($package, undef, $line) = caller 0; + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } + map { defined $_ ? $_ : 'undef' } + @_; + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; +} + +if ( PTDEBUG ) { + print '# ', $^X, ' ', $], "\n"; + if ( my $uname = `uname -a` ) { + $uname =~ s/\s+/ /g; + print "# $uname\n"; + } + print '# Arguments: ', + join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n"; +} + +1; +} +# ########################################################################### +# End OptionParser package +# ########################################################################### + +# ########################################################################### +# QueryParser package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/QueryParser.pm +# t/lib/QueryParser.t +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### +{ +package QueryParser; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use constant PTDEBUG => $ENV{PTDEBUG} || 0; + +our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/; +our $tbl_regex = qr{ + \b(?:FROM|JOIN|(?get_tables($select); + } + my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i; + PTDEBUG && _d('Matches table:', $tbl); + return ($tbl); + } + + $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig; + + if ( $query =~ /^\s*LOCK TABLES/i ) { + PTDEBUG && _d('Special table type: LOCK TABLES'); + $query =~ s/^(\s*LOCK TABLES\s+)//; + $query =~ s/\s+(?:READ|WRITE|LOCAL)+\s*//g; + PTDEBUG && _d('Locked tables:', $query); + $query = "FROM $query"; + } + + $query =~ s/\\["']//g; # quoted strings + $query =~ s/".*?"/?/sg; # quoted strings + $query =~ s/'.*?'/?/sg; # quoted strings + + my @tables; + foreach my $tbls ( $query =~ m/$tbl_regex/gio ) { + PTDEBUG && _d('Match tables:', $tbls); + + next if $tbls =~ m/\ASELECT\b/i; + + foreach my $tbl ( split(',', $tbls) ) { + $tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio; + + if ( $tbl !~ m/[a-zA-Z]/ ) { + PTDEBUG && _d('Skipping suspicious table name:', $tbl); + next; + } + + push @tables, $tbl; + } + } + return @tables; +} + +sub has_derived_table { + my ( $self, $query ) = @_; + my $match = $query =~ m/$has_derived/; + PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table'); + return $match; +} + +sub get_aliases { + my ( $self, $query, $list ) = @_; + + my $result = { + DATABASE => {}, + TABLE => {}, + }; + return $result unless $query; + + $query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig; + + $query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig; + + my @tbl_refs; + my ($tbl_refs, $from) = $query =~ m{ + ( + (FROM|INTO|UPDATE)\b\s* # Keyword before table refs + .+? # Table refs + ) + (?:\s+|\z) # If the query does not end with the table + (?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs + }ix; + + if ( $tbl_refs ) { + + if ( $query =~ m/^(?:INSERT|REPLACE)/i ) { + $tbl_refs =~ s/\([^\)]+\)\s*//; + } + + PTDEBUG && _d('tbl refs:', $tbl_refs); + + my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i; + + my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i; + + $tbl_refs =~ s/ = /=/g; + + while ( + $tbl_refs =~ m{ + $before_tbl\b\s* + ( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? ) + \s*$after_tbl + }xgio ) + { + my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3); + PTDEBUG && _d('Match table:', $tbl_ref); + push @tbl_refs, $tbl_ref; + $alias = $self->trim_identifier($alias); + + if ( $tbl_ref =~ m/^AS\s+\w+/i ) { + PTDEBUG && _d('Subquery', $tbl_ref); + $result->{TABLE}->{$alias} = undef; + next; + } + + my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/; + $db = $self->trim_identifier($db); + $tbl = $self->trim_identifier($tbl); + $result->{TABLE}->{$alias || $tbl} = $tbl; + $result->{DATABASE}->{$tbl} = $db if $db; + } + } + else { + PTDEBUG && _d("No tables ref in", $query); + } + + if ( $list ) { + return \@tbl_refs; + } + else { + return $result; + } +} + +sub split { + my ( $self, $query ) = @_; + return unless $query; + $query = $self->clean_query($query); + PTDEBUG && _d('Splitting', $query); + + my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i; + + my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query); + + my @statements; + if ( @split_statements == 1 ) { + push @statements, $query; + } + else { + for ( my $i = 0; $i <= $#split_statements; $i += 2 ) { + push @statements, $split_statements[$i].$split_statements[$i+1]; + + if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) { + $statements[-2] .= pop @statements; + } + } + } + + PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements); + return @statements; +} + +sub clean_query { + my ( $self, $query ) = @_; + return unless $query; + $query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */ + $query =~ s/^\s+//; # Remove leading spaces + $query =~ s/\s+$//; # Remove trailing spaces + $query =~ s/\s{2,}/ /g; # Remove extra spaces + return $query; +} + +sub split_subquery { + my ( $self, $query ) = @_; + return unless $query; + $query = $self->clean_query($query); + $query =~ s/;$//; + + my @subqueries; + my $sqno = 0; # subquery number + my $pos = 0; + while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) { + $pos = pos($query); + my $word = $1; + PTDEBUG && _d($word, $sqno); + if ( $word =~ m/^\(?SELECT\b/i ) { + my $start_pos = $pos - length($word) - 1; + if ( $start_pos ) { + $sqno++; + PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos); + $subqueries[$sqno] = { + start_pos => $start_pos, + end_pos => 0, + len => 0, + words => [$word], + lp => 1, # left parentheses + rp => 0, # right parentheses + done => 0, + }; + } + else { + PTDEBUG && _d('Main SELECT at pos 0'); + } + } + else { + next unless $sqno; # next unless we're in a subquery + PTDEBUG && _d('In subquery', $sqno); + my $sq = $subqueries[$sqno]; + if ( $sq->{done} ) { + PTDEBUG && _d('This subquery is done; SQL is for', + ($sqno - 1 ? "subquery $sqno" : "the main SELECT")); + next; + } + push @{$sq->{words}}, $word; + my $lp = ($word =~ tr/\(//) || 0; + my $rp = ($word =~ tr/\)//) || 0; + PTDEBUG && _d('parentheses left', $lp, 'right', $rp); + if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) { + my $end_pos = $pos - 1; + PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos); + $sq->{end_pos} = $end_pos; + $sq->{len} = $end_pos - $sq->{start_pos}; + } + } + } + + for my $i ( 1..$#subqueries ) { + my $sq = $subqueries[$i]; + next unless $sq; + $sq->{sql} = join(' ', @{$sq->{words}}); + substr $query, + $sq->{start_pos} + 1, # +1 for ( + $sq->{len} - 1, # -1 for ) + "__subquery_$i"; + } + + return $query, map { $_->{sql} } grep { defined $_ } @subqueries; +} + +sub query_type { + my ( $self, $query, $qr ) = @_; + my ($type, undef) = $qr->distill_verbs($query); + my $rw; + if ( $type =~ m/^SELECT\b/ ) { + $rw = 'read'; + } + elsif ( $type =~ m/^$data_manip_stmts\b/ + || $type =~ m/^$data_def_stmts\b/ ) { + $rw = 'write' + } + + return { + type => $type, + rw => $rw, + } +} + +sub get_columns { + my ( $self, $query ) = @_; + my $cols = []; + return $cols unless $query; + my $cols_def; + + if ( $query =~ m/^SELECT/i ) { + $query =~ s/ + ^SELECT\s+ + (?:ALL + |DISTINCT + |DISTINCTROW + |HIGH_PRIORITY + |STRAIGHT_JOIN + |SQL_SMALL_RESULT + |SQL_BIG_RESULT + |SQL_BUFFER_RESULT + |SQL_CACHE + |SQL_NO_CACHE + |SQL_CALC_FOUND_ROWS + )\s+ + /SELECT /xgi; + ($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i; + } + elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) { + ($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i; + } + + PTDEBUG && _d('Columns:', $cols_def); + if ( $cols_def ) { + @$cols = split(',', $cols_def); + map { + my $col = $_; + $col = s/^\s+//g; + $col = s/\s+$//g; + $col; + } @$cols; + } + + return $cols; +} + +sub parse { + my ( $self, $query ) = @_; + return unless $query; + my $parsed = {}; + + $query =~ s/\n/ /g; + $query = $self->clean_query($query); + + $parsed->{query} = $query, + $parsed->{tables} = $self->get_aliases($query, 1); + $parsed->{columns} = $self->get_columns($query); + + my ($type) = $query =~ m/^(\w+)/; + $parsed->{type} = lc $type; + + + $parsed->{sub_queries} = []; + + return $parsed; +} + +sub extract_tables { + my ( $self, %args ) = @_; + my $query = $args{query}; + my $default_db = $args{default_db}; + my $q = $self->{Quoter} || $args{Quoter}; + return unless $query; + PTDEBUG && _d('Extracting tables'); + my @tables; + my %seen; + foreach my $db_tbl ( $self->get_tables($query) ) { + next unless $db_tbl; + next if $seen{$db_tbl}++; # Unique-ify for issue 337. + my ( $db, $tbl ) = $q->split_unquote($db_tbl); + push @tables, [ $db || $default_db, $tbl ]; + } + return @tables; +} + +sub trim_identifier { + my ($self, $str) = @_; + return unless defined $str; + $str =~ s/`//g; + $str =~ s/^\s+//; + $str =~ s/\s+$//; + return $str; +} + +sub _d { + my ($package, undef, $line) = caller 0; + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } + map { defined $_ ? $_ : 'undef' } + @_; + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; +} + +1; +} +# ########################################################################### +# End QueryParser package +# ########################################################################### + +# ########################################################################### +# QueryRewriter package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/QueryRewriter.pm +# t/lib/QueryRewriter.t +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### +{ +package QueryRewriter; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use constant PTDEBUG => $ENV{PTDEBUG} || 0; + +our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT + |UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi; +my $quote_re = qr/"(?:(?!(? [^()]+ ) # Non-parens without backtracking + | + (??{ $bal }) # Group with matching parens + )* + \) + /x; + +my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments +my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */ +my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */ +my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW + + +sub new { + my ( $class, %args ) = @_; + my $self = { %args }; + return bless $self, $class; +} + +sub strip_comments { + my ( $self, $query ) = @_; + return unless $query; + $query =~ s/$olc_re//go; + $query =~ s/$mlc_re//go; + if ( $query =~ m/$vlc_rf/i ) { # contains show + version + $query =~ s/$vlc_re//go; + } + return $query; +} + +sub shorten { + my ( $self, $query, $length ) = @_; + $query =~ s{ + \A( + (?:INSERT|REPLACE) + (?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)? + (?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\) + ) + \s*,\s*\(.*?(ON\s+DUPLICATE|\Z)} + {$1 /*... omitted ...*/$2}xsi; + + return $query unless $query =~ m/IN\s*\(\s*(?!select)/i; + + my $last_length = 0; + my $query_length = length($query); + while ( + $length > 0 + && $query_length > $length + && $query_length < ( $last_length || $query_length + 1 ) + ) { + $last_length = $query_length; + $query =~ s{ + (\bIN\s*\() # The opening of an IN list + ([^\)]+) # Contents of the list, assuming no item contains paren + (?=\)) # Close of the list + } + { + $1 . __shorten($2) + }gexsi; + } + + return $query; +} + +sub __shorten { + my ( $snippet ) = @_; + my @vals = split(/,/, $snippet); + return $snippet unless @vals > 20; + my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items + return + join(',', @keep) + . "/*... omitted " + . scalar(@vals) + . " items ...*/"; +} + +sub fingerprint { + my ( $self, $query ) = @_; + + $query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query + && return 'mysqldump'; + $query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query + && return 'percona-toolkit'; + $query =~ m/\Aadministrator command: / + && return $query; + $query =~ m/\A\s*(call\s+\S+)\(/i + && return lc($1); # Warning! $1 used, be careful. + if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) { + $query = $beginning; # Shorten multi-value INSERT statements ASAP + } + + $query =~ s/$olc_re//go; + $query =~ s/$mlc_re//go; + $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE + && return $query; + + $query =~ s/\\["']//g; # quoted strings + $query =~ s/".*?"/?/sg; # quoted strings + $query =~ s/'.*?'/?/sg; # quoted strings + $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers + $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers + $query =~ s/\A\s+//; # Chop off leading whitespace + chomp $query; # Kill trailing whitespace + $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace + $query = lc $query; + $query =~ s/\bnull\b/?/g; # Get rid of NULLs + $query =~ s{ # Collapse IN and VALUES lists + \b(in|values?)(?:[\s,]*\([\s?,]*\))+ + } + {$1(?+)}gx; + $query =~ s{ # Collapse UNION + \b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+ + } + {$1 /*repeat$2*/}xg; + $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT + + if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause + 1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query; + } + + return $query; +} + +sub distill_verbs { + my ( $self, $query ) = @_; + + $query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1"; + $query =~ m/\A\s*use\s+/ && return "USE"; + $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; + $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; + + if ( $query =~ m/\Aadministrator command:/ ) { + $query =~ s/administrator command:/ADMIN/; + $query = uc $query; + return $query; + } + + $query = $self->strip_comments($query); + + if ( $query =~ m/\A\s*SHOW\s+/i ) { + PTDEBUG && _d($query); + + $query = uc $query; + $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; + $query =~ s/\s+COUNT[^)]+\)//g; + + $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; + + $query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s; + $query =~ s/\s+/ /g; + PTDEBUG && _d($query); + return $query; + } + + eval $QueryParser::data_def_stmts; + eval $QueryParser::tbl_ident; + my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; + if ( $dds) { + my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; + $obj = uc $obj if $obj; + PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); + my ($db_or_tbl) + = $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i; + PTDEBUG && _d('Matches db or table:', $db_or_tbl); + return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl; + } + + my @verbs = $query =~ m/\b($verbs)\b/gio; + @verbs = do { + my $last = ''; + grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs; + }; + + if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) { + PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]); + my $union = grep { $_ eq 'UNION' } @verbs; + @verbs = $union ? qw(SELECT UNION) : qw(SELECT); + } + + my $verb_str = join(q{ }, @verbs); + return $verb_str; +} + +sub __distill_tables { + my ( $self, $query, $table, %args ) = @_; + my $qp = $args{QueryParser} || $self->{QueryParser}; + die "I need a QueryParser argument" unless $qp; + + my @tables = map { + $_ =~ s/`//g; + $_ =~ s/(_?)[0-9]+/$1?/g; + $_; + } grep { defined $_ } $qp->get_tables($query); + + push @tables, $table if $table; + + @tables = do { + my $last = ''; + grep { my $pass = $_ ne $last; $last = $_; $pass } @tables; + }; + + return @tables; +} + +sub distill { + my ( $self, $query, %args ) = @_; + + if ( $args{generic} ) { + my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/; + return '' unless $cmd; + $query = (uc $cmd) . ($arg ? " $arg" : ''); + } + else { + my ($verbs, $table) = $self->distill_verbs($query, %args); + + if ( $verbs && $verbs =~ m/^SHOW/ ) { + my %alias_for = qw( + SCHEMA DATABASE + KEYS INDEX + INDEXES INDEX + ); + map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; + $query = $verbs; + } + else { + my @tables = $self->__distill_tables($query, $table, %args); + $query = join(q{ }, $verbs, @tables); + } + } + + if ( $args{trf} ) { + $query = $args{trf}->($query, %args); + } + + return $query; +} + +sub convert_to_select { + my ( $self, $query ) = @_; + return unless $query; + + return if $query =~ m/=\s*\(\s*SELECT /i; + + $query =~ s{ + \A.*? + update(?:\s+(?:low_priority|ignore))?\s+(.*?) + \s+set\b(.*?) + (?:\s*where\b(.*?))? + (limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)? + \Z + } + {__update_to_select($1, $2, $3, $4)}exsi + || $query =~ s{ + \A.*? + (?:insert(?:\s+ignore)?|replace)\s+ + .*?\binto\b(.*?)\(([^\)]+)\)\s* + values?\s*(\(.*?\))\s* + (?:\blimit\b|on\s+duplicate\s+key.*)?\s* + \Z + } + {__insert_to_select($1, $2, $3)}exsi + || $query =~ s{ + \A.*? + (?:insert(?:\s+ignore)?|replace)\s+ + (?:.*?\binto)\b(.*?)\s* + set\s+(.*?)\s* + (?:\blimit\b|on\s+duplicate\s+key.*)?\s* + \Z + } + {__insert_to_select_with_set($1, $2)}exsi + || $query =~ s{ + \A.*? + delete\s+(.*?) + \bfrom\b(.*) + \Z + } + {__delete_to_select($1, $2)}exsi; + $query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si; + $query =~ s/\A.*?(?=\bSELECT\s*\b)//ism; + return $query; +} + +sub convert_select_list { + my ( $self, $query ) = @_; + $query =~ s{ + \A\s*select(.*?)\bfrom\b + } + {$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi; + return $query; +} + +sub __delete_to_select { + my ( $delete, $join ) = @_; + if ( $join =~ m/\bjoin\b/ ) { + return "select 1 from $join"; + } + return "select * from $join"; +} + +sub __insert_to_select { + my ( $tbl, $cols, $vals ) = @_; + PTDEBUG && _d('Args:', @_); + my @cols = split(/,/, $cols); + PTDEBUG && _d('Cols:', @cols); + $vals =~ s/^\(|\)$//g; # Strip leading/trailing parens + my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g; + PTDEBUG && _d('Vals:', @vals); + if ( @cols == @vals ) { + return "select * from $tbl where " + . join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols)); + } + else { + return "select * from $tbl limit 1"; + } +} + +sub __insert_to_select_with_set { + my ( $from, $set ) = @_; + $set =~ s/,/ and /g; + return "select * from $from where $set "; +} + +sub __update_to_select { + my ( $from, $set, $where, $limit ) = @_; + return "select $set from $from " + . ( $where ? "where $where" : '' ) + . ( $limit ? " $limit " : '' ); +} + +sub wrap_in_derived { + my ( $self, $query ) = @_; + return unless $query; + return $query =~ m/\A\s*select/i + ? "select 1 from ($query) as x limit 1" + : $query; +} + +sub _d { + my ($package, undef, $line) = caller 0; + @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; } + map { defined $_ ? $_ : 'undef' } + @_; + print STDERR "# $package:$line $PID ", join(' ', @_), "\n"; +} + +1; +} +# ########################################################################### +# End QueryRewriter package +# ########################################################################### + +# ########################################################################### +# This is a combination of modules and programs in one -- a runnable module. +# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last +# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition. +# +# Check at the end of this package for the call to main() which actually runs +# the program. +# ########################################################################### +package pt_fingerprint; + +use English qw(-no_match_vars); +use Data::Dumper; +$Data::Dumper::Indent = 1; +$OUTPUT_AUTOFLUSH = 1; + +use constant MKDEBUG => $ENV{MKDEBUG} || 0; + +sub main { + @ARGV = @_; # set global ARGV for this package + + # ########################################################################## + # Get configuration information. + # ########################################################################## + my $o = new OptionParser(); + $o->get_specs(); + $o->get_opts(); + $o->usage_or_errors(); + + my $qp = new QueryParser(); + my $qr = new QueryRewriter(QueryParser=>$qp); + + if ( $o->got('query') ) { + print $qr->fingerprint($o->get('query')), "\n"; + } + else { + local $INPUT_RECORD_SEPARATOR = ";\n"; + while ( <> ) { + my $query = $_; + chomp $query; + print $qr->fingerprint($query), "\n"; + } + } +} + +# ############################################################################ +# Run the program. +# ############################################################################ +if ( !caller ) { exit main(@ARGV); } + +1; # Because this is a module as well as a script. + +# ############################################################################# +# Documentation. +# ############################################################################# + +=pod + +=head1 NAME + +pt-fingerprint - Convert queries into fingerprints. + +=head1 SYNOPSIS + +Usage: pt-fingerprint [OPTIONS] [FILES] + +pt-fingerprint converts queries into fingerprints. With the --query +option, converts the option's value into a fingerprint. With no options, treats +command-line arguments as FILEs and reads and converts semicolon-separated +queries from the FILEs. When FILE is -, it read standard input. + +Convert a single query: + + pt-fingerprint --query "select a, b, c from users where id = 500" + +Convert a file full of queries: + + pt-fingerprint /path/to/file.txt + +=head1 RISKS + +The following section is included to inform users about the potential risks, +whether known or unknown, of using this tool. The two main categories of risks +are those created by the nature of the tool (e.g. read-only tools vs. read-write +tools) and those created by bugs. + +The pt-fingerprint tool simply reads data and transforms it, so risks are +minimal. + +See also L<"BUGS"> for more information on filing bugs and getting help. + +=head1 DESCRIPTION + +A query fingerprint is the abstracted form of a query, which makes it possible +to group similar queries together. Abstracting a query removes literal values, +normalizes whitespace, and so on. For example, consider these two queries: + + SELECT name, password FROM user WHERE id='12823'; + select name, password from user + where id=5; + +Both of those queries will fingerprint to + + select name, password from user where id=? + +Once the query's fingerprint is known, we can then talk about a query as though +it represents all similar queries. + +Query fingerprinting accommodates a great many special cases, which have proven +necessary in the real world. For example, an IN list with 5 literals is really +equivalent to one with 4 literals, so lists of literals are collapsed to a +single one. If you want to understand more about how and why all of these cases +are handled, please review the test cases in the Subversion repository. If you +find something that is not fingerprinted properly, please submit a bug report +with a reproducible test case. Here is a list of transformations during +fingerprinting, which might not be exhaustive: + +=over + +=item * + +Group all SELECT queries from mysqldump together, even if they are against +different tables. Ditto for all of pt-table-checksum's checksum queries. + +=item * + +Shorten multi-value INSERT statements to a single VALUES() list. + +=item * + +Strip comments. + +=item * + +Abstract the databases in USE statements, so all USE statements are grouped +together. + +=item * + +Replace all literals, such as quoted strings. For efficiency, the code that +replaces literal numbers is somewhat non-selective, and might replace some +things as numbers when they really are not. Hexadecimal literals are also +replaced. NULL is treated as a literal. Numbers embedded in identifiers are +also replaced, so tables named similarly will be fingerprinted to the same +values (e.g. users_2009 and users_2010 will fingerprint identically). + +=item * + +Collapse all whitespace into a single space. + +=item * + +Lowercase the entire query. + +=item * + +Replace all literals inside of IN() and VALUES() lists with a single +placeholder, regardless of cardinality. + +=item * + +Collapse multiple identical UNION queries into a single one. + +=back + +=head1 OPTIONS + +This tool accepts additional command-line arguments. Refer to the +L<"SYNOPSIS"> and usage information for details. + +=over + +=item --config + +type: Array + +Read this comma-separated list of config files; if specified, this must be the +first option on the command line. + +=item --help + +Show help and exit. + +=item --query + +type: string + +The query to convert into a fingerprint. + +=item --version + +Show version and exit. + +=back + +=head1 ENVIRONMENT + +The environment variable C enables verbose debugging output to STDERR. +To enable debugging and capture all output to a file, run the tool like: + + PTDEBUG=1 pt-fingerprint ... > FILE 2>&1 + +Be careful: debugging output is voluminous and can generate several megabytes +of output. + +=head1 SYSTEM REQUIREMENTS + +You need Perl, DBI, DBD::mysql, and some core packages that ought to be +installed in any reasonably new version of Perl. + +=head1 BUGS + +For a list of known bugs, see L. + +Please report bugs at L. +Include the following information in your bug report: + +=over + +=item * Complete command-line used to run the tool + +=item * Tool L<"--version"> + +=item * MySQL version of all servers involved + +=item * Output from the tool including STDERR + +=item * Input files (log/dump/config files, etc.) + +=back + +If possible, include debugging output by running the tool with C; +see L<"ENVIRONMENT">. + +=head1 DOWNLOADING + +Visit L to download the +latest release of Percona Toolkit. Or, get the latest release from the +command line: + + wget percona.com/get/percona-toolkit.tar.gz + + wget percona.com/get/percona-toolkit.rpm + + wget percona.com/get/percona-toolkit.deb + +You can also get individual tools from the latest release: + + wget percona.com/get/TOOL + +Replace C with the name of any tool. + +=head1 AUTHORS + +Baron Schwartz and Daniel Nichter + +=head1 ABOUT PERCONA TOOLKIT + +This tool is part of Percona Toolkit, a collection of advanced command-line +tools developed by Percona for MySQL support and consulting. Percona Toolkit +was forked from two projects in June, 2011: Maatkit and Aspersa. Those +projects were created by Baron Schwartz and developed primarily by him and +Daniel Nichter, both of whom are employed by Percona. Visit +L for more software developed by Percona. + +=head1 COPYRIGHT, LICENSE, AND WARRANTY + +This program is copyright 2011-2012 Percona Inc. +Feedback and improvements are welcome. + +THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation, version 2; OR the Perl Artistic License. On UNIX and similar +systems, you can issue `man perlgpl' or `man perlartistic' to read these +licenses. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA. + +=head1 VERSION + +pt-fingerprint 2.0.0 + +=cut diff --git a/t/pt-fingerprint/basics.t b/t/pt-fingerprint/basics.t new file mode 100644 index 00000000..b6288e02 --- /dev/null +++ b/t/pt-fingerprint/basics.t @@ -0,0 +1,31 @@ +#!/usr/bin/env perl + +BEGIN { + die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" + unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; + unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; +}; + +use strict; +use warnings FATAL => 'all'; +use English qw(-no_match_vars); +use Test::More tests => 1; + +use PerconaTest; +require "$trunk/bin/pt-fingerprint"; + +my @args = qw(--report-format=query_report --limit 10); +my $sample = "$trunk/t/lib/samples/slowlogs/"; +my $output; + +$output = `$trunk/bin/pt-fingerprint --help`; +like( + $output, + qr/--help/, + "It runs" +); + +# ############################################################################# +# Done. +# ############################################################################# +exit; From a81d25c489d1f0d032fe7d102d130de7446f140d Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 26 Mar 2012 10:45:46 -0600 Subject: [PATCH 2/4] Add fingerprint_md5 and preserve_embedded_numbers flags to QueryRewriter, used in fingerprint() to handle customer's requirements. --- lib/QueryRewriter.pm | 28 +++++++++++++++++--- t/lib/QueryRewriter.t | 60 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/lib/QueryRewriter.pm b/lib/QueryRewriter.pm index b89ea1c6..1789e634 100644 --- a/lib/QueryRewriter.pm +++ b/lib/QueryRewriter.pm @@ -175,10 +175,30 @@ sub fingerprint { $query =~ s/\\["']//g; # quoted strings $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings - # This regex is extremely broad in its definition of what looks like a - # number. That is for speed. - $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers - $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers + + # MD5 checksums which are always 32 hex chars + if ( $self->{fingerprint_md5} ) { + $query =~ s/([._-])[a-f0-9]{32}/$1?/g; + } + + # Things resembling numbers/hex. + if ( !$self->{preserve_embedded_numbers} ) { + # For speed, this regex is extremely broad in its definition + # of what looks like a number. + $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; + } + else { + $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; + } + + # Clean up leftovers + if ( $self->{fingerprint_md5} ) { + $query =~ s/[xb+-]\?/?/g; + } + else { + $query =~ s/[xb.+-]\?/?/g; + } + $query =~ s/\A\s+//; # Chop off leading whitespace chomp $query; # Kill trailing whitespace $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace diff --git a/t/lib/QueryRewriter.t b/t/lib/QueryRewriter.t index c60f6dce..d2fa4e97 100644 --- a/t/lib/QueryRewriter.t +++ b/t/lib/QueryRewriter.t @@ -10,7 +10,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 266; +use Test::More tests => 271; use QueryRewriter; use QueryParser; @@ -349,6 +349,64 @@ is( "Fingerprint LOAD DATA INFILE" ); +# fingerprint MD5 checksums, 32 char hex strings. This is a +# special feature used by pt-fingerprint. +$qr = new QueryRewriter( + QueryParser => $qp, + fingerprint_md5 => 1, +); + +is( + $qr->fingerprint( + "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" + ), + "select * from db.?_temp where id=?", + "Fingerprint db.MD5_tbl" +); + +is( + $qr->fingerprint( + "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" + ), + "select * from db.temp_? where id=?", + "Fingerprint db.tbl_MD5" +); + +$qr = new QueryRewriter( + QueryParser => $qp, + fingerprint_md5 => 1, + preserve_embedded_numbers => 1, +); + +is( + $qr->fingerprint( + "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" + ), + "select * from db.?_temp where id=?", + "Fingerprint db.MD5_tbl (with preserve_embedded_numbers)" +); + +is( + $qr->fingerprint( + "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" + ), + "select * from db.temp_? where id=?", + "Fingerprint db.tbl_MD5 (with preserve_embedded_numbers)" +); + +$qr = new QueryRewriter( + QueryParser => $qp, + preserve_embedded_numbers => 1, +); + +is( + $qr->fingerprint( + "SELECT * FROM prices.rt_5min where id=1" + ), + "select * from prices.rt_5min where id=?", + "Fingerprint db.tblname (preserve number)" +); + # ############################################################################# # convert_to_select() # ############################################################################# From beaa9240e7145eb6b822dfc1054396675423f2c9 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 26 Mar 2012 16:40:46 -0600 Subject: [PATCH 3/4] Rename preserve_embedded_numbers to match_embedded_numbers, and fingerprint_md5 to match_md5_checksums. Add corresponding options to pt-fingerprint. --- bin/pt-fingerprint | 45 ++++++++++++++++++++++++++++++++++++++++--- lib/QueryRewriter.pm | 6 +++--- t/lib/QueryRewriter.t | 12 ++++++------ 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/bin/pt-fingerprint b/bin/pt-fingerprint index e91be727..12512ddd 100755 --- a/bin/pt-fingerprint +++ b/bin/pt-fingerprint @@ -1559,8 +1559,25 @@ sub fingerprint { $query =~ s/\\["']//g; # quoted strings $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings - $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers - $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers + + if ( $self->{match_md5_checksums} ) { + $query =~ s/([._-])[a-f0-9]{32}/$1?/g; + } + + if ( !$self->{match_embedded_numbers} ) { + $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; + } + else { + $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; + } + + if ( $self->{match_md5_checksums} ) { + $query =~ s/[xb+-]\?/?/g; + } + else { + $query =~ s/[xb.+-]\?/?/g; + } + $query =~ s/\A\s+//; # Chop off leading whitespace chomp $query; # Kill trailing whitespace $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace @@ -1840,7 +1857,11 @@ sub main { $o->usage_or_errors(); my $qp = new QueryParser(); - my $qr = new QueryRewriter(QueryParser=>$qp); + my $qr = new QueryRewriter( + QueryParser => $qp, + match_md5_checksums => $o->get('match-md5-checksums'), + match_embedded_numbers => $o->get('match-embedded-numbers'), + ); if ( $o->got('query') ) { print $qr->fingerprint($o->get('query')), "\n"; @@ -1993,6 +2014,24 @@ first option on the command line. Show help and exit. +=item --match-embedded-numbers + +Match numbers embedded in words and replace as single values. This option +causes the tool to be more careful about matching numbers so that words +with numbers, like C are matched and replaced as a single C +placeholder. Otherwise the default number matching pattern will replace +C as C. + +This is helpful if database or table names contain numbers. + +=item --match-md5-checksums + +Match MD5 checksums and replace as single values. This option causes +the tool to be more careful about matching numbers so that MD5 checksums +like C are matched and replaced as a +single C placeholder. Otherwise, the default number matching pattern will +replace C as C. + =item --query type: string diff --git a/lib/QueryRewriter.pm b/lib/QueryRewriter.pm index 1789e634..43d426e9 100644 --- a/lib/QueryRewriter.pm +++ b/lib/QueryRewriter.pm @@ -177,12 +177,12 @@ sub fingerprint { $query =~ s/'.*?'/?/sg; # quoted strings # MD5 checksums which are always 32 hex chars - if ( $self->{fingerprint_md5} ) { + if ( $self->{match_md5_checksums} ) { $query =~ s/([._-])[a-f0-9]{32}/$1?/g; } # Things resembling numbers/hex. - if ( !$self->{preserve_embedded_numbers} ) { + if ( !$self->{match_embedded_numbers} ) { # For speed, this regex is extremely broad in its definition # of what looks like a number. $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; @@ -192,7 +192,7 @@ sub fingerprint { } # Clean up leftovers - if ( $self->{fingerprint_md5} ) { + if ( $self->{match_md5_checksums} ) { $query =~ s/[xb+-]\?/?/g; } else { diff --git a/t/lib/QueryRewriter.t b/t/lib/QueryRewriter.t index d2fa4e97..71bce780 100644 --- a/t/lib/QueryRewriter.t +++ b/t/lib/QueryRewriter.t @@ -353,7 +353,7 @@ is( # special feature used by pt-fingerprint. $qr = new QueryRewriter( QueryParser => $qp, - fingerprint_md5 => 1, + match_md5_checksums => 1, ); is( @@ -374,8 +374,8 @@ is( $qr = new QueryRewriter( QueryParser => $qp, - fingerprint_md5 => 1, - preserve_embedded_numbers => 1, + match_md5_checksums => 1, + match_embedded_numbers => 1, ); is( @@ -383,7 +383,7 @@ is( "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" ), "select * from db.?_temp where id=?", - "Fingerprint db.MD5_tbl (with preserve_embedded_numbers)" + "Fingerprint db.MD5_tbl (with match_embedded_numbers)" ); is( @@ -391,12 +391,12 @@ is( "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" ), "select * from db.temp_? where id=?", - "Fingerprint db.tbl_MD5 (with preserve_embedded_numbers)" + "Fingerprint db.tbl_MD5 (with match_embedded_numbers)" ); $qr = new QueryRewriter( QueryParser => $qp, - preserve_embedded_numbers => 1, + match_embedded_numbers => 1, ); is( From 9c0e642443b2a8dd6da0124c13dde3e632b9d935 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 26 Mar 2012 17:24:44 -0600 Subject: [PATCH 4/4] Test pt-fingerprint. --- bin/pt-fingerprint | 3 + t/pt-fingerprint/basics.t | 76 ++++++++++++++++++- t/pt-fingerprint/samples/query001 | 2 + t/pt-fingerprint/samples/query001.fingerprint | 1 + t/pt-fingerprint/samples/query002 | 2 + t/pt-fingerprint/samples/query002.fingerprint | 1 + 6 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 t/pt-fingerprint/samples/query001 create mode 100644 t/pt-fingerprint/samples/query001.fingerprint create mode 100644 t/pt-fingerprint/samples/query002 create mode 100644 t/pt-fingerprint/samples/query002.fingerprint diff --git a/bin/pt-fingerprint b/bin/pt-fingerprint index 12512ddd..187004b7 100755 --- a/bin/pt-fingerprint +++ b/bin/pt-fingerprint @@ -1871,6 +1871,9 @@ sub main { while ( <> ) { my $query = $_; chomp $query; + $query =~ s/^#.+$//mg; + $query =~ s/^\s+//; + next unless $query =~ m/^\w/; print $qr->fingerprint($query), "\n"; } } diff --git a/t/pt-fingerprint/basics.t b/t/pt-fingerprint/basics.t index b6288e02..06c28230 100644 --- a/t/pt-fingerprint/basics.t +++ b/t/pt-fingerprint/basics.t @@ -9,14 +9,15 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 1; +use Test::More tests => 7; use PerconaTest; require "$trunk/bin/pt-fingerprint"; -my @args = qw(--report-format=query_report --limit 10); -my $sample = "$trunk/t/lib/samples/slowlogs/"; +my @args = qw(); my $output; +my $sample = "$trunk/t/pt-fingerprint/samples"; +my $pqd = "$trunk/bin/pt-query-digest"; $output = `$trunk/bin/pt-fingerprint --help`; like( @@ -25,6 +26,75 @@ like( "It runs" ); + +sub test_query_file { + my ($file) = @_; + if ( ! -f "$sample/$file.fingerprint" ) { + `$pqd --fingerprint $sample/$file | awk '/Fingerprint/ { getline; print; exit; }' | sed -e 's/^#[ ]*//' > $sample/$file.fingerprint`; + diag("Created $sample/$file.fingerprint"); + } + chomp(my $expect = `cat $sample/$file.fingerprint`); + my $got = output( + sub { pt_fingerprint::main("$sample/$file") } + ); + chomp($got); + is( + $got, + $expect, + "$file fingerprint" + ); +}; + +opendir my $dir, $sample or die "Cannot open $sample: $OS_ERROR\n"; +while (defined(my $file = readdir($dir))) { + next unless $file =~ m/^query\d+$/; + test_query_file($file); +} +closedir $dir; + + +sub test_query { + my (%args) = @_; + my $query = $args{query}; + my $expect = $args{expect}; + my @ops = $args{ops} ? @{$args{ops}} : (); + + $output = output( + sub { pt_fingerprint::main('--query', $query, @ops) } + ); + chomp($output); + is( + $output, + $expect, + $args{name} ? $args{name} : "Fingerprint " . substr($query, 0, 70) + ); +} + +test_query( + query => 'select * from tbl where id=1', + expect => 'select * from tbl where id=?', +); + +test_query( + name => "Fingerprint MD5_word", + query => "SELECT c FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1", + expect => "select c from db.?_temp where id=?", + ops => [qw(--match-md5-checksums)], +); + +test_query( + name => "Fingerprint word_MD5", + query => "SELECT c FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1", + expect => "select c from db.temp_? where id=?", + ops => [qw(--match-md5-checksums)], +); + +test_query( + name => "Fingerprint word", + query => "SELECT c FROM db.catch22 WHERE id is null", + expect => "select c from db.catch22 where id is ?", + ops => [qw(--match-embedded-numbers)], +); # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-fingerprint/samples/query001 b/t/pt-fingerprint/samples/query001 new file mode 100644 index 00000000..b6450a5c --- /dev/null +++ b/t/pt-fingerprint/samples/query001 @@ -0,0 +1,2 @@ +# Query_time: 1 +select * from db.tbl where id=1 or foo='bar'; diff --git a/t/pt-fingerprint/samples/query001.fingerprint b/t/pt-fingerprint/samples/query001.fingerprint new file mode 100644 index 00000000..7e0a009f --- /dev/null +++ b/t/pt-fingerprint/samples/query001.fingerprint @@ -0,0 +1 @@ +select * from db.tbl where id=? or foo=? diff --git a/t/pt-fingerprint/samples/query002 b/t/pt-fingerprint/samples/query002 new file mode 100644 index 00000000..bb97304c --- /dev/null +++ b/t/pt-fingerprint/samples/query002 @@ -0,0 +1,2 @@ +# Query_time: 1 +select col from db.tbl1 where id in (1, 2, 3); diff --git a/t/pt-fingerprint/samples/query002.fingerprint b/t/pt-fingerprint/samples/query002.fingerprint new file mode 100644 index 00000000..9a120fa7 --- /dev/null +++ b/t/pt-fingerprint/samples/query002.fingerprint @@ -0,0 +1 @@ +select col from db.tbl? where id in(?+)