mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-01 18:25:59 +00:00
2290 lines
67 KiB
Perl
Executable File
2290 lines
67 KiB
Perl
Executable File
#!/usr/bin/env perl
|
|
|
|
# This program is part of Percona Toolkit: http://www.percona.com/software/
|
|
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
|
|
# notices and disclaimers.
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
|
|
# This tool is "fat-packed": most of its dependent modules are embedded
|
|
# in this file. Setting %INC to this file for each module makes Perl aware
|
|
# of this so it will not try to load the module from @INC. See the tool's
|
|
# documentation for a full list of dependencies.
|
|
BEGIN {
|
|
$INC{$_} = __FILE__ for map { (my $pkg = "$_.pm") =~ s!::!/!g; $pkg } (qw(
|
|
OptionParser
|
|
QueryParser
|
|
QueryRewriter
|
|
));
|
|
}
|
|
|
|
# ###########################################################################
|
|
# OptionParser package
|
|
# This package is a copy without comments from the original. The original
|
|
# with comments and its test file can be found in the GitHub repository at,
|
|
# lib/OptionParser.pm
|
|
# t/lib/OptionParser.t
|
|
# See https://github.com/percona/percona-toolkit for more information.
|
|
# ###########################################################################
|
|
{
|
|
package OptionParser;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
use List::Util qw(max);
|
|
use Getopt::Long;
|
|
use Data::Dumper;
|
|
|
|
my $POD_link_re = '[LC]<"?([^">]+)"?>';
|
|
|
|
sub new {
|
|
my ( $class, %args ) = @_;
|
|
my @required_args = qw();
|
|
foreach my $arg ( @required_args ) {
|
|
die "I need a $arg argument" unless $args{$arg};
|
|
}
|
|
|
|
my ($program_name) = $PROGRAM_NAME =~ m/([.A-Za-z-]+)$/;
|
|
$program_name ||= $PROGRAM_NAME;
|
|
my $home = $ENV{HOME} || $ENV{HOMEPATH} || $ENV{USERPROFILE} || '.';
|
|
|
|
my %attributes = (
|
|
'type' => 1,
|
|
'short form' => 1,
|
|
'group' => 1,
|
|
'default' => 1,
|
|
'cumulative' => 1,
|
|
'negatable' => 1,
|
|
'repeatable' => 1, # means it can be specified more than once
|
|
);
|
|
|
|
my $self = {
|
|
head1 => 'OPTIONS', # These args are used internally
|
|
skip_rules => 0, # to instantiate another Option-
|
|
item => '--(.*)', # Parser obj that parses the
|
|
attributes => \%attributes, # DSN OPTIONS section. Tools
|
|
parse_attributes => \&_parse_attribs, # don't tinker with these args.
|
|
|
|
%args,
|
|
|
|
strict => 1, # disabled by a special rule
|
|
program_name => $program_name,
|
|
opts => {},
|
|
got_opts => 0,
|
|
short_opts => {},
|
|
defaults => {},
|
|
groups => {},
|
|
allowed_groups => {},
|
|
errors => [],
|
|
rules => [], # desc of rules for --help
|
|
mutex => [], # rule: opts are mutually exclusive
|
|
atleast1 => [], # rule: at least one opt is required
|
|
disables => {}, # rule: opt disables other opts
|
|
defaults_to => {}, # rule: opt defaults to value of other opt
|
|
DSNParser => undef,
|
|
default_files => [
|
|
"/etc/percona-toolkit/percona-toolkit.conf",
|
|
"/etc/percona-toolkit/$program_name.conf",
|
|
"$home/.percona-toolkit.conf",
|
|
"$home/.$program_name.conf",
|
|
],
|
|
types => {
|
|
string => 's', # standard Getopt type
|
|
int => 'i', # standard Getopt type
|
|
float => 'f', # standard Getopt type
|
|
Hash => 'H', # hash, formed from a comma-separated list
|
|
hash => 'h', # hash as above, but only if a value is given
|
|
Array => 'A', # array, similar to Hash
|
|
array => 'a', # array, similar to hash
|
|
DSN => 'd', # DSN
|
|
size => 'z', # size with kMG suffix (powers of 2^10)
|
|
time => 'm', # time, with an optional suffix of s/h/m/d
|
|
},
|
|
};
|
|
|
|
return bless $self, $class;
|
|
}
|
|
|
|
sub get_specs {
|
|
my ( $self, $file ) = @_;
|
|
$file ||= $self->{file} || __FILE__;
|
|
my @specs = $self->_pod_to_specs($file);
|
|
$self->_parse_specs(@specs);
|
|
|
|
open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
|
|
my $contents = do { local $/ = undef; <$fh> };
|
|
close $fh;
|
|
if ( $contents =~ m/^=head1 DSN OPTIONS/m ) {
|
|
PTDEBUG && _d('Parsing DSN OPTIONS');
|
|
my $dsn_attribs = {
|
|
dsn => 1,
|
|
copy => 1,
|
|
};
|
|
my $parse_dsn_attribs = sub {
|
|
my ( $self, $option, $attribs ) = @_;
|
|
map {
|
|
my $val = $attribs->{$_};
|
|
if ( $val ) {
|
|
$val = $val eq 'yes' ? 1
|
|
: $val eq 'no' ? 0
|
|
: $val;
|
|
$attribs->{$_} = $val;
|
|
}
|
|
} keys %$attribs;
|
|
return {
|
|
key => $option,
|
|
%$attribs,
|
|
};
|
|
};
|
|
my $dsn_o = new OptionParser(
|
|
description => 'DSN OPTIONS',
|
|
head1 => 'DSN OPTIONS',
|
|
dsn => 0, # XXX don't infinitely recurse!
|
|
item => '\* (.)', # key opts are a single character
|
|
skip_rules => 1, # no rules before opts
|
|
attributes => $dsn_attribs,
|
|
parse_attributes => $parse_dsn_attribs,
|
|
);
|
|
my @dsn_opts = map {
|
|
my $opts = {
|
|
key => $_->{spec}->{key},
|
|
dsn => $_->{spec}->{dsn},
|
|
copy => $_->{spec}->{copy},
|
|
desc => $_->{desc},
|
|
};
|
|
$opts;
|
|
} $dsn_o->_pod_to_specs($file);
|
|
$self->{DSNParser} = DSNParser->new(opts => \@dsn_opts);
|
|
}
|
|
|
|
if ( $contents =~ m/^=head1 VERSION\n\n^(.+)$/m ) {
|
|
$self->{version} = $1;
|
|
PTDEBUG && _d($self->{version});
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
sub DSNParser {
|
|
my ( $self ) = @_;
|
|
return $self->{DSNParser};
|
|
};
|
|
|
|
sub get_defaults_files {
|
|
my ( $self ) = @_;
|
|
return @{$self->{default_files}};
|
|
}
|
|
|
|
sub _pod_to_specs {
|
|
my ( $self, $file ) = @_;
|
|
$file ||= $self->{file} || __FILE__;
|
|
open my $fh, '<', $file or die "Cannot open $file: $OS_ERROR";
|
|
|
|
my @specs = ();
|
|
my @rules = ();
|
|
my $para;
|
|
|
|
local $INPUT_RECORD_SEPARATOR = '';
|
|
while ( $para = <$fh> ) {
|
|
next unless $para =~ m/^=head1 $self->{head1}/;
|
|
last;
|
|
}
|
|
|
|
while ( $para = <$fh> ) {
|
|
last if $para =~ m/^=over/;
|
|
next if $self->{skip_rules};
|
|
chomp $para;
|
|
$para =~ s/\s+/ /g;
|
|
$para =~ s/$POD_link_re/$1/go;
|
|
PTDEBUG && _d('Option rule:', $para);
|
|
push @rules, $para;
|
|
}
|
|
|
|
die "POD has no $self->{head1} section" unless $para;
|
|
|
|
do {
|
|
if ( my ($option) = $para =~ m/^=item $self->{item}/ ) {
|
|
chomp $para;
|
|
PTDEBUG && _d($para);
|
|
my %attribs;
|
|
|
|
$para = <$fh>; # read next paragraph, possibly attributes
|
|
|
|
if ( $para =~ m/: / ) { # attributes
|
|
$para =~ s/\s+\Z//g;
|
|
%attribs = map {
|
|
my ( $attrib, $val) = split(/: /, $_);
|
|
die "Unrecognized attribute for --$option: $attrib"
|
|
unless $self->{attributes}->{$attrib};
|
|
($attrib, $val);
|
|
} split(/; /, $para);
|
|
if ( $attribs{'short form'} ) {
|
|
$attribs{'short form'} =~ s/-//;
|
|
}
|
|
$para = <$fh>; # read next paragraph, probably short help desc
|
|
}
|
|
else {
|
|
PTDEBUG && _d('Option has no attributes');
|
|
}
|
|
|
|
$para =~ s/\s+\Z//g;
|
|
$para =~ s/\s+/ /g;
|
|
$para =~ s/$POD_link_re/$1/go;
|
|
|
|
$para =~ s/\.(?:\n.*| [A-Z].*|\Z)//s;
|
|
PTDEBUG && _d('Short help:', $para);
|
|
|
|
die "No description after option spec $option" if $para =~ m/^=item/;
|
|
|
|
if ( my ($base_option) = $option =~ m/^\[no\](.*)/ ) {
|
|
$option = $base_option;
|
|
$attribs{'negatable'} = 1;
|
|
}
|
|
|
|
push @specs, {
|
|
spec => $self->{parse_attributes}->($self, $option, \%attribs),
|
|
desc => $para
|
|
. (defined $attribs{default} ? " (default $attribs{default})" : ''),
|
|
group => ($attribs{'group'} ? $attribs{'group'} : 'default'),
|
|
attributes => \%attribs
|
|
};
|
|
}
|
|
while ( $para = <$fh> ) {
|
|
last unless $para;
|
|
if ( $para =~ m/^=head1/ ) {
|
|
$para = undef; # Can't 'last' out of a do {} block.
|
|
last;
|
|
}
|
|
last if $para =~ m/^=item /;
|
|
}
|
|
} while ( $para );
|
|
|
|
die "No valid specs in $self->{head1}" unless @specs;
|
|
|
|
close $fh;
|
|
return @specs, @rules;
|
|
}
|
|
|
|
sub _parse_specs {
|
|
my ( $self, @specs ) = @_;
|
|
my %disables; # special rule that requires deferred checking
|
|
|
|
foreach my $opt ( @specs ) {
|
|
if ( ref $opt ) { # It's an option spec, not a rule.
|
|
PTDEBUG && _d('Parsing opt spec:',
|
|
map { ($_, '=>', $opt->{$_}) } keys %$opt);
|
|
|
|
my ( $long, $short ) = $opt->{spec} =~ m/^([\w-]+)(?:\|([^!+=]*))?/;
|
|
if ( !$long ) {
|
|
die "Cannot parse long option from spec $opt->{spec}";
|
|
}
|
|
$opt->{long} = $long;
|
|
|
|
die "Duplicate long option --$long" if exists $self->{opts}->{$long};
|
|
$self->{opts}->{$long} = $opt;
|
|
|
|
if ( length $long == 1 ) {
|
|
PTDEBUG && _d('Long opt', $long, 'looks like short opt');
|
|
$self->{short_opts}->{$long} = $long;
|
|
}
|
|
|
|
if ( $short ) {
|
|
die "Duplicate short option -$short"
|
|
if exists $self->{short_opts}->{$short};
|
|
$self->{short_opts}->{$short} = $long;
|
|
$opt->{short} = $short;
|
|
}
|
|
else {
|
|
$opt->{short} = undef;
|
|
}
|
|
|
|
$opt->{is_negatable} = $opt->{spec} =~ m/!/ ? 1 : 0;
|
|
$opt->{is_cumulative} = $opt->{spec} =~ m/\+/ ? 1 : 0;
|
|
$opt->{is_repeatable} = $opt->{attributes}->{repeatable} ? 1 : 0;
|
|
$opt->{is_required} = $opt->{desc} =~ m/required/ ? 1 : 0;
|
|
|
|
$opt->{group} ||= 'default';
|
|
$self->{groups}->{ $opt->{group} }->{$long} = 1;
|
|
|
|
$opt->{value} = undef;
|
|
$opt->{got} = 0;
|
|
|
|
my ( $type ) = $opt->{spec} =~ m/=(.)/;
|
|
$opt->{type} = $type;
|
|
PTDEBUG && _d($long, 'type:', $type);
|
|
|
|
|
|
$opt->{spec} =~ s/=./=s/ if ( $type && $type =~ m/[HhAadzm]/ );
|
|
|
|
if ( (my ($def) = $opt->{desc} =~ m/default\b(?: ([^)]+))?/) ) {
|
|
$def = defined $def ? $def : 1;
|
|
$def = $def eq 'yes' ? 1 : $def eq 'no' ? 0 : $def;
|
|
$self->{defaults}->{$long} = $def;
|
|
PTDEBUG && _d($long, 'default:', $def);
|
|
}
|
|
|
|
if ( $long eq 'config' ) {
|
|
$self->{defaults}->{$long} = join(',', $self->get_defaults_files());
|
|
}
|
|
|
|
if ( (my ($dis) = $opt->{desc} =~ m/(disables .*)/) ) {
|
|
$disables{$long} = $dis;
|
|
PTDEBUG && _d('Deferring check of disables rule for', $opt, $dis);
|
|
}
|
|
|
|
$self->{opts}->{$long} = $opt;
|
|
}
|
|
else { # It's an option rule, not a spec.
|
|
PTDEBUG && _d('Parsing rule:', $opt);
|
|
push @{$self->{rules}}, $opt;
|
|
my @participants = $self->_get_participants($opt);
|
|
my $rule_ok = 0;
|
|
|
|
if ( $opt =~ m/mutually exclusive|one and only one/ ) {
|
|
$rule_ok = 1;
|
|
push @{$self->{mutex}}, \@participants;
|
|
PTDEBUG && _d(@participants, 'are mutually exclusive');
|
|
}
|
|
if ( $opt =~ m/at least one|one and only one/ ) {
|
|
$rule_ok = 1;
|
|
push @{$self->{atleast1}}, \@participants;
|
|
PTDEBUG && _d(@participants, 'require at least one');
|
|
}
|
|
if ( $opt =~ m/default to/ ) {
|
|
$rule_ok = 1;
|
|
$self->{defaults_to}->{$participants[0]} = $participants[1];
|
|
PTDEBUG && _d($participants[0], 'defaults to', $participants[1]);
|
|
}
|
|
if ( $opt =~ m/restricted to option groups/ ) {
|
|
$rule_ok = 1;
|
|
my ($groups) = $opt =~ m/groups ([\w\s\,]+)/;
|
|
my @groups = split(',', $groups);
|
|
%{$self->{allowed_groups}->{$participants[0]}} = map {
|
|
s/\s+//;
|
|
$_ => 1;
|
|
} @groups;
|
|
}
|
|
if( $opt =~ m/accepts additional command-line arguments/ ) {
|
|
$rule_ok = 1;
|
|
$self->{strict} = 0;
|
|
PTDEBUG && _d("Strict mode disabled by rule");
|
|
}
|
|
|
|
die "Unrecognized option rule: $opt" unless $rule_ok;
|
|
}
|
|
}
|
|
|
|
foreach my $long ( keys %disables ) {
|
|
my @participants = $self->_get_participants($disables{$long});
|
|
$self->{disables}->{$long} = \@participants;
|
|
PTDEBUG && _d('Option', $long, 'disables', @participants);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
sub _get_participants {
|
|
my ( $self, $str ) = @_;
|
|
my @participants;
|
|
foreach my $long ( $str =~ m/--(?:\[no\])?([\w-]+)/g ) {
|
|
die "Option --$long does not exist while processing rule $str"
|
|
unless exists $self->{opts}->{$long};
|
|
push @participants, $long;
|
|
}
|
|
PTDEBUG && _d('Participants for', $str, ':', @participants);
|
|
return @participants;
|
|
}
|
|
|
|
sub opts {
|
|
my ( $self ) = @_;
|
|
my %opts = %{$self->{opts}};
|
|
return %opts;
|
|
}
|
|
|
|
sub short_opts {
|
|
my ( $self ) = @_;
|
|
my %short_opts = %{$self->{short_opts}};
|
|
return %short_opts;
|
|
}
|
|
|
|
sub set_defaults {
|
|
my ( $self, %defaults ) = @_;
|
|
$self->{defaults} = {};
|
|
foreach my $long ( keys %defaults ) {
|
|
die "Cannot set default for nonexistent option $long"
|
|
unless exists $self->{opts}->{$long};
|
|
$self->{defaults}->{$long} = $defaults{$long};
|
|
PTDEBUG && _d('Default val for', $long, ':', $defaults{$long});
|
|
}
|
|
return;
|
|
}
|
|
|
|
sub get_defaults {
|
|
my ( $self ) = @_;
|
|
return $self->{defaults};
|
|
}
|
|
|
|
sub get_groups {
|
|
my ( $self ) = @_;
|
|
return $self->{groups};
|
|
}
|
|
|
|
sub _set_option {
|
|
my ( $self, $opt, $val ) = @_;
|
|
my $long = exists $self->{opts}->{$opt} ? $opt
|
|
: exists $self->{short_opts}->{$opt} ? $self->{short_opts}->{$opt}
|
|
: die "Getopt::Long gave a nonexistent option: $opt";
|
|
$opt = $self->{opts}->{$long};
|
|
if ( $opt->{is_cumulative} ) {
|
|
$opt->{value}++;
|
|
}
|
|
elsif ( ($opt->{type} || '') eq 's' && $val =~ m/^--?(.+)/ ) {
|
|
my $next_opt = $1;
|
|
if ( exists $self->{opts}->{$next_opt}
|
|
|| exists $self->{short_opts}->{$next_opt} ) {
|
|
$self->save_error("--$long requires a string value");
|
|
return;
|
|
}
|
|
else {
|
|
if ($opt->{is_repeatable}) {
|
|
push @{$opt->{value}} , $val;
|
|
}
|
|
else {
|
|
$opt->{value} = $val;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if ($opt->{is_repeatable}) {
|
|
push @{$opt->{value}} , $val;
|
|
}
|
|
else {
|
|
$opt->{value} = $val;
|
|
}
|
|
}
|
|
$opt->{got} = 1;
|
|
PTDEBUG && _d('Got option', $long, '=', $val);
|
|
}
|
|
|
|
sub get_opts {
|
|
my ( $self ) = @_;
|
|
|
|
foreach my $long ( keys %{$self->{opts}} ) {
|
|
$self->{opts}->{$long}->{got} = 0;
|
|
$self->{opts}->{$long}->{value}
|
|
= exists $self->{defaults}->{$long} ? $self->{defaults}->{$long}
|
|
: $self->{opts}->{$long}->{is_cumulative} ? 0
|
|
: undef;
|
|
}
|
|
$self->{got_opts} = 0;
|
|
|
|
$self->{errors} = [];
|
|
|
|
if ( @ARGV && $ARGV[0] =~/^--config=/ ) {
|
|
$ARGV[0] = substr($ARGV[0],9);
|
|
$ARGV[0] =~ s/^'(.*)'$/$1/;
|
|
$ARGV[0] =~ s/^"(.*)"$/$1/;
|
|
$self->_set_option('config', shift @ARGV);
|
|
}
|
|
if ( @ARGV && $ARGV[0] eq "--config" ) {
|
|
shift @ARGV;
|
|
$self->_set_option('config', shift @ARGV);
|
|
}
|
|
if ( $self->has('config') ) {
|
|
my @extra_args;
|
|
foreach my $filename ( split(',', $self->get('config')) ) {
|
|
eval {
|
|
push @extra_args, $self->_read_config_file($filename);
|
|
};
|
|
if ( $EVAL_ERROR ) {
|
|
if ( $self->got('config') ) {
|
|
die $EVAL_ERROR;
|
|
}
|
|
elsif ( PTDEBUG ) {
|
|
_d($EVAL_ERROR);
|
|
}
|
|
}
|
|
}
|
|
unshift @ARGV, @extra_args;
|
|
}
|
|
|
|
Getopt::Long::Configure('no_ignore_case', 'bundling');
|
|
GetOptions(
|
|
map { $_->{spec} => sub { $self->_set_option(@_); } }
|
|
grep { $_->{long} ne 'config' } # --config is handled specially above.
|
|
values %{$self->{opts}}
|
|
) or $self->save_error('Error parsing options');
|
|
|
|
if ( exists $self->{opts}->{version} && $self->{opts}->{version}->{got} ) {
|
|
if ( $self->{version} ) {
|
|
print $self->{version}, "\n";
|
|
exit 0;
|
|
}
|
|
else {
|
|
print "Error parsing version. See the VERSION section of the tool's documentation.\n";
|
|
exit 1;
|
|
}
|
|
}
|
|
|
|
if ( exists $self->{opts}->{'buffer-stdout'} && $self->{opts}->{'buffer-stdout'}->{got} ) {
|
|
STDOUT->autoflush(1 - $self->{opts}->{'buffer-stdout'}->{value});
|
|
}
|
|
|
|
if ( @ARGV && $self->{strict} ) {
|
|
$self->save_error("Unrecognized command-line options @ARGV");
|
|
}
|
|
|
|
foreach my $mutex ( @{$self->{mutex}} ) {
|
|
my @set = grep { $self->{opts}->{$_}->{got} } @$mutex;
|
|
if ( @set > 1 ) {
|
|
my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
|
|
@{$mutex}[ 0 .. scalar(@$mutex) - 2] )
|
|
. ' and --'.$self->{opts}->{$mutex->[-1]}->{long}
|
|
. ' are mutually exclusive.';
|
|
$self->save_error($err);
|
|
}
|
|
}
|
|
|
|
foreach my $required ( @{$self->{atleast1}} ) {
|
|
my @set = grep { $self->{opts}->{$_}->{got} } @$required;
|
|
if ( @set == 0 ) {
|
|
my $err = join(', ', map { "--$self->{opts}->{$_}->{long}" }
|
|
@{$required}[ 0 .. scalar(@$required) - 2] )
|
|
.' or --'.$self->{opts}->{$required->[-1]}->{long};
|
|
$self->save_error("Specify at least one of $err");
|
|
}
|
|
}
|
|
|
|
$self->_check_opts( keys %{$self->{opts}} );
|
|
$self->{got_opts} = 1;
|
|
return;
|
|
}
|
|
|
|
sub _check_opts {
|
|
my ( $self, @long ) = @_;
|
|
my $long_last = scalar @long;
|
|
while ( @long ) {
|
|
foreach my $i ( 0..$#long ) {
|
|
my $long = $long[$i];
|
|
next unless $long;
|
|
my $opt = $self->{opts}->{$long};
|
|
if ( $opt->{got} ) {
|
|
if ( exists $self->{disables}->{$long} ) {
|
|
my @disable_opts = @{$self->{disables}->{$long}};
|
|
map { $self->{opts}->{$_}->{value} = undef; } @disable_opts;
|
|
PTDEBUG && _d('Unset options', @disable_opts,
|
|
'because', $long,'disables them');
|
|
}
|
|
|
|
if ( exists $self->{allowed_groups}->{$long} ) {
|
|
|
|
my @restricted_groups = grep {
|
|
!exists $self->{allowed_groups}->{$long}->{$_}
|
|
} keys %{$self->{groups}};
|
|
|
|
my @restricted_opts;
|
|
foreach my $restricted_group ( @restricted_groups ) {
|
|
RESTRICTED_OPT:
|
|
foreach my $restricted_opt (
|
|
keys %{$self->{groups}->{$restricted_group}} )
|
|
{
|
|
next RESTRICTED_OPT if $restricted_opt eq $long;
|
|
push @restricted_opts, $restricted_opt
|
|
if $self->{opts}->{$restricted_opt}->{got};
|
|
}
|
|
}
|
|
|
|
if ( @restricted_opts ) {
|
|
my $err;
|
|
if ( @restricted_opts == 1 ) {
|
|
$err = "--$restricted_opts[0]";
|
|
}
|
|
else {
|
|
$err = join(', ',
|
|
map { "--$self->{opts}->{$_}->{long}" }
|
|
grep { $_ }
|
|
@restricted_opts[0..scalar(@restricted_opts) - 2]
|
|
)
|
|
. ' or --'.$self->{opts}->{$restricted_opts[-1]}->{long};
|
|
}
|
|
$self->save_error("--$long is not allowed with $err");
|
|
}
|
|
}
|
|
|
|
}
|
|
elsif ( $opt->{is_required} ) {
|
|
$self->save_error("Required option --$long must be specified");
|
|
}
|
|
|
|
$self->_validate_type($opt);
|
|
if ( $opt->{parsed} ) {
|
|
delete $long[$i];
|
|
}
|
|
else {
|
|
PTDEBUG && _d('Temporarily failed to parse', $long);
|
|
}
|
|
}
|
|
|
|
die "Failed to parse options, possibly due to circular dependencies"
|
|
if @long == $long_last;
|
|
$long_last = @long;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
sub _validate_type {
|
|
my ( $self, $opt ) = @_;
|
|
return unless $opt;
|
|
|
|
if ( !$opt->{type} ) {
|
|
$opt->{parsed} = 1;
|
|
return;
|
|
}
|
|
|
|
my $val = $opt->{value};
|
|
|
|
if ( $val && $opt->{type} eq 'm' ) { # type time
|
|
PTDEBUG && _d('Parsing option', $opt->{long}, 'as a time value');
|
|
my ( $prefix, $num, $suffix ) = $val =~ m/([+-]?)(\d+)([a-z])?$/;
|
|
if ( !$suffix ) {
|
|
my ( $s ) = $opt->{desc} =~ m/\(suffix (.)\)/;
|
|
$suffix = $s || 's';
|
|
PTDEBUG && _d('No suffix given; using', $suffix, 'for',
|
|
$opt->{long}, '(value:', $val, ')');
|
|
}
|
|
if ( $suffix =~ m/[smhd]/ ) {
|
|
$val = $suffix eq 's' ? $num # Seconds
|
|
: $suffix eq 'm' ? $num * 60 # Minutes
|
|
: $suffix eq 'h' ? $num * 3600 # Hours
|
|
: $num * 86400; # Days
|
|
$opt->{value} = ($prefix || '') . $val;
|
|
PTDEBUG && _d('Setting option', $opt->{long}, 'to', $val);
|
|
}
|
|
else {
|
|
$self->save_error("Invalid time suffix for --$opt->{long}");
|
|
}
|
|
}
|
|
elsif ( $val && $opt->{type} eq 'd' ) { # type DSN
|
|
PTDEBUG && _d('Parsing option', $opt->{long}, 'as a DSN');
|
|
my $prev = {};
|
|
my $from_key = $self->{defaults_to}->{ $opt->{long} };
|
|
if ( $from_key ) {
|
|
PTDEBUG && _d($opt->{long}, 'DSN copies from', $from_key, 'DSN');
|
|
if ( $self->{opts}->{$from_key}->{parsed} ) {
|
|
$prev = $self->{opts}->{$from_key}->{value};
|
|
}
|
|
else {
|
|
PTDEBUG && _d('Cannot parse', $opt->{long}, 'until',
|
|
$from_key, 'parsed');
|
|
return;
|
|
}
|
|
}
|
|
my $defaults = $self->{DSNParser}->parse_options($self);
|
|
if (!$opt->{attributes}->{repeatable}) {
|
|
$opt->{value} = $self->{DSNParser}->parse($val, $prev, $defaults);
|
|
} else {
|
|
my $values = [];
|
|
for my $dsn_string (@$val) {
|
|
push @$values, $self->{DSNParser}->parse($dsn_string, $prev, $defaults);
|
|
}
|
|
$opt->{value} = $values;
|
|
}
|
|
}
|
|
elsif ( $val && $opt->{type} eq 'z' ) { # type size
|
|
PTDEBUG && _d('Parsing option', $opt->{long}, 'as a size value');
|
|
$self->_parse_size($opt, $val);
|
|
}
|
|
elsif ( $opt->{type} eq 'H' || (defined $val && $opt->{type} eq 'h') ) {
|
|
$opt->{value} = { map { $_ => 1 } split(/(?<!\\),\s*/, ($val || '')) };
|
|
}
|
|
elsif ( $opt->{type} eq 'A' || (defined $val && $opt->{type} eq 'a') ) {
|
|
$opt->{value} = [ split(/(?<!\\),\s*/, ($val || '')) ];
|
|
}
|
|
else {
|
|
PTDEBUG && _d('Nothing to validate for option',
|
|
$opt->{long}, 'type', $opt->{type}, 'value', $val);
|
|
}
|
|
|
|
$opt->{parsed} = 1;
|
|
return;
|
|
}
|
|
|
|
sub get {
|
|
my ( $self, $opt ) = @_;
|
|
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
|
|
die "Option $opt does not exist"
|
|
unless $long && exists $self->{opts}->{$long};
|
|
return $self->{opts}->{$long}->{value};
|
|
}
|
|
|
|
sub got {
|
|
my ( $self, $opt ) = @_;
|
|
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
|
|
die "Option $opt does not exist"
|
|
unless $long && exists $self->{opts}->{$long};
|
|
return $self->{opts}->{$long}->{got};
|
|
}
|
|
|
|
sub has {
|
|
my ( $self, $opt ) = @_;
|
|
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
|
|
return defined $long ? exists $self->{opts}->{$long} : 0;
|
|
}
|
|
|
|
sub set {
|
|
my ( $self, $opt, $val ) = @_;
|
|
my $long = (length $opt == 1 ? $self->{short_opts}->{$opt} : $opt);
|
|
die "Option $opt does not exist"
|
|
unless $long && exists $self->{opts}->{$long};
|
|
$self->{opts}->{$long}->{value} = $val;
|
|
return;
|
|
}
|
|
|
|
sub save_error {
|
|
my ( $self, $error ) = @_;
|
|
push @{$self->{errors}}, $error;
|
|
return;
|
|
}
|
|
|
|
sub errors {
|
|
my ( $self ) = @_;
|
|
return $self->{errors};
|
|
}
|
|
|
|
sub usage {
|
|
my ( $self ) = @_;
|
|
warn "No usage string is set" unless $self->{usage}; # XXX
|
|
return "Usage: " . ($self->{usage} || '') . "\n";
|
|
}
|
|
|
|
sub descr {
|
|
my ( $self ) = @_;
|
|
warn "No description string is set" unless $self->{description}; # XXX
|
|
my $descr = ($self->{description} || $self->{program_name} || '')
|
|
. " For more details, please use the --help option, "
|
|
. "or try 'perldoc $PROGRAM_NAME' "
|
|
. "for complete documentation.";
|
|
$descr = join("\n", $descr =~ m/(.{0,80})(?:\s+|$)/g)
|
|
unless $ENV{DONT_BREAK_LINES};
|
|
$descr =~ s/ +$//mg;
|
|
return $descr;
|
|
}
|
|
|
|
sub usage_or_errors {
|
|
my ( $self, $file, $return ) = @_;
|
|
$file ||= $self->{file} || __FILE__;
|
|
|
|
if ( !$self->{description} || !$self->{usage} ) {
|
|
PTDEBUG && _d("Getting description and usage from SYNOPSIS in", $file);
|
|
my %synop = $self->_parse_synopsis($file);
|
|
$self->{description} ||= $synop{description};
|
|
$self->{usage} ||= $synop{usage};
|
|
PTDEBUG && _d("Description:", $self->{description},
|
|
"\nUsage:", $self->{usage});
|
|
}
|
|
|
|
if ( $self->{opts}->{help}->{got} ) {
|
|
print $self->print_usage() or die "Cannot print usage: $OS_ERROR";
|
|
exit 0 unless $return;
|
|
}
|
|
elsif ( scalar @{$self->{errors}} ) {
|
|
print $self->print_errors() or die "Cannot print errors: $OS_ERROR";
|
|
exit 1 unless $return;
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
sub print_errors {
|
|
my ( $self ) = @_;
|
|
my $usage = $self->usage() . "\n";
|
|
if ( (my @errors = @{$self->{errors}}) ) {
|
|
$usage .= join("\n * ", 'Errors in command-line arguments:', @errors)
|
|
. "\n";
|
|
}
|
|
return $usage . "\n" . $self->descr();
|
|
}
|
|
|
|
sub print_usage {
|
|
my ( $self ) = @_;
|
|
die "Run get_opts() before print_usage()" unless $self->{got_opts};
|
|
my @opts = values %{$self->{opts}};
|
|
|
|
my $maxl = max(
|
|
map {
|
|
length($_->{long}) # option long name
|
|
+ ($_->{is_negatable} ? 4 : 0) # "[no]" if opt is negatable
|
|
+ ($_->{type} ? 2 : 0) # "=x" where x is the opt type
|
|
}
|
|
@opts);
|
|
|
|
my $maxs = max(0,
|
|
map {
|
|
length($_)
|
|
+ ($self->{opts}->{$_}->{is_negatable} ? 4 : 0)
|
|
+ ($self->{opts}->{$_}->{type} ? 2 : 0)
|
|
}
|
|
values %{$self->{short_opts}});
|
|
|
|
my $lcol = max($maxl, ($maxs + 3));
|
|
my $rcol = 80 - $lcol - 6;
|
|
my $rpad = ' ' x ( 80 - $rcol );
|
|
|
|
$maxs = max($lcol - 3, $maxs);
|
|
|
|
my $usage = $self->descr() . "\n" . $self->usage();
|
|
|
|
my @groups = reverse sort grep { $_ ne 'default'; } keys %{$self->{groups}};
|
|
push @groups, 'default';
|
|
|
|
foreach my $group ( reverse @groups ) {
|
|
$usage .= "\n".($group eq 'default' ? 'Options' : $group).":\n\n";
|
|
foreach my $opt (
|
|
sort { $a->{long} cmp $b->{long} }
|
|
grep { $_->{group} eq $group }
|
|
@opts )
|
|
{
|
|
my $long = $opt->{is_negatable} ? "[no]$opt->{long}" : $opt->{long};
|
|
my $short = $opt->{short};
|
|
my $desc = $opt->{desc};
|
|
|
|
$long .= $opt->{type} ? "=$opt->{type}" : "";
|
|
|
|
if ( $opt->{type} && $opt->{type} eq 'm' ) {
|
|
my ($s) = $desc =~ m/\(suffix (.)\)/;
|
|
$s ||= 's';
|
|
$desc =~ s/\s+\(suffix .\)//;
|
|
$desc .= ". Optional suffix s=seconds, m=minutes, h=hours, "
|
|
. "d=days; if no suffix, $s is used.";
|
|
}
|
|
$desc = join("\n$rpad", grep { $_ } $desc =~ m/(.{0,$rcol}(?!\W))(?:\s+|(?<=\W)|$)/g);
|
|
$desc =~ s/ +$//mg;
|
|
if ( $short ) {
|
|
$usage .= sprintf(" --%-${maxs}s -%s %s\n", $long, $short, $desc);
|
|
}
|
|
else {
|
|
$usage .= sprintf(" --%-${lcol}s %s\n", $long, $desc);
|
|
}
|
|
}
|
|
}
|
|
|
|
$usage .= "\nOption types: s=string, i=integer, f=float, h/H/a/A=comma-separated list, d=DSN, z=size, m=time\n";
|
|
|
|
if ( (my @rules = @{$self->{rules}}) ) {
|
|
$usage .= "\nRules:\n\n";
|
|
$usage .= join("\n", map { " $_" } @rules) . "\n";
|
|
}
|
|
if ( $self->{DSNParser} ) {
|
|
$usage .= "\n" . $self->{DSNParser}->usage();
|
|
}
|
|
$usage .= "\nOptions and values after processing arguments:\n\n";
|
|
foreach my $opt ( sort { $a->{long} cmp $b->{long} } @opts ) {
|
|
my $val = $opt->{value};
|
|
my $type = $opt->{type} || '';
|
|
my $bool = $opt->{spec} =~ m/^[\w-]+(?:\|[\w-])?!?$/;
|
|
$val = $bool ? ( $val ? 'TRUE' : 'FALSE' )
|
|
: !defined $val ? '(No value)'
|
|
: $type eq 'd' ? $self->{DSNParser}->as_string($val)
|
|
: $type =~ m/H|h/ ? join(',', sort keys %$val)
|
|
: $type =~ m/A|a/ ? join(',', @$val)
|
|
: $val;
|
|
$usage .= sprintf(" --%-${lcol}s %s\n", $opt->{long}, $val);
|
|
}
|
|
return $usage;
|
|
}
|
|
|
|
sub prompt_noecho {
|
|
shift @_ if ref $_[0] eq __PACKAGE__;
|
|
my ( $prompt ) = @_;
|
|
local $OUTPUT_AUTOFLUSH = 1;
|
|
print STDERR $prompt
|
|
or die "Cannot print: $OS_ERROR";
|
|
my $response;
|
|
eval {
|
|
require Term::ReadKey;
|
|
Term::ReadKey::ReadMode('noecho');
|
|
chomp($response = <STDIN>);
|
|
Term::ReadKey::ReadMode('normal');
|
|
print "\n"
|
|
or die "Cannot print: $OS_ERROR";
|
|
};
|
|
if ( $EVAL_ERROR ) {
|
|
die "Cannot read response; is Term::ReadKey installed? $EVAL_ERROR";
|
|
}
|
|
return $response;
|
|
}
|
|
|
|
sub _read_config_file {
|
|
my ( $self, $filename ) = @_;
|
|
open my $fh, "<", $filename or die "Cannot open $filename: $OS_ERROR\n";
|
|
my @args;
|
|
my $prefix = '--';
|
|
my $parse = 1;
|
|
|
|
LINE:
|
|
while ( my $line = <$fh> ) {
|
|
chomp $line;
|
|
next LINE if $line =~ m/^\s*(?:\#|\;|$)/;
|
|
$line =~ s/\s+#.*$//g;
|
|
$line =~ s/^\s+|\s+$//g;
|
|
if ( $line eq '--' ) {
|
|
$prefix = '';
|
|
$parse = 0;
|
|
next LINE;
|
|
}
|
|
|
|
if ( $parse
|
|
&& !$self->has('version-check')
|
|
&& $line =~ /version-check/
|
|
) {
|
|
next LINE;
|
|
}
|
|
|
|
if ( $parse
|
|
&& (my($opt, $arg) = $line =~ m/^\s*([^=\s]+?)(?:\s*=\s*(.*?)\s*)?$/)
|
|
) {
|
|
push @args, grep { defined $_ } ("$prefix$opt", $arg);
|
|
}
|
|
elsif ( $line =~ m/./ ) {
|
|
push @args, $line;
|
|
}
|
|
else {
|
|
die "Syntax error in file $filename at line $INPUT_LINE_NUMBER";
|
|
}
|
|
}
|
|
close $fh;
|
|
return @args;
|
|
}
|
|
|
|
sub read_para_after {
|
|
my ( $self, $file, $regex ) = @_;
|
|
open my $fh, "<", $file or die "Can't open $file: $OS_ERROR";
|
|
local $INPUT_RECORD_SEPARATOR = '';
|
|
my $para;
|
|
while ( $para = <$fh> ) {
|
|
next unless $para =~ m/^=pod$/m;
|
|
last;
|
|
}
|
|
while ( $para = <$fh> ) {
|
|
next unless $para =~ m/$regex/;
|
|
last;
|
|
}
|
|
$para = <$fh>;
|
|
chomp($para);
|
|
close $fh or die "Can't close $file: $OS_ERROR";
|
|
return $para;
|
|
}
|
|
|
|
sub clone {
|
|
my ( $self ) = @_;
|
|
|
|
my %clone = map {
|
|
my $hashref = $self->{$_};
|
|
my $val_copy = {};
|
|
foreach my $key ( keys %$hashref ) {
|
|
my $ref = ref $hashref->{$key};
|
|
$val_copy->{$key} = !$ref ? $hashref->{$key}
|
|
: $ref eq 'HASH' ? { %{$hashref->{$key}} }
|
|
: $ref eq 'ARRAY' ? [ @{$hashref->{$key}} ]
|
|
: $hashref->{$key};
|
|
}
|
|
$_ => $val_copy;
|
|
} qw(opts short_opts defaults);
|
|
|
|
foreach my $scalar ( qw(got_opts) ) {
|
|
$clone{$scalar} = $self->{$scalar};
|
|
}
|
|
|
|
return bless \%clone;
|
|
}
|
|
|
|
sub _parse_size {
|
|
my ( $self, $opt, $val ) = @_;
|
|
|
|
if ( lc($val || '') eq 'null' ) {
|
|
PTDEBUG && _d('NULL size for', $opt->{long});
|
|
$opt->{value} = 'null';
|
|
return;
|
|
}
|
|
|
|
my %factor_for = (k => 1_024, M => 1_048_576, G => 1_073_741_824);
|
|
my ($pre, $num, $factor) = $val =~ m/^([+-])?(\d+)([kMG])?$/;
|
|
if ( defined $num ) {
|
|
if ( $factor ) {
|
|
$num *= $factor_for{$factor};
|
|
PTDEBUG && _d('Setting option', $opt->{y},
|
|
'to num', $num, '* factor', $factor);
|
|
}
|
|
$opt->{value} = ($pre || '') . $num;
|
|
}
|
|
else {
|
|
$self->save_error("Invalid size for --$opt->{long}: $val");
|
|
}
|
|
return;
|
|
}
|
|
|
|
sub _parse_attribs {
|
|
my ( $self, $option, $attribs ) = @_;
|
|
my $types = $self->{types};
|
|
return $option
|
|
. ($attribs->{'short form'} ? '|' . $attribs->{'short form'} : '' )
|
|
. ($attribs->{'negatable'} ? '!' : '' )
|
|
. ($attribs->{'cumulative'} ? '+' : '' )
|
|
. ($attribs->{'type'} ? '=' . $types->{$attribs->{type}} : '' );
|
|
}
|
|
|
|
sub _parse_synopsis {
|
|
my ( $self, $file ) = @_;
|
|
$file ||= $self->{file} || __FILE__;
|
|
PTDEBUG && _d("Parsing SYNOPSIS in", $file);
|
|
|
|
local $INPUT_RECORD_SEPARATOR = ''; # read paragraphs
|
|
open my $fh, "<", $file or die "Cannot open $file: $OS_ERROR";
|
|
my $para;
|
|
1 while defined($para = <$fh>) && $para !~ m/^=head1 SYNOPSIS/;
|
|
die "$file does not contain a SYNOPSIS section" unless $para;
|
|
my @synop;
|
|
for ( 1..2 ) { # 1 for the usage, 2 for the description
|
|
my $para = <$fh>;
|
|
push @synop, $para;
|
|
}
|
|
close $fh;
|
|
PTDEBUG && _d("Raw SYNOPSIS text:", @synop);
|
|
my ($usage, $desc) = @synop;
|
|
die "The SYNOPSIS section in $file is not formatted properly"
|
|
unless $usage && $desc;
|
|
|
|
$usage =~ s/^\s*Usage:\s+(.+)/$1/;
|
|
chomp $usage;
|
|
|
|
$desc =~ s/\n/ /g;
|
|
$desc =~ s/\s{2,}/ /g;
|
|
$desc =~ s/\. ([A-Z][a-z])/. $1/g;
|
|
$desc =~ s/\s+$//;
|
|
|
|
return (
|
|
description => $desc,
|
|
usage => $usage,
|
|
);
|
|
};
|
|
|
|
sub set_vars {
|
|
my ($self, $file) = @_;
|
|
$file ||= $self->{file} || __FILE__;
|
|
|
|
my %user_vars;
|
|
my $user_vars = $self->has('set-vars') ? $self->get('set-vars') : undef;
|
|
if ( $user_vars ) {
|
|
foreach my $var_val ( @$user_vars ) {
|
|
my ($var, $val) = $var_val =~ m/([^\s=]+)=(\S+)/;
|
|
die "Invalid --set-vars value: $var_val\n" unless $var && defined $val;
|
|
$user_vars{$var} = {
|
|
val => $val,
|
|
default => 0,
|
|
};
|
|
}
|
|
}
|
|
|
|
my %default_vars;
|
|
my $default_vars = $self->read_para_after($file, qr/MAGIC_set_vars/);
|
|
if ( $default_vars ) {
|
|
%default_vars = map {
|
|
my $var_val = $_;
|
|
my ($var, $val) = $var_val =~ m/([^\s=]+)=(\S+)/;
|
|
die "Invalid --set-vars value: $var_val\n" unless $var && defined $val;
|
|
$var => {
|
|
val => $val,
|
|
default => 1,
|
|
};
|
|
} split("\n", $default_vars);
|
|
}
|
|
|
|
my %vars = (
|
|
%default_vars, # first the tool's defaults
|
|
%user_vars, # then the user's which overwrite the defaults
|
|
);
|
|
PTDEBUG && _d('--set-vars:', Dumper(\%vars));
|
|
return \%vars;
|
|
}
|
|
|
|
sub _d {
|
|
my ($package, undef, $line) = caller 0;
|
|
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
|
map { defined $_ ? $_ : 'undef' }
|
|
@_;
|
|
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
|
}
|
|
|
|
if ( PTDEBUG ) {
|
|
print STDERR '# ', $^X, ' ', $], "\n";
|
|
if ( my $uname = `uname -a` ) {
|
|
$uname =~ s/\s+/ /g;
|
|
print STDERR "# $uname\n";
|
|
}
|
|
print STDERR '# Arguments: ',
|
|
join(' ', map { my $a = "_[$_]_"; $a =~ s/\n/\n# /g; $a; } @ARGV), "\n";
|
|
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End OptionParser package
|
|
# ###########################################################################
|
|
|
|
# ###########################################################################
|
|
# QueryParser package
|
|
# This package is a copy without comments from the original. The original
|
|
# with comments and its test file can be found in the GitHub repository at,
|
|
# lib/QueryParser.pm
|
|
# t/lib/QueryParser.t
|
|
# See https://github.com/percona/percona-toolkit for more information.
|
|
# ###########################################################################
|
|
{
|
|
package QueryParser;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
our $tbl_ident = qr/(?:`[^`]+`|\w+)(?:\.(?:`[^`]+`|\w+))?/;
|
|
our $tbl_regex = qr{
|
|
\b(?:FROM|JOIN|(?<!KEY\s)UPDATE|INTO) # Words that precede table names
|
|
\b\s*
|
|
\(? # Optional paren around tables
|
|
($tbl_ident
|
|
(?: (?:\s+ (?:AS\s+)? \w+)?, \s*$tbl_ident )*
|
|
)
|
|
}xio;
|
|
our $has_derived = qr{
|
|
\b(?:FROM|JOIN|,)
|
|
\s*\(\s*SELECT
|
|
}xi;
|
|
|
|
our $data_def_stmts = qr/(?:CREATE|ALTER|TRUNCATE|DROP|RENAME)/i;
|
|
|
|
our $data_manip_stmts = qr/(?:INSERT|UPDATE|DELETE|REPLACE)/i;
|
|
|
|
sub new {
|
|
my ( $class ) = @_;
|
|
bless {}, $class;
|
|
}
|
|
|
|
sub get_tables {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
PTDEBUG && _d('Getting tables for', $query);
|
|
|
|
my ( $ddl_stmt ) = $query =~ m/^\s*($data_def_stmts)\b/i;
|
|
if ( $ddl_stmt ) {
|
|
PTDEBUG && _d('Special table type:', $ddl_stmt);
|
|
$query =~ s/IF\s+(?:NOT\s+)?EXISTS//i;
|
|
if ( $query =~ m/$ddl_stmt DATABASE\b/i ) {
|
|
PTDEBUG && _d('Query alters a database, not a table');
|
|
return ();
|
|
}
|
|
if ( $ddl_stmt =~ m/CREATE/i && $query =~ m/$ddl_stmt\b.+?\bSELECT\b/i ) {
|
|
my ($select) = $query =~ m/\b(SELECT\b.+)/is;
|
|
PTDEBUG && _d('CREATE TABLE ... SELECT:', $select);
|
|
return $self->get_tables($select);
|
|
}
|
|
my ($tbl) = $query =~ m/TABLE\s+($tbl_ident)(\s+.*)?/i;
|
|
PTDEBUG && _d('Matches table:', $tbl);
|
|
return ($tbl);
|
|
}
|
|
|
|
$query =~ s/(?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN|DELAYED)\s+/ /ig;
|
|
|
|
if ( $query =~ s/^\s*LOCK TABLES\s+//i ) {
|
|
PTDEBUG && _d('Special table type: LOCK TABLES');
|
|
$query =~ s/\s+(?:READ(?:\s+LOCAL)?|WRITE)\s*//gi;
|
|
PTDEBUG && _d('Locked tables:', $query);
|
|
$query = "FROM $query";
|
|
}
|
|
|
|
$query =~ s/\\["']//g; # quoted strings
|
|
$query =~ s/".*?"/?/sg; # quoted strings
|
|
$query =~ s/'.*?'/?/sg; # quoted strings
|
|
|
|
if ( $query =~ m/\A\s*(?:INSERT|REPLACE)(?!\s+INTO)/i ) {
|
|
$query =~ s/\A\s*((?:INSERT|REPLACE))\s+/$1 INTO /i;
|
|
}
|
|
|
|
if ( $query =~ m/\A\s*LOAD DATA/i ) {
|
|
my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
|
|
return $tbl;
|
|
}
|
|
|
|
my @tables;
|
|
foreach my $tbls ( $query =~ m/$tbl_regex/gio ) {
|
|
PTDEBUG && _d('Match tables:', $tbls);
|
|
|
|
next if $tbls =~ m/\ASELECT\b/i;
|
|
|
|
foreach my $tbl ( split(',', $tbls) ) {
|
|
$tbl =~ s/\s*($tbl_ident)(\s+.*)?/$1/gio;
|
|
|
|
if ( $tbl !~ m/[a-zA-Z]/ ) {
|
|
PTDEBUG && _d('Skipping suspicious table name:', $tbl);
|
|
next;
|
|
}
|
|
|
|
push @tables, $tbl;
|
|
}
|
|
}
|
|
return @tables;
|
|
}
|
|
|
|
sub has_derived_table {
|
|
my ( $self, $query ) = @_;
|
|
my $match = $query =~ m/$has_derived/;
|
|
PTDEBUG && _d($query, 'has ' . ($match ? 'a' : 'no') . ' derived table');
|
|
return $match;
|
|
}
|
|
|
|
sub get_aliases {
|
|
my ( $self, $query, $list ) = @_;
|
|
|
|
my $result = {
|
|
DATABASE => {},
|
|
TABLE => {},
|
|
};
|
|
return $result unless $query;
|
|
|
|
$query =~ s/ (?:LOW_PRIORITY|IGNORE|STRAIGHT_JOIN)//ig;
|
|
|
|
$query =~ s/ (?:INNER|OUTER|CROSS|LEFT|RIGHT|NATURAL)//ig;
|
|
|
|
my @tbl_refs;
|
|
my ($tbl_refs, $from) = $query =~ m{
|
|
(
|
|
(FROM|INTO|UPDATE)\b\s* # Keyword before table refs
|
|
.+? # Table refs
|
|
)
|
|
(?:\s+|\z) # If the query does not end with the table
|
|
(?:WHERE|ORDER|LIMIT|HAVING|SET|VALUES|\z) # Keyword after table refs
|
|
}ix;
|
|
|
|
if ( $tbl_refs ) {
|
|
|
|
if ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
|
|
$tbl_refs =~ s/\([^\)]+\)\s*//;
|
|
}
|
|
|
|
PTDEBUG && _d('tbl refs:', $tbl_refs);
|
|
|
|
my $before_tbl = qr/(?:,|JOIN|\s|$from)+/i;
|
|
|
|
my $after_tbl = qr/(?:,|JOIN|ON|USING|\z)/i;
|
|
|
|
$tbl_refs =~ s/ = /=/g;
|
|
|
|
while (
|
|
$tbl_refs =~ m{
|
|
$before_tbl\b\s*
|
|
( ($tbl_ident) (?:\s+ (?:AS\s+)? (\w+))? )
|
|
\s*$after_tbl
|
|
}xgio )
|
|
{
|
|
my ( $tbl_ref, $db_tbl, $alias ) = ($1, $2, $3);
|
|
PTDEBUG && _d('Match table:', $tbl_ref);
|
|
push @tbl_refs, $tbl_ref;
|
|
$alias = $self->trim_identifier($alias);
|
|
|
|
if ( $tbl_ref =~ m/^AS\s+\w+/i ) {
|
|
PTDEBUG && _d('Subquery', $tbl_ref);
|
|
$result->{TABLE}->{$alias} = undef;
|
|
next;
|
|
}
|
|
|
|
my ( $db, $tbl ) = $db_tbl =~ m/^(?:(.*?)\.)?(.*)/;
|
|
$db = $self->trim_identifier($db);
|
|
$tbl = $self->trim_identifier($tbl);
|
|
$result->{TABLE}->{$alias || $tbl} = $tbl;
|
|
$result->{DATABASE}->{$tbl} = $db if $db;
|
|
}
|
|
}
|
|
else {
|
|
PTDEBUG && _d("No tables ref in", $query);
|
|
}
|
|
|
|
if ( $list ) {
|
|
return \@tbl_refs;
|
|
}
|
|
else {
|
|
return $result;
|
|
}
|
|
}
|
|
|
|
sub split {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
$query = $self->clean_query($query);
|
|
PTDEBUG && _d('Splitting', $query);
|
|
|
|
my $verbs = qr{SELECT|INSERT|UPDATE|DELETE|REPLACE|UNION|CREATE}i;
|
|
|
|
my @split_statements = grep { $_ } split(m/\b($verbs\b(?!(?:\s*\()))/io, $query);
|
|
|
|
my @statements;
|
|
if ( @split_statements == 1 ) {
|
|
push @statements, $query;
|
|
}
|
|
else {
|
|
for ( my $i = 0; $i <= $#split_statements; $i += 2 ) {
|
|
push @statements, $split_statements[$i].$split_statements[$i+1];
|
|
|
|
if ( $statements[-2] && $statements[-2] =~ m/on duplicate key\s+$/i ) {
|
|
$statements[-2] .= pop @statements;
|
|
}
|
|
}
|
|
}
|
|
|
|
PTDEBUG && _d('statements:', map { $_ ? "<$_>" : 'none' } @statements);
|
|
return @statements;
|
|
}
|
|
|
|
sub clean_query {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
$query =~ s!/\*.*?\*/! !g; # Remove /* comment blocks */
|
|
$query =~ s/^\s+//; # Remove leading spaces
|
|
$query =~ s/\s+$//; # Remove trailing spaces
|
|
$query =~ s/\s{2,}/ /g; # Remove extra spaces
|
|
return $query;
|
|
}
|
|
|
|
sub split_subquery {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
$query = $self->clean_query($query);
|
|
$query =~ s/;$//;
|
|
|
|
my @subqueries;
|
|
my $sqno = 0; # subquery number
|
|
my $pos = 0;
|
|
while ( $query =~ m/(\S+)(?:\s+|\Z)/g ) {
|
|
$pos = pos($query);
|
|
my $word = $1;
|
|
PTDEBUG && _d($word, $sqno);
|
|
if ( $word =~ m/^\(?SELECT\b/i ) {
|
|
my $start_pos = $pos - length($word) - 1;
|
|
if ( $start_pos ) {
|
|
$sqno++;
|
|
PTDEBUG && _d('Subquery', $sqno, 'starts at', $start_pos);
|
|
$subqueries[$sqno] = {
|
|
start_pos => $start_pos,
|
|
end_pos => 0,
|
|
len => 0,
|
|
words => [$word],
|
|
lp => 1, # left parentheses
|
|
rp => 0, # right parentheses
|
|
done => 0,
|
|
};
|
|
}
|
|
else {
|
|
PTDEBUG && _d('Main SELECT at pos 0');
|
|
}
|
|
}
|
|
else {
|
|
next unless $sqno; # next unless we're in a subquery
|
|
PTDEBUG && _d('In subquery', $sqno);
|
|
my $sq = $subqueries[$sqno];
|
|
if ( $sq->{done} ) {
|
|
PTDEBUG && _d('This subquery is done; SQL is for',
|
|
($sqno - 1 ? "subquery $sqno" : "the main SELECT"));
|
|
next;
|
|
}
|
|
push @{$sq->{words}}, $word;
|
|
my $lp = ($word =~ tr/\(//) || 0;
|
|
my $rp = ($word =~ tr/\)//) || 0;
|
|
PTDEBUG && _d('parentheses left', $lp, 'right', $rp);
|
|
if ( ($sq->{lp} + $lp) - ($sq->{rp} + $rp) == 0 ) {
|
|
my $end_pos = $pos - 1;
|
|
PTDEBUG && _d('Subquery', $sqno, 'ends at', $end_pos);
|
|
$sq->{end_pos} = $end_pos;
|
|
$sq->{len} = $end_pos - $sq->{start_pos};
|
|
}
|
|
}
|
|
}
|
|
|
|
for my $i ( 1..$#subqueries ) {
|
|
my $sq = $subqueries[$i];
|
|
next unless $sq;
|
|
$sq->{sql} = join(' ', @{$sq->{words}});
|
|
substr $query,
|
|
$sq->{start_pos} + 1, # +1 for (
|
|
$sq->{len} - 1, # -1 for )
|
|
"__subquery_$i";
|
|
}
|
|
|
|
return $query, map { $_->{sql} } grep { defined $_ } @subqueries;
|
|
}
|
|
|
|
sub query_type {
|
|
my ( $self, $query, $qr ) = @_;
|
|
my ($type, undef) = $qr->distill_verbs($query);
|
|
my $rw;
|
|
if ( $type =~ m/^SELECT\b/ ) {
|
|
$rw = 'read';
|
|
}
|
|
elsif ( $type =~ m/^$data_manip_stmts\b/
|
|
|| $type =~ m/^$data_def_stmts\b/ ) {
|
|
$rw = 'write'
|
|
}
|
|
|
|
return {
|
|
type => $type,
|
|
rw => $rw,
|
|
}
|
|
}
|
|
|
|
sub get_columns {
|
|
my ( $self, $query ) = @_;
|
|
my $cols = [];
|
|
return $cols unless $query;
|
|
my $cols_def;
|
|
|
|
if ( $query =~ m/^SELECT/i ) {
|
|
$query =~ s/
|
|
^SELECT\s+
|
|
(?:ALL
|
|
|DISTINCT
|
|
|DISTINCTROW
|
|
|HIGH_PRIORITY
|
|
|STRAIGHT_JOIN
|
|
|SQL_SMALL_RESULT
|
|
|SQL_BIG_RESULT
|
|
|SQL_BUFFER_RESULT
|
|
|SQL_CACHE
|
|
|SQL_NO_CACHE
|
|
|SQL_CALC_FOUND_ROWS
|
|
)\s+
|
|
/SELECT /xgi;
|
|
($cols_def) = $query =~ m/^SELECT\s+(.+?)\s+FROM/i;
|
|
}
|
|
elsif ( $query =~ m/^(?:INSERT|REPLACE)/i ) {
|
|
($cols_def) = $query =~ m/\(([^\)]+)\)\s*VALUE/i;
|
|
}
|
|
|
|
PTDEBUG && _d('Columns:', $cols_def);
|
|
if ( $cols_def ) {
|
|
@$cols = split(',', $cols_def);
|
|
map {
|
|
my $col = $_;
|
|
$col = s/^\s+//g;
|
|
$col = s/\s+$//g;
|
|
$col;
|
|
} @$cols;
|
|
}
|
|
|
|
return $cols;
|
|
}
|
|
|
|
sub parse {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
my $parsed = {};
|
|
|
|
$query =~ s/\n/ /g;
|
|
$query = $self->clean_query($query);
|
|
|
|
$parsed->{query} = $query,
|
|
$parsed->{tables} = $self->get_aliases($query, 1);
|
|
$parsed->{columns} = $self->get_columns($query);
|
|
|
|
my ($type) = $query =~ m/^(\w+)/;
|
|
$parsed->{type} = lc $type;
|
|
|
|
|
|
$parsed->{sub_queries} = [];
|
|
|
|
return $parsed;
|
|
}
|
|
|
|
sub extract_tables {
|
|
my ( $self, %args ) = @_;
|
|
my $query = $args{query};
|
|
my $default_db = $args{default_db};
|
|
my $q = $self->{Quoter} || $args{Quoter};
|
|
return unless $query;
|
|
PTDEBUG && _d('Extracting tables');
|
|
my @tables;
|
|
my %seen;
|
|
foreach my $db_tbl ( $self->get_tables($query) ) {
|
|
next unless $db_tbl;
|
|
next if $seen{$db_tbl}++; # Unique-ify for issue 337.
|
|
my ( $db, $tbl ) = $q->split_unquote($db_tbl);
|
|
push @tables, [ $db || $default_db, $tbl ];
|
|
}
|
|
return @tables;
|
|
}
|
|
|
|
sub trim_identifier {
|
|
my ($self, $str) = @_;
|
|
return unless defined $str;
|
|
$str =~ s/`//g;
|
|
$str =~ s/^\s+//;
|
|
$str =~ s/\s+$//;
|
|
return $str;
|
|
}
|
|
|
|
sub _d {
|
|
my ($package, undef, $line) = caller 0;
|
|
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
|
map { defined $_ ? $_ : 'undef' }
|
|
@_;
|
|
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
|
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End QueryParser package
|
|
# ###########################################################################
|
|
|
|
# ###########################################################################
|
|
# QueryRewriter package
|
|
# This package is a copy without comments from the original. The original
|
|
# with comments and its test file can be found in the GitHub repository at,
|
|
# lib/QueryRewriter.pm
|
|
# t/lib/QueryRewriter.t
|
|
# See https://github.com/percona/percona-toolkit for more information.
|
|
# ###########################################################################
|
|
{
|
|
package QueryRewriter;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
our $verbs = qr{^SHOW|^FLUSH|^COMMIT|^ROLLBACK|^BEGIN|SELECT|INSERT
|
|
|UPDATE|DELETE|REPLACE|^SET|UNION|^START|^LOCK}xi;
|
|
my $quote_re = qr/"(?:(?!(?<!\\)").)*"|'(?:(?!(?<!\\)').)*'/; # Costly!
|
|
my $bal;
|
|
$bal = qr/
|
|
\(
|
|
(?:
|
|
(?> [^()]+ ) # Non-parens without backtracking
|
|
|
|
|
(??{ $bal }) # Group with matching parens
|
|
)*
|
|
\)
|
|
/x;
|
|
|
|
my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
|
|
my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
|
|
my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */
|
|
my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW
|
|
|
|
|
|
sub new {
|
|
my ( $class, %args ) = @_;
|
|
my $self = { %args };
|
|
return bless $self, $class;
|
|
}
|
|
|
|
sub strip_comments {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
$query =~ s/$mlc_re//go;
|
|
$query =~ s/$olc_re//go;
|
|
if ( $query =~ m/$vlc_rf/i ) { # contains show + version
|
|
my $qualifier = $1 || '';
|
|
$query =~ s/$vlc_re/$qualifier/go;
|
|
}
|
|
return $query;
|
|
}
|
|
|
|
sub shorten {
|
|
my ( $self, $query, $length ) = @_;
|
|
$query =~ s{
|
|
\A(
|
|
(?:INSERT|REPLACE)
|
|
(?:\s+LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)?
|
|
(?:\s\w+)*\s+\S+\s+VALUES\s*\(.*?\)
|
|
)
|
|
\s*,\s*\(.*?(ON\s+DUPLICATE|\Z)}
|
|
{$1 /*... omitted ...*/$2}xsi;
|
|
|
|
return $query unless $query =~ m/IN\s*\(\s*(?!select)/i;
|
|
|
|
my $last_length = 0;
|
|
my $query_length = length($query);
|
|
while (
|
|
$length > 0
|
|
&& $query_length > $length
|
|
&& $query_length < ( $last_length || $query_length + 1 )
|
|
) {
|
|
$last_length = $query_length;
|
|
$query =~ s{
|
|
(\bIN\s*\() # The opening of an IN list
|
|
([^\)]+) # Contents of the list, assuming no item contains paren
|
|
(?=\)) # Close of the list
|
|
}
|
|
{
|
|
$1 . __shorten($2)
|
|
}gexsi;
|
|
}
|
|
|
|
return $query;
|
|
}
|
|
|
|
sub __shorten {
|
|
my ( $snippet ) = @_;
|
|
my @vals = split(/,/, $snippet);
|
|
return $snippet unless @vals > 20;
|
|
my @keep = splice(@vals, 0, 20); # Remove and save the first 20 items
|
|
return
|
|
join(',', @keep)
|
|
. "/*... omitted "
|
|
. scalar(@vals)
|
|
. " items ...*/";
|
|
}
|
|
|
|
sub fingerprint {
|
|
my ( $self, $query ) = @_;
|
|
|
|
$query =~ m#\ASELECT /\*!40001 SQL_NO_CACHE \*/ \* FROM `# # mysqldump query
|
|
&& return 'mysqldump';
|
|
$query =~ m#/\*\w+\.\w+:[0-9]/[0-9]\*/# # pt-table-checksum, etc query
|
|
&& return 'percona-toolkit';
|
|
$query =~ m/\Aadministrator command: /
|
|
&& return $query;
|
|
$query =~ m/\A\s*(call\s+\S+)\(/i
|
|
&& return lc($1); # Warning! $1 used, be careful.
|
|
if ( my ($beginning) = $query =~ m/\A((?:INSERT|REPLACE)(?: IGNORE)?\s+INTO.+?VALUES\s*\(.*?\))\s*,\s*\(/is ) {
|
|
$query = $beginning; # Shorten multi-value INSERT statements ASAP
|
|
}
|
|
|
|
$query =~ s/$mlc_re//go;
|
|
$query =~ s/$olc_re//go;
|
|
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
|
|
&& return $query;
|
|
|
|
$query =~ s/([^\\])(\\')/$1/sg;
|
|
$query =~ s/([^\\])(\\")/$1/sg;
|
|
$query =~ s/\\\\//sg;
|
|
$query =~ s/\\'//sg;
|
|
$query =~ s/\\"//sg;
|
|
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
|
|
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
|
|
|
|
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
|
|
|
|
if ( $self->{match_md5_checksums} ) {
|
|
$query =~ s/([._-])[a-f0-9]{32}/$1?/g;
|
|
}
|
|
|
|
if ( !$self->{match_embedded_numbers} ) {
|
|
$query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
|
|
}
|
|
else {
|
|
$query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
|
|
}
|
|
|
|
if ( $self->{match_md5_checksums} ) {
|
|
$query =~ s/[xb+-]\?/?/g;
|
|
}
|
|
else {
|
|
$query =~ s/[xb.+-]\?/?/g;
|
|
}
|
|
|
|
$query =~ s/\A\s+//; # Chop off leading whitespace
|
|
chomp $query; # Kill trailing whitespace
|
|
$query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
|
|
$query = lc $query;
|
|
$query =~ s/\bnull\b/?/g; # Get rid of NULLs
|
|
$query =~ s{ # Collapse IN and VALUES lists
|
|
\b(in|values?)(?:[\s,]*\([\s?,]*\))+
|
|
}
|
|
{$1(?+)}gx;
|
|
$query =~ s{ # Collapse UNION
|
|
\b(select\s.*?)(?:(\sunion(?:\sall)?)\s\1)+
|
|
}
|
|
{$1 /*repeat$2*/}xg;
|
|
$query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
|
|
|
|
if ( $query =~ m/\bORDER BY /gi ) { # Find, anchor on ORDER BY clause
|
|
1 while $query =~ s/\G(.+?)\s+ASC/$1/gi && pos $query;
|
|
}
|
|
|
|
return $query;
|
|
}
|
|
|
|
sub distill_verbs {
|
|
my ( $self, $query ) = @_;
|
|
|
|
$query =~ m/\A\s*call\s+(\S+)\(/i && return "CALL $1";
|
|
$query =~ m/\A\s*use\s+/ && return "USE";
|
|
$query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
|
|
$query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
|
|
|
|
if ( $query =~ m/\A\s*LOAD/i ) {
|
|
my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
|
|
$tbl ||= '';
|
|
$tbl =~ s/`//g;
|
|
return "LOAD DATA $tbl";
|
|
}
|
|
|
|
if ( $query =~ m/\Aadministrator command:/ ) {
|
|
$query =~ s/administrator command:/ADMIN/;
|
|
$query = uc $query;
|
|
return $query;
|
|
}
|
|
|
|
$query = $self->strip_comments($query);
|
|
|
|
if ( $query =~ m/\A\s*SHOW\s+/i ) {
|
|
PTDEBUG && _d($query);
|
|
|
|
$query = uc $query;
|
|
$query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
|
|
$query =~ s/\s+COUNT[^)]+\)//g;
|
|
|
|
$query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
|
|
|
|
$query =~ s/\A(SHOW(?:\s+\S+){1,2}).*\Z/$1/s;
|
|
$query =~ s/\s+/ /g;
|
|
PTDEBUG && _d($query);
|
|
return $query;
|
|
}
|
|
|
|
eval $QueryParser::data_def_stmts;
|
|
eval $QueryParser::tbl_ident;
|
|
my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
|
|
if ( $dds) {
|
|
$query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
|
|
my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
|
|
$obj = uc $obj if $obj;
|
|
PTDEBUG && _d('Data def statement:', $dds, 'obj:', $obj);
|
|
my ($db_or_tbl)
|
|
= $query =~ m/(?:TABLE|DATABASE)\s+($QueryParser::tbl_ident)(\s+.*)?/i;
|
|
PTDEBUG && _d('Matches db or table:', $db_or_tbl);
|
|
return uc($dds . ($obj ? " $obj" : '')), $db_or_tbl;
|
|
}
|
|
|
|
my @verbs = $query =~ m/\b($verbs)\b/gio;
|
|
@verbs = do {
|
|
my $last = '';
|
|
grep { my $pass = $_ ne $last; $last = $_; $pass } map { uc } @verbs;
|
|
};
|
|
|
|
if ( ($verbs[0] || '') eq 'SELECT' && @verbs > 1 ) {
|
|
PTDEBUG && _d("False-positive verbs after SELECT:", @verbs[1..$#verbs]);
|
|
my $union = grep { $_ eq 'UNION' } @verbs;
|
|
@verbs = $union ? qw(SELECT UNION) : qw(SELECT);
|
|
}
|
|
|
|
my $verb_str = join(q{ }, @verbs);
|
|
return $verb_str;
|
|
}
|
|
|
|
sub __distill_tables {
|
|
my ( $self, $query, $table, %args ) = @_;
|
|
my $qp = $args{QueryParser} || $self->{QueryParser};
|
|
die "I need a QueryParser argument" unless $qp;
|
|
|
|
my @tables = map {
|
|
$_ =~ s/`//g;
|
|
$_ =~ s/(_?)[0-9]+/$1?/g;
|
|
$_;
|
|
} grep { defined $_ } $qp->get_tables($query);
|
|
|
|
push @tables, $table if $table;
|
|
|
|
@tables = do {
|
|
my $last = '';
|
|
grep { my $pass = $_ ne $last; $last = $_; $pass } @tables;
|
|
};
|
|
|
|
return @tables;
|
|
}
|
|
|
|
sub distill {
|
|
my ( $self, $query, %args ) = @_;
|
|
|
|
if ( $args{generic} ) {
|
|
my ($cmd, $arg) = $query =~ m/^(\S+)\s+(\S+)/;
|
|
return '' unless $cmd;
|
|
$query = (uc $cmd) . ($arg ? " $arg" : '');
|
|
}
|
|
else {
|
|
my ($verbs, $table) = $self->distill_verbs($query, %args);
|
|
|
|
if ( $verbs && $verbs =~ m/^SHOW/ ) {
|
|
my %alias_for = qw(
|
|
SCHEMA DATABASE
|
|
KEYS INDEX
|
|
INDEXES INDEX
|
|
);
|
|
map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
|
|
$query = $verbs;
|
|
}
|
|
elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
|
|
return $verbs;
|
|
}
|
|
else {
|
|
my @tables = $self->__distill_tables($query, $table, %args);
|
|
$query = join(q{ }, $verbs, @tables);
|
|
}
|
|
}
|
|
|
|
if ( $args{trf} ) {
|
|
$query = $args{trf}->($query, %args);
|
|
}
|
|
|
|
return $query;
|
|
}
|
|
|
|
sub convert_to_select {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
|
|
return if $query =~ m/=\s*\(\s*SELECT /i;
|
|
|
|
$query =~ s{
|
|
\A.*?
|
|
update(?:\s+(?:low_priority|ignore))?\s+(.*?)
|
|
\s+set\b(.*?)
|
|
(?:\s*where\b(.*?))?
|
|
(limit\s*[0-9]+(?:\s*,\s*[0-9]+)?)?
|
|
\Z
|
|
}
|
|
{__update_to_select($1, $2, $3, $4)}exsi
|
|
|| $query =~ s{
|
|
\A.*?
|
|
(?:insert(?:\s+ignore)?|replace)\s+
|
|
.*?\binto\b(.*?)\(([^\)]+)\)\s*
|
|
values?\s*(\(.*?\))\s*
|
|
(?:\blimit\b|on\s+duplicate\s+key.*)?\s*
|
|
\Z
|
|
}
|
|
{__insert_to_select($1, $2, $3)}exsi
|
|
|| $query =~ s{
|
|
\A.*?
|
|
(?:insert(?:\s+ignore)?|replace)\s+
|
|
(?:.*?\binto)\b(.*?)\s*
|
|
set\s+(.*?)\s*
|
|
(?:\blimit\b|on\s+duplicate\s+key.*)?\s*
|
|
\Z
|
|
}
|
|
{__insert_to_select_with_set($1, $2)}exsi
|
|
|| $query =~ s{
|
|
\A.*?
|
|
delete\s+(.*?)
|
|
\bfrom\b(.*)
|
|
\Z
|
|
}
|
|
{__delete_to_select($1, $2)}exsi;
|
|
$query =~ s/\s*on\s+duplicate\s+key\s+update.*\Z//si;
|
|
$query =~ s/\A.*?(?=\bSELECT\s*\b)//ism;
|
|
return $query;
|
|
}
|
|
|
|
sub convert_select_list {
|
|
my ( $self, $query ) = @_;
|
|
$query =~ s{
|
|
\A\s*select(.*?)\bfrom\b
|
|
}
|
|
{$1 =~ m/\*/ ? "select 1 from" : "select isnull(coalesce($1)) from"}exi;
|
|
return $query;
|
|
}
|
|
|
|
sub __delete_to_select {
|
|
my ( $delete, $join ) = @_;
|
|
if ( $join =~ m/\bjoin\b/ ) {
|
|
return "select 1 from $join";
|
|
}
|
|
return "select * from $join";
|
|
}
|
|
|
|
sub __insert_to_select {
|
|
my ( $tbl, $cols, $vals ) = @_;
|
|
PTDEBUG && _d('Args:', @_);
|
|
my @cols = split(/,/, $cols);
|
|
PTDEBUG && _d('Cols:', @cols);
|
|
$vals =~ s/^\(|\)$//g; # Strip leading/trailing parens
|
|
my @vals = $vals =~ m/($quote_re|[^,]*${bal}[^,]*|[^,]+)/g;
|
|
PTDEBUG && _d('Vals:', @vals);
|
|
if ( @cols == @vals ) {
|
|
return "select * from $tbl where "
|
|
. join(' and ', map { "$cols[$_]=$vals[$_]" } (0..$#cols));
|
|
}
|
|
else {
|
|
return "select * from $tbl limit 1";
|
|
}
|
|
}
|
|
|
|
sub __insert_to_select_with_set {
|
|
my ( $from, $set ) = @_;
|
|
$set =~ s/,/ and /g;
|
|
return "select * from $from where $set ";
|
|
}
|
|
|
|
sub __update_to_select {
|
|
my ( $from, $set, $where, $limit ) = @_;
|
|
return "select $set from $from "
|
|
. ( $where ? "where $where" : '' )
|
|
. ( $limit ? " $limit " : '' );
|
|
}
|
|
|
|
sub wrap_in_derived {
|
|
my ( $self, $query ) = @_;
|
|
return unless $query;
|
|
return $query =~ m/\A\s*select/i
|
|
? "select 1 from ($query) as x limit 1"
|
|
: $query;
|
|
}
|
|
|
|
sub _d {
|
|
my ($package, undef, $line) = caller 0;
|
|
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
|
map { defined $_ ? $_ : 'undef' }
|
|
@_;
|
|
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
|
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End QueryRewriter package
|
|
# ###########################################################################
|
|
|
|
# ###########################################################################
|
|
# This is a combination of modules and programs in one -- a runnable module.
|
|
# http://www.perl.com/pub/a/2006/07/13/lightning-articles.html?page=last
|
|
# Or, look it up in the Camel book on pages 642 and 643 in the 3rd edition.
|
|
#
|
|
# Check at the end of this package for the call to main() which actually runs
|
|
# the program.
|
|
# ###########################################################################
|
|
package pt_fingerprint;
|
|
|
|
use English qw(-no_match_vars);
|
|
use Data::Dumper;
|
|
$Data::Dumper::Indent = 1;
|
|
$OUTPUT_AUTOFLUSH = 1;
|
|
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
sub main {
|
|
@ARGV = @_; # set global ARGV for this package
|
|
|
|
# ##########################################################################
|
|
# Get configuration information.
|
|
# ##########################################################################
|
|
my $o = new OptionParser();
|
|
$o->get_specs();
|
|
$o->get_opts();
|
|
$o->usage_or_errors();
|
|
|
|
my $qp = new QueryParser();
|
|
my $qr = new QueryRewriter(
|
|
QueryParser => $qp,
|
|
match_md5_checksums => $o->get('match-md5-checksums'),
|
|
match_embedded_numbers => $o->get('match-embedded-numbers'),
|
|
);
|
|
|
|
if ( $o->got('query') ) {
|
|
print $qr->fingerprint($o->get('query')), "\n";
|
|
}
|
|
else {
|
|
local $INPUT_RECORD_SEPARATOR = ";\n";
|
|
while ( <> ) {
|
|
my $query = $_;
|
|
chomp $query;
|
|
$query =~ s/^#.+$//mg;
|
|
$query =~ s/^\s+//;
|
|
next unless $query =~ m/^\w/;
|
|
print $qr->fingerprint($query), "\n";
|
|
}
|
|
}
|
|
}
|
|
|
|
# ############################################################################
|
|
# Run the program.
|
|
# ############################################################################
|
|
if ( !caller ) { exit main(@ARGV); }
|
|
|
|
1; # Because this is a module as well as a script.
|
|
|
|
# #############################################################################
|
|
# Documentation.
|
|
# #############################################################################
|
|
|
|
=pod
|
|
|
|
=head1 NAME
|
|
|
|
pt-fingerprint - Convert queries into fingerprints.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
Usage: pt-fingerprint [OPTIONS] [FILES]
|
|
|
|
pt-fingerprint converts queries into fingerprints. With the --query
|
|
option, converts the option's value into a fingerprint. With no options, treats
|
|
command-line arguments as FILEs and reads and converts semicolon-separated
|
|
queries from the FILEs. When FILE is -, it read standard input.
|
|
|
|
Convert a single query:
|
|
|
|
pt-fingerprint --query "select a, b, c from users where id = 500"
|
|
|
|
Convert a file full of queries:
|
|
|
|
pt-fingerprint /path/to/file.txt
|
|
|
|
=head1 RISKS
|
|
|
|
Percona Toolkit is mature, proven in the real world, and well tested,
|
|
but all database tools can pose a risk to the system and the database
|
|
server. Before using this tool, please:
|
|
|
|
=over
|
|
|
|
=item * Read the tool's documentation
|
|
|
|
=item * Review the tool's known L<"BUGS">
|
|
|
|
=item * Test the tool on a non-production server
|
|
|
|
=item * Backup your production server and verify the backups
|
|
|
|
=back
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
A query fingerprint is the abstracted form of a query, which makes it possible
|
|
to group similar queries together. Abstracting a query removes literal values,
|
|
normalizes whitespace, and so on. For example, consider these two queries:
|
|
|
|
SELECT name, password FROM user WHERE id='12823';
|
|
select name, password from user
|
|
where id=5;
|
|
|
|
Both of those queries will fingerprint to
|
|
|
|
select name, password from user where id=?
|
|
|
|
Once the query's fingerprint is known, we can then talk about a query as though
|
|
it represents all similar queries.
|
|
|
|
Query fingerprinting accommodates a great many special cases, which have proven
|
|
necessary in the real world. For example, an IN list with 5 literals is really
|
|
equivalent to one with 4 literals, so lists of literals are collapsed to a
|
|
single one. If you want to understand more about how and why all of these cases
|
|
are handled, please review the test cases in the Github repository. If you
|
|
find something that is not fingerprinted properly, please submit a bug report
|
|
with a reproducible test case. Here is a list of transformations during
|
|
fingerprinting, which might not be exhaustive:
|
|
|
|
=over
|
|
|
|
=item *
|
|
|
|
Group all SELECT queries from mysqldump together, even if they are against
|
|
different tables. Ditto for all of pt-table-checksum's checksum queries.
|
|
|
|
=item *
|
|
|
|
Shorten multi-value INSERT statements to a single VALUES() list.
|
|
|
|
=item *
|
|
|
|
Strip comments.
|
|
|
|
=item *
|
|
|
|
Abstract the databases in USE statements, so all USE statements are grouped
|
|
together.
|
|
|
|
=item *
|
|
|
|
Replace all literals, such as quoted strings. For efficiency, the code that
|
|
replaces literal numbers is somewhat non-selective, and might replace some
|
|
things as numbers when they really are not. Hexadecimal literals are also
|
|
replaced. NULL is treated as a literal. Numbers embedded in identifiers are
|
|
also replaced, so tables named similarly will be fingerprinted to the same
|
|
values (e.g. users_2009 and users_2010 will fingerprint identically).
|
|
|
|
=item *
|
|
|
|
Collapse all whitespace into a single space.
|
|
|
|
=item *
|
|
|
|
Lowercase the entire query.
|
|
|
|
=item *
|
|
|
|
Replace all literals inside of IN() and VALUES() lists with a single
|
|
placeholder, regardless of cardinality.
|
|
|
|
=item *
|
|
|
|
Collapse multiple identical UNION queries into a single one.
|
|
|
|
=back
|
|
|
|
=head1 OPTIONS
|
|
|
|
This tool accepts additional command-line arguments. Refer to the
|
|
L<"SYNOPSIS"> and usage information for details.
|
|
|
|
=over
|
|
|
|
=item --[no]buffer-stdout
|
|
|
|
default: yes
|
|
|
|
Enables STDOUT buffering. Disable this option if you want to see live progress
|
|
when using output post-processing tools such as C<tee> or C<kubectl logs>.
|
|
|
|
=item --config
|
|
|
|
type: Array
|
|
|
|
Read this comma-separated list of config files; if specified, this must be the
|
|
first option on the command line.
|
|
|
|
=item --help
|
|
|
|
Show help and exit.
|
|
|
|
=item --match-embedded-numbers
|
|
|
|
Match numbers embedded in words and replace as single values. This option
|
|
causes the tool to be more careful about matching numbers so that words
|
|
with numbers, like C<catch22> are matched and replaced as a single C<?>
|
|
placeholder. Otherwise the default number matching pattern will replace
|
|
C<catch22> as C<catch?>.
|
|
|
|
This is helpful if database or table names contain numbers.
|
|
|
|
=item --match-md5-checksums
|
|
|
|
Match MD5 checksums and replace as single values. This option causes
|
|
the tool to be more careful about matching numbers so that MD5 checksums
|
|
like C<fbc5e685a5d3d45aa1d0347fdb7c4d35> are matched and replaced as a
|
|
single C<?> placeholder. Otherwise, the default number matching pattern will
|
|
replace C<fbc5e685a5d3d45aa1d0347fdb7c4d35> as C<fbc?>.
|
|
|
|
=item --query
|
|
|
|
type: string
|
|
|
|
The query to convert into a fingerprint.
|
|
|
|
=item --version
|
|
|
|
Show version and exit.
|
|
|
|
=back
|
|
|
|
=head1 ENVIRONMENT
|
|
|
|
The environment variable C<PTDEBUG> enables verbose debugging output to STDERR.
|
|
To enable debugging and capture all output to a file, run the tool like:
|
|
|
|
PTDEBUG=1 pt-fingerprint ... > FILE 2>&1
|
|
|
|
Be careful: debugging output is voluminous and can generate several megabytes
|
|
of output.
|
|
|
|
=head1 ATTENTION
|
|
|
|
Using <PTDEBUG> might expose passwords. When debug is enabled, all command line
|
|
parameters are shown in the output.
|
|
|
|
=head1 SYSTEM REQUIREMENTS
|
|
|
|
You need Perl, DBI, DBD::mysql, and some core packages that ought to be
|
|
installed in any reasonably new version of Perl.
|
|
|
|
=head1 BUGS
|
|
|
|
For a list of known bugs, see L<https://jira.percona.com/projects/PT/issues>.
|
|
|
|
Please report bugs at L<https://jira.percona.com/projects/PT>.
|
|
Include the following information in your bug report:
|
|
|
|
=over
|
|
|
|
=item * Complete command-line used to run the tool
|
|
|
|
=item * Tool L<"--version">
|
|
|
|
=item * MySQL version of all servers involved
|
|
|
|
=item * Output from the tool including STDERR
|
|
|
|
=item * Input files (log/dump/config files, etc.)
|
|
|
|
=back
|
|
|
|
If possible, include debugging output by running the tool with C<PTDEBUG>;
|
|
see L<"ENVIRONMENT">.
|
|
|
|
=head1 DOWNLOADING
|
|
|
|
Visit L<http://www.percona.com/software/percona-toolkit/> to download the
|
|
latest release of Percona Toolkit. Or, get the latest release from the
|
|
command line:
|
|
|
|
wget percona.com/get/percona-toolkit.tar.gz
|
|
|
|
wget percona.com/get/percona-toolkit.rpm
|
|
|
|
wget percona.com/get/percona-toolkit.deb
|
|
|
|
You can also get individual tools from the latest release:
|
|
|
|
wget percona.com/get/TOOL
|
|
|
|
Replace C<TOOL> with the name of any tool.
|
|
|
|
=head1 AUTHORS
|
|
|
|
Baron Schwartz and Daniel Nichter
|
|
|
|
=head1 ABOUT PERCONA TOOLKIT
|
|
|
|
This tool is part of Percona Toolkit, a collection of advanced command-line
|
|
tools for MySQL developed by Percona. Percona Toolkit was forked from two
|
|
projects in June, 2011: Maatkit and Aspersa. Those projects were created by
|
|
Baron Schwartz and primarily developed by him and Daniel Nichter. Visit
|
|
L<http://www.percona.com/software/> to learn about other free, open-source
|
|
software from Percona.
|
|
|
|
=head1 COPYRIGHT, LICENSE, AND WARRANTY
|
|
|
|
This program is copyright 2011-2024 Percona LLC and/or its affiliates.
|
|
|
|
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
|
|
This program is free software; you can redistribute it and/or modify it under
|
|
the terms of the GNU General Public License as published by the Free Software
|
|
Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
|
systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
|
licenses.
|
|
|
|
You should have received a copy of the GNU General Public License along with
|
|
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
|
|
=head1 VERSION
|
|
|
|
pt-fingerprint 3.7.0
|
|
|
|
=cut
|