Files
percona-toolkit/util/check-tool
Viktor Szépe 2bd40d8c39 Remove trailing spaces (#665)
* Remove trailing spaces

* PR-665 -  Remove trailing spaces

- Updated not stable test t/pt-online-schema-change/preserve_triggers.t
- Updated utilities in bin directory

* PR-665 -  Remove trailing spaces

- Fixed typos

* PR-665 -  Remove trailing spaces

- Fixed typos

---------

Co-authored-by: Sveta Smirnova <sveta.smirnova@percona.com>
2023-09-06 01:15:12 +03:00

502 lines
15 KiB
Perl
Executable File

#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
my @tool_files = @ARGV;
if ( !@tool_files ) {
die "Usage: $PROGRAM_NAME [TOOL...]\n";
}
my $exit_status = 0; # 0 no problems, 1 any problems
my $tool_file; # bin/pt-archiver
my $tool_name; # pt-archiver (no path)
my $tool_type; # perl or bash
my @check_subs = (qw(
check_alpha_order
check_module_usage
check_pod_header_order
check_pod_formatting
check_pod_links
check_option_usage
check_option_types
check_option_typos
));
TOOL:
while ( defined($tool_file = shift @ARGV) ) {
my $fh;
eval {
open $fh, "<", $tool_file;
};
if ( $EVAL_ERROR ) {
$exit_status = 1;
warn "Cannot open $tool_file: $OS_ERROR";
next TOOL;
}
# This make `bin/$ ../util/check-tool *' if . isn't in PATH.
if ( $tool_file !~ m{/} ) {
$tool_file = "./$tool_file";
}
($tool_name) = $tool_file =~ m/([a-z-]+)$/;
if ( !$tool_name ) {
$exit_status = 1;
warn "Cannot parse tool name from $tool_file";
next TOOL;
}
$tool_type = get_tool_type($tool_file);
if ( !$tool_type ) {
$exit_status = 1;
warn "Cannot determine if $tool_name is Perl or Bash; assuming Perl";
$tool_type = 'perl';
}
print '# ', ('#' x (70 - length $tool_name)), " $tool_name\n";
foreach my $check_sub ( @check_subs ) {
seek $fh, 0, 0;
print "# $check_sub ", ('#' x (70 - length $check_sub)), "\n";
my $sub = \&$check_sub;
eval {
&$sub($fh);
};
if ( $EVAL_ERROR ) {
$exit_status = 1;
warn "Error while checking $tool_name: $EVAL_ERROR";
}
}
print "\n\n";
}
exit $exit_status;
# ############################################################################
# Subroutines
# ############################################################################
sub get_tool_type {
my ($file) = @_;
return unless $file;
my $head = `head -n 1 $file`;
return unless $head;
return 'bash' if $head =~ m/bash|sh/;
return 'perl' if $head =~ m/perl/;
return;
}
# Check that options in the tool's POD are in alphabetical order. Only the
# head1 OPTIONS section and any head2 subsections are checked. Subsections
# are considered independent. The "[no]" part of negatable options is not
# part of the base option name, so it's ignored.
sub check_alpha_order {
my ($fh) = @_;
local $INPUT_RECORD_SEPARATOR = '';
my $para;
my $section;
while ( $para = <$fh> ) {
last if ($section) = $para =~ m/^=head1 (OPTIONS)/;
}
die "Cannot find =head1 OPTIONS" unless $section;
parse_options($fh, $section);
return;
}
sub parse_options {
my ($fh, $section) = @_;
my $para;
my @opts;
while ( $para = <$fh> ) {
last if $para =~ m/^=head1/;
if ( my ($option) = $para =~ m/^=item --(?:\[no\])?(.*)/ ) {
push @opts, $option;
}
elsif ( $para =~ m/^=head2 (.+)/ ) {
parse_options($fh, $1);
}
}
my $fmt = "%-20s %-20s\n";
my @sorted = sort @opts;
for my $i ( 0..$#sorted ) {
if ( $opts[$i] ne $sorted[$i] ) {
$exit_status = 1;
printf "$tool_name has unsorted options in $section\n";
printf $fmt, 'ACTUAL', 'CORRECT';
printf $fmt, '=' x 20, '=' x 20;
map { printf $fmt, $opts[$_], $sorted[$_] }
grep { $opts[$_] ne $sorted[$_] }
($i..$#sorted);
last;
}
}
return;
}
sub check_module_usage {
my ($fh) = @_;
if ( $tool_type ne 'perl' ) {
print "Not a Perl tool\n";
return;
}
# These modules are not instantiated as objects.
my %not_obj = (
Transformers => 1,
);
# Many tools dynamically instantiate objs like $plugin="WatchStatus",
# $plugin->new(). So this script can't detect that.
my %dynamic = (
'pt-query-digest' => {
TcpdumpParser => 1,
MySQLProtocolParser => 1,
PgLogParser => 1,
SlowLogParser => 1,
MemcachedProtocolParser => 1,
MemcachedEvent => 1,
BinaryLogParser => 1,
GeneralLogParser => 1,
ProtocolParser => 1,
HTTPProtocolParser => 1,
},
'pt-table-sync' => {
TableSyncStream => 1,
TableSyncChunk => 1,
TableSyncNibble => 1,
TableSyncGroupBy => 1,
},
'pt-table-usage' => {
SlowLogParser => 1,
},
);
# If these base-class modules are present, they should be accompanied
# by a subclass.
my %base_class = (
'AdvisorRules' => [ qw(VariableAdvisorRules) ],
);
# Nearly every tool has or needs these modules.
my %ignore = (
OptionParser => 1,
DSNParser => 1,
);
my $contents = do { local $/ = undef; <$fh> };
my %uses = map { $_ => 1 } $contents =~ m/new ([A-Z]\w+)(?:\(|;)/gm;
my @unused = grep {
my $module = $_;
my $unused = 0;
if ( $not_obj{$module} ) {
# Transformers->import
chomp(my $i = `grep -c '${_}->import' $tool_file`);
$unused = 1 unless $i;
}
elsif ( $dynamic{$tool_name}->{$module} ) {
# Can't detect these.
}
elsif ( $base_class{$module} ) {
$unused = 1 unless grep { $uses{$_} } @{$base_class{$module}};
}
else {
$unused = 1 unless $uses{$module};
}
$unused;
}
grep { !$ignore{$_} } $contents =~ m/^# (\w+) package$/gm;
if ( @unused ) {
print "$tool_name has unused modules:\n"
. join('', map { "\t$_\n" } @unused);
$exit_status = 1;
}
return;
}
sub check_option_types {
my ($fh) = @_;
if ( $tool_type ne 'perl' ) {
print "Not a Perl tool\n";
return;
}
# Standard options: http://code.google.com/p/maatkit/wiki/CommandLineOptions
my $sop = {
'defaults-file' => {type => 's', short => 'F' },
'host' => {type => 's', short => 'h' },
'password' => {type => 's', short => 'p' },
'port' => {type => 'i', short => 'P' },
'socket' => {type => 's', short => 'S' },
'user' => {type => 's', short => 'u' },
'charset' => {type => 's', short => 'A' },
'ask-pass' => {type => '', short => '', },
'database' => {type => 's', short => 'D' },
'set-vars' => {type => 's', short => '', },
'where' => {type => 's', short => '', },
'databases' => {type => 'h', short => 'd' },
'tables' => {type => 'h', short => 't' },
'columns' => {type => 'a', short => 'c' },
'engines' => {type => 'h', short => 'e' },
'ignore-databases'=> {type => 'H', short => '', },
'ignore-tables' => {type => 'H', short => '', },
'ignore-columns' => {type => 'H', short => '', },
'ignore-engines' => {type => 'H', short => '', },
'config' => {type => 'A', short => '', },
'daemonize' => {type => '', short => '', },
'dry-run' => {type => '', short => '', },
'log' => {type => 's', short => '', },
'pid' => {type => 's', short => '', },
# --progress is not standard. Some older tools had their own special
# progress, whereas newer tools use Progress.
# 'progress' => {type => 'a', short => '', },
'quiet' => {type => '', short => 'q' },
'sentinel' => {type => 's', short => '', },
'stop' => {type => '', short => '', },
'run-time' => {type => 'm', short => '', },
'threads' => {type => 'i', short => '', },
'verbose' => {type => '', short => 'v' },
'wait' => {type => 'm', short => 'w' },
'recurse' => {type => 'i', short => '' },
};
# Exceptions are inevitable. E.g., pt-deadlock-logger --columns is not the
# standard filter --columns (e.g. same family as --databases, --tables, etc.)
# These exceptions are good candidates for change, so our standard options
# really are standard across all tools.
my $exception = {
'pt-deadlock-logger' => { # not standard filter
columns => {
type => 'h',
short => '',
},
},
'pt-checksum-filter' => { # not standard filter
'ignore-databases' => {
type => '',
short => '',
},
},
};
my $help = `$tool_file --help`;
# Options are listed after the line "Options:" (default group).
# Each line is like:
# --defaults-file=s -F Only read mysql options from the given file
# The short form (-F) is optional. The list is terminated at
# the line "Option types:". Problem is: there's a second list
# of options. The second list shows each option's value. So
# we stop parsing when we get an option that we already have.
my $opt = {};
while ( $help =~ m/^\s{2,}--(\S+?)(?:=(.))?\s+(?:-([a-zA-Z]))?\s+\w+/mg ) {
my ($long, $type, $short) = ($1, $2, $3);
die "Failed to parse $help" unless $long;
last if $opt->{$long};
$opt->{$long} = 1;
if ( $sop->{$long} ) {
# Check option type.
my $expected_type = $sop->{$long}->{type};
$expected_type = $exception->{$tool_name}->{$long}->{type}
if exists $exception->{$tool_name}->{$long}->{type};
$expected_type = '' unless defined $expected_type;
if ( ($type || '') ne $expected_type ) {
$exit_status = 1;
print "$tool_name --$long "
. ($type ? "is type $type" : "has no type")
. " but should "
. ($expected_type ? "be type $expected_type"
: "have no type")
. "\n";
}
# Check short form.
my $expected_short = $sop->{$long}->{short};
$expected_short = $exception->{$tool_name}->{$long}->{short}
if exists $exception->{$tool_name}->{$long}->{short};
$expected_short = '' unless defined $expected_short;
if ( ($short || '') ne $expected_short ) {
print "$tool_name --$long "
. ($short ? "has short form -$short" : "has no short form")
. " but should have "
. ($expected_short ? "short form -$expected_short"
: "no short form")
. "\n";
}
}
}
return;
}
# Check that the POD headers are in standard order. Only major, required
# headers are checked. For example, there maybe be other headers between
# DESCRIPTION and OPTIONS, but these are ignored.
sub check_pod_header_order {
my @std_hdrs = (
'NAME',
'SYNOPSIS',
'RISKS',
'DESCRIPTION',
'OPTIONS',
'ENVIRONMENT',
'SYSTEM REQUIREMENTS',
'BUGS',
'DOWNLOADING',
'AUTHORS',
'COPYRIGHT, LICENSE, AND WARRANTY',
'VERSION',
);
my @hdrs;
foreach my $hdr ( split(/\n/, `grep '^=head1' $tool_file`) ) {
$hdr =~ s/^=head1 //;
if ( $hdr =~ s/\s+$// ) {
print "Extra space after $hdr\n";
}
push @hdrs, $hdr if grep { $hdr eq $_ } @std_hdrs;
}
my $fmt = "%-32s %-32s\n";
for my $i ( 0..$#std_hdrs ) {
if ( ($hdrs[$i] || '') ne $std_hdrs[$i] ) {
$exit_status = 1;
print "$tool_name has missing or out-of-order standard headers:\n";
printf $fmt, 'ACTUAL', 'CORRECT';
printf $fmt, '=' x 32, '=' x 32;
map { printf $fmt, ($hdrs[$_] || ''), $std_hdrs[$_] }
grep { ($hdrs[$_] || '') ne $std_hdrs[$_] }
($i..$#std_hdrs);
last;
}
}
return;
}
sub check_pod_formatting {
my ($fh) = @_;
my $output = `perldoc -T $tool_file 2>&1`;
# unlike() will print the whole POD if this fails; ok() is more terse.
if ( $output =~ m/can't break/ ) {
$exit_status = 1;
print "$tool_name POD has lines that are too long\n";
}
chomp(my $podchecker = `which podchecker`);
if ( $podchecker ) {
$output = `$podchecker $tool_file 2>&1`;
if ( $output !~ m/pod syntax OK/ ) {
$exit_status = 1;
print "$output\n";
}
}
return;
}
sub check_option_usage {
my ($fh) = @_;
if ( $tool_type ne 'perl' ) {
print "Not a Perl tool\n";
return;
}
# help and version are special opts, intrinsic to OptionParser.
# The other opts are usually processed all at once by calling
# DSNParser::parse_options().
my %ignore = qw(
version 1
help 1
charset 1
defaults-file 1
host 1
password 1
port 1
socket 1
user 1
);
my $contents = do { local $/ = undef; <$fh> };
my @options = grep { !$ignore{$_} }
map { s/^\[no\]//; $_; }
$contents =~ m/^=item --(.+)$/gm;
my @unused = grep {
chomp(my $get = `grep -c "get('$_')" $tool_file`);
chomp(my $got = `grep -c "got('$_')" $tool_file`);
$get ||= 0;
$got ||= 0;
my $used = $get + $got;
!$used;
} @options;
if ( @unused ) {
$exit_status = 1;
print "$tool_name has unused options:\n"
. join('', map { "\t--$_\n" } @unused);
}
(my $pkg = $tool_name) =~ s/-/_/g;
my $main = `grep -A 1000 $pkg $tool_file`;
if ( $main !~ m/->parse_options\(/ ) {
$exit_status = 1;
print "$tool_name does not call DSNParser::parse_options()\n";
}
return;
}
sub check_option_typos {
my ($fh) = @_;
my %ops = map { $_=>1 } split /\n/, `awk '/^=item --/ {print \$2}' $tool_file`;
my $len = `wc -l $tool_file`;
my $doc = `grep '^=pod' -A $len`;
while ( $doc =~ m/(--[a-z]+[a-z-]+)/sg ) {
my $op = $1;
my $nop = $op;
$nop =~ s/^--no-/--[no]/;
if ( !$ops{$op} && !$ops{$nop} ) {
print "Unknown option in documentation: $op\n"
}
}
}
sub check_pod_links {
my $offset = `cat $tool_file | grep '^=head1 NAME' --byte-offset | cut -d ':' -f 1`;
if ( !$offset ) {
warn "Cannot find '^=head1 NAME' in $tool_file";
return;
}
chomp $offset;
my $pod = `tail -c +$offset $tool_file`;
if ( !$pod ) {
warn "Failed to parse POD from $tool_file";
return;
}
my @links_in_lit = $pod =~ m/^([ ]+.*L<.+)$/mg;
if ( @links_in_lit ) {
print "$tool_name has POD links in literal blocks:\n";
foreach my $line ( @links_in_lit ) {
print "$line\n";
}
}
}