#!/usr/bin/env perl use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); use constant MKDEBUG => $ENV{MKDEBUG} || 0; use Data::Dumper; $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; $Data::Dumper::Quotekeys = 0; my @tool_files = @ARGV; if ( !@tool_files ) { die "Usage: $PROGRAM_NAME [TOOL...]\n"; } my $exit_status = 0; # 0 no problems, 1 any problems my $tool_file; # bin/pt-archiver my $tool_name; # pt-archiver (no path) my $tool_type; # perl or bash my @check_subs = (qw( check_alpha_order check_module_usage check_pod_header_order check_pod_formatting check_pod_links check_option_usage check_option_types check_option_typos )); TOOL: while ( defined($tool_file = shift @ARGV) ) { my $fh; eval { open $fh, "<", $tool_file; }; if ( $EVAL_ERROR ) { $exit_status = 1; warn "Cannot open $tool_file: $OS_ERROR"; next TOOL; } # This make `bin/$ ../util/check-tool *' if . isn't in PATH. if ( $tool_file !~ m{/} ) { $tool_file = "./$tool_file"; } ($tool_name) = $tool_file =~ m/([a-z-]+)$/; if ( !$tool_name ) { $exit_status = 1; warn "Cannot parse tool name from $tool_file"; next TOOL; } $tool_type = get_tool_type($tool_file); if ( !$tool_type ) { $exit_status = 1; warn "Cannot determine if $tool_name is Perl or Bash; assuming Perl"; $tool_type = 'perl'; } print '# ', ('#' x (70 - length $tool_name)), " $tool_name\n"; foreach my $check_sub ( @check_subs ) { seek $fh, 0, 0; print "# $check_sub ", ('#' x (70 - length $check_sub)), "\n"; my $sub = \&$check_sub; eval { &$sub($fh); }; if ( $EVAL_ERROR ) { $exit_status = 1; warn "Error while checking $tool_name: $EVAL_ERROR"; } } print "\n\n"; } exit $exit_status; # ############################################################################ # Subroutines # ############################################################################ sub get_tool_type { my ($file) = @_; return unless $file; my $head = `head -n 1 $file`; return unless $head; return 'bash' if $head =~ m/bash|sh/; return 'perl' if $head =~ m/perl/; return; } # Check that options in the tool's POD are in alphabetical order. Only the # head1 OPTIONS section and any head2 subsections are checked. Subsections # are considered independent. The "[no]" part of negatable options is not # part of the base option name, so it's ignored. sub check_alpha_order { my ($fh) = @_; local $INPUT_RECORD_SEPARATOR = ''; my $para; my $section; while ( $para = <$fh> ) { last if ($section) = $para =~ m/^=head1 (OPTIONS)/; } die "Cannot find =head1 OPTIONS" unless $section; parse_options($fh, $section); return; } sub parse_options { my ($fh, $section) = @_; my $para; my @opts; while ( $para = <$fh> ) { last if $para =~ m/^=head1/; if ( my ($option) = $para =~ m/^=item --(?:\[no\])?(.*)/ ) { push @opts, $option; } elsif ( $para =~ m/^=head2 (.+)/ ) { parse_options($fh, $1); } } my $fmt = "%-20s %-20s\n"; my @sorted = sort @opts; for my $i ( 0..$#sorted ) { if ( $opts[$i] ne $sorted[$i] ) { $exit_status = 1; printf "$tool_name has unsorted options in $section\n"; printf $fmt, 'ACTUAL', 'CORRECT'; printf $fmt, '=' x 20, '=' x 20; map { printf $fmt, $opts[$_], $sorted[$_] } grep { $opts[$_] ne $sorted[$_] } ($i..$#sorted); last; } } return; } sub check_module_usage { my ($fh) = @_; if ( $tool_type ne 'perl' ) { print "Not a Perl tool\n"; return; } # These modules are not instantiated as objects. my %not_obj = ( Transformers => 1, ); # Many tools dyanmically instantiate objs like $plugin="WatchStatus", # $plugin->new(). So this script can't detect that. my %dynamic = ( 'pt-query-digest' => { TcpdumpParser => 1, MySQLProtocolParser => 1, PgLogParser => 1, SlowLogParser => 1, MemcachedProtocolParser => 1, MemcachedEvent => 1, BinaryLogParser => 1, GeneralLogParser => 1, ProtocolParser => 1, HTTPProtocolParser => 1, }, 'pt-table-sync' => { TableSyncStream => 1, TableSyncChunk => 1, TableSyncNibble => 1, TableSyncGroupBy => 1, }, 'pt-table-usage' => { SlowLogParser => 1, }, ); # If these base-class modules are present, they should be accompanied # by a subclass. my %base_class = ( 'AdvisorRules' => [ qw(VariableAdvisorRules) ], ); # Nearly every tool has or needs these modules. my %ignore = ( OptionParser => 1, DSNParser => 1, ); my $contents = do { local $/ = undef; <$fh> }; my %uses = map { $_ => 1 } $contents =~ m/new ([A-Z]\w+)(?:\(|;)/gm; my @unused = grep { my $module = $_; my $unused = 0; if ( $not_obj{$module} ) { # Transformers->import chomp(my $i = `grep -c '${_}->import' $tool_file`); $unused = 1 unless $i; } elsif ( $dynamic{$tool_name}->{$module} ) { # Can't detect these. } elsif ( $base_class{$module} ) { $unused = 1 unless grep { $uses{$_} } @{$base_class{$module}}; } else { $unused = 1 unless $uses{$module}; } $unused; } grep { !$ignore{$_} } $contents =~ m/^# (\w+) package$/gm; if ( @unused ) { print "$tool_name has unused modules:\n" . join('', map { "\t$_\n" } @unused); $exit_status = 1; } return; } sub check_option_types { my ($fh) = @_; if ( $tool_type ne 'perl' ) { print "Not a Perl tool\n"; return; } # Standard options: http://code.google.com/p/maatkit/wiki/CommandLineOptions my $sop = { 'defaults-file' => {type => 's', short => 'F' }, 'host' => {type => 's', short => 'h' }, 'password' => {type => 's', short => 'p' }, 'port' => {type => 'i', short => 'P' }, 'socket' => {type => 's', short => 'S' }, 'user' => {type => 's', short => 'u' }, 'charset' => {type => 's', short => 'A' }, 'ask-pass' => {type => '', short => '', }, 'database' => {type => 's', short => 'D' }, 'set-vars' => {type => 's', short => '', }, 'where' => {type => 's', short => '', }, 'databases' => {type => 'h', short => 'd' }, 'tables' => {type => 'h', short => 't' }, 'columns' => {type => 'a', short => 'c' }, 'engines' => {type => 'h', short => 'e' }, 'ignore-databases'=> {type => 'H', short => '', }, 'ignore-tables' => {type => 'H', short => '', }, 'ignore-columns' => {type => 'H', short => '', }, 'ignore-engines' => {type => 'H', short => '', }, 'config' => {type => 'A', short => '', }, 'daemonize' => {type => '', short => '', }, 'dry-run' => {type => '', short => '', }, 'log' => {type => 's', short => '', }, 'pid' => {type => 's', short => '', }, # --progress is not standard. Some older tools had their own special # progress, whereas newer tools use Progress. # 'progress' => {type => 'a', short => '', }, 'quiet' => {type => '', short => 'q' }, 'sentinel' => {type => 's', short => '', }, 'stop' => {type => '', short => '', }, 'run-time' => {type => 'm', short => '', }, 'threads' => {type => 'i', short => '', }, 'verbose' => {type => '', short => 'v' }, 'wait' => {type => 'm', short => 'w' }, 'recurse' => {type => 'i', short => '' }, }; # Exceptions are inevitable. E.g., pt-deadlock-logger --columns is not the # standard filter --columns (e.g. same family as --databases, --tables, etc.) # These exceptions are good candidates for change, so our standard options # really are standard across all tools. my $exception = { 'pt-deadlock-logger' => { # not standard filter columns => { type => 'h', short => '', }, }, 'pt-checksum-filter' => { # not standard filter 'ignore-databases' => { type => '', short => '', }, }, }; my $help = `$tool_file --help`; # Options are listed after the line "Options:" (default group). # Each line is like: # --defaults-file=s -F Only read mysql options from the given file # The short form (-F) is optional. The list is terminated at # the line "Option types:". Problem is: there's a second list # of options. The second list shows each option's value. So # we stop parsing when we get an option that we already have. my $opt = {}; while ( $help =~ m/^\s{2,}--(\S+?)(?:=(.))?\s+(?:-([a-zA-Z]))?\s+\w+/mg ) { my ($long, $type, $short) = ($1, $2, $3); die "Failed to parse $help" unless $long; last if $opt->{$long}; $opt->{$long} = 1; if ( $sop->{$long} ) { # Check option type. my $expected_type = $sop->{$long}->{type}; $expected_type = $exception->{$tool_name}->{$long}->{type} if exists $exception->{$tool_name}->{$long}->{type}; $expected_type = '' unless defined $expected_type; if ( ($type || '') ne $expected_type ) { $exit_status = 1; print "$tool_name --$long " . ($type ? "is type $type" : "has no type") . " but should " . ($expected_type ? "be type $expected_type" : "have no type") . "\n"; } # Check short form. my $expected_short = $sop->{$long}->{short}; $expected_short = $exception->{$tool_name}->{$long}->{short} if exists $exception->{$tool_name}->{$long}->{short}; $expected_short = '' unless defined $expected_short; if ( ($short || '') ne $expected_short ) { print "$tool_name --$long " . ($short ? "has short form -$short" : "has no short form") . " but should have " . ($expected_short ? "short form -$expected_short" : "no short form") . "\n"; } } } return; } # Check that the POD headers are in standard order. Only major, required # headers are checked. For example, there maybe be other headers between # DESCRIPTION and OPTIONS, but these are ignored. sub check_pod_header_order { my @std_hdrs = ( 'NAME', 'SYNOPSIS', 'RISKS', 'DESCRIPTION', 'OPTIONS', 'ENVIRONMENT', 'SYSTEM REQUIREMENTS', 'BUGS', 'DOWNLOADING', 'AUTHORS', 'COPYRIGHT, LICENSE, AND WARRANTY', 'VERSION', ); my @hdrs; foreach my $hdr ( split(/\n/, `grep '^=head1' $tool_file`) ) { $hdr =~ s/^=head1 //; if ( $hdr =~ s/\s+$// ) { print "Extra space after $hdr\n"; } push @hdrs, $hdr if grep { $hdr eq $_ } @std_hdrs; } my $fmt = "%-32s %-32s\n"; for my $i ( 0..$#std_hdrs ) { if ( ($hdrs[$i] || '') ne $std_hdrs[$i] ) { $exit_status = 1; print "$tool_name has missing or out-of-order standard headers:\n"; printf $fmt, 'ACTUAL', 'CORRECT'; printf $fmt, '=' x 32, '=' x 32; map { printf $fmt, ($hdrs[$_] || ''), $std_hdrs[$_] } grep { ($hdrs[$_] || '') ne $std_hdrs[$_] } ($i..$#std_hdrs); last; } } return; } sub check_pod_formatting { my ($fh) = @_; my $output = `perldoc -T $tool_file 2>&1`; # unlike() will print the whole POD if this fails; ok() is more terse. if ( $output =~ m/can't break/ ) { $exit_status = 1; print "$tool_name POD has lines that are too long\n"; } chomp(my $podchecker = `which podchecker`); if ( $podchecker ) { $output = `$podchecker $tool_file 2>&1`; if ( $output !~ m/pod syntax OK/ ) { $exit_status = 1; print "$output\n"; } } return; } sub check_option_usage { my ($fh) = @_; if ( $tool_type ne 'perl' ) { print "Not a Perl tool\n"; return; } # help and version are special opts, intrinsic to OptionParser. # The other opts are usually processed all at once by calling # DSNParser::parse_options(). my %ignore = qw( version 1 help 1 charset 1 defaults-file 1 host 1 password 1 port 1 socket 1 user 1 ); my $contents = do { local $/ = undef; <$fh> }; my @options = grep { !$ignore{$_} } map { s/^\[no\]//; $_; } $contents =~ m/^=item --(.+)$/gm; my @unused = grep { chomp(my $get = `grep -c "get('$_')" $tool_file`); chomp(my $got = `grep -c "got('$_')" $tool_file`); $get ||= 0; $got ||= 0; my $used = $get + $got; !$used; } @options; if ( @unused ) { $exit_status = 1; print "$tool_name has unused options:\n" . join('', map { "\t--$_\n" } @unused); } (my $pkg = $tool_name) =~ s/-/_/g; my $main = `grep -A 1000 $pkg $tool_file`; if ( $main !~ m/->parse_options\(/ ) { $exit_status = 1; print "$tool_name does not call DSNParser::parse_options()\n"; } return; } sub check_option_typos { my ($fh) = @_; my %ops = map { $_=>1 } split /\n/, `awk '/^=item --/ {print \$2}' $tool_file`; my $len = `wc -l $tool_file`; my $doc = `grep '^=pod' -A $len`; while ( $doc =~ m/(--[a-z]+[a-z-]+)/sg ) { my $op = $1; my $nop = $op; $nop =~ s/^--no-/--[no]/; if ( !$ops{$op} && !$ops{$nop} ) { print "Unknown option in documentation: $op\n" } } } sub check_pod_links { my $offset = `cat $tool_file | grep '^=head1 NAME' --byte-offset | cut -d ':' -f 1`; if ( !$offset ) { warn "Cannot find '^=head1 NAME' in $tool_file"; return; } chomp $offset; my $pod = `tail -c +$offset $tool_file`; if ( !$pod ) { warn "Failed to parse POD from $tool_file"; return; } my @links_in_lit = $pod =~ m/^([ ]+.*L<.+)$/mg; if ( @links_in_lit ) { print "$tool_name has POD links in literal blocks:\n"; foreach my $line ( @links_in_lit ) { print "$line\n"; } } }