percona-toolkit/lib/QueryReportFormatter.pm

# This program is copyright 2008-2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# QueryReportFormatter package
# ###########################################################################
{
# Package: QueryReportFormatter
# QueryReportFormatter is used primarily by mk-query-digest to print reports.
# The main sub is print_reports() which prints the various reports for
# mk-query-digest --report-format.  Each report is produced in a sub of
# the same name; e.g. --report-format=query_report == sub query_report().
# The given ea (<EventAggregator> object) is expected to be "complete"; i.e.
# fully aggregated and $ea->calculate_statistical_metrics() already called.
# Subreports "profile" and "prepared" require the ReportFormatter module,
# which is also in mk-query-digest.
package QueryReportFormatter;

use Lmo;
use English qw(-no_match_vars);
use POSIX qw(floor);

Transformers->import(qw(
   shorten micro_t parse_timestamp unix_timestamp make_checksum percentage_of
   crc32
));

use constant PTDEBUG           => $ENV{PTDEBUG} || 0;
use constant LINE_LENGTH       => 74;
use constant MAX_STRING_LENGTH => 10;

{ local $EVAL_ERROR; eval { require Quoter } };
{ local $EVAL_ERROR; eval { require ReportFormatter } };

# Sub: new
#
# Parameters:
#   %args - Required arguments
#
# Required Arguments:
#   OptionParser  - <OptionParser> object
#   QueryRewriter - <QueryRewriter> object
#   Quoter        - <Quoter> object
#
# Optional arguments:
#   QueryReview     - <QueryReview> object used in <query_report()>
#   dbh             - dbh used in <explain_report()>
#
# Returns:
#   QueryReportFormatter object
has Quoter => (
   is      => 'ro',
   isa     => 'Quoter',
   default => sub { Quoter->new() },
);

has label_width => (
   is      => 'ro',
   isa     => 'Int',
);

has global_headers => (
   is      => 'ro',
   isa     => 'ArrayRef',
   default => sub { [qw(    total min max avg 95% stddev median)] },
);

has event_headers => (
   is      => 'ro',
   isa     => 'ArrayRef',
   default => sub { [qw(pct total min max avg 95% stddev median)] },
);

has show_all => (
   is      => 'ro',
   isa     => 'HashRef',
   default => sub { {} },
);

has ReportFormatter => (
   is      => 'ro',
   isa     => 'ReportFormatter',
   builder => '_build_report_formatter',
);

sub _build_report_formatter {
   return ReportFormatter->new(
      line_width       => LINE_LENGTH,
      extend_right     => 1,
   );
}

sub BUILDARGS {
   my $class = shift;
   my $args  = $class->SUPER::BUILDARGS(@_);

   foreach my $arg ( qw(OptionParser QueryRewriter) ) {
      die "I need a $arg argument" unless $args->{$arg};
   }

   # If ever someone wishes for a wider label width.
   my $label_width = $args->{label_width} ||= 12;
   PTDEBUG && _d('Label width:', $label_width);

   my $o = delete $args->{OptionParser};
   my $self = {
      %$args,
      options        => {
         shorten          => 1024,
         report_all       => $o->get('report-all'),
         report_histogram => $o->get('report-histogram'),
         output           => $o->got('output') ? $o->get('output') : '',
      },
      num_format     => '# %1$-'.$label_width.'s %2$3s %3$7s %4$7s %5$7s %6$7s %7$7s %8$7s %9$7s',
      bool_format    => '# %1$-'.$label_width.'s %2$3d%% yes, %3$3d%% no',
      string_format  => '# %1$-'.$label_width.'s %2$s',
      # MySQL 8 doesn't support partitions.
      # sub explain_report can handle the error but we need to know if /*!50100 PARTITIONS */
      # was used or not, to write the correct report headers
      no_partitions  => 0,
      hidden_attrib  => {   # Don't sort/print these attribs in the reports.
         arg         => 1, # They're usually handled specially, or not
         fingerprint => 1, # printed at all.
         pos_in_log  => 1,
         ts          => 1,
      },
   };
   if (!defined($self->{max_hostname_length})) {
       $self->{max_hostname_length} = MAX_STRING_LENGTH;
   }
   if (!defined($self->{max_line_length})) {
       $self->{max_line_length} = LINE_LENGTH;
   }
   return $self;
}

# Arguments:
#   * reports       arrayref: reports to print
#   * ea            obj: EventAggregator
#   * worst         arrayref: worst items
#   * orderby       scalar: attrib worst items ordered by
#   * groupby       scalar: attrib worst items grouped by
# Optional arguments:
#   * other         arrayref: other items (that didn't make it into top worst)
#   * files         arrayref: files read for input
#   * group         hashref: don't add blank line between these reports
#                            if they appear together
# Prints the given reports (rusage, header (global), query_report, etc.) in
# the given order.  These usually come from mk-query-digest --report-format.
# Most of the required args are for header() and query_report().
sub print_reports {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(reports ea worst orderby groupby) ) {
      die "I need a $arg argument" unless exists $args{$arg};
   }
   my $reports = $args{reports};
   my $group   = $args{group};
   my $last_report;

   foreach my $report ( @$reports ) {
      PTDEBUG && _d('Printing', $report, 'report');
      my $report_output = $self->$report(%args);
      if ( $report_output ) {
         print "\n"
            if !$last_report || !($group->{$last_report} && $group->{$report});
         print $report_output;
      }
      else {
         PTDEBUG && _d('No', $report, 'report');
      }
      $last_report = $report;
   }

   return;
}

sub rusage {
   my ( $self ) = @_;
   my ( $rss, $vsz, $user, $system ) = ( 0, 0, 0, 0 );
   my $rusage = '';
   eval {
      my $mem = `ps -o rss,vsz -p $PID 2>&1`;
      ( $rss, $vsz ) = $mem =~ m/(\d+)/g;
      ( $user, $system ) = times();
      $rusage = sprintf "# %s user time, %s system time, %s rss, %s vsz\n",
         micro_t( $user,   p_s => 1, p_ms => 1 ),
         micro_t( $system, p_s => 1, p_ms => 1 ),
         shorten( ($rss || 0) * 1_024 ),
         shorten( ($vsz || 0) * 1_024 );
   };
   if ( $EVAL_ERROR ) {
      PTDEBUG && _d($EVAL_ERROR);
   }
   return $rusage ? $rusage : "# Could not get rusage\n";
}

sub date {
   my ( $self ) = @_;
   return "# Current date: " . (scalar localtime) . "\n";
}

sub hostname {
   my ( $self ) = @_;
   my $hostname = `hostname`;
   if ( $hostname ) {
      chomp $hostname;
      return "# Hostname: $hostname\n";
   }
   return;
}

sub files {
   my ( $self, %args ) = @_;
   if ( $args{files} ) {
      return "# Files: " . join(', ', map { $_->{name} } @{$args{files}}) . "\n";
   }
   return;
}

# Arguments:
#   * ea         obj: EventAggregator
#   * orderby    scalar: attrib items ordered by
# Optional arguments:
#   * select     arrayref: attribs to print, mostly for testing
# Print a report about the global statistics in the EventAggregator.
# Formerly called "global_report()."
sub header {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(ea orderby) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my $ea      = $args{ea};
   my $orderby = $args{orderby};
   my $results = $ea->results();
   my @result;

   # Get global count
   my $global_cnt = $results->{globals}->{$orderby}->{cnt} || 0;

   # Calculate QPS (queries per second) by looking at the min/max timestamp.
   my ($qps, $conc) = (0, 0);
   if ( $global_cnt && $results->{globals}->{ts}
      && ($results->{globals}->{ts}->{max} || '')
         gt ($results->{globals}->{ts}->{min} || '')
   ) {
      eval {
         my $min  = parse_timestamp($results->{globals}->{ts}->{min});
         my $max  = parse_timestamp($results->{globals}->{ts}->{max});
         my $diff = unix_timestamp($max) - unix_timestamp($min);
         $qps     = $global_cnt / ($diff || 1);
         $conc    = $results->{globals}->{$args{orderby}}->{sum} / $diff;
      };
   }

   # First line
   PTDEBUG && _d('global_cnt:', $global_cnt, 'unique:',
      scalar keys %{$results->{classes}}, 'qps:', $qps, 'conc:', $conc);
   my $line = sprintf(
      '# Overall: %s total, %s unique, %s QPS, %sx concurrency ',
      shorten($global_cnt, d=>1_000),
      shorten(scalar keys %{$results->{classes}}, d=>1_000),
      shorten($qps  || 0, d=>1_000),
      shorten($conc || 0, d=>1_000));
   $line .= ('_' x (LINE_LENGTH - length($line) + $self->label_width() - 12));
   push @result, $line;

   # Second line: time range
   if ( my $ts = $results->{globals}->{ts} ) {
      my $time_range = $self->format_time_range($ts) || "unknown";
      push @result, "# Time range: $time_range";
   }

   # Third line: rate limiting, if any
   if ( $results->{globals}->{rate_limit} ) {
      print "# Rate limits apply\n";
   }

   # Global column headers
   push @result, $self->make_global_header();

   # Sort the attributes, removing any hidden attributes.
   my $attribs = $self->sort_attribs( $ea );

   foreach my $type ( qw(num innodb) ) {
      # Add "InnoDB:" sub-header before grouped InnoDB_* attributes.
      if ( $type eq 'innodb' && @{$attribs->{$type}} ) {
         push @result, "# InnoDB:";
      };

      NUM_ATTRIB:
      foreach my $attrib ( @{$attribs->{$type}} ) {
         next unless exists $results->{globals}->{$attrib};
         my $store   = $results->{globals}->{$attrib};
         my $metrics = $ea->stats()->{globals}->{$attrib};
         my $func    = $attrib =~ m/time|wait$/ ? \&micro_t : \&shorten;
         my @values  = (
            @{$store}{qw(sum min max)},
            $store->{sum} / $store->{cnt},
            @{$metrics}{qw(pct_95 stddev median)},
         );
         @values = map { defined $_ ? $func->($_) : '' } @values;

         push @result,
            sprintf $self->{num_format},
               $self->make_label($attrib), '', @values;
      }
   }

   if ( @{$attribs->{bool}} ) {
      push @result, "# Boolean:";
      my $printed_bools = 0;
      BOOL_ATTRIB:
      foreach my $attrib ( @{$attribs->{bool}} ) {
         next unless exists $results->{globals}->{$attrib};

         my $store = $results->{globals}->{$attrib};
         if ( $store->{sum} > 0 ) {
            push @result,
               sprintf $self->{bool_format},
                  $self->make_label($attrib), $self->bool_percents($store);
            $printed_bools = 1;
         }
      }
      pop @result unless $printed_bools;
   }

   return join("\n", map { s/\s+$//; $_ } @result) . "\n";
}

sub query_report_values {
   my ($self, %args) = @_;
   foreach my $arg ( qw(ea worst orderby groupby) ) {
      die "I need a $arg argument" unless defined $arg;
   }
   my $ea      = $args{ea};
   my $groupby = $args{groupby};
   my $worst   = $args{worst};

   my $q   = $self->Quoter;
   my $qv  = $self->{QueryReview};
   my $qr  = $self->{QueryRewriter};

   my @values;
   # Print each worst item: its stats/metrics (sum/min/max/95%/etc.),
   # Query_time distro chart, tables, EXPLAIN, fingerprint, etc.
   # Items are usually unique queries/fingerprints--depends on how
   # the events were grouped.
   ITEM:
   foreach my $top_event ( @$worst ) {
      my $item       = $top_event->[0];
      my $reason     = $args{explain_why} ? $top_event->[1] : '';
      my $rank       = $top_event->[2];
      my $stats      = $ea->results->{classes}->{$item};
      my $sample     = $ea->results->{samples}->{$item};
      my $samp_query = ($self->{options}->{output} eq 'secure-slowlog') ? $sample->{fingerprint} || '' : $sample->{arg} || '';

      my %item_vals = (
         item       => $item,
         samp_query => $samp_query,
         rank       => ($rank || 0),
         reason     => $reason,
      );

      # ###############################################################
      # Possibly skip item for --review.
      # ###############################################################
      my $review_vals;
      if ( $qv ) {
         $review_vals = $qv->get_review_info($item);
         next ITEM if $review_vals->{reviewed_by} && !$self->{options}->{report_all};
         for my $col ( $qv->review_cols() ) {
            push @{$item_vals{review_vals}}, [$col, $review_vals->{$col}];
         }
      }

      $item_vals{default_db} = $sample->{db}       ? $sample->{db}
                              : $stats->{db}->{unq} ? keys %{$stats->{db}->{unq}}
                              :                       undef;
      $item_vals{tables} = [$self->{QueryParser}->extract_tables(
            query      => $samp_query,
            default_db => $item_vals{default_db},
            Quoter     => $self->Quoter,
         )];

      if ( $samp_query && ($args{variations} && @{$args{variations}}) ) {
         $item_vals{crc} = crc32($samp_query);
      }

      push @values, \%item_vals;
   }
   return \@values;
}

# Arguments:
#   * ea       obj: EventAggregator
#   * worst    arrayref: worst items
#   * orderby  scalar: attrib worst items ordered by
#   * groupby  scalar: attrib worst items grouped by
# Optional arguments:
#   * select       arrayref: attribs to print, mostly for test
#   * explain_why  bool: print reason why item is reported
#   * print_header  bool: "Report grouped by" header
sub query_report {
   my ( $self, %args ) = @_;

   my $ea      = $args{ea};
   my $groupby = $args{groupby};
   my $report_values = $self->query_report_values(%args);

   my $qr  = $self->{QueryRewriter};

   my $report = '';

   if ( $args{print_header} ) {
      $report .= "# " . ( '#' x 72 ) . "\n"
               . "# Report grouped by $groupby\n"
               . '# ' . ( '#' x 72 ) . "\n\n";
   }

   # Sort the attributes, removing any hidden attributes.
   my $attribs = $self->sort_attribs( $ea );

   # Print each worst item: its stats/metrics (sum/min/max/95%/etc.),
   # Query_time distro chart, tables, EXPLAIN, fingerprint, etc.
   # Items are usually unique queries/fingerprints--depends on how
   # the events were grouped.
   ITEM:
   foreach my $vals ( @$report_values ) {
      my $item = $vals->{item};
      # ###############################################################
      # Print the standard query analysis report.
      # ###############################################################
      $report .= "\n" if $vals->{rank} > 1;  # space between each event report
      $report .= $self->event_report(
         %args,
         item    => $item,
         sample  => $ea->results->{samples}->{$item},
         rank    => $vals->{rank},
         reason  => $vals->{reason},
         attribs => $attribs,
         db      => $vals->{default_db},
      );

      if ( $self->{options}->{report_histogram} ) {
         $report .= $self->chart_distro(
            %args,
            attrib => $self->{options}->{report_histogram},
            item   => $vals->{item},
         );
      }

      if ( $vals->{review_vals} ) {
         # Print the review information that is already in the table
         # before putting anything new into the table.
         $report .= "# Review information\n";
         foreach my $elem ( @{$vals->{review_vals}} ) {
            my ($col, $val) = @$elem;
            if ( !$val || $val ne '0000-00-00 00:00:00' ) { # issue 202
               $report .= sprintf "# %13s: %-s\n", $col, ($val ? $val : '');
            }
         }
      }

      my $partitions_msg = $self->{no_partitions} ? '' : '/*!50100 PARTITIONS*/';
      if ( $groupby eq 'fingerprint' ) {
         # Shorten it if necessary (issue 216 and 292).
         my $samp_query = $qr->shorten($vals->{samp_query}, $self->{options}->{shorten})
            if $self->{options}->{shorten};

         # Print query fingerprint.
         PTDEBUG && _d("Fingerprint\n#    $vals->{item}\n");

         # Print tables used by query.
         $report .= $self->tables_report($vals->{tables}, \%args);

         # Print sample (worst) query's CRC % 1_000.  We mod 1_000 because
         # that's actually the value stored in the ea, not the full checksum.
         # So the report will print something like,
         #   # arg crc      685 (2/66%), 159 (1/33%)
         # Thus we want our "CRC" line to be 685 and not 18547302820.
         if ( $vals->{crc} ) {
            $report.= "# CRC " . ($vals->{crc} % 1_000) . "\n";
         }

         my $log_type = $args{log_type} || '';
         my $mark     = $args{no_v_format} ? '' : '\G';

         if ( $item =~ m/^(?:[\(\s]*select|insert|replace)/ ) {
            if ( $item =~ m/^(?:insert|replace)/ ) { # No EXPLAIN
               $report .= "$samp_query${mark}\n";
            }
            else {
               $report .= "# EXPLAIN $partitions_msg\n$samp_query${mark}\n";
               $report .= $self->explain_report($samp_query, $vals->{default_db});
            }
         }
         else {
            $report .= "$samp_query${mark}\n";
            my $converted = $qr->convert_to_select($samp_query);
            if ( $converted
                 && $converted =~ m/^[\(\s]*select/i ) {
               # It converted OK to a SELECT
               $report .= "# Converted for EXPLAIN\n# EXPLAIN $partitions_msg\n$converted${mark}\n";
            }
         }
      }
      else {
         if ( $groupby eq 'tables' ) {
            my ( $db, $tbl ) = $self->Quoter->split_unquote($item);
            $report .= $self->tables_report([ [$db, $tbl] ], \%args);
         }
         $report .= "$item\n";
      }
   }

   return $report;
}

# Arguments:
#   * ea          obj: EventAggregator
#   * item        scalar: Item in ea results
#   * orderby     scalar: attribute that events are ordered by
# Optional arguments:
#   * select      arrayref: attribs to print, mostly for testing
#   * reason      scalar: why this item is being reported (top|outlier)
#   * rank        scalar: item rank among the worst
# Print a report about the statistics in the EventAggregator.
# Called by query_report().
sub event_report_values {
   my ($self, %args) = @_;

   my $ea   = $args{ea};
   my $item = $args{item};
   my $orderby = $args{orderby};
   my $results = $ea->results();

   my %vals;

   # Return unless the item exists in the results (it should).
   my $store = $results->{classes}->{$item};

   return unless $store;

   # Pick the first attribute to get counts
   my $global_cnt = $results->{globals}->{$orderby}->{cnt};
   my $class_cnt  = $store->{$orderby}->{cnt};

   # Calculate QPS (queries per second) by looking at the min/max timestamp.
   my ($qps, $conc) = (0, 0);
   if ( $global_cnt && $store->{ts}
      && ($store->{ts}->{max} || '')
         gt ($store->{ts}->{min} || '')
   ) {
      eval {
         my $min  = parse_timestamp($store->{ts}->{min});
         my $max  = parse_timestamp($store->{ts}->{max});
         my $diff = unix_timestamp($max) - unix_timestamp($min);
         $qps     = $class_cnt / $diff;
         $conc    = $store->{$orderby}->{sum} / $diff;
      };
   }

   $vals{groupby}     = $ea->{groupby};
   $vals{qps}         = $qps  || 0;
   $vals{concurrency} = $conc || 0;
   $vals{checksum}    = make_checksum($item);
   $vals{pos_in_log}  = $results->{samples}->{$item}->{pos_in_log} || 0;
   $vals{reason}      = $args{reason};
   $vals{variance_to_mean} = do {
      my $query_time = $ea->metrics(where => $item, attrib => 'Query_time');
      $query_time->{stddev}**2 / ($query_time->{avg} || 1)
   };

   $vals{counts} = {
      class_cnt        => $class_cnt,
      global_cnt       => $global_cnt,
   };

   if ( my $ts = $store->{ts}) {
      $vals{time_range} = $self->format_time_range($ts) || "unknown";
   }

   # Sort the attributes, removing any hidden attributes, if they're not
   # already given to us.  In mk-query-digest, this sub is called from
   # query_report(), but in testing it's called directly.  query_report()
   # will sort and pass the attribs so they're not for every event.
   my $attribs = $args{attribs};
   if ( !$attribs ) {
      $attribs = $self->sort_attribs( $ea );
   }

   $vals{attributes} = { map { $_ => [] } qw(num innodb bool string) };

   foreach my $type ( qw(num innodb) ) {
      # Add "InnoDB:" sub-header before grouped InnoDB_* attributes.

      NUM_ATTRIB:
      foreach my $attrib ( @{$attribs->{$type}} ) {
         next NUM_ATTRIB unless exists $store->{$attrib};
         my $vals = $store->{$attrib};
         next unless scalar %$vals;

         my $pct;
         my $func    = $attrib =~ m/time|wait$/ ? \&micro_t : \&shorten;
         my $metrics = $ea->stats()->{classes}->{$item}->{$attrib};
         my @values = (
            @{$vals}{qw(sum min max)},
            $vals->{sum} / $vals->{cnt},
            @{$metrics}{qw(pct_95 stddev median)},
         );
         @values = map { defined $_ ? $func->($_) : '' } @values;
         $pct   = percentage_of(
            $vals->{sum}, $results->{globals}->{$attrib}->{sum});

         push @{$vals{attributes}{$type}},
               [ $attrib, $pct, @values ];
      }
   }

   if ( @{$attribs->{bool}} ) {
      BOOL_ATTRIB:
      foreach my $attrib ( @{$attribs->{bool}} ) {
         next BOOL_ATTRIB unless exists $store->{$attrib};
         my $vals = $store->{$attrib};
         next unless scalar %$vals;

         if ( $vals->{sum} > 0 ) {
            push @{$vals{attributes}{bool}},
                  [ $attrib, $self->bool_percents($vals) ];
         }
      }
   }

   if ( @{$attribs->{string}} ) {
      STRING_ATTRIB:
      foreach my $attrib ( @{$attribs->{string}} ) {
         next STRING_ATTRIB unless exists $store->{$attrib};
         my $vals = $store->{$attrib};
         next unless scalar %$vals;

         push @{$vals{attributes}{string}},
               [ $attrib, $vals ];
      }
   }


   return \%vals;
}

# TODO I maybe've broken the groupby report

sub event_report {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(ea item orderby) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }

   my $item = $args{item};
   my $val  = $self->event_report_values(%args);
   my @result;

   return "# No such event $item\n" unless $val;

   # First line like:
   # Query 1: 9 QPS, 0x concurrency, ID 0x7F7D57ACDD8A346E at byte 5 ________
   my $line = sprintf(
      '# %s %d: %s QPS, %sx concurrency, ID 0x%s at byte %.f ',
      ($val->{groupby} eq 'fingerprint' ? 'Query' : 'Item'),
      $args{rank} || 0,
      shorten($val->{qps}, d=>1_000),
      shorten($val->{concurrency}, d=>1_000),
      $val->{checksum},
      $val->{pos_in_log},
   );
   my $underscores = LINE_LENGTH - length($line) + $self->label_width() - 12;
   if ( $underscores < 0 ) {
      $underscores = 0;
   }
   $line .= ('_' x $underscores);
   push @result, $line;

   # Second line: reason why this class is being reported.
   if ( $val->{reason} ) {
      push @result,
         "# This item is included in the report because it matches "
            . ($val->{reason} eq 'top' ? '--limit.' : '--outliers.');
   }

   # Third line: Variance-to-mean (V/M) ratio, like:
   # Scores: V/M = 1.5
   push @result,
      sprintf("# Scores: V/M = %.2f", $val->{variance_to_mean} );

   # Time range
   if ( $val->{time_range} ) {
      push @result, "# Time range: $val->{time_range}";
   }

   # Column header line
   push @result, $self->make_event_header();

   # Count line
   push @result,
      sprintf $self->{num_format}, 'Count',
         percentage_of($val->{counts}{class_cnt}, $val->{counts}{global_cnt}),
         $val->{counts}{class_cnt},
         map { '' } (1..8);


   my $attribs = $val->{attributes};

   foreach my $type ( qw(num innodb) ) {
      # Add "InnoDB:" sub-header before grouped InnoDB_* attributes.
      if ( $type eq 'innodb' && @{$attribs->{$type}} ) {
         push @result, "# InnoDB:";
      };

      NUM_ATTRIB:
      foreach my $attrib ( @{$attribs->{$type}} ) {
         my ($attrib_name, @vals) = @$attrib;
         push @result,
            sprintf $self->{num_format},
               $self->make_label($attrib_name), @vals;
      }
   }

   if ( @{$attribs->{bool}} ) {
      push @result, "# Boolean:";
      BOOL_ATTRIB:
      foreach my $attrib ( @{$attribs->{bool}} ) {
         my ($attrib_name, @vals) = @$attrib;
         push @result,
            sprintf $self->{bool_format},
               $self->make_label($attrib_name), @vals;
      }
   }

   if ( @{$attribs->{string}} ) {
      push @result, "# String:";
      STRING_ATTRIB:
      foreach my $attrib ( @{$attribs->{string}} ) {
         my ($attrib_name, $vals) = @$attrib;
         push @result,
            sprintf $self->{string_format},
               $self->make_label($attrib_name),
               $self->format_string_list($attrib_name, $vals, $val->{counts}{class_cnt});
      }
   }


   return join("\n", map { s/\s+$//; $_ } @result) . "\n";
}

# Arguments:
#  * ea      obj: EventAggregator
#  * item    scalar: item in ea results
#  * attrib  scalar: item's attribute to chart
# Creates a chart of value distributions in buckets.  Right now it bucketizes
# into 8 buckets, powers of ten starting with .000001.
sub chart_distro {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(ea item attrib) ) {
      die "I need a $arg argument" unless defined $args{$arg};
   }
   my $ea     = $args{ea};
   my $item   = $args{item};
   my $attrib = $args{attrib};

   my $results = $ea->results();
   my $store   = $results->{classes}->{$item}->{$attrib};
   my $vals    = $store->{all};
   return "" unless defined $vals && scalar %$vals;

   # TODO: this is broken.
   my @buck_tens = $ea->buckets_of(10);
   my @distro = map { 0 } (0 .. 7);

   # See similar code in EventAggregator::_calc_metrics() or
   # http://code.google.com/p/maatkit/issues/detail?id=866
   my @buckets = map { 0 } (0..999);
   map { $buckets[$_] = $vals->{$_} } keys %$vals;
   $vals = \@buckets;  # repoint vals from given hashref to our array

   map { $distro[$buck_tens[$_]] += $vals->[$_] } (1 .. @$vals - 1);

   my $vals_per_mark; # number of vals represented by 1 #-mark
   my $max_val        = 0;
   my $max_disp_width = 64;
   my $bar_fmt        = "# %5s%s";
   my @distro_labels  = qw(1us 10us 100us 1ms 10ms 100ms 1s 10s+);
   my @results        = "# $attrib distribution";

   # Find the distro with the most values. This will set
   # vals_per_mark and become the bar at max_disp_width.
   foreach my $n_vals ( @distro ) {
      $max_val = $n_vals if $n_vals > $max_val;
   }
   $vals_per_mark = $max_val / $max_disp_width;

   foreach my $i ( 0 .. $#distro ) {
      my $n_vals  = $distro[$i];
      my $n_marks = $n_vals / ($vals_per_mark || 1);

      # Always print at least 1 mark for any bucket that has at least
      # 1 value. This skews the graph a tiny bit, but it allows us to
      # see all buckets that have values.
      $n_marks = 1 if $n_marks < 1 && $n_vals > 0;

      my $bar = ($n_marks ? '  ' : '') . '#' x $n_marks;
      push @results, sprintf $bar_fmt, $distro_labels[$i], $bar;
   }

   return join("\n", @results) . "\n";
}

# Profile subreport (issue 381).
# Arguments:
#   * ea            obj: EventAggregator
#   * worst         arrayref: worst items
#   * groupby       scalar: attrib worst items grouped by
# Optional arguments:
#   * other            arrayref: other items (that didn't make it into top worst)
#   * distill_args     hashref: extra args for distill()
sub profile {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(ea worst groupby) ) {
      die "I need a $arg argument" unless defined $arg;
   }
   my $ea      = $args{ea};
   my $worst   = $args{worst};
   my $other   = $args{other};
   my $groupby = $args{groupby};

   my $qr  = $self->{QueryRewriter};

   # Total response time of all events.
   my $results = $ea->results();
   my $total_r = $results->{globals}->{Query_time}->{sum} || 0;

   my @profiles;
   foreach my $top_event ( @$worst ) {
      my $item       = $top_event->[0];
      my $rank       = $top_event->[2];
      my $stats      = $ea->results->{classes}->{$item};
      my $sample     = $ea->results->{samples}->{$item};
      my $samp_query = $sample->{arg} || '';
      my $query_time = $ea->metrics(where => $item, attrib => 'Query_time');

      my %profile    = (
         rank   => $rank,
         r      => $stats->{Query_time}->{sum},
         cnt    => $stats->{Query_time}->{cnt},
         sample => $groupby eq 'fingerprint' ?
                    $qr->distill($samp_query, %{$args{distill_args}}) : $item,
         id     => $groupby eq 'fingerprint' ? make_checksum($item)   : '',
         vmr    => ($query_time->{stddev}**2) / ($query_time->{avg} || 1),
      );

      push @profiles, \%profile;
   }

   my $report = $self->ReportFormatter();
   $report->title('Profile');
   my @cols = (
      { name => 'Rank',          right_justify => 1,             },
      { name => 'Query ID',      width => 35                     },
      { name => 'Response time', right_justify => 1,             },
      { name => 'Calls',         right_justify => 1,             },
      { name => 'R/Call',        right_justify => 1,             },
      { name => 'V/M',           right_justify => 1, width => 5, },
      { name => 'Item',                                          },
   );
   $report->set_columns(@cols);

   foreach my $item ( sort { $a->{rank} <=> $b->{rank} } @profiles ) {
      my $rt  = sprintf('%10.4f', $item->{r});
      my $rtp = sprintf('%4.1f%%', $item->{r} / ($total_r || 1) * 100);
      my $rc  = sprintf('%8.4f', $item->{r} / $item->{cnt});
      my $vmr = sprintf('%4.2f', $item->{vmr});
      my @vals = (
         $item->{rank},
         "0x$item->{id}",
         "$rt $rtp",
         $item->{cnt},
         $rc,
         $vmr,
         $item->{sample},
      );
      $report->add_line(@vals);
   }

   # The last line of the profile is for all the other, non-worst items.
   # http://code.google.com/p/maatkit/issues/detail?id=1043
   if ( $other && @$other ) {
      my $misc = {
            r   => 0,
            cnt => 0,
      };
      foreach my $other_event ( @$other ) {
         my $item      = $other_event->[0];
         my $stats     = $ea->results->{classes}->{$item};
         $misc->{r}   += $stats->{Query_time}->{sum};
         $misc->{cnt} += $stats->{Query_time}->{cnt};
      }
      my $rt  = sprintf('%10.4f', $misc->{r});
      my $rtp = sprintf('%4.1f%%', $misc->{r} / ($total_r || 1) * 100);
      my $rc  = sprintf('%8.4f', $misc->{r} / $misc->{cnt});
      $report->add_line(
         "MISC",
         "0xMISC",
         "$rt $rtp",
         $misc->{cnt},
         $rc,
         '0.0',  # variance-to-mean ratio is not meaningful here
         "<".scalar @$other." ITEMS>",
      );
   }

   return $report->get_report();
}

# Prepared statements subreport (issue 740).
# Arguments:
#   * ea            obj: EventAggregator
#   * worst         arrayref: worst items
#   * groupby       scalar: attrib worst items grouped by
# Optional arguments:
#   * distill_args  hashref: extra args for distill()
sub prepared {
   my ( $self, %args ) = @_;
   foreach my $arg ( qw(ea worst groupby) ) {
      die "I need a $arg argument" unless defined $arg;
   }
   my $ea      = $args{ea};
   my $worst   = $args{worst};
   my $groupby = $args{groupby};

   my $qr = $self->{QueryRewriter};

   my @prepared;       # prepared statements
   my %seen_prepared;  # report each PREP-EXEC pair once
   my $total_r = 0;

   foreach my $top_event ( @$worst ) {
      my $item       = $top_event->[0];
      my $rank       = $top_event->[2];
      my $stats      = $ea->results->{classes}->{$item};
      my $sample     = $ea->results->{samples}->{$item};
      my $samp_query = $sample->{arg} || '';

      $total_r += $stats->{Query_time}->{sum};
      next unless $stats->{Statement_id} && $item =~ m/^(?:prepare|execute) /;

      # Each PREPARE (probably) has some EXECUTE and each EXECUTE (should)
      # have some PREPARE.  But these are only the top N events so we can get
      # here a PREPARE but not its EXECUTE or vice-versa.  The prepared
      # statements report requires both so this code gets the missing pair
      # from the ea stats.
      my ($prep_stmt, $prep, $prep_r, $prep_cnt);
      my ($exec_stmt, $exec, $exec_r, $exec_cnt);

      if ( $item =~ m/^prepare / ) {
         $prep_stmt           = $item;
         ($exec_stmt = $item) =~ s/^prepare /execute /;
      }
      else {
         ($prep_stmt = $item) =~ s/^execute /prepare /;
         $exec_stmt           = $item;
      }

      # Report each PREPARE/EXECUTE pair once.
      if ( !$seen_prepared{$prep_stmt}++ ) {
         if ( exists $ea->results->{classes}->{$exec_stmt} ) {
            $exec     = $ea->results->{classes}->{$exec_stmt};
            $exec_r   = $exec->{Query_time}->{sum};
            $exec_cnt = $exec->{Query_time}->{cnt};
         }
         else {
            PTDEBUG && _d('Statement prepared but not executed:', $item);
            $exec_r   = 0;
            $exec_cnt = 0;
         }

         if ( exists $ea->results->{classes}->{$prep_stmt} ) {
            $prep     = $ea->results->{classes}->{$prep_stmt};
            $prep_r   = $prep->{Query_time}->{sum};
            $prep_cnt = scalar keys %{$prep->{Statement_id}->{unq}},
         }
         else {
            PTDEBUG && _d('Statement executed but not prepared:', $item);
            $prep_r   = 0;
            $prep_cnt = 0;
         }

         push @prepared, {
            prep_r   => $prep_r,
            prep_cnt => $prep_cnt,
            exec_r   => $exec_r,
            exec_cnt => $exec_cnt,
            rank     => $rank,
            sample   => $groupby eq 'fingerprint'
                          ? $qr->distill($samp_query, %{$args{distill_args}})
                          : $item,
            id       => $groupby eq 'fingerprint' ? make_checksum($item)
                                                  : '',
         };
      }
   }

   # Return unless there are prepared statements to report.
   return unless scalar @prepared;

   my $report = $self->ReportFormatter();
   $report->title('Prepared statements');
   $report->set_columns(
      { name => 'Rank',          right_justify => 1, },
      { name => 'Query ID',                          },
      { name => 'PREP',          right_justify => 1, },
      { name => 'PREP Response', right_justify => 1, },
      { name => 'EXEC',          right_justify => 1, },
      { name => 'EXEC Response', right_justify => 1, },
      { name => 'Item',                              },
   );

   foreach my $item ( sort { $a->{rank} <=> $b->{rank} } @prepared ) {
      my $exec_rt  = sprintf('%10.4f', $item->{exec_r});
      my $exec_rtp = sprintf('%4.1f%%',$item->{exec_r}/($total_r || 1) * 100);
      my $prep_rt  = sprintf('%10.4f', $item->{prep_r});
      my $prep_rtp = sprintf('%4.1f%%',$item->{prep_r}/($total_r || 1) * 100);
      $report->add_line(
         $item->{rank},
         "0x$item->{id}",
         $item->{prep_cnt} || 0,
         "$prep_rt $prep_rtp",
         $item->{exec_cnt} || 0,
         "$exec_rt $exec_rtp",
         $item->{sample},
      );
   }
   return $report->get_report();
}

sub make_global_header {
   my ( $self ) = @_;
   my @lines;

   # First line:
   # Attribute          total     min     max     avg     95%  stddev  median
   push @lines,
      sprintf $self->{num_format}, "Attribute", '', @{$self->global_headers()};

   # Underline first line:
   # =========        ======= ======= ======= ======= ======= ======= =======
   # The numbers 7, 7, 7, etc. are the field widths from make_header().
   # Hard-coded values aren't ideal but this code rarely changes.
   push @lines,
      sprintf $self->{num_format},
         (map { "=" x $_ } $self->label_width()),
         (map { " " x $_ } qw(3)),  # no pct column in global header
         (map { "=" x $_ } qw(7 7 7 7 7 7 7));

   # End result should be like:
   # Attribute          total     min     max     avg     95%  stddev  median
   # =========        ======= ======= ======= ======= ======= ======= =======
   return @lines;
}

sub make_event_header {
   my ( $self ) = @_;

   # Event headers are all the same so we just make them once.
   return @{$self->{event_header_lines}} if $self->{event_header_lines};

   my @lines;
   push @lines,
      sprintf $self->{num_format}, "Attribute", @{$self->event_headers()};

   # The numbers 6, 7, 7, etc. are the field widths from make_header().
   # Hard-coded values aren't ideal but this code rarely changes.
   push @lines,
      sprintf $self->{num_format},
         map { "=" x $_ } ($self->label_width(), qw(3 7 7 7 7 7 7 7));

   # End result should be like:
   # Attribute    pct   total     min     max     avg     95%  stddev  median
   # ========= ====== ======= ======= ======= ======= ======= ======= =======
   $self->{event_header_lines} = \@lines;
   return @lines;
}

# Convert attribute names into labels
sub make_label {
   my ( $self, $val ) = @_;
   return '' unless $val;

   $val =~ s/_/ /g;

   if ( $val =~ m/^InnoDB/ ) {
      $val =~ s/^InnoDB //;
      $val = $val eq 'trx id' ? "InnoDB trxID"
           : substr($val, 0, $self->label_width());
   }

   $val = $val eq 'user'            ? 'Users'
        : $val eq 'db'              ? 'Databases'
        : $val eq 'Query time'      ? 'Exec time'
        : $val eq 'host'            ? 'Hosts'
        : $val eq 'Error no'        ? 'Errors'
        : $val eq 'bytes'           ? 'Query size'
        : $val eq 'Tmp disk tables' ? 'Tmp disk tbl'
        : $val eq 'Tmp table sizes' ? 'Tmp tbl size'
        : substr($val, 0, $self->label_width);

   return $val;
}

sub bool_percents {
   my ( $self, $vals ) = @_;
   # Since the value is either 1 or 0, the sum is the number of
   # all true events and the number of false events is the total
   # number of events minus those that were true.
   my $p_true  = percentage_of($vals->{sum},  $vals->{cnt});
   my $p_false = percentage_of(($vals->{cnt} - $vals->{sum}), $vals->{cnt});
   return $p_true, $p_false;
}

# Does pretty-printing for lists of strings like users, hosts, db.
sub format_string_list {
   my ( $self, $attrib, $vals, $class_cnt ) = @_;

   # Only class result values have unq.  So if unq doesn't exist,
   # then we've been given global values.
   if ( !exists $vals->{unq} ) {
      return ($vals->{cnt});
   }

   my $show_all = $self->show_all();

   my $cnt_for = $vals->{unq};
   if ( 1 == keys %$cnt_for ) {
      my ($str) = keys %$cnt_for;
      # - 30 for label, spacing etc.
      $str = substr($str, 0, LINE_LENGTH - 30) . '...'
         if length $str > LINE_LENGTH - 30;
      return $str;
   }
   my $line = '';
   my @top = sort { $cnt_for->{$b} <=> $cnt_for->{$a} || $a cmp $b }
                  keys %$cnt_for;
   my $i = 0;
   foreach my $str ( @top ) {
      my $print_str;
      if ( $str =~ m/(?:\d+\.){3}\d+/ ) {
         $print_str = $str;  # Do not shorten IP addresses.
      }
      elsif ( $self->{max_hostname_length} > 0 and length $str > $self->{max_hostname_length} ) {
         $print_str = substr($str, 0, $self->{max_hostname_length}) . '...';
      } else {
         $print_str = $str;
      }
      my $p = percentage_of($cnt_for->{$str}, $class_cnt);
      $print_str .= " ($cnt_for->{$str}/$p%)";
      my $trim_length = LINE_LENGTH;
      if ($self->{max_hostname_length} == 0 or $self->{max_hostname_length} > LINE_LENGTH) {
          $trim_length = $self->{max_hostname_length};
      }
      if ( $self->{max_line_length} > 0 and !$show_all->{$attrib} ) {
         last if (length $line) + (length $print_str)  > $self->{max_line_length} - 27;
      }
      $line .= "$print_str, ";
      $i++;
   }

   $line =~ s/, $//;

   if ( $i < @top ) {
      $line .= "... " . (@top - $i) . " more";
   }

   return $line;
}

sub sort_attribs {
   my ( $self, $ea ) = @_;
   my $attribs = $ea->get_attributes();
   return unless $attribs && @$attribs;
   PTDEBUG && _d("Sorting attribs:", @$attribs);

   # Sort order for numeric attribs.  Attribs not listed here come after these
   # in alphabetical order.
   my @num_order = qw(
      Query_time
      Exec_orig_time
      Transmit_time
      Lock_time
      Rows_sent
      Rows_examined
      Rows_affected
      Rows_read
      Bytes_sent
      Merge_passes
      Tmp_tables
      Tmp_disk_tables
      Tmp_table_sizes
      bytes
   );
   my $i         = 0;
   my %num_order = map { $_ => $i++ } @num_order;

   my (@num, @innodb, @bool, @string);
   ATTRIB:
   foreach my $attrib ( @$attribs ) {
      next if $self->{hidden_attrib}->{$attrib};

      # Default type is string in EventAggregator::make_handler().
      my $type = $ea->type_for($attrib) || 'string';
      if ( $type eq 'num' ) {
         if ( $attrib =~ m/^InnoDB_/ ) {
            push @innodb, $attrib;
         }
         else {
            push @num, $attrib;
         }
      }
      elsif ( $type eq 'bool' ) {
         push @bool, $attrib;
      }
      elsif ( $type eq 'string' ) {
         push @string, $attrib;
      }
      else {
         PTDEBUG && _d("Unknown attrib type:", $type, "for", $attrib);
      }
   }

   @num    = sort { pref_sort($a, $num_order{$a}, $b, $num_order{$b}) } @num;
   @innodb = sort { uc $a cmp uc $b } @innodb;
   @bool   = sort { uc $a cmp uc $b } @bool;
   @string = sort { uc $a cmp uc $b } @string;

   return {
      num     => \@num,
      innodb  => \@innodb,
      string  => \@string,
      bool    => \@bool,
   };
}

sub pref_sort {
   my ( $attrib_a, $order_a, $attrib_b, $order_b ) = @_;

   # Neither has preferred order so sort by attrib name alphabetically.
   if ( !defined $order_a && !defined $order_b ) {
      return $attrib_a cmp $attrib_b;
   }

   # By have a preferred order so sort by their order.
   if ( defined $order_a && defined $order_b ) {
      return $order_a <=> $order_b;
   }

   # Only one has a preferred order so sort it first.
   if ( !defined $order_a ) {
      return 1;
   }
   else {
      return -1;
   }
}

# Gets an arrayref of [db, tbl] arrayrefs pairs to print out
sub tables_report {
   my ( $self, $tables_ref, $args_ref ) = @_;
   return '' unless @$tables_ref;
   my $q      = $self->Quoter();
   my $tables = "";
   my $mark   = $args_ref->{no_v_format} ? '' : '\G';
   foreach my $db_tbl ( @$tables_ref ) {
      my ( $db, $tbl ) = @$db_tbl;
      $tables .= '#    SHOW TABLE STATUS'
               . ($db ? " FROM `$db`" : '')
               . " LIKE '$tbl'${mark}\n";
      $tables .= "#    SHOW CREATE TABLE "
               . $q->quote(grep { $_ } @$db_tbl)
               . "${mark}\n";
   }
   return $tables ? "# Tables\n$tables" : "# No tables\n";
}

sub explain_report {
   my ( $self, $query, $db ) = @_;
   return '' unless $query;

   my $dbh = $self->{dbh};
   my $q   = $self->Quoter();
   my $qp  = $self->{QueryParser};
   return '' unless $dbh && $q && $qp;

   my $explain = '';
   eval {
      if ( !$qp->has_derived_table($query) ) {
         if ( $db ) {
            PTDEBUG && _d($dbh, "USE", $db);
            $dbh->do("USE " . $q->quote($db));
         }
         my $sth;
         eval {
             $sth = $dbh->prepare("EXPLAIN /*!50100 PARTITIONS*/ $query");
             $sth->execute();
         };
         if ($EVAL_ERROR) { # MySQL 8.0+ doesn't support PARTITIONS
             $self->{no_partitions} = 1;
             $sth = $dbh->prepare("EXPLAIN $query");
             $sth->execute();
         }
         $sth->execute();
         my $i = 1;
         while ( my @row = $sth->fetchrow_array() ) {
            $explain .= "# *************************** $i. "
                      . "row ***************************\n";
            foreach my $j ( 0 .. $#row ) {
               my $value_format = $sth->{NAME}->[$j] eq 'filtered' ? "%.02f" : "%s";
               $explain .= sprintf "# %13s: $value_format\n", $sth->{NAME}->[$j],
                  defined $row[$j] ? $row[$j] : 'NULL';
            }
            $i++;  # next row number
         }
      }
   };
   if ( $EVAL_ERROR ) {
      PTDEBUG && _d("EXPLAIN failed:", $query, $EVAL_ERROR);
   }
   return $explain ? $explain : "# EXPLAIN failed: $EVAL_ERROR";
}

sub format_time_range {
   my ( $self, $vals ) = @_;
   my $min = parse_timestamp($vals->{min} || '');
   my $max = parse_timestamp($vals->{max} || '');

   if ( $min && $max && $min eq $max ) {
      return "all events occurred at $min";
   }

   # Remove common prefix (day).
   my ($min_day) = split(' ', $min) if $min;
   my ($max_day) = split(' ', $max) if $max;
   if ( ($min_day || '') eq ($max_day || '') ) {
      (undef, $max) = split(' ', $max);
   }

   return $min && $max ? "$min to $max" : '';
}

sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

no Lmo;
1;
}
# ###########################################################################
# End QueryReportFormatter package
# ###########################################################################