percona-toolkit/lib/JSONReportFormatter.pm

# This program is copyright 2013 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# JSONReportFormatter package
# ###########################################################################
{
package JSONReportFormatter;
use Lmo;

use List::Util   qw(sum);
use Transformers qw(make_checksum parse_timestamp);

use constant PTDEBUG => $ENV{PTDEBUG} || 0;

my $have_json = eval { require JSON };

our $pretty_json = $ENV{PTTEST_PRETTY_JSON} || 0;
our $sorted_json = $ENV{PTTEST_PRETTY_JSON} || 0;


extends qw(QueryReportFormatter);

has 'QueryRewriter' => (
   is       => 'ro',
   isa      => 'Object',
   required => 1,
);

has 'QueryParser' => (
   is       => 'ro',
   isa      => 'Object',
   required => 1,
);

has 'Quoter' => (
   is       => 'ro',
   isa      => 'Object',
   required => 1,
);

has _json => (
   is       => 'ro',
   init_arg => undef,
   builder  => '_build_json',
);

has 'max_query_length' => (
   is       => 'rw',
   isa      => 'Int',
   required => 0,
   default  => sub { return 10_000; }, # characters, not bytes
);

has 'max_fingerprint_length' => (
   is       => 'rw',
   isa      => 'Int',
   required => 0,
   default  => sub { return 5_000; }, # characters, not bytes
);

sub _build_json {
   return unless $have_json;
   return JSON->new->utf8
                   ->pretty($pretty_json)
                   ->canonical($sorted_json);
}

sub encode_json {
   my ($self, $encode) = @_;
   if ( my $json = $self->_json ) {
      return $json->encode($encode);
   }
   else {
      return Transformers::encode_json($encode);
   }
}

override [qw(rusage date hostname files header profile prepared)] => sub {
   return;
};

override event_report => sub {
   my ($self, %args) = @_;
   return $self->event_report_values(%args);
};

override query_report => sub {
   my ($self, %args) = @_;
   foreach my $arg ( qw(ea worst orderby groupby) ) {
      die "I need a $arg argument" unless defined $arg;
   }
   my $ea      = $args{ea};
   my $worst   = $args{worst};
   my $orderby = $args{orderby};
   my $groupby = $args{groupby};

   my $results = $ea->results();
   my @attribs = @{$ea->get_attributes()};

   my $q  = $self->Quoter;
   my $qr = $self->QueryRewriter;

   # ########################################################################
   # Global data
   # ########################################################################
   my $global_data = {
      metrics => {},
      files   => $args{files},
      ($args{resume} && scalar keys %{$args{resume}} ? (resume  => $args{resume}) : ()),
   };

   # Get global count
   my $global_cnt = $results->{globals}->{$orderby}->{cnt} || 0;
   my $global_unq = scalar keys %{$results->{classes}};

   # Calculate QPS (queries per second) by looking at the min/max timestamp.
   my ($qps, $conc) = (0, 0);
   if ( $global_cnt && $results->{globals}->{ts}
        && ($results->{globals}->{ts}->{max} || '')
            gt ($results->{globals}->{ts}->{min} || '') )
   {
      eval {
         my $min  = parse_timestamp($results->{globals}->{ts}->{min});
         my $max  = parse_timestamp($results->{globals}->{ts}->{max});
         my $diff = unix_timestamp($max) - unix_timestamp($min);
         $qps     = $global_cnt / ($diff || 1);
         $conc    = $results->{globals}->{$orderby}->{sum} / $diff;
      };
   }

   $global_data->{query_count}        = $global_cnt;
   $global_data->{unique_query_count} = $global_unq;
   $global_data->{queries_per_second} = $qps  if $qps;
   $global_data->{concurrency}        = $conc if $conc;

   if ( exists $results->{globals}->{rate_limit} ) {
      my $rate_limit = $results->{globals}->{rate_limit}->{min} || '';
      my ($type, $limit) = $rate_limit =~ m/^(\w+):(\d+)$/;
      if ( $type && $limit ) {
         $global_data->{rate_limit} = {
            type  => $type,
            limit => int($limit),
         };
      }
      else {
         $global_data->{rate_limit}->{error} = "Invalid rate limit: $rate_limit";
      }

      if (    ($results->{globals}->{rate_limit}->{min} || '')
           ne ($results->{globals}->{rate_limit}->{max} || '') ) {
         $global_data->{rate_limit}->{diff} = 1;
      }
   }

   my %hidden_attrib = (
      arg         => 1,
      fingerprint => 1,
      pos_in_log  => 1,
      ts          => 1,
   );

   foreach my $attrib ( grep { !$hidden_attrib{$_} } @attribs ) {
      my $type = $ea->type_for($attrib) || 'string';
      next if $type eq 'string';
      next unless exists $results->{globals}->{$attrib};

      my $store   = $results->{globals}->{$attrib};
      my $metrics = $ea->stats()->{globals}->{$attrib};
      my $int     = $attrib =~ m/(?:time|wait)$/ ? 0 : 1;

      # Be careful of Perl references: changing $attrib really changes it;
      # it's not a local copy or copy-on-write.
      my $real_attrib = $attrib eq 'bytes' ? 'Query_length' : $attrib;

      if ( $type eq 'num' ) {
         foreach my $m ( qw(sum min max) ) {
            if ( $int ) {
               $global_data->{metrics}->{$real_attrib}->{$m}
                  = sprintf('%d', $store->{$m} || 0);
            }
            else {  # microsecond
               $global_data->{metrics}->{$real_attrib}->{$m}
                  = sprintf('%.6f',  $store->{$m} || 0);
            }
         }
         foreach my $m ( qw(pct_95 stddev median) ) {
            if ( $int ) {
               $global_data->{metrics}->{$real_attrib}->{$m}
                  = sprintf('%d', $metrics->{$m} || 0);
            }
            else {  # microsecond
               $global_data->{metrics}->{$real_attrib}->{$m}
                  = sprintf('%.6f',  $metrics->{$m} || 0);
            }
         }
         if ( $int ) {
            $global_data->{metrics}->{$real_attrib}->{avg}
               = sprintf('%d', $store->{sum} / $store->{cnt});
         }
         else {
            $global_data->{metrics}->{$real_attrib}->{avg}
               = sprintf('%.6f', $store->{sum} / $store->{cnt});
         }
      }
      elsif ( $type eq 'bool' ) {
         my $store = $results->{globals}->{$real_attrib};
         $global_data->{metrics}->{$real_attrib}->{cnt}
            = sprintf('%d', $store->{sum});
      }
   }

   # ########################################################################
   # Query class data
   # ########################################################################

   my @classes;
   foreach my $worst_info ( @$worst ) {
      my $item   = $worst_info->[0];
      my $stats  = $ea->results->{classes}->{$item};
      my $sample = $ea->results->{samples}->{$item};

      my $all_log_pos = $ea->{result_classes}->{$item}->{pos_in_log}->{all};
      my $times_seen  = sum values %$all_log_pos;

      # Distill the query.
      my $distill     = $groupby eq 'fingerprint' ? $qr->distill($sample->{arg})
                      :                             undef;
      my $fingerprint = substr($item, 0, $self->max_fingerprint_length);
      my $checksum    = make_checksum($item);
      my $explain     = $self->explain_report($sample->{arg}, $sample->{db});
      my $class       = {
         checksum    => $checksum,
         fingerprint => $fingerprint,
         distillate  => $distill,
         attribute   => $groupby,
         query_count => $times_seen,
         $args{anon} ? () : (
            example     => {
               query      => substr($sample->{arg}, 0, $self->max_query_length),
               ts         => $sample->{ts} ? parse_timestamp($sample->{ts}) : undef,
               Query_time => $sample->{Query_time},
               $explain ?
                  ( explain  => $explain ): (),
            },
         ),
      };

      my %metrics;
      foreach my $attrib ( @attribs ) {
         my $real_attrib = $attrib eq 'bytes' ? 'Query_length' : $attrib;
         next if $real_attrib eq 'Rows_affected'
            && $distill && $distill =~ m/^(?:SELECT|SHOW|SET|ADMIN)/;
         $metrics{$real_attrib} = $ea->metrics(
            attrib => $attrib,
            where  => $item,
         );
      }

      foreach my $attrib ( keys %metrics ) {
         if ( ! grep { $_ } values %{$metrics{$attrib}} ) {
            delete $metrics{$attrib};
            next;
         }
         delete $metrics{pos_in_log};
         delete $metrics{$attrib}->{cnt};

         if ($attrib eq 'ts') {
            my $ts = delete $metrics{ts};
            foreach my $thing ( qw(min max) ) {
               next unless defined $ts && defined $ts->{$thing};
               $ts->{$thing} = parse_timestamp($ts->{$thing});
            }
            $class->{ts_min} = $ts->{min};
            $class->{ts_max} = $ts->{max};
         }
         else {
            my $type = $attrib eq 'Query_length' ? 'num' : $ea->type_for($attrib) || 'string';
            if ( $type eq 'string' ) {
               $metrics{$attrib} = { value => $metrics{$attrib}{max} };
            }
            elsif ( $type eq 'num' ) {
               # Avoid scientific notation in the metrics by forcing it to use
               # six decimal places.
               foreach my $value ( values %{$metrics{$attrib}} ) {
                  next unless defined $value;
                  if ( $attrib =~ m/_(?:time|wait)$/ ) {
                     $value = sprintf('%.6f', $value);
                  }
                  else {
                     $value = sprintf('%d', $value);
                  }
               }
            }
            elsif ( $type eq 'bool' ) {
               $metrics{$attrib} = {
                  yes => sprintf('%d', $metrics{$attrib}->{sum}),
               };
            }
         }
      }

      # Add "copy-paste" info, i.e. this stuff from the regular report:
      #
      # Tables
      #    SHOW TABLE STATUS FROM `db2` LIKE 'tuningdetail_21_265507'\G
      #    SHOW CREATE TABLE `db2`.`tuningdetail_21_265507`\G
      #    SHOW TABLE STATUS FROM `db1` LIKE 'gonzo'\G
      #    SHOW CREATE TABLE `db1`.`gonzo`\G
      #     update db2.tuningdetail_21_265507 n
      #           inner join db1.gonzo a using(gonzo)
      #                 set n.column1 = a.column1, n.word3 = a.word3\G
      # Converted for EXPLAIN
      # EXPLAIN /*!50100 PARTITIONS*/
      #    select  n.column1 = a.column1, n.word3 = a.word3
      #    from db2.tuningdetail_21_265507 n
      #    inner join db1.gonzo a using(gonzo) \G
      #
      # The formatting isn't included, just the useful data, like:
      #
      # $tables = [
      #    {
      #      create => "SHOW CREATE TABLE db.foo",
      #      status => "SHOW TABLE STATUS FROM db LIKE foo",
      #    },
      #    explain => "select ..."
      # ]
      #
      # This is called "copy-paste" because users can copy-paste these
      # ready-made lines into MySQL.
      my @tables;
      if ( $groupby eq 'fingerprint' ) {
         # Get SHOW CREATE TABLE and SHOW TABLE STATUS.
         my $default_db = $sample->{db}       ? $sample->{db}
                        : $stats->{db}->{unq} ? keys %{$stats->{db}->{unq}}
                        :                       undef;
         my @table_names = $self->QueryParser->extract_tables(
            query      => $sample->{arg} || '',
            default_db => $default_db,
            Quoter     => $q,
         );
         my $mark = $args{no_v_format} ? '' : '\G';

         foreach my $db_tbl ( @table_names ) {
            my ( $db, $tbl ) = @$db_tbl;
            my $status
               = 'SHOW TABLE STATUS'
               . ($db ? " FROM `$db`" : '')
               . " LIKE '$tbl'${mark}";
            my $create
               = "SHOW CREATE TABLE "
               . $q->quote(grep { $_ } @$db_tbl)
               . ${mark};
            push @tables, { status => $status, create => $create };
         }

         if ( !$args{anon} ) {
            # Convert possible non-SELECTs for EXPLAIN.
            if ( $item =~ m/^(?:[\(\s]*select|insert|replace)/ ) {
               if ( $item =~ m/^(?:insert|replace)/ ) {
                  # Cannot convert or EXPLAIN INSERT or REPLACE queries.
               }
               else {
                  # SELECT queries don't need to converted for EXPLAIN.

                  # TODO: return the actual EXPLAIN plan
                  # $self->explain_report($query, $vals->{default_db});
               }
            }
            else {
               # Query is not SELECT, INSERT, or REPLACE, so we can convert
               # it for EXPLAIN.
               my $converted = $qr->convert_to_select(
                  $sample->{arg} || '',
               );
               if ( $converted && $converted =~ m/^[\(\s]*select/i ) {
                  $class->{example}->{as_select} = $converted;
               }
            }
         }
      }

      # Add response time histogram for Query_time
      my $vals = $stats->{Query_time}->{all};
      if ( defined $vals && scalar %$vals ) {
         # TODO: this is broken.
         my @buck_tens = $ea->buckets_of(10);
         my @distro = map { 0 } (0 .. 7);
         my @buckets = map { 0 } (0..999);
         map { $buckets[$_] = $vals->{$_} } keys %$vals;
         $vals = \@buckets;  # repoint vals from given hashref to our array
         map { $distro[$buck_tens[$_]] += $vals->[$_] } (1 .. @$vals - 1);
         # @distro = qw(1us 10us 100us 1ms 10ms 100ms 1s 10s+)
         $class->{histograms}->{Query_time} = \@distro;
      } # histogram

      $class->{metrics} = \%metrics;
      if ( @tables ) {
         $class->{tables} = \@tables;
      }
      push @classes, $class;
   }

   # ########################################################################
   # Done, combine, encode, and return global and query class data
   # ########################################################################
   my $data = {
      global  => $global_data,
      classes => \@classes,
   };
   my $json = $self->encode_json($data);
   $json .= "\n" unless $json =~ /\n\Z/;
   return $json;
};

no Lmo;
1;
}
# ###########################################################################
# End JSONReportFormatter package
# ###########################################################################