Files
percona-toolkit/lib/JSONReportFormatter.pm
Sveta Smirnova 17820dbf23 PR-471 - Support adding explain output for slow query in JSON report as well
- Put fix into the proper place
- Added test cases
2023-11-06 17:21:28 +03:00

435 lines
15 KiB
Perl

# This program is copyright 2013 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# JSONReportFormatter package
# ###########################################################################
{
package JSONReportFormatter;
use Lmo;
use List::Util qw(sum);
use Transformers qw(make_checksum parse_timestamp);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
my $have_json = eval { require JSON };
our $pretty_json = $ENV{PTTEST_PRETTY_JSON} || 0;
our $sorted_json = $ENV{PTTEST_PRETTY_JSON} || 0;
extends qw(QueryReportFormatter);
has 'QueryRewriter' => (
is => 'ro',
isa => 'Object',
required => 1,
);
has 'QueryParser' => (
is => 'ro',
isa => 'Object',
required => 1,
);
has 'Quoter' => (
is => 'ro',
isa => 'Object',
required => 1,
);
has _json => (
is => 'ro',
init_arg => undef,
builder => '_build_json',
);
has 'max_query_length' => (
is => 'rw',
isa => 'Int',
required => 0,
default => sub { return 10_000; }, # characters, not bytes
);
has 'max_fingerprint_length' => (
is => 'rw',
isa => 'Int',
required => 0,
default => sub { return 5_000; }, # characters, not bytes
);
sub _build_json {
return unless $have_json;
return JSON->new->utf8
->pretty($pretty_json)
->canonical($sorted_json);
}
sub encode_json {
my ($self, $encode) = @_;
if ( my $json = $self->_json ) {
return $json->encode($encode);
}
else {
return Transformers::encode_json($encode);
}
}
override [qw(rusage date hostname files header profile prepared)] => sub {
return;
};
override event_report => sub {
my ($self, %args) = @_;
return $self->event_report_values(%args);
};
override query_report => sub {
my ($self, %args) = @_;
foreach my $arg ( qw(ea worst orderby groupby) ) {
die "I need a $arg argument" unless defined $arg;
}
my $ea = $args{ea};
my $worst = $args{worst};
my $orderby = $args{orderby};
my $groupby = $args{groupby};
my $results = $ea->results();
my @attribs = @{$ea->get_attributes()};
my $q = $self->Quoter;
my $qr = $self->QueryRewriter;
# ########################################################################
# Global data
# ########################################################################
my $global_data = {
metrics => {},
files => $args{files},
($args{resume} && scalar keys %{$args{resume}} ? (resume => $args{resume}) : ()),
};
# Get global count
my $global_cnt = $results->{globals}->{$orderby}->{cnt} || 0;
my $global_unq = scalar keys %{$results->{classes}};
# Calculate QPS (queries per second) by looking at the min/max timestamp.
my ($qps, $conc) = (0, 0);
if ( $global_cnt && $results->{globals}->{ts}
&& ($results->{globals}->{ts}->{max} || '')
gt ($results->{globals}->{ts}->{min} || '') )
{
eval {
my $min = parse_timestamp($results->{globals}->{ts}->{min});
my $max = parse_timestamp($results->{globals}->{ts}->{max});
my $diff = unix_timestamp($max) - unix_timestamp($min);
$qps = $global_cnt / ($diff || 1);
$conc = $results->{globals}->{$orderby}->{sum} / $diff;
};
}
$global_data->{query_count} = $global_cnt;
$global_data->{unique_query_count} = $global_unq;
$global_data->{queries_per_second} = $qps if $qps;
$global_data->{concurrency} = $conc if $conc;
if ( exists $results->{globals}->{rate_limit} ) {
my $rate_limit = $results->{globals}->{rate_limit}->{min} || '';
my ($type, $limit) = $rate_limit =~ m/^(\w+):(\d+)$/;
if ( $type && $limit ) {
$global_data->{rate_limit} = {
type => $type,
limit => int($limit),
};
}
else {
$global_data->{rate_limit}->{error} = "Invalid rate limit: $rate_limit";
}
if ( ($results->{globals}->{rate_limit}->{min} || '')
ne ($results->{globals}->{rate_limit}->{max} || '') ) {
$global_data->{rate_limit}->{diff} = 1;
}
}
my %hidden_attrib = (
arg => 1,
fingerprint => 1,
pos_in_log => 1,
ts => 1,
);
foreach my $attrib ( grep { !$hidden_attrib{$_} } @attribs ) {
my $type = $ea->type_for($attrib) || 'string';
next if $type eq 'string';
next unless exists $results->{globals}->{$attrib};
my $store = $results->{globals}->{$attrib};
my $metrics = $ea->stats()->{globals}->{$attrib};
my $int = $attrib =~ m/(?:time|wait)$/ ? 0 : 1;
# Be careful of Perl references: changing $attrib really changes it;
# it's not a local copy or copy-on-write.
my $real_attrib = $attrib eq 'bytes' ? 'Query_length' : $attrib;
if ( $type eq 'num' ) {
foreach my $m ( qw(sum min max) ) {
if ( $int ) {
$global_data->{metrics}->{$real_attrib}->{$m}
= sprintf('%d', $store->{$m} || 0);
}
else { # microsecond
$global_data->{metrics}->{$real_attrib}->{$m}
= sprintf('%.6f', $store->{$m} || 0);
}
}
foreach my $m ( qw(pct_95 stddev median) ) {
if ( $int ) {
$global_data->{metrics}->{$real_attrib}->{$m}
= sprintf('%d', $metrics->{$m} || 0);
}
else { # microsecond
$global_data->{metrics}->{$real_attrib}->{$m}
= sprintf('%.6f', $metrics->{$m} || 0);
}
}
if ( $int ) {
$global_data->{metrics}->{$real_attrib}->{avg}
= sprintf('%d', $store->{sum} / $store->{cnt});
}
else {
$global_data->{metrics}->{$real_attrib}->{avg}
= sprintf('%.6f', $store->{sum} / $store->{cnt});
}
}
elsif ( $type eq 'bool' ) {
my $store = $results->{globals}->{$real_attrib};
$global_data->{metrics}->{$real_attrib}->{cnt}
= sprintf('%d', $store->{sum});
}
}
# ########################################################################
# Query class data
# ########################################################################
my @classes;
foreach my $worst_info ( @$worst ) {
my $item = $worst_info->[0];
my $stats = $ea->results->{classes}->{$item};
my $sample = $ea->results->{samples}->{$item};
my $all_log_pos = $ea->{result_classes}->{$item}->{pos_in_log}->{all};
my $times_seen = sum values %$all_log_pos;
# Distill the query.
my $distill = $groupby eq 'fingerprint' ? $qr->distill($sample->{arg})
: undef;
my $fingerprint = substr($item, 0, $self->max_fingerprint_length);
my $checksum = make_checksum($item);
my $explain = $self->explain_report($sample->{arg}, $sample->{db});
my $class = {
checksum => $checksum,
fingerprint => $fingerprint,
distillate => $distill,
attribute => $groupby,
query_count => $times_seen,
$args{anon} ? () : (
example => {
query => substr($sample->{arg}, 0, $self->max_query_length),
ts => $sample->{ts} ? parse_timestamp($sample->{ts}) : undef,
Query_time => $sample->{Query_time},
$explain ?
( explain => $explain ): (),
},
),
};
my %metrics;
foreach my $attrib ( @attribs ) {
my $real_attrib = $attrib eq 'bytes' ? 'Query_length' : $attrib;
next if $real_attrib eq 'Rows_affected'
&& $distill && $distill =~ m/^(?:SELECT|SHOW|SET|ADMIN)/;
$metrics{$real_attrib} = $ea->metrics(
attrib => $attrib,
where => $item,
);
}
foreach my $attrib ( keys %metrics ) {
if ( ! grep { $_ } values %{$metrics{$attrib}} ) {
delete $metrics{$attrib};
next;
}
delete $metrics{pos_in_log};
delete $metrics{$attrib}->{cnt};
if ($attrib eq 'ts') {
my $ts = delete $metrics{ts};
foreach my $thing ( qw(min max) ) {
next unless defined $ts && defined $ts->{$thing};
$ts->{$thing} = parse_timestamp($ts->{$thing});
}
$class->{ts_min} = $ts->{min};
$class->{ts_max} = $ts->{max};
}
else {
my $type = $attrib eq 'Query_length' ? 'num' : $ea->type_for($attrib) || 'string';
if ( $type eq 'string' ) {
$metrics{$attrib} = { value => $metrics{$attrib}{max} };
}
elsif ( $type eq 'num' ) {
# Avoid scientific notation in the metrics by forcing it to use
# six decimal places.
foreach my $value ( values %{$metrics{$attrib}} ) {
next unless defined $value;
if ( $attrib =~ m/_(?:time|wait)$/ ) {
$value = sprintf('%.6f', $value);
}
else {
$value = sprintf('%d', $value);
}
}
}
elsif ( $type eq 'bool' ) {
$metrics{$attrib} = {
yes => sprintf('%d', $metrics{$attrib}->{sum}),
};
}
}
}
# Add "copy-paste" info, i.e. this stuff from the regular report:
#
# Tables
# SHOW TABLE STATUS FROM `db2` LIKE 'tuningdetail_21_265507'\G
# SHOW CREATE TABLE `db2`.`tuningdetail_21_265507`\G
# SHOW TABLE STATUS FROM `db1` LIKE 'gonzo'\G
# SHOW CREATE TABLE `db1`.`gonzo`\G
# update db2.tuningdetail_21_265507 n
# inner join db1.gonzo a using(gonzo)
# set n.column1 = a.column1, n.word3 = a.word3\G
# Converted for EXPLAIN
# EXPLAIN /*!50100 PARTITIONS*/
# select n.column1 = a.column1, n.word3 = a.word3
# from db2.tuningdetail_21_265507 n
# inner join db1.gonzo a using(gonzo) \G
#
# The formatting isn't included, just the useful data, like:
#
# $tables = [
# {
# create => "SHOW CREATE TABLE db.foo",
# status => "SHOW TABLE STATUS FROM db LIKE foo",
# },
# explain => "select ..."
# ]
#
# This is called "copy-paste" because users can copy-paste these
# ready-made lines into MySQL.
my @tables;
if ( $groupby eq 'fingerprint' ) {
# Get SHOW CREATE TABLE and SHOW TABLE STATUS.
my $default_db = $sample->{db} ? $sample->{db}
: $stats->{db}->{unq} ? keys %{$stats->{db}->{unq}}
: undef;
my @table_names = $self->QueryParser->extract_tables(
query => $sample->{arg} || '',
default_db => $default_db,
Quoter => $q,
);
my $mark = $args{no_v_format} ? '' : '\G';
foreach my $db_tbl ( @table_names ) {
my ( $db, $tbl ) = @$db_tbl;
my $status
= 'SHOW TABLE STATUS'
. ($db ? " FROM `$db`" : '')
. " LIKE '$tbl'${mark}";
my $create
= "SHOW CREATE TABLE "
. $q->quote(grep { $_ } @$db_tbl)
. ${mark};
push @tables, { status => $status, create => $create };
}
if ( !$args{anon} ) {
# Convert possible non-SELECTs for EXPLAIN.
if ( $item =~ m/^(?:[\(\s]*select|insert|replace)/ ) {
if ( $item =~ m/^(?:insert|replace)/ ) {
# Cannot convert or EXPLAIN INSERT or REPLACE queries.
}
else {
# SELECT queries don't need to converted for EXPLAIN.
# TODO: return the actual EXPLAIN plan
# $self->explain_report($query, $vals->{default_db});
}
}
else {
# Query is not SELECT, INSERT, or REPLACE, so we can convert
# it for EXPLAIN.
my $converted = $qr->convert_to_select(
$sample->{arg} || '',
);
if ( $converted && $converted =~ m/^[\(\s]*select/i ) {
$class->{example}->{as_select} = $converted;
}
}
}
}
# Add response time histogram for Query_time
my $vals = $stats->{Query_time}->{all};
if ( defined $vals && scalar %$vals ) {
# TODO: this is broken.
my @buck_tens = $ea->buckets_of(10);
my @distro = map { 0 } (0 .. 7);
my @buckets = map { 0 } (0..999);
map { $buckets[$_] = $vals->{$_} } keys %$vals;
$vals = \@buckets; # repoint vals from given hashref to our array
map { $distro[$buck_tens[$_]] += $vals->[$_] } (1 .. @$vals - 1);
# @distro = qw(1us 10us 100us 1ms 10ms 100ms 1s 10s+)
$class->{histograms}->{Query_time} = \@distro;
} # histogram
$class->{metrics} = \%metrics;
if ( @tables ) {
$class->{tables} = \@tables;
}
push @classes, $class;
}
# ########################################################################
# Done, combine, encode, and return global and query class data
# ########################################################################
my $data = {
global => $global_data,
classes => \@classes,
};
my $json = $self->encode_json($data);
$json .= "\n" unless $json =~ /\n\Z/;
return $json;
};
no Lmo;
1;
}
# ###########################################################################
# End JSONReportFormatter package
# ###########################################################################