Merge advisor-json-output and clean up pt-query-advisor docs.

This commit is contained in:
Daniel Nichter
2012-06-01 09:13:11 -06:00
5 changed files with 359 additions and 35 deletions

View File

@@ -2688,6 +2688,7 @@ use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use Time::Local qw(timegm timelocal);
use Digest::MD5 qw(md5_hex);
use B qw();
require Exporter;
our @ISA = qw(Exporter);
@@ -2705,6 +2706,7 @@ our @EXPORT_OK = qw(
any_unix_timestamp
make_checksum
crc32
encode_json
);
our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
@@ -2912,6 +2914,96 @@ sub crc32 {
return $crc ^ 0xFFFFFFFF;
}
my $got_json = eval { require JSON };
sub encode_json {
return JSON::encode_json(@_) if $got_json;
my ( $data ) = @_;
return (object_to_json($data) || '');
}
sub object_to_json {
my ($obj) = @_;
my $type = ref($obj);
if($type eq 'HASH'){
return hash_to_json($obj);
}
elsif($type eq 'ARRAY'){
return array_to_json($obj);
}
else {
return value_to_json($obj);
}
}
sub hash_to_json {
my ($obj) = @_;
my @res;
for my $k ( sort { $a cmp $b } keys %$obj ) {
push @res, string_to_json( $k )
. ":"
. ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) );
}
return '{' . ( @res ? join( ",", @res ) : '' ) . '}';
}
sub array_to_json {
my ($obj) = @_;
my @res;
for my $v (@$obj) {
push @res, object_to_json($v) || value_to_json($v);
}
return '[' . ( @res ? join( ",", @res ) : '' ) . ']';
}
sub value_to_json {
my ($value) = @_;
return 'null' if(!defined $value);
my $b_obj = B::svref_2object(\$value); # for round trip problem
my $flags = $b_obj->FLAGS;
return $value # as is
if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
my $type = ref($value);
if( !$type ) {
return string_to_json($value);
}
else {
return 'null';
}
}
my %esc = (
"\n" => '\n',
"\r" => '\r',
"\t" => '\t',
"\f" => '\f',
"\b" => '\b',
"\"" => '\"',
"\\" => '\\\\',
"\'" => '\\\'',
);
sub string_to_json {
my ($arg) = @_;
$arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
$arg =~ s/\//\\\//g;
$arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
utf8::upgrade($arg);
utf8::encode($arg);
return '"' . $arg . '"';
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
@@ -6195,8 +6287,10 @@ sub main {
return $event;
};
my $json = $o->get('report-type')->{json}
? {} : undef;
# Print info (advice) about each rule that matched this query.
if ( $groupby eq 'none' ) {
if ( $groupby eq 'none' || $json ) {
push @pipeline, sub {
my ( %args ) = @_;
PTDEBUG && _d('callback: print advice');
@@ -6210,6 +6304,7 @@ sub main {
severity_count => \%severity_count,
verbose => $o->get('verbose'),
report_format => $o->get('report-format'),
json => $json,
Advisor => $adv,
);
return $event;
@@ -6322,7 +6417,7 @@ sub main {
# ########################################################################
# Aggregate and report items for group-by reports
# ########################################################################
if ( $groupby ne 'none' ) {
if ( $groupby ne 'none' && !$json ) {
print_grouped_report(
advice_queue => \%advice_queue,
group_by => $groupby,
@@ -6334,7 +6429,7 @@ sub main {
# ########################################################################
# Create and print profile of each items note/warn/crit count.
# ########################################################################
if ( keys %severity_count ) {
if ( keys %severity_count && !$json ) {
eval {
my $profile = new ReportFormatter(
long_last_column => 1,
@@ -6365,6 +6460,8 @@ sub main {
};
}
print Transformers::encode_json($json), "\n" if $json;
return 0;
}
@@ -6380,6 +6477,7 @@ sub print_advice {
my $adv = $args{Advisor};
my $seen_id = $args{seen_id};
my $severity_count = $args{severity_count};
my $json = $args{json};
my $advice = $event->{advice};
my $near_pos = $event->{near_pos};
@@ -6388,7 +6486,9 @@ sub print_advice {
# Header
my $query_id = $event->{query_id} || "";
print "\n# Query ID 0x$query_id at byte " . ($event->{pos_in_log} || 0) . "\n";
print "\n# Query ID 0x$query_id at byte " . ($event->{pos_in_log} || 0) . "\n"
unless $json;
# New check IDs and their descriptions
foreach my $i ( 1..$n_advice ) {
@@ -6409,9 +6509,10 @@ sub print_advice {
: $verbose == 2 ? "$desc[0] $desc[1]" # fuller
: $verbose > 2 ? $desc # complete
: ''; # none
print "# ", uc $info->{severity}, " $rule_id $desc\n";
print "# ", uc $info->{severity}, " $rule_id $desc\n"
unless $json;
if ( $pos ) {
if ( $pos && !$json ) {
my $offset = $pos > POS_CONTEXT ? $pos - POS_CONTEXT : 0;
print "# matches near: ",
substr($event->{arg}, $offset, ($pos - $offset) + POS_CONTEXT),
@@ -6419,14 +6520,24 @@ sub print_advice {
}
}
if ( $json ) {
my $info_for_json = {
rule => $rule_id,
%$info
};
push @{$json->{$query_id} ||= []}, $info_for_json;
}
$severity_count->{$query_id}->{$info->{severity}}++;
}
# Already seen check IDs
print "# Also: @seen_ids\n" if scalar @seen_ids;
if ( !$json ) {
# Already seen check IDs
print "# Also: @seen_ids\n" if scalar @seen_ids;
# The query
print "$event->{arg}\n";
# The query
print "$event->{arg}\n";
}
return;
}
@@ -6476,7 +6587,6 @@ sub print_grouped_report {
my $verbose = $args{verbose} || 0;
my %seen;
foreach my $groupby_attrib ( sort keys %$advice_queue ) {
print "\n" . ($groupby eq 'query_id' ? "0x" : "") . $groupby_attrib;
foreach my $groupby_value (sort keys %{$advice_queue->{$groupby_attrib}}){
@@ -6648,14 +6758,16 @@ wildcard is potentially a bug in the SQL.
severity: warn
SELECT without WHERE. The SELECT statement has no WHERE clause.
SELECT without WHERE. The SELECT statement has no WHERE clause and could
examine many more rows than intended.
=item CLA.002
severity: note
ORDER BY RAND(). ORDER BY RAND() is a very inefficient way to
retrieve a random row from the results.
retrieve a random row from the results, because it sorts the entire result
and then throws most of it away.
=item CLA.003
@@ -6663,7 +6775,8 @@ severity: note
LIMIT with OFFSET. Paginating a result set with LIMIT and OFFSET is
O(n^2) complexity, and will cause performance problems as the data
grows larger.
grows larger. Pagination techniques such as bookmarked scans are much more
efficient.
=item CLA.004
@@ -6677,13 +6790,16 @@ query is changed.
severity: warn
ORDER BY constant column.
ORDER BY constant column. This is probably a bug in your SQL; at best it is a
useless operation that does not change the query results.
=item CLA.006
severity: warn
GROUP BY or ORDER BY different tables will force a temp table and filesort.
GROUP BY or ORDER BY on different tables. This will force the use of a temporary
table and filesort, which can be a huge performance problem and can consume
large amounts of memory and temporary space on disk.
=item CLA.007
@@ -6723,8 +6839,9 @@ more efficient to store IP addresses as integers.
severity: warn
Unquoted date/time literal. A query such as "WHERE col<2010-02-12"
is valid SQL but is probably a bug; the literal should be quoted.
Unquoted date/time literal. A query such as "WHERE col<2010-02-12" is valid SQL
but is probably a bug, because it will be interpreted as "WHERE col<1996"; the
literal should be quoted.
=item KWR.001
@@ -6739,23 +6856,25 @@ result screens.
severity: crit
Mixing comma and ANSI joins. Mixing comma joins and ANSI joins
is confusing to humans, and the behavior differs between some
MySQL versions.
Mixing comma and ANSI joins. Mixing comma joins and ANSI joins is confusing to
humans, and the behavior and precedence differs between some MySQL versions,
which can introduce bugs.
=item JOI.002
severity: crit
A table is joined twice. The same table appears at least twice in the
FROM clause.
FROM clause in a manner that can be reduced to a single access to the table.
=item JOI.003
severity: warn
Reference to outer table column in WHERE clause prevents OUTER JOIN,
implicitly converts to INNER JOIN.
OUTER JOIN defeated. The reference to an outer table column in the WHERE clause
prevents the OUTER JOIN from returning any non-matched rows, which implicitly
converts the query to an INNER JOIN. This is probably a bug in the query or a
misunderstanding of how OUTER JOIN works.
=item JOI.004
@@ -6786,7 +6905,8 @@ non-deterministic results, depending on the query execution plan.
severity: note
!= is non-standard. Use the <> operator to test for inequality.
The != operator is non-standard. Use the <> operator to test for inequality
instead.
=item SUB.001
@@ -6862,11 +6982,11 @@ type: string; default: rule_id
Group items in the report by this attribute. Possible attributes are:
ATTRIBUTE GROUPS
========= ==========================================================
rule_id Items matching the same rule ID
query_id Queries with the same ID (the same fingerprint)
none No grouping, report each query and its advice individually
ATTRIBUTE GROUPS
========= ========================================================
rule_id Items matching the same rule ID
query_id Queries with the same ID (the same fingerprint)
none No grouping, report each query and its advice separately
=item --help
@@ -6934,6 +7054,13 @@ report contains the description of the rules it matched, even if this
information was previously displayed. In compact mode, the repeated
information is suppressed, and only the rule ID is displayed.
=item --report-type
type: Hash
Alternative formats to output the report. Currently, only "json" is
recognized -- anything else is ignored and the default behavior used.
=item --review
type: DSN

View File

@@ -29,6 +29,7 @@ use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use Time::Local qw(timegm timelocal);
use Digest::MD5 qw(md5_hex);
use B qw();
require Exporter;
our @ISA = qw(Exporter);
@@ -46,6 +47,7 @@ our @EXPORT_OK = qw(
any_unix_timestamp
make_checksum
crc32
encode_json
);
our $mysql_ts = qr/(\d\d)(\d\d)(\d\d) +(\d+):(\d+):(\d+)(\.\d+)?/;
@@ -285,6 +287,98 @@ sub crc32 {
return $crc ^ 0xFFFFFFFF;
}
my $got_json = eval { require JSON };
sub encode_json {
return JSON::encode_json(@_) if $got_json;
my ( $data ) = @_;
return (object_to_json($data) || '');
}
# The following is a stripped down version of JSON::PP by Makamaka Hannyaharamitu
# https://metacpan.org/module/JSON::PP
sub object_to_json {
my ($obj) = @_;
my $type = ref($obj);
if($type eq 'HASH'){
return hash_to_json($obj);
}
elsif($type eq 'ARRAY'){
return array_to_json($obj);
}
else {
return value_to_json($obj);
}
}
sub hash_to_json {
my ($obj) = @_;
my @res;
for my $k ( sort { $a cmp $b } keys %$obj ) {
push @res, string_to_json( $k )
. ":"
. ( object_to_json( $obj->{$k} ) || value_to_json( $obj->{$k} ) );
}
return '{' . ( @res ? join( ",", @res ) : '' ) . '}';
}
sub array_to_json {
my ($obj) = @_;
my @res;
for my $v (@$obj) {
push @res, object_to_json($v) || value_to_json($v);
}
return '[' . ( @res ? join( ",", @res ) : '' ) . ']';
}
sub value_to_json {
my ($value) = @_;
return 'null' if(!defined $value);
my $b_obj = B::svref_2object(\$value); # for round trip problem
my $flags = $b_obj->FLAGS;
return $value # as is
if $flags & ( B::SVp_IOK | B::SVp_NOK ) and !( $flags & B::SVp_POK ); # SvTYPE is IV or NV?
my $type = ref($value);
if( !$type ) {
return string_to_json($value);
}
else {
return 'null';
}
}
my %esc = (
"\n" => '\n',
"\r" => '\r',
"\t" => '\t',
"\f" => '\f',
"\b" => '\b',
"\"" => '\"',
"\\" => '\\\\',
"\'" => '\\\'',
);
sub string_to_json {
my ($arg) = @_;
$arg =~ s/([\x22\x5c\n\r\t\f\b])/$esc{$1}/g;
$arg =~ s/\//\\\//g;
$arg =~ s/([\x00-\x08\x0b\x0e-\x1f])/'\\u00' . unpack('H2', $1)/eg;
utf8::upgrade($arg);
utf8::encode($arg);
return '"' . $arg . '"';
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }

View File

@@ -1,5 +1,7 @@
#!/usr/bin/perl
# This file is encoded in UTF-8.
BEGIN {
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
@@ -12,14 +14,14 @@ BEGIN {
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More tests => 49;
use Test::More tests => 73;
use Transformers;
use PerconaTest;
Transformers->import( qw(parse_timestamp micro_t shorten secs_to_time
time_to_secs percentage_of unix_timestamp make_checksum any_unix_timestamp
ts crc32) );
ts crc32 encode_json) );
# #############################################################################
# micro_t() tests.
@@ -189,6 +191,107 @@ is(
'any_unix_timestamp MySQL expression that looks like another type'
);
{
# Tests borrowed from http://api.metacpan.org/source/MAKAMAKA/JSON-2.53/t/08_pc_base.t
my $obj = {};
my $js = encode_json($obj);
is($js,'{}', '{}');
$obj = [];
$js = encode_json($obj);
is($js,'[]', '[]');
$obj = {"foo" => "bar"};
$js = encode_json($obj);
is($js,'{"foo":"bar"}', '{"foo":"bar"}');
$js = encode_json({"foo" => ""});
is($js,'{"foo":""}', '{"foo":""}');
$js = encode_json({"foo" => " "});
is($js,'{"foo":" "}' ,'{"foo":" "}');
$js = encode_json({"foo" => "0"});
is($js,'{"foo":"0"}',q|{"foo":"0"} - autoencode (default)|);
$js = encode_json({"foo" => "0 0"});
is($js,'{"foo":"0 0"}','{"foo":"0 0"}');
$js = encode_json([1,2,3]);
is($js,'[1,2,3]');
$js = encode_json({"foo"=>{"bar"=>"hoge"}});
is($js,q|{"foo":{"bar":"hoge"}}|);
$obj = [{"foo"=>[1,2,3]},-0.12,{"a"=>"b"}];
$js = encode_json($obj);
is($js,q|[{"foo":[1,2,3]},-0.12,{"a":"b"}]|);
$obj = ["\x01"];
is(encode_json($obj),'["\\u0001"]');
$obj = ["\e"];
is(encode_json($obj),'["\\u001b"]');
{
# http://api.metacpan.org/source/MAKAMAKA/JSON-2.53/t/07_pc_esc.t
use utf8;
$obj = {test => qq|abc"def|};
my $str = encode_json($obj);
is($str,q|{"test":"abc\"def"}|);
$obj = {qq|te"st| => qq|abc"def|};
$str = encode_json($obj);
is($str,q|{"te\"st":"abc\"def"}|);
$obj = {test => q|abc\def|};
$str = encode_json($obj);
is($str,q|{"test":"abc\\\\def"}|);
$obj = {test => "abc\bdef"};
$str = encode_json($obj);
is($str,q|{"test":"abc\bdef"}|);
$obj = {test => "abc\fdef"};
$str = encode_json($obj);
is($str,q|{"test":"abc\fdef"}|);
$obj = {test => "abc\ndef"};
$str = encode_json($obj);
is($str,q|{"test":"abc\ndef"}|);
$obj = {test => "abc\rdef"};
$str = encode_json($obj);
is($str,q|{"test":"abc\rdef"}|);
$obj = {test => "abc-def"};
$str = encode_json($obj);
is($str,q|{"test":"abc-def"}|);
$obj = {test => "abc(def"};
$str = encode_json($obj);
is($str,q|{"test":"abc(def"}|);
$obj = {test => "abc\\def"};
$str = encode_json($obj);
is($str,q|{"test":"abc\\\\def"}|);
$obj = {test => "あいうえお"};
$str = encode_json($obj);
my $expect = q|{"test":"あいうえお"}|;
utf8::encode($expect);
is($str,$expect);
$obj = {"あいうえお" => "かきくけこ"};
$str = encode_json($obj);
$expect = q|{"あいうえお":"かきくけこ"}|;
utf8::encode($expect);
is($str,$expect);
}
}
# #############################################################################
# Done.

View File

@@ -1,7 +1,7 @@
# Query ID 0xAED2E885BDADA166 at byte 0
# WARN CLA.001 SELECT without WHERE.
# WARN CLA.006 GROUP BY or ORDER BY different tables will force a temp table and filesort.
# WARN CLA.006 GROUP BY or ORDER BY on different tables.
select id from tbl1 join tbl2 using (a) group by tbl1.id, tbl2.id
# Profile

View File

@@ -1,6 +1,6 @@
# Query ID 0xBA2547D924C5140D at byte 0
# WARN CLA.007 ORDER BY different directions prevents index from being used.
# WARN CLA.007 ORDER BY clauses that sort the results in different directions prevents indexes from being used.
select c1, c2 from t where i=1 order by c1 desc, c2 asc
# Profile