Files
percona-toolkit/lib/ExplainAnalyzer.pm
2013-01-31 14:52:34 -03:00

241 lines
8.8 KiB
Perl

# This program is copyright 2010-2011 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# ExplainAnalyzer package
# ###########################################################################
{
# Package: ExplainAnalyzer
# ExplainAnalyzer contains utility methods for getting and manipulating
# EXPLAIN data. It also has methods to save and retrieve information,
# so it actually has state itself if used in this way -- it is not a data-less
# collection of methods.
package ExplainAnalyzer;
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
sub new {
my ( $class, %args ) = @_;
foreach my $arg ( qw(QueryRewriter QueryParser) ) {
die "I need a $arg argument" unless defined $args{$arg};
}
my $self = {
%args,
};
return bless $self, $class;
}
# Gets an EXPLAIN plan for a query. The arguments are:
# dbh The $dbh, which should already have the correct default database. This
# module does not run USE to select a default database.
# query The query text.
# The return value is an arrayref of hash references gotten from EXPLAIN. If
# the query is not a SELECT, we try to convert it into one.
sub explain_query {
my ( $self, %args ) = @_;
foreach my $arg ( qw(dbh query) ) {
die "I need a $arg argument" unless defined $args{$arg};
}
my ($query, $dbh) = @args{qw(query dbh)};
$query = $self->{QueryRewriter}->convert_to_select($query);
if ( $query !~ m/^\s*select/i ) {
PTDEBUG && _d("Cannot EXPLAIN non-SELECT query:",
(length $query <= 100 ? $query : substr($query, 0, 100) . "..."));
return;
}
my $sql = "EXPLAIN $query";
PTDEBUG && _d($dbh, $sql);
my $explain = $dbh->selectall_arrayref($sql, { Slice => {} });
PTDEBUG && _d("Result of EXPLAIN:", Dumper($explain));
return $explain;
}
# Normalizes raw EXPLAIN into a format that's easier to work with. For example,
# the Extra column is parsed into a hash. Accepts the output of explain_query()
# as input.
sub normalize {
my ( $self, $explain ) = @_;
my @result; # Don't modify the input.
foreach my $row ( @$explain ) {
$row = { %$row }; # Make a copy -- don't modify the input.
# Several of the columns are really arrays of values in many cases. For
# example, the "key" column has an array when there is an index merge.
foreach my $col ( qw(key possible_keys key_len ref) ) {
$row->{$col} = [ split(/,/, $row->{$col} || '') ];
}
# Handle the Extra column. Parse it into a hash by splitting on
# semicolons. There are many special cases to handle.
$row->{Extra} = {
map {
my $var = $_;
# Index merge query plans have an array of indexes to split up.
if ( my ($key, $vals) = $var =~ m/(Using union)\(([^)]+)\)/ ) {
$key => [ split(/,/, $vals) ];
}
# The default is just "this key/characteristic/flag exists."
else {
$var => 1;
}
}
split(/; /, $row->{Extra} || '') # Split on semicolons.
};
push @result, $row;
}
return \@result;
}
# Trims down alternate indexes to those that were truly alternates (were not
# actually used). For example, if key = 'foo' and possible_keys = 'foo,bar',
# then foo isn't an alternate index, only bar is. The arguments are arrayrefs,
# and the return value is an arrayref too.
sub get_alternate_indexes {
my ( $self, $keys, $possible_keys ) = @_;
my %used = map { $_ => 1 } @$keys;
return [ grep { !$used{$_} } @$possible_keys ];
}
# Returns a data structure that shows which indexes were used and considered for
# a given query and EXPLAIN plan. Input parameters are:
# query The SQL of the query.
# db The default database. When a table's database is not explicitly
# qualified in the SQL itself, it defaults to this (optional) value.
# explain The normalized EXPLAIN plan: the output from $self->normalize().
# The return value is an arrayref of hashrefs, one per row in the query. Each
# hashref has the following structure:
# db => The database of the table in question
# tbl => The table that was accessed
# idx => An arrayref of indexes accessed in this table
# alt => An arrayref of indexes considered but not accessed
sub get_index_usage {
my ( $self, %args ) = @_;
foreach my $arg ( qw(query explain) ) {
die "I need a $arg argument" unless defined $args{$arg};
}
my ($query, $explain) = @args{qw(query explain)};
my @result;
# First we must get a lookup data structure to translate the possibly aliased
# names back into real table names.
my $lookup = $self->{QueryParser}->get_aliases($query);
foreach my $row ( @$explain ) {
# Filter out any row that doesn't access a (real) table. However, a row
# that accesses a table but not an index is still interesting, so we do
# not filter that out.
next if !defined $row->{table}
# Tables named like <union1,2> are just internal temp tables, not real
# tables that we can analyze.
|| $row->{table} =~ m/^<(derived|union)\d/;
my $table = $lookup->{TABLE}->{$row->{table}} || $row->{table};
my $db = $lookup->{DATABASE}->{$table} || $args{db};
push @result, {
db => $db,
tbl => $table,
idx => $row->{key},
alt => $self->get_alternate_indexes(
$row->{key}, $row->{possible_keys}),
};
}
PTDEBUG && _d("Index usage for",
(length $query <= 100 ? $query : substr($query, 0, 100) . "..."),
":", Dumper(\@result));
return \@result;
}
# This method retrieves information about how a query uses indexes, if it
# has been saved through save_usage_for(). It is basically a cache for
# remembering "oh, I've seen exactly this query before. No need to re-EXPLAIN
# and all that stuff." The information returned is in the same form as that of
# get_index_usage(). If no usage has been saved for the arguments, the return
# value is undef. The arguments are:
# - The query's checksum (not the fingerprint's checksum)
# - The database connection's default database. If a query is run against two
# different databases, it might use different tables and indexes.
sub get_usage_for {
my ( $self, $checksum, $db ) = @_;
die "I need a checksum and db" unless defined $checksum && defined $db;
my $usage;
if ( exists $self->{usage}->{$db} # Don't auto-vivify
&& exists $self->{usage}->{$db}->{$checksum} )
{
$usage = $self->{usage}->{$db}->{$checksum};
}
PTDEBUG && _d("Usage for",
(length $checksum <= 100 ? $checksum : substr($checksum, 0, 100) . "..."),
"on", $db, ":", Dumper($usage));
return $usage;
}
# This methods saves the query's index usage patterns for later retrieval with
# get_usage_for(). See that method for an explanation of the arguments.
sub save_usage_for {
my ( $self, $checksum, $db, $usage ) = @_;
die "I need a checksum and db" unless defined $checksum && defined $db;
$self->{usage}->{$db}->{$checksum} = $usage;
}
# Sub: fingerprint
# Create a fingerprint of EXPLAIN data from <normalize()>.
#
# Parameters:
# %args - Arguments
#
# Required Arguments:
# explain - Hashref of normalized EXPLAIN data
#
# Returns:
# Fingerprint string
sub fingerprint {
my ( $self, %args ) = @_;
my @required_args = qw(explain);
foreach my $arg ( @required_args ) {
die "I need a $arg argument" unless defined $args{$arg};
}
my ($explain) = @args{@required_args};
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;
}
# ###########################################################################
# End ExplainAnalyzer package
# ###########################################################################