mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-01 18:25:59 +00:00
241 lines
8.8 KiB
Perl
241 lines
8.8 KiB
Perl
# This program is copyright 2010-2011 Percona Ireland Ltd.
|
|
# Feedback and improvements are welcome.
|
|
#
|
|
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
|
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
|
# licenses.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
# ###########################################################################
|
|
# ExplainAnalyzer package
|
|
# ###########################################################################
|
|
{
|
|
# Package: ExplainAnalyzer
|
|
# ExplainAnalyzer contains utility methods for getting and manipulating
|
|
# EXPLAIN data. It also has methods to save and retrieve information,
|
|
# so it actually has state itself if used in this way -- it is not a data-less
|
|
# collection of methods.
|
|
package ExplainAnalyzer;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
use Data::Dumper;
|
|
$Data::Dumper::Indent = 1;
|
|
$Data::Dumper::Sortkeys = 1;
|
|
$Data::Dumper::Quotekeys = 0;
|
|
|
|
sub new {
|
|
my ( $class, %args ) = @_;
|
|
foreach my $arg ( qw(QueryRewriter QueryParser) ) {
|
|
die "I need a $arg argument" unless defined $args{$arg};
|
|
}
|
|
my $self = {
|
|
%args,
|
|
};
|
|
return bless $self, $class;
|
|
}
|
|
|
|
# Gets an EXPLAIN plan for a query. The arguments are:
|
|
# dbh The $dbh, which should already have the correct default database. This
|
|
# module does not run USE to select a default database.
|
|
# query The query text.
|
|
# The return value is an arrayref of hash references gotten from EXPLAIN. If
|
|
# the query is not a SELECT, we try to convert it into one.
|
|
sub explain_query {
|
|
my ( $self, %args ) = @_;
|
|
foreach my $arg ( qw(dbh query) ) {
|
|
die "I need a $arg argument" unless defined $args{$arg};
|
|
}
|
|
my ($query, $dbh) = @args{qw(query dbh)};
|
|
$query = $self->{QueryRewriter}->convert_to_select($query);
|
|
if ( $query !~ m/^\s*select/i ) {
|
|
PTDEBUG && _d("Cannot EXPLAIN non-SELECT query:",
|
|
(length $query <= 100 ? $query : substr($query, 0, 100) . "..."));
|
|
return;
|
|
}
|
|
my $sql = "EXPLAIN $query";
|
|
PTDEBUG && _d($dbh, $sql);
|
|
my $explain = $dbh->selectall_arrayref($sql, { Slice => {} });
|
|
PTDEBUG && _d("Result of EXPLAIN:", Dumper($explain));
|
|
return $explain;
|
|
}
|
|
|
|
# Normalizes raw EXPLAIN into a format that's easier to work with. For example,
|
|
# the Extra column is parsed into a hash. Accepts the output of explain_query()
|
|
# as input.
|
|
sub normalize {
|
|
my ( $self, $explain ) = @_;
|
|
my @result; # Don't modify the input.
|
|
|
|
foreach my $row ( @$explain ) {
|
|
$row = { %$row }; # Make a copy -- don't modify the input.
|
|
|
|
# Several of the columns are really arrays of values in many cases. For
|
|
# example, the "key" column has an array when there is an index merge.
|
|
foreach my $col ( qw(key possible_keys key_len ref) ) {
|
|
$row->{$col} = [ split(/,/, $row->{$col} || '') ];
|
|
}
|
|
|
|
# Handle the Extra column. Parse it into a hash by splitting on
|
|
# semicolons. There are many special cases to handle.
|
|
$row->{Extra} = {
|
|
map {
|
|
my $var = $_;
|
|
|
|
# Index merge query plans have an array of indexes to split up.
|
|
if ( my ($key, $vals) = $var =~ m/(Using union)\(([^)]+)\)/ ) {
|
|
$key => [ split(/,/, $vals) ];
|
|
}
|
|
|
|
# The default is just "this key/characteristic/flag exists."
|
|
else {
|
|
$var => 1;
|
|
}
|
|
}
|
|
split(/; /, $row->{Extra} || '') # Split on semicolons.
|
|
};
|
|
|
|
push @result, $row;
|
|
}
|
|
|
|
return \@result;
|
|
}
|
|
|
|
# Trims down alternate indexes to those that were truly alternates (were not
|
|
# actually used). For example, if key = 'foo' and possible_keys = 'foo,bar',
|
|
# then foo isn't an alternate index, only bar is. The arguments are arrayrefs,
|
|
# and the return value is an arrayref too.
|
|
sub get_alternate_indexes {
|
|
my ( $self, $keys, $possible_keys ) = @_;
|
|
my %used = map { $_ => 1 } @$keys;
|
|
return [ grep { !$used{$_} } @$possible_keys ];
|
|
}
|
|
|
|
# Returns a data structure that shows which indexes were used and considered for
|
|
# a given query and EXPLAIN plan. Input parameters are:
|
|
# query The SQL of the query.
|
|
# db The default database. When a table's database is not explicitly
|
|
# qualified in the SQL itself, it defaults to this (optional) value.
|
|
# explain The normalized EXPLAIN plan: the output from $self->normalize().
|
|
# The return value is an arrayref of hashrefs, one per row in the query. Each
|
|
# hashref has the following structure:
|
|
# db => The database of the table in question
|
|
# tbl => The table that was accessed
|
|
# idx => An arrayref of indexes accessed in this table
|
|
# alt => An arrayref of indexes considered but not accessed
|
|
sub get_index_usage {
|
|
my ( $self, %args ) = @_;
|
|
foreach my $arg ( qw(query explain) ) {
|
|
die "I need a $arg argument" unless defined $args{$arg};
|
|
}
|
|
my ($query, $explain) = @args{qw(query explain)};
|
|
my @result;
|
|
|
|
# First we must get a lookup data structure to translate the possibly aliased
|
|
# names back into real table names.
|
|
my $lookup = $self->{QueryParser}->get_aliases($query);
|
|
|
|
foreach my $row ( @$explain ) {
|
|
|
|
# Filter out any row that doesn't access a (real) table. However, a row
|
|
# that accesses a table but not an index is still interesting, so we do
|
|
# not filter that out.
|
|
next if !defined $row->{table}
|
|
# Tables named like <union1,2> are just internal temp tables, not real
|
|
# tables that we can analyze.
|
|
|| $row->{table} =~ m/^<(derived|union)\d/;
|
|
|
|
my $table = $lookup->{TABLE}->{$row->{table}} || $row->{table};
|
|
my $db = $lookup->{DATABASE}->{$table} || $args{db};
|
|
push @result, {
|
|
db => $db,
|
|
tbl => $table,
|
|
idx => $row->{key},
|
|
alt => $self->get_alternate_indexes(
|
|
$row->{key}, $row->{possible_keys}),
|
|
};
|
|
}
|
|
|
|
PTDEBUG && _d("Index usage for",
|
|
(length $query <= 100 ? $query : substr($query, 0, 100) . "..."),
|
|
":", Dumper(\@result));
|
|
return \@result;
|
|
}
|
|
|
|
# This method retrieves information about how a query uses indexes, if it
|
|
# has been saved through save_usage_for(). It is basically a cache for
|
|
# remembering "oh, I've seen exactly this query before. No need to re-EXPLAIN
|
|
# and all that stuff." The information returned is in the same form as that of
|
|
# get_index_usage(). If no usage has been saved for the arguments, the return
|
|
# value is undef. The arguments are:
|
|
# - The query's checksum (not the fingerprint's checksum)
|
|
# - The database connection's default database. If a query is run against two
|
|
# different databases, it might use different tables and indexes.
|
|
sub get_usage_for {
|
|
my ( $self, $checksum, $db ) = @_;
|
|
die "I need a checksum and db" unless defined $checksum && defined $db;
|
|
my $usage;
|
|
if ( exists $self->{usage}->{$db} # Don't auto-vivify
|
|
&& exists $self->{usage}->{$db}->{$checksum} )
|
|
{
|
|
$usage = $self->{usage}->{$db}->{$checksum};
|
|
}
|
|
PTDEBUG && _d("Usage for",
|
|
(length $checksum <= 100 ? $checksum : substr($checksum, 0, 100) . "..."),
|
|
"on", $db, ":", Dumper($usage));
|
|
return $usage;
|
|
}
|
|
|
|
# This methods saves the query's index usage patterns for later retrieval with
|
|
# get_usage_for(). See that method for an explanation of the arguments.
|
|
sub save_usage_for {
|
|
my ( $self, $checksum, $db, $usage ) = @_;
|
|
die "I need a checksum and db" unless defined $checksum && defined $db;
|
|
$self->{usage}->{$db}->{$checksum} = $usage;
|
|
}
|
|
|
|
# Sub: fingerprint
|
|
# Create a fingerprint of EXPLAIN data from <normalize()>.
|
|
#
|
|
# Parameters:
|
|
# %args - Arguments
|
|
#
|
|
# Required Arguments:
|
|
# explain - Hashref of normalized EXPLAIN data
|
|
#
|
|
# Returns:
|
|
# Fingerprint string
|
|
sub fingerprint {
|
|
my ( $self, %args ) = @_;
|
|
my @required_args = qw(explain);
|
|
foreach my $arg ( @required_args ) {
|
|
die "I need a $arg argument" unless defined $args{$arg};
|
|
}
|
|
my ($explain) = @args{@required_args};
|
|
}
|
|
|
|
sub _d {
|
|
my ($package, undef, $line) = caller 0;
|
|
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
|
map { defined $_ ? $_ : 'undef' }
|
|
@_;
|
|
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
|
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End ExplainAnalyzer package
|
|
# ###########################################################################
|