mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-02 18:45:57 +00:00
516 lines
17 KiB
Perl
516 lines
17 KiB
Perl
# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Inc.
|
|
# Feedback and improvements are welcome.
|
|
#
|
|
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
|
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
|
# licenses.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
# ###########################################################################
|
|
# TableParser package
|
|
# ###########################################################################
|
|
{
|
|
# Package: TableParser
|
|
# TableParser parses SHOW CREATE TABLE.
|
|
#
|
|
# Several subs in this module require either a $ddl or $tbl param.
|
|
#
|
|
# $ddl is the return value from MySQLDump::get_create_table() (which returns
|
|
# the output of SHOW CREATE TALBE).
|
|
#
|
|
# $tbl is the return value from the sub below, parse().
|
|
#
|
|
# And some subs have an optional $opts param which is a hashref of options.
|
|
# $opts->{mysql_version} is typically used, which is the return value from
|
|
# VersionParser::parser() (which returns a zero-padded MySQL version,
|
|
# e.g. 004001000 for 4.1.0).
|
|
package TableParser;
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
|
|
|
use Data::Dumper;
|
|
$Data::Dumper::Indent = 1;
|
|
$Data::Dumper::Sortkeys = 1;
|
|
$Data::Dumper::Quotekeys = 0;
|
|
|
|
sub new {
|
|
my ( $class, %args ) = @_;
|
|
my @required_args = qw(Quoter);
|
|
foreach my $arg ( @required_args ) {
|
|
die "I need a $arg argument" unless $args{$arg};
|
|
}
|
|
my $self = { %args };
|
|
return bless $self, $class;
|
|
}
|
|
|
|
# Sub: parse
|
|
# Parse SHOW CREATE TABLE.
|
|
#
|
|
# Returns:
|
|
# Hashref of table structure
|
|
sub parse {
|
|
my ( $self, $ddl, $opts ) = @_;
|
|
return unless $ddl;
|
|
if ( ref $ddl eq 'ARRAY' ) {
|
|
if ( lc $ddl->[0] eq 'table' ) {
|
|
$ddl = $ddl->[1];
|
|
}
|
|
else {
|
|
return {
|
|
engine => 'VIEW',
|
|
};
|
|
}
|
|
}
|
|
|
|
if ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) {
|
|
die "Cannot parse table definition; is ANSI quoting "
|
|
. "enabled or SQL_QUOTE_SHOW_CREATE disabled?";
|
|
}
|
|
|
|
my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/;
|
|
(undef, $name) = $self->{Quoter}->split_unquote($name) if $name;
|
|
|
|
# Lowercase identifiers to avoid issues with case-sensitivity in Perl.
|
|
# (Bug #1910276).
|
|
$ddl =~ s/(`[^`]+`)/\L$1/g;
|
|
|
|
my $engine = $self->get_engine($ddl);
|
|
|
|
my @defs = $ddl =~ m/^(\s+`.*?),?$/gm;
|
|
my @cols = map { $_ =~ m/`([^`]+)`/ } @defs;
|
|
MKDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols));
|
|
|
|
# Save the column definitions *exactly*
|
|
my %def_for;
|
|
@def_for{@cols} = @defs;
|
|
|
|
# Find column types, whether numeric, whether nullable, whether
|
|
# auto-increment.
|
|
my (@nums, @null);
|
|
my (%type_for, %is_nullable, %is_numeric, %is_autoinc);
|
|
foreach my $col ( @cols ) {
|
|
my $def = $def_for{$col};
|
|
my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/;
|
|
die "Can't determine column type for $def" unless $type;
|
|
$type_for{$col} = $type;
|
|
if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) {
|
|
push @nums, $col;
|
|
$is_numeric{$col} = 1;
|
|
}
|
|
if ( $def !~ m/NOT NULL/ ) {
|
|
push @null, $col;
|
|
$is_nullable{$col} = 1;
|
|
}
|
|
$is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0;
|
|
}
|
|
|
|
# TODO: passing is_nullable this way is just a quick hack. Ultimately,
|
|
# we probably should decompose this sub further, taking out the block
|
|
# above that parses col props like nullability, auto_inc, type, etc.
|
|
my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable);
|
|
|
|
my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/;
|
|
|
|
return {
|
|
name => $name,
|
|
cols => \@cols,
|
|
col_posn => { map { $cols[$_] => $_ } 0..$#cols },
|
|
is_col => { map { $_ => 1 } @cols },
|
|
null_cols => \@null,
|
|
is_nullable => \%is_nullable,
|
|
is_autoinc => \%is_autoinc,
|
|
clustered_key => $clustered_key,
|
|
keys => $keys,
|
|
defs => \%def_for,
|
|
numeric_cols => \@nums,
|
|
is_numeric => \%is_numeric,
|
|
engine => $engine,
|
|
type_for => \%type_for,
|
|
charset => $charset,
|
|
};
|
|
}
|
|
|
|
# Sorts indexes in this order: PRIMARY, unique, non-nullable, any (shortest
|
|
# first, alphabetical). Only BTREE indexes are considered.
|
|
# TODO: consider length as # of bytes instead of # of columns.
|
|
sub sort_indexes {
|
|
my ( $self, $tbl ) = @_;
|
|
|
|
my @indexes
|
|
= sort {
|
|
(($a ne 'PRIMARY') <=> ($b ne 'PRIMARY'))
|
|
|| ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} )
|
|
|| ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} )
|
|
|| ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) )
|
|
}
|
|
grep {
|
|
$tbl->{keys}->{$_}->{type} eq 'BTREE'
|
|
}
|
|
sort keys %{$tbl->{keys}};
|
|
|
|
MKDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes));
|
|
return @indexes;
|
|
}
|
|
|
|
# Finds the 'best' index; if the user specifies one, dies if it's not in the
|
|
# table.
|
|
sub find_best_index {
|
|
my ( $self, $tbl, $index ) = @_;
|
|
my $best;
|
|
if ( $index ) {
|
|
($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}};
|
|
}
|
|
if ( !$best ) {
|
|
if ( $index ) {
|
|
# The user specified an index, so we can't choose our own.
|
|
die "Index '$index' does not exist in table";
|
|
}
|
|
else {
|
|
# Try to pick the best index.
|
|
# TODO: eliminate indexes that have column prefixes.
|
|
($best) = $self->sort_indexes($tbl);
|
|
}
|
|
}
|
|
MKDEBUG && _d('Best index found is', $best);
|
|
return $best;
|
|
}
|
|
|
|
# Takes a dbh, database, table, quoter, and WHERE clause, and reports the
|
|
# indexes MySQL thinks are best for EXPLAIN SELECT * FROM that table. If no
|
|
# WHERE, just returns an empty list. If no possible_keys, returns empty list,
|
|
# even if 'key' is not null. Only adds 'key' to the list if it's included in
|
|
# possible_keys.
|
|
sub find_possible_keys {
|
|
my ( $self, $dbh, $database, $table, $quoter, $where ) = @_;
|
|
return () unless $where;
|
|
my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table)
|
|
. ' WHERE ' . $where;
|
|
MKDEBUG && _d($sql);
|
|
my $expl = $dbh->selectrow_hashref($sql);
|
|
# Normalize columns to lowercase
|
|
$expl = { map { lc($_) => $expl->{$_} } keys %$expl };
|
|
if ( $expl->{possible_keys} ) {
|
|
MKDEBUG && _d('possible_keys =', $expl->{possible_keys});
|
|
my @candidates = split(',', $expl->{possible_keys});
|
|
my %possible = map { $_ => 1 } @candidates;
|
|
if ( $expl->{key} ) {
|
|
MKDEBUG && _d('MySQL chose', $expl->{key});
|
|
unshift @candidates, grep { $possible{$_} } split(',', $expl->{key});
|
|
MKDEBUG && _d('Before deduping:', join(', ', @candidates));
|
|
my %seen;
|
|
@candidates = grep { !$seen{$_}++ } @candidates;
|
|
}
|
|
MKDEBUG && _d('Final list:', join(', ', @candidates));
|
|
return @candidates;
|
|
}
|
|
else {
|
|
MKDEBUG && _d('No keys in possible_keys');
|
|
return ();
|
|
}
|
|
}
|
|
|
|
# Required args:
|
|
# * dbh dbh: active dbh
|
|
# * db scalar: database name to check
|
|
# * tbl scalar: table name to check
|
|
# Optional args:
|
|
# * all_privs bool: check for all privs (select,insert,update,delete)
|
|
# Returns: bool
|
|
# Can die: no
|
|
# check_table() checks the given table for certain criteria and returns
|
|
# true if all criteria are found, else it returns false. The existence
|
|
# of the table is always checked; if no optional args are given, then this
|
|
# is the only check. Any error causes a false return value (e.g. if the
|
|
# table is crashed).
|
|
sub check_table {
|
|
my ( $self, %args ) = @_;
|
|
my @required_args = qw(dbh db tbl);
|
|
foreach my $arg ( @required_args ) {
|
|
die "I need a $arg argument" unless $args{$arg};
|
|
}
|
|
my ($dbh, $db, $tbl) = @args{@required_args};
|
|
my $q = $self->{Quoter};
|
|
my $db_tbl = $q->quote($db, $tbl);
|
|
MKDEBUG && _d('Checking', $db_tbl);
|
|
|
|
my $sql = "SHOW TABLES FROM " . $q->quote($db)
|
|
. ' LIKE ' . $q->literal_like($tbl);
|
|
MKDEBUG && _d($sql);
|
|
my $row;
|
|
eval {
|
|
$row = $dbh->selectrow_arrayref($sql);
|
|
};
|
|
if ( $EVAL_ERROR ) {
|
|
MKDEBUG && _d($EVAL_ERROR);
|
|
return 0;
|
|
}
|
|
if ( !$row->[0] || $row->[0] ne $tbl ) {
|
|
MKDEBUG && _d('Table does not exist');
|
|
return 0;
|
|
}
|
|
|
|
# Table exists, return true unless we have privs to check.
|
|
MKDEBUG && _d('Table exists; no privs to check');
|
|
return 1 unless $args{all_privs};
|
|
|
|
# Get privs select,insert,update.
|
|
$sql = "SHOW FULL COLUMNS FROM $db_tbl";
|
|
MKDEBUG && _d($sql);
|
|
eval {
|
|
$row = $dbh->selectrow_hashref($sql);
|
|
};
|
|
if ( $EVAL_ERROR ) {
|
|
MKDEBUG && _d($EVAL_ERROR);
|
|
return 0;
|
|
}
|
|
if ( !scalar keys %$row ) {
|
|
# This should never happen.
|
|
MKDEBUG && _d('Table has no columns:', Dumper($row));
|
|
return 0;
|
|
}
|
|
my $privs = $row->{privileges} || $row->{Privileges};
|
|
|
|
# Get delete priv since FULL COLUMNS doesn't show it.
|
|
$sql = "DELETE FROM $db_tbl LIMIT 0";
|
|
MKDEBUG && _d($sql);
|
|
eval {
|
|
$dbh->do($sql);
|
|
};
|
|
my $can_delete = $EVAL_ERROR ? 0 : 1;
|
|
|
|
MKDEBUG && _d('User privs on', $db_tbl, ':', $privs,
|
|
($can_delete ? 'delete' : ''));
|
|
|
|
# Check that we have all privs.
|
|
if ( !($privs =~ m/select/ && $privs =~ m/insert/ && $privs =~ m/update/
|
|
&& $can_delete) ) {
|
|
MKDEBUG && _d('User does not have all privs');
|
|
return 0;
|
|
}
|
|
|
|
MKDEBUG && _d('User has all privs');
|
|
return 1;
|
|
}
|
|
|
|
sub get_engine {
|
|
my ( $self, $ddl, $opts ) = @_;
|
|
my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
|
|
MKDEBUG && _d('Storage engine:', $engine);
|
|
return $engine || undef;
|
|
}
|
|
|
|
# $ddl is a SHOW CREATE TABLE returned from MySQLDumper::get_create_table().
|
|
# The general format of a key is
|
|
# [FOREIGN|UNIQUE|PRIMARY|FULLTEXT|SPATIAL] KEY `name` [USING BTREE|HASH] (`cols`).
|
|
# Returns a hashref of keys and their properties and the clustered key (if
|
|
# the engine is InnoDB):
|
|
# {
|
|
# key => {
|
|
# type => BTREE, FULLTEXT or SPATIAL
|
|
# name => column name, like: "foo_key"
|
|
# colnames => original col def string, like: "(`a`,`b`)"
|
|
# cols => arrayref containing the col names, like: [qw(a b)]
|
|
# col_prefixes => arrayref containing any col prefixes (parallels cols)
|
|
# is_unique => 1 if the col is UNIQUE or PRIMARY
|
|
# is_nullable => true (> 0) if one or more col can be NULL
|
|
# is_col => hashref with key for each col=>1
|
|
# ddl => original key def string
|
|
# },
|
|
# },
|
|
# 'PRIMARY', # clustered key
|
|
#
|
|
# Foreign keys are ignored; use get_fks() instead.
|
|
sub get_keys {
|
|
my ( $self, $ddl, $opts, $is_nullable ) = @_;
|
|
my $engine = $self->get_engine($ddl);
|
|
my $keys = {};
|
|
my $clustered_key = undef;
|
|
|
|
KEY:
|
|
foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) {
|
|
|
|
# If you want foreign keys, use get_fks() below.
|
|
next KEY if $key =~ m/FOREIGN/;
|
|
|
|
my $key_ddl = $key;
|
|
MKDEBUG && _d('Parsed key:', $key_ddl);
|
|
|
|
# Make allowances for HASH bugs in SHOW CREATE TABLE. A non-MEMORY table
|
|
# will report its index as USING HASH even when this is not supported.
|
|
# The true type should be BTREE. See
|
|
# http://bugs.mysql.com/bug.php?id=22632
|
|
if ( $engine !~ m/MEMORY|HEAP/ ) {
|
|
$key =~ s/USING HASH/USING BTREE/;
|
|
}
|
|
|
|
# Determine index type
|
|
my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/;
|
|
my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/;
|
|
$type = $type || $special || 'BTREE';
|
|
if ( $opts->{mysql_version} && $opts->{mysql_version} lt '004001000'
|
|
&& $engine =~ m/HEAP|MEMORY/i )
|
|
{
|
|
$type = 'HASH'; # MySQL pre-4.1 supports only HASH indexes on HEAP
|
|
}
|
|
|
|
my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/;
|
|
my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0;
|
|
my @cols;
|
|
my @col_prefixes;
|
|
foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) {
|
|
# Parse columns of index including potential column prefixes
|
|
# E.g.: `a`,`b`(20)
|
|
my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/;
|
|
push @cols, $name;
|
|
push @col_prefixes, $prefix;
|
|
}
|
|
$name =~ s/`//g;
|
|
|
|
MKDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols));
|
|
|
|
$keys->{$name} = {
|
|
name => $name,
|
|
type => $type,
|
|
colnames => $cols,
|
|
cols => \@cols,
|
|
col_prefixes => \@col_prefixes,
|
|
is_unique => $unique,
|
|
is_nullable => scalar(grep { $is_nullable->{$_} } @cols),
|
|
is_col => { map { $_ => 1 } @cols },
|
|
ddl => $key_ddl,
|
|
};
|
|
|
|
# Find clustered key (issue 295).
|
|
if ( $engine =~ m/InnoDB/i && !$clustered_key ) {
|
|
my $this_key = $keys->{$name};
|
|
if ( $this_key->{name} eq 'PRIMARY' ) {
|
|
$clustered_key = 'PRIMARY';
|
|
}
|
|
elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) {
|
|
$clustered_key = $this_key->{name};
|
|
}
|
|
MKDEBUG && $clustered_key && _d('This key is the clustered key');
|
|
}
|
|
}
|
|
|
|
return $keys, $clustered_key;
|
|
}
|
|
|
|
# Like get_keys() above but only returns a hash of foreign keys.
|
|
sub get_fks {
|
|
my ( $self, $ddl, $opts ) = @_;
|
|
my $q = $self->{Quoter};
|
|
my $fks = {};
|
|
|
|
foreach my $fk (
|
|
$ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg )
|
|
{
|
|
my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/;
|
|
my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/;
|
|
my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/;
|
|
|
|
my ($db, $tbl) = $q->split_unquote($parent, $opts->{database});
|
|
my %parent_tbl = (tbl => $tbl);
|
|
$parent_tbl{db} = $db if $db;
|
|
|
|
if ( $parent !~ m/\./ && $opts->{database} ) {
|
|
$parent = $q->quote($opts->{database}) . ".$parent";
|
|
}
|
|
|
|
$fks->{$name} = {
|
|
name => $name,
|
|
colnames => $cols,
|
|
cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ],
|
|
parent_tbl => \%parent_tbl,
|
|
parent_tblname => $parent,
|
|
parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ],
|
|
parent_colnames=> $parent_cols,
|
|
ddl => $fk,
|
|
};
|
|
}
|
|
|
|
return $fks;
|
|
}
|
|
|
|
# Removes the AUTO_INCREMENT property from the end of SHOW CREATE TABLE. A
|
|
# sample:
|
|
# ) ENGINE=InnoDB AUTO_INCREMENT=201 DEFAULT CHARSET=utf8;
|
|
sub remove_auto_increment {
|
|
my ( $self, $ddl ) = @_;
|
|
$ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m;
|
|
return $ddl;
|
|
}
|
|
|
|
sub remove_secondary_indexes {
|
|
my ( $self, $ddl ) = @_;
|
|
my $sec_indexes_ddl;
|
|
my $tbl_struct = $self->parse($ddl);
|
|
|
|
if ( ($tbl_struct->{engine} || '') =~ m/InnoDB/i ) {
|
|
my $clustered_key = $tbl_struct->{clustered_key};
|
|
$clustered_key ||= '';
|
|
|
|
my @sec_indexes = map {
|
|
# Remove key from CREATE TABLE ddl.
|
|
my $key_def = $_->{ddl};
|
|
# Escape ( ) in the key def so Perl treats them literally.
|
|
$key_def =~ s/([\(\)])/\\$1/g;
|
|
$ddl =~ s/\s+$key_def//i;
|
|
|
|
my $key_ddl = "ADD $_->{ddl}";
|
|
# Last key in table won't have trailing comma, but since
|
|
# we're iterating through a hash the last key may not be
|
|
# the last in the list we're creating.
|
|
# http://code.google.com/p/maatkit/issues/detail?id=833
|
|
$key_ddl .= ',' unless $key_ddl =~ m/,$/;
|
|
$key_ddl;
|
|
}
|
|
grep { $_->{name} ne $clustered_key }
|
|
values %{$tbl_struct->{keys}};
|
|
MKDEBUG && _d('Secondary indexes:', Dumper(\@sec_indexes));
|
|
|
|
if ( @sec_indexes ) {
|
|
$sec_indexes_ddl = join(' ', @sec_indexes);
|
|
$sec_indexes_ddl =~ s/,$//;
|
|
}
|
|
|
|
# Remove trailing comma on last key. Cases like:
|
|
# PK,
|
|
# KEY,
|
|
# ) ENGINE=...
|
|
# will leave a trailing comma on PK.
|
|
$ddl =~ s/,(\n\) )/$1/s;
|
|
}
|
|
else {
|
|
MKDEBUG && _d('Not removing secondary indexes from',
|
|
$tbl_struct->{engine}, 'table');
|
|
}
|
|
|
|
return $ddl, $sec_indexes_ddl, $tbl_struct;
|
|
}
|
|
|
|
sub _d {
|
|
my ($package, undef, $line) = caller 0;
|
|
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
|
map { defined $_ ? $_ : 'undef' }
|
|
@_;
|
|
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
|
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End TableParser package
|
|
# ###########################################################################
|