# This program is copyright 2007-2011 Baron Schwartz, 2011 Percona Ireland Ltd. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software # Foundation, version 2; OR the Perl Artistic License. On UNIX and similar # systems, you can issue `man perlgpl' or `man perlartistic' to read these # licenses. # # You should have received a copy of the GNU General Public License along with # this program; if not, write to the Free Software Foundation, Inc., 59 Temple # Place, Suite 330, Boston, MA 02111-1307 USA. # ########################################################################### # TableParser package # ########################################################################### { # Package: TableParser # TableParser parses SHOW CREATE TABLE. # # Several subs in this module require either a $ddl or $tbl param. # # $tbl is the return value from the sub below, parse(). # # And some subs have an optional $opts param which is a hashref of options. package TableParser; use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); use constant PTDEBUG => $ENV{PTDEBUG} || 0; use Data::Dumper; $Data::Dumper::Indent = 1; $Data::Dumper::Sortkeys = 1; $Data::Dumper::Quotekeys = 0; local $EVAL_ERROR; eval { require Quoter; }; sub new { my ( $class, %args ) = @_; my $self = { %args }; $self->{Quoter} ||= Quoter->new(); return bless $self, $class; } sub Quoter { shift->{Quoter} } sub get_create_table { my ( $self, $dbh, $db, $tbl ) = @_; die "I need a dbh parameter" unless $dbh; die "I need a db parameter" unless $db; die "I need a tbl parameter" unless $tbl; my $q = $self->{Quoter}; # To ensure a consistent output, we save the current (old) SQL mode, # then set it to the new SQL mode that what we need, which is the # default sql_mode=''. When done, even if an error occurs, we restore # the old SQL mode. The main thing is that we do not want ANSI_QUOTES # because there's code all throughout the tools that expect backtick ` # quoted idents, not double-quote " quoted idents. For example: # https://bugs.launchpad.net/percona-toolkit/+bug/1058285 my $new_sql_mode = q{/*!40101 SET @OLD_SQL_MODE := @@SQL_MODE, } . q{@@SQL_MODE := '', } . q{@OLD_QUOTE := @@SQL_QUOTE_SHOW_CREATE, } . q{@@SQL_QUOTE_SHOW_CREATE := 1 */}; my $old_sql_mode = q{/*!40101 SET @@SQL_MODE := @OLD_SQL_MODE, } . q{@@SQL_QUOTE_SHOW_CREATE := @OLD_QUOTE */}; # Set new SQL mode. PTDEBUG && _d($new_sql_mode); eval { $dbh->do($new_sql_mode); }; PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); # Must USE the tbl's db because some bug with SHOW CREATE TABLE on a # view when the current db isn't the view's db causes MySQL to crash. my $use_sql = 'USE ' . $q->quote($db); PTDEBUG && _d($dbh, $use_sql); $dbh->do($use_sql); my $show_sql = "SHOW CREATE TABLE " . $q->quote($db, $tbl); PTDEBUG && _d($show_sql); my $href; eval { $href = $dbh->selectrow_hashref($show_sql); }; if ( my $e = $EVAL_ERROR ) { # Restore old SQL mode. PTDEBUG && _d($old_sql_mode); eval { $dbh->do($old_sql_mode); }; PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); die $e; } # Restore old SQL mode. PTDEBUG && _d($old_sql_mode); eval { $dbh->do($old_sql_mode); }; PTDEBUG && $EVAL_ERROR && _d($EVAL_ERROR); # SHOW CREATE TABLE has at least 2 columns like: # mysql> show create table city\G # *************************** 1. row *************************** # Table: city # Create Table: CREATE TABLE `city` ( # `city_id` smallint(5) unsigned NOT NULL AUTO_INCREMENT, # ... # We want the second column. my ($key) = grep { m/create (?:table|view)/i } keys %$href; if ( !$key ) { die "Error: no 'Create Table' or 'Create View' in result set from " . "$show_sql: " . Dumper($href); } return $href->{$key}; } # Sub: parse # Parse SHOW CREATE TABLE. # # Returns: # Hashref of table structure sub parse { my ( $self, $ddl, $opts ) = @_; return unless $ddl; # If ANSI_QUOTES is enabled, we can't parse. But we can translate ANSI_QUOTES # into legacy quoting with backticks. The rules are: an identifier is # surrounded with the quote characters, and embedded quote characters are # doubled. if ( $ddl =~ m/CREATE (?:TEMPORARY )?TABLE "/ ) { $ddl = $self->ansi_to_legacy($ddl); } elsif ( $ddl !~ m/CREATE (?:TEMPORARY )?TABLE `/ ) { die "TableParser doesn't handle CREATE TABLE without quoting."; } my ($name) = $ddl =~ m/CREATE (?:TEMPORARY )?TABLE\s+(`.+?`)/; (undef, $name) = $self->{Quoter}->split_unquote($name) if $name; # Lowercase identifiers to avoid issues with case-sensitivity in Perl. # (Bug #1910276). $ddl =~ s/(`[^`\n]+`)/\L$1/gm; my $engine = $self->get_engine($ddl); my @defs = $ddl =~ m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g; my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); # Save the column definitions *exactly* my %def_for; @def_for{@cols} = @defs; # Find column types, whether numeric, whether nullable, whether # auto-increment. my (@nums, @null, @non_generated); my (%type_for, %is_nullable, %is_numeric, %is_autoinc, %is_generated); foreach my $col ( @cols ) { my $def = $def_for{$col}; # Remove literal backticks (``) because they're superfluous for parsing # the col. # https://bugs.launchpad.net/percona-toolkit/+bug/1462904 $def =~ s/``//g; my ( $type ) = $def =~ m/`[^`]+`\s([a-z]+)/; die "Can't determine column type for $def" unless $type; $type_for{$col} = $type; if ( $type =~ m/(?:(?:tiny|big|medium|small)?int|float|double|decimal|year)/ ) { push @nums, $col; $is_numeric{$col} = 1; } if ( $def !~ m/NOT NULL/ ) { push @null, $col; $is_nullable{$col} = 1; } if ( remove_quoted_text($def) =~ m/\WGENERATED\W/i ) { $is_generated{$col} = 1; } else { push @non_generated, $col; } $is_autoinc{$col} = $def =~ m/AUTO_INCREMENT/i ? 1 : 0; } # TODO: passing is_nullable this way is just a quick hack. Ultimately, # we probably should decompose this sub further, taking out the block # above that parses col props like nullability, auto_inc, type, etc. my ($keys, $clustered_key) = $self->get_keys($ddl, $opts, \%is_nullable); my ($charset) = $ddl =~ m/DEFAULT CHARSET=(\w+)/; return { name => $name, cols => \@cols, col_posn => { map { $cols[$_] => $_ } 0..$#cols }, is_col => { map { $_ => 1 } @non_generated }, null_cols => \@null, is_nullable => \%is_nullable, non_generated_cols => \@non_generated, is_autoinc => \%is_autoinc, is_generated => \%is_generated, clustered_key => $clustered_key, keys => $keys, defs => \%def_for, numeric_cols => \@nums, is_numeric => \%is_numeric, engine => $engine, type_for => \%type_for, charset => $charset, }; } sub remove_quoted_text { my ($string) = @_; $string =~ s/\\['"]//g; $string =~ s/`[^`]*?`//g; $string =~ s/"[^"]*?"//g; $string =~ s/'[^']*?'//g; return $string; } # Sorts indexes in this order: PRIMARY, unique, non-nullable, any (shortest # first, alphabetical). Only BTREE indexes are considered. # TODO: consider length as # of bytes instead of # of columns. sub sort_indexes { my ( $self, $tbl ) = @_; my @indexes = sort { (($a ne 'PRIMARY') <=> ($b ne 'PRIMARY')) || ( !$tbl->{keys}->{$a}->{is_unique} <=> !$tbl->{keys}->{$b}->{is_unique} ) || ( $tbl->{keys}->{$a}->{is_nullable} <=> $tbl->{keys}->{$b}->{is_nullable} ) || ( scalar(@{$tbl->{keys}->{$a}->{cols}}) <=> scalar(@{$tbl->{keys}->{$b}->{cols}}) ) } grep { $tbl->{keys}->{$_}->{type} eq 'BTREE' } sort keys %{$tbl->{keys}}; PTDEBUG && _d('Indexes sorted best-first:', join(', ', @indexes)); return @indexes; } # Finds the 'best' index; if the user specifies one, dies if it's not in the # table. sub find_best_index { my ( $self, $tbl, $index ) = @_; my $best; if ( $index ) { ($best) = grep { uc $_ eq uc $index } keys %{$tbl->{keys}}; } if ( !$best ) { if ( $index ) { # The user specified an index, so we can't choose our own. die "Index '$index' does not exist in table"; } else { # Try to pick the best index. # TODO: eliminate indexes that have column prefixes. ($best) = $self->sort_indexes($tbl); } } PTDEBUG && _d('Best index found is', $best); return $best; } # Takes a dbh, database, table, quoter, and WHERE clause, and reports the # indexes MySQL thinks are best for EXPLAIN SELECT * FROM that table. If no # WHERE, just returns an empty list. If no possible_keys, returns empty list, # even if 'key' is not null. Only adds 'key' to the list if it's included in # possible_keys. sub find_possible_keys { my ( $self, $dbh, $database, $table, $quoter, $where ) = @_; return () unless $where; my $sql = 'EXPLAIN SELECT * FROM ' . $quoter->quote($database, $table) . ' WHERE ' . $where; PTDEBUG && _d($sql); my $expl = $dbh->selectrow_hashref($sql); # Normalize columns to lowercase $expl = { map { lc($_) => $expl->{$_} } keys %$expl }; if ( $expl->{possible_keys} ) { PTDEBUG && _d('possible_keys =', $expl->{possible_keys}); my @candidates = split(',', $expl->{possible_keys}); my %possible = map { $_ => 1 } @candidates; if ( $expl->{key} ) { PTDEBUG && _d('MySQL chose', $expl->{key}); unshift @candidates, grep { $possible{$_} } split(',', $expl->{key}); PTDEBUG && _d('Before deduping:', join(', ', @candidates)); my %seen; @candidates = grep { !$seen{$_}++ } @candidates; } PTDEBUG && _d('Final list:', join(', ', @candidates)); return @candidates; } else { PTDEBUG && _d('No keys in possible_keys'); return (); } } # Required args: # * dbh dbh: active dbh # * db scalar: database name to check # * tbl scalar: table name to check # Returns: bool # Can die: no # check_table() checks the given table for the existence and returns # true if the table is found, else it returns false. # Any error causes a false return value (e.g. if the table is crashed). sub check_table { my ( $self, %args ) = @_; my @required_args = qw(dbh db tbl); foreach my $arg ( @required_args ) { die "I need a $arg argument" unless $args{$arg}; } my ($dbh, $db, $tbl) = @args{@required_args}; my $q = $self->{Quoter} || 'Quoter'; $self->{check_table_error} = undef; # https://dev.mysql.com/doc/refman/8.0/en/identifier-case-sensitivity.html # MySQL may use use case-insensitive table lookup, this is controller by # @@lower_case_table_names. 0 means case sensitive search, 1 or 2 means # case insensitive lookup. my $lctn_sql = 'SELECT @@lower_case_table_names'; PTDEBUG && _d($lctn_sql); my $lower_case_table_names; eval { ($lower_case_table_names) = $dbh->selectrow_array($lctn_sql); }; if ( $EVAL_ERROR ) { PTDEBUG && _d($EVAL_ERROR); $self->{check_table_error} = $EVAL_ERROR; return 0; } my $db_tbl = $q->quote($db, $tbl); PTDEBUG && _d('Checking', $db_tbl); my $sql = "SHOW TABLES FROM " . $q->quote($db) . ' LIKE ' . $q->literal_like($tbl); PTDEBUG && _d($sql); my $row; eval { $row = $dbh->selectrow_arrayref($sql); }; if ( my $e = $EVAL_ERROR ) { PTDEBUG && _d($e); $self->{check_table_error} = $e; return 0; } if ( !$row->[0] || ( $lower_case_table_names == 0 && $row->[0] ne $tbl ) || ( $lower_case_table_names > 0 && lc $row->[0] ne lc $tbl ) ) { PTDEBUG && _d('Table does not exist'); return 0; } PTDEBUG && _d('Table', $db, $tbl, 'exists'); return 1; # No more privs check: # https://bugs.launchpad.net/percona-toolkit/+bug/1036747 } sub get_engine { my ( $self, $ddl, $opts ) = @_; my ( $engine ) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/; PTDEBUG && _d('Storage engine:', $engine); return $engine || undef; } # $ddl is a SHOW CREATE TABLE returned from get_create_table(). # The general format of a key is # [FOREIGN|UNIQUE|PRIMARY|FULLTEXT|SPATIAL] KEY `name` [USING BTREE|HASH] (`cols`). # Returns a hashref of keys and their properties and the clustered key (if # the engine is InnoDB): # { # key => { # type => BTREE, FULLTEXT or SPATIAL # name => column name, like: "foo_key" # colnames => original col def string, like: "(`a`,`b`)" # cols => arrayref containing the col names, like: [qw(a b)] # col_prefixes => arrayref containing any col prefixes (parallels cols) # is_unique => 1 if the col is UNIQUE or PRIMARY # is_nullable => true (> 0) if one or more col can be NULL # is_col => hashref with key for each col=>1 # ddl => original key def string # }, # }, # 'PRIMARY', # clustered key # # Foreign keys are ignored; use get_fks() instead. sub get_keys { my ( $self, $ddl, $opts, $is_nullable ) = @_; my $engine = $self->get_engine($ddl); my $keys = {}; my $clustered_key = undef; KEY: foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY [\s\S]*?\),?.*)$/gm ) { # If you want foreign keys, use get_fks() below. next KEY if $key =~ m/FOREIGN/; my $key_ddl = $key; PTDEBUG && _d('Parsed key:', $key_ddl); # Make allowances for HASH bugs in SHOW CREATE TABLE. A non-MEMORY table # will report its index as USING HASH even when this is not supported. # The true type should be BTREE. See # http://bugs.mysql.com/bug.php?id=22632 # If ANSI quoting is in effect, we may not know the engine at all. if ( !$engine || $engine !~ m/MEMORY|HEAP/ ) { $key =~ s/USING HASH/USING BTREE/; } # Determine index type my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \(([\s\S]+)\)/; my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; $type = $type || $special || 'BTREE'; my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; my $unique = $key =~ m/PRIMARY|UNIQUE/ ? 1 : 0; my @cols; my @col_prefixes; foreach my $col_def ( $cols =~ m/`[^`]+`(?:\(\d+\))?/g ) { # Parse columns of index including potential column prefixes # E.g.: `a`,`b`(20) my ($name, $prefix) = $col_def =~ m/`([^`]+)`(?:\((\d+)\))?/; push @cols, $name; push @col_prefixes, $prefix; } $name =~ s/`//g; PTDEBUG && _d( $name, 'key cols:', join(', ', map { "`$_`" } @cols)); $keys->{$name} = { name => $name, type => $type, colnames => $cols, cols => \@cols, col_prefixes => \@col_prefixes, is_unique => $unique, is_nullable => scalar(grep { $is_nullable->{$_} } @cols), is_col => { map { $_ => 1 } @cols }, ddl => $key_ddl, }; # Find clustered key (issue 295). if ( ($engine || '') =~ m/(InnoDB)|(TokuDB)|(RocksDB)/i && !$clustered_key ) { my $this_key = $keys->{$name}; if ( $this_key->{name} eq 'PRIMARY' ) { $clustered_key = 'PRIMARY'; } elsif ( $this_key->{is_unique} && !$this_key->{is_nullable} ) { $clustered_key = $this_key->{name}; } PTDEBUG && $clustered_key && _d('This key is the clustered key'); } } return $keys, $clustered_key; } # Like get_keys() above but only returns a hash of foreign keys. sub get_fks { my ( $self, $ddl, $opts ) = @_; my $q = $self->{Quoter}; my $fks = {}; foreach my $fk ( $ddl =~ m/CONSTRAINT .* FOREIGN KEY .* REFERENCES [^\)]*\)/mg ) { my ( $name ) = $fk =~ m/CONSTRAINT `(.*?)`/; my ( $cols ) = $fk =~ m/FOREIGN KEY \(([^\)]+)\)/; my ( $parent, $parent_cols ) = $fk =~ m/REFERENCES (\S+) \(([^\)]+)\)/; my ($db, $tbl) = $q->split_unquote($parent, $opts->{database}); my %parent_tbl = (tbl => $tbl); $parent_tbl{db} = $db if $db; if ( $parent !~ m/\./ && $opts->{database} ) { $parent = $q->quote($opts->{database}) . ".$parent"; } $fks->{$name} = { name => $name, colnames => $cols, cols => [ map { s/[ `]+//g; $_; } split(',', $cols) ], parent_tbl => \%parent_tbl, parent_tblname => $parent, parent_cols => [ map { s/[ `]+//g; $_; } split(',', $parent_cols) ], parent_colnames=> $parent_cols, ddl => $fk, }; } return $fks; } # Removes the AUTO_INCREMENT property from the end of SHOW CREATE TABLE. A # sample: # ) ENGINE=InnoDB AUTO_INCREMENT=201 DEFAULT CHARSET=utf8; sub remove_auto_increment { my ( $self, $ddl ) = @_; $ddl =~ s/(^\).*?) AUTO_INCREMENT=\d+\b/$1/m; return $ddl; } sub get_table_status { my ( $self, $dbh, $db, $like ) = @_; my $q = $self->{Quoter}; my $sql = "SHOW TABLE STATUS FROM " . $q->quote($db); my @params; if ( $like ) { $sql .= ' LIKE ?'; push @params, $like; } PTDEBUG && _d($sql, @params); my $sth = $dbh->prepare($sql); eval { $sth->execute(@params); }; if ($EVAL_ERROR) { PTDEBUG && _d($EVAL_ERROR); return; } my @tables = @{$sth->fetchall_arrayref({})}; @tables = map { my %tbl; # Make a copy with lowercased keys @tbl{ map { lc $_ } keys %$_ } = values %$_; $tbl{engine} ||= $tbl{type} || $tbl{comment}; delete $tbl{type}; \%tbl; } @tables; return @tables; } # Translates ANSI quoting around SHOW CREATE TABLE (specifically this query's # output, not an arbitrary query) into legacy backtick-quoting. # DOESNT WORK: my $ansi_quote_re = qr/"(?:(?!(?