mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-10-22 02:39:04 +00:00
Add lib/, t/lib/, and sandbox/. All modules are updated and passing on MySQL 5.1.
This commit is contained in:
501
lib/SchemaIterator.pm
Normal file
501
lib/SchemaIterator.pm
Normal file
@@ -0,0 +1,501 @@
|
||||
# This program is copyright 2009-2011 Percona Inc.
|
||||
# Feedback and improvements are welcome.
|
||||
#
|
||||
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
||||
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify it under
|
||||
# the terms of the GNU General Public License as published by the Free Software
|
||||
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
||||
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
||||
# licenses.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along with
|
||||
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
||||
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
||||
# ###########################################################################
|
||||
# SchemaIterator package $Revision: 7547 $
|
||||
# ###########################################################################
|
||||
|
||||
# SchemaIterator
|
||||
# SchemaIterator iterates schema objects.
|
||||
{
|
||||
package SchemaIterator;
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use constant MKDEBUG => $ENV{MKDEBUG} || 0;
|
||||
|
||||
use Data::Dumper;
|
||||
$Data::Dumper::Indent = 1;
|
||||
$Data::Dumper::Sortkeys = 1;
|
||||
$Data::Dumper::Quotekeys = 0;
|
||||
|
||||
my $open_comment = qr{/\*!\d{5} };
|
||||
my $tbl_name = qr{
|
||||
CREATE\s+
|
||||
(?:TEMPORARY\s+)?
|
||||
TABLE\s+
|
||||
(?:IF NOT EXISTS\s+)?
|
||||
([^\(]+)
|
||||
}x;
|
||||
|
||||
|
||||
# Sub: new
|
||||
# Create a new SchemaIterator object with either a dbh or a file_itr.
|
||||
#
|
||||
# Parameters:
|
||||
# %args - Arguments
|
||||
#
|
||||
# Required Arguments:
|
||||
# dbh - dbh to iterate. Mutually exclusive with file_itr.
|
||||
# file_itr - <FileIterator::get_file_itr()> iterator for dump file.
|
||||
# Mutually exclusive with dbh.
|
||||
# OptionParser - <OptionParser> object. All filters are gotten from this
|
||||
# obj: --databases, --tables, etc.
|
||||
# Quoter - <Quoter> object.
|
||||
#
|
||||
# Optional Arguments:
|
||||
# Schema - <Schema> object to initialize while iterating.
|
||||
# MySQLDump - <MySQLDump> object to get CREATE TABLE when iterating dbh.
|
||||
# TableParser - <TableParser> object to parse CREATE TABLE for tbl_struct.
|
||||
# keep_ddl - Keep CREATE TABLE (default false)
|
||||
#
|
||||
# Returns:
|
||||
# SchemaIterator object
|
||||
sub new {
|
||||
my ( $class, %args ) = @_;
|
||||
my @required_args = qw(OptionParser Quoter);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
}
|
||||
|
||||
# Either a dbh or a file_itr is required, but not both.
|
||||
my ($file_itr, $dbh) = @args{qw(file_itr dbh)};
|
||||
die "I need either a dbh or file_itr argument"
|
||||
if (!$dbh && !$file_itr) || ($dbh && $file_itr);
|
||||
|
||||
my $self = {
|
||||
%args,
|
||||
filters => _make_filters(%args),
|
||||
};
|
||||
|
||||
return bless $self, $class;
|
||||
}
|
||||
|
||||
# Sub: _make_filters
|
||||
# Create schema object filters from <OptionParser> options. The OptionParser
|
||||
# object passed to <new()> is checked for filter options like --database,
|
||||
# --tables, --ignore-tables, etc. For all such options, a hash is built
|
||||
# keyed off the same option name. So $filter{tables} represents --tables,
|
||||
# etc. Regex filters are pre-compiled. It is very important to avoid
|
||||
# auto-vivifying certain key-values; see below. The filter hash is used
|
||||
# in sub like <database_is_allowed()>.
|
||||
#
|
||||
# This sub is called from <new()>. That's the only place and time it
|
||||
# needs to be called because options shouldn't change between runs.
|
||||
#
|
||||
# Parameters:
|
||||
# %args - Arguments
|
||||
#
|
||||
# Required Arguments:
|
||||
# OptionParser - <OptionParser> object. All filters are gotten from this
|
||||
# obj: --databases, --tables, etc.
|
||||
# Quoter - <Quoter> object.
|
||||
#
|
||||
# Returns:
|
||||
# Hashref of filters keyed on corresponding option names.
|
||||
sub _make_filters {
|
||||
my ( %args ) = @_;
|
||||
my @required_args = qw(OptionParser Quoter);
|
||||
foreach my $arg ( @required_args ) {
|
||||
die "I need a $arg argument" unless $args{$arg};
|
||||
}
|
||||
my ($o, $q) = @args{@required_args};
|
||||
|
||||
my %filters;
|
||||
|
||||
# Do not auto-vivify things like $filters{database} else a check like
|
||||
# if ( !$filters{databases}->{foo} ) will be TRUE when it should be FALSE
|
||||
# if no --databases where given. When in doubt: SchemaIterator.t and
|
||||
# check test coverage. These filters must be accurate or else we may
|
||||
# access something the user doesn't want us to.
|
||||
|
||||
my @simple_filters = qw(
|
||||
databases tables engines
|
||||
ignore-databases ignore-tables ignore-engines);
|
||||
FILTER:
|
||||
foreach my $filter ( @simple_filters ) {
|
||||
if ( $o->has($filter) ) {
|
||||
my $objs = $o->get($filter);
|
||||
next FILTER unless $objs && scalar keys %$objs;
|
||||
my $is_table = $filter =~ m/table/ ? 1 : 0;
|
||||
foreach my $obj ( keys %$objs ) {
|
||||
die "Undefined value for --$filter" unless $obj;
|
||||
$obj = lc $obj;
|
||||
if ( $is_table ) {
|
||||
my ($db, $tbl) = $q->split_unquote($obj);
|
||||
# Database-qualified tables require special handling.
|
||||
# See table_is_allowed().
|
||||
$db ||= '*';
|
||||
MKDEBUG && _d('Filter', $filter, 'value:', $db, $tbl);
|
||||
$filters{$filter}->{$tbl} = $db;
|
||||
}
|
||||
else { # database
|
||||
MKDEBUG && _d('Filter', $filter, 'value:', $obj);
|
||||
$filters{$filter}->{$obj} = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
my @regex_filters = qw(
|
||||
databases-regex tables-regex
|
||||
ignore-databases-regex ignore-tables-regex);
|
||||
REGEX_FILTER:
|
||||
foreach my $filter ( @regex_filters ) {
|
||||
if ( $o->has($filter) ) {
|
||||
my $pat = $o->get($filter);
|
||||
next REGEX_FILTER unless $pat;
|
||||
$filters{$filter} = qr/$pat/;
|
||||
MKDEBUG && _d('Filter', $filter, 'value:', $filters{$filter});
|
||||
}
|
||||
}
|
||||
|
||||
MKDEBUG && _d('Schema object filters:', Dumper(\%filters));
|
||||
return \%filters;
|
||||
}
|
||||
|
||||
# Sub: next_schema_object
|
||||
# Return the next schema object or undef when no more schema objects.
|
||||
# Only filtered schema objects are returned. If iterating dump files
|
||||
# (i.e. the obj was created with a file_itr arg), then the returned
|
||||
# schema object will always have a ddl (see below). But if iterating
|
||||
# a dbh, then you must create the obj with a MySQLDump obj to get a ddl.
|
||||
# If this object was created with a TableParser, then the ddl, if present,
|
||||
# is parsed, too.
|
||||
#
|
||||
# Returns:
|
||||
# Hashref of schema object with at least a db and tbl keys, like
|
||||
# (start code)
|
||||
# {
|
||||
# db => 'test',
|
||||
# tbl => 'a',
|
||||
# ddl => 'CREATE TABLE `a` ( ...', # if keep_ddl
|
||||
# tbl_struct => <TableParser::parse()> hashref of parsed ddl,
|
||||
# }
|
||||
# (end code)
|
||||
# The ddl is suitable for <TableParser::parse()>.
|
||||
sub next_schema_object {
|
||||
my ( $self ) = @_;
|
||||
|
||||
my $schema_obj;
|
||||
if ( $self->{file_itr} ) {
|
||||
$schema_obj= $self->_iterate_files();
|
||||
}
|
||||
else { # dbh
|
||||
$schema_obj= $self->_iterate_dbh();
|
||||
}
|
||||
|
||||
if ( $schema_obj ) {
|
||||
if ( $schema_obj->{ddl} && $self->{TableParser} ) {
|
||||
$schema_obj->{tbl_struct}
|
||||
= $self->{TableParser}->parse($schema_obj->{ddl});
|
||||
}
|
||||
|
||||
delete $schema_obj->{ddl} unless $self->{keep_ddl};
|
||||
|
||||
if ( my $schema = $self->{Schema} ) {
|
||||
$schema->add_schema_object($schema_obj);
|
||||
}
|
||||
}
|
||||
|
||||
MKDEBUG && _d('Next schema object:', $schema_obj->{db}, $schema_obj->{tbl});
|
||||
return $schema_obj;
|
||||
}
|
||||
|
||||
sub _iterate_files {
|
||||
my ( $self ) = @_;
|
||||
|
||||
if ( !$self->{fh} ) {
|
||||
my ($fh, $file) = $self->{file_itr}->();
|
||||
if ( !$fh ) {
|
||||
MKDEBUG && _d('No more files to iterate');
|
||||
return;
|
||||
}
|
||||
$self->{fh} = $fh;
|
||||
$self->{file} = $file;
|
||||
}
|
||||
my $fh = $self->{fh};
|
||||
MKDEBUG && _d('Getting next schema object from', $self->{file});
|
||||
|
||||
local $INPUT_RECORD_SEPARATOR = '';
|
||||
CHUNK:
|
||||
while (defined(my $chunk = <$fh>)) {
|
||||
if ($chunk =~ m/Database: (\S+)/) {
|
||||
# If the file is a dump of one db, then the only indication of that
|
||||
# db is in a comment at the start of the file like,
|
||||
# -- Host: localhost Database: sakila
|
||||
# If the dump is of multiple dbs, then there are both these same
|
||||
# comments and USE statements. We look for the comment which is
|
||||
# unique to both single and multi-db dumps.
|
||||
my $db = $1; # XXX
|
||||
$db =~ s/^`//; # strip leading `
|
||||
$db =~ s/`$//; # and trailing `
|
||||
if ( $self->database_is_allowed($db) ) {
|
||||
$self->{db} = $db;
|
||||
}
|
||||
}
|
||||
elsif ($self->{db} && $chunk =~ m/CREATE TABLE/) {
|
||||
if ($chunk =~ m/DROP VIEW IF EXISTS/) {
|
||||
# Tables that are actually views have this DROP statment in the
|
||||
# chunk just before the CREATE TABLE. We don't want views.
|
||||
MKDEBUG && _d('Table is a VIEW, skipping');
|
||||
next CHUNK;
|
||||
}
|
||||
|
||||
# The open comment is usually present for a view table, which we
|
||||
# probably already detected and skipped above, but this is left her
|
||||
# just in case mysqldump wraps other CREATE TABLE statements in a
|
||||
# a version comment that I don't know about yet.
|
||||
my ($tbl) = $chunk =~ m/$tbl_name/;
|
||||
$tbl =~ s/^\s*`//;
|
||||
$tbl =~ s/`\s*$//;
|
||||
if ( $self->table_is_allowed($self->{db}, $tbl) ) {
|
||||
my ($ddl) = $chunk =~ m/^(?:$open_comment)?(CREATE TABLE.+?;)$/ms;
|
||||
if ( !$ddl ) {
|
||||
warn "Failed to parse CREATE TABLE from\n" . $chunk;
|
||||
next CHUNK;
|
||||
}
|
||||
$ddl =~ s/ \*\/;\Z/;/; # remove end of version comment
|
||||
|
||||
my ($engine) = $ddl =~ m/\).*?(?:ENGINE|TYPE)=(\w+)/;
|
||||
|
||||
if ( !$engine || $self->engine_is_allowed($engine) ) {
|
||||
return {
|
||||
db => $self->{db},
|
||||
tbl => $tbl,
|
||||
ddl => $ddl,
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
} # CHUNK
|
||||
|
||||
MKDEBUG && _d('No more schema objects in', $self->{file});
|
||||
close $self->{fh};
|
||||
$self->{fh} = undef;
|
||||
|
||||
# Recurse to get next file and begin iterating it. If there's no next
|
||||
# file, then the call will return undef and we'll return undef, too
|
||||
return $self->_iterate_files();
|
||||
}
|
||||
|
||||
sub _iterate_dbh {
|
||||
my ( $self ) = @_;
|
||||
my $q = $self->{Quoter};
|
||||
my $dbh = $self->{dbh};
|
||||
MKDEBUG && _d('Getting next schema object from dbh', $dbh);
|
||||
|
||||
if ( !defined $self->{dbs} ) {
|
||||
# This happens once, the first time we're called.
|
||||
my $sql = 'SHOW DATABASES';
|
||||
MKDEBUG && _d($sql);
|
||||
my @dbs = grep { $self->database_is_allowed($_) }
|
||||
@{$dbh->selectcol_arrayref($sql)};
|
||||
MKDEBUG && _d('Found', scalar @dbs, 'databases');
|
||||
$self->{dbs} = \@dbs;
|
||||
}
|
||||
|
||||
if ( !$self->{db} ) {
|
||||
$self->{db} = shift @{$self->{dbs}};
|
||||
MKDEBUG && _d('Next database:', $self->{db});
|
||||
return unless $self->{db};
|
||||
}
|
||||
|
||||
if ( !defined $self->{tbls} ) {
|
||||
my $sql = 'SHOW /*!50002 FULL*/ TABLES FROM ' . $q->quote($self->{db});
|
||||
MKDEBUG && _d($sql);
|
||||
my @tbls = map {
|
||||
$_->[0]; # (tbl, type)
|
||||
}
|
||||
grep {
|
||||
my ($tbl, $type) = @$_;
|
||||
$self->table_is_allowed($self->{db}, $tbl)
|
||||
&& (!$type || ($type ne 'VIEW'));
|
||||
}
|
||||
@{$dbh->selectall_arrayref($sql)};
|
||||
MKDEBUG && _d('Found', scalar @tbls, 'tables in database', $self->{db});
|
||||
$self->{tbls} = \@tbls;
|
||||
}
|
||||
|
||||
while ( my $tbl = shift @{$self->{tbls}} ) {
|
||||
my $engine;
|
||||
if ( $self->{filters}->{'engines'}
|
||||
|| $self->{filters}->{'ignore-engines'} ) {
|
||||
my $sql = "SHOW TABLE STATUS FROM " . $q->quote($self->{db})
|
||||
. " LIKE \'$tbl\'";
|
||||
MKDEBUG && _d($sql);
|
||||
$engine = $dbh->selectrow_hashref($sql)->{engine};
|
||||
MKDEBUG && _d($tbl, 'uses', $engine, 'engine');
|
||||
}
|
||||
|
||||
|
||||
if ( !$engine || $self->engine_is_allowed($engine) ) {
|
||||
my $ddl;
|
||||
if ( my $du = $self->{MySQLDump} ) {
|
||||
$ddl = $du->get_create_table($dbh, $q, $self->{db}, $tbl)->[1];
|
||||
}
|
||||
|
||||
return {
|
||||
db => $self->{db},
|
||||
tbl => $tbl,
|
||||
ddl => $ddl,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
MKDEBUG && _d('No more tables in database', $self->{db});
|
||||
$self->{db} = undef;
|
||||
$self->{tbls} = undef;
|
||||
|
||||
# Recurse to get the next database. If there's no next db, then the
|
||||
# call will return undef and we'll return undef, too.
|
||||
return $self->_iterate_dbh();
|
||||
}
|
||||
|
||||
sub database_is_allowed {
|
||||
my ( $self, $db ) = @_;
|
||||
die "I need a db argument" unless $db;
|
||||
|
||||
$db = lc $db;
|
||||
|
||||
my $filter = $self->{filters};
|
||||
|
||||
if ( $db =~ m/information_schema|performance_schema|lost\+found/ ) {
|
||||
MKDEBUG && _d('Database', $db, 'is a system database, ignoring');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $self->{filters}->{'ignore-databases'}->{$db} ) {
|
||||
MKDEBUG && _d('Database', $db, 'is in --ignore-databases list');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $filter->{'ignore-databases-regex'}
|
||||
&& $db =~ $filter->{'ignore-databases-regex'} ) {
|
||||
MKDEBUG && _d('Database', $db, 'matches --ignore-databases-regex');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $filter->{'databases'}
|
||||
&& !$filter->{'databases'}->{$db} ) {
|
||||
MKDEBUG && _d('Database', $db, 'is not in --databases list, ignoring');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $filter->{'databases-regex'}
|
||||
&& $db !~ $filter->{'databases-regex'} ) {
|
||||
MKDEBUG && _d('Database', $db, 'does not match --databases-regex, ignoring');
|
||||
return 0;
|
||||
}
|
||||
|
||||
# MKDEBUG && _d('Database', $db, 'is allowed');
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub table_is_allowed {
|
||||
my ( $self, $db, $tbl ) = @_;
|
||||
die "I need a db argument" unless $db;
|
||||
die "I need a tbl argument" unless $tbl;
|
||||
|
||||
$db = lc $db;
|
||||
$tbl = lc $tbl;
|
||||
|
||||
my $filter = $self->{filters};
|
||||
|
||||
if ( $filter->{'ignore-tables'}->{$tbl}
|
||||
&& ($filter->{'ignore-tables'}->{$tbl} eq '*'
|
||||
|| $filter->{'ignore-tables'}->{$tbl} eq $db) ) {
|
||||
MKDEBUG && _d('Table', $tbl, 'is in --ignore-tables list');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $filter->{'ignore-tables-regex'}
|
||||
&& $tbl =~ $filter->{'ignore-tables-regex'} ) {
|
||||
MKDEBUG && _d('Table', $tbl, 'matches --ignore-tables-regex');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $filter->{'tables'}
|
||||
&& !$filter->{'tables'}->{$tbl} ) {
|
||||
MKDEBUG && _d('Table', $tbl, 'is not in --tables list, ignoring');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $filter->{'tables-regex'}
|
||||
&& $tbl !~ $filter->{'tables-regex'} ) {
|
||||
MKDEBUG && _d('Table', $tbl, 'does not match --tables-regex, ignoring');
|
||||
return 0;
|
||||
}
|
||||
|
||||
# This handles a special case like "-d d2 -t d1.t1" where the user probably
|
||||
# wants "all tables from database d1 plus table t1 from database d1." In
|
||||
# _make_filters() we cannot add d1 to the allowed databases filter because
|
||||
# then we'll get d1 tables when the user only wants d2 tables. So when
|
||||
# a table passes allow filters, reaching this point, meaning it is allowed,
|
||||
# we make this final to check to see if it's allowed in any database (*)
|
||||
# or allowed in the specific database that the user qualifed the table with.
|
||||
# The first two checks are to prevent auto-vivifying the filters which will
|
||||
# cause bad results (see a similar comment in _make_filters()).
|
||||
if ( $filter->{'tables'}
|
||||
&& $filter->{'tables'}->{$tbl}
|
||||
&& $filter->{'tables'}->{$tbl} ne '*'
|
||||
&& $filter->{'tables'}->{$tbl} ne $db ) {
|
||||
MKDEBUG && _d('Table', $tbl, 'is only allowed in database',
|
||||
$filter->{'tables'}->{$tbl});
|
||||
return 0;
|
||||
}
|
||||
|
||||
# MKDEBUG && _d('Table', $tbl, 'is allowed');
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub engine_is_allowed {
|
||||
my ( $self, $engine ) = @_;
|
||||
die "I need an engine argument" unless $engine;
|
||||
|
||||
$engine = lc $engine;
|
||||
|
||||
my $filter = $self->{filters};
|
||||
|
||||
if ( $filter->{'ignore-engines'}->{$engine} ) {
|
||||
MKDEBUG && _d('Engine', $engine, 'is in --ignore-databases list');
|
||||
return 0;
|
||||
}
|
||||
|
||||
if ( $filter->{'engines'}
|
||||
&& !$filter->{'engines'}->{$engine} ) {
|
||||
MKDEBUG && _d('Engine', $engine, 'is not in --engines list, ignoring');
|
||||
return 0;
|
||||
}
|
||||
|
||||
# MKDEBUG && _d('Engine', $engine, 'is allowed');
|
||||
return 1;
|
||||
}
|
||||
|
||||
sub _d {
|
||||
my ($package, undef, $line) = caller 0;
|
||||
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
||||
map { defined $_ ? $_ : 'undef' }
|
||||
@_;
|
||||
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
||||
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
# End SchemaIterator package
|
||||
# ###########################################################################
|
||||
Reference in New Issue
Block a user