Support --where in NibbleIterator. If --where is given, prefer the index that MySQL wants to use. Keep the SHOW TABLE STATUS result from SchemaIterator and re-use it in NibbleIterator (avoids executing it twice).

This commit is contained in:
Daniel Nichter
2011-10-13 08:49:32 -06:00
parent 083fb1b8ca
commit 28fa7d62c2
3 changed files with 126 additions and 55 deletions

View File

@@ -46,6 +46,7 @@ $Data::Dumper::Quotekeys = 0;
# Optional Arguments: # Optional Arguments:
# chunk_index - Index to use for nibbling # chunk_index - Index to use for nibbling
# one_nibble - Allow one-chunk tables (default yes) # one_nibble - Allow one-chunk tables (default yes)
# where - WHERE clause
# #
# Returns: # Returns:
# NibbleIterator object # NibbleIterator object
@@ -57,16 +58,19 @@ sub new {
} }
my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args}; my ($cxn, $tbl, $chunk_size, $o, $q) = @args{@required_args};
my ($row_est, $mysql_index) = _get_row_estimate(%args);
my $one_nibble = !defined $args{one_nibble} || $args{one_nibble} my $one_nibble = !defined $args{one_nibble} || $args{one_nibble}
? _can_nibble_once(dbh => $cxn->dbh(), %args) ? $row_est < $chunk_size * $o->get('chunk-size-limit')
: 0; : 0;
MKDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no');
# Get an index to nibble by. We'll order rows by the index's columns. # Get an index to nibble by. We'll order rows by the index's columns.
my $index = _find_best_index(dbh => $cxn->dbh(), %args); my $index = _find_best_index(%args, mysql_index => $mysql_index);
if ( !$index && !$one_nibble ) { if ( !$index && !$one_nibble ) {
die "There is no good index and the table is oversized."; die "There is no good index and the table is oversized.";
} }
my $where = $o->get('where');
my $self; my $self;
if ( $one_nibble ) { if ( $one_nibble ) {
my $tbl_struct = $tbl->{tbl_struct}; my $tbl_struct = $tbl->{tbl_struct};
@@ -81,7 +85,7 @@ sub new {
. ($args{select} ? $args{select} . ($args{select} ? $args{select}
: join(', ', map { $q->quote($_) } @cols)) : join(', ', map { $q->quote($_) } @cols))
. " FROM " . $q->quote(@{$tbl}{qw(db tbl)}) . " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
. ($args{where} ? " AND ($args{where})" : '') . ($where ? " AND ($where)" : '')
. " /*checksum table*/"; . " /*checksum table*/";
MKDEBUG && _d('One nibble statement:', $nibble_sql); MKDEBUG && _d('One nibble statement:', $nibble_sql);
@@ -90,7 +94,7 @@ sub new {
. ($args{select} ? $args{select} . ($args{select} ? $args{select}
: join(', ', map { $q->quote($_) } @cols)) : join(', ', map { $q->quote($_) } @cols))
. " FROM " . $q->quote(@{$tbl}{qw(db tbl)}) . " FROM " . $q->quote(@{$tbl}{qw(db tbl)})
. ($args{where} ? " AND ($args{where})" : '') . ($where ? " AND ($where)" : '')
. " /*explain checksum table*/"; . " /*explain checksum table*/";
MKDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql); MKDEBUG && _d('Explain one nibble statement:', $explain_nibble_sql);
@@ -125,7 +129,7 @@ sub new {
= "SELECT /*!40001 SQL_NO_CACHE */ " = "SELECT /*!40001 SQL_NO_CACHE */ "
. join(', ', map { $q->quote($_) } @{$asc->{scols}}) . join(', ', map { $q->quote($_) } @{$asc->{scols}})
. " FROM $from" . " FROM $from"
. ($args{where} ? " WHERE $args{where}" : '') . ($where ? " WHERE $where" : '')
. " ORDER BY $order_by" . " ORDER BY $order_by"
. " LIMIT 1" . " LIMIT 1"
. " /*first lower boundary*/"; . " /*first lower boundary*/";
@@ -135,7 +139,7 @@ sub new {
= "SELECT /*!40001 SQL_NO_CACHE */ " = "SELECT /*!40001 SQL_NO_CACHE */ "
. join(', ', map { $q->quote($_) } @{$asc->{scols}}) . join(', ', map { $q->quote($_) } @{$asc->{scols}})
. " FROM $from" . " FROM $from"
. ($args{where} ? " WHERE $args{where}" : '') . ($where ? " WHERE $where" : '')
. " ORDER BY " . " ORDER BY "
. join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC' . join(' DESC, ', map {$q->quote($_)} @{$index_cols}) . ' DESC'
. " LIMIT 1" . " LIMIT 1"
@@ -155,7 +159,7 @@ sub new {
. join(', ', map { $q->quote($_) } @{$asc->{scols}}) . join(', ', map { $q->quote($_) } @{$asc->{scols}})
. " FROM $from" . " FROM $from"
. " WHERE " . $asc->{boundaries}->{'>='} . " WHERE " . $asc->{boundaries}->{'>='}
. ($args{where} ? " AND ($args{where})" : '') . ($where ? " AND ($where)" : '')
. " ORDER BY $order_by" . " ORDER BY $order_by"
. " LIMIT ?, 2" . " LIMIT ?, 2"
. " /*next chunk boundary*/"; . " /*next chunk boundary*/";
@@ -170,7 +174,7 @@ sub new {
. " FROM $from" . " FROM $from"
. " WHERE " . $asc->{boundaries}->{'>='} # lower boundary . " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
. " AND " . $asc->{boundaries}->{'<='} # upper boundary . " AND " . $asc->{boundaries}->{'<='} # upper boundary
. ($args{where} ? " AND ($args{where})" : '') . ($where ? " AND ($where)" : '')
. " ORDER BY $order_by" . " ORDER BY $order_by"
. " /*checksum chunk*/"; . " /*checksum chunk*/";
MKDEBUG && _d('Nibble statement:', $nibble_sql); MKDEBUG && _d('Nibble statement:', $nibble_sql);
@@ -182,7 +186,7 @@ sub new {
. " FROM $from" . " FROM $from"
. " WHERE " . $asc->{boundaries}->{'>='} # lower boundary . " WHERE " . $asc->{boundaries}->{'>='} # lower boundary
. " AND " . $asc->{boundaries}->{'<='} # upper boundary . " AND " . $asc->{boundaries}->{'<='} # upper boundary
. ($args{where} ? " AND ($args{where})" : '') . ($where ? " AND ($where)" : '')
. " ORDER BY $order_by" . " ORDER BY $order_by"
. " /*explain checksum chunk*/"; . " /*explain checksum chunk*/";
MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql); MKDEBUG && _d('Explain nibble statement:', $explain_nibble_sql);
@@ -203,7 +207,7 @@ sub new {
sql => { sql => {
columns => $asc->{scols}, columns => $asc->{scols},
from => $from, from => $from,
where => $args{where}, where => $where,
boundaries => $asc->{boundaries}, boundaries => $asc->{boundaries},
order_by => $order_by, order_by => $order_by,
}, },
@@ -371,24 +375,36 @@ sub more_boundaries {
sub _find_best_index { sub _find_best_index {
my (%args) = @_; my (%args) = @_;
my @required_args = qw(tbl TableParser dbh Quoter); my @required_args = qw(Cxn tbl TableParser);
my ($tbl, $tp) = @args{@required_args}; my ($cxn, $tbl, $tp) = @args{@required_args};
my $tbl_struct = $tbl->{tbl_struct}; my $tbl_struct = $tbl->{tbl_struct};
my $indexes = $tbl_struct->{keys}; my $indexes = $tbl_struct->{keys};
my $want_index = $args{chunk_index};
MKDEBUG && _d('Wanted index:', $want_index);
if ( $want_index && !exists $indexes->{$want_index} ) {
MKDEBUG && _d('Wanted index does not exist; will auto-select best index');
$want_index = undef;
}
elsif ( $args{mysql_index} ) {
MKDEBUG && _d('MySQL wants to use index', $args{mysql_index});
$want_index = $args{mysql_index};
}
my $best_index; my $best_index;
my @possible_indexes; my @possible_indexes;
if ( my $want_index = $args{chunk_index} ) { if ( $want_index ) {
MKDEBUG && _d('Want to use nibble index', $want_index); if ( $indexes->{$want_index}->{is_unique} ) {
if ( $want_index eq 'PRIMARY' || $indexes->{$want_index}->{is_unique} ) { MKDEBUG && _d('Will use wanted index');
$best_index = $want_index; $best_index = $want_index;
} }
else { else {
MKDEBUG && _d('Wanted index is a possible index');
push @possible_indexes, $want_index; push @possible_indexes, $want_index;
} }
} }
else { else {
MKDEBUG && _d('Auto-selecting best index');
foreach my $index ( $tp->sort_indexes($tbl_struct) ) { foreach my $index ( $tp->sort_indexes($tbl_struct) ) {
if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) { if ( $index eq 'PRIMARY' || $indexes->{$index}->{is_unique} ) {
$best_index = $index; $best_index = $index;
@@ -430,14 +446,14 @@ sub _find_best_index {
sub _get_index_cardinality { sub _get_index_cardinality {
my (%args) = @_; my (%args) = @_;
my @required_args = qw(dbh tbl index Quoter); my @required_args = qw(Cxn tbl index Quoter);
my ($dbh, $tbl, $index, $q) = @args{@required_args}; my ($cxn, $tbl, $index, $q) = @args{@required_args};
my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)}) my $sql = "SHOW INDEXES FROM " . $q->quote(@{$tbl}{qw(db tbl)})
. " WHERE Key_name = '$index'"; . " WHERE Key_name = '$index'";
MKDEBUG && _d($sql); MKDEBUG && _d($sql);
my $cardinality = 1; my $cardinality = 1;
my $rows = $dbh->selectall_hashref($sql, 'key_name'); my $rows = $cxn->dbh()->selectall_hashref($sql, 'key_name');
foreach my $row ( values %$rows ) { foreach my $row ( values %$rows ) {
$cardinality *= $row->{cardinality} if $row->{cardinality}; $cardinality *= $row->{cardinality} if $row->{cardinality};
} }
@@ -445,17 +461,24 @@ sub _get_index_cardinality {
return $cardinality; return $cardinality;
} }
sub _can_nibble_once { sub _get_row_estimate {
my (%args) = @_; my (%args) = @_;
my @required_args = qw(dbh tbl chunk_size OptionParser TableParser); my @required_args = qw(Cxn tbl OptionParser TableParser Quoter);
my ($dbh, $tbl, $chunk_size, $o, $tp) = @args{@required_args}; my ($cxn, $tbl, $o, $tp, $q) = @args{@required_args};
my ($table_status) = $tp->get_table_status($dbh, $tbl->{db}, $tbl->{tbl});
MKDEBUG && _d('TABLE STATUS', Dumper($table_status)); if ( my $where = $o->get('where') ) {
my $n_rows = $table_status->{rows} || 0; MKDEBUG && _d('WHERE clause, using explain plan for row estimate');
my $limit = $o->get('chunk-size-limit'); my $table = $q->quote(@{$tbl}{qw(db tbl)});
my $one_nibble = $n_rows < $chunk_size * $limit ? 1 : 0; my $sql = "EXPLAIN SELECT COUNT(*) FROM $table WHERE $where";
MKDEBUG && _d('One nibble:', $one_nibble ? 'yes' : 'no'); MKDEBUG && _d($sql);
return $one_nibble; my $expl = $cxn->dbh()->selectrow_hashref($sql);
MKDEBUG && _d(Dumper($expl));
return ($expl->{rows} || 0), $expl->{key};
}
else {
MKDEBUG && _d('No WHERE clause, using table status for row estimate');
return $tbl->{tbl_status}->{rows} || 0;
}
} }
sub _prepare_sths { sub _prepare_sths {

View File

@@ -57,10 +57,12 @@ my $tbl_name = qr{
# Quoter - <Quoter> object. # Quoter - <Quoter> object.
# #
# Optional Arguments: # Optional Arguments:
# Schema - <Schema> object to initialize while iterating. # Schema - <Schema> object to initialize while iterating.
# TableParser - <TableParser> object to parse CREATE TABLE for tbl_struct. # TableParser - <TableParser> object to get tbl_struct.
# keep_ddl - Keep CREATE TABLE (default false) # keep_ddl - Keep SHOW CREATE TABLE (default false).
# resume - Skip tables so first call to <next()> returns this "db.table". # keep_tbl_status - Keep SHOW TABLE STATUS (default false).
# resume - Skip tables so first call to <next()> returns
# this "db.table".
# #
# Returns: # Returns:
# SchemaIterator object # SchemaIterator object
@@ -214,6 +216,7 @@ sub next {
} }
delete $schema_obj->{ddl} unless $self->{keep_ddl}; delete $schema_obj->{ddl} unless $self->{keep_ddl};
delete $schema_obj->{tbl_status} unless $self->{keep_tbl_status};
if ( my $schema = $self->{Schema} ) { if ( my $schema = $self->{Schema} ) {
$schema->add_schema_object($schema_obj); $schema->add_schema_object($schema_obj);
@@ -345,27 +348,34 @@ sub _iterate_dbh {
while ( my $tbl = shift @{$self->{tbls}} ) { while ( my $tbl = shift @{$self->{tbls}} ) {
next unless $self->_resume_from_table($tbl); next unless $self->_resume_from_table($tbl);
my $engine;
# If there are engine filters, we have to get the table status.
# Else, get it if the user wants to keep it since they'll expect
# it to be available.
my $tbl_status;
if ( $self->{filters}->{'engines'} if ( $self->{filters}->{'engines'}
|| $self->{filters}->{'ignore-engines'} ) { || $self->{filters}->{'ignore-engines'}
|| $self->{keep_tbl_status} )
{
my $sql = "SHOW TABLE STATUS FROM " . $q->quote($self->{db}) my $sql = "SHOW TABLE STATUS FROM " . $q->quote($self->{db})
. " LIKE \'$tbl\'"; . " LIKE \'$tbl\'";
MKDEBUG && _d($sql); MKDEBUG && _d($sql);
$engine = $dbh->selectrow_hashref($sql)->{engine}; $tbl_status = $dbh->selectrow_hashref($sql);
MKDEBUG && _d($tbl, 'uses', $engine, 'engine'); MKDEBUG && _d(Dumper($tbl_status));
} }
if ( !$tbl_status
if ( !$engine || $self->engine_is_allowed($engine) ) { || $self->engine_is_allowed($tbl_status->{engine}) ) {
my $ddl; my $ddl;
if ( my $tp = $self->{TableParser} ) { if ( my $tp = $self->{TableParser} ) {
$ddl = $tp->get_create_table($dbh, $self->{db}, $tbl); $ddl = $tp->get_create_table($dbh, $self->{db}, $tbl);
} }
return { return {
db => $self->{db}, db => $self->{db},
tbl => $tbl, tbl => $tbl,
ddl => $ddl, ddl => $ddl,
tbl_status => $tbl_status,
}; };
} }
} }
@@ -415,7 +425,6 @@ sub database_is_allowed {
return 0; return 0;
} }
# MKDEBUG && _d('Database', $db, 'is allowed');
return 1; return 1;
} }
@@ -477,7 +486,6 @@ sub table_is_allowed {
return 0; return 0;
} }
# MKDEBUG && _d('Table', $tbl, 'is allowed');
return 1; return 1;
} }
@@ -500,7 +508,6 @@ sub engine_is_allowed {
return 0; return 0;
} }
# MKDEBUG && _d('Engine', $engine, 'is allowed');
return 1; return 1;
} }

View File

@@ -39,7 +39,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master'; plan skip_all => 'Cannot connect to sandbox master';
} }
else { else {
plan tests => 26; plan tests => 28;
} }
my $q = new Quoter(); my $q = new Quoter();
@@ -76,20 +76,22 @@ sub make_nibble_iter {
my $schema = new Schema(); my $schema = new Schema();
my $si = new SchemaIterator( my $si = new SchemaIterator(
dbh => $dbh, dbh => $dbh,
keep_ddl => 1, keep_ddl => 1,
Schema => $schema, keep_tbl_status => 1,
Schema => $schema,
%common_modules, %common_modules,
); );
1 while $si->next(); 1 while $si->next();
my $ni = new NibbleIterator( my $ni = new NibbleIterator(
Cxn => $cxn, Cxn => $cxn,
tbl => $schema->get_table($args{db}, $args{tbl}), tbl => $schema->get_table($args{db}, $args{tbl}),
chunk_size => $o->get('chunk-size'), chunk_size => $o->get('chunk-size'),
callbacks => $args{callbacks}, chunk_index => $o->get('chunk-index'),
select => $args{select}, callbacks => $args{callbacks},
one_nibble => $args{one_nibble}, select => $args{select},
one_nibble => $args{one_nibble},
%common_modules, %common_modules,
); );
@@ -552,6 +554,45 @@ is_deeply(
"Nibble small table without indexes" "Nibble small table without indexes"
); );
# ############################################################################
# Auto-select best index if wanted index doesn't exist.
# ############################################################################
$ni = make_nibble_iter(
sql_file => "a-z.sql",
db => 'test',
tbl => 't',
one_nibble => 0,
argv => [qw(--databases test --chunk-index nonexistent)],
);
is(
$ni->nibble_index(),
'c',
"Auto-chooses index if wanted index does not exist"
);
# ############################################################################
# Add a WHERE clause and nibble just the selected range.
# ############################################################################
$ni = make_nibble_iter(
sql_file => "a-z.sql",
db => 'test',
tbl => 't',
one_nibble => 0,
argv => [qw(--databases test --where c>'m')],
);
@rows = ();
while (my $row = $ni->next()) {
push @rows, @$row;
}
is_deeply(
\@rows,
[ ('n'..'z') ],
"Nibbles only values in --where clause range"
);
# ############################################################################# # #############################################################################
# Done. # Done.
# ############################################################################# # #############################################################################