mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-09 18:30:16 +00:00
688 lines
24 KiB
Perl
688 lines
24 KiB
Perl
# This program is copyright 2010-2011 Percona Ireland Ltd.
|
|
# Feedback and improvements are welcome.
|
|
#
|
|
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify it under
|
|
# the terms of the GNU General Public License as published by the Free Software
|
|
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
|
|
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
|
|
# licenses.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along with
|
|
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
|
|
# Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
# ###########################################################################
|
|
# QueryAdvisorRules package
|
|
# ###########################################################################
|
|
{
|
|
# Package: QueryAdvisorRules
|
|
# QueryAdvisorRules encapsulates rules for checking queries.
|
|
package QueryAdvisorRules;
|
|
use base 'AdvisorRules';
|
|
|
|
use strict;
|
|
use warnings FATAL => 'all';
|
|
use English qw(-no_match_vars);
|
|
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
|
|
|
|
sub new {
|
|
my ( $class, %args ) = @_;
|
|
my $self = $class->SUPER::new(%args);
|
|
@{$self->{rules}} = $self->get_rules();
|
|
PTDEBUG && _d(scalar @{$self->{rules}}, "rules");
|
|
return $self;
|
|
}
|
|
|
|
# Each rules is a hashref with two keys:
|
|
# * id Unique PREFIX.NUMBER for the rule. The prefix is three chars
|
|
# which hints to the nature of the rule. See example below.
|
|
# * code Coderef to check rule, returns undef if rule does not match,
|
|
# else returns the string pos near where the rule matches or 0
|
|
# to indicate it doesn't know the pos. The code is passed a\
|
|
# single arg: a hashref event.
|
|
sub get_rules {
|
|
return
|
|
{
|
|
id => 'ALI.001', # Implicit alias
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $struct = $event->{query_struct};
|
|
my $tbls = $struct->{from} || $struct->{into} || $struct->{tables};
|
|
return unless $tbls;
|
|
foreach my $tbl ( @$tbls ) {
|
|
return 0 if $tbl->{alias} && !$tbl->{explicit_alias};
|
|
}
|
|
my $cols = $struct->{columns};
|
|
return unless $cols;
|
|
foreach my $col ( @$cols ) {
|
|
return 0 if $col->{alias} && !$col->{explicit_alias};
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'ALI.002', # tbl.* alias
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $cols = $event->{query_struct}->{columns};
|
|
return unless $cols;
|
|
foreach my $col ( @$cols ) {
|
|
return 0 if $col->{tbl} && $col->{col} eq '*' && $col->{alias};
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'ALI.003', # tbl AS tbl
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $struct = $event->{query_struct};
|
|
my $tbls = $struct->{from} || $struct->{into} || $struct->{tables};
|
|
return unless $tbls;
|
|
foreach my $tbl ( @$tbls ) {
|
|
return 0 if $tbl->{alias} && $tbl->{alias} eq $tbl->{tbl};
|
|
}
|
|
my $cols = $struct->{columns};
|
|
return unless $cols;
|
|
foreach my $col ( @$cols ) {
|
|
return 0 if $col->{alias} && $col->{alias} eq $col->{col};
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'ARG.001', # col = '%foo'
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $where = $event->{query_struct}->{where};
|
|
return unless $where && @$where;
|
|
foreach my $arg ( @$where ) {
|
|
return 0
|
|
if ($arg->{operator} || '') eq 'like'
|
|
&& $arg->{right_arg} =~ m/[\'\"][\%\_]./;
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'ARG.002', # LIKE w/o wildcard
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $where = $event->{query_struct}->{where};
|
|
return unless $where && @$where;
|
|
foreach my $arg ( @$where ) {
|
|
return 0
|
|
if ($arg->{operator} || '') eq 'like'
|
|
&& $arg->{right_arg} !~ m/[%_]/;
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'CLA.001', # SELECT w/o WHERE
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
return unless ($event->{query_struct}->{type} || '') eq 'select';
|
|
return unless $event->{query_struct}->{from};
|
|
return 0 unless $event->{query_struct}->{where};
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'CLA.002', # ORDER BY RAND()
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $orderby = $event->{query_struct}->{order_by};
|
|
return unless $orderby;
|
|
foreach my $ident ( @$orderby ) {
|
|
# SQLParser will have uppercased the function name.
|
|
return 0 if $ident->{function} && $ident->{function} eq 'RAND';
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'CLA.003', # LIMIT w/ OFFSET
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
return unless $event->{query_struct}->{limit};
|
|
return unless defined $event->{query_struct}->{limit}->{offset};
|
|
return 0;
|
|
},
|
|
},
|
|
{
|
|
id => 'CLA.004', # GROUP BY <number>
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $groupby = $event->{query_struct}->{group_by};
|
|
return unless $groupby;
|
|
foreach my $ident ( @$groupby ) {
|
|
return 0 if exists $ident->{position};
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'CLA.005', # ORDER BY col where col=<constant>
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $orderby = $event->{query_struct}->{order_by};
|
|
return unless $orderby;
|
|
my $where = $event->{query_struct}->{where};
|
|
return unless $where;
|
|
my %orderby_col = map { lc $_->{column} => 1 }
|
|
grep { $_->{column} }
|
|
@$orderby;
|
|
foreach my $pred ( @$where ) {
|
|
my $val = $pred->{right_arg};
|
|
next unless $val;
|
|
return 0 if $val =~ m/^\d+$/ && $orderby_col{lc($pred->{left_arg} || '')};
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'CLA.006', # GROUP BY or ORDER BY different tables
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $groupby = $event->{query_struct}->{group_by};
|
|
my $orderby = $event->{query_struct}->{order_by};
|
|
return unless $groupby || $orderby;
|
|
|
|
my %groupby_tbls = map { $_->{table} => 1 }
|
|
grep { $_->{table} }
|
|
@$groupby;
|
|
return 0 if scalar keys %groupby_tbls > 1;
|
|
|
|
my %orderby_tbls = map { $_->{table} => 1 }
|
|
grep { $_->{table} }
|
|
@$orderby;
|
|
return 0 if scalar keys %orderby_tbls > 1;
|
|
|
|
# Remove ORDER BY tables from GROUP BY tables. Any tables
|
|
# remain in GROUP BY are unique to GROUP BY, i.e. not in
|
|
# ORDER BY, so we have a case like: group by tbl1.id order by tbl2.id
|
|
map { delete $groupby_tbls{$_} } keys %orderby_tbls;
|
|
return 0 if scalar keys %groupby_tbls;
|
|
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'CLA.007', # ORDER BY ASC/DESC mix can't use index
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $order_by = $event->{query_struct}->{order_by};
|
|
return unless $order_by;
|
|
my ($asc, $desc) = (0, 0);
|
|
foreach my $col ( @$order_by ) {
|
|
if ( ($col->{sort} || 'ASC') eq 'ASC' ) {
|
|
$asc++;
|
|
}
|
|
else {
|
|
$desc++;
|
|
}
|
|
return 0 if $asc && $desc;
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'COL.001', # SELECT *
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
return unless ($event->{query_struct}->{type} || '') eq 'select';
|
|
my $cols = $event->{query_struct}->{columns};
|
|
return unless $cols;
|
|
foreach my $col ( @$cols ) {
|
|
return 0 if $col->{col} eq '*' && !$col->{func};
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'COL.002', # INSERT w/o (cols) def
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $type = $event->{query_struct}->{type} || '';
|
|
return unless $type eq 'insert' || $type eq 'replace';
|
|
return 0 unless $event->{query_struct}->{columns};
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'LIT.001', # IP as string
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
if ( $event->{arg} =~ m/['"]\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/gc ) {
|
|
return (pos $event->{arg}) || 0;
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'LIT.002', # Date not quoted
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
# YYYY-MM-DD
|
|
if ( $event->{arg} =~ m/(?<!['"\w-])\d{4}-\d{1,2}-\d{1,2}\b/gc ) {
|
|
return (pos $event->{arg}) || 0;
|
|
}
|
|
# YY-MM-DD
|
|
if ( $event->{arg} =~ m/(?<!['"\w\d-])\d{2}-\d{1,2}-\d{1,2}\b/gc ) {
|
|
return (pos $event->{arg}) || 0;
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'KWR.001', # SQL_CALC_FOUND_ROWS
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
return 0 if $event->{query_struct}->{keywords}->{sql_calc_found_rows};
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'JOI.001', # comma and ansi joins
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $struct = $event->{query_struct};
|
|
my $tbls = $struct->{from} || $struct->{into} || $struct->{tables};
|
|
return unless $tbls;
|
|
my $comma_join = 0;
|
|
my $ansi_join = 0;
|
|
foreach my $tbl ( @$tbls ) {
|
|
if ( $tbl->{join} ) {
|
|
if ( $tbl->{join}->{ansi} ) {
|
|
$ansi_join = 1;
|
|
}
|
|
else {
|
|
$comma_join = 1;
|
|
}
|
|
}
|
|
return 0 if $comma_join && $ansi_join;
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'RES.001', # non-deterministic GROUP BY
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
return unless ($event->{query_struct}->{type} || '') eq 'select';
|
|
my $groupby = $event->{query_struct}->{group_by};
|
|
return unless $groupby;
|
|
# Only check GROUP BY column names, not numbers. GROUP BY number
|
|
# is handled in CLA.004.
|
|
my %groupby_col = map { $_->{column} => 1 }
|
|
grep { $_->{column} }
|
|
@$groupby;
|
|
return unless scalar %groupby_col;
|
|
# Skip non-columns -- NULL, digits, functions, variables
|
|
my $cols = [
|
|
grep { _looks_like_column($_->{col}) }
|
|
grep { ! exists $_->{func} }
|
|
@{$event->{query_struct}->{columns}}
|
|
];
|
|
# All SELECT cols must be in GROUP BY cols clause.
|
|
# E.g. select a, b, c from tbl group by a; -- non-deterministic
|
|
foreach my $col ( @$cols ) {
|
|
return 0 unless $groupby_col{ $col->{col} }
|
|
|| ($col->{alias} && $groupby_col{ $col->{alias} });
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'RES.002', # non-deterministic LIMIT w/o ORDER BY
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
return unless $event->{query_struct}->{limit};
|
|
# If query doesn't use tables then this check isn't applicable.
|
|
return unless $event->{query_struct}->{from}
|
|
|| $event->{query_struct}->{into}
|
|
|| $event->{query_struct}->{tables};
|
|
return 0 unless $event->{query_struct}->{order_by};
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'STA.001', # != instead of <>
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
return 0 if $event->{arg} =~ m/!=/;
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'SUB.001', # IN(<subquery>)
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
if ( $event->{arg} =~ m/\bIN\s*\(\s*SELECT\b/gi ) {
|
|
return pos $event->{arg};
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'JOI.002', # table joined more than once, but not self-join
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $struct = $event->{query_struct};
|
|
return unless $struct;
|
|
my $tbls = $struct->{from} || $struct->{into} || $struct->{tables};
|
|
return unless $tbls;
|
|
my %tbl_cnt;
|
|
my $n_tbls = scalar @$tbls;
|
|
|
|
# To detect this rule we look for tables joined more than once
|
|
# (if cnt > 1) and via both an ansi and comma join. This captures
|
|
# "t AS a JOIN t AS b a.foo=b.bar, t" but not the simple self-join
|
|
# "t AS a JOIN t AS b a.foo=b.bar" or cases where a table is joined
|
|
# to many other tables all via ansi joins or the implicit self-join
|
|
# (which we really can't detect) "t AS a, t AS b WHERE a.foo=b.bar".
|
|
# When a table shows up multiple times in ansi joins and then again
|
|
# in a comma join, the comma join is usually culprit of this rule.
|
|
for my $i ( 0..($n_tbls-1) ) {
|
|
my $tbl = $tbls->[$i];
|
|
my $tbl_name = lc $tbl->{tbl};
|
|
|
|
$tbl_cnt{$tbl_name}->{cnt}++;
|
|
$tbl_cnt{$tbl_name}->{ansi_join}++
|
|
if $tbl->{join} && $tbl->{join}->{ansi};
|
|
$tbl_cnt{$tbl_name}->{comma_join}++
|
|
if $tbl->{join} && !$tbl->{join}->{ansi};
|
|
|
|
if ( $tbl_cnt{$tbl_name}->{cnt} > 1 ) {
|
|
return 0
|
|
if $tbl_cnt{$tbl_name}->{ansi_join}
|
|
&& $tbl_cnt{$tbl_name}->{comma_join};
|
|
}
|
|
}
|
|
return;
|
|
},
|
|
},
|
|
{
|
|
id => 'JOI.003', # OUTER JOIN converted to INNER JOIN
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $struct = $event->{query_struct};
|
|
return unless $struct;
|
|
my $tbls = $struct->{from} || $struct->{into} || $struct->{tables};
|
|
return unless $tbls;
|
|
my $where = $struct->{where};
|
|
return unless $where;
|
|
|
|
# Good LEFT OUTER JOIN:
|
|
# select * from L left join R using(c) where L.a=5;
|
|
# Converts to INNER JOIN when:
|
|
# select * from L left join R using(c) where L.a=5 and R.b=10;
|
|
# To detect this condition we need to see if there's an OUTER
|
|
# join then see if there's a column from the outer table in the
|
|
# WHERE clause that is anything but "IS NULL". So in the example
|
|
# above, R.b=10 is this culprit.
|
|
# http://code.google.com/p/maatkit/issues/detail?id=950
|
|
my %outer_tbls = map { $_->{tbl} => 1 } get_outer_tables($tbls);
|
|
PTDEBUG && _d("Outer tables:", keys %outer_tbls);
|
|
return unless %outer_tbls;
|
|
|
|
foreach my $pred ( @$where ) {
|
|
next unless $pred->{left_arg}; # skip constants like 1 in "WHERE 1"
|
|
my ($tbl, $col) = split /\./, $pred->{left_arg};
|
|
if ( $tbl && $col && $outer_tbls{$tbl} ) {
|
|
# Only outer_tbl.col IS NULL is permissible.
|
|
if ($pred->{operator} ne 'is' || $pred->{right_arg} !~ m/null/i)
|
|
{
|
|
PTDEBUG && _d("Predicate prevents OUTER JOIN:",
|
|
map { $pred->{$_} } qw(left_arg operator right_arg));
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
},
|
|
{
|
|
id => 'JOI.004', # broken exclusion join
|
|
code => sub {
|
|
my ( %args ) = @_;
|
|
my $event = $args{event};
|
|
my $struct = $event->{query_struct};
|
|
return unless $struct;
|
|
my $tbls = $struct->{from} || $struct->{into} || $struct->{tables};
|
|
return unless $tbls;
|
|
my $where = $struct->{where};
|
|
return unless $where;
|
|
|
|
my %outer_tbls;
|
|
my %outer_tbl_join_cols;
|
|
my @unknown_join_cols;
|
|
foreach my $outer_tbl ( get_outer_tables($tbls) ) {
|
|
$outer_tbls{$outer_tbl->{tbl}} = 1;
|
|
|
|
# For "L LEFT JOIN R" R is the outer table and since it follows
|
|
# L its table struct will have the join struct with the join
|
|
# condition. But for "L RIGHT JOIN R" L is the outer table and
|
|
# will not have the join struct because it precedes R. This
|
|
# is due to how parse_from() works. So if the outer table doesn't
|
|
# have the join struct, we need to get it from the inner table.
|
|
my $join = $outer_tbl->{join};
|
|
if ( !$join ) {
|
|
my ($inner_tbl) = grep {
|
|
exists $_->{join}
|
|
&& $_->{join}->{to} eq $outer_tbl->{tbl}
|
|
} @$tbls;
|
|
$join = $inner_tbl->{join};
|
|
die "Cannot find join structure for $outer_tbl->{tbl}"
|
|
unless $join;
|
|
}
|
|
|
|
# Get the outer table columns used in the jon condition.
|
|
if ( $join->{condition} eq 'using' ) {
|
|
%outer_tbl_join_cols = map { $_ => 1 } @{$join->{columns}};
|
|
}
|
|
else {
|
|
my $where = $join->{where};
|
|
die "Join structure for ON condition has no where structure"
|
|
unless $where;
|
|
my @join_cols;
|
|
foreach my $pred ( @$where ) {
|
|
next unless $pred->{operator} eq '=';
|
|
# Assume all equality comparisons are like tbl1.col=tbl2.col.
|
|
# Thus join conditions like tbl1.col<tbl2.col aren't handled.
|
|
push @join_cols, $pred->{left_arg}, $pred->{right_arg};
|
|
}
|
|
PTDEBUG && _d("Join columns:", @join_cols);
|
|
foreach my $join_col ( @join_cols ) {
|
|
my ($tbl, $col) = split /\./, $join_col;
|
|
if ( !$col ) {
|
|
$col = $tbl;
|
|
$tbl = determine_table_for_column(
|
|
column => $col,
|
|
tbl_structs => $event->{tbl_structs},
|
|
);
|
|
}
|
|
if ( !$tbl ) {
|
|
PTDEBUG && _d("Cannot determine the table for join column",
|
|
$col);
|
|
push @unknown_join_cols, $col;
|
|
}
|
|
else {
|
|
$outer_tbl_join_cols{$col} = 1
|
|
if $tbl eq $outer_tbl->{tbl};
|
|
}
|
|
}
|
|
}
|
|
}
|
|
PTDEBUG && _d("Outer table join columns:", keys %outer_tbl_join_cols);
|
|
PTDEBUG && _d("Unknown join columns:", @unknown_join_cols);
|
|
|
|
# Here's a problem query:
|
|
# select c from L left join R on L.a=R.b where L.a=5 and R.c is null
|
|
# The problem is "R.c is null" will not allow one to determine if
|
|
# a null row from the outer table is null due to not matching the
|
|
# inner table or due to R.c actually having a null value. So we
|
|
# need to check every outer table column in the WHERE clause for
|
|
# ones that are 1) not in the JOIN expression and 2) "IS NULL'.
|
|
# http://code.google.com/p/maatkit/issues/detail?id=950
|
|
foreach my $pred ( @$where ) {
|
|
next unless $pred->{left_arg}; # skip constants like 1 in "WHERE 1"
|
|
next unless $pred->{operator} eq 'is'
|
|
&& $pred->{right_arg} =~ m/NULL/i;
|
|
|
|
my ($tbl, $col) = split /\./, $pred->{left_arg};
|
|
if ( !$col ) {
|
|
# A col in the WHERE clause isn't table-qualified. Try to
|
|
# determine its table. If we can, great, if not "return 0 if"
|
|
# below will immediately fail because $tbl will be undef still.
|
|
# That's ok; it just means this test tries as best it can and
|
|
# gets skipped silently when we can't tbl-qualify cols.
|
|
$col = $tbl;
|
|
$tbl = determine_table_for_column(
|
|
column => $col,
|
|
tbl_structs => $event->{tbl_structs},
|
|
);
|
|
}
|
|
next unless $tbl; # can't check tbl if tbl is unknown
|
|
next unless $outer_tbls{$tbl}; # only want outer tbl cols
|
|
|
|
# At this point we know col is from outer table and "IS NULL".
|
|
# "outer_tbl.join_col IS NULL" is ok, but...
|
|
next if $outer_tbl_join_cols{$col};
|
|
|
|
# ...this rule could match here for two reasons. First, if
|
|
# we know the outer tbl join cols and this col isn't one of them
|
|
# (hence the statement above passed and we got here), then
|
|
# @unknown_join_cols will be empty and we'll match. This is like
|
|
# "outer_tbl.NON_join_col IS NULL". Or second, we don't know
|
|
# the outer tbl join cols and @unknown_join_cols will have cols
|
|
# and we'll match if this col isn't one of the unknown join cols.
|
|
# This is for cases like:
|
|
# select c from L left join R on a=b where L.a=5 and R.c is null
|
|
# We don't know if a or b belong to L or R but we know c is from
|
|
# the outer table and is neither a nor b.
|
|
return 0 unless grep { $col eq $_ } @unknown_join_cols;
|
|
}
|
|
|
|
return; # rule does not match, as best as we can determine
|
|
}
|
|
},
|
|
};
|
|
|
|
|
|
# Sub: get_outer_tables
|
|
# Get the outer tables in joins.
|
|
#
|
|
# Parameters:
|
|
# $tbls - Arrayref of hashrefs with table info
|
|
#
|
|
# Returns:
|
|
# Array of hashref to the outer tables
|
|
sub get_outer_tables {
|
|
my ( $tbls ) = @_;
|
|
return unless $tbls;
|
|
my @outer_tbls;
|
|
my $n_tbls = scalar @$tbls;
|
|
for my $i( 0..($n_tbls-1) ) {
|
|
my $tbl = $tbls->[$i];
|
|
next unless $tbl->{join} && $tbl->{join}->{type} =~ m/left|right/i;
|
|
push @outer_tbls,
|
|
$tbl->{join}->{type} =~ m/left/i ? $tbl
|
|
: $tbls->[$i - 1];
|
|
}
|
|
return @outer_tbls;
|
|
}
|
|
|
|
|
|
# Sub: determine_table_for_column
|
|
# Determine which table a column belongs to. No extensive, online effort
|
|
# is made to determine the column's table. The caller is responsible for
|
|
# using the parsed SQL structure to get its db/tables and their tbl structs
|
|
# and providing a list of them.
|
|
#
|
|
# Parameters:
|
|
# %args - Arguments
|
|
#
|
|
# Required Arguments:
|
|
# column - column name, not quoted
|
|
#
|
|
# Optional Arguments:
|
|
# tbl_structs - arrayref hashrefs returned by <TableParser::parse()>
|
|
#
|
|
# Returns:
|
|
# Table name, not quoted
|
|
sub determine_table_for_column {
|
|
my ( %args ) = @_;
|
|
my @required_args = qw(column);
|
|
foreach my $arg ( @required_args ) {
|
|
die "I need a $arg argument" unless $args{$arg};
|
|
}
|
|
my ($col) = @args{@required_args};
|
|
|
|
my $tbl_structs = $args{tbl_structs};
|
|
return unless $tbl_structs;
|
|
|
|
foreach my $db ( keys %$tbl_structs ) {
|
|
foreach my $tbl ( keys %{$tbl_structs->{$db}} ) {
|
|
if ( $tbl_structs->{$db}->{$tbl}->{is_col}->{$col} ) {
|
|
PTDEBUG && _d($col, "column belongs to", $db, $tbl);
|
|
return $tbl;
|
|
}
|
|
}
|
|
}
|
|
|
|
PTDEBUG && _d("Cannot determine table for column", $col);
|
|
return;
|
|
}
|
|
|
|
sub _looks_like_column {
|
|
my $col = shift;
|
|
# NULL, numbers, variables and functions are definitely not columns
|
|
return if $col eq '*' || uc($col) eq 'NULL';
|
|
return if $col =~ /\A(?:\b[0-9]+\b|\@{1,2}.+)/;
|
|
return $col;
|
|
}
|
|
|
|
sub _d {
|
|
my ($package, undef, $line) = caller 0;
|
|
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
|
|
map { defined $_ ? $_ : 'undef' }
|
|
@_;
|
|
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
|
|
}
|
|
|
|
1;
|
|
}
|
|
# ###########################################################################
|
|
# End QueryAdvisorRules package
|
|
# ###########################################################################
|