# This program is copyright 2010-2012 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License.  On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA  02111-1307  USA.
# ###########################################################################
# SQLParser package
# ###########################################################################
{
# Package: SQLParser
# SQLParser parses common MySQL SQL statements into data structures.
# This parser is MySQL-specific and intentionally meant to handle only
# "common" cases.  Although there are many limitations (like UNION, CASE,
# etc.), many complex cases are handled that no other free, Perl SQL
# parser at the time of writing can parse, notably subqueries in all their
# places and varieties.
#
# This package has not been profiled and since it relies heavily on
# mildly complex regex, so do not expect amazing performance.
#
# See SQLParser.t for examples of the various data structures.  There are
# many and they vary a lot depending on the statement parsed, so documentation
# in this file is not exhaustive.
#
# This package differs from QueryParser because here we parse the entire SQL
# statement (thus giving access to all its parts), whereas QueryParser extracts
# just needed parts (and ignores all the rest).
package SQLParser;

use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;

use Data::Dumper;
$Data::Dumper::Indent    = 1;
$Data::Dumper::Sortkeys  = 1;
$Data::Dumper::Quotekeys = 0;

# Basic identifiers for database, table, column and function names.
my $quoted_ident   = qr/`[^`]+`/;
my $unquoted_ident = qr/
   \@{0,2}         # optional @ or @@ for variables
   \w+             # the ident name
   (?:\([^\)]*\))? # optional function params
/x;

my $ident_alias = qr/
  \s+                                 # space before alias
  (?:(AS)\s+)?                        # optional AS keyword
  ((?>$quoted_ident|$unquoted_ident)) # alias
/xi;

# A table is identified by 1 or 2 identifiers separated by a period
# and optionally followed by an alias.  See parse_table_reference()
# for why an optional index hint is not included here.
my $table_ident = qr/(?:
   ((?:(?>$quoted_ident|$unquoted_ident)\.?){1,2}) # table
   (?:$ident_alias)?                               # optional alias
)/xo;

# A column is identified by 1 to 3 identifiers separated by periods
# and optionally followed by an alias.
my $column_ident = qr/(?:
   ((?:(?>$quoted_ident|$unquoted_ident|\*)\.?){1,3}) # column
   (?:$ident_alias)?                                  # optional alias
)/xo;

my $function_ident = qr/
   \b
   (
      \w+      # function name
      \(       # opening parenthesis
      [^\)]+   # function args, if any
      \)       # closing parenthesis
   )
/x;

my %ignore_function = (
   INDEX => 1,
   KEY   => 1,
);

# Sub: new
#   Create a SQLParser object.
#
# Parameters:
#   %args - Arguments
#
# Optional Arguments:
#   Schema - <Schema> object.  Can be set later by calling <set_Schema()>.
#
# Returns:
#   SQLParser object
sub new {
   my ( $class, %args ) = @_;
   my $self = {
      %args,
   };
   return bless $self, $class;
}

# Sub: parse
#   Parse a SQL statement.   Only statements of $allowed_types are parsed.
#   This sub recurses to parse subqueries.
#
# Parameters:
#   $query - SQL statement
#
# Returns:
#   A complex hashref of the parsed SQL statement.  All keys and almost all
#   values are lowercase for consistency.  The struct is roughly:
#   (start code)
#   {
#     type       => '',     # one of $allowed_types
#     clauses    => {},     # raw, unparsed text of clauses
#     <clause>   => struct  # parsed clause struct, e.g. from => [<tables>]
#     keywords   => {},     # LOW_PRIORITY, DISTINCT, SQL_CACHE, etc.
#     functions  => {},     # MAX(), SUM(), NOW(), etc.
#     select     => {},     # SELECT struct for INSERT/REPLACE ... SELECT
#     subqueries => [],     # pointers to subquery structs
#   }
#   (end code)
#   It varies, of course, depending on the query.  If something is missing
#   it means the query doesn't have that part.  E.g. INSERT has an INTO clause
#   but DELETE does not, and only DELETE and SELECT have FROM clauses.  Each
#   clause struct is different; see their respective parse_CLAUSE subs.
sub parse {
   my ( $self, $query ) = @_;
   return unless $query;

   # Only these types of statements are parsed.
   my $allowed_types = qr/(?:
       DELETE
      |INSERT
      |REPLACE
      |SELECT
      |UPDATE
      |CREATE
   )/xi;

   # Flatten and clean query.
   $query = $self->clean_query($query);

   # Remove first word, should be the statement type.  The parse_TYPE subs
   # expect that this is already removed.
   my $type;
   if ( $query =~ s/^(\w+)\s+// ) {
      $type = lc $1;
      PTDEBUG && _d('Query type:', $type);
      die "Cannot parse " . uc($type) . " queries"
         unless $type =~ m/$allowed_types/i;
   }
   else {
      die "Query does not begin with a word";  # shouldn't happen
   }

   $query = $self->normalize_keyword_spaces($query);

   # If query has any subqueries, remove/save them and replace them.
   # They'll be parsed later, after the main outer query.
   my @subqueries;
   if ( $query =~ m/(\(SELECT )/i ) {
      PTDEBUG && _d('Removing subqueries');
      @subqueries = $self->remove_subqueries($query);
      $query      = shift @subqueries;
   }
   elsif ( $type eq 'create' && $query =~ m/\s+SELECT/ ) {
      PTDEBUG && _d('CREATE..SELECT');
      ($subqueries[0]->{query}) = $query =~ m/\s+(SELECT .+)/;
      $query =~ s/\s+SELECT.+//;
   }

   # Parse raw text parts from query.  The parse_TYPE subs only do half
   # the work: parsing raw text parts of clauses, tables, functions, etc.
   # Since these parts are invariant (e.g. a LIMIT clause is same for any
   # type of SQL statement) they are parsed later via other parse_CLAUSE
   # subs, instead of parsing them individually in each parse_TYPE sub.
   my $parse_func = "parse_$type";
   my $struct     = $self->$parse_func($query);
   if ( !$struct ) {
      PTDEBUG && _d($parse_func, 'failed to parse query');
      return;
   }
   $struct->{type} = $type;
   $self->_parse_clauses($struct);
   # TODO: parse functions

   if ( @subqueries ) {
      PTDEBUG && _d('Parsing subqueries');
      foreach my $subquery ( @subqueries ) {
         my $subquery_struct = $self->parse($subquery->{query});
         @{$subquery_struct}{keys %$subquery} = values %$subquery;
         push @{$struct->{subqueries}}, $subquery_struct;
      }
   }

   PTDEBUG && _d('Query struct:', Dumper($struct));
   return $struct;
}


# Sub: _parse_clauses
#   Parse raw text of clauses into data structures.  This sub recurses
#   to parse the clauses of subqueries.  The clauses are read from
#   and their data structures saved into the $struct parameter.
#
# Parameters:
#   $struct - Hashref from which clauses are read (%{$struct->{clauses}})
#             and into which data structs are saved (e.g. $struct->{from}=...).
sub _parse_clauses {
   my ( $self, $struct ) = @_;
   # Parse raw text of clauses and functions.
   foreach my $clause ( keys %{$struct->{clauses}} ) {
      # Rename/remove clauses with space in their names, like ORDER BY.
      if ( $clause =~ m/ / ) {
         (my $clause_no_space = $clause) =~ s/ /_/g;
         $struct->{clauses}->{$clause_no_space} = $struct->{clauses}->{$clause};
         delete $struct->{clauses}->{$clause};
         $clause = $clause_no_space;
      }

      my $parse_func     = "parse_$clause";
      $struct->{$clause} = $self->$parse_func($struct->{clauses}->{$clause});

      if ( $clause eq 'select' ) {
         PTDEBUG && _d('Parsing subquery clauses');
         $struct->{select}->{type} = 'select';
         $self->_parse_clauses($struct->{select});
      }
   }
   return;
}

# Sub: clean_query
#   Remove spaces, flatten, and normalize some patterns for easier parsing.
#
# Parameters:
#   $query - SQL statement
#
# Returns:
#   Cleaned $query
sub clean_query {
   my ( $self, $query ) = @_;
   return unless $query;

   # Whitespace and comments.
   $query =~ s/^\s*--.*$//gm;  # -- comments
   $query =~ s/\s+/ /g;        # extra spaces/flatten
   $query =~ s!/\*.*?\*/!!g;   # /* comments */
   $query =~ s/^\s+//;         # leading spaces
   $query =~ s/\s+$//;         # trailing spaces

   return $query;
}

# Sub: normalize_keyword_spaces
#   Normalize spaces around certain SQL keywords.  Spaces are added and
#   removed around certain SQL keywords to make parsing easier.
#
# Parameters:
#   $query - SQL statement
#
# Returns:
#   Normalized $query
sub normalize_keyword_spaces {
   my ( $self, $query ) = @_;

   # Add spaces between important tokens to help the parse_* subs.
   $query =~ s/\b(VALUE(?:S)?)\(/$1 (/i;
   $query =~ s/\bON\(/on (/gi;
   $query =~ s/\bUSING\(/using (/gi;

   # Start of (SELECT subquery).
   $query =~ s/\(\s+SELECT\s+/(SELECT /gi;

   return $query;
}

# Sub: _parse_query
#    This sub is called by the parse_TYPE subs except parse_insert.
#    It does two things: remove, save the given keywords, all of which
#    should appear at the beginning of the query; and, save (but not
#    remove) the given clauses.  The query should start with the values
#    for the first clause because the query's first word was removed
#    in parse().  So for "SELECT cols FROM ...", the query given here
#    is "cols FROM ..." where "cols" belongs to the first clause "columns".
#    Then the query is walked clause-by-clause, saving each.
#
# Parameters:
#   $query        - SQL statement with first word (SELECT, INSERT, etc.) removed
#   $keywords     - Compiled regex of keywords that can appear in $query
#   $first_clause - First clause word to expect in $query
#   $clauses      - Compiled regex of clause words that can appear in $query
#
# Returns:
#   Hashref with raw text of clauses
sub _parse_query {
   my ( $self, $query, $keywords, $first_clause, $clauses ) = @_;
   return unless $query;
   my $struct = {};

   # Save, remove keywords.
   1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;

   # Go clausing.
   my @clause = grep { defined $_ }
      ($query =~ m/\G(.+?)(?:$clauses\s+|\Z)/gci);

   my $clause = $first_clause,
   my $value  = shift @clause;
   $struct->{clauses}->{$clause} = $value;
   PTDEBUG && _d('Clause:', $clause, $value);

   # All other clauses.
   while ( @clause ) {
      $clause = shift @clause;
      $value  = shift @clause;
      $struct->{clauses}->{lc $clause} = $value;
      PTDEBUG && _d('Clause:', $clause, $value);
   }

   ($struct->{unknown}) = ($query =~ m/\G(.+)/);

   return $struct;
}

sub parse_delete {
   my ( $self, $query ) = @_;
   if ( $query =~ s/FROM\s+//i ) {
      my $keywords = qr/(LOW_PRIORITY|QUICK|IGNORE)/i;
      my $clauses  = qr/(FROM|WHERE|ORDER BY|LIMIT)/i;
      return $self->_parse_query($query, $keywords, 'from', $clauses);
   }
   else {
      die "DELETE without FROM: $query";
   }
}

sub parse_insert {
   my ( $self, $query ) = @_;
   return unless $query;
   my $struct = {};

   # Save, remove keywords.
   my $keywords   = qr/(LOW_PRIORITY|DELAYED|HIGH_PRIORITY|IGNORE)/i;
   1 while $query =~ s/$keywords\s+/$struct->{keywords}->{lc $1}=1, ''/gie;

   if ( $query =~ m/ON DUPLICATE KEY UPDATE (.+)/i ) {
      my $values = $1;
      die "No values after ON DUPLICATE KEY UPDATE: $query" unless $values;
      $struct->{clauses}->{on_duplicate} = $values;
      PTDEBUG && _d('Clause: on duplicate key update', $values);

      # This clause can be confused for JOIN ... ON in INSERT-SELECT queries,
      # so we remove the ON DUPLICATE KEY UPDATE clause after extracting its
      # values.
      $query =~ s/\s+ON DUPLICATE KEY UPDATE.+//;
   }

   # Parse INTO clause.  Literal "INTO" is optional.
   if ( my @into = ($query =~ m/
            (?=.*?(?:VALUE|SE(?:T|LECT)))  # Avoid a backtracking explosion
            (?:INTO\s+)?            # INTO, optional
            (.+?)\s+                # table ref
            (\([^\)]+\)\s+)?        # column list, optional
            (VALUE.?|SET|SELECT)\s+ # start of next clause
         /xgci)
   ) {
      my $tbl  = shift @into;  # table ref
      $struct->{clauses}->{into} = $tbl;
      PTDEBUG && _d('Clause: into', $tbl);

      my $cols = shift @into;  # columns, maybe
      if ( $cols ) {
         $cols =~ s/[\(\)]//g;
         $struct->{clauses}->{columns} = $cols;
         PTDEBUG && _d('Clause: columns', $cols);
      }

      my $next_clause = lc(shift @into);  # VALUES, SET or SELECT
      die "INSERT/REPLACE without clause after table: $query"
         unless $next_clause;
      $next_clause = 'values' if $next_clause eq 'value';
      my ($values) = ($query =~ m/\G(.+)/gci);
      die "INSERT/REPLACE without values: $query" unless $values;
      $struct->{clauses}->{$next_clause} = $values;
      PTDEBUG && _d('Clause:', $next_clause, $values);
   }

   # Save any leftovers.  If there are any, parsing missed something.
   ($struct->{unknown}) = ($query =~ m/\G(.+)/);

   return $struct;
}
{
   # Suppress warnings like "Name "SQLParser::parse_set" used only once:
   # possible typo at SQLParser.pm line 480." caused by the fact that we
   # don't call these aliases directly, they're called indirectly using
   # $parse_func, hence Perl can't see their being called a compile time.
   no warnings;
   # INSERT and REPLACE are so similar that they are both parsed
   # in parse_insert().
   *parse_replace = \&parse_insert;
}

sub parse_select {
   my ( $self, $query ) = @_;

   # Keywords are expected to be at the start of the query, so these
   # that appear at the end are handled separately.  Afaik, SELECT is
   # only statement with optional keywords at the end.  Also, these
   # appear to be the only keywords with spaces instead of _.
   my @keywords;
   my $final_keywords = qr/(FOR UPDATE|LOCK IN SHARE MODE)/i;
   1 while $query =~ s/\s+$final_keywords/(push @keywords, $1), ''/gie;

   my $keywords = qr/(
       ALL
      |DISTINCT
      |DISTINCTROW
      |HIGH_PRIORITY
      |STRAIGHT_JOIN
      |SQL_SMALL_RESULT
      |SQL_BIG_RESULT
      |SQL_BUFFER_RESULT
      |SQL_CACHE
      |SQL_NO_CACHE
      |SQL_CALC_FOUND_ROWS
   )/xi;
   my $clauses = qr/(
       FROM
      |WHERE
      |GROUP\sBY
      |HAVING
      |ORDER\sBY
      |LIMIT
      |PROCEDURE
      |INTO OUTFILE
   )/xi;
   my $struct = $self->_parse_query($query, $keywords, 'columns', $clauses);

   # Add final keywords, if any.
   map { s/ /_/g; $struct->{keywords}->{lc $_} = 1; } @keywords;

   return $struct;
}

sub parse_update {
   my $keywords = qr/(LOW_PRIORITY|IGNORE)/i;
   my $clauses  = qr/(SET|WHERE|ORDER BY|LIMIT)/i;
   return _parse_query(@_, $keywords, 'tables', $clauses);

}

sub parse_create {
   my ($self, $query) = @_;
   my ($obj, $name) = $query =~ m/
      (\S+)\s+
      (?:IF NOT EXISTS\s+)?
      (\S+)
   /xi;
   return {
      object  => lc $obj,
      name    => $name,
      unknown => undef,
   };
}

# Sub: parse_from
#   Parse a FROM clause, a.k.a. the table references.  Does not handle
#   nested joins.  See http://dev.mysql.com/doc/refman/5.1/en/join.html
#
# Parameters:
#   $from - FROM clause (with the word "FROM")
#
# Returns:
#   Arrayref of hashrefs, one hashref for each table in the order that
#   the tables appear, like:
#   (start code)
#   {
#     name           => 't2',  -- table's real name
#     alias          => 'b',   -- table's alias, if any
#     explicit_alias => 1,     -- if explicitly aliased with AS
#     join  => {               -- if joined to another table, all but first
#                              -- table are because comma implies INNER JOIN
#       to        => 't1',     -- table name on left side of join, if this is
#                              -- LEFT JOIN then this is the inner table, if
#                              -- RIGHT JOIN then this is outer table
#       type      => '',       -- left, right, inner, outer, cross, natural
#       condition => 'using',  -- on or using, if applicable
#       columns   => ['id'],   -- columns for USING condition, if applicable
#       ansi      => 1,        -- true of ANSI JOIN, i.e. true if not implicit
#                              -- INNER JOIN due to following a comma
#     },
#   },
#   {
#     name => 't3',
#     join => {
#       to        => 't2',
#       type      => 'left',
#       condition => 'on',     -- an ON condition is like a WHERE clause so
#       where     => [...]     -- this arrayref of predicates appears, see
#                              -- <parse_where()> for its structure
#     },
#   },
#  (end code)
sub parse_from {
   my ( $self, $from ) = @_;
   return unless $from;
   PTDEBUG && _d('Parsing FROM', $from);

   # Extract the column list from USING(col, ...) clauses else
   # the inner commas will be captured by $comma_join.
   my $using_cols;
   ($from, $using_cols) = $self->remove_using_columns($from);

   my $funcs;
   ($from, $funcs) = $self->remove_functions($from);

   # Table references in a FROM clause are separated either by commas
   # (comma/theta join, implicit INNER join) or the JOIN keyword (ansi
   # join).  JOIN can be preceded by other keywords like LEFT, RIGHT,
   # OUTER, etc.  There must be spaces before and after JOIN and its
   # keywords, but there does not have to be spaces before or after a
   # comma.  See http://dev.mysql.com/doc/refman/5.5/en/join.html
   my $comma_join = qr/(?>\s*,\s*)/;
   my $ansi_join  = qr/(?>
     \s+
     (?:(?:INNER|CROSS|STRAIGHT_JOIN|LEFT|RIGHT|OUTER|NATURAL)\s+)*
     JOIN
     \s+
   )/xi;

   my @tbls;     # all table refs, a hashref for each
   my $tbl_ref;  # current table ref hashref
   my $join;     # join info hahsref for current table ref
   foreach my $thing ( split /($comma_join|$ansi_join)/io, $from ) {
      # We shouldn't parse empty things.
      die "Error parsing FROM clause" unless $thing;

      # Strip leading and trailing spaces.
      $thing =~ s/^\s+//;
      $thing =~ s/\s+$//;
      PTDEBUG && _d('Table thing:', $thing);

      if ( $thing =~ m/\s+(?:ON|USING)\s+/i ) {
         PTDEBUG && _d("JOIN condition");
         # This join condition follows a JOIN (comma joins don't have
         # conditions).  It includes a table ref, ON|USING, and then
         # the value to ON|USING.
         my ($tbl_ref_txt, $join_condition_verb, $join_condition_value)
            = $thing =~ m/^(.+?)\s+(ON|USING)\s+(.+)/i;

         $tbl_ref = $self->parse_table_reference($tbl_ref_txt);

         $join->{condition} = lc $join_condition_verb;
         if ( $join->{condition} eq 'on' ) {
            # The value for ON can be, as the MySQL manual says, is just
            # like a WHERE clause.
            $join->{where} = $self->parse_where($join_condition_value, $funcs);
         }
         else { # USING
            # Although calling parse_columns() works, it's overkill.
            # This is not a columns def as in "SELECT col1, col2", it's
            # a simple csv list of column names without aliases, etc.
            $join->{columns} = $self->_parse_csv(shift @$using_cols);
         }
      }
      elsif ( $thing =~ m/(?:,|JOIN)/i ) {
         # A comma or JOIN signals the end of the current table ref and
         # the beginning of the next table ref.  Save the current table ref.
         if ( $join ) {
            $tbl_ref->{join} = $join;
         }
         push @tbls, $tbl_ref;
         PTDEBUG && _d("Complete table reference:", Dumper($tbl_ref));

         # Reset vars for the next table ref.
         $tbl_ref = undef;
         $join    = {};

         # Next table ref becomes the current table ref.  It's joined to
         # the previous table ref either implicitly (comma join) or explicitly
         # (ansi join).
         $join->{to} = $tbls[-1]->{tbl};
         if ( $thing eq ',' ) {
            $join->{type} = 'inner';
            $join->{ansi} = 0;
         }
         else { # ansi join
            my $type = $thing =~ m/^(.+?)\s+JOIN$/i ? lc $1 : 'inner';
            $join->{type} = $type;
            $join->{ansi} = 1;
         }
      }
      else {
         # First table ref and comma-joined tables.
         $tbl_ref = $self->parse_table_reference($thing);
         PTDEBUG && _d('Table reference:', Dumper($tbl_ref));
      }
   }

   # Save the last table ref.  It's not completed in the loop above because
   # there's no comma or JOIN after it.
   if ( $tbl_ref ) {
      if ( $join ) {
         $tbl_ref->{join} = $join;
      }
      push @tbls, $tbl_ref;
      PTDEBUG && _d("Complete table reference:", Dumper($tbl_ref));
   }

   return \@tbls;
}

# Parse a table ref like "tbl", "tbl alias" or "tbl AS alias", where
# tbl can be optionally "db." qualified.  Also handles FORCE|USE|IGNORE
# INDEX hints.  Does not handle "FOR JOIN" hint because "JOIN" here gets
# confused with the "JOIN" thing in parse_from().
sub parse_table_reference {
   my ( $self, $tbl_ref ) = @_;
   return unless $tbl_ref;
   PTDEBUG && _d('Parsing table reference:', $tbl_ref);
   my %tbl;

   # First, check for an index hint.  Remove and save it if present.
   # This can't be included in the $table_ident regex because, for example,
   # `tbl` FORCE INDEX (foo), makes FORCE look like an implicit alias.
   if ( $tbl_ref =~ s/
         \s+(
            (?:FORCE|USE|IGNORE)\s
            (?:INDEX|KEY)
            \s*\([^\)]+\)\s*
         )//xi)
   {
      $tbl{index_hint} = $1;
      PTDEBUG && _d('Index hint:', $tbl{index_hint});
   }

   if ( $tbl_ref =~ m/$table_ident/ ) {
      my ($db_tbl, $as, $alias) = ($1, $2, $3); # XXX
      my $ident_struct = $self->parse_identifier('table', $db_tbl);
      $alias =~ s/`//g if $alias;
      @tbl{keys %$ident_struct} = values %$ident_struct;
      $tbl{explicit_alias} = 1 if $as;
      $tbl{alias}          = $alias if $alias;
   }
   else {
      die "Table ident match failed";  # shouldn't happen
   }

   return \%tbl;
}
{
   no warnings;  # Why? See same line above.
   *parse_into   = \&parse_from;
   *parse_tables = \&parse_from;
}

# This is not your traditional parser, but it works for simple to rather
# complex cases, with a few noted and intentional limitations.  First,
# the limitations:
#
#   * probably doesn't handle every possible operator (see $op)
#   * doesn't care about grouping with parentheses
#   * not "fully" tested because the possibilities are infinite
#
# It works in four steps; let's take this WHERE clause as an example:
#
#   i="x and y" or j in ("and", "or") and x is not null or a between 1 and 10 and sz="this 'and' foo"
#
# The first step splits the string on and|or, the only two keywords I'm
# aware of that join the separate predicates.  This step doesn't care if
# and|or is really between two predicates or in a string or something else.
# The second step is done while the first step is being done: check predicate
# "fragments" (from step 1) for operators; save which ones have and don't
# have at least one operator.  So the result of step 1 and 2 is:
#
#   PREDICATE FRAGMENT                OPERATOR
#   ================================  ========
#   i="x                              Y
#   and y"                            N
#   or j in ("                        Y
#   and", "                           N
#   or")                              N
#   and x is not null                 Y
#   or a between 1                    Y
#   and 10                            N
#   and sz="this '                    Y
#   and' foo"                         N
#
# The third step runs through the list of pred frags backwards and joins
# the current frag to the preceding frag if it does not have an operator.
# The result is:
#
#   PREDICATE FRAGMENT                OPERATOR
#   ================================  ========
#   i="x and y"                       Y
#                                     N
#   or j in ("and", "or")             Y
#                                     N
#                                     N
#   and x is not null                 Y
#   or a between 1 and 10             Y
#                                     N
#   and sz="this 'and' foo"           Y
#                                     N
#
# The fourth step is similar but not shown: pred frags with unbalanced ' or "
# are joined to the preceding pred frag.  This fixes cases where a pred frag
# has multiple and|or in a string value; e.g. "foo and bar or dog".
#
# After the pred frags are complete, the parts of these predicates are parsed
# and returned in an arrayref of hashrefs like:
#
#   {
#     predicate => 'and',
#     column    => 'id',
#     operator  => '>=',
#     value     => '42',
#   }
#
# Invalid predicates, or valid ones that we can't parse,  will cause
# the sub to die.
sub parse_where {
   my ( $self, $where, $functions ) = @_;
   return unless $where;
   PTDEBUG && _d("Parsing WHERE", $where);

   # Not all the operators listed at
   # http://dev.mysql.com/doc/refman/5.1/en/non-typed-operators.html
   # are supported.  E.g. - (minus) is an op but does it ever show up
   # in a where clause?  "col-3=2" is valid (where col=5), but we're
   # not interested in weird stuff like that.
   my $op_symbol = qr/
      (?:
       <=(?:>)?
      |>=
      |<>
      |!=
      |<
      |>
      |=
   )/xi;
   my $op_verb = qr/
      (?:
          (?:(?:NOT\s)?LIKE)
         |(?:IS(?:\sNOT\s)?)
         |(?:(?:\sNOT\s)?BETWEEN)
         |(?:(?:NOT\s)?IN)
      )
   /xi;
   my $op_pat = qr/
   (
      (?>
          (?:$op_symbol)  # don't need spaces around the symbols, e.g.: col=1
         |(?:\s+$op_verb) # must have space before verb op, e.g.: col LIKE ...
      )
   )/x;

   # Step 1 and 2: split on and|or and look for operators.
   my $offset = 0;
   my $pred   = "";
   my @pred;
   my @has_op;
   while ( $where =~ m/\b(and|or)\b/gi ) {
      my $pos = (pos $where) - (length $1);  # pos at and|or, not after

      $pred = substr $where, $offset, ($pos-$offset);
      push @pred, $pred;
      push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;

      $offset = $pos;
   }
   # Final predicate fragment: last and|or to end of string.
   $pred = substr $where, $offset;
   push @pred, $pred;
   push @has_op, $pred =~ m/$op_pat/o ? 1 : 0;
   PTDEBUG && _d("Predicate fragments:", Dumper(\@pred));
   PTDEBUG && _d("Predicate frags with operators:", @has_op);

   # Step 3: join pred frags without ops to preceding pred frag.
   my $n = scalar @pred - 1;
   for my $i ( 1..$n ) {
      $i   *= -1;
      my $j = $i - 1;  # preceding pred frag

      # Two constants in a row, like "TRUE or FALSE", are a special case.
      # The current pred ($i) will not have an op but in this case it's
      # not a continuation of the preceding pred ($j) so we don't want to
      # join them.  And there's a special case within this special case:
      # "BETWEEN 1 AND 10".  _is_constant() strips leading AND or OR so
      # 10 is going to look like an independent constant but really it's
      # part of the BETWEEN op, so this whole special check is skipped
      # if the preceding pred contains BETWEEN.  Yes, parsing SQL is tricky.
      next if $pred[$j] !~ m/\s+between\s+/i  && $self->_is_constant($pred[$i]);

      if ( !$has_op[$i] ) {
         $pred[$j] .= $pred[$i];
         $pred[$i]  = undef;
      }
   }
   PTDEBUG && _d("Predicate fragments joined:", Dumper(\@pred));

   # Step 4: join pred frags with unbalanced ' or " to preceding pred frag.
   for my $i ( 0..@pred ) {
      $pred = $pred[$i];
      next unless defined $pred;
      my $n_single_quotes = ($pred =~ tr/'//);
      my $n_double_quotes = ($pred =~ tr/"//);
      if ( ($n_single_quotes % 2) || ($n_double_quotes % 2) ) {
         $pred[$i]     .= $pred[$i + 1];
         $pred[$i + 1]  = undef;
      }
   }
   PTDEBUG && _d("Predicate fragments balanced:", Dumper(\@pred));

   # Parse, clean up and save the complete predicates.
   my @predicates;
   foreach my $pred ( @pred ) {
      next unless defined $pred;
      $pred =~ s/^\s+//;
      $pred =~ s/\s+$//;
      my $conj;
      if ( $pred =~ s/^(and|or)\s+//i ) {
         $conj = lc $1;
      }
      my ($col, $op, $val) = $pred =~ m/^(.+?)$op_pat(.+)$/o;
      if ( !$col || !$op ) {
         if ( $self->_is_constant($pred) ) {
            $val = lc $pred;
         }
         else {
            die "Failed to parse WHERE condition: $pred";
         }
      }

      # Remove whitespace and lowercase some keywords.
      if ( $col ) {
         $col =~ s/\s+$//;
         $col =~ s/^\(+//;  # no unquoted column name begins with (
      }
      if ( $op ) {
         $op  =  lc $op;
         $op  =~ s/^\s+//;
         $op  =~ s/\s+$//;
      }
      $val =~ s/^\s+//;

      # No unquoted value ends with ) except FUNCTION(...)
      if ( ($op || '') !~ m/IN/i && $val !~ m/^\w+\([^\)]+\)$/ ) {
         $val =~ s/\)+$//;
      }

      if ( $val =~ m/NULL|TRUE|FALSE/i ) {
         $val = lc $val;
      }

      if ( $functions ) {
         $col = shift @$functions if $col =~ m/__FUNC\d+__/;
         $val = shift @$functions if $val =~ m/__FUNC\d+__/;
      }

      push @predicates, {
         predicate => $conj,
         left_arg  => $col,
         operator  => $op,
         right_arg => $val,
      };
   }

   return \@predicates;
}

# Returns true if the value is a constant.  Constants are TRUE, FALSE,
# and any signed number.  A leading AND or OR keyword is removed first.
sub _is_constant {
   my ( $self, $val ) = @_;
   return 0 unless defined $val;
   $val =~ s/^\s*(?:and|or)\s+//;
   return
      $val =~ m/^\s*(?:TRUE|FALSE)\s*$/i || $val =~ m/^\s*-?\d+\s*$/ ? 1 : 0;
}

sub parse_having {
   my ( $self, $having ) = @_;
   # TODO
   return $having;
}

# GROUP BY {col_name | expr | position} [ASC | DESC], ... [WITH ROLLUP]
sub parse_group_by {
   my ( $self, $group_by ) = @_;
   return unless $group_by;
   PTDEBUG && _d('Parsing GROUP BY', $group_by);

   # Remove special "WITH ROLLUP" clause so we're left with a simple csv list.
   my $with_rollup = $group_by =~ s/\s+WITH ROLLUP\s*//i;

   # Parse the identifiers.
   my $idents = $self->parse_identifiers( $self->_parse_csv($group_by) );

   $idents->{with_rollup} = 1 if $with_rollup;

   return $idents;
}

# [ORDER BY {col_name | expr | position} [ASC | DESC], ...]
sub parse_order_by {
   my ( $self, $order_by ) = @_;
   return unless $order_by;
   PTDEBUG && _d('Parsing ORDER BY', $order_by);
   my $idents = $self->parse_identifiers( $self->_parse_csv($order_by) );
   return $idents;
}

# [LIMIT {[offset,] row_count | row_count OFFSET offset}]
sub parse_limit {
   my ( $self, $limit ) = @_;
   return unless $limit;
   my $struct = {
      row_count => undef,
   };
   if ( $limit =~ m/(\S+)\s+OFFSET\s+(\S+)/i ) {
      $struct->{explicit_offset} = 1;
      $struct->{row_count}       = $1;
      $struct->{offset}          = $2;
   }
   else {
      my ($offset, $cnt) = $limit =~ m/(?:(\S+),\s+)?(\S+)/i;
      $struct->{row_count} = $cnt;
      $struct->{offset}    = $offset if defined $offset;
   }
   return $struct;
}

# Parses the list of values after, e.g., INSERT tbl VALUES (...), (...).
# Does not currently parse each set of values; it just splits the list.
sub parse_values {
   my ( $self, $values ) = @_;
   return unless $values;
   $values =~ s/^\s*\(//;
   $values =~ s/\s*\)//;
   my $vals = $self->_parse_csv(
      $values,
      quoted_values => 1,
      remove_quotes => 0,
   );
   return $vals;
}

sub parse_set {
   my ( $self, $set ) = @_;
   PTDEBUG && _d("Parse SET", $set);
   return unless $set;
   my $vals = $self->_parse_csv($set);
   return unless $vals && @$vals;

   my @set;
   foreach my $col_val ( @$vals ) {
      # Do not remove quotes around the val because quotes let us determine
      # the value's type.  E.g. tbl might be a table, but "tbl" is a string,
      # and NOW() is the function, but 'NOW()' is a string.
      my ($col, $val)  = $col_val =~ m/^([^=]+)\s*=\s*(.+)/;
      my $ident_struct = $self->parse_identifier('column', $col);
      my $set_struct   = {
         %$ident_struct,
         value => $val,
      };
      PTDEBUG && _d("SET:", Dumper($set_struct));
      push @set, $set_struct;
   }
   return \@set;
}

# Split any comma-separated list of values, removing leading
# and trailing spaces.
sub _parse_csv {
   my ( $self, $vals, %args ) = @_;
   return unless $vals;

   my @vals;
   if ( $args{quoted_values} ) {
      # If the vals are quoted, then they can contain commas, like:
      # "hello, world!", 'batman'.  If only we could use Text::CSV,
      # then we wouldn't write yet another csv parser to handle this,
      # but Percona Toolkit doesn't like package dependencies, so here's
      # our light implementation of this classic problem.
      my $quote_char   = '';
      VAL:
      foreach my $val ( split(',', $vals) ) {
         PTDEBUG && _d("Next value:", $val);
         # If there's a quote char, then this val is the rest of a previously
         # quoted and split value.
         if ( $quote_char ) {
            PTDEBUG && _d("Value is part of previous quoted value");
            # split() removed the comma inside the quoted value,
            # so add it back else "hello, world" is incorrectly
            # returned as "hello world".
            $vals[-1] .= ",$val";

            # Quoted and split value is complete when a val ends with the
            # same quote char that began the split value.
            if ( $val =~ m/[^\\]*$quote_char$/ ) {
               if ( $args{remove_quotes} ) {
                  $vals[-1] =~ s/^\s*$quote_char//;
                  $vals[-1] =~ s/$quote_char\s*$//;
               }
               PTDEBUG && _d("Previous quoted value is complete:", $vals[-1]);
               $quote_char = '';
            }

            next VAL;
         }

         # Start of new value so strip leading spaces but not trailing
         # spaces yet because if the next check determines that this is
         # a quoted and split val, then trailing space is actually space
         # inside the quoted val, so we want to preserve it.
         $val =~ s/^\s+//;

         # A value is quoted *and* split (because there's a comma in the
         # quoted value) if the vale begins with a quote char and does not
         # end with that char.  E.g.: "foo but not "foo".  The val "foo is
         # the first part of the split value, e.g. "foo, bar".
         if ( $val =~ m/^(['"])/ ) {
            PTDEBUG && _d("Value is quoted");
            $quote_char = $1;  # XXX
            if ( $val =~ m/.$quote_char$/ ) {
               PTDEBUG && _d("Value is complete");
               $quote_char = '';
               if ( $args{remove_quotes} ) {
                  $vals[-1] =~ s/^\s*$quote_char//;
                  $vals[-1] =~ s/$quote_char\s*$//;
               }
            }
            else {
               PTDEBUG && _d("Quoted value is not complete");
            }
         }
         else {
            $val =~ s/\s+$//;
         }

         # Save complete value (e.g. foo or "foo" without the quotes),
         # or save the first part of a quoted and split value; the rest
         # of such a value will be joined back above.
         PTDEBUG && _d("Saving value", ($quote_char ? "fragment" : ""));
         push @vals, $val;
      }
   }
   else {
      @vals = map { s/^\s+//; s/\s+$//; $_ } split(',', $vals);
   }

   return \@vals;
}
{
   no warnings;  # Why? See same line above.
   *parse_on_duplicate = \&_parse_csv;
}

sub parse_columns {
   my ( $self, $cols ) = @_;
   PTDEBUG && _d('Parsing columns list:', $cols);

   my @cols;
   pos $cols = 0;
   while (pos $cols < length $cols) {
      if ($cols =~ m/\G\s*$column_ident\s*(?>,|\Z)/gcxo) {
         my ($db_tbl_col, $as, $alias) = ($1, $2, $3); # XXX
         my $ident_struct = $self->parse_identifier('column', $db_tbl_col);
         $alias =~ s/`//g if $alias;
         my $col_struct = {
            %$ident_struct,
            ($as    ? (explicit_alias => 1)      : ()),
            ($alias ? (alias          => $alias) : ()),
         };
         push @cols, $col_struct;
      }
      else {
         die "Column ident match failed";  # shouldn't happen
      }
   }

   return \@cols;
}

# Remove subqueries from query, return modified query and list of subqueries.
# Each subquery is replaced with the special token __SQn__ where n is the
# subquery's ID.  Subqueries are parsed and removed in to out, last to first;
# i.e. the last, inner-most subquery is ID 0 and the first, outermost
# subquery has the greatest ID.  Each subquery ID corresponds to its index in
# the list of returned subquery hashrefs after the modified query.  __SQ2__
# is subqueries[2].  Each hashref is like:
#   * query    Subquery text
#   * context  scalar, list or identifier
#   * nested   (optional) 1 if nested
# This sub does not handle UNION and it expects to that subqueries start
# with "(SELECT ".  See SQLParser.t for examples.
sub remove_subqueries {
   my ( $self, $query ) = @_;

   # Find starting pos of all subqueries.
   my @start_pos;
   while ( $query =~ m/(\(SELECT )/gi ) {
      my $pos = (pos $query) - (length $1);
      push @start_pos, $pos;
   }

   # Starting with the inner-most, last subquery, find ending pos of
   # all subqueries.  This is done by counting open and close parentheses
   # until all are closed.  The last closing ) should close the ( that
   # opened the subquery.  No sane regex can help us here for cases like:
   # (select max(id) from t where col in(1,2,3) and foo='(bar)').
   @start_pos = reverse @start_pos;
   my @end_pos;
   for my $i ( 0..$#start_pos ) {
      my $closed = 0;
      pos $query = $start_pos[$i];
      while ( $query =~ m/([\(\)])/cg ) {
         my $c = $1;
         $closed += ($c eq '(' ? 1 : -1);
         last unless $closed;
      }
      push @end_pos, pos $query;
   }

   # Replace each subquery with a __SQn__ token.
   my @subqueries;
   my $len_adj = 0;
   my $n    = 0;
   for my $i ( 0..$#start_pos ) {
      PTDEBUG && _d('Query:', $query);
      my $offset = $start_pos[$i];
      my $len    = $end_pos[$i] - $start_pos[$i] - $len_adj;
      PTDEBUG && _d("Subquery $n start", $start_pos[$i],
            'orig end', $end_pos[$i], 'adj', $len_adj, 'adj end',
            $offset + $len, 'len', $len);

      my $struct   = {};
      my $token    = '__SQ' . $n . '__';
      my $subquery = substr($query, $offset, $len, $token);
      PTDEBUG && _d("Subquery $n:", $subquery);

      # Adjust len for next outer subquery.  This is required because the
      # subqueries' start/end pos are found relative to one another, so
      # when a subquery is replaced with its shorter __SQn__ token the end
      # pos for the other subqueries decreases.  The token is shorter than
      # any valid subquery so the end pos should only decrease.
      my $outer_start = $start_pos[$i + 1];
      my $outer_end   = $end_pos[$i + 1];
      if (    $outer_start && ($outer_start < $start_pos[$i])
           && $outer_end   && ($outer_end   > $end_pos[$i]) ) {
         PTDEBUG && _d("Subquery $n nested in next subquery");
         $len_adj += $len - length $token;
         $struct->{nested} = $i + 1;
      }
      else {
         PTDEBUG && _d("Subquery $n not nested");
         $len_adj = 0;
         if ( $subqueries[-1] && $subqueries[-1]->{nested} ) {
            PTDEBUG && _d("Outermost subquery");
         }
      }

      # Get subquery context: scalar, list or identifier.
      if ( $query =~ m/(?:=|>|<|>=|<=|<>|!=|<=>)\s*$token/ ) {
         $struct->{context} = 'scalar';
      }
      elsif ( $query =~ m/\b(?:IN|ANY|SOME|ALL|EXISTS)\s*$token/i ) {
         # Add ( ) around __SQn__ for things like "IN(__SQn__)"
         # unless they're already there.
         if ( $query !~ m/\($token\)/ ) {
            $query =~ s/$token/\($token\)/;
            $len_adj -= 2 if $struct->{nested};
         }
         $struct->{context} = 'list';
      }
      else {
         # If the subquery is not preceded by an operator (=, >, etc.)
         # or IN(), EXISTS(), etc. then it should be an identifier,
         # either a derived table or column.
         $struct->{context} = 'identifier';
      }
      PTDEBUG && _d("Subquery $n context:", $struct->{context});

      # Remove ( ) around subquery so it can be parsed by a parse_TYPE sub.
      $subquery =~ s/^\s*\(//;
      $subquery =~ s/\s*\)\s*$//;

      # Save subquery to struct after modifications above.
      $struct->{query} = $subquery;
      push @subqueries, $struct;
      $n++;
   }

   return $query, @subqueries;
}

sub remove_using_columns {
   my ($self, $from) = @_;
   return unless $from;
   PTDEBUG && _d('Removing cols from USING clauses');
   my $using = qr/
      \bUSING
      \s*
      \(
         ([^\)]+)
      \)
   /xi;
   my @cols;
   $from =~ s/$using/push @cols, $1; "USING ($#cols)"/eg;
   PTDEBUG && _d('FROM:', $from, Dumper(\@cols));
   return $from, \@cols;
}

sub replace_function {
   my ($func, $funcs) = @_;
   my ($func_name) = $func =~ m/^(\w+)/;
   if ( !$ignore_function{uc $func_name} ) {
      my $n = scalar @$funcs;
      push @$funcs, $func;
      return "__FUNC${n}__";
   }
   return $func;
}

sub remove_functions {
   my ($self, $clause) = @_;
   return unless $clause;
   PTDEBUG && _d('Removing functions from clause:', $clause);
   my @funcs;
   $clause =~ s/$function_ident/replace_function($1, \@funcs)/eg;
   PTDEBUG && _d('Function-stripped clause:', $clause, Dumper(\@funcs));
   return $clause, \@funcs;
}

# Sub: parse_identifiers
#   Parse an arrayref of identifiers into their parts.  Identifiers can be
#   column names (optionally qualified), expressions, or constants.
#   GROUP BY and ORDER BY specify a list of identifiers.
#
# Parameters:
#   $idents - Arrayref of indentifiers
#
# Returns:
#   Arrayref of hashes with each identifier's parts, depending on what kind
#   of identifier it is.
sub parse_identifiers {
   my ( $self, $idents ) = @_;
   return unless $idents;
   PTDEBUG && _d("Parsing identifiers");

   my @ident_parts;
   foreach my $ident ( @$idents ) {
      PTDEBUG && _d("Identifier:", $ident);
      my $parts = {};

      if ( $ident =~ s/\s+(ASC|DESC)\s*$//i ) {
         $parts->{sort} = uc $1;  # XXX
      }

      if ( $ident =~ m/^\d+$/ ) {      # Position like 5
         PTDEBUG && _d("Positional ident");
         $parts->{position} = $ident;
      }
      elsif ( $ident =~ m/^\w+\(/ ) {  # Function like MIN(col)
         PTDEBUG && _d("Expression ident");
         my ($func, $expr) = $ident =~ m/^(\w+)\(([^\)]*)\)/;
         $parts->{function}   = uc $func;
         $parts->{expression} = $expr if $expr;
      }
      else {                           # Ref like (table.)column
         PTDEBUG && _d("Table/column ident");
         my ($tbl, $col)  = $self->split_unquote($ident);
         $parts->{table}  = $tbl if $tbl;
         $parts->{column} = $col;
      }
      push @ident_parts, $parts;
   }

   return \@ident_parts;
}

sub parse_identifier {
   my ( $self, $type, $ident ) = @_;
   return unless $type && $ident;
   PTDEBUG && _d("Parsing", $type, "identifier:", $ident);

   my ($func, $expr);
   if ( $ident =~ m/^\w+\(/ ) {  # Function like MIN(col)
      ($func, $expr) = $ident =~ m/^(\w+)\(([^\)]*)\)/;
      PTDEBUG && _d('Function', $func, 'arg', $expr);
      return { col => $ident } unless $expr;  # NOW()
      $ident = $expr;  # col from MAX(col)
   }

   my %ident_struct;
   my @ident_parts = map { s/`//g; $_; } split /[.]/, $ident;
   if ( @ident_parts == 3 ) {
      @ident_struct{qw(db tbl col)} = @ident_parts;
   }
   elsif ( @ident_parts == 2 ) {
      my @parts_for_type = $type eq 'column' ? qw(tbl col)
                         : $type eq 'table'  ? qw(db  tbl)
                         : die "Invalid identifier type: $type";
      @ident_struct{@parts_for_type} = @ident_parts;
   }
   elsif ( @ident_parts == 1 ) {
      my $part = $type eq 'column' ? 'col' : 'tbl';
      @ident_struct{($part)} = @ident_parts;
   }
   else {
      die "Invalid number of parts in $type reference: $ident";
   }

   if ( $self->{Schema} ) {
      if ( $type eq 'column' && (!$ident_struct{tbl} || !$ident_struct{db}) ) {
         my $qcol = $self->{Schema}->find_column(%ident_struct);
         if ( $qcol && @$qcol == 1 ) {
            @ident_struct{qw(db tbl)} = @{$qcol->[0]}{qw(db tbl)};
         }
      }
      elsif ( !$ident_struct{db} ) {
         my $qtbl = $self->{Schema}->find_table(%ident_struct);
         if ( $qtbl && @$qtbl == 1 ) {
            $ident_struct{db} = $qtbl->[0];
         }
      }
   }

   if ( $func ) {
      $ident_struct{func} = uc $func;
   }

   PTDEBUG && _d($type, "identifier struct:", Dumper(\%ident_struct));
   return \%ident_struct;
}

# Sub: split_unquote
#   Split and unquote a table name.  The table name can be database-qualified
#   or not, like `db`.`table`.  The table name can be backtick-quoted or not.
#
# Parameters:
#   $db_tbl     - Table name
#   $default_db - Default database name to return if $db_tbl is not
#                 database-qualified
#
# Returns:
#   Array: unquoted database (possibly undef), unquoted table
sub split_unquote {
   my ( $self, $db_tbl, $default_db ) = @_;
   $db_tbl =~ s/`//g;
   my ( $db, $tbl ) = split(/[.]/, $db_tbl);
   if ( !$tbl ) {
      $tbl = $db;
      $db  = $default_db;
   }
   return ($db, $tbl);
}

# Sub: is_identifier
#   Determine if something is a schema object identifier.
#   E.g.: `tbl` is an identifier, but "tbl" is a string and 1 is a number.
#   See <http://dev.mysql.com/doc/refman/5.1/en/identifiers.html>
#
# Parameters:
#   $thing - Name of something, including any quoting as it appears in a query.
#
# Returns:
#   True of $thing is an identifier, else false.
sub is_identifier {
   my ( $self, $thing ) = @_;

   # Nothing is not an ident.
   return 0 unless $thing;

   # Tables, columns, FUNCTIONS(), etc. cannot be 'quoted' or "quoted"
   # because that would make them strings, not idents.
   return 0 if $thing =~ m/\s*['"]/;

   # Numbers, ints or floats, are not identifiers.
   return 0 if $thing =~ m/^\s*\d+(?:\.\d+)?\s*$/;

   # Keywords are not identifiers.
   return 0 if $thing =~ m/^\s*(?>
       NULL
      |DUAL
   )\s*$/xi;

   # The column ident really matches everything: db, db.tbl, db.tbl.col,
   # function(), @@var, etc.
   return 1 if $thing =~ m/^\s*$column_ident\s*$/;

   # If the thing isn't quoted and doesn't match our ident pattern, then
   # it's probably not an ident.
   return 0;
}

sub set_Schema {
   my ( $self, $sq ) = @_;
   $self->{Schema} = $sq;
   return;
}

sub _d {
   my ($package, undef, $line) = caller 0;
   @_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
        map { defined $_ ? $_ : 'undef' }
        @_;
   print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}

1;
}
# ###########################################################################
# End SQLParser package
# ###########################################################################