diff --git a/lib/QueryRewriter.pm b/lib/QueryRewriter.pm
index 5e4fa979..b77d81ac 100644
--- a/lib/QueryRewriter.pm
+++ b/lib/QueryRewriter.pm
@@ -173,9 +173,17 @@ sub fingerprint {
    $query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
       && return $query;
 
-   #$query =~ s/\\["']//g; # quoted strings
-   $query =~ s/".*?"/?/sg; # quoted strings
-   $query =~ s/'.*?'/?/sg; # quoted strings
+   # -----------------------------------------------------------
+   # Remove quoted strings
+   # -----------------------------------------------------------
+   $query =~ s/([^\\])(\\')/$1/sg; # drop \' that follows a non-backslash char
+   $query =~ s/([^\\])(\\")/$1/sg; # drop \" that follows a non-backslash char
+   $query =~ s/\\\\//sg; # drop escaped backslashes (\\)
+   $query =~ s/\\'//sg; # drop any \' still left
+   $query =~ s/\\"//sg; # drop any \" still left
+   $query =~ s/([^\\])(".*?[^\\]?")/$1?/sg; # "..." => ?, keeping the char before it
+   $query =~ s/([^\\])('.*?[^\\]?')/$1?/sg; # '...' => ?, keeping the char before it
+   # -----------------------------------------------------------
 
    $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
 
@@ -218,8 +226,8 @@ sub fingerprint {
    $query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
    # The following are disabled because of speed issues. Should we try to
    # normalize whitespace between and around operators? My gut feeling is no.
-   # $query =~ s/ , | ,|, /,/g; # Normalize commas
-   # $query =~ s/ = | =|= /=/g; # Normalize equals
+   # $query =~ s/ , | ,|, /,/g; # Normalize commas
+   # $query =~ s/ = | =|= /=/g; # Normalize equals
    # $query =~ s# [,=+*/-] ?|[,=+*/-] #+#g; # Normalize operators
 
    # Remove ASC keywords from ORDER BY clause so these queries fingerprint
@@ -239,6 +247,55 @@ sub fingerprint {
    return $query;
 }
 
+# Returns a copy of $string with every complete quoted span removed,
+# delimiters included. A span opens at an unescaped ' or " and closes at
+# the next unescaped occurrence of the same quote character; a backslash
+# escapes the character that follows it. A quote that is never closed is
+# not treated as quoted text: it and everything after it are kept as-is.
+sub remove_quoted_text {
+   my ($string) = @_;
+   return $string unless defined $string;
+
+   my $new_string = '';
+   my $pending    = ''; # text since the opening quote, quote included
+   my $in_quote   = ''; # quote char that opened the current span, or ''
+   my $escaped    = 0;  # true when the previous char was an unescaped \
+
+   for my $c ( split //, $string ) {
+      if ( $in_quote ) {
+         $pending .= $c;
+         if ( $escaped ) {
+            $escaped = 0;
+         }
+         elsif ( $c eq '\\' ) {
+            $escaped = 1;
+         }
+         elsif ( $c eq $in_quote ) {
+            # Closing quote: discard the whole buffered span.
+            $in_quote = '';
+            $pending  = '';
+         }
+         next;
+      }
+
+      if ( $escaped ) {
+         $escaped = 0;
+      }
+      elsif ( $c eq '\\' ) {
+         $escaped = 1;
+      }
+      elsif ( $c eq "'" or $c eq '"' ) {
+         $in_quote = $c;
+         $pending  = $c;
+         next;
+      }
+      $new_string .= $c;
+   }
+
+   # An unterminated quote is not quoted text, so keep what was buffered.
+   return $new_string . $pending;
+}
+
 # Gets the verbs from an SQL query, such as SELECT, UPDATE, etc.
 sub distill_verbs {
    my ( $self, $query ) = @_;