Fixed quoted strings regexes

This commit is contained in:
Carlos Salguero
2021-08-27 07:12:05 -03:00
parent af0f6db9f2
commit 3f20158067

View File

@@ -173,9 +173,17 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
#$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
# -----------------------------------------------------------
# Remove quoted strings
# -----------------------------------------------------------
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/(?:[^\\])".*?[^\\]?"/ ?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
# -----------------------------------------------------------
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
@@ -218,8 +226,8 @@ sub fingerprint {
$query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
# The following are disabled because of speed issues. Should we try to
# normalize whitespace between and around operators? My gut feeling is no.
# $query =~ s/ , | ,|, /,/g; # Normalize commas
# $query =~ s/ = | =|= /=/g; # Normalize equals
# $query =~ s/ , | ,|, /,/g; # Normalize commas
# $query =~ s/ = | =|= /=/g; # Normalize equals
# $query =~ s# [,=+*/-] ?|[,=+*/-] #+#g; # Normalize operators
# Remove ASC keywords from ORDER BY clause so these queries fingerprint
@@ -239,6 +247,26 @@ sub fingerprint {
return $query;
}
# Returns a copy of $string with every quoted literal removed.
#
# A literal starts at an unescaped single or double quote and ends at the
# next unescaped quote of the same kind; the delimiters are removed along
# with the content. Inside a literal the other kind of quote is ordinary
# text, and a backslash escapes the character that follows it (so \' does
# not terminate a single-quoted literal). Outside a literal, a backslash and
# the character it escapes are copied through unchanged, which means an
# escaped quote never opens a literal.
#
# Parameters:
#   $string - the text to scan.
#
# Returns:
#   The text with quoted literals deleted. An undefined or empty $string is
#   returned as-is. If a literal is never closed, everything from its
#   opening quote to the end of the string is removed.
sub remove_quoted_text {
    my ($string) = @_;
    return $string unless defined $string and length $string;

    my $new_string = '';
    my $in_quote   = '';    # the quote character that opened the literal
    my $escaped    = 0;     # true when the previous character was a backslash

    for my $c ( split //, $string ) {
        if ($escaped) {
            # This character is escaped: it can neither open nor close a
            # literal. Keep it only when we are outside quoted text.
            $new_string .= $c unless $in_quote;
            $escaped = 0;
        }
        elsif ( $c eq '\\' ) {
            $escaped = 1;
            $new_string .= $c unless $in_quote;
        }
        elsif ($in_quote) {
            # Inside a literal everything is dropped; only the matching
            # quote character ends it.
            $in_quote = '' if $c eq $in_quote;
        }
        elsif ( $c eq q{'} or $c eq q{"} ) {
            $in_quote = $c;
        }
        else {
            $new_string .= $c;
        }
    }

    return $new_string;
}
# Gets the verbs from an SQL query, such as SELECT, UPDATE, etc.
sub distill_verbs {
my ( $self, $query ) = @_;