Compare commits

...

5 Commits

Author SHA1 Message Date
Carlos Salguero
c874287220 Fixed merge conflicts with 3.x branch 2021-08-27 14:00:33 -03:00
Carlos Salguero
f8683b76fa Merge branch '3.x' into PT-1900 2021-08-27 13:57:10 -03:00
Carlos Salguero
b309b23222 PT-1900 Fixed query rewriter to properly handle quoted text 2021-08-27 09:14:05 -03:00
Carlos Salguero
3f20158067 Fixed quoted strings regexes 2021-08-27 07:12:05 -03:00
Carlos Salguero
af0f6db9f2 PT-1900 WIP 2020-12-29 10:17:34 -03:00
10 changed files with 90 additions and 31 deletions

View File

@@ -1565,8 +1565,8 @@ $bal = qr/
my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */
my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW
sub new {
@@ -1581,7 +1581,8 @@ sub strip_comments {
$query =~ s/$mlc_re//go;
$query =~ s/$olc_re//go;
if ( $query =~ m/$vlc_rf/i ) { # contains show + version
$query =~ s/$vlc_re//go;
my $qualifier = $1 || '';
$query =~ s/$vlc_re/$qualifier/go;
}
return $query;
}
@@ -1652,9 +1653,15 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
if ( $self->{match_md5_checksums} ) {
$query =~ s/([._-])[a-f0-9]{32}/$1?/g;
@@ -1704,6 +1711,13 @@ sub distill_verbs {
$query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
$query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
if ( $query =~ m/\A\s*LOAD/i ) {
my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
$tbl ||= '';
$tbl =~ s/`//g;
return "LOAD DATA $tbl";
}
if ( $query =~ m/\Aadministrator command:/ ) {
$query =~ s/administrator command:/ADMIN/;
$query = uc $query;
@@ -1716,7 +1730,7 @@ sub distill_verbs {
PTDEBUG && _d($query);
$query = uc $query;
$query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
$query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
$query =~ s/\s+COUNT[^)]+\)//g;
$query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
@@ -1731,6 +1745,7 @@ sub distill_verbs {
eval $QueryParser::tbl_ident;
my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
if ( $dds) {
$query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
$obj = uc $obj if $obj;
PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
@@ -1797,6 +1812,9 @@ sub distill {
map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
$query = $verbs;
}
elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
return $verbs;
}
else {
my @tables = $self->__distill_tables($query, $table, %args);
$query = join(q{ }, $verbs, @tables);

View File

@@ -2520,9 +2520,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -4911,9 +4911,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -2945,9 +2945,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -2946,9 +2946,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -4818,9 +4818,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -173,9 +173,17 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
# -----------------------------------------------------------
# Remove quoted strings
# -----------------------------------------------------------
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
# -----------------------------------------------------------
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
@@ -218,8 +226,8 @@ sub fingerprint {
$query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
# The following are disabled because of speed issues. Should we try to
# normalize whitespace between and around operators? My gut feeling is no.
# $query =~ s/ , | ,|, /,/g; # Normalize commas
# $query =~ s/ = | =|= /=/g; # Normalize equals
# $query =~ s/ , | ,|, /,/g; # Normalize commas
# $query =~ s/ = | =|= /=/g; # Normalize equals
# $query =~ s# [,=+*/-] ?|[,=+*/-] #+#g; # Normalize operators
# Remove ASC keywords from ORDER BY clause so these queries fingerprint

View File

@@ -162,7 +162,7 @@ is(
# This used to be a known deficiency (escaped backslashes leaked into the
# fingerprint); the quoted-string rewrite now collapses the literal to "?".
is(
$qr->fingerprint("select '\\\\' from foo"),
"select '\\ from foo",
"select ? from foo",
"Does not handle all quoted strings",
);
@@ -1478,6 +1478,19 @@ is(
"Fingerprint db.tbl<number>name (preserve number)"
);
is(
$qr->fingerprint(
"SELECT i FROM d.t WHERE i=\"3\""
),
"select i from d.t where i=?",
"Fingerprint db.tbl<number>name (preserve number)"
);
is(
$qr->fingerprint("CALL foo(1, 2, 3)"),
"call foo",
'Fingerprints stored procedure calls specially',
);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -259,7 +259,7 @@ SELECT `SCHEMA_NAME` FROM `INFORMATION_SCHEMA`.`SCHEMATA`, (SELECT DB_first_leve
# EXPLAIN /*!50100 PARTITIONS*/
SELECT USER()\G
# Query 12: 0 QPS, 0x concurrency, ID 0x75A885FE43F31908754B91A2F3BD1E20 at byte 1082
# Query 12: 0 QPS, 0x concurrency, ID 0x15BF4DCE0B364CE831C14D6853A472B7 at byte 1082
# This item is included in the report because it matches --limit.
# Scores: V/M = 0.00
# Time range: all events occurred at 2016-06-07T19:07:02.565999Z
@@ -303,4 +303,4 @@ SELECT 1 FROM (SELECT `GRANTEE`, `IS_GRANTABLE` FROM `INFORMATION_SCHEMA`.`COLUM
# 9 0xDDABDE67AC3044CAED549F59FFFA541B 0.0000 0.0% 1 0.0000 0.00 SELECT phpmyadmin.pma__navigationhiding
# 10 0x35CCC630581DCD5AA46100310F18DEB9 0.0000 0.0% 1 0.0000 0.00 SELECT INFORMATION_SCHEMA.SCHEMATA
# 11 0x7B48FAA9C951DD8A389FF9DA2DF3DF62 0.0000 0.0% 1 0.0000 0.00 SELECT
# 12 0x75A885FE43F31908754B91A2F3BD1E20 0.0000 0.0% 1 0.0000 0.00 SELECT UNION INFORMATION_SCHEMA.COLUMN_PRIVILEGES INFORMATION_SCHEMA.TABLE_PRIVILEGES INFORMATION_SCHEMA.SCHEMA_PRIVILEGES INFORMATION_SCHEMA.USER_PRIVILEGES
# 12 0x15BF4DCE0B364CE831C14D6853A472B7 0.0000 0.0% 1 0.0000 0.00 SELECT UNION INFORMATION_SCHEMA.COLUMN_PRIVILEGES INFORMATION_SCHEMA.TABLE_PRIVILEGES INFORMATION_SCHEMA.SCHEMA_PRIVILEGES INFORMATION_SCHEMA.USER_PRIVILEGES

View File

@@ -1 +1 @@
# Exec time 100 10s 1s 3s 2s 3s 896ms 2s
# Exec time 100 9s 2s 4s 3s 4s 786ms 3s