diff --git a/bin/pt-index-usage b/bin/pt-index-usage index ad4b3810..aeaa7aa3 100755 --- a/bin/pt-index-usage +++ b/bin/pt-index-usage @@ -2374,8 +2374,8 @@ $bal = qr/ my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */ -my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */ -my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW +my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */ +my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW sub new { @@ -2390,7 +2390,8 @@ sub strip_comments { $query =~ s/$mlc_re//go; $query =~ s/$olc_re//go; if ( $query =~ m/$vlc_rf/i ) { # contains show + version - $query =~ s/$vlc_re//go; + my $qualifier = $1 || ''; + $query =~ s/$vlc_re/$qualifier/go; } return $query; } @@ -2465,6 +2466,8 @@ sub fingerprint { $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings + $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values + if ( $self->{match_md5_checksums} ) { $query =~ s/([._-])[a-f0-9]{32}/$1?/g; } @@ -2513,6 +2516,13 @@ sub distill_verbs { $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; + if ( $query =~ m/\A\s*LOAD/i ) { + my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i; + $tbl ||= ''; + $tbl =~ s/`//g; + return "LOAD DATA $tbl"; + } + if ( $query =~ m/\Aadministrator command:/ ) { $query =~ s/administrator command:/ADMIN/; $query = uc $query; @@ -2525,7 +2535,7 @@ sub distill_verbs { PTDEBUG && _d($query); $query = uc $query; - $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; + $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g; $query =~ s/\s+COUNT[^)]+\)//g; $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; @@ -2540,6 +2550,7 @@ sub distill_verbs { eval $QueryParser::tbl_ident; my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; if ( $dds) { + $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i; my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; $obj = uc $obj if $obj; PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); @@ -2606,6 +2617,9 @@ sub distill { map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; $query = $verbs; } + elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) { + return $verbs; + } else { my @tables = $self->__distill_tables($query, $table, %args); $query = join(q{ }, $verbs, @tables); diff --git a/bin/pt-kill b/bin/pt-kill index 7dd05698..ca744b32 100755 --- a/bin/pt-kill +++ b/bin/pt-kill @@ -4652,8 +4652,8 @@ $bal = qr/ my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */ -my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */ -my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW +my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */ +my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW sub new { @@ -4668,7 +4668,8 @@ sub strip_comments { $query =~ s/$mlc_re//go; $query =~ s/$olc_re//go; if ( $query =~ m/$vlc_rf/i ) { # contains show + version - $query =~ s/$vlc_re//go; + my $qualifier = $1 || ''; + $query =~ s/$vlc_re/$qualifier/go; } return $query; } @@ -4743,6 +4744,8 @@ sub fingerprint { $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings + $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values + if ( $self->{match_md5_checksums} ) { $query =~ s/([._-])[a-f0-9]{32}/$1?/g; } @@ -4791,6 +4794,13 @@ sub distill_verbs { $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; + if ( $query =~ m/\A\s*LOAD/i ) { + my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i; + $tbl ||= ''; + $tbl =~ s/`//g; + return "LOAD DATA $tbl"; + } + if ( $query =~ m/\Aadministrator command:/ ) { $query =~ s/administrator command:/ADMIN/; $query = uc $query; @@ -4803,7 +4813,7 @@ sub distill_verbs { PTDEBUG && _d($query); $query = uc $query; - $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; + $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g; $query =~ s/\s+COUNT[^)]+\)//g; $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; @@ -4818,6 +4828,7 @@ sub distill_verbs { eval $QueryParser::tbl_ident; my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; if ( $dds) { + $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i; my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; $obj = uc $obj if $obj; PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); @@ -4884,6 +4895,9 @@ sub distill { map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; $query = $verbs; } + elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) { + return $verbs; + } else { my @tables = $self->__distill_tables($query, $table, %args); $query = join(q{ }, $verbs, @tables); diff --git a/bin/pt-query-digest b/bin/pt-query-digest index 0908ab93..0ba4f32f 100755 --- a/bin/pt-query-digest +++ b/bin/pt-query-digest @@ -2891,6 +2891,8 @@ sub fingerprint { $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings + $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values + if ( $self->{match_md5_checksums} ) { $query =~ s/([._-])[a-f0-9]{32}/$1?/g; } @@ -5797,7 +5799,7 @@ sub top_events { <=> $classes->{$b}->{$args{attrib}}->{$args{orderby}} } grep { defined $classes->{$_}->{$args{attrib}}->{$args{orderby}} - } keys %$classes; + } keys %$classes; # this should first be sorted for test consistency, but many tests already in place would fail my @chosen; # top events my @other; # other events (< top) my ($total, $count) = (0, 0); diff --git a/bin/pt-table-usage b/bin/pt-table-usage index 119128fe..b23781cd 100755 --- a/bin/pt-table-usage +++ b/bin/pt-table-usage @@ -2112,8 +2112,8 @@ $bal = qr/ my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */ -my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */ -my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW +my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */ +my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW sub new { @@ -2128,7 +2128,8 @@ sub strip_comments { $query =~ s/$mlc_re//go; $query =~ s/$olc_re//go; if ( $query =~ m/$vlc_rf/i ) { # contains show + version - $query =~ s/$vlc_re//go; + my $qualifier = $1 || ''; + $query =~ s/$vlc_re/$qualifier/go; } return $query; } @@ -2203,6 +2204,8 @@ sub fingerprint { $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings + $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values + if ( $self->{match_md5_checksums} ) { $query =~ s/([._-])[a-f0-9]{32}/$1?/g; } @@ -2251,6 +2254,13 @@ sub distill_verbs { $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; + if ( $query =~ m/\A\s*LOAD/i ) { + my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i; + $tbl ||= ''; + $tbl =~ s/`//g; + return "LOAD DATA $tbl"; + } + if ( $query =~ m/\Aadministrator command:/ ) { $query =~ s/administrator command:/ADMIN/; $query = uc $query; @@ -2263,7 +2273,7 @@ sub distill_verbs { PTDEBUG && _d($query); $query = uc $query; - $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; + $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g; $query =~ s/\s+COUNT[^)]+\)//g; $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; @@ -2278,6 +2288,7 @@ sub distill_verbs { eval $QueryParser::tbl_ident; my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; if ( $dds) { + $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i; my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; $obj = uc $obj if $obj; PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); @@ -2344,6 +2355,9 @@ sub distill { map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; $query = $verbs; } + elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) { + return $verbs; + } else { my @tables = $self->__distill_tables($query, $table, %args); $query = join(q{ }, $verbs, @tables); diff --git a/bin/pt-upgrade b/bin/pt-upgrade index 71d09444..323a6ae6 100755 --- a/bin/pt-upgrade +++ b/bin/pt-upgrade @@ -4559,8 +4559,8 @@ $bal = qr/ my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */ -my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */ -my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW +my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */ +my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW sub new { @@ -4575,7 +4575,8 @@ sub strip_comments { $query =~ s/$mlc_re//go; $query =~ s/$olc_re//go; if ( $query =~ m/$vlc_rf/i ) { # contains show + version - $query =~ s/$vlc_re//go; + my $qualifier = $1 || ''; + $query =~ s/$vlc_re/$qualifier/go; } return $query; } @@ -4650,6 +4651,8 @@ sub fingerprint { $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings + $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values + if ( $self->{match_md5_checksums} ) { $query =~ s/([._-])[a-f0-9]{32}/$1?/g; } @@ -4698,6 +4701,13 @@ sub distill_verbs { $query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK"; $query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1"; + if ( $query =~ m/\A\s*LOAD/i ) { + my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i; + $tbl ||= ''; + $tbl =~ s/`//g; + return "LOAD DATA $tbl"; + } + if ( $query =~ m/\Aadministrator command:/ ) { $query =~ s/administrator command:/ADMIN/; $query = uc $query; @@ -4710,7 +4720,7 @@ sub distill_verbs { PTDEBUG && _d($query); $query = uc $query; - $query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g; + $query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g; $query =~ s/\s+COUNT[^)]+\)//g; $query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms; @@ -4725,6 +4735,7 @@ sub distill_verbs { eval $QueryParser::tbl_ident; my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i; if ( $dds) { + $query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i; my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i; $obj = uc $obj if $obj; PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj); @@ -4791,6 +4802,9 @@ sub distill { map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for; $query = $verbs; } + elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) { + return $verbs; + } else { my @tables = $self->__distill_tables($query, $table, %args); $query = join(q{ }, $verbs, @tables); diff --git a/lib/QueryRewriter.pm b/lib/QueryRewriter.pm index 05d1fcaa..a534a86d 100644 --- a/lib/QueryRewriter.pm +++ b/lib/QueryRewriter.pm @@ -177,6 +177,8 @@ sub fingerprint { $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings + $query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values + # MD5 checksums which are always 32 hex chars if ( $self->{match_md5_checksums} ) { $query =~ s/([._-])[a-f0-9]{32}/$1?/g; diff --git a/t/lib/QueryRewriter.t b/t/lib/QueryRewriter.t index 2d8f7f89..1fa43762 100644 --- a/t/lib/QueryRewriter.t +++ b/t/lib/QueryRewriter.t @@ -416,6 +416,16 @@ is( "Fingerprint /* -- comment */ SELECT (bug 1174956)" ); + +# issue 965553 + +is( + $qr->fingerprint('SELECT * FROM tbl WHERE id=1 AND flag=true AND trueflag=FALSE'), + 'select * from tbl where id=? and flag=? and trueflag=?', + 'boolean values abstracted correctly', +); + + # ############################################################################# # convert_to_select() # ############################################################################# @@ -1454,6 +1464,8 @@ is( "distill CREATE TABLE IF NOT EXISTS foo", ); + + # ############################################################################# # Done. # #############################################################################