mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-11 05:29:30 +00:00
Add pt-fingerprint.
This commit is contained in:
2143
bin/pt-fingerprint
Executable file
2143
bin/pt-fingerprint
Executable file
File diff suppressed because it is too large
Load Diff
@@ -175,10 +175,30 @@ sub fingerprint {
|
||||
$query =~ s/\\["']//g; # quoted strings
|
||||
$query =~ s/".*?"/?/sg; # quoted strings
|
||||
$query =~ s/'.*?'/?/sg; # quoted strings
|
||||
# This regex is extremely broad in its definition of what looks like a
|
||||
# number. That is for speed.
|
||||
$query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers
|
||||
$query =~ s/[xb.+-]\?/?/g; # Clean up leftovers
|
||||
|
||||
# MD5 checksums which are always 32 hex chars
|
||||
if ( $self->{match_md5_checksums} ) {
|
||||
$query =~ s/([._-])[a-f0-9]{32}/$1?/g;
|
||||
}
|
||||
|
||||
# Things resembling numbers/hex.
|
||||
if ( !$self->{match_embedded_numbers} ) {
|
||||
# For speed, this regex is extremely broad in its definition
|
||||
# of what looks like a number.
|
||||
$query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
|
||||
}
|
||||
else {
|
||||
$query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
|
||||
}
|
||||
|
||||
# Clean up leftovers
|
||||
if ( $self->{match_md5_checksums} ) {
|
||||
$query =~ s/[xb+-]\?/?/g;
|
||||
}
|
||||
else {
|
||||
$query =~ s/[xb.+-]\?/?/g;
|
||||
}
|
||||
|
||||
$query =~ s/\A\s+//; # Chop off leading whitespace
|
||||
chomp $query; # Kill trailing whitespace
|
||||
$query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace
|
||||
|
@@ -10,7 +10,7 @@ BEGIN {
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use Test::More tests => 266;
|
||||
use Test::More tests => 271;
|
||||
|
||||
use QueryRewriter;
|
||||
use QueryParser;
|
||||
@@ -349,6 +349,64 @@ is(
|
||||
"Fingerprint LOAD DATA INFILE"
|
||||
);
|
||||
|
||||
# Fingerprint MD5 checksums (32-character hex strings).  This is a
# special feature used by pt-fingerprint.
$qr = QueryRewriter->new(
   QueryParser         => $qp,
   match_md5_checksums => 1,
);

# The checksum is the leading part of the table name.
is(
   $qr->fingerprint(
      "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
   ),
   "select * from db.?_temp where id=?",
   "Fingerprint db.MD5_tbl"
);

# The checksum is the trailing part of the table name.
is(
   $qr->fingerprint(
      "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
   ),
   "select * from db.temp_? where id=?",
   "Fingerprint db.tbl_MD5"
);

# Both options together must give the same results for MD5 table names.
$qr = QueryRewriter->new(
   QueryParser            => $qp,
   match_md5_checksums    => 1,
   match_embedded_numbers => 1,
);

is(
   $qr->fingerprint(
      "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
   ),
   "select * from db.?_temp where id=?",
   "Fingerprint db.MD5_tbl (with match_embedded_numbers)"
);

is(
   $qr->fingerprint(
      "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
   ),
   "select * from db.temp_? where id=?",
   "Fingerprint db.tbl_MD5 (with match_embedded_numbers)"
);

# With only match_embedded_numbers, a digit inside an identifier (rt_5min)
# is preserved while the standalone literal (id=1) is still replaced.
$qr = QueryRewriter->new(
   QueryParser            => $qp,
   match_embedded_numbers => 1,
);

is(
   $qr->fingerprint(
      "SELECT * FROM prices.rt_5min where id=1"
   ),
   "select * from prices.rt_5min where id=?",
   "Fingerprint db.tbl<number>name (preserve number)"
);
|
||||
|
||||
# #############################################################################
|
||||
# convert_to_select()
|
||||
# #############################################################################
|
||||
|
101
t/pt-fingerprint/basics.t
Normal file
101
t/pt-fingerprint/basics.t
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
BEGIN {
   # The test can only locate the toolkit's libraries through this
   # environment variable, so refuse to start when it is unset or does not
   # name an existing directory.
   if ( !$ENV{PERCONA_TOOLKIT_BRANCH} || ! -d $ENV{PERCONA_TOOLKIT_BRANCH} ) {
      die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n";
   }

   # Search the branch's lib/ directory before any other module path.
   unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
|
||||
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use Test::More tests => 7;
|
||||
|
||||
use PerconaTest;
|
||||
# Load the tool into this process so that pt_fingerprint::main() can be
# called directly by the helpers below.
# NOTE(review): $trunk is not declared in this file; presumably it is
# exported by PerconaTest -- confirm.
require "$trunk/bin/pt-fingerprint";

# Scratch buffer for captured tool output; test_query() also writes to it.
my $output;

# Directory holding the queryNNN sample files and their .fingerprint files.
my $sample = "$trunk/t/pt-fingerprint/samples";

# pt-query-digest is used to generate a missing .fingerprint file.
my $pqd = "$trunk/bin/pt-query-digest";

# Smoke test: the tool can be executed and prints its help text.
$output = `$trunk/bin/pt-fingerprint --help`;
like(
   $output,
   qr/--help/,
   "It runs"
);
|
||||
|
||||
|
||||
# Check pt-fingerprint's output for one sample file against the expected
# fingerprint stored next to it in "$sample/$file.fingerprint".
#
# Arguments:
#   $file - name of a sample file inside the $sample directory.
#
# When the expected-result file does not exist yet, it is generated from
# pt-query-digest's --fingerprint output and the creation is reported with
# diag().  Dies if the expected-result file cannot be opened afterwards.
# Runs exactly one is() test.
sub test_query_file {
   my ($file) = @_;

   my $expect_file = "$sample/$file.fingerprint";
   if ( ! -f $expect_file ) {
      # Take the line following the "Fingerprint" header in the report and
      # strip its leading "# " to get the bare fingerprint.
      system("$pqd --fingerprint $sample/$file | awk '/Fingerprint/ { getline; print; exit; }' | sed -e 's/^#[ ]*//' > $sample/$file.fingerprint");
      diag("Created $sample/$file.fingerprint");
   }

   # Read the expected fingerprint directly rather than shelling out to cat.
   open my $fh, '<', $expect_file
      or die "Cannot open $expect_file: $OS_ERROR\n";
   my $expect = do { local $INPUT_RECORD_SEPARATOR; <$fh> };
   close $fh;
   chomp $expect;  # after the slurp block, so $/ is a newline again

   # NOTE(review): output() is not defined in this file; presumably it is
   # PerconaTest's helper that captures what the code ref prints -- confirm.
   my $got = output(
      sub { pt_fingerprint::main("$sample/$file") }
   );
   chomp($got);

   is(
      $got,
      $expect,
      "$file fingerprint"
   );
}
|
||||
|
||||
# Run test_query_file() for every sample named queryNNN.  readdir returns
# entries in no particular order, so sort the names to keep the order of
# the tests (and of the TAP output) the same on every run.
opendir my $dir, $sample or die "Cannot open $sample: $OS_ERROR\n";
my @query_files = sort grep { m/^query\d+$/ } readdir($dir);
closedir $dir;

for my $file ( @query_files ) {
   test_query_file($file);
}
|
||||
|
||||
|
||||
# Fingerprint a single query passed on the command line via --query and
# compare the result with the expected fingerprint.
#
# Named arguments:
#   query  - SQL text to fingerprint.
#   expect - fingerprint the tool is expected to print.
#   ops    - optional arrayref of extra command-line options.
#   name   - optional test name; defaults to "Fingerprint " followed by
#            the first 70 characters of the query.
#
# Stores the captured output in the file-level $output and runs one is().
sub test_query {
   my (%args) = @_;
   my ($query, $expect) = @args{qw(query expect)};
   my @ops = @{ $args{ops} || [] };

   $output = output(
      sub { pt_fingerprint::main('--query', $query, @ops) }
   );
   chomp($output);

   my $test_name = $args{name};
   if ( !$test_name ) {
      $test_name = "Fingerprint " . substr($query, 0, 70);
   }

   is($output, $expect, $test_name);
}
|
||||
|
||||
# Command-line cases, one hashref of test_query() arguments per case.
for my $case (
   # Plain numeric literal, no options.
   {
      query  => 'select * from tbl where id=1',
      expect => 'select * from tbl where id=?',
   },
   # MD5 checksum at the start of the table name.
   {
      name   => "Fingerprint MD5_word",
      query  => "SELECT c FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1",
      expect => "select c from db.?_temp where id=?",
      ops    => ['--match-md5-checksums'],
   },
   # MD5 checksum at the end of the table name.
   {
      name   => "Fingerprint word_MD5",
      query  => "SELECT c FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1",
      expect => "select c from db.temp_? where id=?",
      ops    => ['--match-md5-checksums'],
   },
   # Digits that are part of an identifier are kept.
   {
      name   => "Fingerprint word<number>",
      query  => "SELECT c FROM db.catch22 WHERE id is null",
      expect => "select c from db.catch22 where id is ?",
      ops    => ['--match-embedded-numbers'],
   },
) {
   test_query(%{$case});
}
|
||||
# #############################################################################
|
||||
# Done.
|
||||
# #############################################################################
|
||||
exit;
|
2
t/pt-fingerprint/samples/query001
Normal file
2
t/pt-fingerprint/samples/query001
Normal file
@@ -0,0 +1,2 @@
|
||||
# Query_time: 1
|
||||
select * from db.tbl where id=1 or foo='bar';
|
1
t/pt-fingerprint/samples/query001.fingerprint
Normal file
1
t/pt-fingerprint/samples/query001.fingerprint
Normal file
@@ -0,0 +1 @@
|
||||
select * from db.tbl where id=? or foo=?
|
2
t/pt-fingerprint/samples/query002
Normal file
2
t/pt-fingerprint/samples/query002
Normal file
@@ -0,0 +1,2 @@
|
||||
# Query_time: 1
|
||||
select col from db.tbl1 where id in (1, 2, 3);
|
1
t/pt-fingerprint/samples/query002.fingerprint
Normal file
1
t/pt-fingerprint/samples/query002.fingerprint
Normal file
@@ -0,0 +1 @@
|
||||
select col from db.tbl? where id in(?+)
|
Reference in New Issue
Block a user