Add pt-fingerprint.

This commit is contained in:
Daniel Nichter
2012-03-30 16:07:37 -06:00
8 changed files with 2333 additions and 5 deletions

2143
bin/pt-fingerprint Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -175,10 +175,30 @@ sub fingerprint {
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
# This regex is extremely broad in its definition of what looks like a
# number. That is for speed.
$query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers
$query =~ s/[xb.+-]\?/?/g; # Clean up leftovers
# MD5 checksums which are always 32 hex chars
if ( $self->{match_md5_checksums} ) {
$query =~ s/([._-])[a-f0-9]{32}/$1?/g;
}
# Things resembling numbers/hex.
if ( !$self->{match_embedded_numbers} ) {
# For speed, this regex is extremely broad in its definition
# of what looks like a number.
$query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;
}
else {
$query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g;
}
# Clean up leftovers
if ( $self->{match_md5_checksums} ) {
$query =~ s/[xb+-]\?/?/g;
}
else {
$query =~ s/[xb.+-]\?/?/g;
}
$query =~ s/\A\s+//; # Chop off leading whitespace
chomp $query; # Kill trailing whitespace
$query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace

View File

@@ -10,7 +10,7 @@ BEGIN {
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More tests => 266;
use Test::More tests => 271;
use QueryRewriter;
use QueryParser;
@@ -349,6 +349,64 @@ is(
"Fingerprint LOAD DATA INFILE"
);
# Fingerprint MD5 checksums (32-character hex strings).  This is a
# special feature used by pt-fingerprint.
#
# The rewriter objects below are built with direct method-call syntax
# (QueryRewriter->new) instead of the indirect-object form, which Perl
# can parse ambiguously.

# match_md5_checksums only: an MD5 checksum before or after the "_" in a
# table name collapses to a single "?".
$qr = QueryRewriter->new(
    QueryParser         => $qp,
    match_md5_checksums => 1,
);
is(
    $qr->fingerprint(
        "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
    ),
    "select * from db.?_temp where id=?",
    "Fingerprint db.MD5_tbl"
);
is(
    $qr->fingerprint(
        "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
    ),
    "select * from db.temp_? where id=?",
    "Fingerprint db.tbl_MD5"
);

# Both options together: the MD5 results must be the same as above.
$qr = QueryRewriter->new(
    QueryParser            => $qp,
    match_md5_checksums    => 1,
    match_embedded_numbers => 1,
);
is(
    $qr->fingerprint(
        "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1"
    ),
    "select * from db.?_temp where id=?",
    "Fingerprint db.MD5_tbl (with match_embedded_numbers)"
);
is(
    $qr->fingerprint(
        "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1"
    ),
    "select * from db.temp_? where id=?",
    "Fingerprint db.tbl_MD5 (with match_embedded_numbers)"
);

# match_embedded_numbers only: a number inside an identifier (rt_5min) is
# kept, while the standalone literal (id=1) is still replaced by "?".
$qr = QueryRewriter->new(
    QueryParser            => $qp,
    match_embedded_numbers => 1,
);
is(
    $qr->fingerprint(
        "SELECT * FROM prices.rt_5min where id=1"
    ),
    "select * from prices.rt_5min where id=?",
    "Fingerprint db.tbl<number>name (preserve number)"
);
# #############################################################################
# convert_to_select()
# #############################################################################

101
t/pt-fingerprint/basics.t Normal file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env perl
# Basic tests for bin/pt-fingerprint: a --help smoke test, one test per
# sample query file, and several --query command-line cases.
BEGIN {
# Refuse to run unless PERCONA_TOOLKIT_BRANCH names an existing directory,
# then put that directory's lib/ first on @INC.
die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n"
unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH};
unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib";
};
use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
# Planned count: 1 --help check + 1 per query<N> sample file + 4
# test_query() cases.  A plan of 7 therefore assumes two sample files;
# update it whenever a sample file or test_query() case is added.
use Test::More tests => 7;
# NOTE(review): $trunk and output() are not defined in this file --
# presumably both are exported by PerconaTest; confirm.
use PerconaTest;
# Load the tool into this process so the tests can call
# pt_fingerprint::main() directly.
require "$trunk/bin/pt-fingerprint";
# NOTE(review): @args is not referenced anywhere else in this file.
my @args = qw();
my $output;
# Directory holding the query<N> sample files and their .fingerprint files.
my $sample = "$trunk/t/pt-fingerprint/samples";
# pt-query-digest is used by test_query_file() to create a missing
# .fingerprint file.
my $pqd = "$trunk/bin/pt-query-digest";
# Smoke test: run the tool as a separate process and check that its --help
# output mentions --help.
$output = `$trunk/bin/pt-fingerprint --help`;
like(
$output,
qr/--help/,
"It runs"
);
# Compare pt-fingerprint's output for a sample query file with the expected
# fingerprint stored in "$sample/$file.fingerprint".
#
# Parameters:
#   $file - name of a query file inside the $sample directory.
#
# If the .fingerprint file does not exist yet, it is created from what
# pt-query-digest --fingerprint reports for the same file, and a diag()
# message says so.  Runs exactly one Test::More test.  Dies if the expected
# file cannot be opened.
sub test_query_file {
    my ($file) = @_;
    my $expect_file = "$sample/$file.fingerprint";

    if ( !-f $expect_file ) {
        # Take the line that follows the first line containing "Fingerprint"
        # in pt-query-digest's output and strip its leading "#" and spaces.
        `$pqd --fingerprint $sample/$file | awk '/Fingerprint/ { getline; print; exit; }' | sed -e 's/^#[ ]*//' > $sample/$file.fingerprint`;
        diag("Created $sample/$file.fingerprint");
    }

    # Read the expected fingerprint with a checked open instead of shelling
    # out to cat: no shell parsing of the path, and an unreadable file is
    # reported instead of silently comparing against an empty string.
    open my $fh, '<', $expect_file
        or die "Cannot open $expect_file: $OS_ERROR\n";
    my $expect = do { local $/; <$fh> };
    close $fh;
    $expect = '' unless defined $expect;
    chomp $expect;

    my $got = output(
        sub { pt_fingerprint::main("$sample/$file") }
    );
    chomp($got);

    is(
        $got,
        $expect,
        "$file fingerprint"
    );
}
# Run test_query_file() once for every file in $sample named query<digits>.
# readdir() returns entries in an unspecified, filesystem-dependent order,
# so the names are sorted to keep the test order stable from run to run.
# The directory handle is closed before any test runs.
opendir my $dir, $sample or die "Cannot open $sample: $OS_ERROR\n";
my @query_files = sort grep { m/^query\d+$/ } readdir $dir;
closedir $dir;
foreach my $file ( @query_files ) {
    test_query_file($file);
}
# Fingerprint a single query given via --query and check the result.
#
# Named arguments:
#   query  - the SQL text passed after --query.
#   expect - the fingerprint the tool is expected to print.
#   ops    - optional arrayref of extra command-line options.
#   name   - optional test name; when absent (or false) the name is
#            "Fingerprint " followed by the first 70 characters of query.
#
# The captured output is stored in the file-level $output.  Runs exactly
# one Test::More test.
sub test_query {
    my %opt = @_;
    my ($sql, $want) = @opt{qw(query expect)};

    my @extra_opts;
    @extra_opts = @{ $opt{ops} } if $opt{ops};

    $output = output(
        sub { pt_fingerprint::main('--query', $sql, @extra_opts) }
    );
    chomp($output);

    my $test_name = $opt{name} || "Fingerprint " . substr($sql, 0, 70);
    is($output, $want, $test_name);
}
# Command-line (--query) cases, run in the order listed.  Each entry holds
# the named arguments for one test_query() call; "name" and "ops" are
# optional.
my @query_cases = (
    {
        query  => 'select * from tbl where id=1',
        expect => 'select * from tbl where id=?',
    },
    {
        name   => "Fingerprint MD5_word",
        query  => "SELECT c FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1",
        expect => "select c from db.?_temp where id=?",
        ops    => [qw(--match-md5-checksums)],
    },
    {
        name   => "Fingerprint word_MD5",
        query  => "SELECT c FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1",
        expect => "select c from db.temp_? where id=?",
        ops    => [qw(--match-md5-checksums)],
    },
    {
        name   => "Fingerprint word<number>",
        query  => "SELECT c FROM db.catch22 WHERE id is null",
        expect => "select c from db.catch22 where id is ?",
        ops    => [qw(--match-embedded-numbers)],
    },
);

foreach my $case ( @query_cases ) {
    test_query( %{$case} );
}

# #############################################################################
# Done.
# #############################################################################
exit;

View File

@@ -0,0 +1,2 @@
# Query_time: 1
select * from db.tbl where id=1 or foo='bar';

View File

@@ -0,0 +1 @@
select * from db.tbl where id=? or foo=?

View File

@@ -0,0 +1,2 @@
# Query_time: 1
select col from db.tbl1 where id in (1, 2, 3);

View File

@@ -0,0 +1 @@
select col from db.tbl? where id in(?+)