From a81d25c489d1f0d032fe7d102d130de7446f140d Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 26 Mar 2012 10:45:46 -0600 Subject: [PATCH] Add fingerprint_md5 and preserve_embedded_numbers flags to QueryRewriter, used in fingerprint() to handle customer's requirements. --- lib/QueryRewriter.pm | 28 +++++++++++++++++--- t/lib/QueryRewriter.t | 60 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 5 deletions(-) diff --git a/lib/QueryRewriter.pm b/lib/QueryRewriter.pm index b89ea1c6..1789e634 100644 --- a/lib/QueryRewriter.pm +++ b/lib/QueryRewriter.pm @@ -175,10 +175,30 @@ sub fingerprint { $query =~ s/\\["']//g; # quoted strings $query =~ s/".*?"/?/sg; # quoted strings $query =~ s/'.*?'/?/sg; # quoted strings - # This regex is extremely broad in its definition of what looks like a - # number. That is for speed. - $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g;# Anything vaguely resembling numbers - $query =~ s/[xb.+-]\?/?/g; # Clean up leftovers + + # MD5 checksums which are always 32 hex chars + if ( $self->{fingerprint_md5} ) { + $query =~ s/([._-])[a-f0-9]{32}/$1?/g; + } + + # Things resembling numbers/hex. + if ( !$self->{preserve_embedded_numbers} ) { + # For speed, this regex is extremely broad in its definition + # of what looks like a number. 
+ $query =~ s/[0-9+-][0-9a-f.xb+-]*/?/g; + } + else { + $query =~ s/\b[0-9+-][0-9a-f.xb+-]*/?/g; + } + + # Clean up leftovers + if ( $self->{fingerprint_md5} ) { + $query =~ s/[xb+-]\?/?/g; + } + else { + $query =~ s/[xb.+-]\?/?/g; + } + $query =~ s/\A\s+//; # Chop off leading whitespace chomp $query; # Kill trailing whitespace $query =~ tr[ \n\t\r\f][ ]s; # Collapse whitespace diff --git a/t/lib/QueryRewriter.t b/t/lib/QueryRewriter.t index c60f6dce..d2fa4e97 100644 --- a/t/lib/QueryRewriter.t +++ b/t/lib/QueryRewriter.t @@ -10,7 +10,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 266; +use Test::More tests => 271; use QueryRewriter; use QueryParser; @@ -349,6 +349,64 @@ is( "Fingerprint LOAD DATA INFILE" ); +# fingerprint MD5 checksums, 32 char hex strings. This is a +# special feature used by pt-fingerprint. +$qr = new QueryRewriter( + QueryParser => $qp, + fingerprint_md5 => 1, +); + +is( + $qr->fingerprint( + "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" + ), + "select * from db.?_temp where id=?", + "Fingerprint db.MD5_tbl" +); + +is( + $qr->fingerprint( + "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" + ), + "select * from db.temp_? where id=?", + "Fingerprint db.tbl_MD5" +); + +$qr = new QueryRewriter( + QueryParser => $qp, + fingerprint_md5 => 1, + preserve_embedded_numbers => 1, +); + +is( + $qr->fingerprint( + "SELECT * FROM db.fbc5e685a5d3d45aa1d0347fdb7c4d35_temp where id=1" + ), + "select * from db.?_temp where id=?", + "Fingerprint db.MD5_tbl (with preserve_embedded_numbers)" +); + +is( + $qr->fingerprint( + "SELECT * FROM db.temp_fbc5e685a5d3d45aa1d0347fdb7c4d35 where id=1" + ), + "select * from db.temp_? where id=?",
+ "Fingerprint db.tbl_MD5 (with preserve_embedded_numbers)" +); + +$qr = new QueryRewriter( + QueryParser => $qp, + preserve_embedded_numbers => 1, +); + +is( + $qr->fingerprint( + "SELECT * FROM prices.rt_5min where id=1" + ), + "select * from prices.rt_5min where id=?", + "Fingerprint db.tblname (preserve number)" +); + # ############################################################################# # convert_to_select() # #############################################################################