Compare commits

..

4 Commits

Author SHA1 Message Date
Carlos Salguero
1352db81b6 PT-1398 Updated way of getting MySQL PID 2021-09-30 15:51:47 -03:00
Carlos Salguero
9d6508da5f PT-1900 pt-query-digest not hiding parameter properly sometimes when parameter=binary (#510)
* PT-1900 WIP

* Fixed quoted strings regexes

* PT-1900 Fixed query rewriter to properly handle quoted text

* Fixed merge conflicts with 3.x branch
2021-09-27 08:23:23 -03:00
Carlos Salguero
d91ba9cadd PT-1959 Updated version number (#515)
Not updated in files that will be updated during the build like the
python script, Makefile.PL and some other files like documentation.
2021-09-20 10:27:29 -03:00
Jelle van der Waa
b19654fa48 Make build reproducible (#499)
By setting SOURCE_DATE_EPOCH builds the build binaries can become
reproducible as a rebuild will take the old build date as input.

Motivation: https://reproducible-builds.org
2021-09-20 10:26:27 -03:00
13 changed files with 131 additions and 64 deletions

View File

@@ -1565,8 +1565,8 @@ $bal = qr/
my $olc_re = qr/(?:--|#)[^'"\r\n]*(?=[\r\n]|\Z)/; # One-line comments
my $mlc_re = qr#/\*[^!].*?\*/#sm; # But not /*!version */
my $vlc_re = qr#/\*.*?[0-9+].*?\*/#sm; # For SHOW + /*!version */
my $vlc_rf = qr#^(SHOW).*?/\*![0-9+].*?\*/#sm; # Variation for SHOW
my $vlc_re = qr#/\*.*?[0-9]+.*?\*/#sm; # For SHOW + /*!version */
my $vlc_rf = qr#^(?:SHOW).*?/\*![0-9]+(.*?)\*/#sm; # Variation for SHOW
sub new {
@@ -1581,7 +1581,8 @@ sub strip_comments {
$query =~ s/$mlc_re//go;
$query =~ s/$olc_re//go;
if ( $query =~ m/$vlc_rf/i ) { # contains show + version
$query =~ s/$vlc_re//go;
my $qualifier = $1 || '';
$query =~ s/$vlc_re/$qualifier/go;
}
return $query;
}
@@ -1652,9 +1653,15 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
if ( $self->{match_md5_checksums} ) {
$query =~ s/([._-])[a-f0-9]{32}/$1?/g;
@@ -1704,6 +1711,13 @@ sub distill_verbs {
$query =~ m/\A\s*UNLOCK TABLES/i && return "UNLOCK";
$query =~ m/\A\s*xa\s+(\S+)/i && return "XA_$1";
if ( $query =~ m/\A\s*LOAD/i ) {
my ($tbl) = $query =~ m/INTO TABLE\s+(\S+)/i;
$tbl ||= '';
$tbl =~ s/`//g;
return "LOAD DATA $tbl";
}
if ( $query =~ m/\Aadministrator command:/ ) {
$query =~ s/administrator command:/ADMIN/;
$query = uc $query;
@@ -1716,7 +1730,7 @@ sub distill_verbs {
PTDEBUG && _d($query);
$query = uc $query;
$query =~ s/\s+(?:GLOBAL|SESSION|FULL|STORAGE|ENGINE)\b/ /g;
$query =~ s/\s+(?:SESSION|FULL|STORAGE|ENGINE)\b/ /g;
$query =~ s/\s+COUNT[^)]+\)//g;
$query =~ s/\s+(?:FOR|FROM|LIKE|WHERE|LIMIT|IN)\b.+//ms;
@@ -1731,6 +1745,7 @@ sub distill_verbs {
eval $QueryParser::tbl_ident;
my ( $dds ) = $query =~ /^\s*($QueryParser::data_def_stmts)\b/i;
if ( $dds) {
$query =~ s/\s+IF(?:\s+NOT)?\s+EXISTS/ /i;
my ( $obj ) = $query =~ m/$dds.+(DATABASE|TABLE)\b/i;
$obj = uc $obj if $obj;
PTDEBUG && _d('Data def statment:', $dds, 'obj:', $obj);
@@ -1797,6 +1812,9 @@ sub distill {
map { $verbs =~ s/$_/$alias_for{$_}/ } keys %alias_for;
$query = $verbs;
}
elsif ( $verbs && $verbs =~ m/^LOAD DATA/ ) {
return $verbs;
}
else {
my @tables = $self->__distill_tables($query, $table, %args);
$query = join(q{ }, $verbs, @tables);

View File

@@ -2520,9 +2520,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -4911,9 +4911,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -2945,9 +2945,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -822,7 +822,7 @@ collect() {
local mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")"
local mysql_error_log="$(awk '/^log_error /{print $2}' "$d/$p-variables")"
local mysql_error_log="$(awk '/^log_error/{print $2}' "$d/$p-variables")"
if [ -z "$mysql_error_log" -a "$mysqld_pid" ]; then
mysql_error_log="$(ls -l /proc/$mysqld_pid/fd | awk '/ 2 ->/{print $NF}')"
fi
@@ -833,10 +833,10 @@ collect() {
tail -f "$mysql_error_log" >"$d/$p-log_error" &
tail_error_log_pid=$!
$CMD_MYSQLADMIN $EXT_ARGV
$CMD_MYSQLADMIN $EXT_ARGV debug
else
log "Could not find the MySQL error log"
fi
fi
if [ "${mysql_version}" '>' "5.1" ]; then
local mutex="SHOW ENGINE INNODB MUTEX"
else
@@ -869,12 +869,7 @@ collect() {
local strace_pid=$!
fi
local cnt=$(($OPT_RUN_TIME / $OPT_SLEEP_COLLECT))
$CMD_MYSQLADMIN $EXT_ARGV ext -i$OPT_SLEEP_COLLECT -c$cnt >>"$d/$p-mysqladmin" &
local mysqladmin_pid=$!
if [ ! "$OPT_MYSQL_ONLY" ]; then
if [ ! "$OPT_MYSQL_ONLY" ]; then
ps -eaF >> "$d/$p-ps" &
top -bn${OPT_RUN_TIME} >> "$d/$p-top" &
@@ -887,9 +882,10 @@ collect() {
if [ "$CMD_DMESG" ]; then
local UPTIME=`cat /proc/uptime | awk '{ print $1 }'`
local START_TIME=$(echo "$UPTIME 60" | awk '{print ($1 - $2)}')
$CMD_DMESG | perl -ne 'm/\[\s*(\d+)\./; if ($1 > '${START_TIME}') { print }' >> "$d/$p-dmesg" &
$CMD_DMESG | perl -ne 'm/\[\s*(\d+)\./; if ($1 > '${START_TIME}') { print }' >> "$d/$p-dmesg" &
fi
local cnt=$(($OPT_RUN_TIME / $OPT_SLEEP_COLLECT))
if [ "$CMD_VMSTAT" ]; then
$CMD_VMSTAT $OPT_SLEEP_COLLECT $cnt >> "$d/$p-vmstat" &
$CMD_VMSTAT $OPT_RUN_TIME 2 >> "$d/$p-vmstat-overall" &
@@ -902,7 +898,10 @@ collect() {
$CMD_MPSTAT -P ALL $OPT_SLEEP_COLLECT $cnt >> "$d/$p-mpstat" &
$CMD_MPSTAT -P ALL $OPT_RUN_TIME 1 >> "$d/$p-mpstat-overall" &
fi
fi
$CMD_MYSQLADMIN $EXT_ARGV ext -i$OPT_SLEEP_COLLECT -c$cnt >>"$d/$p-mysqladmin" &
local mysqladmin_pid=$!
fi
local have_lock_waits_table=""
$CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \
@@ -917,7 +916,7 @@ collect() {
local ps_instrumentation_enabled=$($CMD_MYSQL $EXT_ARGV -e 'SELECT ENABLED FROM performance_schema.setup_instruments WHERE NAME = "transaction";' \
| sed "2q;d" | sed 'y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/')
if [ !$ps_instrumentation_enabled = "yes" ]; then
if [ $ps_instrumentation_enabled != "yes" ]; then
log "Performance Schema instrumentation is disabled"
fi
@@ -956,12 +955,12 @@ collect() {
(echo $ts; df -k) >> "$d/$p-df" &
(echo $ts; netstat -antp) >> "$d/$p-netstat" &
(echo $ts; netstat -s) >> "$d/$p-netstat_s" &
fi
($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \
fi
(echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \
>> "$d/$p-processlist" &
if [ "$have_lock_waits_table" ]; then
(lock_waits) >>"$d/$p-lock-waits" &
(transactions) >>"$d/$p-transactions" &
(echo $ts; lock_waits) >>"$d/$p-lock-waits" &
(echo $ts; transactions) >>"$d/$p-transactions" &
fi
if [ "${mysql_version}" '>' "5.6" ] && [ $ps_instrumentation_enabled == "yes" ]; then
@@ -969,10 +968,10 @@ collect() {
fi
if [ "${mysql_version}" '>' "5.6" ]; then
(ps_prepared_statements) >> "$d/$p-prepared-statements" &
(echo $ts; ps_prepared_statements) >> "$d/$p-prepared-statements" &
fi
slave_status "$d/$p-slave-status" "${mysql_version}"
slave_status "$d/$p-slave-status" "${mysql_version}"
curr_time=$(date +'%s')
done
@@ -1077,7 +1076,7 @@ lock_waits() {
LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id
ORDER BY wait_time DESC\G"
$CMD_MYSQL $EXT_ARGV -e "$sql2"
}
}
transactions() {
$CMD_MYSQL $EXT_ARGV -e "SELECT SQL_NO_CACHE * FROM INFORMATION_SCHEMA.INNODB_TRX ORDER BY trx_id\G"
@@ -1126,8 +1125,8 @@ rocksdb_status() {
}
ps_locks_transactions() {
local outfile=$1
local outfile=$1
$CMD_MYSQL $EXT_ARGV -e 'select @@performance_schema' | grep "1" &>/dev/null
if [ $? -eq 0 ]; then
@@ -1163,27 +1162,28 @@ slave_status() {
local outfile=$1
local mysql_version=$2
local sql="SHOW SLAVE STATUS\G"
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
if [ "${mysql_version}" '>' "5.6" ]; then
if [ "${mysql_version}" '<' "5.7" ]; then
local sql="SHOW SLAVE STATUS\G"
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
else
local sql="SELECT * FROM performance_schema.replication_connection_configuration JOIN performance_schema.replication_applier_configuration USING(channel_name)\G"
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
sql="SELECT * FROM performance_schema.replication_connection_status\G"
sql="SELECT * FROM replication_connection_status\G"
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
sql="SELECT * FROM performance_schema.replication_applier_status JOIN performance_schema.replication_applier_status_by_coordinator USING(channel_name)\G"
sql="SELECT * FROM replication_applier_status JOIN replication_applier_status_by_coordinator USING(channel_name)\G"
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
fi
}
collect_mysql_variables() {
local outfile=$1
local outfile=$1
local sql="SHOW GLOBAL VARIABLES"
echo -e "\n$sql\n" >> $outfile
@@ -1192,11 +1192,11 @@ collect_mysql_variables() {
sql="select * from performance_schema.variables_by_thread order by thread_id, variable_name;"
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
sql="select * from performance_schema.user_variables_by_thread order by thread_id, variable_name;"
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile
sql="select * from performance_schema.status_by_thread order by thread_id, variable_name; "
echo -e "\n$sql\n" >> $outfile
$CMD_MYSQL $EXT_ARGV -e "$sql" >> $outfile

View File

@@ -2946,9 +2946,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -4818,9 +4818,13 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values

View File

@@ -173,9 +173,17 @@ sub fingerprint {
$query =~ s/\Ause \S+\Z/use ?/i # Abstract the DB in USE
&& return $query;
$query =~ s/\\["']//g; # quoted strings
$query =~ s/".*?"/?/sg; # quoted strings
$query =~ s/'.*?'/?/sg; # quoted strings
# -----------------------------------------------------------
# Remove quoted strings
# -----------------------------------------------------------
$query =~ s/([^\\])(\\')/$1/sg;
$query =~ s/([^\\])(\\")/$1/sg;
$query =~ s/\\\\//sg;
$query =~ s/\\'//sg;
$query =~ s/\\"//sg;
$query =~ s/([^\\])(".*?[^\\]?")/$1?/sg;
$query =~ s/([^\\])('.*?[^\\]?')/$1?/sg;
# -----------------------------------------------------------
$query =~ s/\bfalse\b|\btrue\b/?/isg; # boolean values
@@ -218,8 +226,8 @@ sub fingerprint {
$query =~ s/\blimit \?(?:, ?\?| offset \?)?/limit ?/; # LIMIT
# The following are disabled because of speed issues. Should we try to
# normalize whitespace between and around operators? My gut feeling is no.
# $query =~ s/ , | ,|, /,/g; # Normalize commas
# $query =~ s/ = | =|= /=/g; # Normalize equals
# $query =~ s/ , | ,|, /,/g; # Normalize commas
# $query =~ s/ = | =|= /=/g; # Normalize equals
# $query =~ s# [,=+*/-] ?|[,=+*/-] #+#g; # Normalize operators
# Remove ASC keywords from ORDER BY clause so these queries fingerprint

View File

@@ -52,7 +52,8 @@ collect() {
local mysqld_pid=""
# Get pidof mysqld.
if [ ! "$OPT_MYSQL_ONLY" ]; then
mysqld_pid=$(_pidof mysqld | awk '{print $1; exit;}')
port=$(mysql -ss -e 'SELECT @@port')
mysqld_pid=$(lsof -i ":${port}" | grep -i listen | cut -f 3 -d" ")
fi
# Get memory allocation info before anything else.

View File

@@ -3,10 +3,17 @@ help: ## Display this help message.
@grep '^[a-zA-Z]' $(MAKEFILE_LIST) | \
awk -F ':.*?## ' 'NF==2 {printf " %-26s%s\n", $$1, $$2}'
DATE_FMT = +%FT%T%z
ifdef SOURCE_DATE_EPOCH
BUILD_DATE ?= $(shell date -u -d "@$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u -r "$(SOURCE_DATE_EPOCH)" "$(DATE_FMT)" 2>/dev/null || date -u "$(DATE_FMT)")
else
BUILD_DATE ?= $(shell date "$(DATE_FMT)")
endif
GO := go
pkgs = $(shell find . -type d -name "pt-*" -exec basename {} \;)
VERSION=$(shell git describe --abbrev=0 --tags)
BUILD=$(shell date +%FT%T%z)
BUILD=$(BUILD_DATE)
GOVERSION=$(shell go version | cut --delimiter=" " -f3)
COMMIT=$(shell git rev-list -1 HEAD)
GOUTILSDIR ?= $(GOPATH)/bin

View File

@@ -162,7 +162,7 @@ is(
# This is a known deficiency, fixes seem to be expensive though.
is(
$qr->fingerprint("select '\\\\' from foo"),
"select '\\ from foo",
"select ? from foo",
"Does not handle all quoted strings",
);
@@ -1478,6 +1478,19 @@ is(
"Fingerprint db.tbl<number>name (preserve number)"
);
is(
$qr->fingerprint(
"SELECT i FROM d.t WHERE i=\"3\""
),
"select i from d.t where i=?",
"Fingerprint db.tbl<number>name (preserve number)"
);
is(
$qr->fingerprint("CALL foo(1, 2, 3)"),
"call foo",
'Fingerprints stored procedure calls specially',
);
# #############################################################################
# Done.
# #############################################################################

View File

@@ -259,7 +259,7 @@ SELECT `SCHEMA_NAME` FROM `INFORMATION_SCHEMA`.`SCHEMATA`, (SELECT DB_first_leve
# EXPLAIN /*!50100 PARTITIONS*/
SELECT USER()\G
# Query 12: 0 QPS, 0x concurrency, ID 0x75A885FE43F31908754B91A2F3BD1E20 at byte 1082
# Query 12: 0 QPS, 0x concurrency, ID 0x15BF4DCE0B364CE831C14D6853A472B7 at byte 1082
# This item is included in the report because it matches --limit.
# Scores: V/M = 0.00
# Time range: all events occurred at 2016-06-07T19:07:02.565999Z
@@ -303,4 +303,4 @@ SELECT 1 FROM (SELECT `GRANTEE`, `IS_GRANTABLE` FROM `INFORMATION_SCHEMA`.`COLUM
# 9 0xDDABDE67AC3044CAED549F59FFFA541B 0.0000 0.0% 1 0.0000 0.00 SELECT phpmyadmin.pma__navigationhiding
# 10 0x35CCC630581DCD5AA46100310F18DEB9 0.0000 0.0% 1 0.0000 0.00 SELECT INFORMATION_SCHEMA.SCHEMATA
# 11 0x7B48FAA9C951DD8A389FF9DA2DF3DF62 0.0000 0.0% 1 0.0000 0.00 SELECT
# 12 0x75A885FE43F31908754B91A2F3BD1E20 0.0000 0.0% 1 0.0000 0.00 SELECT UNION INFORMATION_SCHEMA.COLUMN_PRIVILEGES INFORMATION_SCHEMA.TABLE_PRIVILEGES INFORMATION_SCHEMA.SCHEMA_PRIVILEGES INFORMATION_SCHEMA.USER_PRIVILEGES
# 12 0x15BF4DCE0B364CE831C14D6853A472B7 0.0000 0.0% 1 0.0000 0.00 SELECT UNION INFORMATION_SCHEMA.COLUMN_PRIVILEGES INFORMATION_SCHEMA.TABLE_PRIVILEGES INFORMATION_SCHEMA.SCHEMA_PRIVILEGES INFORMATION_SCHEMA.USER_PRIVILEGES

View File

@@ -1 +1 @@
# Exec time 100 10s 1s 3s 2s 3s 896ms 2s
# Exec time 100 9s 2s 4s 3s 4s 786ms 3s