Files
percona-toolkit/bin/pt-stalk

1364 lines
38 KiB
Bash
Executable File

#!/usr/bin/env bash
# This program is part of Percona Toolkit: http://www.percona.com/software/
# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal
# notices and disclaimers.
# Treat expansion of an unset variable as an error.
set -u
# ###########################################################################
# log_warn_die package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/log_warn_die.sh
# t/lib/bash/log_warn_die.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Treat expansion of an unset variable as an error.
set -u
# Exit status the tool finishes with; warn() sets it to 1.
EXIT_STATUS=0
# Print the arguments to STDOUT as one line, prefixed with the current
# local time formatted like 2011_12_06_14_02_02.  The timestamp is left in
# the global TS.
log() {
   TS="$(date +%F-%T | tr :- _)"
   printf '%s\n' "$TS $*"
}
# Log the arguments to STDERR (via log) and flag the run as failed by
# setting the global EXIT_STATUS to 1.
warn() {
   local message="$*"
   log "$message" 1>&2
   EXIT_STATUS=1
}
# Warn with the given message, then terminate the shell with status 1.
die() {
   local message="$*"
   warn "$message"
   exit 1
}
# ###########################################################################
# End log_warn_die package
# ###########################################################################
# ###########################################################################
# parse_options package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/parse_options.sh
# t/lib/bash/parse_options.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Treat expansion of an unset variable as an error.
set -u
# Globals set by parse_options.  OPT_VERSION, OPT_HELP and OPT_ERRS are
# read by usage_or_errors.
ARGV="" # Non-option args (probably input files)
EXT_ARGV="" # Everything after -- (args for an external command)
OPT_ERRS=0 # How many command line option errors
OPT_VERSION="no" # If --version was specified
OPT_HELP="no" # If --help was specified
# Print the tool's "Usage: ..." line(s) from file $1, followed by a pointer
# to the full documentation, on STDERR.  Reads the global TOOL.
usage() {
   local file="$1"
   local usage=""
   usage=$(grep '^Usage: ' "$file")
   # Quoted on purpose: the usage line contains "[OPTIONS]", which the shell
   # would otherwise treat as a glob and replace with matching file names.
   printf '%s\n' "$usage" >&2
   echo >&2
   echo "For more information, 'man $TOOL' or 'perldoc $file'." >&2
}
# Handle --version, --help and option errors after parse_options has run.
#   $1 - the tool's file, from which the version and usage lines are read
# Reads the globals OPT_VERSION, OPT_HELP, OPT_ERRS and TMPDIR.
# Returns 1 when something was printed and the caller should stop
# (version to STDOUT; help or usage to STDERR), else 0.
usage_or_errors() {
   local file="$1"
   local version=""
   local spec=""
   local opt=""
   local desc=""

   if [ "$OPT_VERSION" = "yes" ]; then
      version=$(grep '^pt-[^ ]\+ [0-9]' "$file")
      echo "$version"
      return 1
   fi

   if [ "$OPT_HELP" = "yes" ]; then
      usage "$file"
      echo >&2
      echo "Command line options:" >&2
      echo >&2
      # One spec file per option, written by parse_options.  Glob instead
      # of parsing ls so a TMPDIR containing whitespace still works.
      for spec in "$TMPDIR"/po/*; do
         [ -f "$spec" ] || continue   # unmatched glob: nothing to list
         opt="${spec##*/}"
         desc=$(sed -n -e 's/^desc://p' "$spec")
         echo "--$opt" >&2
         echo " $desc" >&2
         echo >&2
      done
      return 1
   fi

   if [ "$OPT_ERRS" -gt 0 ]; then
      echo >&2
      usage "$file"
      return 1
   fi

   return 0
}
# Parse the command line against the option specs in a tool's POD.
#   $1   - file whose "=head1 OPTIONS" POD section declares the options
#   $2.. - the command line arguments to parse
# For every "=item --name" a spec file $TMPDIR/po/name is written holding
# long:, optional attribute (type:, default:, negatable:, short form:) and
# desc: lines.  Each option then gets a global OPT_NAME (upper case, dashes
# as underscores) set to its default and overridden from the command line.
# Non-option arguments are collected in ARGV, everything after "--" in
# EXT_ARGV, and OPT_ERRS counts unknown options and missing arguments.
# Exits 1 if the spec directory cannot be prepared or a spec is invalid.
parse_options() {
   local file="$1"
   shift

   local spec_path=""
   local opt_spec=""
   local opt=""
   local default_val=""
   local neg=0
   local line=""
   local key=""
   local val=""
   local spec=""
   local real_opt=""
   local required_arg=""

   # Refuse to work relative to "/" when no tmpdir has been made yet.
   if [ -z "$TMPDIR" ]; then
      echo "TMPDIR is not set" >&2
      exit 1
   fi

   if [ ! -d "$TMPDIR/po/" ]; then
      mkdir "$TMPDIR/po/"
      if [ $? -ne 0 ]; then
         echo "Cannot mkdir $TMPDIR/po/" >&2
         exit 1
      fi
   fi

   rm -rf "$TMPDIR"/po/*
   if [ $? -ne 0 ]; then
      echo "Cannot rm -rf $TMPDIR/po/*" >&2
      exit 1
   fi

   # Extract the option specs from the POD, reading it paragraph by
   # paragraph.  print (not printf) is used so that a "%" in an attribute
   # or description is written literally.
   (
      export PO_DIR="$TMPDIR/po"
      perl -ne '
         BEGIN { $/ = ""; }
         next unless $_ =~ m/^=head1 OPTIONS/;
         while ( defined(my $para = <>) ) {
            last if $para =~ m/^=head1/;
            chomp;
            if ( $para =~ m/^=item --(\S+)/ ) {
               my $opt  = $1;
               my $file = "$ENV{PO_DIR}/$opt";
               open my $opt_fh, ">", $file or die "Cannot open $file: $!";
               print $opt_fh "long:$opt\n";
               $para = <>;
               chomp;
               if ( $para =~ m/^[a-z ]+:/ ) {
                  map {
                     chomp;
                     my ($attrib, $val) = split(/: /, $_);
                     print $opt_fh "$attrib:$val\n";
                  } split(/; /, $para);
                  $para = <>;
                  chomp;
               }
               my ($desc) = $para =~ m/^([^?.]+)/;
               print $opt_fh "desc:$desc.\n";
               close $opt_fh;
            }
         }
         last;
      ' < "$file"
   )

   # Validate each spec and set the option's default value.
   for spec_path in "$TMPDIR"/po/*; do
      [ -f "$spec_path" ] || continue   # unmatched glob: no options
      opt_spec="${spec_path##*/}"
      opt=""
      default_val=""
      neg=0
      while read -r line; do
         key=$(printf '%s\n' "$line" | cut -d ':' -f 1)
         val=$(printf '%s\n' "$line" | cut -d ':' -f 2)
         case "$key" in
            long)
               opt=$(printf '%s\n' "$val" | sed 's/-/_/g' | tr '[:lower:]' '[:upper:]')
               ;;
            default)
               default_val="$val"
               ;;
            "short form")
               ;;
            type)
               ;;
            desc)
               ;;
            negatable)
               if [ "$val" = "yes" ]; then
                  neg=1
               fi
               ;;
            *)
               echo "Invalid attribute in $TMPDIR/po/$opt_spec: $line" >&2
               exit 1
         esac
      done < "$spec_path"

      if [ -z "$opt" ]; then
         echo "No long attribute in option spec $TMPDIR/po/$opt_spec" >&2
         exit 1
      fi

      if [ $neg -eq 1 ]; then
         if [ -z "$default_val" ] || [ "$default_val" != "yes" ]; then
            echo "Option $opt_spec is negatable but not default: yes" >&2
            exit 1
         fi
      fi

      # Defaults come from the tool's own POD and are deliberately
      # evaluated so that e.g. ${HOME} in a default is expanded.
      eval "OPT_${opt}=\"$default_val\""
   done

   # Walk the command line.  The loop runs once per original argument but
   # consumes arguments with shift, so option values are skipped naturally.
   for opt; do
      if [ $# -eq 0 ]; then
         break # no more opts
      fi
      opt=$1
      if [ "$opt" = "--" ]; then
         shift
         EXT_ARGV="$*"
         break
      fi
      shift

      # Anything not starting with "-" is a plain argument.
      case "$opt" in
         -*)
            ;;
         *)
            if [ -z "$ARGV" ]; then
               ARGV="$opt"
            else
               ARGV="$ARGV $opt"
            fi
            continue
            ;;
      esac

      real_opt="$opt"
      case "$opt" in
         --no-*)
            neg=1
            opt="${opt#--no-}"
            ;;
         *)
            neg=0
            # printf rather than echo: echo would swallow -n, -e and -E.
            opt=$(printf '%s\n' "$opt" | sed 's/^-*//')
            ;;
      esac

      if [ -f "$TMPDIR/po/$opt" ]; then
         spec="$TMPDIR/po/$opt"
      else
         # /dev/null forces grep to print the file name even when there
         # is only a single spec file.
         spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* /dev/null | cut -d ':' -f 1)
         if [ -z "$spec" ]; then
            OPT_ERRS=$(($OPT_ERRS + 1))
            echo "Unknown option: $real_opt" >&2
            continue
         fi
      fi

      required_arg=$(awk -F: '/^type:/{print $2}' "$spec")
      if [ -n "$required_arg" ]; then
         if [ $# -eq 0 ]; then
            OPT_ERRS=$(($OPT_ERRS + 1))
            echo "$real_opt requires a $required_arg argument" >&2
            continue
         else
            val="$1"
            shift
         fi
      else
         if [ $neg -eq 0 ]; then
            val="yes"
         else
            val="no"
         fi
      fi

      opt=$(grep '^long:' "$spec" | cut -d ':' -f 2 | sed 's/-/_/g' | tr '[:lower:]' '[:upper:]')
      # The user's value is assigned literally; it is never re-parsed as
      # shell code, so whitespace and metacharacters are safe.
      eval "OPT_${opt}=\"\$val\""
   done
}
# ###########################################################################
# End parse_options package
# ###########################################################################
# ###########################################################################
# tmpdir package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/tmpdir.sh
# t/lib/bash/tmpdir.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Treat expansion of an unset variable as an error.
set -u
# Path of the tool's temporary directory: set by mk_tmpdir, cleared by
# rm_tmpdir.  NOTE: this overwrites any TMPDIR inherited from the environment.
TMPDIR=""
# Set the global TMPDIR to a usable temporary directory.
#   $1 - optional directory to use; it is created if it does not exist.
# Without an argument a private directory named after the tool and its PID
# is created under /tmp with mktemp.  Dies if the directory cannot be made.
mk_tmpdir() {
   local dir="${1:-""}"
   local tool=""
   local pid="$$"

   if [ -n "$dir" ]; then
      if [ ! -d "$dir" ]; then
         mkdir "$dir" || die "Cannot make tmpdir $dir"
      fi
      TMPDIR="$dir"
   else
      tool="${0##*/}"
      TMPDIR=$(mktemp -d "/tmp/${tool}.${pid}.XXXXX") \
         || die "Cannot make secure tmpdir"
   fi
}
# Remove the directory named by the global TMPDIR, if it is set and
# exists, then reset TMPDIR to the empty string.
rm_tmpdir() {
   [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ] && rm -rf "$TMPDIR"
   TMPDIR=""
}
# ###########################################################################
# End tmpdir package
# ###########################################################################
# ###########################################################################
# alt_cmds package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/alt_cmds.sh
# t/lib/bash/alt_cmds.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Treat expansion of an unset variable as an error.
set -u
# Print the integers 1..$1, one per line (a stand-in for seq).
# An empty or non-numeric argument counts as 0 and prints nothing.
_seq() {
   local i="$1"
   # The limit is passed with -v rather than spliced into the program text,
   # so its value can never alter the awk code.  "+ 0" forces a number.
   awk -v n="$i" 'BEGIN { n = n + 0; for (i = 1; i <= n; i++) print i; }'
}
# ###########################################################################
# End alt_cmds package
# ###########################################################################
# ###########################################################################
# safeguards package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/safeguards.sh
# t/lib/bash/safeguards.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Treat expansion of an unset variable as an error.
set -u
# Print POSIX-format df output, in 1K blocks, for the filesystem holding
# $1 (defaults to the current working directory).
disk_space() {
   local target="$PWD"
   if [ -n "${1:-}" ]; then
      target="$1"
   fi
   df -P -k "$target"
}
# Decide from saved disk_space (df -P -k) output whether enough disk is free.
#   $1 - file holding the df output
#   $2 - minimum free space in MB (default 0)
#   $3 - minimum free space in percent (default 0)
#   $4 - optional margin in MB that is about to be written; it is counted
#        as already used before the limits are applied (default 0)
# Returns 0 when free space is strictly above both limits; otherwise warns
# and returns 1.  Also warns and returns 1 when no usage row can be parsed.
check_disk_space() {
   local file="$1"
   local mb="${2:-0}"
   local pc="${3:-0}"
   local mb_margin="${4:-0}"

   local kb=$(($mb * 1024))
   local kb_margin=$(($mb_margin * 1024))

   # Take the first row that looks like df data (used, available, capacity%)
   # instead of requiring the device name to start with "/", so rows for
   # tmpfs, NFS and similar filesystems are understood too.
   local stats=""
   stats=$(awk '
      $3 ~ /^[0-9]+$/ && $4 ~ /^[0-9]+$/ && $5 ~ /^[0-9]+%$/ {
         sub(/%/, "", $5)
         print $3, $4, $5
         exit
      }' "$file")
   if [ -z "$stats" ]; then
      warn "Cannot determine disk usage from $file"
      return 1
   fi

   local kb_used="${stats%% *}"
   local rest="${stats#* }"
   local kb_free="${rest%% *}"
   local pc_used="${rest#* }"

   if [ "$kb_margin" -gt "0" ]; then
      local kb_total=$(($kb_used + $kb_free))
      kb_used=$(($kb_used + $kb_margin))
      kb_free=$(($kb_free - $kb_margin))
      if [ "$kb_total" -gt 0 ]; then
         pc_used=$(awk -v used="$kb_used" -v total="$kb_total" \
            'BEGIN { printf("%d", used / total * 100) }')
      else
         pc_used=100   # an empty filesystem has nothing free
      fi
   fi

   local pc_free=$((100 - $pc_used))

   if [ "$kb_free" -le "$kb" ] || [ "$pc_free" -le "$pc" ]; then
      warn "Not enough free disk space: ${pc_free}% free, ${kb_free} KB free; wanted more than ${pc}% free or ${kb} KB free"
      return 1
   fi

   return 0
}
# ###########################################################################
# End safeguards package
# ###########################################################################
# ###########################################################################
# daemon package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/daemon.sh
# t/lib/bash/daemon.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Treat expansion of an unset variable as an error.
set -u
# Write PID $2 to the PID file $1.
# If the file already exists: die when it is empty or when the PID it
# holds answers kill -0 (still running); otherwise print a notice on
# STDOUT and overwrite it.  Dies if the file cannot be written.
make_pid_file() {
   local file="$1"
   local pid="$2"
   local old_pid

   if [ -f "$file" ]; then
      old_pid=$(cat "$file")
      if [ -n "$old_pid" ]; then
         if kill -0 $old_pid 2>/dev/null; then
            die "PID file $file already exists and its PID ($old_pid) is running"
         else
            echo "Overwriting PID file $file because its PID ($old_pid)" \
                 "is not running"
         fi
      else
         die "PID file $file already exists but it is empty"
      fi
   fi

   if ! echo "$pid" > "$file"; then
      die "Cannot create or write PID file $file"
   fi
}
# Delete the PID file $1 if it exists as a regular file; otherwise do
# nothing and succeed.
remove_pid_file() {
   local file="$1"
   if [ ! -f "$file" ]; then
      return 0
   fi
   rm "$file"
}
# ###########################################################################
# End daemon package
# ###########################################################################
# ###########################################################################
# collect package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/collect.sh
# t/lib/bash/collect.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
# Treat expansion of an unset variable as an error.
set -u
# External commands used by collect().  Each can be overridden by setting
# the variable of the same name before the tool starts.
CMD_GDB=${CMD_GDB:-"gdb"}
CMD_IOSTAT=${CMD_IOSTAT:-"iostat"}
CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"}
# CMD_MSSQL and CMD_MYSQL_ADMIN are the misspelled override names this file
# used to read; they are still honored as fallbacks.
CMD_MYSQL=${CMD_MYSQL:-${CMD_MSSQL:-"mysql"}}
CMD_MYSQLADMIN=${CMD_MYSQLADMIN:-${CMD_MYSQL_ADMIN:-"mysqladmin"}}
CMD_OPCONTROL=${CMD_OPCONTROL:-"opcontrol"}
CMD_OPREPORT=${CMD_OPREPORT:-"opreport"}
CMD_PMAP=${CMD_PMAP:-"pmap"}
CMD_STRACE=${CMD_STRACE:-"strace"}
CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"}
CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"}
# Collect diagnostic data about MySQL and the host.
#   $1 - directory to save results in
#   $2 - prefix for each result file (files are named "$1/$2-<what>")
# Reads the globals EXT_ARGV (extra mysql/mysqladmin arguments), the CMD_*
# command names and OPT_COLLECT_GDB, OPT_COLLECT_TCPDUMP,
# OPT_COLLECT_OPROFILE, OPT_COLLECT_STRACE, OPT_INTERVAL, OPT_RUN_TIME,
# OPT_DISK_BYTE_LIMIT and OPT_DISK_PCT_LIMIT.
# One-off and long-running samplers are started in the background, then a
# once-per-second loop samples /proc, df, netstat and the processlist for
# OPT_RUN_TIME iterations, stopping early if check_disk_space fails.
collect() {
   local d="$1" # directory to save results in
   local p="$2" # prefix for each result file

   # Find mysqld's PID, trying pidof, then pgrep, then ps.
   local mysqld_pid=$(pidof -s mysqld);
   if [ -z "$mysqld_pid" ]; then
      mysqld_pid=$(pgrep -o -x mysqld);
   fi
   if [ -z "$mysqld_pid" ]; then
      mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1);
   fi

   # Memory map of mysqld; use -x only if this pmap's help output lists it.
   if [ "$mysqld_pid" ]; then
      if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then
         $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap"
      else
         $CMD_PMAP $mysqld_pid > "$d/$p-pmap"
      fi
   fi

   # Stack traces of all mysqld threads, if requested and mysqld was found.
   if [ "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then
      $CMD_GDB \
         -ex "set pagination 0" \
         -ex "thread apply all bt" \
         --batch -p $mysqld_pid \
         >> "$d/$p-stacktrace"
   else
      echo "GDB (--collect-gdb) was not enabled" >> "$d/$p-stacktrace"
   fi

   # Dump the server variables in the background, give that a moment to
   # finish, then read the version and error log location from the dump.
   $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 &
   sleep .2

   local mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")"

   local mysql_error_log="$(awk '/log_error/{print $2}' "$d/$p-variables")"
   if [ -z "$mysql_error_log" -a "$mysqld_pid" ]; then
      # Fall back to whatever mysqld's file descriptor 2 points at.
      mysql_error_log="$(ls -l /proc/$mysqld_pid/fd | awk '/ 2 ->/{print $NF}')"
   fi

   # Follow the error log while collecting; "mysqladmin debug" is run once
   # the tail is in place.
   local tail_error_log_pid=""
   if [ "$mysql_error_log" ]; then
      echo "The MySQL error log seems to be ${mysql_error_log}"
      tail -f "$mysql_error_log" >"$d/$p-log_error" 2>&1 &
      tail_error_log_pid=$!
      $CMD_MYSQLADMIN $EXT_ARGV debug
   else
      echo "Could not find the MySQL error log"
   fi

   # First InnoDB status, mutex and open tables snapshots.  The mutex
   # statement depends on the server version (compared as a string).
   local innostat="SHOW /*!40100 ENGINE*/ INNODB STATUS\G"
   if [ "${mysql_version}" '>' "5.1" ]; then
      local mutex="SHOW ENGINE INNODB MUTEX"
   else
      local mutex="SHOW MUTEX STATUS"
   fi
   $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" 2>&1 &
   $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" 2>&1 &
   open_tables >> "$d/$p-opentables1" 2>&1 &

   # Capture traffic on the server's port, if requested.
   local tcpdump_pid=""
   if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then
      local port=$(awk '/^port/{print $2}' "$d/$p-variables")
      if [ "$port" ]; then
         $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} &
         tcpdump_pid=$!
      fi
   fi

   # Start oprofile or, only when oprofile was not requested, strace.
   local have_oprofile="no"
   local strace_pid=""
   if [ "$OPT_COLLECT_OPROFILE" = "yes" ]; then
      if $CMD_OPCONTROL --init; then
         $CMD_OPCONTROL --start --no-vmlinux
         have_oprofile="yes"
      fi
   elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then
      # Written next to the other result files, named like them.
      $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "$d/$p-strace" 2>&1 &
      strace_pid=$!
   fi

   # Background system samplers.
   ps -eaf >> "$d/$p-ps" 2>&1 &
   sysctl -a >> "$d/$p-sysctl" 2>&1 &
   top -bn1 >> "$d/$p-top" 2>&1 &
   $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 &
   $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 &
   $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 &
   $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 &
   $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 &
   $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 &
   lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 &

   $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" 2>&1 &
   local mysqladmin_pid=$!

   # lock_waits is only sampled when INFORMATION_SCHEMA lists an
   # INNODB_LOCK_WAITS table.
   local have_lock_waits_table=0
   $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \
      | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1
   if [ $? -eq 0 ]; then
      have_lock_waits_table=1
   fi

   echo "Loop start: $(date +'TS %s.%N %F %T')"
   for loopno in $(_seq $OPT_RUN_TIME); do
      # Stop collecting as soon as the disk gets too full.
      disk_space "$d" > "$d/$p-disk-space"
      check_disk_space \
         "$d/$p-disk-space" \
         "$OPT_DISK_BYTE_LIMIT" \
         "$OPT_DISK_PCT_LIMIT" \
         || break

      # Sleep for the rest of the current second so that samples are
      # taken on second boundaries.
      sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}')
      local ts="$(date +"TS %s.%N %F %T")"

      # Each sample is followed by the timestamp line.
      (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" &
      (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" &
      (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" &
      (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" &
      (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" &
      (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" &
      (df -h 2>&1; echo $ts) >> "$d/$p-df" &
      (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" &
      (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" &
      ($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \
         >> "$d/$p-processlist"
      if [ $have_lock_waits_table -eq 1 ]; then
         (lock_waits 2>&1; echo $ts) >>"$d/$p-lock-waits"
      fi
   done
   echo "Loop end: $(date +'TS %s.%N %F %T')"

   # Stop the profiler or tracer started above.
   if [ "$have_oprofile" = "yes" ]; then
      $CMD_OPCONTROL --stop
      $CMD_OPCONTROL --dump
      kill $(pidof oprofiled); # TODO: what if system doesn't have pidof?
      $CMD_OPCONTROL --save=pt_collect_$p

      local mysqld_path=$(which mysqld);
      if [ "$mysqld_path" -a -f "$mysqld_path" ]; then
         $CMD_OPREPORT \
            --demangle=smart \
            --symbols \
            --merge tgid \
            session:pt_collect_$p \
            "$mysqld_path" \
            > "$d/$p-opreport"
      else
         echo "oprofile data saved to pt_collect_$p; you should be able" \
              "to get a report by running something like 'opreport" \
              "--demangle=smart --symbols --merge tgid session:pt_collect_$p" \
              "/path/to/mysqld'" \
            > "$d/$p-opreport"
      fi
   elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then
      # Interrupt strace, then terminate it if it is still around.
      kill -s 2 $strace_pid
      sleep 1
      kill -s 15 $strace_pid
      # NOTE(review): signal 18 is presumably SIGCONT, sent in case mysqld
      # was left stopped by strace -- the number is platform-specific, confirm.
      kill -s 18 $mysqld_pid
   fi

   # Second InnoDB status, mutex and open tables snapshots.
   $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" 2>&1 &
   $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" 2>&1 &
   open_tables >> "$d/$p-opentables2" 2>&1 &

   # Stop the long-running background collectors.
   kill $mysqladmin_pid
   [ "$tail_error_log_pid" ] && kill $tail_error_log_pid
   [ "$tcpdump_pid" ] && kill $tcpdump_pid

   hostname > "$d/$p-hostname"
}
# Print SHOW OPEN TABLES output, but only when the server's Open_tables
# status counter is at most 1000; otherwise print why it was skipped.
# Uses the globals CMD_MYSQLADMIN, CMD_MYSQL and EXT_ARGV.
open_tables() {
   local open_tables=""
   open_tables=$($CMD_MYSQLADMIN $EXT_ARGV ext | grep "Open_tables" | awk '{print $4}')

   # Without a number (e.g. the server could not be reached) the numeric
   # test below would be a syntax error, so handle that case explicitly.
   case "$open_tables" in
      ''|*[!0-9]*)
         echo "Cannot determine the number of open tables: $open_tables"
         return 0
         ;;
   esac

   if [ "$open_tables" -le 1000 ]; then
      $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' 2>&1 &
   else
      echo "Too many open tables: $open_tables"
   fi
}
# Report InnoDB lock waits with two INFORMATION_SCHEMA queries run through
# $CMD_MYSQL $EXT_ARGV: first a summary of who is blocking (one row per
# blocker), then one row per waiting transaction.
lock_waits() {
   local blockers_sql
   local waiters_sql
   local query

   blockers_sql="SELECT
CONCAT('thread ', b.trx_mysql_thread_id, ' from ', p.host) AS who_blocks,
IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx,
MAX(TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP)) AS max_wait_time,
COUNT(*) AS num_waiters
FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w
INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS b ON b.trx_id = w.blocking_trx_id
INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id
LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id
GROUP BY who_blocks ORDER BY num_waiters DESC\G"

   waiters_sql="SELECT
r.trx_id AS waiting_trx_id,
r.trx_mysql_thread_id AS waiting_thread,
TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP) AS wait_time,
r.trx_query AS waiting_query,
l.lock_table AS waiting_table_lock,
b.trx_id AS blocking_trx_id, b.trx_mysql_thread_id AS blocking_thread,
SUBSTRING(p.host, 1, INSTR(p.host, ':') - 1) AS blocking_host,
SUBSTRING(p.host, INSTR(p.host, ':') +1) AS blocking_port,
IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx,
b.trx_query AS blocking_query
FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w
INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS b ON b.trx_id = w.blocking_trx_id
INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id
INNER JOIN INFORMATION_SCHEMA.INNODB_LOCKS AS l ON w.requested_lock_id = l.lock_id
LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id
ORDER BY wait_time DESC\G"

   for query in "$blockers_sql" "$waiters_sql"; do
      $CMD_MYSQL $EXT_ARGV -e "$query"
   done
}
# ###########################################################################
# End collect package
# ###########################################################################
# ###########################################################################
# Global variables
# ###########################################################################
# Message logged by main() just before exit; set by oktorun().
EXIT_REASON=""
# Name of this tool: the last path component of $0 (safe for paths that
# contain whitespace).
TOOL="${0##*/}"
# Set to 0 by sigtrap() to make oktorun() stop the main loop.
OKTORUN=1
# Number of the current collection; stalk() increments it after each
# trigger and oktorun() compares it with --iterations.
ITER=1
# ###########################################################################
# Subroutines
# ###########################################################################
# Print the rows of a saved "mysqladmin processlist" table that match.
#   $1 - file holding the table ("|"-separated, header row first)
#   $2 - name of the column to inspect
#   $3 - optional regex the column value must match (default: none)
#   $4 - optional number the column value must exceed (default 0 = off)
#   $5 - 1 to suppress output and only set the exit status (default 0)
# A row matches when either the pattern or the greater-than test holds.
# Returns 0 if at least one row matched, else 1.
grep_processlist() {
   local file="$1"
   local col="$2"
   local pat="${3:-""}"
   local gt="${4:-0}"
   local quiet="${5:-0}"

   # The values are handed to awk with -v rather than spliced into the
   # program text, so quotes in a pattern cannot break the program.
   awk -v col="$col" -v pat="$pat" -v gt="$gt" -v quiet="$quiet" '
      BEGIN {
         FS="|"
         OFS=" | "
         n_cols=0
         found=0
         gt = gt + 0
         quiet = quiet + 0
      }
      /^\|/ {
         if ( n_cols ) {
            # Data row: look up the wanted column by name.
            val=colno_for_name[col]
            if ((pat != "" && match($val, pat)) || (gt && $val > gt) ) {
               found++
               if (!quiet) print $0
            }
         }
         else {
            # First "|" row is the header: map column names to numbers.
            for (i = 1; i <= NF; i++) {
               gsub(/^[ ]*/, "", $i)
               gsub(/[ ]*$/, "", $i)
               if ( $i != "" ) {
                  name_for_colno[i]=$i
                  colno_for_name[$i]=i
                  n_cols++
               }
            }
         }
      }
      END {
         if ( found )
            exit 0
         exit 1
      }
   ' "$file"
}
# Set the global TRIGGER_FUNCTION from --function (OPT_FUNCTION).
# If OPT_FUNCTION names an existing file it is sourced as a plugin and the
# trigger becomes trg_plugin; otherwise it is the built-in trg_<name>.
set_trg_func() {
   if [ -f "$OPT_FUNCTION" ]; then
      # Quoted so that plugin paths containing whitespace can be loaded.
      source "$OPT_FUNCTION"
      TRIGGER_FUNCTION="trg_plugin"
   else
      TRIGGER_FUNCTION="trg_$OPT_FUNCTION"
   fi
}
# Built-in "status" trigger: print the value of status variable $1 as
# reported by "mysqladmin extended-status" (run with $EXT_ARGV).
# Prints nothing when the variable is not in the output.
trg_status() {
   local var="$1"
   # Compare the name column exactly so that a variable whose name merely
   # ends in $var is not picked up as well.
   mysqladmin $EXT_ARGV extended-status \
      | awk -v var="$var" '$2 == var { print $4 }'
}
# Built-in "processlist" trigger: print how many rows of
# "mysqladmin processlist" have a column $1 matching --match (OPT_MATCH).
# Works through two scratch files under $TMPDIR, removed afterwards.
trg_processlist() {
   local var="$1"
   local tmpfile="$TMPDIR/processlist"
   mysqladmin $EXT_ARGV processlist > "$tmpfile-1"
   # OPT_MATCH is quoted so an empty or multi-word pattern stays a single
   # argument instead of shifting the arguments that follow it.
   grep_processlist "$tmpfile-1" "$var" "$OPT_MATCH" 0 0 > "$tmpfile-2"
   wc -l "$tmpfile-2" | awk '{print $1}'
   rm -rf "$tmpfile"*
   return
}
# Placeholder for the "magic" trigger: it only prints the literal TODO.
trg_magic() {
   printf '%s\n' "TODO"
   return
}
# Tell whether the main loop may keep going.
# Returns 1, with the reason stored in the global EXIT_REASON, when OKTORUN
# is 0 or when --iterations is set and ITER has gone past it; else 0.
oktorun() {
   local stop=0
   if [ $OKTORUN -eq 0 ]; then
      EXIT_REASON="OKTORUN is false, exiting"
      stop=1 # stop running
   elif [ -n "$OPT_ITERATIONS" ] && [ $ITER -gt $OPT_ITERATIONS ]; then
      EXIT_REASON="No more iterations, exiting"
      stop=1 # stop running
   fi
   return $stop # 0 means continue running
}
# Sleep for $1 seconds, but only while oktorun says to keep running.
#   $2 - optional message to log before sleeping
sleep_ok() {
   local seconds="$1"
   local msg="${2:-""}"
   if ! oktorun; then
      return 0
   fi
   if [ -n "$msg" ]; then
      log "$msg"
   fi
   sleep $seconds
}
# Delete old collected samples.
#   $1 - directory holding the collect files
#   $2 - retention time in days
#   $3 - optional oprofile samples directory
#        (default /var/lib/oprofile/samples)
purge_samples() {
   local dir="$1"
   local retention_time="$2"
   local oprofile_dir="${3:-/var/lib/oprofile/samples}"

   # Delete collect files which are more than --retention-time days old.
   find "$dir" -type f -mtime +"$retention_time" -exec rm -f '{}' \;

   if [ -d "$oprofile_dir" ]; then
      # "pt_collect_" here needs to match $CMD_OPCONTROL --save=pt_collect_$p
      # in collect(). TODO: fix this
      # These are directories, so they need rm -rf; -depth makes find visit
      # a directory's contents before the directory itself is removed.
      find "$oprofile_dir" -depth -type d -name 'pt_collect_*' \
         -mtime +"$retention_time" -exec rm -rf '{}' \;
   fi
}
# Signal handler.  The first signal asks the main loop to stop by setting
# OKTORUN to 0; a further signal exits at once with EXIT_STATUS.
sigtrap() {
   if ! [ $OKTORUN -eq 1 ]; then
      warn "Caught signal again, forcing exit"
      exit $EXIT_STATUS
   fi
   warn "Caught signal, exiting"
   OKTORUN=0
}
# Main loop: watch the trigger and start a collection when it fires.
# While oktorun allows it, run $TRIGGER_FUNCTION and compare its output
# with --threshold.  Once the value has been above the threshold for
# --cycles consecutive checks, check the free disk space and, if there is
# enough, run collect in the background; then sleep --sleep seconds.
# Otherwise sleep --interval seconds.  Old samples are purged after every
# check.  Reads the OPT_* globals and increments the global ITER.
stalk() {
   local cycles_true=0 # increment each time check is true, else set to 0
   local matched="no" # set to "yes" when check is true
   local last_prefix="" # prefix of last collection
   local value=""
   local trg_exit_status=0
   local msg=""
   local prefix=""
   local margin=""

   while oktorun; do
      # Run the trigger which returns the value of whatever is being
      # checked. When the value is > --threshold for at least --cycle
      # consecutive times, start collecting.
      # The assignment is kept separate from "local" so that $? is the
      # trigger's own exit status.
      value=$($TRIGGER_FUNCTION "$OPT_VARIABLE")
      trg_exit_status=$?

      if [ -z "$value" ]; then
         # No value. Maybe we failed to connect to MySQL?
         warn "Detected value is empty; something failed? Trigger exit status: $trg_exit_status"
         matched="no"
         cycles_true=0
      elif [ "$value" -gt "$OPT_THRESHOLD" ]; then
         matched="yes"
         cycles_true=$(($cycles_true + 1))
      else
         matched="no"
         cycles_true=0
      fi

      msg="Check results: $OPT_VARIABLE=$value, matched=$matched, cycles_true=$cycles_true"
      log "$msg"

      if [ "$matched" = "yes" ] && [ "$cycles_true" -ge "$OPT_CYCLES" ]; then
         # ##################################################################
         # Start collecting, maybe.
         # ##################################################################
         prefix="${OPT_PREFIX:-$(date +%F-%T | tr :- _)}"
         log "Collect triggered"

         # Check if we'll have enough disk space to collect. Disk space
         # is also checked every interval while collecting.
         margin="20" # default 20M margin, unless:
         if [ -n "$last_prefix" ]; then
            # Use the size in MB of the previous collection as the margin.
            margin=$(du -mc "$OPT_DEST"/"$last_prefix"-* | tail -n 1 | awk '{print $1}')
         fi
         disk_space "$OPT_DEST" > "$OPT_DEST/$prefix-disk-space"
         check_disk_space \
            "$OPT_DEST/$prefix-disk-space" \
            "$OPT_DISK_BYTE_LIMIT" \
            "$OPT_DISK_PCT_LIMIT" \
            "$margin" # real used MB + margin MB
         if [ $? -eq 0 ]; then
            # There should be enough disk space, so collect.
            log "$msg" >> "$OPT_DEST/$prefix-trigger"
            last_prefix="$prefix"

            # Send email to whomever that collect has been triggered.
            if [ "$OPT_NOTIFY_BY_EMAIL" ]; then
               echo "$msg on $(hostname)" \
                  | mail -s "Collect triggered on $(hostname)" \
                     "$OPT_NOTIFY_BY_EMAIL"
            fi

            # Fork and background the collect subroutine which will
            # run for --run-time seconds. We (the parent) sleep
            # while it's collecting (hopefully --sleep is longer than
            # --run-time).
            (
               collect "$OPT_DEST" "$prefix"
            ) >> "$OPT_DEST/$prefix-output" 2>&1 &
         else
            # There will not be enough disk space, so do not collect.
            warn "Collect canceled because there will not be enough disk space after collecting another $margin MB"
         fi

         # ##################################################################
         # Done collecting.
         # ##################################################################
         ITER=$((ITER + 1))
         sleep_ok "$OPT_SLEEP" "Sleeping $OPT_SLEEP seconds after collect"
      else
         # Trigger/check/value is ok, sleep until next check.
         sleep_ok "$OPT_INTERVAL"
      fi

      # Purge old collect file between checks.
      purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME"
   done
}
# ###########################################################################
# Main program loop, called below if the tool is run from the command line.
# ###########################################################################
# Run the tool: install the signal handler, verify that --dest is usable,
# make a tmpdir, select the trigger function, stalk until oktorun says
# stop, then clean up and exit with EXIT_STATUS.
main() {
trap sigtrap SIGHUP SIGINT SIGTERM
# Note: $$ is the parent's PID, but we're a child proc.
# Bash 4 has $BASHPID but we can't rely on that. Consequently,
# we don't know our own PID. See the usage of $! below.
log "$0 started"
# Make sure the collection dir exists.
if [ ! -d "$OPT_DEST" ]; then
mkdir -p "$OPT_DEST" || die "Cannot make --dest $OPT_DEST"
fi
# Check access to the --dest dir. By setting -e in the subshell,
# if either command fails, the subshell will exit immediately and
# $? will be non-zero. Keep the subshell and the $? test as separate
# statements: inside an "if" condition set -e would have no effect.
(
set -e
touch "$OPT_DEST/test"
rm "$OPT_DEST/test"
)
if [ $? -ne 0 ]; then
die "Cannot read and write files to --dest $OPT_DEST"
fi
# Test if we have root; warn if not, but it isn't critical.
if [ "$(id -u)" != "0" ]; then
log 'Not running with root privileges!';
fi
# Make a secure tmpdir.
mk_tmpdir
# Set TRIGGER_FUNCTION based on --function.
set_trg_func
# Stalk while oktorun.
stalk
# Clean up.
rm_tmpdir
remove_pid_file "$OPT_PID"
log "$EXIT_REASON"
log "$0 exit status $EXIT_STATUS"
exit $EXIT_STATUS
}
# Execute the program if it was not included from another file.
# This makes it possible to include without executing, and thus test.
# Entry point.  The body runs only when $0's basename is "pt-stalk", or
# when it is "bash" and $_ equals $0; otherwise the file just defines its
# functions.
if [ "$(basename "$0")" = "pt-stalk" ] \
|| [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then
# Check that mysql and mysqladmin are in PATH. If not, we're
# already dead in the water, so don't bother with cmd line opts,
# just error and exit.
[ -n "$(mysql --help)" ] \
|| die "Cannot execute mysql. Check that it is in PATH."
[ -n "$(mysqladmin --help)" ] \
|| die "Cannot execute mysqladmin. Check that it is in PATH."
# Parse command line options. We must do this first so we can
# see if --daemonize was specified.
# The tmpdir made here only holds the option specs and is removed
# right after; main() makes its own.
mk_tmpdir
parse_options "$0" "$@"
usage_or_errors "$0"
po_status=$?
rm_tmpdir
if [ $po_status -ne 0 ]; then
exit $po_status
fi
# Now that we have the cmd line opts, check that we can actually
# connect to MySQL.
[ -n "$(mysql $EXT_ARGV -e 'SELECT 1')" ] \
|| die "Cannot connect to MySQL. Check that MySQL is running and that the options after -- are correct."
if [ "$OPT_DAEMONIZE" = "yes" ]; then
# Check access to the --log file.
(
set -e
touch "$OPT_LOG"
)
if [ $? -ne 0 ]; then
die "Cannot write to --log $OPT_LOG"
fi
# The PID file will at first have our (parent) PID.
# This is fine for ensuring that only one of us is
# running, but it's not fine if the user wants to use
# the PID in the PID file to check or kill the child
# process. So we'll need to update the PID file with
# the child's PID.
make_pid_file "$OPT_PID" $$
# Run main in the background, detached from STDIN, with all of its
# output appended to the --log file.
main "$@" </dev/null 1>>"$OPT_LOG" 2>&1 &
# Update PID file with the child's PID.
# The child PID is $BASHPID but that special var is only
# in Bash 4+, so we can't rely on it. Consequently, we
# use $! to get the PID of the child we just forked.
echo "$!" > "$OPT_PID"
else
# Foreground: main runs in this process, so $$ is the right PID.
make_pid_file "$OPT_PID" $$
main "$@"
fi
fi
# ############################################################################
# Documentation
# ############################################################################
:<<'DOCUMENTATION'
=pod
=head1 NAME
pt-stalk - Wait for a condition to occur then begin collecting data.
=head1 SYNOPSIS
Usage: pt-stalk [OPTIONS] [-- MYSQL OPTIONS]
pt-stalk watches for a condition to become true, and when it does, executes
a script. By default it executes L<pt-collect>, but that can be customized.
This tool is useful for gathering diagnostic data when an infrequent event
occurs, so an expert person can review the data later.
=head1 RISKS
The following section is included to inform users about the potential risks,
whether known or unknown, of using this tool. The two main categories of risks
are those created by the nature of the tool (e.g. read-only tools vs. read-write
tools) and those created by bugs.
pt-stalk is a read-only tool. It should be very low-risk.
At the time of this release, we know of no bugs that could cause serious harm
to users.
The authoritative source for updated information is always the online issue
tracking system. Issues that affect this tool will be marked as such. You can
see a list of such issues at the following URL:
L<http://www.percona.com/bugs/pt-stalk>.
See also L<"BUGS"> for more information on filing bugs and getting help.
=head1 DESCRIPTION
Although pt-stalk comes pre-configured to do a specific thing, in general
this tool is just a skeleton script for the following flow of actions:
=over
=item 1.
Loop infinitely, sleeping between iterations.
=item 2.
In each iteration, run some command and get the output.
=item 3.
If the command fails or the output is larger than the threshold,
execute the collection script; but do not execute if the destination disk
is too full.
=back
By default, the tool is configured to execute mysqladmin extended-status and
extract the value of the Threads_running variable; if this is greater than
25, it runs the collection script. This is really just placeholder code,
and almost certainly needs to be customized!
If the tool does execute the collection script, it will wait for a while
before checking and executing again. This is to prevent a continuous
condition from causing a huge number of executions to fire off.
The name 'stalk' is because 'watch' is already taken, and 'stalk' is fun.
=head1 CONFIGURING
TODO
=head1 OPTIONS
=over
=item --collect
default: yes; negatable: yes
Collect system information.
=item --collect-gdb
Collect GDB stacktraces.
=item --collect-oprofile
Collect oprofile data.
=item --collect-strace
Collect strace data.
=item --collect-tcpdump
Collect tcpdump data.
=item --cycles
type: int; default: 5
Number of times condition must be met before triggering collection.
=item --daemonize
Daemonize the tool.
=item --dest
type: string; default: ${HOME}/collected
Where to store collected data.
=item --disk-byte-limit
type: int; default: 100
Exit if the disk has less than this many MB free.
=item --disk-pct-limit
type: int; default: 5
Exit if the disk has less than this percent free.
=item --function
type: string; default: status
Built-in function name or plugin file name which returns the value of C<VARIABLE>.
Possible values are:
=over
=item * status
Grep the value of C<VARIABLE> from C<mysqladmin extended-status>.
=item * processlist
Count the number of processes in C<mysqladmin processlist> whose
C<VARIABLE> column matches C<MATCH>. For example:
TRIGGER_FUNCTION="processlist" \
VARIABLE="State" \
MATCH="statistics" \
THRESHOLD="10"
The above triggers when more than 10 processes are in the "statistics" state.
C<MATCH> must be specified for this trigger function.
=item * magic
TODO
=item * plugin file name
A plugin file allows you to specify a custom trigger function. The plugin
file must contain a function called C<trg_plugin>. For example:
trg_plugin() {
# Do some stuff.
echo "$value"
}
The last output of the function (its "return value") must be a number.
This number is compared to C<THRESHOLD>. All L<"ENVIRONMENT"> variables
are available to the function.
Do not alter the tool's existing global variables. Prefix any plugin-specific
global variables with "PLUGIN_".
=back
=item --help
Print help and exit.
=item --interval
type: int; default: 1
Interval between checks.
=item --iterations
type: int
Exit after triggering C<pt-collect> this many times. By default, the tool
will collect as many times as it's triggered.
=item --log
type: string; default: /var/log/pt-stalk.log
Print all output to this file when daemonized.
=item --match
type: string
Match pattern for C<processlist> L<"--function">.
=item --notify-by-email
type: string
Send mail to this list of addresses when C<pt-collect> triggers.
=item --pid
type: string; default: /var/run/pt-stalk.pid
Create a PID file when daemonized.
=item --prefix
type: string
Collect file prefix.
If not specified, the current local time is used like C<2011_12_06_14_02_02>,
which is December 6, 2011 at 14:02:02.
=item --retention-time
type: int; default: 30
Remove samples after this many days.
=item --run-time
type: int; default: 30
How long to collect statistics data for?
Make sure that this isn't longer than SLEEP.
=item --sleep
type: int; default: 300
How long to sleep after collecting?
=item --threshold
type: int; default: 25
Max number of C<N> to tolerate.
=item --variable
type: string; default: Threads_running
This is the thing to check for.
=item --version
Print tool's version and exit.
=back
=head1 ENVIRONMENT
No env vars used.
=head1 SYSTEM REQUIREMENTS
This tool requires Bash v3 or newer.
=head1 BUGS
For a list of known bugs, see L<http://www.percona.com/bugs/pt-stalk>.
Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>.
Include the following information in your bug report:
=over
=item * Complete command-line used to run the tool
=item * Tool L<"--version">
=item * MySQL version of all servers involved
=item * Output from the tool including STDERR
=item * Input files (log/dump/config files, etc.)
=back
If possible, include debugging output by running the tool with C<PTDEBUG>;
see L<"ENVIRONMENT">.
=head1 DOWNLOADING
Visit L<http://www.percona.com/software/percona-toolkit/> to download the
latest release of Percona Toolkit. Or, get the latest release from the
command line:
wget percona.com/get/percona-toolkit.tar.gz
wget percona.com/get/percona-toolkit.rpm
wget percona.com/get/percona-toolkit.deb
You can also get individual tools from the latest release:
wget percona.com/get/TOOL
Replace C<TOOL> with the name of any tool.
=head1 AUTHORS
Baron Schwartz, Justin Swanhart, Fernando Ipar, and Daniel Nichter
=head1 ABOUT PERCONA TOOLKIT
This tool is part of Percona Toolkit, a collection of advanced command-line
tools developed by Percona for MySQL support and consulting. Percona Toolkit
was forked from two projects in June, 2011: Maatkit and Aspersa. Those
projects were created by Baron Schwartz and developed primarily by him and
Daniel Nichter, both of whom are employed by Percona. Visit
L<http://www.percona.com/software/> for more software developed by Percona.
=head1 COPYRIGHT, LICENSE, AND WARRANTY
This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc.
Feedback and improvements are welcome.
THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
systems, you can issue `man perlgpl' or `man perlartistic' to read these
licenses.
You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA.
=head1 VERSION
pt-stalk 2.0.0
=cut
DOCUMENTATION