Log how pt-stalk was run. Update modules in tool. Tweak 'Starting' and 'Exiting' log lines.

This commit is contained in:
Daniel Nichter
2012-01-19 11:51:24 -07:00
parent 9e29cc205a
commit ba4b4f3059
2 changed files with 72 additions and 44 deletions

View File

@@ -439,17 +439,17 @@ remove_pid_file() {
set -u
CMD_GDB=${CMD_GDB:-"gdb"}
CMD_IOSTAT=${CMD_IOSTAT:-"iostat"}
CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"}
CMD_MYSQL=${CMD_MSSQL:-"mysql"}
CMD_MYSQLADMIN=${CMD_MYSQL_ADMIN:-"mysqladmin"}
CMD_OPCONTROL=${CMD_OPCONTROL:-"opcontrol"}
CMD_OPREPORT=${CMD_OPREPORT:-"opreport"}
CMD_PMAP=${CMD_PMAP:-"pmap"}
CMD_STRACE=${CMD_STRACE:-"strace"}
CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"}
CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"}
CMD_GDB="$(which gdb)"
CMD_IOSTAT="$(which iostat)"
CMD_MPSTAT="$(which mpstat)"
CMD_MYSQL="$(which mysql)"
CMD_MYSQLADMIN="$(which mysqladmin)"
CMD_OPCONTROL="$(which opcontrol)"
CMD_OPREPORT="$(which opreport)"
CMD_PMAP="$(which pmap)"
CMD_STRACE="$(which strace)"
CMD_TCPDUMP="$(which tcpdump)"
CMD_VMSTAT="$(which vmstat)"
collect() {
local d="$1" # directory to save results in
@@ -463,7 +463,7 @@ collect() {
mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1);
fi
if [ "$mysqld_pid" ]; then
if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then
if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then
$CMD_PMAP -x $mysqld_pid > "$d/$p-pmap"
else
@@ -471,14 +471,12 @@ collect() {
fi
fi
if [ "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then
if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then
$CMD_GDB \
-ex "set pagination 0" \
-ex "thread apply all bt" \
--batch -p $mysqld_pid \
>> "$d/$p-stacktrace"
else
echo "GDB (--collect-gdb) was not enabled" >> "$d/$p-stacktrace"
fi
$CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 &
@@ -512,7 +510,7 @@ collect() {
open_tables >> "$d/$p-opentables1" 2>&1 &
local tcpdump_pid=""
if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then
if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" = "yes" ]; then
local port=$(awk '/^port/{print $2}' "$d/$p-variables")
if [ "$port" ]; then
$CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} &
@@ -521,26 +519,32 @@ collect() {
fi
local have_oprofile="no"
if [ "$OPT_COLLECT_OPROFILE" = "yes" ]; then
if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" = "yes" ]; then
if $CMD_OPCONTROL --init; then
$CMD_OPCONTROL --start --no-vmlinux
have_oprofile="yes"
fi
elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then
elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then
$CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 &
local strace_pid=$!
fi
ps -eaf >> "$d/$p-ps" 2>&1 &
sysctl -a >> "$d/$p-sysctl" 2>&1 &
top -bn1 >> "$d/$p-top" 2>&1 &
$CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 &
$CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 &
$CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 &
$CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 &
$CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 &
$CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 &
lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 &
ps -eaf >> "$d/$p-ps" 2>&1 &
sysctl -a >> "$d/$p-sysctl" 2>&1 &
top -bn1 >> "$d/$p-top" 2>&1 &
lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 &
if [ "$CMD_VMSTAT" ]; then
$CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 &
$CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 &
fi
if [ "$CMD_IOSTAT" ]; then
$CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 &
$CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 &
fi
if [ "$CMD_MPSTAT" ]; then
$CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 &
$CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 &
fi
$CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" 2>&1 &
local mysqladmin_pid=$!
@@ -564,15 +568,29 @@ collect() {
sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}')
local ts="$(date +"TS %s.%N %F %T")"
(cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" &
(cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" &
(cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" &
(cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" &
(cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" &
(cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" &
(df -h 2>&1; echo $ts) >> "$d/$p-df" &
(netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" &
(netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" &
if [ -d "/proc" ]; then
if [ -f "/proc/diskstats" ]; then
(cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" &
fi
if [ -f "/proc/stat" ]; then
(cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" &
fi
if [ -f "/proc/vmstat" ]; then
(cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" &
fi
if [ -f "/proc/meminfo" ]; then
(cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" &
fi
if [ -f "/proc/slabinfo" ]; then
(cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" &
fi
if [ -f "/proc/interrupts" ]; then
(cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" &
fi
fi
(df -h 2>&1; echo $ts) >> "$d/$p-df" &
(netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" &
(netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" &
($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \
>> "$d/$p-processlist"
@@ -605,7 +623,7 @@ collect() {
"/path/to/mysqld'" \
> "$d/$p-opreport"
fi
elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then
elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then
kill -s 2 $strace_pid
sleep 1
kill -s 15 $strace_pid
@@ -672,6 +690,7 @@ lock_waits() {
# ###########################################################################
# Global variables
# ###########################################################################
RAN_WITH=""
EXIT_REASON=""
TOOL=$(basename $0)
OKTORUN=1
@@ -758,12 +777,12 @@ trg_magic() {
oktorun() {
if [ $OKTORUN -eq 0 ]; then
EXIT_REASON="OKTORUN is false, exiting"
EXIT_REASON="OKTORUN is false"
return 1 # stop running
fi
if [ -n "$OPT_ITERATIONS" ] && [ $ITER -gt $OPT_ITERATIONS ]; then
EXIT_REASON="No more iterations, exiting"
EXIT_REASON="no more iterations"
return 1 # stop running
fi
@@ -856,7 +875,8 @@ stalk() {
"$margin" # real used MB + margin MB
if [ $? -eq 0 ]; then
# There should be enough disk space, so collect.
log "$msg" >> "$OPT_DEST/$prefix-trigger"
log "$msg" >> "$OPT_DEST/$prefix-trigger"
log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger"
last_prefix="$prefix"
# Send email to whomever that collect has been triggered.
@@ -903,7 +923,8 @@ main() {
# Note: $$ is the parent's PID, but we're a child proc.
# Bash 4 has $BASHPID but we can't rely on that. Consequently,
# we don't know our own PID. See the usage of $! below.
log "$0 started"
RAN_WITH="--function=$OPT_FUNCTION --variable=$OPT_VARIABLE --threshold=$OPT_THRESHOLD --match=$OPT_MATCH --cycles=$OPT_CYCLES --interval=$OPT_INTERVAL --iterations=$OPT_ITERATIONS --run-time=$OPT_RUN_TIME --sleep=$OPT_SLEEP --dest=$OPT_DEST --prefix=$OPT_PREFIX --notify-by-email=$OPT_NOTIFY_BY_EMAIL --log=$OPT_LOG --pid=$OPT_PID"
log "Starting $0 $RAN_WITH"
# Make sure the collection dir exists.
if [ ! -d "$OPT_DEST" ]; then
@@ -939,7 +960,7 @@ main() {
rm_tmpdir
remove_pid_file "$OPT_PID"
log "$EXIT_REASON"
log "Exiting because $EXIT_REASON"
log "$0 exit status $EXIT_STATUS"
exit $EXIT_STATUS
}