From ba4b4f3059ff2e69c78ec676ac7c2e180edaf75b Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 19 Jan 2012 11:51:24 -0700 Subject: [PATCH] Log how pt-stalk was ran. Update modules in tool. Tweak 'Starting' and 'Exiting' log lines. --- bin/pt-stalk | 107 +++++++++++++++++++++++++----------------- t/pt-stalk/pt-stalk.t | 9 +++- 2 files changed, 72 insertions(+), 44 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 312858bd..2fb45bfa 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -439,17 +439,17 @@ remove_pid_file() { set -u -CMD_GDB=${CMD_GDB:-"gdb"} -CMD_IOSTAT=${CMD_IOSTAT:-"iostat"} -CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"} -CMD_MYSQL=${CMD_MSSQL:-"mysql"} -CMD_MYSQLADMIN=${CMD_MYSQL_ADMIN:-"mysqladmin"} -CMD_OPCONTROL=${CMD_OPCONTROL:-"opcontrol"} -CMD_OPREPORT=${CMD_OPREPORT:-"opreport"} -CMD_PMAP=${CMD_PMAP:-"pmap"} -CMD_STRACE=${CMD_STRACE:-"strace"} -CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"} -CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"} +CMD_GDB="$(which gdb)" +CMD_IOSTAT="$(which iostat)" +CMD_MPSTAT="$(which mpstat)" +CMD_MYSQL="$(which mysql)" +CMD_MYSQLADMIN="$(which mysqladmin)" +CMD_OPCONTROL="$(which opcontrol)" +CMD_OPREPORT="$(which opreport)" +CMD_PMAP="$(which pmap)" +CMD_STRACE="$(which strace)" +CMD_TCPDUMP="$(which tcpdump)" +CMD_VMSTAT="$(which vmstat)" collect() { local d="$1" # directory to save results in @@ -463,7 +463,7 @@ collect() { mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); fi - if [ "$mysqld_pid" ]; then + if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" else @@ -471,14 +471,12 @@ collect() { fi fi - if [ "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then + if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then $CMD_GDB \ -ex "set pagination 0" \ -ex "thread apply all bt" \ --batch -p $mysqld_pid \ >> "$d/$p-stacktrace" - else - echo "GDB (--collect-gdb) was not enabled" >> "$d/$p-stacktrace" fi $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 & @@ -512,7 +510,7 @@ collect() { open_tables >> "$d/$p-opentables1" 2>&1 & local tcpdump_pid="" - if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then + if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" = "yes" ]; then local port=$(awk '/^port/{print $2}' "$d/$p-variables") if [ "$port" ]; then $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & @@ -521,26 +519,32 @@ collect() { fi local have_oprofile="no" - if [ "$OPT_COLLECT_OPROFILE" = "yes" ]; then + if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" = "yes" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & local strace_pid=$! fi - ps -eaf >> "$d/$p-ps" 2>&1 & - sysctl -a >> "$d/$p-sysctl" 2>&1 & - top -bn1 >> "$d/$p-top" 2>&1 & - $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & - $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & - $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & - $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & - $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & - $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & - lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + ps -eaf >> "$d/$p-ps" 2>&1 & + sysctl -a >> "$d/$p-sysctl" 2>&1 & + top -bn1 >> "$d/$p-top" 2>&1 & + lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + if [ "$CMD_VMSTAT" ]; then + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & + fi + if [ "$CMD_IOSTAT" ]; then + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & + fi + if [ "$CMD_MPSTAT" ]; then + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & + fi $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" 2>&1 & local mysqladmin_pid=$! @@ -564,15 +568,29 @@ collect() { sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') local ts="$(date +"TS %s.%N %F %T")" - (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & - (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & - (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & - (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & - (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & - (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & - (df -h 2>&1; echo $ts) >> "$d/$p-df" & - (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & - (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & + if [ -d "/proc" ]; then + if [ -f "/proc/diskstats" ]; then + (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & + fi + if [ -f "/proc/stat" ]; then + (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & + fi + if [ -f "/proc/vmstat" ]; then + (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & + fi + if [ -f "/proc/meminfo" ]; then + (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & + fi + if [ -f "/proc/slabinfo" ]; then + (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & + fi + if [ -f "/proc/interrupts" ]; then + (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & + fi + fi + (df -h 2>&1; echo $ts) >> "$d/$p-df" & + (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & + (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & ($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \ >> "$d/$p-processlist" @@ -605,7 +623,7 @@ collect() { "/path/to/mysqld'" \ > "$d/$p-opreport" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid @@ -672,6 +690,7 @@ lock_waits() { # ########################################################################### # Global variables # ########################################################################### +RAN_WITH="" EXIT_REASON="" TOOL=$(basename $0) OKTORUN=1 @@ -758,12 +777,12 @@ trg_magic() { oktorun() { if [ $OKTORUN -eq 0 ]; then - EXIT_REASON="OKTORUN is false, exiting" + EXIT_REASON="OKTORUN is false" return 1 # stop running fi if [ -n "$OPT_ITERATIONS" ] && [ $ITER -gt $OPT_ITERATIONS ]; then - EXIT_REASON="No more iterations, exiting" + EXIT_REASON="no more iterations" return 1 # stop running fi @@ -856,7 +875,8 @@ stalk() { "$margin" # real used MB + margin MB if [ $? -eq 0 ]; then # There should be enough disk space, so collect. - log "$msg" >> "$OPT_DEST/$prefix-trigger" + log "$msg" >> "$OPT_DEST/$prefix-trigger" + log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger" last_prefix="$prefix" # Send email to whomever that collect has been triggered. @@ -903,7 +923,8 @@ main() { # Note: $$ is the parent's PID, but we're a child proc. # Bash 4 has $BASHPID but we can't rely on that. Consequently, # we don't know our own PID. See the usage of $! below. - log "$0 started" + RAN_WITH="--function=$OPT_FUNCTION --variable=$OPT_VARIABLE --threshold=$OPT_THRESHOLD --match=$OPT_MATCH --cycles=$OPT_CYCLES --interval=$OPT_INTERVAL --iterations=$OPT_ITERATIONS --run-time=$OPT_RUN_TIME --sleep=$OPT_SLEEP --dest=$OPT_DEST --prefix=$OPT_PREFIX --notify-by-email=$OPT_NOTIFY_BY_EMAIL --log=$OPT_LOG --pid=$OPT_PID" + log "Starting $0 $RAN_WITH" # Make the collection dir exists. if [ ! -d "$OPT_DEST" ]; then @@ -939,7 +960,7 @@ main() { rm_tmpdir remove_pid_file "$OPT_PID" - log "$EXIT_REASON" + log "Exiting because $EXIT_REASON" log "$0 exit status $EXIT_STATUS" exit $EXIT_STATUS } diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index a8e7da87..d1ac37a2 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -23,7 +23,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 14; + plan tests => 15; } my $cnf = "/tmp/12345/my.sandbox.cnf"; @@ -161,6 +161,13 @@ is( "pt-stalk is not running" ); +$output = `cat $dest/*-trigger`; +like( + $output, + qr/pt-stalk ran with --function=status --variable=Uptime --threshold=$threshold/, + "Trigger file logs how pt-stalk was ran" +); + # ############################################################################# # Done. # #############################################################################