#!/usr/bin/env bash # This program is part of Percona Toolkit: http://www.percona.com/software/ # See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal # notices and disclaimers. set -u # ########################################################################### # log_warn_die package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/log_warn_die.sh # t/lib/bash/log_warn_die.sh # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### set -u PTFUNCNAME="" PTDEBUG="${PTDEBUG:-""}" EXIT_STATUS=0 ts() { TS=$(date +%F-%T | tr ':-' '_') echo "$TS $*" } info() { [ ${OPT_VERBOSE:-3} -ge 3 ] && ts "$*" } log() { [ ${OPT_VERBOSE:-3} -ge 2 ] && ts "$*" } warn() { [ ${OPT_VERBOSE:-3} -ge 1 ] && ts "$*" >&2 EXIT_STATUS=1 } die() { ts "$*" >&2 EXIT_STATUS=1 exit 1 } _d () { [ "$PTDEBUG" ] && echo "# $PTFUNCNAME: $(ts "$*")" >&2 } # ########################################################################### # End log_warn_die package # ########################################################################### # ########################################################################### # parse_options package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/parse_options.sh # t/lib/bash/parse_options.sh # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### set -u ARGV="" # Non-option args (probably input files) EXT_ARGV="" # Everything after -- (args for an external command) HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV OPT_ERRS=0 # How many command line option errors OPT_VERSION="" # If --version was specified OPT_HELP="" # If --help was specified PO_DIR="" # Directory with program option spec files usage() { local file="$1" local usage="$(grep '^Usage: ' "$file")" echo $usage echo echo "For more information, 'man $TOOL' or 'perldoc $file'." } usage_or_errors() { local file="$1" if [ "$OPT_VERSION" ]; then local version=$(grep '^pt-[^ ]\+ [0-9]' "$file") echo "$version" return 1 fi if [ "$OPT_HELP" ]; then usage "$file" echo echo "Command line options:" echo perl -e ' use strict; use warnings FATAL => qw(all); my $lcol = 20; # Allow this much space for option names. my $rcol = 80 - $lcol; # The terminal is assumed to be 80 chars wide. my $name; while ( <> ) { my $line = $_; chomp $line; if ( $line =~ s/^long:/ --/ ) { $name = $line; } elsif ( $line =~ s/^desc:// ) { $line =~ s/ +$//mg; my @lines = grep { $_ } $line =~ m/(.{0,$rcol})(?:\s+|\Z)/g; if ( length($name) >= $lcol ) { print $name, "\n", (q{ } x $lcol); } else { printf "%-${lcol}s", $name; } print join("\n" . (q{ } x $lcol), @lines); print "\n"; } } ' "$PO_DIR"/* echo echo "Options and values after processing arguments:" echo ( cd "$PO_DIR" for opt in *; do local varname="OPT_$(echo "$opt" | tr a-z- A-Z_)" eval local varvalue=\$$varname if ! 
grep -q "type:" "$PO_DIR/$opt" >/dev/null; then if [ "$varvalue" -a "$varvalue" = "yes" ]; then varvalue="TRUE" else varvalue="FALSE" fi fi printf -- " --%-30s %s" "$opt" "${varvalue:-(No value)}" echo done ) return 1 fi if [ $OPT_ERRS -gt 0 ]; then echo usage "$file" return 1 fi return 0 } option_error() { local err="$1" OPT_ERRS=$(($OPT_ERRS + 1)) echo "$err" >&2 } parse_options() { local file="$1" shift ARGV="" EXT_ARGV="" HAVE_EXT_ARGV="" OPT_ERRS=0 OPT_VERSION="" OPT_HELP="" PO_DIR="$PT_TMPDIR/po" if [ ! -d "$PO_DIR" ]; then mkdir "$PO_DIR" if [ $? -ne 0 ]; then echo "Cannot mkdir $PO_DIR" >&2 exit 1 fi fi rm -rf "$PO_DIR"/* if [ $? -ne 0 ]; then echo "Cannot rm -rf $PO_DIR/*" >&2 exit 1 fi _parse_pod "$file" # Parse POD into program option (po) spec files _eval_po # Eval po into existence with default values if [ $# -ge 2 ] && [ "$1" = "--config" ]; then shift # --config local user_config_files="$1" shift # that ^ local IFS="," for user_config_file in $user_config_files; do _parse_config_files "$user_config_file" done else _parse_config_files "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" fi _parse_command_line "${@:-""}" } _parse_pod() { local file="$1" cat "$file" | PO_DIR="$PO_DIR" perl -ne ' BEGIN { $/ = ""; } next unless $_ =~ m/^=head1 OPTIONS/; while ( defined(my $para = <>) ) { last if $para =~ m/^=head1/; chomp; if ( $para =~ m/^=item --(\S+)/ ) { my $opt = $1; my $file = "$ENV{PO_DIR}/$opt"; open my $opt_fh, ">", $file or die "Cannot open $file: $!"; print $opt_fh "long:$opt\n"; $para = <>; chomp; if ( $para =~ m/^[a-z ]+:/ ) { map { chomp; my ($attrib, $val) = split(/: /, $_); print $opt_fh "$attrib:$val\n"; } split(/; /, $para); $para = <>; chomp; } my ($desc) = $para =~ m/^([^?.]+)/; print $opt_fh "desc:$desc.\n"; close $opt_fh; } } last; ' } _eval_po() { local IFS=":" for opt_spec in "$PO_DIR"/*; do local opt="" local default_val="" local neg=0 local size=0 while read key val; do case "$key" in long) opt=$(echo $val | sed 's/-/_/g' | tr '[:lower:]' '[:upper:]') ;; default) default_val="$val" ;; "short form") ;; type) [ "$val" = "size" ] && size=1 ;; desc) ;; negatable) if [ "$val" = "yes" ]; then neg=1 fi ;; *) echo "Invalid attribute in $opt_spec: $line" >&2 exit 1 esac done < "$opt_spec" if [ -z "$opt" ]; then echo "No long attribute in option spec $opt_spec" >&2 exit 1 fi if [ $neg -eq 1 ]; then if [ -z "$default_val" ] || [ "$default_val" != "yes" ]; then echo "Option $opt_spec is negatable but not default: yes" >&2 exit 1 fi fi if [ $size -eq 1 -a -n "$default_val" ]; then default_val=$(size_to_bytes $default_val) fi eval "OPT_${opt}"="$default_val" done } _parse_config_files() { for config_file in "${@:-""}"; do test -f "$config_file" || continue while read config_opt; do echo "$config_opt" | grep '^[ ]*[^#]' >/dev/null 2>&1 || continue config_opt="$(echo "$config_opt" | sed -e 's/^ *//g' -e 's/ *$//g' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" [ "$config_opt" = "" ] && continue if ! 
[ "$HAVE_EXT_ARGV" ]; then config_opt="--$config_opt" fi _parse_command_line "$config_opt" done < "$config_file" HAVE_EXT_ARGV="" # reset for each file done } _parse_command_line() { local opt="" local val="" local next_opt_is_val="" local opt_is_ok="" local opt_is_negated="" local real_opt="" local required_arg="" local spec="" for opt in "${@:-""}"; do if [ "$opt" = "--" -o "$opt" = "----" ]; then HAVE_EXT_ARGV=1 continue fi if [ "$HAVE_EXT_ARGV" ]; then if [ "$EXT_ARGV" ]; then EXT_ARGV="$EXT_ARGV $opt" else EXT_ARGV="$opt" fi continue fi if [ "$next_opt_is_val" ]; then next_opt_is_val="" if [ $# -eq 0 ] || [ $(expr "$opt" : "\-") -eq 1 ]; then option_error "$real_opt requires a $required_arg argument" continue fi val="$opt" opt_is_ok=1 else if [ $(expr "$opt" : "\-") -eq 0 ]; then if [ -z "$ARGV" ]; then ARGV="$opt" else ARGV="$ARGV $opt" fi continue fi real_opt="$opt" if $(echo $opt | grep '^--no[^-]' >/dev/null); then local base_opt=$(echo $opt | sed 's/^--no//') if [ -f "$PT_TMPDIR/po/$base_opt" ]; then opt_is_negated=1 opt="$base_opt" else opt_is_negated="" opt=$(echo $opt | sed 's/^-*//') fi else if $(echo $opt | grep '^--no-' >/dev/null); then opt_is_negated=1 opt=$(echo $opt | sed 's/^--no-//') else opt_is_negated="" opt=$(echo $opt | sed 's/^-*//') fi fi if $(echo $opt | grep '^[a-z-][a-z-]*=' >/dev/null 2>&1); then val="$(echo $opt | awk -F= '{print $2}')" opt="$(echo $opt | awk -F= '{print $1}')" fi if [ -f "$PT_TMPDIR/po/$opt" ]; then spec="$PT_TMPDIR/po/$opt" else spec=$(grep "^short form:-$opt\$" "$PT_TMPDIR"/po/* | cut -d ':' -f 1) if [ -z "$spec" ]; then option_error "Unknown option: $real_opt" continue fi fi required_arg=$(cat "$spec" | awk -F: '/^type:/{print $2}') if [ "$required_arg" ]; then if [ "$val" ]; then opt_is_ok=1 else next_opt_is_val=1 fi else if [ "$val" ]; then option_error "Option $real_opt does not take a value" continue fi if [ "$opt_is_negated" ]; then val="" else val="yes" fi opt_is_ok=1 fi fi if [ "$opt_is_ok" ]; then opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr '[:lower:]' '[:upper:]') if grep "^type:size" "$spec" >/dev/null; then val=$(size_to_bytes $val) fi eval "OPT_$opt"="'$val'" opt="" val="" next_opt_is_val="" opt_is_ok="" opt_is_negated="" real_opt="" required_arg="" spec="" fi done } size_to_bytes() { local size="$1" echo $size | perl -ne '%f=(B=>1, K=>1_024, M=>1_048_576, G=>1_073_741_824, T=>1_099_511_627_776); m/^(\d+)([kMGT])?/i; print $1 * $f{uc($2 || "B")};' } # ########################################################################### # End parse_options package # ########################################################################### # ########################################################################### # mysql_options package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/mysql_options.sh # t/lib/bash/mysql_options.sh # See https://launchpad.net/percona-toolkit for more information. 
# ########################################################################### set -u mysql_options() { local MYSQL_ARGS="" if [ -n "$OPT_DEFAULTS_FILE" ]; then MYSQL_ARGS="--defaults-file=$OPT_DEFAULTS_FILE" fi if [ -n "$OPT_PORT" ]; then MYSQL_ARGS="$MYSQL_ARGS --port=$OPT_PORT" fi if [ -n "$OPT_SOCKET" ]; then MYSQL_ARGS="$MYSQL_ARGS --socket=$OPT_SOCKET" fi if [ -n "$OPT_HOST" ]; then MYSQL_ARGS="$MYSQL_ARGS --host=$OPT_HOST" fi if [ -n "$OPT_USER" ]; then MYSQL_ARGS="$MYSQL_ARGS --user=$OPT_USER" fi if [ -n "$OPT_PASSWORD" ]; then MYSQL_ARGS="$MYSQL_ARGS --password=$OPT_PASSWORD" fi echo $MYSQL_ARGS } # ########################################################################### # End mysql_options package # ########################################################################### # ########################################################################### # tmpdir package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/tmpdir.sh # t/lib/bash/tmpdir.sh # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### set -u PT_TMPDIR="" mk_tmpdir() { local dir="${1:-""}" if [ -n "$dir" ]; then if [ ! -d "$dir" ]; then mkdir "$dir" || die "Cannot make tmpdir $dir" fi PT_TMPDIR="$dir" else local tool="${0##*/}" local pid="$$" PT_TMPDIR=`mktemp -d -t "${tool}.${pid}.XXXXXX"` \ || die "Cannot make secure tmpdir" fi } rm_tmpdir() { if [ -n "$PT_TMPDIR" ] && [ -d "$PT_TMPDIR" ]; then rm -rf "$PT_TMPDIR" fi PT_TMPDIR="" } # ########################################################################### # End tmpdir package # ########################################################################### # ########################################################################### # alt_cmds package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/alt_cmds.sh # t/lib/bash/alt_cmds.sh # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### set -u _seq() { local i="$1" awk "BEGIN { for(i=1; i<=$i; i++) print i; }" } _pidof() { local cmd="$1" if ! pidof "$cmd" 2>/dev/null; then ps -eo pid,ucomm | awk -v comm="$cmd" '$2 == comm { print $1 }' fi } _lsof() { local pid="$1" if ! lsof -p $pid 2>/dev/null; then /bin/ls -l /proc/$pid/fd 2>/dev/null fi } _which() { if [ -x /usr/bin/which ]; then /usr/bin/which "$1" 2>/dev/null | awk '{print $1}' elif which which 1>/dev/null 2>&1; then which "$1" 2>/dev/null | awk '{print $1}' else echo "$1" fi } # ########################################################################### # End alt_cmds package # ########################################################################### # ########################################################################### # safeguards package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/safeguards.sh # t/lib/bash/safeguards.sh # See https://launchpad.net/percona-toolkit for more information. 
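# NOTE: the safeguards functions below parse `df -P -k` output; check_disk_space()
# warns and returns non-zero when free space would drop below the
# --disk-bytes-free or --disk-pct-free limits, which makes the callers skip or
# abort a collection instead of filling the disk.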
# ########################################################################### set -u disk_space() { local filesystem="${1:-$PWD}" df -P -k "$filesystem" } check_disk_space() { local file="$1" local min_free_bytes="${2:-0}" local min_free_pct="${3:-0}" local bytes_margin="${4:-0}" local used_bytes=$(perl -ane 'm!^/! && print $F[2] * 1024' "$file") local free_bytes=$(perl -ane 'm!^/! && print $F[3] * 1024' "$file") local pct_used=$(perl -ane 'm!^/! && print ($F[4] =~ m/(\d+)/)' "$file") local pct_free=$((100 - $pct_used)) local real_free_bytes=$free_bytes local real_pct_free=$pct_free if [ $bytes_margin -gt 0 ]; then used_bytes=$(($used_bytes + $bytes_margin)) free_bytes=$(($free_bytes - $bytes_margin)) pct_used=$(perl -e "print int(($used_bytes/($used_bytes + $free_bytes)) * 100)") pct_free=$((100 - $pct_used)) fi if [ $free_bytes -lt $min_free_bytes -o $pct_free -lt $min_free_pct ]; then warn "Not enough free disk space: Limit: ${min_free_pct}% free, ${min_free_bytes} bytes free Actual: ${real_pct_free}% free, ${real_free_bytes} bytes free (- $bytes_margin bytes margin) " cat "$file" >&2 return 1 # not enough disk space fi return 0 # disk space is OK } # ########################################################################### # End safeguards package # ########################################################################### # ########################################################################### # daemon package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/daemon.sh # t/lib/bash/daemon.sh # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### set -u make_pid_file() { local file="$1" local pid="$2" if [ -f "$file" ]; then local old_pid=$(cat "$file") if [ -z "$old_pid" ]; then die "PID file $file already exists but it is empty" else kill -0 $old_pid 2>/dev/null if [ $? -eq 0 ]; then die "PID file $file already exists and its PID ($old_pid) is running" else echo "Overwriting PID file $file because its PID ($old_pid)" \ "is not running" fi fi fi echo "$pid" > "$file" if [ $? -ne 0 ]; then die "Cannot create or write PID file $file" fi } remove_pid_file() { local file="$1" if [ -f "$file" ]; then rm "$file" fi } # ########################################################################### # End daemon package # ########################################################################### # ########################################################################### # collect package # This package is a copy without comments from the original. The original # with comments and its test file can be found in the Bazaar repository at, # lib/bash/collect.sh # t/lib/bash/collect.sh # See https://launchpad.net/percona-toolkit for more information. 
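# NOTE: collect() below gathers OS and MySQL diagnostics for --run-time seconds,
# writing one file per data source (pmap, GDB stack traces, SHOW GLOBAL VARIABLES,
# InnoDB status, vmstat/iostat/mpstat, tcpdump, /proc snapshots, etc.) into the
# destination directory under the given prefix; files that end up with no data
# are removed at the end.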
# ########################################################################### set -u CMD_GDB="${CMD_GDB:-"$(_which gdb)"}" CMD_IOSTAT="${CMD_IOSTAT:-"$(_which iostat)"}" CMD_MPSTAT="${CMD_MPSTAT:-"$(_which mpstat)"}" CMD_MYSQL="${CMD_MYSQL:-"$(_which mysql)"}" CMD_MYSQLADMIN="${CMD_MYSQLADMIN:-"$(_which mysqladmin)"}" CMD_OPCONTROL="${CMD_OPCONTROL:-"$(_which opcontrol)"}" CMD_OPREPORT="${CMD_OPREPORT:-"$(_which opreport)"}" CMD_PMAP="${CMD_PMAP:-"$(_which pmap)"}" CMD_STRACE="${CMD_STRACE:-"$(_which strace)"}" CMD_SYSCTL="${CMD_SYSCTL:-"$(_which sysctl)"}" CMD_TCPDUMP="${CMD_TCPDUMP:-"$(_which tcpdump)"}" CMD_VMSTAT="${CMD_VMSTAT:-"$(_which vmstat)"}" [ -z "$CMD_SYSCTL" -a -x "/sbin/sysctl" ] && CMD_SYSCTL="/sbin/sysctl" collect() { local d="$1" # directory to save results in local p="$2" # prefix for each result file local mysqld_pid=$(_pidof mysqld | awk '{print $1; exit;}') if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" else $CMD_PMAP $mysqld_pid > "$d/$p-pmap" fi fi if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" -a "$mysqld_pid" ]; then $CMD_GDB \ -ex "set pagination 0" \ -ex "thread apply all bt" \ --batch -p $mysqld_pid \ >> "$d/$p-stacktrace" fi $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" & sleep .2 local mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")" local mysql_error_log="$(awk '/log_error/{print $2}' "$d/$p-variables")" if [ -z "$mysql_error_log" -a "$mysqld_pid" ]; then mysql_error_log="$(ls -l /proc/$mysqld_pid/fd | awk '/ 2 ->/{print $NF}')" fi local tail_error_log_pid="" if [ "$mysql_error_log" ]; then log "The MySQL error log seems to be $mysql_error_log" tail -f "$mysql_error_log" >"$d/$p-log_error" & tail_error_log_pid=$! $CMD_MYSQLADMIN $EXT_ARGV debug else log "Could not find the MySQL error log" fi local innostat="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" if [ "${mysql_version}" '>' "5.1" ]; then local mutex="SHOW ENGINE INNODB MUTEX" else local mutex="SHOW MUTEX STATUS" fi $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" & $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" & open_tables >> "$d/$p-opentables1" & local tcpdump_pid="" if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" ]; then local port=$(awk '/^port/{print $2}' "$d/$p-variables") if [ "$port" ]; then $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & tcpdump_pid=$! fi fi local have_oprofile="" if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" -a "$mysqld_pid" ]; then $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" & local strace_pid=$! fi ps -eaf >> "$d/$p-ps" & top -bn1 >> "$d/$p-top" & [ "$mysqld_pid" ] && _lsof $mysqld_pid >> "$d/$p-lsof" & if [ "$CMD_SYSCTL" ]; then $CMD_SYSCTL -a >> "$d/$p-sysctl" & fi if [ "$CMD_VMSTAT" ]; then $CMD_VMSTAT 1 $OPT_RUN_TIME >> "$d/$p-vmstat" & $CMD_VMSTAT $OPT_RUN_TIME 2 >> "$d/$p-vmstat-overall" & fi if [ "$CMD_IOSTAT" ]; then $CMD_IOSTAT -dx 1 $OPT_RUN_TIME >> "$d/$p-iostat" & $CMD_IOSTAT -dx $OPT_RUN_TIME 2 >> "$d/$p-iostat-overall" & fi if [ "$CMD_MPSTAT" ]; then $CMD_MPSTAT -P ALL 1 $OPT_RUN_TIME >> "$d/$p-mpstat" & $CMD_MPSTAT -P ALL $OPT_RUN_TIME 1 >> "$d/$p-mpstat-overall" & fi $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" & local mysqladmin_pid=$! 
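# The next check detects whether INFORMATION_SCHEMA.INNODB_LOCK_WAITS exists;
# if it does, lock_waits() and transactions() are sampled once per second in the
# loop below, alongside the /proc, df, netstat, and SHOW FULL PROCESSLIST samples.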
local have_lock_waits_table="" $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1 if [ $? -eq 0 ]; then have_lock_waits_table="yes" fi log "Loop start: $(date +'TS %s.%N %F %T')" for loopno in $(_seq $OPT_RUN_TIME); do disk_space $d > $d/$p-disk-space check_disk_space \ $d/$p-disk-space \ "$OPT_DISK_BYTES_FREE" \ "$OPT_DISK_PCT_FREE" \ || break sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') local ts="$(date +"TS %s.%N %F %T")" if [ -d "/proc" ]; then if [ -f "/proc/diskstats" ]; then (echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" & fi if [ -f "/proc/stat" ]; then (echo $ts; cat /proc/stat) >> "$d/$p-procstat" & fi if [ -f "/proc/vmstat" ]; then (echo $ts; cat /proc/vmstat) >> "$d/$p-procvmstat" & fi if [ -f "/proc/meminfo" ]; then (echo $ts; cat /proc/meminfo) >> "$d/$p-meminfo" & fi if [ -f "/proc/slabinfo" ]; then (echo $ts; cat /proc/slabinfo) >> "$d/$p-slabinfo" & fi if [ -f "/proc/interrupts" ]; then (echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" & fi fi (echo $ts; df -k) >> "$d/$p-df" & (echo $ts; netstat -antp) >> "$d/$p-netstat" & (echo $ts; netstat -s) >> "$d/$p-netstat_s" & (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \ >> "$d/$p-processlist" & if [ "$have_lock_waits_table" ]; then (echo $ts; lock_waits) >>"$d/$p-lock-waits" & (echo $ts; transactions) >>"$d/$p-transactions" & fi done log "Loop end: $(date +'TS %s.%N %F %T')" if [ "$have_oprofile" ]; then $CMD_OPCONTROL --stop $CMD_OPCONTROL --dump local oprofiled_pid=$(_pidof oprofiled | awk '{print $1; exit;}') if [ "$oprofiled_pid" ]; then kill $oprofiled_pid else warn "Cannot kill oprofiled because its PID cannot be determined" fi $CMD_OPCONTROL --save=pt_collect_$p local mysqld_path=$(_which mysqld); if [ "$mysqld_path" -a -f "$mysqld_path" ]; then $CMD_OPREPORT \ --demangle=smart \ --symbols \ --merge tgid \ session:pt_collect_$p \ "$mysqld_path" \ > "$d/$p-opreport" else log "oprofile data saved to pt_collect_$p; you should be able" \ "to get a report by running something like 'opreport" \ "--demangle=smart --symbols --merge tgid session:pt_collect_$p" \ "/path/to/mysqld'" \ > "$d/$p-opreport" fi elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid [ "$mysqld_pid" ] && kill -s 18 $mysqld_pid fi $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" & $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" & open_tables >> "$d/$p-opentables2" & kill $mysqladmin_pid [ "$tail_error_log_pid" ] && kill $tail_error_log_pid [ "$tcpdump_pid" ] && kill $tcpdump_pid hostname > "$d/$p-hostname" local slept=0 while [ -n "$(jobs)" -a $slept -lt $OPT_RUN_TIME ]; do sleep 1 slept=$((slept + 1)) done for pid in $(jobs -p); do kill $pid >/dev/null 2>&1 done for file in "$d/$p-"*; do if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then log "Removing empty file $file"; rm "$file" fi done } open_tables() { local open_tables=$($CMD_MYSQLADMIN $EXT_ARGV ext | grep "Open_tables" | awk '{print $4}') if [ -n "$open_tables" -a $open_tables -le 1000 ]; then $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' & else log "Too many open tables: $open_tables" fi } lock_waits() { local sql1="SELECT CONCAT('thread ', b.trx_mysql_thread_id, ' from ', p.host) AS who_blocks, IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx, MAX(TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP)) AS max_wait_time, COUNT(*) AS num_waiters FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w INNER JOIN 
INFORMATION_SCHEMA.INNODB_TRX AS b ON b.trx_id = w.blocking_trx_id INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id GROUP BY who_blocks ORDER BY num_waiters DESC\G" $CMD_MYSQL $EXT_ARGV -e "$sql1" local sql2="SELECT r.trx_id AS waiting_trx_id, r.trx_mysql_thread_id AS waiting_thread, TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP) AS wait_time, r.trx_query AS waiting_query, l.lock_table AS waiting_table_lock, b.trx_id AS blocking_trx_id, b.trx_mysql_thread_id AS blocking_thread, SUBSTRING(p.host, 1, INSTR(p.host, ':') - 1) AS blocking_host, SUBSTRING(p.host, INSTR(p.host, ':') +1) AS blocking_port, IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx, b.trx_query AS blocking_query FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS b ON b.trx_id = w.blocking_trx_id INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id INNER JOIN INFORMATION_SCHEMA.INNODB_LOCKS AS l ON w.requested_lock_id = l.lock_id LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id ORDER BY wait_time DESC\G" $CMD_MYSQL $EXT_ARGV -e "$sql2" } transactions() { $CMD_MYSQL $EXT_ARGV -e "SELECT * FROM INFORMATION_SCHEMA.INNODB_TRX\G" $CMD_MYSQL $EXT_ARGV -e "SELECT * FROM INFORMATION_SCHEMA.INNODB_LOCKS\G" $CMD_MYSQL $EXT_ARGV -e "SELECT * FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS\G" } # ########################################################################### # End collect package # ########################################################################### # ########################################################################### # Global variables # ########################################################################### TRIGGER_FUNCTION="" RAN_WITH="" EXIT_REASON="" TOOL="pt-stalk" OKTORUN=1 ITER=1 # ########################################################################### # Plugin hooks # ########################################################################### before_stalk() { : } before_collect() { : } after_collect() { : } after_collect_sleep() { : } after_interval_sleep() { : } after_stalk() { : } # ########################################################################### # Subroutines # ########################################################################### grep_processlist() { local file="$1" local col="$2" local pat="${3:-""}" local gt="${4:-0}" local quiet="${5:-0}" awk " BEGIN { FS=\"|\" OFS=\" | \" n_cols=0 found=0 } /^\|/ { if ( n_cols ) { val=colno_for_name[\"$col\"] if ((\"$pat\" && match(\$val, \"$pat\")) || ($gt && \$val > $gt) ) { found++ if (!$quiet) print \$0 } } else { for (i = 1; i <= NF; i++) { gsub(/^[ ]*/, \"\", \$i) gsub(/[ ]*$/, \"\", \$i) if ( \$i != \"\" ) { name_for_colno[i]=\$i colno_for_name[\$i]=i n_cols++ } } } } END { if ( found ) exit 0 exit 1 } " "$file" } set_trg_func() { local func="$1" if [ -f "$func" ]; then # Trigger function is a file with Bash code; source it. . "$func" TRIGGER_FUNCTION="trg_plugin" return 0 # success else # Trigger function is name of a built-in function. 
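# The built-in trigger functions are trg_status and trg_processlist, defined
# below; the --function value is lowercased and mapped to "trg_<name>", and each
# trigger prints a single number that stalk() compares to --threshold.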
func=$(echo "$func" | tr '[:upper:]' '[:lower:]') if [ "$func" = "status" -o "$func" = "processlist" ]; then TRIGGER_FUNCTION="trg_$func" return 0 # success fi fi return 1 # error } trg_status() { local var="$1" mysqladmin $EXT_ARGV extended-status \ | grep "$OPT_VARIABLE " \ | awk '{print $4}' } trg_processlist() { local var="$1" local tmpfile="$PT_TMPDIR/processlist" mysqladmin $EXT_ARGV processlist > "$tmpfile-1" grep_processlist "$tmpfile-1" "$var" "$OPT_MATCH" 0 0 > "$tmpfile-2" wc -l "$tmpfile-2" | awk '{print $1}' rm -f "$tmpfile"* } oktorun() { if [ $OKTORUN -eq 0 ]; then [ -z "$EXIT_REASON" ] && EXIT_REASON="OKTORUN is false" return 1 # stop running fi if [ -n "$OPT_ITERATIONS" ] && [ $ITER -gt $OPT_ITERATIONS ]; then [ -z "$EXIT_REASON" ] && EXIT_REASON="no more iterations" return 1 # stop running fi return 0 # continue running } sleep_ok() { local seconds="$1" local msg="${2:-""}" if oktorun; then [ "$msg" ] && info "$msg" sleep $seconds fi } purge_samples() { local dir="$1" local retention_time="$2" # Delete collect files which more than --retention-time days old. find "$dir" -warn -type f -mtime +$retention_time -exec rm -f '{}' \; local oprofile_dir="/var/lib/oprofile/samples" if [ -d "$oprofile_dir" ]; then # "pt_collect_" here needs to match $CMD_OPCONTROL --save=pt_collect_$p # in collect(). TODO: fix this find "$oprofile_dir" -warn -depth -type d -name 'pt_collect_*' \ -mtime +$retention_time -exec rm -rf '{}' \; fi } sigtrap() { if [ $OKTORUN -eq 1 ]; then warn "Caught signal, exiting" OKTORUN=0 else warn "Caught signal again, forcing exit" exit $EXIT_STATUS fi } stalk() { local cycles_true=0 # increment each time check is true, else set to 0 local matched="" # set to "yes" when check is true local last_prefix="" # prefix of last collection while oktorun; do # Run the trigger which returns the value of whatever is being # checked. When the value is > --threshold for at least --cycle # consecutive times, start collecting. if [ "$OPT_STALK" ]; then local value=$($TRIGGER_FUNCTION $OPT_VARIABLE) local trg_exit_status=$? if [ -z "$value" ]; then # No value. Maybe we failed to connect to MySQL? warn "Detected value is empty; something failed? Trigger exit status: $trg_exit_status" matched="" cycles_true=0 elif [ $value -gt $OPT_THRESHOLD ]; then matched="yes" cycles_true=$(($cycles_true + 1)) else matched="" cycles_true=0 fi local msg="Check results: $OPT_VARIABLE=$value, matched=${matched:-no}, cycles_true=$cycles_true" if [ "$matched" ]; then log "$msg" else info "$msg" fi elif [ "$OPT_COLLECT" ]; then # Make the next if condition true. matched=1 cycles_true=$OPT_CYCLES local msg="Not stalking; collect triggered immediately" log "$msg" fi if [ "$matched" -a $cycles_true -ge $OPT_CYCLES ]; then # ################################################################## # Start collecting, maybe. # ################################################################## log "Collect $ITER triggered" # Send email to whomever that collect has been triggered. if [ "$OPT_NOTIFY_BY_EMAIL" ]; then echo "$msg on $(hostname)" \ | mail -s "Collect triggered on $(hostname)" \ "$OPT_NOTIFY_BY_EMAIL" fi if [ "$OPT_COLLECT" ]; then local prefix="${OPT_PREFIX:-$(date +%F-%T | tr ':-' '_')}" # Check if we'll have enough disk space to collect. Disk space # is also checked every interval while collecting. 
local margin="20971520" # default 20M margin, unless: if [ -n "$last_prefix" ]; then margin=$(du -mc "$OPT_DEST"/"$last_prefix"-* | tail -n 1 | awk '{print $1'}) fi disk_space "$OPT_DEST" > "$OPT_DEST/$prefix-disk-space" check_disk_space \ "$OPT_DEST/$prefix-disk-space" \ "$OPT_DISK_BYTES_FREE" \ "$OPT_DISK_PCT_FREE" \ "$margin" if [ $? -eq 0 ]; then # There should be enough disk space, so collect. ts "$msg" >> "$OPT_DEST/$prefix-trigger" ts "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger" last_prefix="$prefix" # Plugin hook: before_collect # Fork and background the collect subroutine which will # run for --run-time seconds. We (the parent) sleep # while its collecting (hopefully --sleep is longer than # --run-time). ( collect "$OPT_DEST" "$prefix" ) >> "$OPT_DEST/$prefix-output" 2>&1 & local collector_pid=$! log "Collect $ITER PID $collector_pid" # Plugin hook: after_collect $collector_pid else # There will not be enough disk space, so do not collect. warn "Collect canceled because there will not be enough disk space after collecting another $margin MB" fi # Purge old collect files. if [ -d "$OPT_DEST" ]; then purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME" fi fi # ################################################################## # Done collecting. # ################################################################## log "Collect $ITER done" ITER=$((ITER + 1)) cycles_true=0 sleep_ok "$OPT_SLEEP" "Sleeping $OPT_SLEEP seconds after collect" # Plugin hook: after_collect_sleep else # Trigger/check/value is ok, sleep until next check. sleep_ok "$OPT_INTERVAL" # Plugin hook: after_interval_sleep fi done # One final purge of old collect files, but only if in collect mode. if [ "$OPT_COLLECT" -a -d "$OPT_DEST" ]; then purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME" fi # Before exiting, the last collector may still be running. # Wait for it to finish in case the tool is part of a script, # or part of a test, so the caller has access to the collected # data when the tool exists. collect() waits an additional # --run-time seconds for itself to complete, which means we # have to wait for 2 * run-time like it plus some overhead else # we may get in sync with the collector and kill it a microsecond # before it kills itself, thus 3 * run-time. # https://bugs.launchpad.net/percona-toolkit/+bug/1070434 if [ "$(jobs)" ]; then local sleep_time=$((OPT_RUN_TIME * 3)) log "Waiting up to $sleep_time seconds for collectors to finish..." local slept=0 while [ -n "$(jobs)" -a $slept -lt $sleep_time ]; do sleep 1 slept=$((slept + 1)) done if [ "$(jobs)" ]; then for pid in $(jobs -p); do # This isn't an warning (we don't want exit status 1) because # the system may be running slowly so it's just "natural" that # a collector may get stuck or run really slowly. log "Killing collector $pid" kill $pid >/dev/null 2>&1 done else log "All collectors have finished" fi fi } # ########################################################################### # Main program loop, called below if tool is ran from the command line. # ########################################################################### main() { trap sigtrap SIGHUP SIGINT SIGTERM # Note: $$ is the parent's PID, but we're a child proc. # Bash 4 has $BASHPID but we can't rely on that. Consequently, # we don't know our own PID. See the usage of $! below. 
RAN_WITH="--function=$OPT_FUNCTION --variable=$OPT_VARIABLE --threshold=$OPT_THRESHOLD --match=$OPT_MATCH --cycles=$OPT_CYCLES --interval=$OPT_INTERVAL --iterations=$OPT_ITERATIONS --run-time=$OPT_RUN_TIME --sleep=$OPT_SLEEP --dest=$OPT_DEST --prefix=$OPT_PREFIX --notify-by-email=$OPT_NOTIFY_BY_EMAIL --log=$OPT_LOG --pid=$OPT_PID --plugin=$OPT_PLUGIN" log "Starting $0 $RAN_WITH" # Test if we have root; warn if not, but it isn't critical. if [ "$(id -u)" != "0" ]; then log 'Not running with root privileges!'; fi # Make a secure tmpdir. mk_tmpdir # Plugin hook: before_stalk # Stalk while oktorun. stalk # Plugin hook: after_stalk # Clean up. rm_tmpdir remove_pid_file "$OPT_PID" log "Exiting because $EXIT_REASON" log "$0 exit status $EXIT_STATUS" exit $EXIT_STATUS } # Execute the program if it was not included from another file. # This makes it possible to include without executing, and thus test. if [ "${0##*/}" = "$TOOL" ] \ || [ "${0##*/}" = "bash" -a "${_:-""}" = "$0" ]; then # Parse command line options. We must do this first so we can # see if --daemonize was specified. mk_tmpdir parse_options "$0" "${@:-""}" # Verify and set TRIGGER_FUNCTION based on --function. if ! set_trg_func "$OPT_FUNCTION"; then option_error "Invalid --function value: $OPT_FUNCTION" fi # Verify and source the --plugin. if [ "$OPT_PLUGIN" ]; then if [ -f "$OPT_PLUGIN" ]; then . "$OPT_PLUGIN" else option_error "Invalid --plugin value: $OPT_PLUGIN is not a file" fi fi if [ -z "$OPT_STALK" -a "$OPT_COLLECT" ]; then # Not stalking; do immediate collect once. OPT_ITERATIONS=1 OPT_CYCLES=0 OPT_SLEEP=0 OPT_INTERVAL=0 fi usage_or_errors "$0" po_status=$? rm_tmpdir if [ $po_status -ne 0 ]; then [ $OPT_ERRS -gt 0 ] && exit 1 exit 0 fi MYSQL_ARGS="$(mysql_options)" EXT_ARGV="$EXT_ARGV $MYSQL_ARGS" # Check that mysql and mysqladmin are in PATH. If not, we're # already dead in the water, so don't bother with cmd line opts, # just error and exit. [ -n "$(mysql --help)" ] \ || die "Cannot execute mysql. Check that it is in PATH." [ -n "$(mysqladmin --help)" ] \ || die "Cannot execute mysqladmin. Check that it is in PATH." # Now that we have the cmd line opts, check that we can actually # connect to MySQL. [ -n "$(mysql $EXT_ARGV -e 'SELECT 1')" ] \ || die "Cannot connect to MySQL. Check that MySQL is running and that the options after -- are correct." # Check existence and access to the --dest dir if we're collecting. if [ "$OPT_COLLECT" ]; then if [ ! -d "$OPT_DEST" ]; then mkdir -p "$OPT_DEST" || die "Cannot make --dest $OPT_DEST" fi # Check access to the --dest dir. By setting -x in the subshell, # if either command fails, the subshell will exit immediately and # $? will be non-zero. ( set -e touch "$OPT_DEST/test" rm "$OPT_DEST/test" ) if [ $? -ne 0 ]; then die "Cannot read and write files to --dest $OPT_DEST" fi fi if [ "$OPT_STALK" -a "$OPT_DAEMONIZE" ]; then # Check access to the --log file. touch "$OPT_LOG" || die "Cannot write to --log $OPT_LOG" # The PID file will at first have our (parent) PID. # This is fine for ensuring that only one of us is # running, but it's not fine if the user wants to use # the PID in the PID file to check or kill the child # process. So we'll need to update the PID file with # the child's PID. make_pid_file "$OPT_PID" $$ main "${@:-""}" >"$OPT_LOG" 2>&1 & # Update PID file with the child's PID. # The child PID is $BASHPID but that special var is only # in Bash 4+, so we can't rely on it. Consequently, we # use $! to get the PID of the child we just forked. echo "$!" 
> "$OPT_PID" else [ "$OPT_STALK" ] && make_pid_file "$OPT_PID" $$ main "${@:-""}" fi fi # ############################################################################ # Documentation # ############################################################################ :<<'DOCUMENTATION' =pod =head1 NAME pt-stalk - Gather forensic data about MySQL when a problem occurs. =head1 SYNOPSIS Usage: pt-stalk [OPTIONS] [-- EXTRA MYSQL OPTIONS] pt-stalk watches for a trigger condition to become true, and then collects data to help in diagnosing problems. It is designed to run as a daemon with root privileges, so that you can diagnose intermittent problems that you cannot observe directly. You can also use it to execute a custom command, or to gather the data on demand without waiting for the trigger to happen. =head1 RISKS The following section is included to inform users about the potential risks, whether known or unknown, of using this tool. The two main categories of risks are those created by the nature of the tool (e.g. read-only tools vs. read-write tools) and those created by bugs. pt-stalk is a read-write tool; it collects data from the system and writes it into a series of files. It should be very low-risk. Some of the options can cause intrusive data collection to be performed, however, so if you enable any non-default options, you should read their documentation carefully. At the time of this release, we know of no bugs that could cause serious harm to users. The authoritative source for updated information is always the online issue tracking system. Issues that affect this tool will be marked as such. You can see a list of such issues at the following URL: L. See also L<"BUGS"> for more information on filing bugs and getting help. =head1 DESCRIPTION Sometimes a problem happens infrequently and for a short time, giving you no chance to see the system when it happens. How do you solve intermittent MySQL problems when you can't observe them? That's why pt-stalk exists. In addition to using it when there's a known problem on your servers, it is a good idea to run pt-stalk all the time, even when you think nothing is wrong. You will appreciate the data it gathers when a problem occurs, because problems such as MySQL lockups or spikes of activity typically leave no evidence to use in root cause analysis. This tool does two things: it watches a server (typically MySQL) for a trigger to occur, and it gathers diagnostic data. To use it effectively, you need to define a good trigger condition. A good trigger is sensitive enough to fire reliably when a problem occurs, so that you don't miss a chance to solve problems. On the other hand, a good trigger isn't prone to false positives, so you don't gather information when the server is functioning normally. The most reliable triggers for MySQL tend to be the number of connections to the server, and the number of queries running concurrently. These are available in the SHOW GLOBAL STATUS command as Threads_connected and Threads_running. Sometimes Threads_connected is not a reliable indicator of trouble, but Threads_running usually is. Your job, as the tool's user, is to define an appropriate trigger condition for the tool. Choose carefully, because the quality of your results will depend on the trigger you choose. You can define the trigger with the L<"--function">, L<"--variable">, and L<"--threshold"> options, among others. Please read the documentation for --function to learn how to do this. 
The pt-stalk tool, by default, simply watches MySQL repeatedly until the trigger becomes true. It then gathers diagnostics for a while, and sleeps afterwards for some time to prevent repeatedly gathering data if the condition remains true. In crude pseudocode, omitting some subtleties,

  while true; do
     if --variable from --function is greater than --threshold; then
        observations++
        if observations is greater than --cycles; then
           capture diagnostics for --run-time seconds
           exit if --iterations is exceeded
           sleep for --sleep seconds
        fi
     fi
     clean up data that's older than --retention-time
     sleep for --interval seconds
  done

The diagnostic data is written to files whose names begin with a timestamp, so you can distinguish samples from each other in case the tool collects data multiple times. The pt-sift tool is designed to help you browse and analyze the resulting samples of data.

Although this sounds simple enough, in practice there are a number of subtleties, such as detecting when the disk is beginning to fill up so that the tool doesn't cause the server to run out of disk space. This tool handles these types of potential problems, so it's a good idea to use this tool instead of writing something from scratch and possibly experiencing some of the hazards this tool is designed to prevent.

=head1 CONFIGURING

You can use standard Percona Toolkit configuration files to set command-line options. You will probably want to run the tool as a daemon and customize at least the diagnostic threshold. Here's a sample configuration file for triggering when there are more than 20 queries running at once:

  daemonize
  threshold=20

If you're not running the tool as it's designed (as a root user, daemonized) then you'll need to set several options, such as L<"--dest">, to locations that are writable by non-root users.

=head1 OPTIONS

=over

=item --collect

default: yes; negatable: yes

Collect system information. You can negate this option to make the tool watch the system but not actually gather any diagnostic data. See also L<"--stalk">.

=item --collect-gdb

Collect GDB stacktraces. This is achieved by attaching to MySQL and printing stack traces from all threads. This will freeze the server for some period of time, ranging from a second or so to much longer on very busy systems with a lot of memory and many threads in the server. For this reason, it is disabled by default. However, if you are trying to diagnose a server stall or lockup, freezing the server causes no additional harm, and the stack traces can be vital for diagnosis.

In addition to freezing the server, there is also some risk of the server crashing or performing badly after GDB detaches from it.

=item --collect-oprofile

Collect oprofile data. This is achieved by starting an oprofile session, letting it run for the collection time, and then stopping and saving the resulting profile data in the system's default location. Please read your system's oprofile documentation to learn more about this.

=item --collect-strace

Collect strace data. This is achieved by attaching strace to the server, which will make it run very slowly until strace detaches. The same cautions apply as those listed in --collect-gdb. You should not enable this option together with --collect-gdb, because GDB and strace can't attach to the server process simultaneously.

=item --collect-tcpdump

Collect tcpdump data. This option causes tcpdump to capture all traffic on all interfaces for the port on which MySQL is listening.
You can later use pt-query-digest to decode the MySQL protocol and extract a log of query traffic from it. =item --config type: string Read this comma-separated list of config files. If specified, this must be the first option on the command line. =item --cycles type: int; default: 5 The number of times the trigger condition must be true before collecting data. This helps prevent false positives, and makes the trigger condition less likely to fire when the problem recovers quickly. =item --daemonize Daemonize the tool. This causes the tool to fork into the background and log its output as specified in --log. =item --dest type: string; default: /var/lib/pt-stalk Where to store the diagnostic data. Each time the tool collects data, it writes to a new set of files, which are named with the current system timestamp. =item --disk-bytes-free type: size; default: 100M Don't collect data if the disk has less than this much free space. This prevents the tool from filling up the disk with diagnostic data. If the L<"--dest"> directory contains a previously captured sample of data, the tool will measure its size and use that as an estimate of how much data is likely to be gathered this time, too. It will then be even more pessimistic, and will refuse to collect data unless the disk has enough free space to hold the sample and still have the desired amount of free space. For example, if you'd like 100MB of free space and the previous diagnostic sample consumed 100MB, the tool won't collect any data unless the disk has 200MB free. Valid size value suffixes are k, M, G, and T. =item --disk-pct-free type: int; default: 5 Don't collect data if the disk has less than this percent free space. This prevents the tool from filling up the disk with diagnostic data. This option works similarly to L<"--disk-bytes-free"> but specifies a percentage margin of safety instead of a bytes margin of safety. The tool honors both options, and will not collect any data unless both margins are satisfied. =item --function type: string; default: status Specifies what to watch for a diagnostic trigger. The default value watches SHOW GLOBAL STATUS, but you can also watch SHOW PROCESSLIST or supply a plugin file with your own custom code. This function supplies the value of L<"--variable">, which is then compared against L<"--threshold"> to see if the trigger condition is met. Additional options may be required as well; see below. Possible values: =over =item * status This value specifies that the source of data for the diagnostic trigger is SHOW GLOBAL STATUS. The value of L<"--variable"> then defines which status counter is the trigger. =item * processlist This value specifies that the data for the diagnostic trigger comes from SHOW FULL PROCESSLIST. The trigger value is the count of processes whose L<"--variable"> column matches the L<"--match"> option. For example, to trigger when more than 10 processes are in the "statistics" state, use the following options: --function processlist --variable State \ --match statistics --threshold 10 =back In addition, you can specify a file that contains your custom trigger function, written in Unix shell script. This can be a wrapper that executes anything you wish. If the argument to --function is a file, then it takes precedence over builtin functions, so if there is a file in the working directory named "status" or "processlist" then the tool will use that file as a plugin, even though those are otherwise recognized as reserved words for this option. 
The plugin file works by providing a function called C<trg_plugin>, and the tool simply sources the file and executes the function. For example, the function might look like the following:

  trg_plugin() {
     mysql $EXT_ARGV -e "SHOW ENGINE INNODB STATUS" \
       | grep -c "has waited at"
  }

This snippet will count the number of mutex waits inside of InnoDB. It illustrates the general principle: the function must output a number, which is then compared to the threshold as usual. The $EXT_ARGV variable contains the MySQL options mentioned in the L<"SYNOPSIS"> above.

The plugin should not alter the tool's existing global variables. Prefix any plugin-specific global variables with "PLUGIN_" or make them local.

=item --help

Print help and exit.

=item --interval

type: int; default: 1

Interval between checks for the diagnostic trigger.

=item --iterations

type: int

Exit after collecting diagnostics this many times. By default, the tool will continue to watch the server forever, but this is useful for scenarios where you want to capture once and then exit, for example.

=item --log

type: string; default: /var/log/pt-stalk.log

Print all output to this file when daemonized.

=item --match

type: string

The pattern to use when watching SHOW PROCESSLIST. See the documentation for L<"--function"> for details.

=item --notify-by-email

type: string

Send mail to this list of addresses when data is collected.

=item --pid

type: string; default: /var/run/pt-stalk.pid

Create a PID file when daemonized.

=item --plugin

type: string

Load a plugin to hook into the tool and extend its functionality. The specified file does not need to be executable, nor does its first line need to be a shebang line. It only needs to define one or more of these Bash functions:

=over

=item before_stalk

Called before stalking.

=item before_collect

Called when the stalk condition is triggered, before running a collector process as a backgrounded subshell.

=item after_collect

Called after running a collector process. The PID of the collector process is passed as the first argument. This hook is called before the tool sleeps for L<"--sleep"> seconds.

=item after_collect_sleep

Called after sleeping L<"--sleep"> seconds for the collector process to finish. This hook is called after the C<after_collect> hook.

=item after_interval_sleep

Called after sleeping L<"--interval"> seconds after each trigger check.

=item after_stalk

Called after stalking. Since pt-stalk stalks forever by default, this hook is only called if L<"--iterations"> is specified.

=back

For example, a very simple plugin that touches a file when a collector process is triggered:

  before_collect() {
     touch /tmp/foo
  }

Since the plugin is completely sourced (imported) into the tool's namespace, be careful not to define other functions or global variables that already exist in the tool. You should prefix all plugin-specific functions and global variables with C<plugin_> or C<PLUGIN_>.

Plugins have access to all command line options but they should not modify them. Each option is a global variable like C<$OPT_DEST> which corresponds to L<"--dest">. Therefore, the global variable for each command line option is C<OPT_> plus the option name in all caps with hyphens replaced by underscores.

Plugins can stop the tool by setting the global variable C<OKTORUN> to C<0>. In this case, the global variable C<EXIT_REASON> should also be set to indicate why the tool was stopped.

=item --prefix

type: string

The filename prefix for diagnostic samples. By default, samples have a timestamp prefix based on the current local time, such as 2011_12_06_14_02_02, which is December 6, 2011 at 14:02:02.
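For example, a single collection with the default timestamp prefix produces a set of files such as the following (the exact set depends on which commands are available and which collect options are enabled):

  2011_12_06_14_02_02-trigger
  2011_12_06_14_02_02-df
  2011_12_06_14_02_02-processlist
  2011_12_06_14_02_02-innodbstatus1
  2011_12_06_14_02_02-output

The C<-trigger> file records why the collection fired, and the C<-output> file captures the collector's own output.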
=item --retention-time

type: int; default: 30

Number of days to retain collected samples. Any samples that are older will be purged.

=item --run-time

type: int; default: 30

How long the tool will collect data when it triggers. This should not be longer than L<"--sleep">. It is usually not necessary to change this; if the default 30 seconds hasn't gathered enough diagnostic data, running longer is not likely to do so. In fact, in many cases a shorter collection period is appropriate.

This value is used two other times. After collecting, the collect subprocess will wait another L<"--run-time"> seconds for its commands to finish. Some commands can take a while if the system is running very slowly (which is likely to be the case, given that a collection was triggered). Since empty files are deleted, the extra wait gives commands time to finish and write their data.

The value is potentially used again just before the tool exits, to wait once more for any collect subprocesses to finish. In most cases this won't happen because of the aforementioned extra wait. If it does happen, the tool will log "Waiting up to N seconds for collectors to finish..." where N is three times L<"--run-time">. In both cases, after waiting, the tool kills all of its subprocesses.

=item --sleep

type: int; default: 300

How long to sleep after collecting data. This prevents the tool from triggering continuously, which might be a problem if the collection process is intrusive. It also prevents filling up the disk or gathering too much data to analyze reasonably.

=item --stalk

default: yes; negatable: yes

Watch the server and wait for the trigger to occur. You can negate this option to make the tool immediately gather any diagnostic data once and exit. This is useful if a problem is already happening, but pt-stalk is not running, so you only want to collect diagnostic data.

If this option is negated, L<"--daemonize">, L<"--log">, L<"--pid">, and other stalking-related options have no effect; the tool simply collects diagnostic data and exits. Safeguard options, like L<"--disk-bytes-free"> and L<"--disk-pct-free">, are still respected. See also L<"--collect">.

=item --threshold

type: int; default: 25

The threshold at which the diagnostic trigger should fire. See L<"--function"> for details.

=item --variable

type: string; default: Threads_running

The variable to compare against the threshold. See L<"--function"> for details.

=item --verbose

type: int; default: 2

Print more or less information while running. Since the tool is designed to be a long-running daemon, the default verbosity level only prints the most important information. If you run the tool interactively, you may want to use a higher verbosity level.

  LEVEL PRINTS
  ===== =====================================
  0     Errors
  1     Warnings
  2     Matching triggers and collection info
  3     Non-matching triggers

=item --version

Print tool's version and exit.

=item --defaults-file

short form: -F; type: string

Only read mysql options from the given file. You must give an absolute pathname.

=item --host

short form: -h; type: string; default: localhost

Host to connect to.

=item --password

short form: -p; type: string

Password to use when connecting.

=item --port

short form: -P; type: int

Port number to use for connection.

=item --socket

short form: -S; type: string

Socket file to use for connection.

=item --user

short form: -u; type: string

User for login if not current user.
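For example, a hypothetical invocation that supplies the connection options directly (the account and socket path shown are placeholders):

  pt-stalk --user=stalk_user --password=stalk_pass \
    --socket=/var/run/mysqld/mysqld.sock --daemonize

Alternatively, the same MySQL options can be given after C<-->, as shown in the L<"SYNOPSIS">, and they will be passed to the mysql and mysqladmin commands that the tool runs.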
=back

=head1 ENVIRONMENT

This tool does not require any environment variables for configuration, although it can be influenced to work differently through several environment variables. Keep in mind that these are expert settings, and should not be used in most cases. Specifically, the variables that can be set are:

=over

=item CMD_GDB

=item CMD_IOSTAT

=item CMD_MPSTAT

=item CMD_MYSQL

=item CMD_MYSQLADMIN

=item CMD_OPCONTROL

=item CMD_OPREPORT

=item CMD_PMAP

=item CMD_STRACE

=item CMD_SYSCTL

=item CMD_TCPDUMP

=item CMD_VMSTAT

=back

For example, during collection iostat is called with a -dx argument, but if you have an NFS partition, you might also need the -n flag. Instead of editing the source, you can call pt-stalk as

  CMD_IOSTAT="iostat -n" pt-stalk ...

which will do exactly what you need. Combined with the plugin hooks, this gives you fine-grained control of what the tool does.

=head1 SYSTEM REQUIREMENTS

This tool requires Bash v3 or newer. Certain options require other programs:

=over

=item L<"--collect-gdb"> requires C<gdb>

=item L<"--collect-oprofile"> requires C<opcontrol> and C<opreport>

=item L<"--collect-strace"> requires C<strace>

=item L<"--collect-tcpdump"> requires C<tcpdump>

=back

=head1 BUGS

For a list of known bugs, see L<http://www.percona.com/bugs/pt-stalk>.

Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>. Include the following information in your bug report:

=over

=item * Complete command-line used to run the tool

=item * Tool L<"--version">

=item * MySQL version of all servers involved

=item * Output from the tool including STDERR

=item * Input files (log/dump/config files, etc.)

=back

If possible, include debugging output by running the tool with C<PTDEBUG>; see L<"ENVIRONMENT">.

=head1 DOWNLOADING

Visit L<http://www.percona.com/software/percona-toolkit/> to download the latest release of Percona Toolkit. Or, get the latest release from the command line:

  wget percona.com/get/percona-toolkit.tar.gz
  wget percona.com/get/percona-toolkit.rpm
  wget percona.com/get/percona-toolkit.deb

You can also get individual tools from the latest release:

  wget percona.com/get/TOOL

Replace C<TOOL> with the name of any tool.

=head1 AUTHORS

Baron Schwartz, Justin Swanhart, Fernando Ipar, and Daniel Nichter

=head1 ABOUT PERCONA TOOLKIT

This tool is part of Percona Toolkit, a collection of advanced command-line tools developed by Percona for MySQL support and consulting. Percona Toolkit was forked from two projects in June, 2011: Maatkit and Aspersa. Those projects were created by Baron Schwartz and developed primarily by him and Daniel Nichter, both of whom are employed by Percona. Visit L<http://www.percona.com/software/> for more software developed by Percona.

=head1 COPYRIGHT, LICENSE, AND WARRANTY

This program is copyright 2011-2013 Percona Ireland Ltd, 2010-2011 Baron Schwartz.

THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, version 2; OR the Perl Artistic License. On UNIX and similar systems, you can issue `man perlgpl' or `man perlartistic' to read these licenses.

You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.

=head1 VERSION

pt-stalk 2.1.8

=cut

DOCUMENTATION