# Make sure the disk isn't getting too full.
#
# Arguments:
#   $1 - fail when the filesystem is this percent full or more (default 100)
#   $2 - fail when this many megabytes or fewer are free (default 0)
#   $3 - a path on the filesystem to check
# Globals written: PCT, MB, DEST, avail, full.  They are intentionally not
#   local: the strace step later in this script writes to "${DEST}/...".
# Outputs: an explanation on stdout when there is not enough room.
# Returns: 1 when the disk is too full; 0 otherwise, including when df gave
#   nothing to parse (the check stays best-effort, as before).
check_disk_space() {
   PCT=${1:-"100"}
   MB=${2:-"0"}
   DEST="$3"

   # With -P, df prints one header line and exactly one data line for a single
   # operand, so take row 2 instead of filtering on a leading "/".  The old
   # filter dropped filesystems such as overlay, tmpfs or host:/export, which
   # left both values empty and silently disabled the check.
   local usage
   usage=$(df -m -P "${DEST}" \
      | awk 'NR == 2 { sub(/%/, "", $5); print $4, $5 }')
   if [ -z "${usage}" ]; then
      echo "Cannot determine free space for ${DEST}; skipping disk check" >&2
      return 0
   fi
   avail=${usage%% *}   # column 4: megabytes available
   full=${usage##* }    # column 5: percent used, "%" already stripped

   if [ "${avail}" -le "${MB}" ] || [ "${full}" -ge "${PCT}" ]; then
      echo "Not enough free space (${full}% full, ${avail}MB free)"
      echo "Wanted less than ${PCT}% full and more than ${MB}MB"
      return 1
   fi
   return 0
}
-check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || exit 1 - -echo "Gathering info for $d" - -# Make sure there's only one of me. -( - flock 200 - - # Get pidof mysqld; pidof doesn't exist on some systems. We try our best... - p=$(pidof -s mysqld); - if [ -z "${p}" ]; then - p=$(pgrep -o -x mysqld); - fi - if [ -z "${p}" ]; then - p=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); - fi - - # Get memory allocation info before anything else. - if [ "${p}" ]; then - if pmap --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then - pmap -x $p > "$OPT_d/$d-pmap" - else - # Some pmap's apparently don't support -x (issue 116). - pmap $p > "$OPT_d/$d-pmap" - fi - fi - - # Getting a GDB stacktrace can be an intensive operation, so do this only if - # necessary. - if [ "${OPT_g}" = "yes" -a "${p}" ]; then - gdb -ex "set pagination 0" -ex "thread apply all bt" --batch -p $p >> "$OPT_d/$d-stacktrace" - else - echo "GDB (-g) was not enabled" >> "$OPT_d/$d-stacktrace" - fi - - # Get MySQL's variables if possible. Then sleep long enough that we probably - # complete SHOW VARIABLES if all's well. (We don't want to run mysql in the - # foreground, because it could hang.) - mysql "$@" -e 'SHOW GLOBAL VARIABLES' >> "$OPT_d/$d-variables" 2>&1 & - sleep .2 - - # Get the major.minor version number. Version 3.23 doesn't matter for our - # purposes, and other releases have x.x.x* version conventions so far. - VER="$(awk '/^version[^_]/{print substr($2,1,3)}' "$OPT_d/$d-variables")" - - # Is MySQL logging its errors to a file? If so, tail that file. - errfile="$(awk '/log_error/{print $2}' "$OPT_d/$d-variables")" - if [ -z "${errfile}" -a "${p}" ]; then - # Try getting it from the open filehandle... - errfile="$(ls -l /proc/${p}/fd | awk '/ 2 ->/{print $NF}')" - fi - - if [ "${errfile}" ]; then - echo "The error file seems to be ${errfile}" - tail -f "${errfile}" >"$OPT_d/$d-log_error" 2>&1 & - error_pid=$! 
- # Send a mysqladmin debug to the server so we can potentially learn about - # locking etc. - mysqladmin debug "$@" - else - echo "Could not detect error file; will not tail MySQL's log file" - fi - - # Get a sample of these right away, so we can get these without interaction - # with the other commands we're about to run. - INNOSTAT="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" - mysql "$@" -e "${INNOSTAT}" >> "$OPT_d/$d-innodbstatus1" 2>&1 & - mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist1" 2>&1 & - mysql "$@" -e 'SHOW OPEN TABLES' >> "$OPT_d/$d-opentables1" 2>&1 & - if [ "${VER}" '>' "5.1" ]; then - mysql "$@" -e 'SHOW ENGINE INNODB MUTEX' >> "$OPT_d/$d-mutex-status1" 2>&1 & - else - mysql "$@" -e 'SHOW MUTEX STATUS' >> "$OPT_d/$d-mutex-status1" 2>&1 & - fi - - # If TCP dumping is specified, start that on the server's port. - if [ "${OPT_t}" = "yes" ]; then - port=$(awk '/^port/{print $2}' "$OPT_d/$d-variables") - if [ "${port}" ]; then - tcpdump -i any -s 4096 -w "$OPT_d/$d-tcpdump" port ${port} & - tcpdump_pid=$! - fi - fi - - # Next, start oprofile gathering data during the whole rest of this process. - # The --init should be a no-op if it has already been init-ed. - if [ "${OPT_o}" = "yes" ]; then - if opcontrol --init; then - opcontrol --start --no-vmlinux - else - OPT_o="no" - fi - elif [ "${OPT_s}" = "yes" ]; then - # Don't run oprofile and strace at the same time. - strace -T -s 0 -f -p $p > "${DEST}/$d-strace" 2>&1 & - strace_pid=$! - fi - - # Grab a few general things first. Background all of these so we can start - # them all up as quickly as possible. We use mysqladmin -c even though it is - # buggy and won't stop on its own in 5.1 and newer, because there is a chance - # that we will get and keep a connection to the database; in troubled times - # the database tends to exceed max_connections, so reconnecting in the loop - # tends not to work very well. 
- ps -eaf >> "$OPT_d/$d-ps" 2>&1 & - sysctl -a >> "$OPT_d/$d-sysctl" 2>&1 & - top -bn1 >> "$OPT_d/$d-top" 2>&1 & - vmstat 1 $OPT_i >> "$OPT_d/$d-vmstat" 2>&1 & - vmstat $OPT_i 2 >> "$OPT_d/$d-vmstat-overall" 2>&1 & - iostat -dx 1 $OPT_i >> "$OPT_d/$d-iostat" 2>&1 & - iostat -dx $OPT_i 2 >> "$OPT_d/$d-iostat-overall" 2>&1 & - mpstat -P ALL 1 $OPT_i >> "$OPT_d/$d-mpstat" 2>&1 & - mpstat -P ALL $OPT_i 1 >> "$OPT_d/$d-mpstat-overall" 2>&1 & - lsof -nP -p $p -bw >> "$OPT_d/$d-lsof" 2>&1 & - mysqladmin "$@" ext -i1 -c$OPT_i >> "$OPT_d/$d-mysqladmin" 2>&1 & - mysqladmin_pid=$! - - # This loop gathers data for the rest of the duration, and defines the time - # of the whole job. - echo "Loop start: $(date +'TS %s.%N %F %T')" - for a in `seq 1 $OPT_i`; do - # We check the disk, but don't exit, because we need to stop jobs if we - # need to exit. - check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || break - - # Synchronize ourselves onto the clock tick, so the sleeps are 1-second - sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') - ts="$(date +"TS %s.%N %F %T")" - - # Collect the stuff for this cycle - (cat /proc/diskstats 2>&1; echo $ts) >> "$OPT_d/$d-diskstats" & - (cat /proc/stat 2>&1; echo $ts) >> "$OPT_d/$d-procstat" & - (cat /proc/vmstat 2>&1; echo $ts) >> "$OPT_d/$d-procvmstat" & - (cat /proc/meminfo 2>&1; echo $ts) >> "$OPT_d/$d-meminfo" & - (cat /proc/slabinfo 2>&1; echo $ts) >> "$OPT_d/$d-slabinfo" & - (cat /proc/interrupts 2>&1; echo $ts) >> "$OPT_d/$d-interrupts" & - (df -h 2>&1; echo $ts) >> "$OPT_d/$d-df" & - (netstat -antp 2>&1; echo $ts) >> "$OPT_d/$d-netstat" & - (netstat -s 2>&1; echo $ts) >> "$OPT_d/$d-netstat_s" & - done - echo "Loop end: $(date +'TS %s.%N %F %T')" - - if [ "${OPT_o}" = "yes" ]; then - opcontrol --stop - opcontrol --dump - kill $(pidof oprofiled); - opcontrol --save=pt_collect_$d - - # Attempt to generate a report; if this fails, then just tell the user how - # to generate the report. 
- path_to_binary=$(which mysqld); - if [ "${path_to_binary}" -a -f "${path_to_binary}" ]; then - opreport --demangle=smart --symbols --merge tgid session:pt_collect_$d "${path_to_binary}" > "$OPT_d/$d-opreport" - else - echo "oprofile data saved to pt_collect_$d; you should now be able to get a report" > "$OPT_d/$d-opreport" - echo "by running something like" >> "$OPT_d/$d-opreport" - echo "opreport --demangle=smart --symbols --merge tgid session:pt_collect_$d /path/to/mysqld" >> "$OPT_d/$d-opreport" - fi - elif [ "${OPT_s}" = "yes" ]; then - kill -s 2 ${strace_pid} - sleep 1 - kill -s 15 ${strace_pid} - # Sometimes strace leaves threads/processes in T status. - kill -s 18 $p - fi - - mysql "$@" -e "${INNOSTAT}" >> "$OPT_d/$d-innodbstatus2" 2>&1 & - mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist2" 2>&1 & - mysql "$@" -e 'SHOW OPEN TABLES' >> "$OPT_d/$d-opentables2" 2>&1 & - if [ "${VER}" '>' "5.1" ]; then - mysql "$@" -e 'SHOW ENGINE INNODB MUTEX' >> "$OPT_d/$d-mutex-status2" 2>&1 & - else - mysql "$@" -e 'SHOW MUTEX STATUS' >> "$OPT_d/$d-mutex-status2" 2>&1 & - fi - - # Kill backgrounded tasks. - kill $mysqladmin_pid - [ "$error_pid" ] && kill $error_pid - [ "$tcpdump_pid" ] && kill $tcpdump_pid - - # Finally, record what system we collected this data from. - hostname > "$OPT_d/$d-hostname" -)200>/tmp/percona-toolkit-collect-lockfile >> "$OPT_d/$d-output" 2>&1 - -# ############################################################################ -# Documentation -# ############################################################################ -:<<'DOCUMENTATION' -=pod - -=head1 NAME - -pt-collect - Collect information from a server for some period of time. - -=head1 SYNOPSIS - -Usage: pt-collect -d -g -i -o -s [OPTIONS] [-- MYSQL-OPTIONS] - -pt-collect tool gathers a variety of information about a system for a period -of time. It is typically executed when the stalk tool detects a condition -and wants to collect information to assist in diagnosis. 
Five options -must be specified on the command line: -dgios. - -=head1 RISKS - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-collect is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. You can -see a list of such issues at the following URL: -L<http://www.percona.com/bugs/pt-collect>. - -See also L<"BUGS"> for more information on filing bugs and getting help. - -=head1 DESCRIPTION - -pt-collect creates a lock to ensure that only one instance runs at a time, -and then saves a variety of performance and status data into files in the -configured directory. Files are named with a timestamp so they can be -grouped together. The tool is MySQL-centric by default, and gathers quite -a bit of diagnostic data that's useful for understanding the behavior of -a MySQL database server. - -Options after C<--> are passed to C<mysql> and C<mysqladmin>. - -=head1 OPTIONS - -=over - -=item -d (required) - -DESTINATION Where to store the resulting data; must already exist. - -=item -g (required) - -Collect GDB stack traces. - -=item -i INTERVAL (required) - -How many seconds to collect data. - -=item -o (required) - -Collect oprofile data; disables -s. - -=item -s (required) - -Collect strace data. - -=item -f PERCENT - -Exit if the disk is more than this percent full (default 100). - -=item -m MEGABYTES - -Exit if there are less than this many megabytes free disk space (default 0). - -=item -p PREFIX - -Store the data into files with this prefix (optional). - -=item -t - -Collect tcpdump data. - -=back - -=head1 ENVIRONMENT - -This tool does not use any environment variables. 
- -=head1 SYSTEM REQUIREMENTS - -This tool requires Bash v3 or newer and assumes that these programs -are installed, in the PATH, and executable: sysctl, top, vmstat, iostat, -mpstat, lsof, mysql, mysqladmin, df, netstat, pidof, flock, and others -depending on what command-line options are specified. If some of those -programs are not available, the tool will still run but may print warnings. - -=head1 BUGS - -For a list of known bugs, see L<http://www.percona.com/bugs/pt-collect>. - -Please report bugs at L<https://bugs.launchpad.net/percona-toolkit>. -Include the following information in your bug report: - -=over - -=item * Complete command-line used to run the tool - -=item * Tool L<"--version"> - -=item * MySQL version of all servers involved - -=item * Output from the tool including STDERR - -=item * Input files (log/dump/config files, etc.) - -=back - -If possible, include debugging output by running the tool with C<PTDEBUG>; -see L<"ENVIRONMENT">. - -=head1 DOWNLOADING - -Visit L<http://www.percona.com/software/percona-toolkit/> to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - -You can also get individual tools from the latest release: - - wget percona.com/get/TOOL - -Replace C<TOOL> with the name of any tool. - -=head1 AUTHORS - -Baron Schwartz - -=head1 ABOUT PERCONA TOOLKIT - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -L<http://www.percona.com/software/> for more software developed by Percona. - -=head1 COPYRIGHT, LICENSE, AND WARRANTY - -This program is copyright 2010-2011 Baron Schwartz, 2011-2012 Percona Inc. -Feedback and improvements are welcome. 
# tmpdir package: gives the tool a private scratch directory.
# TMPDIR holds the directory's path while one exists and is empty otherwise.
TMPDIR=""

# Abort with a message.  Delegates to die() when the enclosing script provides
# one; die() is not defined in the portion of the script this package was
# reviewed against, so fall back to printing on stderr and exiting 1 rather
# than carrying on with an unusable TMPDIR.
_tmpdir_die() {
   if command -v die >/dev/null 2>&1; then
      die "$@"
   fi
   echo "$*" >&2
   exit 1
}

# Create the scratch directory and store its path in TMPDIR.
# Arguments:
#   $1 - optional directory to use; it is created if it does not exist.
#        When omitted, a fresh /tmp/<tool>.<pid>.XXXXX directory is made
#        with mktemp -d.
# Exits (via _tmpdir_die) when the directory cannot be created.
mk_tmpdir() {
   local dir=${1:-""}

   if [ -n "$dir" ]; then
      if [ ! -d "$dir" ]; then
         # Quoted so paths containing spaces or glob characters stay intact.
         mkdir -- "$dir" || _tmpdir_die "Cannot make tmpdir $dir"
      fi
      TMPDIR="$dir"
   else
      local tool=${0##*/}   # script name without its directory part
      local pid="$$"
      TMPDIR=$(mktemp -d "/tmp/${tool}.${pid}.XXXXX") \
         || _tmpdir_die "Cannot make secure tmpdir"
   fi
}

# Remove the scratch directory, if there is one, and clear TMPDIR.
rm_tmpdir() {
   if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then
      # Quoted and "--"-guarded: an unquoted rm -rf would split the path on
      # whitespace and could delete unrelated files.
      rm -rf -- "$TMPDIR"
   fi
   TMPDIR=""
}
# tmpdir package: gives the tool a private scratch directory.
# TMPDIR holds the directory's path while one exists and is empty otherwise.
TMPDIR=""

# Abort with a message.  Delegates to die() when the enclosing script provides
# one; die() is not defined in the portion of the script this package was
# reviewed against, so fall back to printing on stderr and exiting 1 rather
# than carrying on with an unusable TMPDIR.
_tmpdir_die() {
   if command -v die >/dev/null 2>&1; then
      die "$@"
   fi
   echo "$*" >&2
   exit 1
}

# Create the scratch directory and store its path in TMPDIR.
# Arguments:
#   $1 - optional directory to use; it is created if it does not exist.
#        When omitted, a fresh /tmp/<tool>.<pid>.XXXXX directory is made
#        with mktemp -d.
# Exits (via _tmpdir_die) when the directory cannot be created.
mk_tmpdir() {
   local dir=${1:-""}

   if [ -n "$dir" ]; then
      if [ ! -d "$dir" ]; then
         # Quoted so paths containing spaces or glob characters stay intact.
         mkdir -- "$dir" || _tmpdir_die "Cannot make tmpdir $dir"
      fi
      TMPDIR="$dir"
   else
      local tool=${0##*/}   # script name without its directory part
      local pid="$$"
      TMPDIR=$(mktemp -d "/tmp/${tool}.${pid}.XXXXX") \
         || _tmpdir_die "Cannot make secure tmpdir"
   fi
}

# Remove the scratch directory, if there is one, and clear TMPDIR.
rm_tmpdir() {
   if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then
      # Quoted and "--"-guarded: an unquoted rm -rf would split the path on
      # whitespace and could delete unrelated files.
      rm -rf -- "$TMPDIR"
   fi
   TMPDIR=""
}
# Gets a value from $TMPDIR/percona-toolkit-mysql-variables.
# Arguments:
#   $1 - variable name; it is spliced into an anchored awk regex (/^$1$/) that
#        is matched against the first field of each line.
# Outputs: the second field of the matching line(s), or 0 if nothing matched.
# $AP_AWK is set outside this block and is expanded unquoted, as it is
# everywhere else in this script.
get_var () {
   local v
   v="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" "$TMPDIR/percona-toolkit-mysql-variables")"
   echo "${v:-0}"
}

# Returns true if a variable exists.
# Arguments:
#   $1 - grep pattern; note it is matched anywhere on a line of
#        $TMPDIR/percona-toolkit-mysql-variables, not only against the name.
# Returns: the exit status of $AP_GREP (0 when some line matched).
var_exists () {
   $AP_GREP "$1" "$TMPDIR/percona-toolkit-mysql-variables" >/dev/null 2>&1
}

# Gets a value from $TMPDIR/percona-toolkit-mysql-status.
# Arguments:
#   $1 - status counter name, matched the same way get_var matches names.
# Outputs: the second field of the matching line(s), or 0 if nothing matched.
get_stat () {
   local v
   v="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" "$TMPDIR/percona-toolkit-mysql-status")"
   echo "${v:-0}"
}
-# which should be in /tmp/percona-toolkit. +# which should be in $TMPDIR/percona-toolkit. parse_mysqld_instances () { local file=$1 + local socket=${socket:-""} + local port=${port:-""} + local datadir=${datadir:-""} echo " Port Data Directory Socket" echo " ===== ========================== ======" $AP_GREP '/mysqld ' $file | while read line; do @@ -224,11 +265,11 @@ parse_mysqld_instances () { } # Tries to find the my.cnf file by examining 'ps' output, which should be in -# /tmp/percona-toolkit. You have to specify the port for the instance you are +# $TMPDIR/percona-toolkit. You have to specify the port for the instance you are # interested in, in case there are multiple instances. find_my_cnf_file() { local file=$1 - local port=$2 + local port=${2:-""} if test -n "$port" && $AP_GREP -- "/mysqld.*--port=$port" $file >/dev/null 2>&1 ; then $AP_GREP -- "/mysqld.*--port=$port" $file \ | $AP_AWK 'BEGIN{RS=" "; FS="=";} $1 ~ /--defaults-file/ { print $2; }' \ @@ -240,7 +281,7 @@ find_my_cnf_file() { fi } -# Gets the MySQL system time. Uses input from /tmp/percona-toolkit-mysql-variables. +# Gets the MySQL system time. Uses input from $TMPDIR/percona-toolkit-mysql-variables. get_mysql_timezone () { tz="$(get_var time_zone)" if [ "${tz}" = "SYSTEM" ]; then @@ -249,14 +290,14 @@ get_mysql_timezone () { echo "${tz}" } -# Gets the MySQL system version. Uses input from /tmp/percona-toolkit-mysql-variables. +# Gets the MySQL system version. Uses input from $TMPDIR/percona-toolkit-mysql-variables. get_mysql_version () { name_val Version "$(get_var version) $(get_var version_comment)" name_val "Built On" "$(get_var version_compile_os) $(get_var version_compile_machine)" } # Gets the system start and uptime in human readable format. Last restart date -# should be in /tmp/percona-toolkit. +# should be in $TMPDIR/percona-toolkit. 
get_mysql_uptime () { local file=$1 restart="$(cat $file)" @@ -265,7 +306,7 @@ get_mysql_uptime () { echo "${restart} (up ${uptime})" } -# Summarizes the output of SHOW MASTER LOGS, which is in /tmp/percona-toolkit +# Summarizes the output of SHOW MASTER LOGS, which is in $TMPDIR/percona-toolkit summarize_binlogs () { local file=$1 name_val "Binlogs" $(wc -l $file) @@ -282,7 +323,7 @@ format_binlog_filters () { } # Takes as input a file that has two samples of SHOW STATUS, columnized next to -# each other. These should be in /tmp/percona-toolkit. Outputs fuzzy-ed numbers: +# each other. These should be in $TMPDIR/percona-toolkit. Outputs fuzzy-ed numbers: # absolute, all-time per second, and per-second over the interval between the # samples. Omits any rows that are all zeroes. format_status_variables () { @@ -387,7 +428,7 @@ summarize_processlist () { echo } -# Pretty-prints the my.cnf file, which should be in /tmp/percona-toolkit. It's super +# Pretty-prints the my.cnf file, which should be in $TMPDIR/percona-toolkit. It's super # annoying, but some *modern* versions of awk don't support POSIX character # sets in regular expressions, like [[:space:]] (looking at you, Debian). So # the below patterns contain [] and must remain that way. 
@@ -545,8 +586,8 @@ format_innodb_status () { name_val "Pending I/O Writes" "$(find_pending_io_writes "${file}")" name_val "Pending I/O Flushes" "$(find_pending_io_flushes "${file}")" $AP_AWK -F, '/^---TRANSACTION/{print $2}' "${file}" \ - | $AP_SED -e 's/ [0-9]* sec.*//' | sort | uniq -c > /tmp/percona-toolkit2 - name_val "Transaction States" "$(group_concat /tmp/percona-toolkit2)" + | $AP_SED -e 's/ [0-9]* sec.*//' | sort | uniq -c > $TMPDIR/percona-toolkit2 + name_val "Transaction States" "$(group_concat $TMPDIR/percona-toolkit2)" if $AP_GREP 'TABLE LOCK table' "${file}" >/dev/null ; then echo "Tables Locked" $AP_AWK '/^TABLE LOCK table/{print $4}' "${file}" \ @@ -633,9 +674,9 @@ format_overall_db_stats () { printf fmt, db, counts[db ",tables"], counts[db ",views"], counts[db ",sps"], counts[db ",trg"], counts[db ",func"], counts[db ",fk"], counts[db ",partn"]; } } - ' $file > /tmp/percona-toolkit - head -n2 /tmp/percona-toolkit - tail -n +3 /tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + head -n2 $TMPDIR/percona-toolkit + tail -n +3 $TMPDIR/percona-toolkit | sort echo # Now do the summary of engines per DB @@ -693,9 +734,9 @@ format_overall_db_stats () { print ""; } } - ' $file > /tmp/percona-toolkit - head -n1 /tmp/percona-toolkit - tail -n +2 /tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + head -n1 $TMPDIR/percona-toolkit + tail -n +2 $TMPDIR/percona-toolkit | sort echo # Now do the summary of index types per DB. 
Careful -- index is a reserved @@ -766,9 +807,9 @@ format_overall_db_stats () { print ""; } } - ' $file > /tmp/percona-toolkit - head -n1 /tmp/percona-toolkit - tail -n +2 /tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + head -n1 $TMPDIR/percona-toolkit + tail -n +2 $TMPDIR/percona-toolkit | sort echo # Now do the summary of datatypes per DB @@ -857,10 +898,10 @@ format_overall_db_stats () { print ""; } } - ' $file > /tmp/percona-toolkit - hdr=$($AP_GREP -n Database /tmp/percona-toolkit | cut -d: -f1); - head -n${hdr} /tmp/percona-toolkit - tail -n +$((${hdr} + 1)) /tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + hdr=$($AP_GREP -n Database $TMPDIR/percona-toolkit | cut -d: -f1); + head -n${hdr} $TMPDIR/percona-toolkit + tail -n +$((${hdr} + 1)) $TMPDIR/percona-toolkit | sort echo } @@ -878,6 +919,7 @@ main() { export PATH="/usr/gnu/bin/:/usr/xpg4/bin/:${PATH}" # Set up temporary files. + mk_tmpdir temp_files "rm" temp_files "touch" @@ -887,25 +929,26 @@ main() { section Percona_Toolkit_MySQL_Summary_Report name_val "System time" "`date -u +'%F %T UTC'` (local TZ: `date +'%Z %z'`)" section Instances - ps auxww 2>/dev/null | $AP_GREP mysqld > /tmp/percona-toolkit - parse_mysqld_instances /tmp/percona-toolkit + ps auxww 2>/dev/null | $AP_GREP mysqld > $TMPDIR/percona-toolkit + parse_mysqld_instances $TMPDIR/percona-toolkit # ######################################################################## # Fetch some basic info so we can start # ######################################################################## - mysql "$@" -ss -e 'SELECT CURRENT_USER()' > /tmp/percona-toolkit + mysql "$@" -ss -e 'SELECT CURRENT_USER()' > $TMPDIR/percona-toolkit if [ "$?" != "0" ]; then echo "Cannot connect to mysql, please specify command-line options." 
temp_files "rm" + rm_tmpdir exit 1 fi - user="$(cat /tmp/percona-toolkit)"; - mysql "$@" -ss -e 'SHOW /*!40100 GLOBAL*/ VARIABLES' > /tmp/percona-toolkit-mysql-variables - mysql "$@" -ss -e 'SHOW /*!50000 GLOBAL*/ STATUS' > /tmp/percona-toolkit-mysql-status - mysql "$@" -ss -e 'SHOW DATABASES' > /tmp/percona-toolkit-mysql-databases 2>/dev/null - mysql "$@" -ssE -e 'SHOW SLAVE STATUS' > /tmp/percona-toolkit-mysql-slave 2>/dev/null - mysql "$@" -ssE -e 'SHOW /*!50000 ENGINE*/ INNODB STATUS' > /tmp/percona-toolkit-innodb-status 2>/dev/null - mysql "$@" -ssE -e 'SHOW FULL PROCESSLIST' > /tmp/percona-toolkit-mysql-processlist 2>/dev/null + user="$(cat $TMPDIR/percona-toolkit)"; + mysql "$@" -ss -e 'SHOW /*!40100 GLOBAL*/ VARIABLES' > $TMPDIR/percona-toolkit-mysql-variables + mysql "$@" -ss -e 'SHOW /*!50000 GLOBAL*/ STATUS' > $TMPDIR/percona-toolkit-mysql-status + mysql "$@" -ss -e 'SHOW DATABASES' > $TMPDIR/percona-toolkit-mysql-databases 2>/dev/null + mysql "$@" -ssE -e 'SHOW SLAVE STATUS' > $TMPDIR/percona-toolkit-mysql-slave 2>/dev/null + mysql "$@" -ssE -e 'SHOW /*!50000 ENGINE*/ INNODB STATUS' > $TMPDIR/percona-toolkit-innodb-status 2>/dev/null + mysql "$@" -ssE -e 'SHOW FULL PROCESSLIST' > $TMPDIR/percona-toolkit-mysql-processlist 2>/dev/null now="$(mysql "$@" -ss -e 'SELECT NOW()')" port="$(get_var port)" @@ -920,16 +963,16 @@ main() { uptime="$(get_stat Uptime)" mysql "$@" -ss -e "SELECT LEFT(NOW() - INTERVAL ${uptime} SECOND, 16)" \ - > /tmp/percona-toolkit - name_val Started "$(get_mysql_uptime /tmp/percona-toolkit)" + > $TMPDIR/percona-toolkit + name_val Started "$(get_mysql_uptime $TMPDIR/percona-toolkit)" - name_val Databases "$($AP_GREP -c . /tmp/percona-toolkit-mysql-databases)" + name_val Databases "$($AP_GREP -c . 
$TMPDIR/percona-toolkit-mysql-databases)" name_val Datadir "$(get_var datadir)" procs="$(get_stat Threads_connected)" procr="$(get_stat Threads_running)" name_val Processes "$(fuzz ${procs}) connected, $(fuzz ${procr}) running" - if [ -s /tmp/percona-toolkit-mysql-slave ]; then slave=""; else slave="not "; fi - slavecount=$($AP_GREP -c 'Binlog Dump' /tmp/percona-toolkit-mysql-processlist) + if [ -s $TMPDIR/percona-toolkit-mysql-slave ]; then slave=""; else slave="not "; fi + slavecount=$($AP_GREP -c 'Binlog Dump' $TMPDIR/percona-toolkit-mysql-processlist) name_val Replication "Is ${slave}a slave, has ${slavecount} slaves connected" # TODO move this into a section with other files: error log, slow log and @@ -942,7 +985,7 @@ main() { # Processlist, sliced several different ways # ######################################################################## section Processlist - summarize_processlist /tmp/percona-toolkit-mysql-processlist + summarize_processlist $TMPDIR/percona-toolkit-mysql-processlist # ######################################################################## # Queries and query plans @@ -951,7 +994,7 @@ main() { sleep 10 # TODO: gather this data in the same format as normal: stats, TS line mysql "$@" -ss -e 'SHOW /*!50000 GLOBAL*/ STATUS' \ - | join /tmp/percona-toolkit-mysql-status - > /tmp/percona-toolkit + | join $TMPDIR/percona-toolkit-mysql-status - > $TMPDIR/percona-toolkit # Make a file with a list of things we want to omit because they aren't # counters, they are gauges (in RRDTool terminology). Gauges are shown # elsewhere in the output. 
@@ -975,9 +1018,9 @@ main() { Threads_cached Threads_connected Threads_running \ Uptime_since_flush_status; do - echo "${var}" >> /tmp/percona-toolkit-noncounters + echo "${var}" >> $TMPDIR/percona-toolkit-noncounters done - format_status_variables /tmp/percona-toolkit | $AP_GREP -v -f /tmp/percona-toolkit-noncounters + format_status_variables $TMPDIR/percona-toolkit | $AP_GREP -v -f $TMPDIR/percona-toolkit-noncounters # ######################################################################## # Table cache @@ -1054,22 +1097,22 @@ main() { trg_arg="${trg_arg} ${triggers}"; fi # Find out which databases to dump - num_dbs="$($AP_GREP -c . /tmp/percona-toolkit-mysql-databases)" + num_dbs="$($AP_GREP -c . $TMPDIR/percona-toolkit-mysql-databases)" echo "There are ${num_dbs} databases. Would you like to dump all, or just one?" echo -n "Type the name of the database, or press Enter to dump all of them. " read dbtodump mysqldump "$@" --no-data --skip-comments \ --skip-add-locks --skip-add-drop-table --compact \ --skip-lock-all-tables --skip-lock-tables --skip-set-charset \ - ${trg_arg} ${dbtodump:---all-databases} > /tmp/percona-toolkit-mysqldump + ${trg_arg} ${dbtodump:---all-databases} > $TMPDIR/percona-toolkit-mysqldump # Test the result by checking the file, not by the exit status, because we # might get partway through and then die, and the info is worth analyzing # anyway. 
- if $AP_GREP 'CREATE TABLE' /tmp/percona-toolkit-mysqldump >/dev/null 2>&1; then - format_overall_db_stats /tmp/percona-toolkit-mysqldump + if $AP_GREP 'CREATE TABLE' $TMPDIR/percona-toolkit-mysqldump >/dev/null 2>&1; then + format_overall_db_stats $TMPDIR/percona-toolkit-mysqldump else echo "Skipping schema analysis due to apparent error in dump file" - rm -f /tmp/percona-toolkit-mysqldump + rm -f $TMPDIR/percona-toolkit-mysqldump fi else echo "Skipping schema analysis" @@ -1079,23 +1122,23 @@ main() { # Noteworthy Technologies # ######################################################################## section Noteworthy_Technologies - if [ -e /tmp/percona-toolkit-mysqldump ]; then - if $AP_GREP FULLTEXT /tmp/percona-toolkit-mysqldump > /dev/null; then + if [ -e $TMPDIR/percona-toolkit-mysqldump ]; then + if $AP_GREP FULLTEXT $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Full Text Indexing" Yes else name_val "Full Text Indexing" No fi - if $AP_GREP 'GEOMETRY\|POINT\|LINESTRING\|POLYGON' /tmp/percona-toolkit-mysqldump > /dev/null; then + if $AP_GREP 'GEOMETRY\|POINT\|LINESTRING\|POLYGON' $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Geospatial Types" Yes else name_val "Geospatial Types" No fi - if $AP_GREP 'FOREIGN KEY' /tmp/percona-toolkit-mysqldump > /dev/null; then + if $AP_GREP 'FOREIGN KEY' $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Foreign Keys" Yes else name_val "Foreign Keys" No fi - if $AP_GREP 'PARTITION BY' /tmp/percona-toolkit-mysqldump > /dev/null; then + if $AP_GREP 'PARTITION BY' $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Partitioning" Yes else name_val "Partitioning" No @@ -1175,8 +1218,8 @@ main() { name_val "Adaptive Flushing" $(get_var innodb_adaptive_flushing) name_val "Adaptive Checkpoint" $(get_var innodb_adaptive_checkpoint) - if [ -s /tmp/percona-toolkit-innodb-status ]; then - format_innodb_status /tmp/percona-toolkit-innodb-status + if [ -s 
$TMPDIR/percona-toolkit-innodb-status ]; then + format_innodb_status $TMPDIR/percona-toolkit-innodb-status fi fi @@ -1211,15 +1254,15 @@ main() { section Binary_Logging binlog=$(get_var log_bin) if [ "${binlog}" ]; then - mysql "$@" -ss -e 'SHOW MASTER LOGS' > /tmp/percona-toolkit 2>/dev/null - summarize_binlogs /tmp/percona-toolkit + mysql "$@" -ss -e 'SHOW MASTER LOGS' > $TMPDIR/percona-toolkit 2>/dev/null + summarize_binlogs $TMPDIR/percona-toolkit format="$(get_var binlog_format)" name_val binlog_format "${format:-STATEMENT}" name_val expire_logs_days $(get_var expire_logs_days) name_val sync_binlog $(get_var sync_binlog) name_val server_id $(get_var server_id) - mysql "$@" -ss -e 'SHOW MASTER STATUS' > /tmp/percona-toolkit 2>/dev/null - format_binlog_filters /tmp/percona-toolkit + mysql "$@" -ss -e 'SHOW MASTER STATUS' > $TMPDIR/percona-toolkit 2>/dev/null + format_binlog_filters $TMPDIR/percona-toolkit fi # Replication: seconds behind, running, filters, skip_slave_start, skip_errors, @@ -1252,8 +1295,8 @@ main() { # If there is a my.cnf in a standard location, see if we can pretty-print it. # ######################################################################## section Configuration_File - ps auxww 2>/dev/null | $AP_GREP mysqld > /tmp/percona-toolkit - cnf_file=$(find_my_cnf_file /tmp/percona-toolkit ${port}); + ps auxww 2>/dev/null | $AP_GREP mysqld > $TMPDIR/percona-toolkit + cnf_file=$(find_my_cnf_file $TMPDIR/percona-toolkit ${port}); if [ ! 
-e "${cnf_file}" ]; then name_val "Config File" "Cannot autodetect, trying common locations" cnf_file="/etc/my.cnf"; @@ -1266,8 +1309,8 @@ main() { fi if [ -e "${cnf_file}" ]; then name_val "Config File" "${cnf_file}" - cat "${cnf_file}" > /tmp/percona-toolkit - pretty_print_cnf_file /tmp/percona-toolkit + cat "${cnf_file}" > $TMPDIR/percona-toolkit + pretty_print_cnf_file $TMPDIR/percona-toolkit else name_val "Config File" "Cannot autodetect or find, giving up" fi @@ -1276,6 +1319,8 @@ main() { # Make sure that we signal the end of the tool's output. section The_End + + rm_tmpdir } # Execute the program if it was not included from another file. This makes it @@ -1325,8 +1370,8 @@ See also L<"BUGS"> for more information on filing bugs and getting help. pt-mysql-summary works by connecting to a MySQL database server and querying it for status and configuration information. It saves these bits of data -into files in /tmp, and then formats them neatly with awk and other scripting -languages. +into files in a temporary directory, and then formats them neatly with awk +and other scripting languages. To use, simply execute it. Optionally add the same command-line options you would use to connect to MySQL, like C. diff --git a/bin/pt-sift b/bin/pt-sift index 79606be5..45f84d13 100755 --- a/bin/pt-sift +++ b/bin/pt-sift @@ -13,6 +13,47 @@ usage() { exit 1 } +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +# pt-sift isn't ready for this yet. +#set -u + +TMPDIR="" + +mk_tmpdir() { + local dir=${1:-""} + + if [ -n "$dir" ]; then + if [ ! 
-d "$dir" ]; then + mkdir $dir || die "Cannot make tmpdir $dir" + fi + TMPDIR="$dir" + else + local tool=`basename $0` + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf $TMPDIR + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + # Show current help and settings print_help() { cat <<-HELP @@ -72,19 +113,22 @@ main() { fi done + # Make a secure tmpdir. + mk_tmpdir + # We need to generate a list of timestamps, and ask the user to choose one if # there is no PREFIX yet. NOTE: we rely on the "-df" files here. - ls "${BASEDIR}" | grep -- '-df$' | cut -d- -f1 | sort > /tmp/pt-sift.prefixes + ls "${BASEDIR}" | grep -- '-df$' | cut -d- -f1 | sort > $TMPDIR/pt-sift.prefixes if [ -z "${PREFIX}" ]; then - if [ "$(grep -c . /tmp/pt-sift.prefixes)" = "1" ]; then + if [ "$(grep -c . $TMPDIR/pt-sift.prefixes)" = "1" ]; then # If there is only one sample, we use it as the prefix. - PREFIX="$(cat /tmp/pt-sift.prefixes)" + PREFIX="$(cat $TMPDIR/pt-sift.prefixes)" fi fi if [ -z "${PREFIX}" ]; then echo i=0 - cat /tmp/pt-sift.prefixes | while read line; do + cat $TMPDIR/pt-sift.prefixes | while read line; do i=$(($i + 1)) echo -n " $line" if [ "${i}" = "3" ]; then @@ -94,14 +138,14 @@ main() { done # We might have ended mid-line or we might have printed a newline; print a # newline if required to end the list of timestamp prefixes. 
- awk 'BEGIN { i = 0 } { i++ } END { if ( i % 3 != 0 ) { print "" } }' /tmp/pt-sift.prefixes + awk 'BEGIN { i = 0 } { i++ } END { if ( i % 3 != 0 ) { print "" } }' $TMPDIR/pt-sift.prefixes echo - while [ -z "${PREFIX}" -o "$(grep -c "${PREFIX}" /tmp/pt-sift.prefixes)" -ne 1 ]; do - DEFAULT="$(tail -1 /tmp/pt-sift.prefixes)" + while [ -z "${PREFIX}" -o "$(grep -c "${PREFIX}" $TMPDIR/pt-sift.prefixes)" -ne 1 ]; do + DEFAULT="$(tail -1 $TMPDIR/pt-sift.prefixes)" read -e -p "Select a timestamp from the list [${DEFAULT}] " ARG ARG="${ARG:-${DEFAULT}}" - if [ "$(grep -c "${ARG}" /tmp/pt-sift.prefixes)" -eq 1 ]; then - PREFIX="$(grep "${ARG}" /tmp/pt-sift.prefixes)" + if [ "$(grep -c "${ARG}" $TMPDIR/pt-sift.prefixes)" -eq 1 ]; then + PREFIX="$(grep "${ARG}" $TMPDIR/pt-sift.prefixes)" fi done fi @@ -113,7 +157,7 @@ main() { if [ "${ACTION}" != "INVALID" ]; then # Print the current host, timestamp and action. Figure out if we're at # the first or last sample, to make it easy to navigate. - PAGE="$(awk "/./{i++} /${PREFIX}/{c=i} END{print c, \"of\", i}" /tmp/pt-sift.prefixes)" + PAGE="$(awk "/./{i++} /${PREFIX}/{c=i} END{print c, \"of\", i}" $TMPDIR/pt-sift.prefixes)" HOST="$(cat "${BASEDIR}/${PREFIX}-hostname" 2>/dev/null)" echo -e "======== ${HOST:-unknown} at \033[34m${PREFIX} \033[31m${ACTION}\033[0m (${PAGE}) ========" fi @@ -421,7 +465,7 @@ main() { if ( printed == 0 ) { print \"${PREFIX}\"; } - }" /tmp/pt-sift.prefixes)" + }" $TMPDIR/pt-sift.prefixes)" ;; 1) ACTION="DEFAULT" @@ -458,6 +502,7 @@ main() { esac done + rm_tmpdir } # Execute the program if it was not included from another file. This makes it diff --git a/bin/pt-stalk b/bin/pt-stalk index 996d810c..9be53f7a 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -4,165 +4,1229 @@ # See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal # notices and disclaimers. 
-# ######################################################################## -# Check for the existence of a config file and source it if it exists -# ######################################################################## -if [ -f "${0}.conf" ]; then - . "${0}.conf" -fi +set -u -# ######################################################################## -# Configuration settings. -# ######################################################################## -# This is the max number of we want to tolerate. -THRESHOLD=${THRESHOLD:-100} +# ########################################################################### +# log_warn_die package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/log_warn_die.sh +# t/lib/bash/log_warn_die.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### -# This is the thing to check for. -VARIABLE=${VARIABLE:-Threads_connected} -# How many times must the condition be met before the script will fire? -CYCLES=${CYCLES:-1} +set -u -# Collect GDB stacktraces? -GDB=${GDB:-no} +EXIT_STATUS=0 -# Collect oprofile data? -OPROFILE=${OPROFILE:-yes} +log() { + TS=$(date +%F-%T | tr :- _); + echo "$TS $*" +} -# Collect strace data? -STRACE=${STRACE:-no} +warn() { + log "$*" >&2 + EXIT_STATUS=1 +} -# Collect tcpdump data? -TCPDUMP=${TCPDUMP:-yes} - -# Send mail to this list of addresses when the script triggers. -# EMAIL= - -# Any options to pass to mysql/mysqladmin, such as -u, -p, etc -# MYSQLOPTIONS="" - -# This is the interval between checks. -INTERVAL=${INTERVAL:-30} - -# If the command you're running to detect the condition is allowed to return -# nothing (e.g. a grep line that might not even exist if there's no problem), -# then set this to "yes". -MAYBE_EMPTY=${MAYBE_EMPTY:-no} - -# This is the location of the 'collect' script. 
-if [ -z "${COLLECT}" ]; then - COLLECT="${HOME}/bin/pt-collect"; -fi - -# This is where to store the collected data. -if [ -z "${DEST}" ]; then - DEST="${HOME}/collected/" -fi - -# How long to collect statistics data for? Make sure that this isn't longer -# than SLEEP. -DURATION=${DURATION:-30} - -# How long to sleep after collecting? -if [ -z "${SLEEP}" ]; then - SLEEP=$(($DURATION * 10)) -fi - -# Bail out if the disk is more than this %full. -PCT_THRESHOLD=${PCT_THRESHOLD:-95} - -# Bail out if the disk has less than this many MB free. -MB_THRESHOLD=${MB_THRESHOLD:-100} - -# Remove samples after this many days. -PURGE=${PURGE:-30} - -# ######################################################################## -# End configuration -# ######################################################################## - -# ######################################################################## -# Echo to STDERR and exit false. -# ######################################################################## die() { - echo "${1}" >&2 + warn "$*" exit 1 } -# ######################################################################## -# Echo to STDERR and possibly email. -# ######################################################################## -log() { - if [ "${EMAIL}" ]; then - echo "${1} on $(hostname)" | mail -s "${2} on $(hostname)" ${EMAIL} - fi - echo "${1}" >&2 +# ########################################################################### +# End log_warn_die package +# ########################################################################### + +# ########################################################################### +# parse_options package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/parse_options.sh +# t/lib/bash/parse_options.sh +# See https://launchpad.net/percona-toolkit for more information. 
+# ########################################################################### + + + + + +set -u + +ARGV="" # Non-option args (probably input files) +EXT_ARGV="" # Everything after -- (args for an external command) +HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV +OPT_ERRS=0 # How many command line option errors +OPT_VERSION="" # If --version was specified +OPT_HELP="" # If --help was specified +PO_DIR="" # Directory with program option spec files + +usage() { + local file="$1" + + local usage=$(grep '^Usage: ' "$file") + echo $usage + echo + echo "For more information, 'man $TOOL' or 'perldoc $file'." } -# Make sure pt-collect is executable before starting. -if [ ! -x $COLLECT ]; then - die "$COLLECT not found or not executable" -fi +usage_or_errors() { + local file="$1" -# Make the collection location -mkdir -p "${DEST}" || die "Can't make the destination directory" -test -d "${DEST}" || die "${DEST} isn't a directory" -test -w "${DEST}" || die "${DEST} isn't writable" - -# Test if we have root; warn if not, but it isn't critical. -if [ "$(id -u)" != "0" ]; then - echo 'Not running with root privileges!'; -fi - -# We increment this variable every time that the check is true, and set it to 0 -# if it's false. -cycles_true=0; - -while true; do - d=$(date +%F-%T | tr :- _); - - # XXX This is where we decide whether to execute 'collect'. - # XXX Customize this if needed. The idea is to generate a number and store - # XXX it into $detected, and if $detected > $THRESHOLD, then we'll execute - # XXX the collection process. - detected=$(mysqladmin ext ${MYSQLOPTIONS} | grep ${VARIABLE} | awk '{print $4}'); - if [ -z "${detected}" -a ${MAYBE_EMPTY} = "no" ]; then - # Oops, couldn't connect, maybe max_connections problem? - echo "$d The detected value is empty; something failed? Exit status is $?" 
- matched="yes" - cycles_true=$(($cycles_true + 1)) - elif [ "${detected:-0}" -gt ${THRESHOLD} ]; then - matched="yes" - cycles_true=$(($cycles_true + 1)) - else - matched="no" - cycles_true=0 + if [ "$OPT_VERSION" ]; then + local version=$(grep '^pt-[^ ]\+ [0-9]' "$file") + echo "$version" + return 1 fi - # XXX Stop customizing here; everything above should be what you need. - - NOTE="$d check results: ${VARIABLE} = ${detected}, matched = ${matched}, cycles_true = ${cycles_true}" - # Actually execute the collection script. - if [ "${matched:-no}" = "yes" -a ${cycles_true} -ge ${CYCLES} ]; then - - log "${NOTE}" "${COLLECT} triggered" - PREFIX="$(date +%F-%T | tr :- _)" - echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" - ${COLLECT} -d "${DEST}" -i "${DURATION}" -g "${GDB}" -o "${OPROFILE}" -p "${PREFIX}" -s "${STRACE}" -t "${TCPDUMP}" -f "${PCT_THRESHOLD}" -m "${MB_THRESHOLD}" -- ${MYSQLOPTIONS} - echo "$d sleeping ${SLEEP} seconds to avoid DOS attack" - sleep ${SLEEP} - else - echo ${NOTE} - sleep ${INTERVAL} + if [ "$OPT_HELP" ]; then + usage "$file" + echo + echo "Command line options:" + echo + perl -e ' + use strict; + use warnings FATAL => qw(all); + my $lcol = 20; # Allow this much space for option names. + my $rcol = 80 - $lcol; # The terminal is assumed to be 80 chars wide. + my $name; + while ( <> ) { + my $line = $_; + chomp $line; + if ( $line =~ s/^long:/ --/ ) { + $name = $line; + } + elsif ( $line =~ s/^desc:// ) { + $line =~ s/ +$//mg; + my @lines = grep { $_ } + $line =~ m/(.{0,$rcol})(?:\s+|\Z)/g; + if ( length($name) >= $lcol ) { + print $name, "\n", (q{ } x $lcol); + } + else { + printf "%-${lcol}s", $name; + } + print join("\n" . 
(q{ } x $lcol), @lines); + print "\n"; + } + } + ' "$PO_DIR"/* + echo + echo "Options and values after processing arguments:" + echo + for opt in $(ls "$PO_DIR"); do + local varname="OPT_$(echo "$opt" | tr a-z- A-Z_)" + local varvalue="${!varname}" + printf -- " --%-30s %s" "$opt" "${varvalue:-(No value)}" + echo + done + return 1 fi + if [ $OPT_ERRS -gt 0 ]; then + echo + usage "$file" + return 1 + fi - # Delete things more than $PURGE days old - find "${DEST}" -type f -mtime +${PURGE} -exec rm -f '{}' \; - find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ - -depth -mtime +${PURGE} -exec rm -f '{}' \; + return 0 +} -done +option_error() { + local err="$1" + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "$err" >&2 +} + +parse_options() { + local file="$1" + shift + + ARGV="" + EXT_ARGV="" + HAVE_EXT_ARGV="" + OPT_ERRS=0 + OPT_VERSION="" + OPT_HELP="" + PO_DIR="$TMPDIR/po" + + if [ ! -d "$PO_DIR" ]; then + mkdir "$PO_DIR" + if [ $? -ne 0 ]; then + echo "Cannot mkdir $PO_DIR" >&2 + exit 1 + fi + fi + + rm -rf "$PO_DIR"/* + if [ $? 
-ne 0 ]; then + echo "Cannot rm -rf $PO_DIR/*" >&2 + exit 1 + fi + + _parse_pod "$file" # Parse POD into program option (po) spec files + _eval_po # Eval po into existence with default values + + if [ $# -ge 2 ] && [ "$1" = "--config" ]; then + shift # --config + local user_config_files="$1" + shift # that ^ + local IFS="," + for user_config_file in $user_config_files; do + _parse_config_files "$user_config_file" + done + else + _parse_config_files "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" + fi + + _parse_command_line "$@" +} + +_parse_pod() { + local file="$1" + + cat "$file" | PO_DIR="$PO_DIR" perl -ne ' + BEGIN { $/ = ""; } + next unless $_ =~ m/^=head1 OPTIONS/; + while ( defined(my $para = <>) ) { + last if $para =~ m/^=head1/; + chomp; + if ( $para =~ m/^=item --(\S+)/ ) { + my $opt = $1; + my $file = "$ENV{PO_DIR}/$opt"; + open my $opt_fh, ">", $file or die "Cannot open $file: $!"; + print $opt_fh "long:$opt\n"; + $para = <>; + chomp; + if ( $para =~ m/^[a-z ]+:/ ) { + map { + chomp; + my ($attrib, $val) = split(/: /, $_); + print $opt_fh "$attrib:$val\n"; + } split(/; /, $para); + $para = <>; + chomp; + } + my ($desc) = $para =~ m/^([^?.]+)/; + print $opt_fh "desc:$desc.\n"; + close $opt_fh; + } + } + last; + ' +} + +_eval_po() { + local IFS=":" + for opt_spec in "$PO_DIR"/*; do + local opt="" + local default_val="" + local neg=0 + local size=0 + while read key val; do + case "$key" in + long) + opt=$(echo $val | sed 's/-/_/g' | tr [:lower:] [:upper:]) + ;; + default) + default_val="$val" + ;; + "short form") + ;; + type) + [ "$val" = "size" ] && size=1 + ;; + desc) + ;; + negatable) + if [ "$val" = "yes" ]; then + neg=1 + fi + ;; + *) + echo "Invalid attribute in $opt_spec: $line" >&2 + exit 1 + esac + done < "$opt_spec" + + if [ -z "$opt" ]; then + echo "No long attribute in option spec $opt_spec" >&2 + exit 1 + fi + + if [ $neg -eq 1 ]; then + if [ -z "$default_val" ] 
|| [ "$default_val" != "yes" ]; then + echo "Option $opt_spec is negatable but not default: yes" >&2 + exit 1 + fi + fi + + if [ $size -eq 1 -a -n "$default_val" ]; then + default_val=$(size_to_bytes $default_val) + fi + + eval "OPT_${opt}"="$default_val" + done +} + +_parse_config_files() { + + for config_file in "$@"; do + test -f "$config_file" || continue + + while read config_opt; do + + echo "$config_opt" | grep '^[ ]*[^#]' >/dev/null 2>&1 || continue + + config_opt="$(echo "$config_opt" | sed -e 's/^ *//g' -e 's/ *$//g' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + + [ "$config_opt" = "" ] && continue + + if ! [ "$HAVE_EXT_ARGV" ]; then + config_opt="--$config_opt" + fi + + _parse_command_line "$config_opt" + + done < "$config_file" + + HAVE_EXT_ARGV="" # reset for each file + + done +} + +_parse_command_line() { + local opt="" + local val="" + local next_opt_is_val="" + local opt_is_ok="" + local opt_is_negated="" + local real_opt="" + local required_arg="" + local spec="" + + for opt in "$@"; do + if [ "$opt" = "--" -o "$opt" = "----" ]; then + HAVE_EXT_ARGV=1 + continue + fi + if [ "$HAVE_EXT_ARGV" ]; then + if [ "$EXT_ARGV" ]; then + EXT_ARGV="$EXT_ARGV $opt" + else + EXT_ARGV="$opt" + fi + continue + fi + + if [ "$next_opt_is_val" ]; then + next_opt_is_val="" + if [ $# -eq 0 ] || [ $(expr "$opt" : "-") -eq 1 ]; then + option_error "$real_opt requires a $required_arg argument" + continue + fi + val="$opt" + opt_is_ok=1 + else + if [ $(expr "$opt" : "-") -eq 0 ]; then + if [ -z "$ARGV" ]; then + ARGV="$opt" + else + ARGV="$ARGV $opt" + fi + continue + fi + + real_opt="$opt" + + if $(echo $opt | grep '^--no-' >/dev/null); then + opt_is_negated=1 + opt=$(echo $opt | sed 's/^--no-//') + else + opt_is_negated="" + opt=$(echo $opt | sed 's/^-*//') + fi + + if $(echo $opt | grep '^[a-z-][a-z-]*=' >/dev/null 2>&1); then + val="$(echo $opt | awk -F= '{print $2}')" + opt="$(echo $opt | awk -F= '{print $1}')" + fi + + if [ -f "$TMPDIR/po/$opt" ]; then + 
spec="$TMPDIR/po/$opt" + else + spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) + if [ -z "$spec" ]; then + option_error "Unknown option: $real_opt" + continue + fi + fi + + required_arg=$(cat "$spec" | awk -F: '/^type:/{print $2}') + if [ "$required_arg" ]; then + if [ "$val" ]; then + opt_is_ok=1 + else + next_opt_is_val=1 + fi + else + if [ "$val" ]; then + option_error "Option $real_opt does not take a value" + continue + fi + if [ "$opt_is_negated" ]; then + val="" + else + val="yes" + fi + opt_is_ok=1 + fi + fi + + if [ "$opt_is_ok" ]; then + opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + + if grep "^type:size" "$spec" >/dev/null; then + val=$(size_to_bytes $val) + fi + + eval "OPT_$opt"="'$val'" + + opt="" + val="" + next_opt_is_val="" + opt_is_ok="" + opt_is_negated="" + real_opt="" + required_arg="" + spec="" + fi + done +} + +size_to_bytes() { + local size="$1" + echo $size | perl -ne '%f=(B=>1, K=>1_024, M=>1_048_576, G=>1_073_741_824, T=>1_099_511_627_776); m/^(\d+)([kMGT])?/i; print $1 * $f{uc($2 || "B")};' +} + +# ########################################################################### +# End parse_options package +# ########################################################################### + +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + + +set -u + +TMPDIR="" + +mk_tmpdir() { + local dir="${1:-""}" + + if [ -n "$dir" ]; then + if [ ! 
-d "$dir" ]; then + mkdir "$dir" || die "Cannot make tmpdir $dir" + fi + TMPDIR="$dir" + else + local tool="${0##*/}" + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf "$TMPDIR" + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + +# ########################################################################### +# alt_cmds package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/alt_cmds.sh +# t/lib/bash/alt_cmds.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + + +set -u + +CMD_PIDOF="$(which pidof)" +CMD_PGREP="$(which pgrep)" + +_seq() { + local i="$1" + awk "BEGIN { for(i=1; i<=$i; i++) print i; }" +} + +_pidof() { + local cmd="$1" + if ! pidof "$cmd" 2>/dev/null; then + ps -eo pid,ucomm | awk -v comm="$cmd" '$2 == comm { print $1 }' + fi +} + +_lsof() { + local pid="$1" + if ! lsof -p $pid 2>/dev/null; then + /bin/ls -l /proc/$pid/fd 2>/dev/null + fi +} + +# ########################################################################### +# End alt_cmds package +# ########################################################################### + +# ########################################################################### +# safeguards package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/safeguards.sh +# t/lib/bash/safeguards.sh +# See https://launchpad.net/percona-toolkit for more information. 
# ###########################################################################


set -u

# Print the df report for the filesystem that holds a path.
# Arguments: $1 - path on the filesystem to inspect (default: $PWD)
# Outputs:   df's POSIX-format (-P) report in 1024-byte blocks (-k) on stdout
# Returns:   df's exit status
disk_space() {
  local filesystem="${1:-$PWD}"
  df -P -k "$filesystem"
}

# Check a saved disk_space report against free-space limits.
# Arguments: $1 - file containing the output of disk_space
#            $2 - minimum free bytes required (default 0)
#            $3 - minimum free percentage required (default 0)
#            $4 - bytes about to be written; they are counted as already
#                 used before the limits are applied (default 0)
# Outputs:   on failure, a warning (via warn) and the report on stderr
# Returns:   0 if both limits are met, or if the report cannot be parsed
#            (a note is printed to stderr in that case); 1 otherwise
check_disk_space() {
  local file="$1"
  local min_free_bytes="${2:-0}"
  local min_free_pct="${3:-0}"
  local bytes_margin="${4:-0}"

  # Take the first row whose capacity column looks like "NN%".  Selecting on
  # that column instead of a leading "/" also accepts tmpfs, overlay and
  # network filesystems and skips the header.  "%.0f" forces plain integers:
  # a bare "print $3 * 1024" is rendered by some awks as 3.072e+12, which
  # the shell arithmetic below cannot read.
  local stats
  stats=$(awk '$5 ~ /^[0-9]+%$/ {
    printf("%.0f %.0f %d\n", $3 * 1024, $4 * 1024, $5 + 0)
    exit
  }' "$file" 2>/dev/null)

  if [ -z "$stats" ]; then
    # Nothing usable in the report: keep going rather than stop the caller,
    # but say so instead of failing inside the arithmetic.
    echo "Cannot parse disk usage from $file; skipping disk space check" >&2
    return 0
  fi

  local used_bytes free_bytes pct_used
  read -r used_bytes free_bytes pct_used <<<"$stats"
  local pct_free=$((100 - pct_used))

  # Remember the measured values for the warning; the margin only affects
  # the comparison.
  local real_free_bytes=$free_bytes
  local real_pct_free=$pct_free

  if [ "$bytes_margin" -gt 0 ]; then
    used_bytes=$((used_bytes + bytes_margin))
    free_bytes=$((free_bytes - bytes_margin))

    # Used percentage, truncated to an integer.
    local total_bytes=$((used_bytes + free_bytes))
    if [ "$total_bytes" -gt 0 ]; then
      pct_used=$((used_bytes * 100 / total_bytes))
    else
      pct_used=100
    fi
    pct_free=$((100 - pct_used))
  fi

  if [ "$free_bytes" -lt "$min_free_bytes" ] \
     || [ "$pct_free" -lt "$min_free_pct" ]; then
    warn "Not enough free disk space:
    Limit: ${min_free_pct}% free, ${min_free_bytes} bytes free
    Actual: ${real_pct_free}% free, ${real_free_bytes} bytes free (- $bytes_margin bytes margin)
"
    cat "$file" >&2

    return 1 # not enough disk space
  fi

  return 0 # disk space is OK
}

# ###########################################################################
# End safeguards package
# ###########################################################################

# ###########################################################################
# daemon package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
#   lib/bash/daemon.sh
#   t/lib/bash/daemon.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################


set -u

# Claim a PID file for a process, refusing to take over one whose recorded
# process is still alive.
# Arguments: $1 - path of the PID file
#            $2 - PID to record
# Outputs:   a note on stdout when a stale PID file is overwritten
# Exits:     via die if the file exists but is empty, if its PID is still
#            running, or if the file cannot be written
make_pid_file() {
  local file="$1"
  local pid="$2"

  if [ -f "$file" ]; then
    local old_pid
    old_pid=$(cat "$file")
    if [ -z "$old_pid" ]; then
      die "PID file $file already exists but it is empty"
    fi

    local running=""
    case "$old_pid" in
      *[!0-9]*)
        # Not a plain number: it cannot name a live process, and handing
        # it to kill would be unsafe (e.g. "-1" addresses every process).
        ;;
      *)
        # kill -0 also fails when the process exists but belongs to another
        # user, so ask ps before declaring the PID dead.
        if kill -0 "$old_pid" 2>/dev/null \
           || ps -p "$old_pid" >/dev/null 2>&1; then
          running="yes"
        fi
        ;;
    esac

    if [ "$running" ]; then
      die "PID file $file already exists and its PID ($old_pid) is running"
    fi
    echo "Overwriting PID file $file because its PID ($old_pid)" \
         "is not running"
  fi

  if ! echo "$pid" > "$file"; then
    die "Cannot create or write PID file $file"
  fi
}

# Delete a PID file if it exists.
# Arguments: $1 - path of the PID file
# Returns:   rm's exit status, or 0 when there is no such file
remove_pid_file() {
  local file="$1"
  if [ -f "$file" ]; then
    rm -- "$file"
  fi
}

# ###########################################################################
# End daemon package
# ###########################################################################

# ###########################################################################
# collect package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
#   lib/bash/collect.sh
#   t/lib/bash/collect.sh
# See https://launchpad.net/percona-toolkit for more information.
+# ########################################################################### + + +set -u + +CMD_GDB="$(which gdb)" +CMD_IOSTAT="$(which iostat)" +CMD_MPSTAT="$(which mpstat)" +CMD_MYSQL="$(which mysql)" +CMD_MYSQLADMIN="$(which mysqladmin)" +CMD_OPCONTROL="$(which opcontrol)" +CMD_OPREPORT="$(which opreport)" +CMD_PMAP="$(which pmap)" +CMD_STRACE="$(which strace)" +CMD_SYSCTL="$(which sysctl)" +CMD_TCPDUMP="$(which tcpdump)" +CMD_VMSTAT="$(which vmstat)" + +[ -z "$CMD_SYSCTL" -a -x "/sbin/sysctl" ] && CMD_SYSCTL="/sbin/sysctl" + +collect() { + local d="$1" # directory to save results in + local p="$2" # prefix for each result file + + local mysqld_pid=$(_pidof mysqld | head -n1) + + if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then + if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then + $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" + else + $CMD_PMAP $mysqld_pid > "$d/$p-pmap" + fi + fi + + if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" -a "$mysqld_pid" ]; then + $CMD_GDB \ + -ex "set pagination 0" \ + -ex "thread apply all bt" \ + --batch -p $mysqld_pid \ + >> "$d/$p-stacktrace" + fi + + $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" & + sleep .2 + + local mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")" + + local mysql_error_log="$(awk '/log_error/{print $2}' "$d/$p-variables")" + if [ -z "$mysql_error_log" -a "$mysqld_pid" ]; then + mysql_error_log="$(ls -l /proc/$mysqld_pid/fd | awk '/ 2 ->/{print $NF}')" + fi + + local tail_error_log_pid="" + if [ "$mysql_error_log" ]; then + log "The MySQL error log seems to be $mysql_error_log" + tail -f "$mysql_error_log" >"$d/$p-log_error" & + tail_error_log_pid=$! 
+ + $CMD_MYSQLADMIN $EXT_ARGV debug + else + log "Could not find the MySQL error log" + fi + + local innostat="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" + if [ "${mysql_version}" '>' "5.1" ]; then + local mutex="SHOW ENGINE INNODB MUTEX" + else + local mutex="SHOW MUTEX STATUS" + fi + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" & + $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" & + open_tables >> "$d/$p-opentables1" & + + local tcpdump_pid="" + if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" ]; then + local port=$(awk '/^port/{print $2}' "$d/$p-variables") + if [ "$port" ]; then + $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & + tcpdump_pid=$! + fi + fi + + local have_oprofile="" + if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then + if $CMD_OPCONTROL --init; then + $CMD_OPCONTROL --start --no-vmlinux + have_oprofile="yes" + fi + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" -a "$mysqld_pid" ]; then + $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" & + local strace_pid=$! + fi + + ps -eaf >> "$d/$p-ps" & + top -bn1 >> "$d/$p-top" & + + [ "$mysqld_pid" ] && _lsof $mysqld_pid >> "$d/$p-lsof" & + + if [ "$CMD_SYSCTL" ]; then + $CMD_SYSCTL -a >> "$d/$p-sysctl" & + fi + if [ "$CMD_VMSTAT" ]; then + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" & + fi + if [ "$CMD_IOSTAT" ]; then + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" & + fi + if [ "$CMD_MPSTAT" ]; then + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" & + fi + + $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" & + local mysqladmin_pid=$! + + local have_lock_waits_table="" + $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ + | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1 + if [ $? 
-eq 0 ]; then + have_lock_waits_table="yes" + fi + + log "Loop start: $(date +'TS %s.%N %F %T')" + for loopno in $(_seq $OPT_RUN_TIME); do + disk_space $d > $d/$p-disk-space + check_disk_space \ + $d/$p-disk-space \ + "$OPT_DISK_BYTES_FREE" \ + "$OPT_DISK_PCT_FREE" \ + || break + + sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') + local ts="$(date +"TS %s.%N %F %T")" + + + if [ -d "/proc" ]; then + if [ -f "/proc/diskstats" ]; then + (echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" & + fi + if [ -f "/proc/stat" ]; then + (echo $ts; cat /proc/stat) >> "$d/$p-procstat" & + fi + if [ -f "/proc/vmstat" ]; then + (echo $ts; cat /proc/vmstat) >> "$d/$p-procvmstat" & + fi + if [ -f "/proc/meminfo" ]; then + (echo $ts; cat /proc/meminfo) >> "$d/$p-meminfo" & + fi + if [ -f "/proc/slabinfo" ]; then + (echo $ts; cat /proc/slabinfo) >> "$d/$p-slabinfo" & + fi + if [ -f "/proc/interrupts" ]; then + (echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" & + fi + fi + + (echo $ts; df -h) >> "$d/$p-df" & + + (echo $ts; netstat -antp) >> "$d/$p-netstat" & + (echo $ts; netstat -s) >> "$d/$p-netstat_s" & + + (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \ + >> "$d/$p-processlist" & + + if [ "$have_lock_waits_table" ]; then + (echo $ts; lock_waits) >>"$d/$p-lock-waits" & + fi + done + log "Loop end: $(date +'TS %s.%N %F %T')" + + if [ "$have_oprofile" ]; then + $CMD_OPCONTROL --stop + $CMD_OPCONTROL --dump + + local oprofiled_pid=$(_pidof oprofiled) + if [ "$oprofiled_pid" ]; then + kill $oprofiled_pid + else + warn "Cannot kill oprofiled because its PID cannot be determined" + fi + + $CMD_OPCONTROL --save=pt_collect_$p + + local mysqld_path=$(which mysqld); + if [ "$mysqld_path" -a -f "$mysqld_path" ]; then + $CMD_OPREPORT \ + --demangle=smart \ + --symbols \ + --merge tgid \ + session:pt_collect_$p \ + "$mysqld_path" \ + > "$d/$p-opreport" + else + log "oprofile data saved to pt_collect_$p; you should be able" \ + "to get a report by running something like 
'opreport" \ + "--demangle=smart --symbols --merge tgid session:pt_collect_$p" \ + "/path/to/mysqld'" \ + > "$d/$p-opreport" + fi + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then + kill -s 2 $strace_pid + sleep 1 + kill -s 15 $strace_pid + [ "$mysqld_pid" ] && kill -s 18 $mysqld_pid + fi + + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" & + $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" & + open_tables >> "$d/$p-opentables2" & + + kill $mysqladmin_pid + [ "$tail_error_log_pid" ] && kill $tail_error_log_pid + [ "$tcpdump_pid" ] && kill $tcpdump_pid + + hostname > "$d/$p-hostname" + + for file in "$d/$p-"*; do + if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then + log "Removing empty file $file"; + rm "$file" + fi + done +} + +open_tables() { + local open_tables=$($CMD_MYSQLADMIN $EXT_ARGV ext | grep "Open_tables" | awk '{print $4}') + if [ -n "$open_tables" -a $open_tables -le 1000 ]; then + $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' & + else + log "Too many open tables: $open_tables" + fi +} + +lock_waits() { + local sql1="SELECT + CONCAT('thread ', b.trx_mysql_thread_id, ' from ', p.host) AS who_blocks, + IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx, + MAX(TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP)) AS max_wait_time, + COUNT(*) AS num_waiters + FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS b ON b.trx_id = w.blocking_trx_id + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id + LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id + GROUP BY who_blocks ORDER BY num_waiters DESC\G" + $CMD_MYSQL $EXT_ARGV -e "$sql1" + + local sql2="SELECT + r.trx_id AS waiting_trx_id, + r.trx_mysql_thread_id AS waiting_thread, + TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP) AS wait_time, + r.trx_query AS waiting_query, + l.lock_table AS waiting_table_lock, + b.trx_id AS blocking_trx_id, 
b.trx_mysql_thread_id AS blocking_thread, + SUBSTRING(p.host, 1, INSTR(p.host, ':') - 1) AS blocking_host, + SUBSTRING(p.host, INSTR(p.host, ':') +1) AS blocking_port, + IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx, + b.trx_query AS blocking_query + FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS b ON b.trx_id = w.blocking_trx_id + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id + INNER JOIN INFORMATION_SCHEMA.INNODB_LOCKS AS l ON w.requested_lock_id = l.lock_id + LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id + ORDER BY wait_time DESC\G" + $CMD_MYSQL $EXT_ARGV -e "$sql2" +} + +# ########################################################################### +# End collect package +# ########################################################################### + +# ########################################################################### +# Global variables +# ########################################################################### +TRIGGER_FUNCTION="" +RAN_WITH="" +EXIT_REASON="" +TOOL="pt-stalk" +OKTORUN=1 +ITER=1 + +# ########################################################################### +# Subroutines +# ########################################################################### + +grep_processlist() { + local file="$1" + local col="$2" + local pat="${3:-""}" + local gt="${4:-0}" + local quiet="${5:-0}" + + awk " + BEGIN { + FS=\"|\" + OFS=\" | \" + n_cols=0 + found=0 + } + + /^\|/ { + if ( n_cols ) { + val=colno_for_name[\"$col\"] + if ((\"$pat\" && match(\$val, \"$pat\")) || ($gt && \$val > $gt) ) { + found++ + if (!$quiet) print \$0 + } + } + else { + for (i = 1; i <= NF; i++) { + gsub(/^[ ]*/, \"\", \$i) + gsub(/[ ]*$/, \"\", \$i) + if ( \$i != \"\" ) { + name_for_colno[i]=\$i + colno_for_name[\$i]=i + n_cols++ + } + } + } + } + + END { + if ( found ) + exit 0 + exit 1 + } + " "$file" +} + +set_trg_func() { + local func="$1" + if [ -f 
"$func" ]; then + # Trigger function is a file with Bash code; source it. + . "$func" + TRIGGER_FUNCTION="trg_plugin" + return 0 # success + else + # Trigger function is name of a built-in function. + func=$(echo "$func" | tr [:upper:] [:lower:]) + if [ "$func" = "status" -o "$func" = "processlist" ]; then + TRIGGER_FUNCTION="trg_$func" + return 0 # success + fi + fi + return 1 # error +} + +trg_status() { + local var="$1" + mysqladmin $EXT_ARGV extended-status \ + | grep "$OPT_VARIABLE " \ + | awk '{print $4}' +} + +trg_processlist() { + local var="$1" + local tmpfile="$TMPDIR/processlist" + mysqladmin $EXT_ARGV processlist > "$tmpfile-1" + grep_processlist "$tmpfile-1" "$var" "$OPT_MATCH" 0 0 > "$tmpfile-2" + wc -l "$tmpfile-2" | awk '{print $1}' + rm -f "$tmpfile"* +} + +oktorun() { + if [ $OKTORUN -eq 0 ]; then + EXIT_REASON="OKTORUN is false" + return 1 # stop running + fi + + if [ -n "$OPT_ITERATIONS" ] && [ $ITER -gt $OPT_ITERATIONS ]; then + EXIT_REASON="no more iterations" + return 1 # stop running + fi + + return 0 # continue running +} + +sleep_ok() { + local seconds="$1" + local msg="${2:-""}" + if oktorun; then + if [ -n "$msg" ]; then + log "$msg" + fi + sleep $seconds + fi +} + +purge_samples() { + local dir="$1" + local retention_time="$2" + + # Delete collect files which more than --retention-time days old. + find "$dir" -type f -mtime +$retention_time -exec rm -f '{}' \; + + local oprofile_dir="/var/lib/oprofile/samples" + if [ -d "$oprofile_dir" ]; then + # "pt_collect_" here needs to match $CMD_OPCONTROL --save=pt_collect_$p + # in collect(). 
TODO: fix this + find "$oprofile_dir" -depth -type d -name 'pt_collect_*' \ + -mtime +$retention_time -exec rm -rf '{}' \; + fi +} + +sigtrap() { + if [ $OKTORUN -eq 1 ]; then + warn "Caught signal, exiting" + OKTORUN=0 + else + warn "Caught signal again, forcing exit" + exit $EXIT_STATUS + fi +} + +stalk() { + local cycles_true=0 # increment each time check is true, else set to 0 + local matched="" # set to "yes" when check is true + local last_prefix="" # prefix of last collection + + while oktorun; do + # Run the trigger which returns the value of whatever is being + # checked. When the value is > --threshold for at least --cycles + # consecutive times, start collecting. The variables are declared + # separately from the command substitution because "local v=$(cmd)" + # sets $? to the status of "local" (always 0), hiding a trigger failure. + local value="" + local trg_exit_status=0 + value=$($TRIGGER_FUNCTION $OPT_VARIABLE) || trg_exit_status=$? + + if [ -z "$value" ]; then + # No value. Maybe we failed to connect to MySQL? + warn "Detected value is empty; something failed? Trigger exit status: $trg_exit_status" + matched="" + cycles_true=0 + elif [ $value -gt $OPT_THRESHOLD ]; then + matched="yes" + cycles_true=$(($cycles_true + 1)) + else + matched="" + cycles_true=0 + fi + + local msg="Check results: $OPT_VARIABLE=$value, matched=${matched:-no}, cycles_true=$cycles_true" + log "$msg" + + if [ "$matched" -a $cycles_true -ge $OPT_CYCLES ]; then + # ################################################################## + # Start collecting, maybe. + # ################################################################## + log "Collect triggered" + + # Notify by email that a collect has been triggered. + if [ "$OPT_NOTIFY_BY_EMAIL" ]; then + echo "$msg on $(hostname)" \ + | mail -s "Collect triggered on $(hostname)" \ + "$OPT_NOTIFY_BY_EMAIL" + fi + + if [ "$OPT_COLLECT" ]; then + local prefix="${OPT_PREFIX:-$(date +%F-%T | tr :- _)}" + + # Check if we'll have enough disk space to collect. Disk space + # is also checked every interval while collecting. 
+ local margin="20971520" # default 20M margin (in bytes), unless: + if [ -n "$last_prefix" ]; then + # Use the size of the previous sample as the margin. du -m reports + # megabytes, so convert to bytes to match the default above. + # NOTE(review): check_disk_space is defined elsewhere; presumably + # it expects this margin in bytes -- confirm. + margin=$(du -mc "$OPT_DEST"/"$last_prefix"-* | tail -n 1 | awk '{printf "%.0f", $1 * 1048576}') + fi + disk_space "$OPT_DEST" > "$OPT_DEST/$prefix-disk-space" + check_disk_space \ + "$OPT_DEST/$prefix-disk-space" \ + "$OPT_DISK_BYTES_FREE" \ + "$OPT_DISK_PCT_FREE" \ + "$margin" + if [ $? -eq 0 ]; then + # There should be enough disk space, so collect. + log "$msg" >> "$OPT_DEST/$prefix-trigger" + log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger" + last_prefix="$prefix" + + + # Fork and background the collect subroutine which will + # run for --run-time seconds. We (the parent) sleep + # while it's collecting (hopefully --sleep is longer than + # --run-time). + ( + collect "$OPT_DEST" "$prefix" + ) >> "$OPT_DEST/$prefix-output" 2>&1 & + log "Collector PID $!" + else + # There will not be enough disk space, so do not collect. + warn "Collect canceled because there will not be enough disk space after collecting another $margin bytes" + fi + fi + + # ################################################################## + # Done collecting. + # ################################################################## + ITER=$((ITER + 1)) + sleep_ok "$OPT_SLEEP" "Sleeping $OPT_SLEEP seconds after collect" + else + # Trigger/check/value is ok, sleep until next check. + sleep_ok "$OPT_INTERVAL" + fi + + # Purge old collect file between checks. + if [ -d "$OPT_DEST" ]; then + purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME" + fi + done +} + +# ########################################################################### +# Main program loop, called below if tool is run from the command line. +# ########################################################################### + +main() { + trap sigtrap SIGHUP SIGINT SIGTERM + + # Note: $$ is the parent's PID, but we're a child proc. + # Bash 4 has $BASHPID but we can't rely on that. Consequently, + # we don't know our own PID. See the usage of $! below. 
+ RAN_WITH="--function=$OPT_FUNCTION --variable=$OPT_VARIABLE --threshold=$OPT_THRESHOLD --match=$OPT_MATCH --cycles=$OPT_CYCLES --interval=$OPT_INTERVAL --iterations=$OPT_ITERATIONS --run-time=$OPT_RUN_TIME --sleep=$OPT_SLEEP --dest=$OPT_DEST --prefix=$OPT_PREFIX --notify-by-email=$OPT_NOTIFY_BY_EMAIL --log=$OPT_LOG --pid=$OPT_PID" + + log "Starting $0 $RAN_WITH" + + # Test if we have root; warn if not, but it isn't critical. + if [ "$(id -u)" != "0" ]; then + log 'Not running with root privileges!'; + fi + + # Make a secure tmpdir. + mk_tmpdir + + # Stalk while oktorun. + stalk + + # Clean up. + rm_tmpdir + remove_pid_file "$OPT_PID" + + log "Exiting because $EXIT_REASON" + log "$0 exit status $EXIT_STATUS" + exit $EXIT_STATUS +} + +# Execute the program if it was not included from another file. +# This makes it possible to include without executing, and thus test. +if [ "${0##*/}" = "$TOOL" ] \ + || [ "${0##*/}" = "bash" -a "$_" = "$0" ]; then + + # Parse command line options. We must do this first so we can + # see if --daemonize was specified. + mk_tmpdir + parse_options "$0" "$@" + + # Verify and set TRIGGER_FUNCTION based on --function. + if ! set_trg_func "$OPT_FUNCTION"; then + option_error "Invalid --function value: $OPT_FUNCTION" + fi + + usage_or_errors "$0" + po_status=$? + rm_tmpdir + if [ $po_status -ne 0 ]; then + [ $OPT_ERRS -gt 0 ] && exit 1 + exit 0 + fi + + # Check that mysql and mysqladmin are in PATH. If not, we're + # already dead in the water, so don't bother with cmd line opts, + # just error and exit. + [ -n "$(mysql --help)" ] \ + || die "Cannot execute mysql. Check that it is in PATH." + [ -n "$(mysqladmin --help)" ] \ + || die "Cannot execute mysqladmin. Check that it is in PATH." + + # Now that we have the cmd line opts, check that we can actually + # connect to MySQL. + [ -n "$(mysql $EXT_ARGV -e 'SELECT 1')" ] \ + || die "Cannot connect to MySQL. Check that MySQL is running and that the options after -- are correct." 
+ + # Check existence and access to the --dest dir if we're collecting. + if [ "$OPT_COLLECT" ]; then + if [ ! -d "$OPT_DEST" ]; then + mkdir -p "$OPT_DEST" || die "Cannot make --dest $OPT_DEST" + fi + + # Check access to the --dest dir. By setting -e in the subshell, + # if either command fails, the subshell will exit immediately and + # $? will be non-zero. + ( + set -e + touch "$OPT_DEST/test" + rm "$OPT_DEST/test" + ) + if [ $? -ne 0 ]; then + die "Cannot read and write files to --dest $OPT_DEST" + fi + fi + + if [ "$OPT_DAEMONIZE" ]; then + # Check access to the --log file. + touch "$OPT_LOG" || die "Cannot write to --log $OPT_LOG" + + # The PID file will at first have our (parent) PID. + # This is fine for ensuring that only one of us is + # running, but it's not fine if the user wants to use + # the PID in the PID file to check or kill the child + # process. So we'll need to update the PID file with + # the child's PID. + make_pid_file "$OPT_PID" $$ + + main "$@" >"$OPT_LOG" 2>&1 & + + # Update PID file with the child's PID. + # The child PID is $BASHPID but that special var is only + # in Bash 4+, so we can't rely on it. Consequently, we + # use $! to get the PID of the child we just forked. + echo "$!" > "$OPT_PID" + else + make_pid_file "$OPT_PID" $$ + main "$@" + fi +fi # ############################################################################ # Documentation @@ -172,16 +1236,17 @@ done =head1 NAME -pt-stalk - Wait for a condition to occur then begin collecting data. +pt-stalk - Gather forensic data about MySQL when a problem occurs. =head1 SYNOPSIS -Usage: pt-stalk +Usage: pt-stalk [OPTIONS] [-- MYSQL OPTIONS] -pt-stalk watches for a condition to become true, and when it does, executes -a script. By default it executes L, but that can be customized. -This tool is useful for gathering diagnostic data when an infrequent event -occurs, so an expert person can review the data later. 
+pt-stalk watches for a trigger condition to become true, and then collects data +to help in diagnosing problems. It is designed to run as a daemon with root +privileges, so that you can diagnose intermittent problems that you cannot +observe directly. You can also use it to execute a custom command, or to gather +the data on demand without waiting for the trigger to happen. =head1 RISKS @@ -190,7 +1255,10 @@ whether known or unknown, of using this tool. The two main categories of risks are those created by the nature of the tool (e.g. read-only tools vs. read-write tools) and those created by bugs. -pt-stalk is a read-only tool. It should be very low-risk. +pt-stalk is a read-write tool; it collects data from the system and writes it +into a series of files. It should be very low-risk. Some of the options +can cause intrusive data collection to be performed, however, so if you enable +any non-default options, you should read their documentation carefully. At the time of this release, we know of no bugs that could cause serious harm to users. @@ -204,139 +1272,337 @@ See also L<"BUGS"> for more information on filing bugs and getting help. =head1 DESCRIPTION -Although pt-stalk comes pre-configured to do a specific thing, in general -this tool is just a skeleton script for the following flow of actions: +Sometimes a problem happens infrequently and for a short time, giving you no +chance to see the system when it happens. How do you solve intermittent MySQL +problems when you can't observe them? That's why pt-stalk exists. In addition to +using it when there's a known problem on your servers, it is a good idea to run +pt-stalk all the time, even when you think nothing is wrong. You will +appreciate the data it gathers when a problem occurs, because problems such as +MySQL lockups or spikes of activity typically leave no evidence to use in root +cause analysis. 
-=over +This tool does two things: it watches a server (typically MySQL) for a trigger +to occur, and it gathers diagnostic data. To use it effectively, you need to +define a good trigger condition. A good trigger is sensitive enough to fire +reliably when a problem occurs, so that you don't miss a chance to solve +problems. On the other hand, a good trigger isn't prone to false positives, so +you don't gather information when the server is functioning normally. -=item 1. +The most reliable triggers for MySQL tend to be the number of connections to the +server, and the number of queries running concurrently. These are available in +the SHOW GLOBAL STATUS command as Threads_connected and Threads_running. +Sometimes Threads_connected is not a reliable indicator of trouble, but +Threads_running usually is. Your job, as the tool's user, is to define an +appropriate trigger condition for the tool. Choose carefully, because the +quality of your results will depend on the trigger you choose. -Loop infinitely, sleeping between iterations. +You can define the trigger with the L<"--function">, L<"--variable">, and +L<"--threshold"> options, among others. Please read the documentation for +--function to learn how to do this. -=item 2. +The pt-stalk tool, by default, simply watches MySQL repeatedly until the trigger +becomes true. It then gathers diagnostics for a while, and sleeps afterwards for +some time to prevent repeatedly gathering data if the condition remains true. +In crude pseudocode, omitting some subtleties, -In each iteration, run some command and get the output. + while true; do + if --variable from --function is greater than --threshold; then + observations++ + if observations is at least --cycles; then + capture diagnostics for --run-time seconds + exit if --iterations is exceeded + sleep for --sleep seconds + fi + fi + clean up data that's older than --retention-time + sleep for --interval seconds + done -=item 3. 
+The diagnostic data is written to files whose names begin with a timestamp, so +you can distinguish samples from each other in case the tool collects data +multiple times. The pt-sift tool is designed to help you browse and analyze the +resulting samples of data. -If the command fails or the output is larger than the threshold, -execute the collection script; but do not execute if the destination disk -is too full. - -=back - -By default, the tool is configured to execute mysqladmin extended-status and -extract the value of the Threads_connected variable; if this is greater than -100, it runs the collection script. This is really just placeholder code, -and almost certainly needs to be customized! - -If the tool does execute the collection script, it will wait for a while -before checking and executing again. This is to prevent a continuous -condition from causing a huge number of executions to fire off. - -The name 'stalk' is because 'watch' is already taken, and 'stalk' is fun. +Although this sounds simple enough, in practice there are a number of +subtleties, such as detecting when the disk is beginning to fill up so that the +tool doesn't cause the server to run out of disk space. This tool handles these +types of potential problems, so it's a good idea to use this tool instead of +writing something from scratch and possibly experiencing some of the hazards +this tool is designed to prevent. =head1 CONFIGURING -If the file F exists in the current working directory, then -L<"ENVIRONMENT"> variables are imported from it. For example, the config -file has the format: +You can use standard Percona Toolkit configuration files to set commandline +options. - INTERVAL=10 - GDB=yes +You will probably want to run the tool as a daemon and customize at least the +diagnostic threshold. Here's a sample configuration file for triggering when +there are more than 20 queries running at once: -See L<"ENVIRONMENT">. 
+ daemonize + threshold=20 + +If you're not running the tool as it's designed (as a root user, daemonized) +then you'll need to set several options, such as L<"--dest">, to locations that +are writable by non-root users. =head1 OPTIONS -This tool does not have any command-line options, but see -L<"ENVIRONMENT"> and L<"CONFIGURING">. +=over -=head1 ENVIRONMENT +=item --collect -The following environment variables configure how, what, and when the tool -runs. They are all optional and can be specified either on the command line -or in the F config file (see L<"CONFIGURING">). +default: yes; negatable: yes + +Collect system information. You can negate this option to make the tool watch +the system but not actually gather any diagnostic data. + +=item --collect-gdb + +Collect GDB stacktraces. This is achieved by attaching to MySQL and printing +stack traces from all threads. This will freeze the server for some period of +time, ranging from a second or so to much longer on very busy systems with a lot +of memory and many threads in the server. For this reason, it is disabled by +default. However, if you are trying to diagnose a server stall or lockup, +freezing the server causes no additional harm, and the stack traces can be vital +for diagnosis. + +In addition to freezing the server, there is also some risk of the server +crashing or performing badly after GDB detaches from it. + +=item --collect-oprofile + +Collect oprofile data. This is achieved by starting an oprofile session, +letting it run for the collection time, and then stopping and saving the +resulting profile data in the system's default location. Please read your +system's oprofile documentation to learn more about this. + +=item --collect-strace + +Collect strace data. This is achieved by attaching strace to the server, which +will make it run very slowly until strace detaches. The same cautions apply as +those listed in --collect-gdb. 
You should not enable this option together with +--collect-gdb, because GDB and strace can't attach to the server process +simultaneously. + +=item --collect-tcpdump + +Collect tcpdump data. This option causes tcpdump to capture all traffic on all +interfaces for the port on which MySQL is listening. You can later use +pt-query-digest to decode the MySQL protocol and extract a log of query traffic +from it. + +=item --config + +type: string + +Read this comma-separated list of config files. If specified, this must be the +first option on the command line. + +=item --cycles + +type: int; default: 5 + +The number of times the trigger condition must be true before collecting data. +This helps prevent false positives, and makes the trigger condition less likely +to fire when the problem recovers quickly. + +=item --daemonize + +Daemonize the tool. This causes the tool to fork into the background and log +its output as specified in --log. + +=item --dest + +type: string; default: /var/lib/pt-stalk + +Where to store the diagnostic data. Each time the tool collects data, it writes +to a new set of files, which are named with the current system timestamp. + +=item --disk-bytes-free + +type: size; default: 100M + +Don't collect data if the disk has less than this much free space. +This prevents the tool from filling up the disk with diagnostic data. + +If the L<"--dest"> directory contains a previously captured sample of data, +the tool will measure its size and use that as an estimate of how much data is +likely to be gathered this time, too. It will then be even more pessimistic, +and will refuse to collect data unless the disk has enough free space to hold +the sample and still have the desired amount of free space. For example, if +you'd like 100MB of free space and the previous diagnostic sample consumed +100MB, the tool won't collect any data unless the disk has 200MB free. + +Valid size value suffixes are k, M, G, and T. 
+ +=item --disk-pct-free + +type: int; default: 5 + +Don't collect data if the disk has less than this percent free space. +This prevents the tool from filling up the disk with diagnostic data. + +This option works similarly to L<"--disk-bytes-free"> but specifies a +percentage margin of safety instead of a bytes margin of safety. +The tool honors both options, and will not collect any data unless both +margins are satisfied. + +=item --function + +type: string; default: status + +Specifies what to watch for a diagnostic trigger. The default value watches +SHOW GLOBAL STATUS, but you can also watch SHOW PROCESSLIST or supply a plugin +file with your own custom code. This function supplies the value of +L<"--variable">, which is then compared against L<"--threshold"> to see if the +trigger condition is met. Additional options may be required as well; see +below. Possible values: =over -=item THRESHOLD (default 100) +=item * status -This is the max number of we want to tolerate. +This value specifies that the source of data for the diagnostic trigger is SHOW +GLOBAL STATUS. The value of L<"--variable"> then defines which status counter +is the trigger. -=item VARIABLE (default Threads_connected} +=item * processlist -This is the thing to check for. +This value specifies that the data for the diagnostic trigger comes from SHOW +FULL PROCESSLIST. The trigger value is the count of processes whose +L<"--variable"> column matches the L<"--match"> option. For example, to trigger +when more than 10 processes are in the "statistics" state, use the following +options: -=item CYCLES (default 1) - -How many times must the condition be met before the script will fire? - -=item GDB (default no) - -Collect GDB stacktraces? - -=item OPROFILE (default yes) - -Collect oprofile data? - -=item STRACE (default no) - -Collect strace data? - -=item TCPDUMP (default yes) - -Collect tcpdump data? - -=item EMAIL - -Send mail to this list of addresses when the script triggers. 
- -=item MYSQLOPTIONS - -Any options to pass to mysql/mysqladmin, such as -u, -p, etc - -=item INTERVAL (default 30) - -This is the interval between checks. - -=item MAYBE_EMPTY (default no) - -If the command you're running to detect the condition is allowed to return -nothing (e.g. a grep line that might not even exist if there's no problem), -then set this to "yes". - -=item COLLECT (default ${HOME}/bin/pt-collect) - -This is the location of the 'collect' script. - -=item DEST (default ${HOME}/collected/) - -This is where to store the collected data. - -=item DURATION (default 30) - -How long to collect statistics data for? Make sure that this isn't longer -than SLEEP. - -=item SLEEP (default DURATION * 10) - -How long to sleep after collecting? - -=item PCT_THRESHOLD (default 95) - -Bail out if the disk is more than this %full. - -=item MB_THRESHOLD (default 100) - -Bail out if the disk has less than this many MB free. - -=item PURGE (default 30) - -Remove samples after this many days. + --function processlist --variable State \ + --match statistics --threshold 10 =back +In addition, you can specify a file that contains your custom trigger function, +written in Unix shell script. This can be a wrapper that executes anything you +wish. If the argument to --function is a file, then it takes precedence over +builtin functions, so if there is a file in the working directory named "status" +or "processlist" then the tool will use that file as a plugin, even though those +are otherwise recognized as reserved words for this option. + +The plugin file works by providing a function called C<trg_plugin>, and the tool +simply sources the file and executes the function. For example, the function +might look like the following: + + trg_plugin() { + mysql $EXT_ARGV -e "SHOW ENGINE INNODB STATUS" \ + | grep -c "has waited at" + } + +This snippet will count the number of mutex waits inside of InnoDB. 
It +illustrates the general principle: the function must output a number, which is +then compared to the threshold as usual. The $EXT_ARGV variable contains the +MySQL options mentioned in the L<"SYNOPSIS"> above. + +The plugin should not alter the tool's existing global variables. Prefix any +plugin-specific global variables with "PLUGIN_" or make them local. + +=item --help + +Print help and exit. + +=item --interval + +type: int; default: 1 + +Interval between checks for the diagnostic trigger. + +=item --iterations + +type: int + +Exit after collecting diagnostics this many times. By default, the tool +will continue to watch the server forever, but this is useful for scenarios +where you want to capture once and then exit, for example. + +=item --log + +type: string; default: /var/log/pt-stalk.log + +Print all output to this file when daemonized. + +=item --match + +type: string + +The pattern to use when watching SHOW PROCESSLIST. See the documentation for +L<"--function"> for details. + +=item --notify-by-email + +type: string + +Send mail to this list of addresses when data is collected. + +=item --pid + +type: string; default: /var/run/pt-stalk.pid + +Create a PID file when daemonized. + +=item --prefix + +type: string + +The filename prefix for diagnostic samples. By default, samples have a timestamp +prefix based on the current local time, such as 2011_12_06_14_02_02, which is +December 6, 2011 at 14:02:02. + +=item --retention-time + +type: int; default: 30 + +Number of days to retain collected samples. Any samples that are older will be +purged. + +=item --run-time + +type: int; default: 30 + +How long the tool will collect data when it triggers. This should not be longer +than L<"--sleep">. It is usually not necessary to change this; if the default 30 +seconds hasn't gathered enough diagnostic data, running longer is not likely to +do so. In fact, in many cases a shorter collection period is appropriate. 
+ +=item --sleep + +type: int; default: 300 + +How long to sleep after collecting data. This prevents the tool from triggering +continuously, which might be a problem if the collection process is intrusive. +It also prevents filling up the disk or gathering too much data to analyze +reasonably. + +=item --threshold + +type: int; default: 25 + +The threshold at which the diagnostic trigger should fire. See L<"--function"> +for details. + +=item --variable + +type: string; default: Threads_running + +The variable to compare against the threshold. See L<"--function"> for details. + +=item --version + +Print tool's version and exit. + +=back + +=head1 ENVIRONMENT + +This tool does not use any environment variables for configuration. + =head1 SYSTEM REQUIREMENTS This tool requires Bash v3 or newer. @@ -385,7 +1651,7 @@ Replace C with the name of any tool. =head1 AUTHORS -Baron Schwartz, Justin Swanhart, and Fernando Ipar +Baron Schwartz, Justin Swanhart, Fernando Ipar, and Daniel Nichter =head1 ABOUT PERCONA TOOLKIT diff --git a/bin/pt-summary b/bin/pt-summary index 4b408581..7f475107 100755 --- a/bin/pt-summary +++ b/bin/pt-summary @@ -44,13 +44,53 @@ fuzz () { echo $1 | $AP_AWK "{fuzzy_var=\$1; ${fuzzy_formula} print fuzzy_var;}" } +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +set -u + +TMPDIR="" + +mk_tmpdir() { + local dir=${1:-""} + + if [ -n "$dir" ]; then + if [ ! 
-d "$dir" ]; then + mkdir $dir || die "Cannot make tmpdir $dir" + fi + TMPDIR="$dir" + else + local tool=`basename $0` + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf $TMPDIR + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + # The temp files are for storing working results so we don't call commands many # times (gives inconsistent results, maybe adds load on things I don't want to # such as RAID controllers). They must not exist -- if they did, someone would # symlink them to /etc/passwd and then run this program as root. Call this # function with "rm" or "touch" as an argument. temp_files() { - for file in /tmp/percona-toolkit /tmp/percona-toolkit2; do + for file in $TMPDIR/percona-toolkit $TMPDIR/percona-toolkit2; do case "$1" in touch) if ! touch "${file}"; then @@ -128,12 +168,12 @@ group_concat () { # Functions for parsing specific files and getting desired info from them. # These are called from within main() and are separated so they can be tested # easily. The calling convention is that the data they need to run is prepared -# first by putting it into /tmp/percona-toolkit. Then code that's testing just needs to -# put sample data into /tmp/percona-toolkit and call it. +# first by putting it into $TMPDIR/percona-toolkit. Then code that's testing +# just needs to put sample data into $TMPDIR/percona-toolkit and call it. # ############################################################################## # ############################################################################## -# Parse Linux's /proc/cpuinfo, which should be stored in /tmp/percona-toolkit. +# Parse Linux's /proc/cpuinfo, which should be stored in $TMPDIR/percona-toolkit. 
# ############################################################################## parse_proc_cpuinfo () { local file=$1 @@ -189,8 +229,8 @@ parse_psrinfo_cpus() { start = index($0, " at ") + 4; end = length($0) - start - 4 print substr($0, start, end); - }' "$1" | sort | uniq -c > /tmp/percona-toolkit2 - name_val "Speeds" "$(group_concat /tmp/percona-toolkit2)" + }' "$1" | sort | uniq -c > $TMPDIR/percona-toolkit2 + name_val "Speeds" "$(group_concat $TMPDIR/percona-toolkit2)" } # ############################################################################## @@ -292,7 +332,7 @@ parse_ip_s_link () { } # ############################################################################## -# Parse the output of 'netstat -antp' which should be in /tmp/percona-toolkit. +# Parse the output of 'netstat -antp' which should be in $TMPDIR/percona-toolkit. # ############################################################################## parse_netstat () { local file=$1 @@ -397,7 +437,7 @@ parse_filesystems () { } # ############################################################################## -# Parse the output of fdisk -l, which should be in /tmp/percona-toolkit; there might be +# Parse the output of fdisk -l, which should be in $TMPDIR/percona-toolkit; there might be # multiple fdisk -l outputs in the file. # ############################################################################## parse_fdisk () { @@ -431,7 +471,7 @@ parse_fdisk () { } # ############################################################################## -# Parse the output of dmesg, which should be in /tmp/percona-toolkit, and detect +# Parse the output of dmesg, which should be in $TMPDIR/percona-toolkit, and detect # virtualization. 
# ############################################################################## parse_virtualization_dmesg () { @@ -463,7 +503,7 @@ parse_virtualization_generic() { } # ############################################################################## -# Parse the output of lspci, which should be in /tmp/percona-toolkit, and detect +# Parse the output of lspci, which should be in $TMPDIR/percona-toolkit, and detect # Ethernet cards. # ############################################################################## parse_ethernet_controller_lspci () { @@ -474,7 +514,7 @@ parse_ethernet_controller_lspci () { } # ############################################################################## -# Parse the output of lspci, which should be in /tmp/percona-toolkit, and detect RAID +# Parse the output of lspci, which should be in $TMPDIR/percona-toolkit, and detect RAID # controllers. # ############################################################################## parse_raid_controller_lspci () { @@ -497,7 +537,7 @@ parse_raid_controller_lspci () { } # ############################################################################## -# Parse the output of dmesg, which should be in /tmp/percona-toolkit, and detect RAID +# Parse the output of dmesg, which should be in $TMPDIR/percona-toolkit, and detect RAID # controllers. 
# ############################################################################## parse_raid_controller_dmesg () { @@ -516,7 +556,7 @@ parse_raid_controller_dmesg () { # ############################################################################## # Parse the output of "hpacucli ctrl all show config", which should be stored in -# /tmp/percona-toolkit +# $TMPDIR/percona-toolkit # ############################################################################## parse_hpacucli () { local file=$1 @@ -524,7 +564,7 @@ parse_hpacucli () { } # ############################################################################## -# Parse the output of arcconf, which should be stored in /tmp/percona-toolkit +# Parse the output of arcconf, which should be stored in $TMPDIR/percona-toolkit # ############################################################################## parse_arcconf () { local file=$1 @@ -634,7 +674,7 @@ parse_fusionmpt_lsiutil () { } # ############################################################################## -# Parse the output of MegaCli64 -AdpAllInfo -aALL from /tmp/percona-toolkit. +# Parse the output of MegaCli64 -AdpAllInfo -aALL from $TMPDIR/percona-toolkit. 
# ############################################################################## parse_lsi_megaraid_adapter_info () { local file=$1 @@ -653,7 +693,7 @@ parse_lsi_megaraid_adapter_info () { } # ############################################################################## -# Parse the output (saved in /tmp/percona-toolkit) of +# Parse the output (saved in $TMPDIR/percona-toolkit) of # /opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -GetBbuStatus -aALL # ############################################################################## parse_lsi_megaraid_bbu_status () { @@ -665,7 +705,7 @@ parse_lsi_megaraid_bbu_status () { } # ############################################################################## -# Parse physical devices from the output (saved in /tmp/percona-toolkit) of +# Parse physical devices from the output (saved in $TMPDIR/percona-toolkit) of # /opt/MegaRAID/MegaCli/MegaCli64 -LdPdInfo -aALL # OR, it will also work with the output of # /opt/MegaRAID/MegaCli/MegaCli64 -PDList -aALL @@ -694,7 +734,7 @@ parse_lsi_megaraid_devices () { } # ############################################################################## -# Parse virtual devices from the output (saved in /tmp/percona-toolkit) of +# Parse virtual devices from the output (saved in $TMPDIR/percona-toolkit) of # /opt/MegaRAID/MegaCli/MegaCli64 -LdPdInfo -aALL # OR, it will also work with the output of # /opt/MegaRAID/MegaCli/MegaCli64 -LDInfo -Lall -aAll @@ -826,6 +866,7 @@ main () { export PATH="${PATH}:/usr/StorMan/:/opt/MegaRAID/MegaCli/"; # Set up temporary files. + mk_tmpdir temp_files "rm" temp_files "touch" section Percona_Toolkit_System_Summary_Report @@ -833,7 +874,7 @@ main () { # ######################################################################## # Grab a bunch of stuff and put it into temp files for later. 
# ######################################################################## - sysctl -a > /tmp/percona-toolkit.sysctl 2>/dev/null + sysctl -a > $TMPDIR/percona-toolkit.sysctl 2>/dev/null # ######################################################################## # General date, time, load, etc @@ -939,19 +980,19 @@ main () { # available to non-root users and usually has telltale signs. It's most # reliable to look at /var/log/dmesg if possible. There are a number of # other ways to find out if a system is virtualized. - cat /var/log/dmesg > /tmp/percona-toolkit 2>/dev/null - if [ ! -s /tmp/percona-toolkit ]; then - dmesg > /tmp/percona-toolkit 2>/dev/null + cat /var/log/dmesg > $TMPDIR/percona-toolkit 2>/dev/null + if [ ! -s $TMPDIR/percona-toolkit ]; then + dmesg > $TMPDIR/percona-toolkit 2>/dev/null fi - if [ -s /tmp/percona-toolkit ]; then - virt="$(parse_virtualization_dmesg /tmp/percona-toolkit)" + if [ -s $TMPDIR/percona-toolkit ]; then + virt="$(parse_virtualization_dmesg $TMPDIR/percona-toolkit)" fi if [ -z "${virt}" ]; then if which lspci >/dev/null 2>&1; then - lspci > /tmp/percona-toolkit 2>/dev/null - if grep -qi virtualbox /tmp/percona-toolkit; then + lspci > $TMPDIR/percona-toolkit 2>/dev/null + if grep -qi virtualbox $TMPDIR/percona-toolkit; then virt=VirtualBox - elif grep -qi vmware /tmp/percona-toolkit; then + elif grep -qi vmware $TMPDIR/percona-toolkit; then virt=VMWare elif [ -e /proc/user_beancounters ]; then virt="OpenVZ/Virtuozzo" @@ -962,10 +1003,10 @@ main () { virt="FreeBSD Jail" fi elif [ "${platform}" = "SunOS" ]; then - if which prtdiag >/dev/null 2>&1 && prtdiag > /tmp/percona-toolkit.prtdiag 2>/dev/null; then - virt="$(parse_virtualization_generic /tmp/percona-toolkit.prtdiag)" - elif which smbios >/dev/null 2>&1 && smbios > /tmp/percona-toolkit.smbios 2>/dev/null; then - virt="$(parse_virtualization_generic /tmp/percona-toolkit.smbios)" + if which prtdiag >/dev/null 2>&1 && prtdiag > $TMPDIR/percona-toolkit.prtdiag 2>/dev/null; then + 
virt="$(parse_virtualization_generic $TMPDIR/percona-toolkit.prtdiag)" + elif which smbios >/dev/null 2>&1 && smbios > $TMPDIR/percona-toolkit.smbios 2>/dev/null; then + virt="$(parse_virtualization_generic $TMPDIR/percona-toolkit.smbios)" fi fi name_val Virtualized "${virt:-No virtualization detected}" @@ -975,23 +1016,23 @@ main () { # ######################################################################## section Processor if [ -f /proc/cpuinfo ]; then - cat /proc/cpuinfo > /tmp/percona-toolkit 2>/dev/null - parse_proc_cpuinfo /tmp/percona-toolkit + cat /proc/cpuinfo > $TMPDIR/percona-toolkit 2>/dev/null + parse_proc_cpuinfo $TMPDIR/percona-toolkit elif [ "${platform}" = "FreeBSD" ]; then - parse_sysctl_cpu_freebsd /tmp/percona-toolkit.sysctl + parse_sysctl_cpu_freebsd $TMPDIR/percona-toolkit.sysctl elif [ "${platform}" = "SunOS" ]; then - psrinfo -v > /tmp/percona-toolkit - parse_psrinfo_cpus /tmp/percona-toolkit + psrinfo -v > $TMPDIR/percona-toolkit + parse_psrinfo_cpus $TMPDIR/percona-toolkit # TODO: prtconf -v actually prints the CPU model name etc. 
fi section Memory if [ "${platform}" = "Linux" ]; then - free -b > /tmp/percona-toolkit - cat /proc/meminfo >> /tmp/percona-toolkit - parse_free_minus_b /tmp/percona-toolkit + free -b > $TMPDIR/percona-toolkit + cat /proc/meminfo >> $TMPDIR/percona-toolkit + parse_free_minus_b $TMPDIR/percona-toolkit elif [ "${platform}" = "FreeBSD" ]; then - parse_memory_sysctl_freebsd /tmp/percona-toolkit.sysctl + parse_memory_sysctl_freebsd $TMPDIR/percona-toolkit.sysctl elif [ "${platform}" = "SunOS" ]; then name_val Memory "$(prtconf | awk -F: '/Memory/{print $2}')" fi @@ -1007,8 +1048,8 @@ main () { fi fi - if which dmidecode >/dev/null 2>&1 && dmidecode > /tmp/percona-toolkit 2>/dev/null; then - parse_dmidecode_mem_devices /tmp/percona-toolkit + if which dmidecode >/dev/null 2>&1 && dmidecode > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_dmidecode_mem_devices $TMPDIR/percona-toolkit fi # ######################################################################## @@ -1023,25 +1064,25 @@ main () { if [ "${platform}" = "Linux" ]; then cmd="df -h -P" fi - $cmd | sort > /tmp/percona-toolkit2 - mount | sort | join /tmp/percona-toolkit2 - > /tmp/percona-toolkit - parse_filesystems /tmp/percona-toolkit "${platform}" + $cmd | sort > $TMPDIR/percona-toolkit2 + mount | sort | join $TMPDIR/percona-toolkit2 - > $TMPDIR/percona-toolkit + parse_filesystems $TMPDIR/percona-toolkit "${platform}" fi fi if [ "${platform}" = "Linux" ]; then section "Disk_Schedulers_And_Queue_Size" - echo "" > /tmp/percona-toolkit + echo "" > $TMPDIR/percona-toolkit for disk in $(ls /sys/block/ | grep -v -e ram -e loop -e 'fd[0-9]'); do if [ -e "/sys/block/${disk}/queue/scheduler" ]; then name_val "${disk}" "$(cat /sys/block/${disk}/queue/scheduler | grep -o '\[.*\]') $(cat /sys/block/${disk}/queue/nr_requests)" - fdisk -l "/dev/${disk}" >> /tmp/percona-toolkit 2>/dev/null + fdisk -l "/dev/${disk}" >> $TMPDIR/percona-toolkit 2>/dev/null fi done - # Relies on /tmp/percona-toolkit having data from the Disk 
Schedulers loop. + # Relies on $TMPDIR/percona-toolkit having data from the Disk Schedulers loop. section "Disk_Partioning" - parse_fdisk /tmp/percona-toolkit + parse_fdisk $TMPDIR/percona-toolkit section "Kernel_Inode_State" for file in dentry-state file-nr inode-nr; do @@ -1064,15 +1105,15 @@ main () { # often available to non-root users. It's most reliable to look at # /var/log/dmesg if possible. # ######################################################################## - if which lspci >/dev/null 2>&1 && lspci > /tmp/percona-toolkit 2>/dev/null; then - controller="$(parse_raid_controller_lspci /tmp/percona-toolkit)" + if which lspci >/dev/null 2>&1 && lspci > $TMPDIR/percona-toolkit 2>/dev/null; then + controller="$(parse_raid_controller_lspci $TMPDIR/percona-toolkit)" fi if [ -z "${controller}" ]; then - cat /var/log/dmesg > /tmp/percona-toolkit 2>/dev/null - if [ ! -s /tmp/percona-toolkit ]; then - dmesg > /tmp/percona-toolkit 2>/dev/null + cat /var/log/dmesg > $TMPDIR/percona-toolkit 2>/dev/null + if [ ! -s $TMPDIR/percona-toolkit ]; then + dmesg > $TMPDIR/percona-toolkit 2>/dev/null fi - controller="$(parse_raid_controller_dmesg /tmp/percona-toolkit)" + controller="$(parse_raid_controller_dmesg $TMPDIR/percona-toolkit)" fi name_val Controller "${controller:-No RAID controller detected}" @@ -1085,29 +1126,29 @@ main () { # ######################################################################## notfound="" if [ "${controller}" = "AACRAID" ]; then - if arcconf getconfig 1 > /tmp/percona-toolkit 2>/dev/null; then - parse_arcconf /tmp/percona-toolkit + if arcconf getconfig 1 > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_arcconf $TMPDIR/percona-toolkit elif ! which arcconf >/dev/null 2>&1; then notfound="e.g. 
http://www.adaptec.com/en-US/support/raid/scsi_raid/ASR-2120S/" fi elif [ "${controller}" = "HP Smart Array" ]; then - if hpacucli ctrl all show config > /tmp/percona-toolkit 2>/dev/null; then - parse_hpacucli /tmp/percona-toolkit + if hpacucli ctrl all show config > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_hpacucli $TMPDIR/percona-toolkit elif ! which hpacucli >/dev/null 2>&1; then notfound="your package repository or the manufacturer's website" fi elif [ "${controller}" = "LSI Logic MegaRAID SAS" ]; then - if MegaCli64 -AdpAllInfo -aALL -NoLog > /tmp/percona-toolkit 2>/dev/null; then - parse_lsi_megaraid_adapter_info /tmp/percona-toolkit + if MegaCli64 -AdpAllInfo -aALL -NoLog > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_lsi_megaraid_adapter_info $TMPDIR/percona-toolkit elif ! which MegaCli64 >/dev/null 2>&1; then notfound="your package repository or the manufacturer's website" fi - if MegaCli64 -AdpBbuCmd -GetBbuStatus -aALL -NoLog > /tmp/percona-toolkit 2>/dev/null; then - parse_lsi_megaraid_bbu_status /tmp/percona-toolkit + if MegaCli64 -AdpBbuCmd -GetBbuStatus -aALL -NoLog > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_lsi_megaraid_bbu_status $TMPDIR/percona-toolkit fi - if MegaCli64 -LdPdInfo -aALL -NoLog > /tmp/percona-toolkit 2>/dev/null; then - parse_lsi_megaraid_virtual_devices /tmp/percona-toolkit - parse_lsi_megaraid_devices /tmp/percona-toolkit + if MegaCli64 -LdPdInfo -aALL -NoLog > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_lsi_megaraid_virtual_devices $TMPDIR/percona-toolkit + parse_lsi_megaraid_devices $TMPDIR/percona-toolkit fi fi @@ -1122,8 +1163,8 @@ main () { # ##################################################################### if [ "${platform}" = "Linux" ]; then section Network_Config - if which lspci > /dev/null 2>&1 && lspci > /tmp/percona-toolkit 2>/dev/null; then - parse_ethernet_controller_lspci /tmp/percona-toolkit + if which lspci > /dev/null 2>&1 && lspci > $TMPDIR/percona-toolkit 2>/dev/null; then + 
parse_ethernet_controller_lspci $TMPDIR/percona-toolkit fi if sysctl net.ipv4.tcp_fin_timeout > /dev/null 2>&1; then name_val "FIN Timeout" "$(sysctl net.ipv4.tcp_fin_timeout)" @@ -1135,15 +1176,15 @@ main () { # /proc/sys/net/netfilter/nf_conntrack_max or /proc/sys/net/nf_conntrack_max # in new kernels like Fedora 12? - if which ip >/dev/null 2>&1 && ip -s link > /tmp/percona-toolkit 2>/dev/null; then + if which ip >/dev/null 2>&1 && ip -s link > $TMPDIR/percona-toolkit 2>/dev/null; then section Interface_Statistics - parse_ip_s_link /tmp/percona-toolkit + parse_ip_s_link $TMPDIR/percona-toolkit fi if [ "${platform}" = "Linux" ]; then section Network_Connections - if netstat -antp > /tmp/percona-toolkit 2>/dev/null; then - parse_netstat /tmp/percona-toolkit + if netstat -antp > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_netstat $TMPDIR/percona-toolkit fi fi fi @@ -1164,12 +1205,12 @@ main () { fi if which vmstat > /dev/null 2>&1 ; then section "Simplified_and_fuzzy_rounded_vmstat_(wait_please)" - vmstat 1 5 > /tmp/percona-toolkit + vmstat 1 5 > $TMPDIR/percona-toolkit if [ "${platform}" = "Linux" ]; then - format_vmstat /tmp/percona-toolkit + format_vmstat $TMPDIR/percona-toolkit else # TODO: simplify/format for other platforms - cat /tmp/percona-toolkit + cat $TMPDIR/percona-toolkit fi fi fi @@ -1179,6 +1220,7 @@ main () { # ######################################################################## temp_files "rm" temp_files "check" + rm_tmpdir section The_End } @@ -1238,9 +1280,9 @@ See also L<"BUGS"> for more information on filing bugs and getting help. =head1 DESCRIPTION pt-summary runs a large variety of commands to inspect system status and -configuration, saves the output into files in /tmp, and then runs Unix -commands on these results to format them nicely. 
It works best when -executed as a privileged user, but will also work without privileges, +configuration, saves the output into files in a temporary directory, and +then runs Unix commands on these results to format them nicely. It works +best when executed as a privileged user, but will also work without privileges, although some output might not be possible to generate without root. =head1 OPTIONS diff --git a/docs/percona-toolkit.pod b/docs/percona-toolkit.pod index bccd9ca6..25265137 100644 --- a/docs/percona-toolkit.pod +++ b/docs/percona-toolkit.pod @@ -190,8 +190,8 @@ The syntax of the configuration files is as follows: =item * -Whitespace followed by a hash (#) sign signifies that the rest of the line is a -comment. This is deleted. +Whitespace followed by a hash sign (#) signifies that the rest of the line is a +comment. This is deleted. For example: =item * @@ -208,7 +208,9 @@ Each line is permitted to be in either of the following formats: option option=value -Whitespace around the equals sign is deleted during processing. +Do not prefix the option with C<-->. Do not quote the values, even if +they have spaces; values are literal. Whitespace around the equals sign is +deleted during processing. =item * @@ -222,6 +224,22 @@ program. =back +=head2 EXAMPLE + +This config file for pt-stalk, + + # Config for pt-stalk + variable=Threads_connected + cycles=2 # trigger if problem seen twice in a row + -- + --user daniel + +is equivalent to this command line: + + pt-stalk --variable Threads_connected --cycles 2 -- --user daniel + +Options after C<--> are passed literally to mysql and mysqladmin.
+ =head2 READ ORDER The tools read several configuration files in order: diff --git a/lib/bash/alt_cmds.sh b/lib/bash/alt_cmds.sh index 55a41e3a..e0d96e64 100644 --- a/lib/bash/alt_cmds.sh +++ b/lib/bash/alt_cmds.sh @@ -25,10 +25,24 @@ set -u # seq N, return 1, ..., 5 _seq() { - local i=$1 + local i="$1" awk "BEGIN { for(i=1; i<=$i; i++) print i; }" } +_pidof() { + local cmd="$1" + if ! pidof "$cmd" 2>/dev/null; then + ps -eo pid,ucomm | awk -v comm="$cmd" '$2 == comm { print $1 }' + fi +} + +_lsof() { + local pid="$1" + if ! lsof -p $pid 2>/dev/null; then + /bin/ls -l /proc/$pid/fd 2>/dev/null + fi +} + # ########################################################################### # End alt_cmds package # ########################################################################### diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index 9acf2a0c..472e5e3c 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -1,4 +1,4 @@ -# This program is copyright 2011 Percona Inc. +# This program is copyright 2011-2012 Percona Inc. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED @@ -24,33 +24,31 @@ set -u # Global variables. 
-CMD_GDB=${CMD_GDB:-"gdb"} -CMD_IOSTAT=${CMD_IOSTAT:-"iostat"} -CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"} -CMD_MYSQL=${CMD_MSSQL:-"mysql"} -CMD_MYSQLADMIN=${CMD_MYSQL_ADMIN:-"mysqladmin"} -CMD_OPCONTROL=${CMD_OPCONTROL:-"opcontrol"} -CMD_OPREPORT=${CMD_OPREPORT:-"opreport"} -CMD_PMAP=${CMD_PMAP:-"pmap"} -CMD_STRACE=${CMD_STRACE:-"strace"} -CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"} -CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"} +CMD_GDB="$(which gdb)" +CMD_IOSTAT="$(which iostat)" +CMD_MPSTAT="$(which mpstat)" +CMD_MYSQL="$(which mysql)" +CMD_MYSQLADMIN="$(which mysqladmin)" +CMD_OPCONTROL="$(which opcontrol)" +CMD_OPREPORT="$(which opreport)" +CMD_PMAP="$(which pmap)" +CMD_STRACE="$(which strace)" +CMD_SYSCTL="$(which sysctl)" +CMD_TCPDUMP="$(which tcpdump)" +CMD_VMSTAT="$(which vmstat)" + +# Try to find command manually. +[ -z "$CMD_SYSCTL" -a -x "/sbin/sysctl" ] && CMD_SYSCTL="/sbin/sysctl" collect() { - local d=$1 # directory to save results in - local p=$2 # prefix for each result file + local d="$1" # directory to save results in + local p="$2" # prefix for each result file - # Get pidof mysqld; pidof doesn't exist on some systems. We try our best... - local mysqld_pid=$(pidof -s mysqld); - if [ -z "$mysqld_pid" ]; then - mysqld_pid=$(pgrep -o -x mysqld); - fi - if [ -z "$mysqld_pid" ]; then - mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); - fi + # Get pidof mysqld. + local mysqld_pid=$(_pidof mysqld | head -n1) # Get memory allocation info before anything else. - if [ -x "$CMD_PMAP" -a "$mysqld_pid" ]; then + if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" else @@ -60,21 +58,19 @@ collect() { fi # Getting a GDB stacktrace can be an intensive operation, - # so do this only if necessary. - if [ "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then + # so do this only if necessary (and possible). 
+ if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" -a "$mysqld_pid" ]; then $CMD_GDB \ -ex "set pagination 0" \ -ex "thread apply all bt" \ --batch -p $mysqld_pid \ >> "$d/$p-stacktrace" - else - echo "GDB (--collect-gdb) was not enabled" >> "$d/$p-stacktrace" fi # Get MySQL's variables if possible. Then sleep long enough that we probably # complete SHOW VARIABLES if all's well. (We don't want to run mysql in the # foreground, because it could hang.) - $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" & sleep .2 # Get the major.minor version number. Version 3.23 doesn't matter for our @@ -90,14 +86,15 @@ collect() { local tail_error_log_pid="" if [ "$mysql_error_log" ]; then - echo "The MySQL error log seems to be ${mysql_error_log}" - tail -f "$mysql_error_log" >"$d/$p-log_error" 2>&1 & + log "The MySQL error log seems to be $mysql_error_log" + tail -f "$mysql_error_log" >"$d/$p-log_error" & tail_error_log_pid=$! + # Send a mysqladmin debug to the server so we can potentially learn about # locking etc. $CMD_MYSQLADMIN $EXT_ARGV debug else - echo "Could not find the MySQL error log" + log "Could not find the MySQL error log" fi # Get a sample of these right away, so we can get these without interaction @@ -108,13 +105,13 @@ collect() { else local mutex="SHOW MUTEX STATUS" fi - $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" 2>&1 & - $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" 2>&1 & - open_tables >> "$d/$p-opentables1" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" & + $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" & + open_tables >> "$d/$p-opentables1" & # If TCP dumping is specified, start that on the server's port. 
local tcpdump_pid="" - if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then + if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" ]; then local port=$(awk '/^port/{print $2}' "$d/$p-variables") if [ "$port" ]; then $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & @@ -124,30 +121,40 @@ collect() { # Next, start oprofile gathering data during the whole rest of this process. # The --init should be a no-op if it has already been init-ed. - local have_oprofile="no" - if [ "$OPT_COLLECT_OPROFILE" = "yes" ]; then + local have_oprofile="" + if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" -a "$mysqld_pid" ]; then # Don't run oprofile and strace at the same time. - $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & + $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" & local strace_pid=$! fi # Grab a few general things first. Background all of these so we can start # them all up as quickly as possible. 
- ps -eaf >> "$d/$p-ps" 2>&1 & - sysctl -a >> "$d/$p-sysctl" 2>&1 & - top -bn1 >> "$d/$p-top" 2>&1 & - $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & - $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & - $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & - $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & - $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & - $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & - lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + ps -eaf >> "$d/$p-ps" & + top -bn1 >> "$d/$p-top" & + + [ "$mysqld_pid" ] && _lsof $mysqld_pid >> "$d/$p-lsof" & + + if [ "$CMD_SYSCTL" ]; then + $CMD_SYSCTL -a >> "$d/$p-sysctl" & + fi + if [ "$CMD_VMSTAT" ]; then + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" & + fi + if [ "$CMD_IOSTAT" ]; then + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" & + fi + if [ "$CMD_MPSTAT" ]; then + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" & + fi # Collect multiple snapshots of the status variables. We use # mysqladmin -c even though it is buggy and won't stop on its @@ -155,57 +162,83 @@ collect() { # get and keep a connection to the database; in troubled times # the database tends to exceed max_connections, so reconnecting # in the loop tends not to work very well. - $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" 2>&1 & + $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" & local mysqladmin_pid=$! - local have_lock_waits_table=0 + local have_lock_waits_table="" $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ - | grep -qi "INNODB_LOCK_WAITS" + | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1 if [ $? 
-eq 0 ]; then - have_lock_waits_table=1 + have_lock_waits_table="yes" fi # This loop gathers data for the rest of the duration, and defines the time # of the whole job. - echo "Loop start: $(date +'TS %s.%N %F %T')" + log "Loop start: $(date +'TS %s.%N %F %T')" for loopno in $(_seq $OPT_RUN_TIME); do # We check the disk, but don't exit, because we need to stop jobs if we # need to exit. disk_space $d > $d/$p-disk-space check_disk_space \ $d/$p-disk-space \ - "$OPT_DISK_BYTE_LIMIT" \ - "$OPT_DISK_PCT_LIMIT" \ + "$OPT_DISK_BYTES_FREE" \ + "$OPT_DISK_PCT_FREE" \ || break # Synchronize ourselves onto the clock tick, so the sleeps are 1-second sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') local ts="$(date +"TS %s.%N %F %T")" - # Collect the stuff for this cycle - (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & - (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & - (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & - (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & - (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & - (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & - (df -h 2>&1; echo $ts) >> "$d/$p-df" & - (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & - (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & + # ##################################################################### + # Collect data for this cycle. 
+ # ##################################################################### - ($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \ - >> "$d/$p-processlist" + if [ -d "/proc" ]; then + if [ -f "/proc/diskstats" ]; then + (echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" & + fi + if [ -f "/proc/stat" ]; then + (echo $ts; cat /proc/stat) >> "$d/$p-procstat" & + fi + if [ -f "/proc/vmstat" ]; then + (echo $ts; cat /proc/vmstat) >> "$d/$p-procvmstat" & + fi + if [ -f "/proc/meminfo" ]; then + (echo $ts; cat /proc/meminfo) >> "$d/$p-meminfo" & + fi + if [ -f "/proc/slabinfo" ]; then + (echo $ts; cat /proc/slabinfo) >> "$d/$p-slabinfo" & + fi + if [ -f "/proc/interrupts" ]; then + (echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" & + fi + fi - if [ $have_lock_waits_table -eq 1 ]; then - (lock_waits 2>&1; echo $ts) >>"$d/$p-lock-waits" + (echo $ts; df -h) >> "$d/$p-df" & + + (echo $ts; netstat -antp) >> "$d/$p-netstat" & + (echo $ts; netstat -s) >> "$d/$p-netstat_s" & + + (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \ + >> "$d/$p-processlist" & + + if [ "$have_lock_waits_table" ]; then + (echo $ts; lock_waits) >>"$d/$p-lock-waits" & fi done - echo "Loop end: $(date +'TS %s.%N %F %T')" + log "Loop end: $(date +'TS %s.%N %F %T')" - if [ "$have_oprofile" = "yes" ]; then + if [ "$have_oprofile" ]; then $CMD_OPCONTROL --stop $CMD_OPCONTROL --dump - kill $(pidof oprofiled); # TODO: what if system doesn't have pidof? 
+ + local oprofiled_pid=$(_pidof oprofiled) + if [ "$oprofiled_pid" ]; then + kill $oprofiled_pid + else + warn "Cannot kill oprofiled because its PID cannot be determined" + fi + $CMD_OPCONTROL --save=pt_collect_$p # Attempt to generate a report; if this fails, then just tell the user @@ -220,39 +253,51 @@ collect() { "$mysqld_path" \ > "$d/$p-opreport" else - echo "oprofile data saved to pt_collect_$p; you should be able" \ + log "oprofile data saved to pt_collect_$p; you should be able" \ "to get a report by running something like 'opreport" \ "--demangle=smart --symbols --merge tgid session:pt_collect_$p" \ "/path/to/mysqld'" \ > "$d/$p-opreport" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid # Sometimes strace leaves threads/processes in T status. - kill -s 18 $mysqld_pid + [ "$mysqld_pid" ] && kill -s 18 $mysqld_pid fi - $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" 2>&1 & - $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" 2>&1 & - open_tables >> "$d/$p-opentables2" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" & + $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" & + open_tables >> "$d/$p-opentables2" & # Kill backgrounded tasks. kill $mysqladmin_pid [ "$tail_error_log_pid" ] && kill $tail_error_log_pid - [ "$tcpdump_pid" ] && kill $tcpdump_pid + [ "$tcpdump_pid" ] && kill $tcpdump_pid # Finally, record what system we collected this data from. hostname > "$d/$p-hostname" + + # Remove "empty" files, i.e. ones that are truly empty or + # just contain timestamp lines. When a command above fails, + # it may leave an empty file. + for file in "$d/$p-"*; do + # If there's not at least 1 line that's not a TS, + # then the file is empty. 
+ if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then + log "Removing empty file $file"; + rm "$file" + fi + done } open_tables() { local open_tables=$($CMD_MYSQLADMIN $EXT_ARGV ext | grep "Open_tables" | awk '{print $4}') if [ -n "$open_tables" -a $open_tables -le 1000 ]; then - $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' 2>&1 & + $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' & else - echo "Too many open tables: $open_tables" + log "Too many open tables: $open_tables" fi } diff --git a/lib/bash/daemon.sh b/lib/bash/daemon.sh index 1fe823b8..663cb8b3 100644 --- a/lib/bash/daemon.sh +++ b/lib/bash/daemon.sh @@ -30,8 +30,8 @@ set -u # file - File to write PID to. # pid - PID to write into file. make_pid_file() { - local file=$1 - local pid=$2 + local file="$1" + local pid="$2" # Yes there's a race condition here, between checking if the file exists # and creating it, but it's not important enough to handle. @@ -39,7 +39,7 @@ make_pid_file() { if [ -f "$file" ]; then # PID file already exists. See if the pid it contains is still running. # If yes, then die. Else, the pid file is stale and we can reclaim it. - local old_pid=$(cat $file) + local old_pid=$(cat "$file") if [ -z "$old_pid" ]; then # PID file is empty, so be safe and die since we can't check a # non-existent pid. @@ -56,13 +56,16 @@ make_pid_file() { fi # PID file doesn't exist, or it does but its pid is stale. - echo "$pid" > $file + echo "$pid" > "$file" + if [ $? 
-ne 0 ]; then + die "Cannot create or write PID file $file" + fi } remove_pid_file() { - local file=$1 + local file="$1" if [ -f "$file" ]; then - rm $file + rm "$file" fi } diff --git a/lib/bash/log_warn_die.sh b/lib/bash/log_warn_die.sh index 6d5ca2e4..eaa7fed3 100644 --- a/lib/bash/log_warn_die.sh +++ b/lib/bash/log_warn_die.sh @@ -28,16 +28,16 @@ EXIT_STATUS=0 log() { TS=$(date +%F-%T | tr :- _); - echo "$TS $1" + echo "$TS $*" } warn() { - log "$1" >&2 - EXIT_STATUS=$((EXIT_STATUS | 1)) + log "$*" >&2 + EXIT_STATUS=1 } die() { - warn "$1" + warn "$*" exit 1 } diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index e4915247..3bb2762f 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -1,4 +1,4 @@ -# This program is copyright 2011 Percona Inc. +# This program is copyright 2011-2012 Percona Inc. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED @@ -22,15 +22,39 @@ # parse_options parses Perl POD options from Bash tools and creates # global variables for each option. +# XXX +# GLOBAL $TMPDIR AND $TOOL MUST BE SET BEFORE USING THIS LIB! +# XXX + +# Parsing command line options with Bash is easy until we have to deal +# with values that have spaces, e.g. --option="hello world". This is +# further complicated by command line vs. config file. From the command +# line, <--option "hello world"> is put into $@ as "--option", "hello world", +# i.e. 2 args. From a config file, <option=hello world> is either 2 args +# split on the space, or 1 arg as a whole line. It needs to be 2 args +# split on the = but this isn't possible; see the note before while read +# in _parse_config_files(). Perl tool config files do not work when the +# value is quoted, so we can't quote it either.
And in any case, that +# wouldn't work because then the value would include the literal quotes +# because it's a line from a file, not a command line where Bash will +# interpret the quotes and return a single value in the code. So... + +# XXX +# BE CAREFUL MAKING CHANGES TO THIS LIB AND MAKE SURE +# t/lib/bash/parse_options.sh STILL PASSES! +# XXX + set -u # Global variables. These must be global because declare inside a # sub will be scoped locally. ARGV="" # Non-option args (probably input files) EXT_ARGV="" # Everything after -- (args for an external command) +HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV OPT_ERRS=0 # How many command line option errors -OPT_VERSION="no" # If --version was specified -OPT_HELP="no" # If --help was specified +OPT_VERSION="" # If --version was specified +OPT_HELP="" # If --help was specified +PO_DIR="" # Directory with program option spec files # Sub: usage # Print usage (--help) and list the program's options. @@ -41,44 +65,71 @@ OPT_HELP="no" # If --help was specified # Required Global Variables: # TMPDIR - Temp directory set by <mk_tmpdir>. # TOOL - Tool's name. -# -# Optional Global Variables: -# OPT_ERR - Command line option error message. usage() { - local file=$1 + local file="$1" - local usage=$(grep '^Usage: ' $file) - echo $usage >&2 - echo >&2 - echo "For more information, 'man $TOOL' or 'perldoc $file'." >&2 + local usage=$(grep '^Usage: ' "$file") + echo $usage + echo + echo "For more information, 'man $TOOL' or 'perldoc $file'."
} usage_or_errors() { - local file=$1 + local file="$1" - if [ "$OPT_VERSION" = "yes" ]; then - local version=$(grep '^pt-[^ ]\+ [0-9]' $file) + if [ "$OPT_VERSION" ]; then + local version=$(grep '^pt-[^ ]\+ [0-9]' "$file") echo "$version" return 1 fi - if [ "$OPT_HELP" = "yes" ]; then + if [ "$OPT_HELP" ]; then usage "$file" - echo >&2 - echo "Command line options:" >&2 - echo >&2 - for opt in $(ls $TMPDIR/po/); do - local desc=$(cat $TMPDIR/po/$opt | grep '^desc:' | sed -e 's/^desc://') - echo "--$opt" >&2 - echo " $desc" >&2 - echo >&2 + echo + echo "Command line options:" + echo + perl -e ' + use strict; + use warnings FATAL => qw(all); + my $lcol = 20; # Allow this much space for option names. + my $rcol = 80 - $lcol; # The terminal is assumed to be 80 chars wide. + my $name; + while ( <> ) { + my $line = $_; + chomp $line; + if ( $line =~ s/^long:/ --/ ) { + $name = $line; + } + elsif ( $line =~ s/^desc:// ) { + $line =~ s/ +$//mg; + my @lines = grep { $_ } + $line =~ m/(.{0,$rcol})(?:\s+|\Z)/g; + if ( length($name) >= $lcol ) { + print $name, "\n", (q{ } x $lcol); + } + else { + printf "%-${lcol}s", $name; + } + print join("\n" . (q{ } x $lcol), @lines); + print "\n"; + } + } + ' "$PO_DIR"/* + echo + echo "Options and values after processing arguments:" + echo + for opt in $(ls "$PO_DIR"); do + local varname="OPT_$(echo "$opt" | tr a-z- A-Z_)" + local varvalue="${!varname}" + printf -- " --%-30s %s" "$opt" "${varvalue:-(No value)}" + echo done return 1 fi if [ $OPT_ERRS -gt 0 ]; then - echo >&2 - usage $file + echo + usage "$file" return 1 fi @@ -86,6 +137,12 @@ usage_or_errors() { return 0 } +option_error() { + local err="$1" + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "$err" >&2 +} + # Sub: parse_options # Parse Perl POD options from a program file. # @@ -100,9 +157,59 @@ usage_or_errors() { # option, removing the option's leading --, changing all - to _, and # prefixing with "OPT_". E.g. --foo-bar becomes OPT_FOO_BAR. 
parse_options() { - local file=$1 + local file="$1" shift + # XXX + # Reset all globals else t/lib/bash/parse_options.sh will fail. + # XXX + ARGV="" + EXT_ARGV="" + HAVE_EXT_ARGV="" + OPT_ERRS=0 + OPT_VERSION="" + OPT_HELP="" + PO_DIR="$TMPDIR/po" + + # Ready the directory for the program option (po) spec files. + if [ ! -d "$PO_DIR" ]; then + mkdir "$PO_DIR" + if [ $? -ne 0 ]; then + echo "Cannot mkdir $PO_DIR" >&2 + exit 1 + fi + fi + + rm -rf "$PO_DIR"/* + if [ $? -ne 0 ]; then + echo "Cannot rm -rf $PO_DIR/*" >&2 + exit 1 + fi + + _parse_pod "$file" # Parse POD into program option (po) spec files + _eval_po # Eval po into existence with default values + + # If the first option is --config FILES, then remove it and use + # those files instead of the default config files. + if [ $# -ge 2 ] && [ "$1" = "--config" ]; then + shift # --config + local user_config_files="$1" + shift # that ^ + local IFS="," + for user_config_file in $user_config_files; do + _parse_config_files "$user_config_file" + done + else + _parse_config_files "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" + fi + + # Finally, parse the command line. + _parse_command_line "$@" +} + +_parse_pod() { + local file="$1" + # Parse the program options (po) from the POD. Each option has # a spec file like: # $ cat po/string-opt2 @@ -111,51 +218,48 @@ parse_options() { # default=foo # That's the spec for --string-opt2. Each line is a key:value pair # from the option's POD line like "type: string; default: foo". 
- mkdir $TMPDIR/po/ 2>/dev/null - rm -rf $TMPDIR/po/* - ( - export PO_DIR="$TMPDIR/po" - cat $file | perl -ne ' - BEGIN { $/ = ""; } - next unless $_ =~ m/^=head1 OPTIONS/; - while ( defined(my $para = <>) ) { - last if $para =~ m/^=head1/; + cat "$file" | PO_DIR="$PO_DIR" perl -ne ' + BEGIN { $/ = ""; } + next unless $_ =~ m/^=head1 OPTIONS/; + while ( defined(my $para = <>) ) { + last if $para =~ m/^=head1/; + chomp; + if ( $para =~ m/^=item --(\S+)/ ) { + my $opt = $1; + my $file = "$ENV{PO_DIR}/$opt"; + open my $opt_fh, ">", $file or die "Cannot open $file: $!"; + print $opt_fh "long:$opt\n"; + $para = <>; chomp; - if ( $para =~ m/^=item --(\S+)/ ) { - my $opt = $1; - my $file = "$ENV{PO_DIR}/$opt"; - open my $opt_fh, ">", $file or die "Cannot open $file: $!"; - printf $opt_fh "long:$opt\n"; + if ( $para =~ m/^[a-z ]+:/ ) { + map { + chomp; + my ($attrib, $val) = split(/: /, $_); + print $opt_fh "$attrib:$val\n"; + } split(/; /, $para); $para = <>; chomp; - if ( $para =~ m/^[a-z ]+:/ ) { - map { - chomp; - my ($attrib, $val) = split(/: /, $_); - printf $opt_fh "$attrib:$val\n"; - } split(/; /, $para); - $para = <>; - chomp; - } - my ($desc) = $para =~ m/^([^?.]+)/; - printf $opt_fh "desc:$desc.\n"; - close $opt_fh; } + my ($desc) = $para =~ m/^([^?.]+)/; + print $opt_fh "desc:$desc.\n"; + close $opt_fh; } - last; - ' - ) + } + last; + ' +} +_eval_po() { # Evaluate the program options into existence as global variables # transformed like --my-op == $OPT_MY_OP. If an option has a default # value, it's assigned that value. Else, it's value is an empty string. 
- for opt_spec in $(ls $TMPDIR/po/); do + local IFS=":" + for opt_spec in "$PO_DIR"/*; do local opt="" local default_val="" local neg=0 - while read line; do - local key=`echo $line | cut -d ':' -f 1` - local val=`echo $line | cut -d ':' -f 2` + local size=0 + while read key val; do case "$key" in long) opt=$(echo $val | sed 's/-/_/g' | tr [:lower:] [:upper:]) @@ -166,6 +270,7 @@ parse_options() { "short form") ;; type) + [ "$val" = "size" ] && size=1 ;; desc) ;; @@ -175,13 +280,13 @@ parse_options() { fi ;; *) - echo "Invalid attribute in $TMPDIR/po/$opt_spec: $line" >&2 + echo "Invalid attribute in $opt_spec: $line" >&2 exit 1 esac - done < $TMPDIR/po/$opt_spec + done < "$opt_spec" if [ -z "$opt" ]; then - echo "No long attribute in option spec $TMPDIR/po/$opt_spec" >&2 + echo "No long attribute in option spec $opt_spec" >&2 exit 1 fi @@ -192,9 +297,58 @@ parse_options() { fi fi + # Convert sizes. + if [ $size -eq 1 -a -n "$default_val" ]; then + default_val=$(size_to_bytes $default_val) + fi + + # Eval the option into existence as a global variable. eval "OPT_${opt}"="$default_val" done +} +_parse_config_files() { + + for config_file in "$@"; do + # Next config file if this one doesn't exist. + test -f "$config_file" || continue + + # We must use while read because values can contain spaces. + # Else, if we for $(grep ...) then a line like "op=hello world" + # will return 2 values: "op=hello" and "world". If we quote + # the command like for "$(grep ...)" then the entire config + # file is returned as 1 value like "opt=hello world\nop2=42". + while read config_opt; do + + # Skip the line if it begins with a # or is blank. + echo "$config_opt" | grep '^[ ]*[^#]' >/dev/null 2>&1 || continue + + # Strip leading and trailing spaces, and spaces around the first =, + # and end-of-line # comments. + config_opt="$(echo "$config_opt" | sed -e 's/^ *//g' -e 's/ *$//g' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + + # Skip blank lines. 
+ [ "$config_opt" = "" ] && continue + + # Options in a config file are not prefixed with --, + # but command line options are, so one or the other has + # to add or remove the -- prefix. We add it for config + # files rather than trying to strip it from command line + # options because it's a simpler operation here. + if ! [ "$HAVE_EXT_ARGV" ]; then + config_opt="--$config_opt" + fi + + _parse_command_line "$config_opt" + + done < "$config_file" + + HAVE_EXT_ARGV="" # reset for each file + + done +} + +_parse_command_line() { # Parse the command line options. Anything after -- is put into # EXT_ARGV. Options must begin with one or two hyphens (--help or -h), # else the item is put into ARGV (it's probably a filename, directory, @@ -205,82 +359,135 @@ parse_options() { # a default value 100, then $OPT_FOO=100 already, but if --foo=500 is # specified on the command line, then we re-eval $OPT_FOO=500 to update # $OPT_FOO. - for opt; do - if [ $# -eq 0 ]; then - break # no more opts + local opt="" + local val="" + local next_opt_is_val="" + local opt_is_ok="" + local opt_is_negated="" + local real_opt="" + local required_arg="" + local spec="" + + for opt in "$@"; do + if [ "$opt" = "--" -o "$opt" = "----" ]; then + HAVE_EXT_ARGV=1 + continue fi - opt=$1 - if [ "$opt" = "--" ]; then - shift - EXT_ARGV="$@" - break - fi - shift - if [ $(expr "$opt" : "-") -eq 0 ]; then - # Option does not begin with a hyphen (-), so treat it as - # a filename, directory, etc. - if [ -z "$ARGV" ]; then - ARGV="$opt" + if [ "$HAVE_EXT_ARGV" ]; then + # Previous line was -- so this and subsequent options are + # really external argvs. + if [ "$EXT_ARGV" ]; then + EXT_ARGV="$EXT_ARGV $opt" else - ARGV="$ARGV $opt" + EXT_ARGV="$opt" fi continue fi - # Save real opt from cmd line for error messages. - local real_opt="$opt" - - # Strip leading -- or --no- from option. 
- if $(echo $opt | grep -q '^--no-'); then - neg=1 - opt=$(echo $opt | sed 's/^--no-//') - else - neg=0 - opt=$(echo $opt | sed 's/^-*//') - fi - - # Find the option's spec file. - if [ -f "$TMPDIR/po/$opt" ]; then - spec="$TMPDIR/po/$opt" - else - spec=$(grep "^short form:-$opt\$" $TMPDIR/po/* | cut -d ':' -f 1) - if [ -z "$spec" ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "Unknown option: $real_opt" >&2 + if [ "$next_opt_is_val" ]; then + next_opt_is_val="" + if [ $# -eq 0 ] || [ $(expr "$opt" : "-") -eq 1 ]; then + option_error "$real_opt requires a $required_arg argument" continue fi - fi - - # Get the value specified for the option, if any. If the opt's spec - # says it has a type, then it requires a value and that value should - # be the next item ($1). Else, typeless options (like --version) are - # either "yes" if specified, else "no" if negatable and --no-opt. - required_arg=$(cat $spec | grep '^type:' | cut -d':' -f2) - if [ -n "$required_arg" ]; then - if [ $# -eq 0 ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "$real_opt requires a $required_arg argument" >&2 - continue - else - val="$1" - shift - fi + val="$opt" + opt_is_ok=1 else - if [ $neg -eq 0 ]; then - val="yes" + # If option does not begin with a hyphen (-), it's a filename, etc. + if [ $(expr "$opt" : "-") -eq 0 ]; then + if [ -z "$ARGV" ]; then + ARGV="$opt" + else + ARGV="$ARGV $opt" + fi + continue + fi + + # Save real opt from cmd line for error messages. + real_opt="$opt" + + # Strip leading -- or --no- from option. + if $(echo $opt | grep '^--no-' >/dev/null); then + opt_is_negated=1 + opt=$(echo $opt | sed 's/^--no-//') else - val="no" + opt_is_negated="" + opt=$(echo $opt | sed 's/^-*//') + fi + + # Split opt=val pair. + if $(echo $opt | grep '^[a-z-][a-z-]*=' >/dev/null 2>&1); then + val="$(echo $opt | awk -F= '{print $2}')" + opt="$(echo $opt | awk -F= '{print $1}')" + fi + + # Find the option's spec file. 
+ if [ -f "$TMPDIR/po/$opt" ]; then + spec="$TMPDIR/po/$opt" + else + spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) + if [ -z "$spec" ]; then + option_error "Unknown option: $real_opt" + continue + fi + fi + + # Get the value specified for the option, if any. If the opt's spec + # says it has a type, then it requires a value: either the one given + # inline as --opt=val, or else the next item. Else, typeless options + # (like --version) are "yes" if specified, or empty if negated as + # --no-opt. + required_arg=$(cat "$spec" | awk -F: '/^type:/{print $2}') + if [ "$required_arg" ]; then + # Option takes a value. + if [ "$val" ]; then + opt_is_ok=1 + else + next_opt_is_val=1 + fi + else + # Option does not take a value. + if [ "$val" ]; then + option_error "Option $real_opt does not take a value" + continue + fi + if [ "$opt_is_negated" ]; then + val="" + else + val="yes" + fi + opt_is_ok=1 fi fi - # Get and transform the opt's long form. E.g.: -q == --quiet == QUIET. - opt=$(cat $spec | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + if [ "$opt_is_ok" ]; then + # Get and transform the opt's long form. E.g.: -q == --quiet == QUIET. + opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) - # Re-eval the option to update its global variable value. - eval "OPT_$opt"="$val" + # Convert sizes. + if grep "^type:size" "$spec" >/dev/null; then + val=$(size_to_bytes $val) + fi + + # Re-eval the option to update its global variable value.
+ eval "OPT_$opt"="'$val'" + + opt="" + val="" + next_opt_is_val="" + opt_is_ok="" + opt_is_negated="" + real_opt="" + required_arg="" + spec="" + fi done } +size_to_bytes() { + local size="$1" + echo $size | perl -ne '%f=(B=>1, K=>1_024, M=>1_048_576, G=>1_073_741_824, T=>1_099_511_627_776); m/^(\d+)([kMGT])?/i; print $1 * $f{uc($2 || "B")};' +} + # ########################################################################### # End parse_options package # ########################################################################### diff --git a/lib/bash/safeguards.sh b/lib/bash/safeguards.sh index 1e6df342..91402bb6 100644 --- a/lib/bash/safeguards.sh +++ b/lib/bash/safeguards.sh @@ -1,4 +1,4 @@ -# This program is copyright 2011 Percona Inc. +# This program is copyright 2011-2012 Percona Inc. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED @@ -24,55 +24,62 @@ set -u disk_space() { - local filesystem=${1:-"$PWD"} + local filesystem="${1:-$PWD}" # Filesystem 1024-blocks Used Available Capacity Mounted on # /dev/disk0s2 118153176 94409664 23487512 81% / - df -P -k $filesystem + df -P -k "$filesystem" } # Sub: check_disk_space # Check if there is or will be enough disk space. Input is a file # with output from <disk_space>, i.e. `df -P -k`. The df output -# must use 1k blocks, but the mb arg from the user is in MB. +# must use 1k blocks, which should be POSIX standard. # # Arguments: -# file - File with output from <disk_space>. -# mb - Minimum MB free. -# pc - Minimum percent free. -# mb_margin - Add this many MB to the real MB used. +# file - File with output from <disk_space>. +# min_free_bytes - Minimum free bytes. +# min_free_pct - Minimum free percentage. +# bytes_margin - Add this many bytes to the real bytes used. # # Returns: # 0 if there is/will be enough disk space, else 1.
check_disk_space() { - local file=$1 - local mb=${2:-"0"} - local pc=${3:-"0"} - local mb_margin=${4:-"0"} + local file="$1" + local min_free_bytes="${2:-0}" + local min_free_pct="${3:-0}" + local bytes_margin="${4:-0}" - # Convert MB to KB because the df output should be in 1k blocks. - local kb=$(($mb * 1024)) - local kb_margin=$(($mb_margin * 1024)) + # Real/actual bytes used and bytes free. + local used_bytes=$(cat "$file" | awk '/^\//{print $3 * 1024}'); + local free_bytes=$(cat "$file" | awk '/^\//{print $4 * 1024}'); + local pct_used=$(cat "$file" | awk '/^\//{print $5}' | sed -e 's/%//g'); + local pct_free=$((100 - $pct_used)) - local kb_used=$(cat $file | awk '/^\//{print $3}'); - local kb_free=$(cat $file | awk '/^\//{print $4}'); - local pc_used=$(cat $file | awk '/^\//{print $5}' | sed -e 's/%//g'); + # Report the real values to the user. + local real_free_bytes=$free_bytes + local real_pct_free=$pct_free - if [ "$kb_margin" -gt "0" ]; then - local kb_total=$(($kb_used + $kb_free)) + # If there's a margin, we need to adjust the real values. + if [ $bytes_margin -gt 0 ]; then + used_bytes=$(($used_bytes + $bytes_margin)) + free_bytes=$(($free_bytes - $bytes_margin)) + pct_used=$(awk "BEGIN { printf(\"%d\", ($used_bytes/($used_bytes + $free_bytes)) * 100) }") - kb_used=$(($kb_used + $kb_margin)) - kb_free=$(($kb_free - $kb_margin)) - pc_used=$(awk "BEGIN { printf(\"%d\", $kb_used/$kb_total * 100) }") + pct_free=$((100 - $pct_used)) fi - local pc_free=$((100 - $pc_used)) + if [ $free_bytes -lt $min_free_bytes -o $pct_free -lt $min_free_pct ]; then + warn "Not enough free disk space: + Limit: ${min_free_pct}% free, ${min_free_bytes} bytes free + Actual: ${real_pct_free}% free, ${real_free_bytes} bytes free (- $bytes_margin bytes margin) +" + # Print the df that we used. 
+ cat "$file" >&2 - if [ "$kb_free" -le "$kb" -o "$pc_free" -le "$pc" ]; then - warn "Not enough free disk space: ${pc_free}% free, ${kb_free} KB free; wanted more than ${pc}% free or ${kb} KB free" - return 1 + return 1 # not enough disk space fi - return 0 + return 0 # disk space is OK } # ########################################################################### diff --git a/lib/bash/tmpdir.sh b/lib/bash/tmpdir.sh index d1b9e1b7..0ee045ac 100644 --- a/lib/bash/tmpdir.sh +++ b/lib/bash/tmpdir.sh @@ -35,15 +35,15 @@ TMPDIR="" # Set Global Variables: # TMPDIR - Absolute path of secure temp directory. mk_tmpdir() { - local dir=${1:-""} + local dir="${1:-""}" if [ -n "$dir" ]; then if [ ! -d "$dir" ]; then - mkdir $dir || die "Cannot make tmpdir $dir" + mkdir "$dir" || die "Cannot make tmpdir $dir" fi TMPDIR="$dir" else - local tool=`basename $0` + local tool="${0##*/}" local pid="$$" TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ || die "Cannot make secure tmpdir" @@ -60,7 +60,7 @@ mk_tmpdir() { # TMPDIR - Set to "". 
rm_tmpdir() { if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then - rm -rf $TMPDIR + rm -rf "$TMPDIR" fi TMPDIR="" } diff --git a/t/lib/bash.t b/t/lib/bash.t index aaf1fe28..6b6f3998 100644 --- a/t/lib/bash.t +++ b/t/lib/bash.t @@ -15,6 +15,7 @@ use PerconaTest; my ($tool) = $PROGRAM_NAME =~ m/([\w-]+)\.t$/; push @ARGV, "$trunk/t/lib/bash/*.sh" unless @ARGV; +$ENV{BIN_DIR} = "$trunk/bin"; $ENV{LIB_DIR} = "$trunk/lib/bash"; $ENV{T_LIB_DIR} = "$trunk/t/lib"; diff --git a/t/lib/bash/alt_cmds.sh b/t/lib/bash/alt_cmds.sh new file mode 100644 index 00000000..5d674bc0 --- /dev/null +++ b/t/lib/bash/alt_cmds.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +TESTS=1 + +source "$LIB_DIR/alt_cmds.sh" + +_seq 5 > $TEST_TMPDIR/out +no_diff \ + $TEST_TMPDIR/out \ + $T_LIB_DIR/samples/bash/seq1.txt \ + "_seq 5" + +# ########################################################################### +# Done +# ########################################################################### diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index 9d9249fa..0564c666 100644 --- a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -1,10 +1,11 @@ #!/usr/bin/env bash -TESTS=18 +TESTS=20 TMPFILE="$TEST_TMPDIR/parse-opts-output" TMPDIR="$TEST_TMPDIR" PATH="$PATH:$PERCONA_TOOLKIT_SANDBOX/bin" +TOOL="pt-stalk" mkdir "$TMPDIR/collect" 2>/dev/null @@ -14,7 +15,7 @@ source "$LIB_DIR/safeguards.sh" source "$LIB_DIR/alt_cmds.sh" source "$LIB_DIR/collect.sh" -parse_options "$T_LIB_DIR/samples/bash/po002.sh" --run-time 1 -- --defaults-file=/tmp/12345/my.sandbox.cnf +parse_options "$BIN_DIR/pt-stalk" --run-time 1 -- --defaults-file=/tmp/12345/my.sandbox.cnf # Prefix (with path) for the collect files. local p="$TMPDIR/collect/2011_12_05" @@ -23,12 +24,23 @@ local p="$TMPDIR/collect/2011_12_05" collect "$TMPDIR/collect" "2011_12_05" > $p-output 2>&1 # Even if this system doesn't have all the cmds, collect should still -# create all the default files. 
+# have created some files for cmds that (hopefully) all systems have. ls -1 $TMPDIR/collect | sort > $TMPDIR/collect-files -no_diff \ - $TMPDIR/collect-files \ - $T_LIB_DIR/samples/bash/collect001.txt \ - "Default collect files" + +# If this system has /proc, then some files should be collected. +# Else, those files should not exist. +if [ -f /proc/diskstats ]; then + cmd_ok \ + "grep -q '[0-9]' $TMPDIR/collect/2011_12_05-diskstats" \ + "/proc/diskstats" +else + test -f $TMPDIR/collect/2011_12_05-diskstats + is "$?" "1" "No /proc/diskstats" +fi + +cmd_ok \ + "grep -q '\-hostname\$' $TMPDIR/collect-files" \ + "Collected hostname" cmd_ok \ "grep -q 'Avail' $p-df" \ @@ -96,11 +108,25 @@ cmd_ok \ local iters=$(cat $p-df | grep -c '^TS ') is "$iters" "1" "1 iteration/1s run time" +empty_files=0 +for file in $p-*; do + if ! [ -s $file ]; then + empty_files=1 + break + fi + if [ -z "$(grep -v '^TS ' --max-count 1 $file)" ]; then + empty_files=1 + break + fi +done + +is "$empty_files" "0" "No empty files" + # ########################################################################### # Try longer run time. # ########################################################################### -parse_options "$T_LIB_DIR/samples/bash/po002.sh" --run-time 2 -- --defaults-file=/tmp/12345/my.sandbox.cnf +parse_options "$BIN_DIR/pt-stalk" --run-time 2 -- --defaults-file=/tmp/12345/my.sandbox.cnf rm $TMPDIR/collect/* diff --git a/t/lib/bash/daemon.sh b/t/lib/bash/daemon.sh index 76ec4d0b..0ffad457 100644 --- a/t/lib/bash/daemon.sh +++ b/t/lib/bash/daemon.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=7 +TESTS=9 TMPDIR="$TEST_TMPDIR" local file="$TMPDIR/pid-file" @@ -60,6 +60,22 @@ is \ rm $file rm $TMPDIR/output +# ########################################################################### +# Die if pid file can't be created. +# ########################################################################### +( + make_pid_file "/root/pid" $$ >$TMPDIR/output 2>&1 +) + +is \ + "$?" 
\ + "1" \ + "Exit 1 if PID file can't be created" + +cmd_ok \ + "grep -q 'Cannot create or write PID file /root/pid' $TMPDIR/output" \ + "Error that PID file can't be created" + # ########################################################################### # Done. # ########################################################################### diff --git a/t/lib/bash/log_warn_die.sh b/t/lib/bash/log_warn_die.sh new file mode 100644 index 00000000..c3f4ed74 --- /dev/null +++ b/t/lib/bash/log_warn_die.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +TESTS=6 + +source "$LIB_DIR/log_warn_die.sh" + +log "Hello world!" > $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' $TEST_TMPDIR/log" \ + "log msg" + +log "Hello" "world!" > $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' $TEST_TMPDIR/log" \ + "log msg msg" + +is \ + "$EXIT_STATUS" \ + "0" \ + "Exit status 0" + +warn "Hello world!" 2> $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' $TEST_TMPDIR/log" \ + "warn msg" + +warn "Hello" "world!" 2> $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' $TEST_TMPDIR/log" \ + "warn msg msg" + +is \ + "$EXIT_STATUS" \ + "1" \ + "Exit status 1" + +# ########################################################################### +# Done +# ########################################################################### diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 176c824c..8c462eff 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,8 +1,10 @@ #!/usr/bin/env bash -TESTS=26 +TESTS=78 TMPFILE="$TEST_TMPDIR/parse-opts-output" +TOOL="pt-stalk" +TMPDIR="$TEST_TMPDIR" source "$LIB_DIR/log_warn_die.sh" source "$LIB_DIR/parse_options.sh" @@ -11,16 +13,14 @@ source "$LIB_DIR/parse_options.sh" # Parse options from POD using all default values. 
# ############################################################################ -TOOL="pt-stalk" -TMPDIR="$TEST_TMPDIR" -parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" 2>$TMPFILE +parse_options "$T_LIB_DIR/samples/bash/po001.sh" 2>$TMPFILE is "`cat $TMPFILE`" "" "No warnings or errors" is "$OPT_STRING_OPT" "" "Default string option" is "$OPT_STRING_OPT2" "foo" "Default string option with default" is "$OPT_TYPELESS_OPTION" "" "Default typeless option" -is "$OPT_NOPTION" "yes" "Defailt neg option" +is "$OPT_NOPTION" "yes" "Default neg option" is "$OPT_INT_OPT" "" "Default int option" is "$OPT_INT_OPT2" "42" "Default int option with default" is "$OPT_VERSION" "" "--version" @@ -38,6 +38,20 @@ is "$OPT_NOPTION" "yes" "Default neg option (spec)" is "$OPT_INT_OPT" "50" "Specified int option (spec)" is "$OPT_INT_OPT2" "42" "Default int option with default (spec)" is "$OPT_VERSION" "" "--version (spec)" +is "$ARGV" "" "ARGV" +is "$EXT_ARGV" "" "External ARGV" + +# ############################################################################ +# --option=value should work like --option value. +# ############################################################################ + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --int-opt=42 + +is "$OPT_INT_OPT" "42" "Specified int option (--option=value)" + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt="hello world" + +is "$OPT_STRING_OPT" "hello world" "Specified int option (--option=\"value\")" # ############################################################################ # Negate an option like --no-option. 
@@ -46,9 +60,9 @@ is "$OPT_VERSION" "" "--version (spec)" parse_options "$T_LIB_DIR/samples/bash/po001.sh" --no-noption is "$OPT_STRING_OPT" "" "Default string option (neg)" -is "$OPT_STRING_OPT2" "foo" "Default string option with default (net)" +is "$OPT_STRING_OPT2" "foo" "Default string option with default (neg)" is "$OPT_TYPELESS_OPTION" "" "Default typeless option (neg)" -is "$OPT_NOPTION" "no" "Negated option (neg)" +is "$OPT_NOPTION" "" "Negated option (neg)" is "$OPT_INT_OPT" "" "Default int option (neg)" is "$OPT_INT_OPT2" "42" "Default int option with default (neg)" is "$OPT_VERSION" "" "--version (neg)" @@ -60,6 +74,16 @@ is "$OPT_VERSION" "" "--version (neg)" parse_options "$T_LIB_DIR/samples/bash/po001.sh" -v is "$OPT_VERSION" "yes" "Short form" +# ############################################################################ +# Command line options plus externals args. +# ############################################################################ + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --no-noption -- --foo + +is "$OPT_NOPTION" "" "Negated option (--)" +is "$ARGV" "" "ARGV (--)" +is "$EXT_ARGV" "--foo" "External ARGV (--)" + # ############################################################################ # An unknown option should produce an error. # ############################################################################ @@ -77,11 +101,129 @@ is "$err" "1" "Non-zero exit on unknown option" # ########################################################################### parse_options "$T_LIB_DIR/samples/bash/po001.sh" --help usage_or_errors "$T_LIB_DIR/samples/bash/po001.sh" >$TMPFILE 2>&1 -no_diff \ - "$TMPFILE" \ - "$T_LIB_DIR/samples/bash/help001.txt" \ +cmd_ok \ + "grep -q \"For more information, 'man pt-stalk' or 'perldoc\" $TMPFILE" \ "--help" +cmd_ok \ + "grep -q ' --string-opt2[ ]*String option with a default.' 
$TMPFILE" \ + "Command line options" + +cmd_ok \ + "grep -q '\-\-string-opt[ ]*(No value)' $TMPFILE" \ + "Options and values after processing arguments" + +# Don't interpolate. +parse_options "$T_LIB_DIR/samples/bash/po003.sh" --help +usage_or_errors "$T_LIB_DIR/samples/bash/po003.sh" >$TMPFILE 2>&1 + +cmd_ok \ + "grep -q 'Exit if the disk is less than this %full.' $TMPFILE" \ + "Don't interpolate --help descriptions" + +# ########################################################################### +# Config files. +# ########################################################################### +TOOL="pt-test" +cp "$T_LIB_DIR/samples/bash/config001.conf" "$HOME/.$TOOL.conf" + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" + +is "$OPT_STRING_OPT" "abc" "Default string option (conf)" +is "$OPT_STRING_OPT2" "foo" "Default string option with default (conf)" +is "$OPT_TYPELESS_OPTION" "yes" "Default typeless option (conf)" +is "$OPT_NOPTION" "yes" "Default neg option (conf)" +is "$OPT_INT_OPT" "" "Default int option (conf)" +is "$OPT_INT_OPT2" "42" "Default int option with default (conf)" +is "$OPT_VERSION" "" "--version (conf)" +is "$ARGV" "" "ARGV (conf)" +is "$EXT_ARGV" "--host=127.1 --user=daniel" "External ARGV (conf)" + +# Command line should override config file. 
+parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt zzz + +is "$OPT_STRING_OPT" "zzz" "Command line overrides config file" + +# User-specified --config +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --config "$T_LIB_DIR/samples/bash/config003.conf" --string-opt bar + +is "$OPT_STRING_OPT" "bar" "--config string option" +is "$OPT_STRING_OPT2" "foo" "--config string option2" +is "$OPT_TYPELESS_OPTION" "" "--config typeless option" +is "$OPT_NOPTION" "yes" "--config negatable option" +is "$OPT_INT_OPT" "123" "--config int option" +is "$OPT_INT_OPT2" "42" "--config int option2" +is "$OPT_VERSION" "" "--config version option" +is "$ARGV" "" "--config ARGV" +is "$EXT_ARGV" "" "--config External ARGV" + +# Multiple --config files, last should take precedence. +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --config $T_LIB_DIR/samples/bash/config001.conf,$T_LIB_DIR/samples/bash/config002.conf + +is "$OPT_STRING_OPT" "hello world" "Two --config string option" +is "$OPT_TYPELESS_OPTION" "yes" "Two --config typeless option" +is "$OPT_INT_OPT" "100" "Two --config int option" +is "$ARGV" "" "Two --config ARGV" +is "$EXT_ARGV" "--host=127.1 --user=daniel" "Two--config External ARGV" + +# Spaces before and after the option[=value] lines. +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --config $T_LIB_DIR/samples/bash/config004.conf + +is "$OPT_STRING_OPT" "foo" "Default string option (spacey)" +is "$OPT_TYPELESS_OPTION" "yes" "Default typeless option (spacey)" +is "$OPT_INT_OPT" "123" "Default int option (spacey)" +is "$ARGV" "" "ARGV (spacey)" +is "$EXT_ARGV" "" "External ARGV (spacey)" + +# ############################################################################ +# Option values with spaces. 
+# ############################################################################ + +# Config file +cp "$T_LIB_DIR/samples/bash/config002.conf" "$HOME/.$TOOL.conf" + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" + +is "$OPT_STRING_OPT" "hello world" "Option value with space (conf)" +is "$OPT_INT_OPT" "100" "Option = value # comment (conf)" + +rm "$HOME/.$TOOL.conf" +TOOL="pt-stalk" + +# Command line +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt "hello world" +is "$OPT_STRING_OPT" "hello world" "Option value with space (cmd line)" +is "$ARGV" "" "ARGV (cmd line)" +is "$EXT_ARGV" "" "External ARGV (cmd line)" + +# ############################################################################ +# Size options. +# ############################################################################ + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1T +is "$OPT_DISK_BYTES_FREE" "1099511627776" "Size: 1T" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1G +is "$OPT_DISK_BYTES_FREE" "1073741824" "Size: 1G" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1M +is "$OPT_DISK_BYTES_FREE" "1048576" "Size: 1M" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1K +is "$OPT_DISK_BYTES_FREE" "1024" "Size: 1K" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1k +is "$OPT_DISK_BYTES_FREE" "1024" "Size: 1k" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1 +is "$OPT_DISK_BYTES_FREE" "1" "Size: 1" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 100M +is "$OPT_DISK_BYTES_FREE" "104857600" "Size: 100M" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" +is "$OPT_DISK_BYTES_FREE" "104857600" "Size: 100M default" + # ############################################################################ # Done # ############################################################################ diff --git a/t/lib/bash/safeguards.sh 
b/t/lib/bash/safeguards.sh index c874498f..cf678d90 100644 --- a/t/lib/bash/safeguards.sh +++ b/t/lib/bash/safeguards.sh @@ -18,36 +18,51 @@ is \ "2" \ "2-line df output" -check_disk_space "$SAMPLE/diskspace001.txt" 22000 18 >$TMPDIR/out 2>&1 +# Filesystem 1024-blocks Used Available Capacity Mounted on +# /dev/disk0s2 118153176 94409664 23487512 81% / +# +# Those values are in Kb, so: +# used = 94409664 (94.4G) = 96_675_495_936 bytes +# free = 23487512 (23.4G) = 24_051_212_288 bytes +# pct free = 100 - 81 = 19 % + +# want free - 100, 18 < 19, so this should be ok. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 18 >$TMPDIR/out 2>&1 is "$?" "0" "Enough disk space" is \ "`cat $TMPDIR/out`" \ "" \ "No output if enough disk space" -check_disk_space "$SAMPLE/diskspace001.txt" 24000 18 >$TMPDIR/out 2>&1 +# want free - 100 is ok, but 20 < 19 is not. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 20 >$TMPDIR/out 2>&1 +is "$?" "1" "Not enough % free" + +# want free + 100, so this should fail +# (real free is 100 bytes under what we want) +check_disk_space "$SAMPLE/diskspace001.txt" 24051212388 18 >$TMPDIR/out 2>&1 is "$?" "1" "Not enough MB free" cmd_ok \ - "grep -q '19% free, 23487512 KB free; wanted more than 18% free or 24576000 KB free' $TMPDIR/out" \ + "grep -q 'Actual: 19% free, 24051212288 bytes free (- 0 bytes margin)' $TMPDIR/out" \ "Warning if not enough disk space" -check_disk_space "$SAMPLE/diskspace001.txt" 22000 19 >$TMPDIR/out 2>&1 -is "$?" "1" "Not enough % free" - # ########################################################################### # Check with a margin (amount we plan to use in the future). # ########################################################################### -check_disk_space "$SAMPLE/diskspace001.txt" 22000 18 100 +# want free - 100 + 50 margin, so effectively want free - 50 is ok. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 18 50 is "$?" 
"0" "Enough disk space with margin" -check_disk_space "$SAMPLE/diskspace001.txt" 23000 18 100 >$TMPDIR/out 2>&1 +# want free - 100 + 101 margin, so real free is 1 byte under what we want. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 18 101 >$TMPDIR/out 2>&1 is "$?" "1" "Not enough MB free with margin" -check_disk_space "$SAMPLE/diskspace001.txt" 100 5 20000 >$TMPDIR/out 2>&1 +# want free - 100 + 50 margin ok but %free will be 19 which is < 25. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 25 50 >$TMPDIR/out 2>&1 is "$?" "1" "Not enough % free with margin" cmd_ok \ - "grep -q '3% free,' $TMPDIR/out" \ + "grep -q 'Actual:[ ]*19% free,' $TMPDIR/out" \ "Calculates % free with margin" # ########################################################################### diff --git a/t/lib/samples/bash/collect001.txt b/t/lib/samples/bash/collect001.txt deleted file mode 100644 index f76b8ecb..00000000 --- a/t/lib/samples/bash/collect001.txt +++ /dev/null @@ -1,33 +0,0 @@ -2011_12_05-df -2011_12_05-disk-space -2011_12_05-diskstats -2011_12_05-hostname -2011_12_05-innodbstatus1 -2011_12_05-innodbstatus2 -2011_12_05-interrupts -2011_12_05-iostat -2011_12_05-iostat-overall -2011_12_05-log_error -2011_12_05-lsof -2011_12_05-meminfo -2011_12_05-mpstat -2011_12_05-mpstat-overall -2011_12_05-mutex-status1 -2011_12_05-mutex-status2 -2011_12_05-mysqladmin -2011_12_05-netstat -2011_12_05-netstat_s -2011_12_05-opentables1 -2011_12_05-opentables2 -2011_12_05-output -2011_12_05-processlist -2011_12_05-procstat -2011_12_05-procvmstat -2011_12_05-ps -2011_12_05-slabinfo -2011_12_05-stacktrace -2011_12_05-sysctl -2011_12_05-top -2011_12_05-variables -2011_12_05-vmstat -2011_12_05-vmstat-overall diff --git a/t/lib/samples/bash/config001.conf b/t/lib/samples/bash/config001.conf new file mode 100644 index 00000000..f68cf974 --- /dev/null +++ b/t/lib/samples/bash/config001.conf @@ -0,0 +1,5 @@ +string-opt=abc +typeless-option +-- +--host=127.1 +--user=daniel diff --git 
a/t/lib/samples/bash/config002.conf b/t/lib/samples/bash/config002.conf new file mode 100644 index 00000000..d4a76af5 --- /dev/null +++ b/t/lib/samples/bash/config002.conf @@ -0,0 +1,5 @@ +# Line comment. +string-opt=hello world + + +int-opt = 100 # Inline comment. diff --git a/t/lib/samples/bash/config003.conf b/t/lib/samples/bash/config003.conf new file mode 100644 index 00000000..0ac5a1d5 --- /dev/null +++ b/t/lib/samples/bash/config003.conf @@ -0,0 +1,2 @@ +string-opt=from config file +int-opt=123 diff --git a/t/lib/samples/bash/config004.conf b/t/lib/samples/bash/config004.conf new file mode 100644 index 00000000..eb023329 --- /dev/null +++ b/t/lib/samples/bash/config004.conf @@ -0,0 +1,3 @@ + typeless-option + int-opt=123 +string-opt=foo diff --git a/t/lib/samples/bash/help001.txt b/t/lib/samples/bash/help001.txt deleted file mode 100644 index 65dc71b2..00000000 --- a/t/lib/samples/bash/help001.txt +++ /dev/null @@ -1,30 +0,0 @@ -Usage: pt-stalk [OPTIONS] [-- MYSQL_OPTIONS] - -For more information, 'man pt-stalk' or 'perldoc /Users/daniel/p/bash-tool-libs/t/lib/samples/bash/po001.sh'. - -Command line options: - ---help - Print help and exit. - ---int-opt - Int option without a default. - ---int-opt2 - Int option with a default. - ---noption - Negatable option. - ---string-opt - String option without a default. - ---string-opt2 - String option with a default. - ---typeless-option - Just an option. - ---version - Print tool's version and exit. - diff --git a/t/lib/samples/bash/po002.sh b/t/lib/samples/bash/po002.sh deleted file mode 100644 index 63a8672b..00000000 --- a/t/lib/samples/bash/po002.sh +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env bash - -: - -# ############################################################################ -# Documentation -# ############################################################################ -:<<'DOCUMENTATION' -=pod - -=head1 NAME - -pt-stalk - Wait for a condition to occur then begin collecting data. 
- -=head1 OPTIONS - -=over - -=item --collect - -default: yes; negatable: yes - -Collect system information. - -=item --collect-gdb - -Collect GDB stacktraces. - -=item --collect-oprofile - -Collect oprofile data. - -=item --collect-strace - -Collect strace data. - -=item --collect-tcpdump - -Collect tcpdump data. - -=item --cycles - -type: int; default: 5 - -Number of times condition must be met before triggering collection. - -=item --daemonize - -default: yes; negatable: yes - -Daemonize the tool. - -=item --dest - -type: string - -Where to store collected data. - -=item --disk-byte-limit - -type: int; default: 100 - -Exit if the disk has less than this many MB free. - -=item --disk-pct-limit - -type: int; default: 5 - -Exit if the disk is less than this %full. - -=item --execute-command - -type: string; default: pt-collect - -Location of the C tool. - -=item --function - -type: string; default: status - -Built-in function name or plugin file name which returns the value of C. - -Possible values are: - -=over - -=item * status - -Grep the value of C from C. - -=item * processlist - -Count the number of processes in C whose -C column matches C. For example: - - TRIGGER_FUNCTION="processlist" \ - VARIABLE="State" \ - MATCH="statistics" \ - THRESHOLD="10" - -The above triggers when more than 10 processes are in the "statistics" state. -C must be specified for this trigger function. - -=item * magic - -TODO - -=item * plugin file name - -A plugin file allows you to specify a custom trigger function. The plugin -file must contain a function called C. For example: - - trg_plugin() { - # Do some stuff. - echo "$value" - } - -The last output if the function (its "return value") must be a number. -This number is compared to C. All L<"ENVIRONMENT"> variables -are available to the function. - -Do not alter the tool's existing global variables. Prefix any plugin-specific -global variables with "PLUGIN_". - -=back - -=item --help - -Print help and exit. 
- -=item --interval - -type: int; default: 1 - -Interval between checks. - -=item --iterations - -type: int - -Exit after triggering C this many times. By default, the tool -will collect as many times as it's triggered. - -=item --log - -type: string; default: /var/log/pt-stalk.log - -Print all output to this file when daemonized. - -=item --match - -type: string - -Match pattern for C L<"--function">. - -=item --notify-by-email - -type: string - -Send mail to this list of addresses when C triggers. - -=item --pid FILE - -type: string; default: /var/run/pt-stalk.pid - -Create a PID file when daemonized. - -=item --retention-time - -type: int; default: 30 - -Remove samples after this many days. - -=item --run-time - -type: int; default: 30 - -How long to collect statistics data for? - -Make sure that this isn't longer than SLEEP. - -=item --sleep - -type: int; default: 300 - -How long to sleep after collecting? - -=item --threshold N - -type: int; default: 25 - -Max number of C to tolerate. - -=item --variable NAME - -type: string; default: Threads_running - -This is the thing to check for. - -=item --version - -Print tool's version and exit. - -=back - -=head1 ENVIRONMENT - -No env vars used. - -=cut - -DOCUMENTATION diff --git a/t/lib/samples/bash/po003.sh b/t/lib/samples/bash/po003.sh new file mode 100644 index 00000000..a7971cdc --- /dev/null +++ b/t/lib/samples/bash/po003.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +: + +# ############################################################################ +# Documentation +# ############################################################################ +:<<'DOCUMENTATION' +=pod + +=head1 NAME + +pt-stalk - Wait for a condition to occur then begin collecting data. + +=head1 OPTIONS + +=over + +=item --disk-pct-limit + +type: int; default: 5 + +Exit if the disk is less than this %full. + +=item --help + +Print help. + +=back + +=head1 ENVIRONMENT + +No env vars used. 
+ +=cut + +DOCUMENTATION diff --git a/t/lib/samples/bash/po004.sh b/t/lib/samples/bash/po004.sh new file mode 100644 index 00000000..0574f221 --- /dev/null +++ b/t/lib/samples/bash/po004.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +: + +# ############################################################################ +# Documentation +# ############################################################################ +:<<'DOCUMENTATION' +=pod + +=head1 NAME + +pt-stalk - Wait for a condition to occur then begin collecting data. + +=head1 OPTIONS + +=over + +=item --disk-bytes-free + +type: size; default: 100M + +Fall apart if there's less than this many bytes free on the disk. + +=item --help + +Print help. + +=back + +=head1 ENVIRONMENT + +No env vars used. + +=cut + +DOCUMENTATION diff --git a/t/lib/samples/bash/seq1.txt b/t/lib/samples/bash/seq1.txt new file mode 100644 index 00000000..8a1218a1 --- /dev/null +++ b/t/lib/samples/bash/seq1.txt @@ -0,0 +1,5 @@ +1 +2 +3 +4 +5 diff --git a/t/pt-collect/pt-collect.t b/t/pt-collect/pt-collect.t deleted file mode 100644 index e9384f1d..00000000 --- a/t/pt-collect/pt-collect.t +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env perl - -BEGIN { - die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" - unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; - unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; -}; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use Test::More tests => 1; - -use PerconaTest; - -like( - `$trunk/bin/pt-collect --help 2>&1`, - qr/Usage:/, - 'It runs' -); - -# ############################################################################# -# Done. 
-# ############################################################################# -exit; diff --git a/t/pt-mysql-summary/get_mysql_info.sh b/t/pt-mysql-summary/get_mysql_info.sh index 0accdf52..78a181c9 100644 --- a/t/pt-mysql-summary/get_mysql_info.sh +++ b/t/pt-mysql-summary/get_mysql_info.sh @@ -3,14 +3,14 @@ TESTS=3 TEST_NAME="get_mysql_timezone" -cp samples/mysql-variables-001.txt /tmp/percona-toolkit-mysql-variables +cp samples/mysql-variables-001.txt $TMPDIR/percona-toolkit-mysql-variables is $(get_mysql_timezone) "EDT" TEST_NAME="get_mysql_uptime" cat < $TMPDIR/expected 2010-05-27 11:38 (up 0+02:08:52) EOF -cp samples/mysql-status-001.txt /tmp/percona-toolkit-mysql-status +cp samples/mysql-status-001.txt $TMPDIR/percona-toolkit-mysql-status echo "2010-05-27 11:38" > $TMPDIR/in get_mysql_uptime $TMPDIR/in > $TMPDIR/got no_diff $TMPDIR/got $TMPDIR/expected @@ -20,6 +20,6 @@ cat < $TMPDIR/expected Version | 5.0.51a-24+lenny2 (Debian) Built On | debian-linux-gnu i486 EOF -cp samples/mysql-variables-001.txt /tmp/percona-toolkit-mysql-variables +cp samples/mysql-variables-001.txt $TMPDIR/percona-toolkit-mysql-variables get_mysql_version > $TMPDIR/got no_diff $TMPDIR/got $TMPDIR/expected diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index 869b2513..83d180d7 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -9,16 +9,238 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 1; +use Test::More; +use Time::HiRes qw(sleep); use PerconaTest; +use DSNParser; +use Sandbox; -TODO: { - local $TODO = "Test pt-stalk"; - ok(1, 'ok'); -}; +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $dbh = $sb->get_dbh_for('master'); + +if ( !$dbh ) { + plan skip_all => 'Cannot connect to sandbox master'; +} +else { + plan tests => 21; +} + +my $cnf = "/tmp/12345/my.sandbox.cnf"; +my $pid_file = "/tmp/pt-stalk.pid.$PID"; +my $log_file = 
"/tmp/pt-stalk.log.$PID"; +my $dest = "/tmp/pt-stalk.collect.$PID"; +my $pid; + +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm -rf $dest 2>/dev/null`); + +# ########################################################################### +# Test that it won't run if can't connect to MySQL. +# ########################################################################### + +my $retval = system("$trunk/bin/pt-stalk >$log_file 2>&1"); +my $output = `cat $log_file`; + +like( + $output, + qr/Cannot connect to MySQL/, + "Cannot connect to MySQL" +); + +is( + $retval >> 8, + 1, + "Exit 1" +); + +# ########################################################################### +# Test that it runs and dies normally. +# ########################################################################### +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm -rf $dest 2>/dev/null`); + +$retval = system("$trunk/bin/pt-stalk --daemonize --pid $pid_file --log $log_file --dest $dest -- --defaults-file=$cnf"); + +is( + $retval >> 8, + 0, + "Parent exit 0" +); + +PerconaTest::wait_for_files($pid_file, $log_file); +ok( + -f $pid_file, + "Creates PID file" +); + +ok( + -f $log_file, + "Creates log file" +); + +sleep 1; + +ok( + -d $dest, + "Creates --dest (collect) dir" +); + +chomp($pid = `cat $pid_file`); +$retval = system("kill -0 $pid"); +is( + $retval >> 0, + 0, + "pt-stalk is running ($pid)" +); + +$output = `cat $log_file`; +like( + $output, + qr/Check results: Threads_running=\d+, matched=no, cycles_true=0/, + "Check results logged" +); + +$retval = system("kill $pid 2>/dev/null"); +is( + $retval >> 0, + 0, + "Killed pt-stalk" +); + +sleep 1; + +ok( + ! -f $pid_file, + "Removes PID file" +); + +$output = `cat $log_file`; +like( + $output, + qr/Caught signal, exiting/, + "Caught signal logged" +); + +# ########################################################################### +# Test collect. 
+# ########################################################################### +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm $dest/* 2>/dev/null`); + +# We'll have to watch Uptime since it's the only status var that's going +# to be predictable. +my (undef, $uptime) = $dbh->selectrow_array("SHOW STATUS LIKE 'Uptime'"); +my $threshold = $uptime + 2; + +$retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1"); + +sleep 3; + +$output = `cat $dest/*-trigger`; +like( + $output, + qr/Check results: Uptime=\d+, matched=yes, cycles_true=2/, + "Collect triggered" +); + +chomp($output = `cat $dest/*-df | grep -c '^TS'`); +is( + $output, + 2, + "Collect ran for --run-time" +); + +$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`; +is( + $output, + "", + "pt-stalk is not running" +); + +$output = `cat $dest/*-trigger`; +like( + $output, + qr/pt-stalk ran with --function=status --variable=Uptime --threshold=$threshold/, + "Trigger file logs how pt-stalk was ran" +); + +chomp($output = `cat $log_file | grep 'Collector PID'`); +like( + $output, + qr/Collector PID \d+/, + "Collector PID logged" +); + +# ########################################################################### +# Triggered but --no-collect. 
+# ########################################################################### +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm $dest/* 2>/dev/null`); + +(undef, $uptime) = $dbh->selectrow_array("SHOW STATUS LIKE 'Uptime'"); +$threshold = $uptime + 2; + +$retval = system("$trunk/bin/pt-stalk --no-collect --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 1 --run-time 1 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1"); + +sleep 2; + +$output = `cat $log_file`; +like( + $output, + qr/Collect triggered/, + "Collect triggered" +); + +ok( + ! -f "$dest/*", + "No files collected" +); + +$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`; +is( + $output, + "", + "pt-stalk is not running" +); + +# ############################################################################# +# --config +# ############################################################################# + +diag(`cp $ENV{HOME}/.pt-stalk.conf $ENV{HOME}/.pt-stalk.conf.original 2>/dev/null`); +diag(`cp $trunk/t/pt-stalk/samples/config001.conf $ENV{HOME}/.pt-stalk.conf`); + +system "$trunk/bin/pt-stalk --dest $dest --pid $pid_file >$log_file 2>&1 &"; +PerconaTest::wait_for_files($pid_file); +sleep 1; +chomp($pid = `cat $pid_file`); +$retval = system("kill $pid 2>/dev/null"); +is( + $retval >> 0, + 0, + "Killed pt-stalk" +); + +$output = `cat $log_file`; +like( + $output, + qr/Check results: Aborted_connects=|variable=Aborted_connects/, + "Read default config file" +); + +diag(`rm $ENV{HOME}/.pt-stalk.conf`); +diag(`cp $ENV{HOME}/.pt-stalk.conf.original $ENV{HOME}/.pt-stalk.conf 2>/dev/null`); # ############################################################################# # Done. 
# ############################################################################# +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm -rf $dest 2>/dev/null`); exit; diff --git a/t/pt-stalk/samples/config001.conf b/t/pt-stalk/samples/config001.conf new file mode 100644 index 00000000..c4ad24e8 --- /dev/null +++ b/t/pt-stalk/samples/config001.conf @@ -0,0 +1,8 @@ +--iterations=1 +--variable=Aborted_connects +--threshold=999999 +-- +-umsandbox +-pmsandbox +--host 127.1 +--port 12345 diff --git a/util/test-bash-functions b/util/test-bash-functions index 1a14a0de..c6c0cd61 100755 --- a/util/test-bash-functions +++ b/util/test-bash-functions @@ -93,7 +93,7 @@ run_test() { # Print a TAP-style test result. result() { local result=$1 - local test_name=$2 + local test_name=${2:-""} if [ $result -eq 0 ]; then echo "ok $testno - $TEST_FILE $test_name" else @@ -115,7 +115,7 @@ result() { no_diff() { local got=$1 local expected=$2 - local test_name=$3 + local test_name=${3:-""} test_command="diff $got $expected" eval $test_command > $TEST_TMPDIR/failed_result 2>&1 result $? "$test_name" @@ -124,7 +124,7 @@ no_diff() { is() { local got=$1 local expected=$2 - local test_name=$3 + local test_name=${3:-""} test_command="\"$got\" == \"$expected\"" test "$got" = "$expected" result $? "$test_name" @@ -132,7 +132,7 @@ is() { cmd_ok() { local test_command=$1 - local test_name=$2 + local test_name=${2:-""} eval $test_command result $? "$test_name" }