From 255ea01fd8589ac3005b5ce3c4b0df4a55e31b3e Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 1 Nov 2011 11:39:04 -0600 Subject: [PATCH 01/71] Make main() in pt-stalk. --- bin/pt-stalk | 124 ++++++++++++++++++++++++++++----------------------- 1 file changed, 67 insertions(+), 57 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 21ea8e78..1e603899 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -99,66 +99,76 @@ log() { echo "${1}" >&2 } -# Make the collection location -mkdir -p "${DEST}" || die "Can't make the destination directory" -test -d "${DEST}" || die "${DEST} isn't a directory" -test -w "${DEST}" || die "${DEST} isn't writable" +# The main code that runs by default. Arguments are the command-line options. +main() { -# Test if we have root; warn if not, but it isn't critical. -if [ "$(id -u)" != "0" ]; then - echo 'Not running with root privileges!'; + # Make the collection location + mkdir -p "${DEST}" || die "Can't make the destination directory" + test -d "${DEST}" || die "${DEST} isn't a directory" + test -w "${DEST}" || die "${DEST} isn't writable" + + # Test if we have root; warn if not, but it isn't critical. + if [ "$(id -u)" != "0" ]; then + echo 'Not running with root privileges!'; + fi + + # We increment this variable every time that the check is true, + # and set it to 0 if it's false. + cycles_true=0; + + while true; do + d=$(date +%F-%T | tr :- _); + + # XXX This is where we decide whether to execute 'collect'. + # XXX Customize this if needed. The idea is to generate a number and store + # XXX it into $detected, and if $detected > $THRESHOLD, then we'll execute + # XXX the collection process. + detected=$(mysqladmin ext ${MYSQLOPTIONS} | grep ${VARIABLE} | awk '{print $4}'); + if [ -z "${detected}" -a ${MAYBE_EMPTY} = "no" ]; then + # Oops, couldn't connect, maybe max_connections problem? + echo "$d The detected value is empty; something failed? Exit status is $?" 
+ matched="yes" + cycles_true=$(($cycles_true + 1)) + elif [ "${detected:-0}" -gt ${THRESHOLD} ]; then + matched="yes" + cycles_true=$(($cycles_true + 1)) + else + matched="no" + cycles_true=0 + fi + + # XXX Stop customizing here; everything above should be what you need. + + NOTE="$d check results: ${VARIABLE} = ${detected}, matched = ${matched}, cycles_true = ${cycles_true}" + # Actually execute the collection script. + if [ "${matched:-no}" = "yes" -a ${cycles_true} -ge ${CYCLES} ]; then + + log "${NOTE}" "${COLLECT} triggered" + PREFIX="$(date +%F-%T | tr :- _)" + echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" + ${COLLECT} -d "${DEST}" -i "${DURATION}" -g "${GDB}" -o "${OPROFILE}" -p "${PREFIX}" -s "${STRACE}" -t "${TCPDUMP}" -f "${PCT_THRESHOLD}" -m "${MB_THRESHOLD}" -- ${MYSQLOPTIONS} + echo "$d sleeping ${SLEEP} seconds to avoid DOS attack" + sleep ${SLEEP} + else + echo ${NOTE} + sleep ${INTERVAL} + fi + + + # Delete things more than $PURGE days old + find "${DEST}" -type f -mtime +${PURGE} -exec rm -f '{}' \; + find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ + -depth -mtime +${PURGE} -exec rm -f '{}' \; + + done +} + +# Execute the program if it was not included from another file. +# This makes it possible to include without executing, and thus test. +if [ "$(basename "$0")" = "pt-stalk" ] || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then + main "$@" fi -# We increment this variable every time that the check is true, and set it to 0 -# if it's false. -cycles_true=0; - -while true; do - d=$(date +%F-%T | tr :- _); - - # XXX This is where we decide whether to execute 'collect'. - # XXX Customize this if needed. The idea is to generate a number and store - # XXX it into $detected, and if $detected > $THRESHOLD, then we'll execute - # XXX the collection process. 
- detected=$(mysqladmin ext ${MYSQLOPTIONS} | grep ${VARIABLE} | awk '{print $4}'); - if [ -z "${detected}" -a ${MAYBE_EMPTY} = "no" ]; then - # Oops, couldn't connect, maybe max_connections problem? - echo "$d The detected value is empty; something failed? Exit status is $?" - matched="yes" - cycles_true=$(($cycles_true + 1)) - elif [ "${detected:-0}" -gt ${THRESHOLD} ]; then - matched="yes" - cycles_true=$(($cycles_true + 1)) - else - matched="no" - cycles_true=0 - fi - - # XXX Stop customizing here; everything above should be what you need. - - NOTE="$d check results: ${VARIABLE} = ${detected}, matched = ${matched}, cycles_true = ${cycles_true}" - # Actually execute the collection script. - if [ "${matched:-no}" = "yes" -a ${cycles_true} -ge ${CYCLES} ]; then - - log "${NOTE}" "${COLLECT} triggered" - PREFIX="$(date +%F-%T | tr :- _)" - echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" - ${COLLECT} -d "${DEST}" -i "${DURATION}" -g "${GDB}" -o "${OPROFILE}" -p "${PREFIX}" -s "${STRACE}" -t "${TCPDUMP}" -f "${PCT_THRESHOLD}" -m "${MB_THRESHOLD}" -- ${MYSQLOPTIONS} - echo "$d sleeping ${SLEEP} seconds to avoid DOS attack" - sleep ${SLEEP} - else - echo ${NOTE} - sleep ${INTERVAL} - fi - - - # Delete things more than $PURGE days old - find "${DEST}" -type f -mtime +${PURGE} -exec rm -f '{}' \; - find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ - -depth -mtime +${PURGE} -exec rm -f '{}' \; - -done - # ############################################################################ # Documentation # ############################################################################ From a0a0590cd1cb274cc22e497d4e541a7c6cbf7f4e Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 1 Nov 2011 11:55:01 -0600 Subject: [PATCH 02/71] Change default trigger from Threads_connected:100 to Threads_running:30. 
--- bin/pt-stalk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 1e603899..3dcc959c 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -15,10 +15,10 @@ fi # Configuration settings. # ######################################################################## # This is the max number of we want to tolerate. -THRESHOLD=${THRESHOLD:-100} +THRESHOLD=${THRESHOLD:-30} # This is the thing to check for. -VARIABLE=${VARIABLE:-Threads_connected} +VARIABLE=${VARIABLE:-Threads_running} # How many times must the condition be met before the script will fire? CYCLES=${CYCLES:-1} @@ -231,7 +231,7 @@ is too full. =back By default, the tool is configured to execute mysqladmin extended-status and -extract the value of the Threads_connected variable; if this is greater than +extract the value of the Threads_running variable; if this is greater than 100, it runs the collection script. This is really just placeholder code, and almost certainly needs to be customized! @@ -265,11 +265,11 @@ or in the F config file (see L<"CONFIGURING">). =over -=item THRESHOLD (default 100) +=item THRESHOLD (default 30) This is the max number of we want to tolerate. -=item VARIABLE (default Threads_connected} +=item VARIABLE (default Threads_running) This is the thing to check for. From 1e04b40869872b424d56fffa0ba99ce2d9209037 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 2 Nov 2011 16:05:59 -0600 Subject: [PATCH 03/71] Implement trigger functions show_variables and show_status. --- bin/pt-stalk | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 3dcc959c..5d23c657 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -77,6 +77,9 @@ MB_THRESHOLD=${MB_THRESHOLD:-100} # Remove samples after this many days. PURGE=${PURGE:-30} +# Which trigger function to call to get the value of VARIABLE. 
+TRIGGER_FUNCTION=${TRIGGER_FUNCTION:-"show_status"} + # ######################################################################## # End configuration # ######################################################################## @@ -89,6 +92,19 @@ die() { exit 1 } +# ######################################################################## +# Trigger functions +# ######################################################################## +show_status() { + local var=$1 + mysqladmin ${MYSQLOPTIONS} extended-status | grep ${VARIABLE} | awk '{print $4}' +} + +show_variables() { + local var=$1 + mysqladmin ${MYSQLOPTIONS} variables | grep ${VARIABLE} | awk '{print $4}' +} + # ######################################################################## # Echo to STDERR and possibly email. # ######################################################################## @@ -119,11 +135,11 @@ main() { while true; do d=$(date +%F-%T | tr :- _); - # XXX This is where we decide whether to execute 'collect'. - # XXX Customize this if needed. The idea is to generate a number and store - # XXX it into $detected, and if $detected > $THRESHOLD, then we'll execute - # XXX the collection process. - detected=$(mysqladmin ext ${MYSQLOPTIONS} | grep ${VARIABLE} | awk '{print $4}'); + # This is where we decide whether to execute 'collect'. + # The idea is to generate a number and store into $detected, + # and if $detected > $THRESHOLD, then we'll execute pt-collect. + local detected=$($TRIGGER_FUNCTION $VARIABLE) + if [ -z "${detected}" -a ${MAYBE_EMPTY} = "no" ]; then # Oops, couldn't connect, maybe max_connections problem? echo "$d The detected value is empty; something failed? Exit status is $?" @@ -137,8 +153,6 @@ main() { cycles_true=0 fi - # XXX Stop customizing here; everything above should be what you need. - NOTE="$d check results: ${VARIABLE} = ${detected}, matched = ${matched}, cycles_true = ${cycles_true}" # Actually execute the collection script. 
if [ "${matched:-no}" = "yes" -a ${cycles_true} -ge ${CYCLES} ]; then @@ -154,7 +168,6 @@ main() { sleep ${INTERVAL} fi - # Delete things more than $PURGE days old find "${DEST}" -type f -mtime +${PURGE} -exec rm -f '{}' \; find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ @@ -273,6 +286,11 @@ This is the max number of we want to tolerate. This is the thing to check for. +=item TRIGGER_FUNCTION (default show_status) + +Built-in function name to get the value of VARIABLE, either C +or C. + =item CYCLES (default 1) How many times must the condition be met before the script will fire? From 8d9bfeb31fe99016a412d5e8680f0a1249a21ec3 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 3 Nov 2011 12:53:28 -0600 Subject: [PATCH 04/71] Add processlist trigger function. Remove variables trigger function. --- bin/pt-stalk | 63 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 7 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 5d23c657..08bb20d6 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -78,7 +78,7 @@ MB_THRESHOLD=${MB_THRESHOLD:-100} PURGE=${PURGE:-30} # Which trigger function to call to get the value of VARIABLE. 
-TRIGGER_FUNCTION=${TRIGGER_FUNCTION:-"show_status"} +TRIGGER_FUNCTION=${TRIGGER_FUNCTION:-"status"} # ######################################################################## # End configuration @@ -92,17 +92,66 @@ die() { exit 1 } +grep_processlist() { + local file=$1 + local col=$2 + local pat=${3:-""} + local gt=${4:-0} + local quiet=${5:-0} + + awk " + BEGIN { + FS=\"|\" + OFS=\" | \" + n_cols=0 + found=0 + } + + /^\|/ { + if ( n_cols ) { + val=colno_for_name[\"$col\"] + if ((\"$pat\" && match(\$val, \"$pat\")) || ($gt && \$val > $gt) ) { + found++ + if (!$quiet) print \$0 + } + } + else { + for (i = 1; i <= NF; i++) { + gsub(/^[ ]*/, \"\", \$i) + gsub(/[ ]*$/, \"\", \$i) + if ( \$i != \"\" ) { + name_for_colno[i]=\$i + colno_for_name[\$i]=i + n_cols++ + } + } + } + } + + END { + if ( found ) + exit 0 + exit 1 + } + " $file +} + # ######################################################################## # Trigger functions # ######################################################################## -show_status() { +trg_status() { local var=$1 mysqladmin ${MYSQLOPTIONS} extended-status | grep ${VARIABLE} | awk '{print $4}' } -show_variables() { +trg_processlist() { local var=$1 - mysqladmin ${MYSQLOPTIONS} variables | grep ${VARIABLE} | awk '{print $4}' + local tmpfile="/tmp/mysql-processlist.$$" + mysqladmin ${MYSQLOPTIONS} processlist > $tmpfile-1 + grep_processlist $tmpfile-1 $var $MATCH 0 0 > $tmpfile-2 + wc -l $tmpfile-2 | awk '{print $1}' + rm -rf $tmpfile* + return } # ######################################################################## @@ -138,7 +187,7 @@ main() { # This is where we decide whether to execute 'collect'. # The idea is to generate a number and store into $detected, # and if $detected > $THRESHOLD, then we'll execute pt-collect. 
- local detected=$($TRIGGER_FUNCTION $VARIABLE) + local detected=$("trg_${TRIGGER_FUNCTION}" $VARIABLE) if [ -z "${detected}" -a ${MAYBE_EMPTY} = "no" ]; then # Oops, couldn't connect, maybe max_connections problem? @@ -288,8 +337,8 @@ This is the thing to check for. =item TRIGGER_FUNCTION (default show_status) -Built-in function name to get the value of VARIABLE, either C -or C. +Built-in function name to get the value of VARIABLE, either C +or C. =item CYCLES (default 1) From 325ca3781b01145924231314187061d2c7db9ad1 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 28 Nov 2011 10:12:45 -0700 Subject: [PATCH 05/71] Implement TRIGGER_FUNCTION=plugin-file-name. Update docu. --- bin/pt-stalk | 71 ++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 64 insertions(+), 7 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 08bb20d6..902f1040 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -136,6 +136,15 @@ grep_processlist() { " $file } +set_trg_func() { + if [ -f "$TRIGGER_FUNCTION" ]; then + source $TRIGGER_FUNCTION + TRIGGER_FUNCTION="trg_plugin" + else + TRIGGER_FUNCTION="trg_$TRIGGER_FUNCTION" + fi +} + # ######################################################################## # Trigger functions # ######################################################################## @@ -154,6 +163,11 @@ trg_processlist() { return } +trg_magic() { + echo "TODO" + return +} + # ######################################################################## # Echo to STDERR and possibly email. # ######################################################################## @@ -181,13 +195,15 @@ main() { # and set it to 0 if it's false. cycles_true=0; + set_trg_func + while true; do d=$(date +%F-%T | tr :- _); # This is where we decide whether to execute 'collect'. # The idea is to generate a number and store into $detected, # and if $detected > $THRESHOLD, then we'll execute pt-collect. 
- local detected=$("trg_${TRIGGER_FUNCTION}" $VARIABLE) + local detected=$("${TRIGGER_FUNCTION}" $VARIABLE) if [ -z "${detected}" -a ${MAYBE_EMPTY} = "no" ]; then # Oops, couldn't connect, maybe max_connections problem? @@ -294,7 +310,7 @@ is too full. By default, the tool is configured to execute mysqladmin extended-status and extract the value of the Threads_running variable; if this is greater than -100, it runs the collection script. This is really just placeholder code, +30, it runs the collection script. This is really just placeholder code, and almost certainly needs to be customized! If the tool does execute the collection script, it will wait for a while @@ -335,10 +351,51 @@ This is the max number of we want to tolerate. This is the thing to check for. -=item TRIGGER_FUNCTION (default show_status) +=item TRIGGER_FUNCTION (default status) -Built-in function name to get the value of VARIABLE, either C -or C. +Built-in function name or plugin file name which returns the value of C. Possible values are: + +=over + +=item * status + +Grep the value of C from C. + +=item * processlist + +Count the number of processes in C whose +C column matches C. For example: + + TRIGGER_FUNCTION="processlist" \ + VARIABLE="State" \ + MATCH="statistics" \ + THRESHOLD="10" + +The above triggers when more than 10 processes are in the "statistics" state. +C must be specified for this trigger function. + +=item * magic + +TODO + +=item * plugin file name + +A plugin file allows you to specify a custom trigger function. The plugin +file must contain a function called C. For example: + + trg_plugin() { + # Do some stuff. + echo "$value" + } + +The last output of the function (its "return value") must be a number. +This number is compared to C. All L<"ENVIRONMENT"> variables +are available to the function. + +Do not alter the tool's existing global variables. Prefix any plugin-specific +global variables with "PLUGIN_". 
+ +=back =item CYCLES (default 1) @@ -457,7 +514,7 @@ Replace C with the name of any tool. =head1 AUTHORS -Baron Schwartz, Justin Swanhart, and Fernando Ipar +Baron Schwartz, Justin Swanhart, Fernando Ipar, and Daniel Nichter =head1 ABOUT PERCONA TOOLKIT @@ -489,7 +546,7 @@ Place, Suite 330, Boston, MA 02111-1307 USA. =head1 VERSION -pt-stalk 1.0.1 +pt-stalk 2.0.0 =cut From b93b7de472f081d1feb29d0f56844110e02b11e0 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 28 Nov 2011 11:22:03 -0700 Subject: [PATCH 06/71] Use tmpdir lib. --- bin/pt-stalk | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 49 insertions(+), 2 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 902f1040..2e80d44b 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -4,6 +4,47 @@ # See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal # notices and disclaimers. +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +set -u + +TMPDIR="" +OPT_TMPDIR=${OPT_TMPDIR:""} + +mk_tmpdir() { + if [ -n "$OPT_TMPDIR" ]; then + TMPDIR="$OPT_TMPDIR" + if [ ! 
-d "$TMPDIR" ]; then + mkdir $TMPDIR || die "Cannot make $TMPDIR" + fi + else + local tool=`basename $0` + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf $TMPDIR + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + +set +u + # ######################################################################## # Check for the existence of a config file and source it if it exists # ######################################################################## @@ -155,7 +196,7 @@ trg_status() { trg_processlist() { local var=$1 - local tmpfile="/tmp/mysql-processlist.$$" + local tmpfile="$TMPDIR/processlist" mysqladmin ${MYSQLOPTIONS} processlist > $tmpfile-1 grep_processlist $tmpfile-1 $var $MATCH 0 0 > $tmpfile-2 wc -l $tmpfile-2 | awk '{print $1}' @@ -180,7 +221,6 @@ log() { # The main code that runs by default. Arguments are the command-line options. main() { - # Make the collection location mkdir -p "${DEST}" || die "Can't make the destination directory" test -d "${DEST}" || die "${DEST} isn't a directory" @@ -191,6 +231,9 @@ main() { echo 'Not running with root privileges!'; fi + # Make a secure tmpdir. Any output should be saved only in $TMPDIR/. + mk_tmpdir + # We increment this variable every time that the check is true, # and set it to 0 if it's false. cycles_true=0; @@ -239,6 +282,10 @@ main() { -depth -mtime +${PURGE} -exec rm -f '{}' \; done + + # Remove the secure tmpdir. This is not actually called because + # this tool runs forever. + rm_tmpdir } # Execute the program if it was not included from another file. 
From 937c16c172def3182a53f9cfa1c27ffc96fb51c4 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 29 Nov 2011 11:54:47 -0700 Subject: [PATCH 07/71] Convert env vars to cmd line opts. Use parse_options lib. --- bin/pt-stalk | 525 +++++++++++++++++++++++++++++---------------------- 1 file changed, 302 insertions(+), 223 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 2e80d44b..f7e2575f 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -4,6 +4,173 @@ # See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal # notices and disclaimers. +# ########################################################################### +# log_warn_die package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/log_warn_die.sh +# t/lib/bash/log_warn_die.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +set -u + +EXIT_STATUS=0 + +log() { + TS=$(date +%F-%T | tr :- _); + echo "$TS $1" +} + +warn() { + log "$1" >&2 + EXIT_STATUS=$((EXIT_STATUS | 1)) +} + +die() { + warn "$1" + exit 1 +} + +# ########################################################################### +# End log_warn_die package +# ########################################################################### + +# ########################################################################### +# parse_options package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/parse_options.sh +# t/lib/bash/parse_options.sh +# See https://launchpad.net/percona-toolkit for more information. 
+# ########################################################################### + +set -u + +declare -a ARGV # non-option args (probably input files) +declare EXT_ARGV # everything after -- (args for an external command) +OPT_ERR=${OPT_ERR:""} + +usage() { + local file=$1 + + local usage=$(grep '^Usage: ' $file) + local opts=$(grep -A 2 '^=item --' $file | sed -e 's/^=item //' -e 's/^\([A-Z]\)/ \1/' -e 's/^--$//' > $TMPDIR/help) + + if [ "$OPT_ERR" ]; then + echo "Error: ${OPT_ERR}" >&2 + fi + echo $usage >&2 + echo >&2 + echo "Options:" >&2 + echo >&2 + cat $TMPDIR/help >&2 + echo >&2 + echo "For more information, 'man $TOOL' or 'perldoc $file'." >&2 +} + +parse_options() { + local file=$1 + shift + + local opt="" + local val="" + local default="" + local version="" + local i=0 + + awk ' + /^=head1 OPTIONS/ { + getline + while ($0 !~ /^=head1/) { + if ($0 ~ /^=item --.*/) { + long_opt=substr($2, 3, length($2) - 2) + short_opt="" + required_arg="" + + if ($3) { + if ($3 ~ /-[a-z]/) + short_opt=substr($3, 3, length($3) - 3) + else + required_arg=$3 + } + + if ($4 ~ /[A-Z]/) + required_arg=$4 + + getline # blank line + getline # short description line + + if ($0 ~ /default: /) { + i=index($0, "default: ") + default=substr($0, i + 9, length($0) - (i + 9)) + } + else + default="" + + print long_opt "," short_opt "," required_arg "," default + } + getline + } + exit + }' $file > $TMPDIR/options + + while read spec; do + opt=$(echo $spec | cut -d',' -f1 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + default=$(echo $spec | cut -d',' -f4) + eval "OPT_${opt}"="$default" + done < <(cat $TMPDIR/options) + + for opt; do + if [ $# -eq 0 ]; then + break + fi + opt=$1 + if [ "$opt" = "--" ]; then + shift + EXT_ARGV="$@" + break + fi + if [ "$opt" = "--version" ]; then + version=$(grep '^pt-[^ ]\+ [0-9]' $0) + echo "$version" + exit 0 + fi + if [ "$opt" = "--help" ]; then + usage $file + exit 0 + fi + shift + if [ $(expr "$opt" : "-") -eq 0 ]; then + ARGV[i]="$opt" + i=$((i+1)) + 
continue + fi + opt=$(echo $opt | sed 's/^-*//') + spec=$(grep -E "^$opt,|,$opt," "$TMPDIR/options") + if [ -z "$spec" ]; then + die "Unknown option: $opt" + fi + opt=$(echo $spec | cut -d',' -f1) + required_arg=$(echo $spec | cut -d',' -f3) + val="yes" + if [ -n "$required_arg" ]; then + if [ $# -eq 0 ]; then + die "--$opt requires a $required_arg argument" + else + val="$1" + shift + fi + fi + opt=$(echo $opt | sed 's/-/_/g' | tr [:lower:] [:upper:]) + eval "OPT_${opt}"="$val" + done +} + +# ########################################################################### +# End parse_options package +# ########################################################################### + # ########################################################################### # tmpdir package # This package is a copy without comments from the original. The original @@ -43,96 +210,11 @@ rm_tmpdir() { # End tmpdir package # ########################################################################### +# ########################################################################### +# Subroutines +# ########################################################################### set +u -# ######################################################################## -# Check for the existence of a config file and source it if it exists -# ######################################################################## -if [ -f "${0}.conf" ]; then - . "${0}.conf" -fi - -# ######################################################################## -# Configuration settings. -# ######################################################################## -# This is the max number of we want to tolerate. -THRESHOLD=${THRESHOLD:-30} - -# This is the thing to check for. -VARIABLE=${VARIABLE:-Threads_running} - -# How many times must the condition be met before the script will fire? -CYCLES=${CYCLES:-1} - -# Collect GDB stacktraces? -GDB=${GDB:-no} - -# Collect oprofile data? -OPROFILE=${OPROFILE:-yes} - -# Collect strace data? 
-STRACE=${STRACE:-no} - -# Collect tcpdump data? -TCPDUMP=${TCPDUMP:-yes} - -# Send mail to this list of addresses when the script triggers. -# EMAIL= - -# Any options to pass to mysql/mysqladmin, such as -u, -p, etc -# MYSQLOPTIONS="" - -# This is the interval between checks. -INTERVAL=${INTERVAL:-30} - -# If the command you're running to detect the condition is allowed to return -# nothing (e.g. a grep line that might not even exist if there's no problem), -# then set this to "yes". -MAYBE_EMPTY=${MAYBE_EMPTY:-no} - -# This is the location of the 'collect' script. -if [ -z "${COLLECT}" ]; then - COLLECT="${HOME}/bin/pt-collect"; -fi - -# This is where to store the collected data. -if [ -z "${DEST}" ]; then - DEST="${HOME}/collected/" -fi - -# How long to collect statistics data for? Make sure that this isn't longer -# than SLEEP. -DURATION=${DURATION:-30} - -# How long to sleep after collecting? -if [ -z "${SLEEP}" ]; then - SLEEP=$(($DURATION * 10)) -fi - -# Bail out if the disk is more than this %full. -PCT_THRESHOLD=${PCT_THRESHOLD:-95} - -# Bail out if the disk has less than this many MB free. -MB_THRESHOLD=${MB_THRESHOLD:-100} - -# Remove samples after this many days. -PURGE=${PURGE:-30} - -# Which trigger function to call to get the value of VARIABLE. -TRIGGER_FUNCTION=${TRIGGER_FUNCTION:-"status"} - -# ######################################################################## -# End configuration -# ######################################################################## - -# ######################################################################## -# Echo to STDERR and exit false. 
-# ######################################################################## -die() { - echo "${1}" >&2 - exit 1 -} - grep_processlist() { local file=$1 local col=$2 @@ -178,27 +260,24 @@ grep_processlist() { } set_trg_func() { - if [ -f "$TRIGGER_FUNCTION" ]; then - source $TRIGGER_FUNCTION + if [ -f "$OPT_TRIGGER_FUNCTION" ]; then + source $OPT_TRIGGER_FUNCTION TRIGGER_FUNCTION="trg_plugin" else - TRIGGER_FUNCTION="trg_$TRIGGER_FUNCTION" + TRIGGER_FUNCTION="trg_$OPT_TRIGGER_FUNCTION" fi } -# ######################################################################## -# Trigger functions -# ######################################################################## trg_status() { local var=$1 - mysqladmin ${MYSQLOPTIONS} extended-status | grep ${VARIABLE} | awk '{print $4}' + mysqladmin "$EXT_ARGV" extended-status | grep "$OPT_VARIABLE" | awk '{print $4}' } trg_processlist() { local var=$1 local tmpfile="$TMPDIR/processlist" - mysqladmin ${MYSQLOPTIONS} processlist > $tmpfile-1 - grep_processlist $tmpfile-1 $var $MATCH 0 0 > $tmpfile-2 + mysqladmin "$EXT_ARGV" processlist > $tmpfile-1 + grep_processlist $tmpfile-1 $var $OPT_MATCH 0 0 > $tmpfile-2 wc -l $tmpfile-2 | awk '{print $1}' rm -rf $tmpfile* return @@ -209,51 +288,45 @@ trg_magic() { return } -# ######################################################################## -# Echo to STDERR and possibly email. -# ######################################################################## -log() { - if [ "${EMAIL}" ]; then - echo "${1} on $(hostname)" | mail -s "${2} on $(hostname)" ${EMAIL} - fi - echo "${1}" >&2 -} +# ########################################################################### +# Main program loop, called below if the tool is run from the command line. +# ########################################################################### -# The main code that runs by default. Arguments are the command-line options. 
main() { + mk_tmpdir + parse_options $0 "$@" + # Make the collection location - mkdir -p "${DEST}" || die "Can't make the destination directory" - test -d "${DEST}" || die "${DEST} isn't a directory" - test -w "${DEST}" || die "${DEST} isn't writable" + # mkdir -p "$OPT_DEST" || die "Can't make the destination directory" + # test -d "$OPT_DEST" || die "$OPT_DEST isn't a directory" + # test -w "$OPT_DEST" || die "$OPT_DEST isn't writable" # Test if we have root; warn if not, but it isn't critical. if [ "$(id -u)" != "0" ]; then echo 'Not running with root privileges!'; fi - # Make a secure tmpdir. Any output should be saved only in $TMPDIR/. - mk_tmpdir - # We increment this variable every time that the check is true, # and set it to 0 if it's false. - cycles_true=0; + local cycles_true=0 + local matched="no" set_trg_func while true; do - d=$(date +%F-%T | tr :- _); # This is where we decide whether to execute 'collect'. # The idea is to generate a number and store into $detected, - # and if $detected > $THRESHOLD, then we'll execute pt-collect. - local detected=$("${TRIGGER_FUNCTION}" $VARIABLE) + # and if $detected > $OPT_THRESHOLD, then we'll execute pt-collect. + local value=$($TRIGGER_FUNCTION $OPT_VARIABLE) + local trg_exit_status=$? - if [ -z "${detected}" -a ${MAYBE_EMPTY} = "no" ]; then - # Oops, couldn't connect, maybe max_connections problem? - echo "$d The detected value is empty; something failed? Exit status is $?" - matched="yes" - cycles_true=$(($cycles_true + 1)) - elif [ "${detected:-0}" -gt ${THRESHOLD} ]; then + if [ -z "$value" ]; then + # No value. Maybe we failed to connect to MySQL? + warn "Detected value is empty; something failed? 
Trigger exit status: $trg_exit_status" + matched="no" + cycles_true=0 + elif [ $value -gt $OPT_THRESHOLD ]; then matched="yes" cycles_true=$(($cycles_true + 1)) else @@ -261,25 +334,36 @@ main() { cycles_true=0 fi - NOTE="$d check results: ${VARIABLE} = ${detected}, matched = ${matched}, cycles_true = ${cycles_true}" - # Actually execute the collection script. - if [ "${matched:-no}" = "yes" -a ${cycles_true} -ge ${CYCLES} ]; then + log "Check results: $OPT_VARIABLE=$value, matched=$matched, cycles_true=$cycles_true" - log "${NOTE}" "${COLLECT} triggered" - PREFIX="$(date +%F-%T | tr :- _)" - echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" - ${COLLECT} -d "${DEST}" -i "${DURATION}" -g "${GDB}" -o "${OPROFILE}" -p "${PREFIX}" -s "${STRACE}" -t "${TCPDUMP}" -f "${PCT_THRESHOLD}" -m "${MB_THRESHOLD}" -- ${MYSQLOPTIONS} - echo "$d sleeping ${SLEEP} seconds to avoid DOS attack" - sleep ${SLEEP} + if [ "$matched" = "yes" -a $cycles_true -ge $OPT_CYCLES ]; then + log "$OPT_COLLECT triggered" + + # PREFIX="$(date +%F-%T | tr :- _)" + # echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" + + # Run pt-collect. 
+ $OPT_COLLECT \ + -i "$OPT_RUN_TIME" \ + -g "$OPT_GDB" \ + -o "$OPT_OPROFILE" \ + -p "$OPT_PREFIX" \ + -s "$OPT_STRACE" \ + -t "$OPT_TCPDUMP" \ + -f "$OPT_PCT_THRESHOLD" \ + -m "$OPT_MB_THRESHOLD" \ + -- "$EXT_ARGV" + + log "Sleeping $OPT_SLEEP seconds to avoid DOS attack" + sleep $OPT_SLEEP else - echo ${NOTE} - sleep ${INTERVAL} + sleep $OPT_INTERVAL fi # Delete things more than $PURGE days old - find "${DEST}" -type f -mtime +${PURGE} -exec rm -f '{}' \; - find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ - -depth -mtime +${PURGE} -exec rm -f '{}' \; + #find "$OPT_DEST" -type f -mtime +$OPT_PURGE -exec rm -f '{}' \; + #find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ + # -depth -mtime +$OPT_PURGE -exec rm -f '{}' \; done @@ -294,6 +378,8 @@ if [ "$(basename "$0")" = "pt-stalk" ] || [ "$(basename "$0")" = "bash" -a "$_" main "$@" fi +exit $EXIT_STATUS + # ############################################################################ # Documentation # ############################################################################ @@ -306,7 +392,7 @@ pt-stalk - Wait for a condition to occur then begin collecting data. =head1 SYNOPSIS -Usage: pt-stalk +Usage: pt-stalk [OPTIONS] [-- MYSQL OPTIONS] pt-stalk watches for a condition to become true, and when it does, executes a script. By default it executes L, but that can be customized. @@ -357,7 +443,7 @@ is too full. By default, the tool is configured to execute mysqladmin extended-status and extract the value of the Threads_running variable; if this is greater than -30, it runs the collection script. This is really just placeholder code, +25, it runs the collection script. This is really just placeholder code, and almost certainly needs to be customized! If the tool does execute the collection script, it will wait for a while @@ -368,39 +454,83 @@ The name 'stalk' is because 'watch' is already taken, and 'stalk' is fun. 
=head1 CONFIGURING -If the file F exists in the current working directory, then -L<"ENVIRONMENT"> variables are imported from it. For example, the config -file has the format: - - INTERVAL=10 - GDB=yes - -See L<"ENVIRONMENT">. +TODO =head1 OPTIONS -This tool does not have any command-line options, but see -L<"ENVIRONMENT"> and L<"CONFIGURING">. - -=head1 ENVIRONMENT - -The following environment variables configure how, what, and when the tool -runs. They are all optional and can be specified either on the command line -or in the F config file (see L<"CONFIGURING">). - =over -=item THRESHOLD (default 30) +=item --collect DIRECTORY -This is the max number of we want to tolerate. +Location of the C tool. (default: ${HOME}/bin/pt-collect) -=item VARIABLE (default Threads_running) +=item --cycles N -This is the thing to check for. +Number of times condition must be met before triggering collection. (default: 5) -=item TRIGGER_FUNCTION (default status) +=item --dest DIRECTORY -Built-in function name or plugin file name which returns the value of C. Possible values are: +Where to store collected data. + +=item --disk-byte-limit MEGABYTES + +Exit if the disk has less than this many MB free. (default: 100) + +=item --disk-pct-limit PERCENT + +Exit if the disk is less than this %full. (default: 5) + +=item --email ADDRESS + +Send mail to this list of addresses when the script triggers. + +=item --gdb + +Collect GDB stacktraces. + +=item --help + +Print help and exit. + +=item --interval SECONDS + +Interval between checks. (default: 1) + +=item --match PATTERN + +Match pattern for C L<"--trigger-function">. + +=item --oprofile + +Collect oprofile data. (default: yes) + +=item --purge DAYS + +Remove samples after this many days. (default: 30) + +=item --run-time SECONDS + +How long to collect statistics data for? (default: 30) + +Make sure that this isn't longer than SLEEP. + +=item --strace + +Collect strace data. 
+ +=item --sleep SECONDS + +How long to sleep after collecting? (default: 300) + +=item --tcpdump + +Collect tcpdump data? (default: yes) + +=item --trigger-function + +Built-in function name or plugin file name which returns the value of C. (default: status) + +Possible values are: =over @@ -444,75 +574,24 @@ global variables with "PLUGIN_". =back -=item CYCLES (default 1) +=item --threshold N -How many times must the condition be met before the script will fire? +Max number of C to tolerate. (default: 25) -=item GDB (default no) +=item --variable NAME -Collect GDB stacktraces? +This is the thing to check for. (default: Threads_running) -=item OPROFILE (default yes) +=item --version -Collect oprofile data? - -=item STRACE (default no) - -Collect strace data? - -=item TCPDUMP (default yes) - -Collect tcpdump data? - -=item EMAIL - -Send mail to this list of addresses when the script triggers. - -=item MYSQLOPTIONS - -Any options to pass to mysql/mysqladmin, such as -u, -p, etc - -=item INTERVAL (default 30) - -This is the interval between checks. - -=item MAYBE_EMPTY (default no) - -If the command you're running to detect the condition is allowed to return -nothing (e.g. a grep line that might not even exist if there's no problem), -then set this to "yes". - -=item COLLECT (default ${HOME}/bin/pt-collect) - -This is the location of the 'collect' script. - -=item DEST (default ${HOME}/collected/) - -This is where to store the collected data. - -=item DURATION (default 30) - -How long to collect statistics data for? Make sure that this isn't longer -than SLEEP. - -=item SLEEP (default DURATION * 10) - -How long to sleep after collecting? - -=item PCT_THRESHOLD (default 95) - -Bail out if the disk is more than this %full. - -=item MB_THRESHOLD (default 100) - -Bail out if the disk has less than this many MB free. - -=item PURGE (default 30) - -Remove samples after this many days. +Print tool's version and exit. =back +=head1 ENVIRONMENT + +No env vars used. 
+ =head1 SYSTEM REQUIREMENTS This tool requires Bash v3 or newer. From 30dbcb35a7696a6ab68f93ec5e84d87298ec9240 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 30 Nov 2011 11:35:53 -0700 Subject: [PATCH 08/71] Rename options. Implement untested oktorun() for --interations. --- bin/pt-stalk | 158 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 97 insertions(+), 61 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index f7e2575f..94ba330b 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -210,6 +210,12 @@ rm_tmpdir() { # End tmpdir package # ########################################################################### +# ########################################################################### +# Global variables +# ########################################################################### +OKTORUN=1 +ITER=0 + # ########################################################################### # Subroutines # ########################################################################### @@ -260,11 +266,11 @@ grep_processlist() { } set_trg_func() { - if [ -f "$OPT_TRIGGER_FUNCTION" ]; then - source $OPT_TRIGGER_FUNCTION + if [ -f "$OPT_FUNCTION" ]; then + source $OPT_FUNCTION TRIGGER_FUNCTION="trg_plugin" else - TRIGGER_FUNCTION="trg_$OPT_TRIGGER_FUNCTION" + TRIGGER_FUNCTION="trg_$OPT_FUNCTION" fi } @@ -288,6 +294,18 @@ trg_magic() { return } +oktorun() { + if [ $OKTORUN -ne 0 ]; then + return 1 # stop running + fi + + if [ -n "$OPT_ITERATIONS" -a "$ITER" -gt "$OPT_ITERATIONS" ]; then + return 1 # stop running + fi + + return 0 # continue running +} + # ########################################################################### # Main program loop, called below if tool is ran from the command line. # ########################################################################### @@ -303,7 +321,7 @@ main() { # Test if we have root; warn if not, but it isn't critical. 
if [ "$(id -u)" != "0" ]; then - echo 'Not running with root privileges!'; + log 'Not running with root privileges!'; fi # We increment this variable every time that the check is true, @@ -311,9 +329,10 @@ main() { local cycles_true=0 local matched="no" + # Set TRIGGER_FUNCTION based on --function. set_trg_func - while true; do + while oktorun; do # This is where we decide whether to execute 'collect'. # The idea is to generate a number and store into $detected, @@ -338,20 +357,20 @@ main() { if [ "$matched" = "yes" -a $cycles_true -ge $OPT_CYCLES ]; then log "$OPT_COLLECT triggered" + ITER=$((ITER + 1)) # PREFIX="$(date +%F-%T | tr :- _)" # echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" # Run pt-collect. - $OPT_COLLECT \ - -i "$OPT_RUN_TIME" \ - -g "$OPT_GDB" \ - -o "$OPT_OPROFILE" \ - -p "$OPT_PREFIX" \ - -s "$OPT_STRACE" \ - -t "$OPT_TCPDUMP" \ - -f "$OPT_PCT_THRESHOLD" \ - -m "$OPT_MB_THRESHOLD" \ + $OPT_EXECUTE_COMMAND \ + -i "$OPT_RUN_TIME" \ + -g "$OPT_COLLECT_GDB" \ + -o "$OPT_COLLECT_OPROFILE" \ + -s "$OPT_COLLECT_STRACE" \ + -t "$OPT_COLLECT_TCPDUMP" \ + -f "$OPT_DISK_PCT_LIMIT" \ + -m "$OPT_DISK_BYTE_LIMIT" \ -- "$EXT_ARGV" log "Sleeping $OPT_SLEEP seconds to avoid DOS attack" @@ -460,14 +479,30 @@ TODO =over -=item --collect DIRECTORY +=item --collect-gdb BOOLEAN -Location of the C tool. (default: ${HOME}/bin/pt-collect) +Collect GDB stacktraces. (default: no) + +=item --collect-oprofile BOOLEAN + +Collect oprofile data. (default: no) + +=item --collect-strace BOOLEAN + +Collect strace data. (default: no) + +=item --collect-tcpdump + +Collect tcpdump data? (default: no) =item --cycles N Number of times condition must be met before triggering collection. (default: 5) +=item --daemonize + +Daemonize the tool. + =item --dest DIRECTORY Where to store collected data. @@ -480,53 +515,11 @@ Exit if the disk has less than this many MB free. (default: 100) Exit if the disk is less than this %full. 
(default: 5) -=item --email ADDRESS +=item --execute-command COMMAND -Send mail to this list of addresses when the script triggers. +Location of the C tool. (default: pt-collect) -=item --gdb - -Collect GDB stacktraces. - -=item --help - -Print help and exit. - -=item --interval SECONDS - -Interval between checks. (default: 1) - -=item --match PATTERN - -Match pattern for C L<"--trigger-function">. - -=item --oprofile - -Collect oprofile data. (default: yes) - -=item --purge DAYS - -Remove samples after this many days. (default: 30) - -=item --run-time SECONDS - -How long to collect statistics data for? (default: 30) - -Make sure that this isn't longer than SLEEP. - -=item --strace - -Collect strace data. - -=item --sleep SECONDS - -How long to sleep after collecting? (default: 300) - -=item --tcpdump - -Collect tcpdump data? (default: yes) - -=item --trigger-function +=item --function FUNCTION Built-in function name or plugin file name which returns the value of C. (default: status) @@ -574,6 +567,49 @@ global variables with "PLUGIN_". =back +=item --help + +Print help and exit. + +=item --interval SECONDS + +Interval between checks. (default: 1) + +=item --iterations N + +Exit after triggering C this many times. By default, the tool +will collect as many times as it's triggered. + +=item --log FILE + +Print all output to this file when daemonized. (default: /var/log/pt-stalk.log) + +=item --match PATTERN + +Match pattern for C L<"--function">. + +=item --notify-by-email EMAILS + +Send mail to this list of addresses when C triggers. + +=item --pid FILE + +Create a PID file when daemonized. (default: /var/run/pt-stalk.pid) + +=item --retention-time DAYS + +Remove samples after this many days. (default: 30) + +=item --run-time SECONDS + +How long to collect statistics data for? (default: 30) + +Make sure that this isn't longer than SLEEP. + +=item --sleep SECONDS + +How long to sleep after collecting? (default: 300) + =item --threshold N Max number of C to tolerate. 
(default: 25) From 3e79920c9e8d998bc832c5b12f8051f96de21569 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Fri, 2 Dec 2011 11:39:25 -0700 Subject: [PATCH 09/71] Use option specs. Refactor internals. --- bin/pt-stalk | 175 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 116 insertions(+), 59 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 94ba330b..f5db4ef2 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -295,17 +295,55 @@ trg_magic() { } oktorun() { - if [ $OKTORUN -ne 0 ]; then + if [ $OKTORUN -eq 0 ]; then return 1 # stop running fi - if [ -n "$OPT_ITERATIONS" -a "$ITER" -gt "$OPT_ITERATIONS" ]; then + if [ -n "$OPT_ITERATIONS" ] && [ $ITER -ge $OPT_ITERATIONS ]; then return 1 # stop running fi return 0 # continue running } +sleep_ok() { + local seconds=$1 + local msg=$2 + if oktorun; then + if [ -n "$msg" ]; then + log $msg + fi + sleep $seconds + fi +} + +purge_samples() { + # Delete things more than $PURGE days old + #find "$OPT_DEST" -type f -mtime +$OPT_PURGE -exec rm -f '{}' \; + #find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ + # -depth -mtime +$OPT_PURGE -exec rm -f '{}' \; + : +} + +collect() { + log "$OPT_COLLECT triggered" + ITER=$((ITER + 1)) + + # PREFIX="$(date +%F-%T | tr :- _)" + # echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" + + # Run pt-collect. + $OPT_EXECUTE_COMMAND \ + -i "$OPT_RUN_TIME" \ + -g "$OPT_COLLECT_GDB" \ + -o "$OPT_COLLECT_OPROFILE" \ + -s "$OPT_COLLECT_STRACE" \ + -t "$OPT_COLLECT_TCPDUMP" \ + -f "$OPT_DISK_PCT_LIMIT" \ + -m "$OPT_DISK_BYTE_LIMIT" \ + -- "$EXT_ARGV" +} + # ########################################################################### # Main program loop, called below if tool is ran from the command line. 
# ########################################################################### @@ -356,34 +394,13 @@ main() { log "Check results: $OPT_VARIABLE=$value, matched=$matched, cycles_true=$cycles_true" if [ "$matched" = "yes" -a $cycles_true -ge $OPT_CYCLES ]; then - log "$OPT_COLLECT triggered" - ITER=$((ITER + 1)) - - # PREFIX="$(date +%F-%T | tr :- _)" - # echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" - - # Run pt-collect. - $OPT_EXECUTE_COMMAND \ - -i "$OPT_RUN_TIME" \ - -g "$OPT_COLLECT_GDB" \ - -o "$OPT_COLLECT_OPROFILE" \ - -s "$OPT_COLLECT_STRACE" \ - -t "$OPT_COLLECT_TCPDUMP" \ - -f "$OPT_DISK_PCT_LIMIT" \ - -m "$OPT_DISK_BYTE_LIMIT" \ - -- "$EXT_ARGV" - - log "Sleeping $OPT_SLEEP seconds to avoid DOS attack" - sleep $OPT_SLEEP + collect + sleep_ok $OPT_SLEEP "Sleeping $OPT_SLEEP seconds to avoid DOS attack" else - sleep $OPT_INTERVAL + sleep_ok $OPT_INTERVAL fi - # Delete things more than $PURGE days old - #find "$OPT_DEST" -type f -mtime +$OPT_PURGE -exec rm -f '{}' \; - #find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ - # -depth -mtime +$OPT_PURGE -exec rm -f '{}' \; - + purge_samples done # Remove the secure tmpdir. This is not actually called because @@ -479,49 +496,69 @@ TODO =over -=item --collect-gdb BOOLEAN +=item --collect -Collect GDB stacktraces. (default: no) +default: yes; negatable: yes -=item --collect-oprofile BOOLEAN +Collect system information. -Collect oprofile data. (default: no) +=item --collect-gdb -=item --collect-strace BOOLEAN +Collect GDB stacktraces. -Collect strace data. (default: no) +=item --collect-oprofile + +Collect oprofile data. + +=item --collect-strace + +Collect strace data. =item --collect-tcpdump -Collect tcpdump data? (default: no) +Collect tcpdump data. -=item --cycles N +=item --cycles -Number of times condition must be met before triggering collection. (default: 5) +type: int; default: 5 + +Number of times condition must be met before triggering collection. 
=item --daemonize +default: yes; negatable: yes + Daemonize the tool. -=item --dest DIRECTORY +=item --dest + +type: string Where to store collected data. -=item --disk-byte-limit MEGABYTES +=item --disk-byte-limit -Exit if the disk has less than this many MB free. (default: 100) +type: int; default: 100 -=item --disk-pct-limit PERCENT +Exit if the disk has less than this many MB free. -Exit if the disk is less than this %full. (default: 5) +=item --disk-pct-limit -=item --execute-command COMMAND +type: int; default: 5 -Location of the C tool. (default: pt-collect) +Exit if the disk is less than this %full. -=item --function FUNCTION +=item --execute-command -Built-in function name or plugin file name which returns the value of C. (default: status) +type: string; default: pt-collect + +Location of the C tool. + +=item --function + +type: string; default: status + +Built-in function name or plugin file name which returns the value of C. Possible values are: @@ -575,48 +612,68 @@ Print help and exit. Interval between checks. (default: 1) -=item --iterations N +=item --iterations + +type: int Exit after triggering C this many times. By default, the tool will collect as many times as it's triggered. -=item --log FILE +=item --log -Print all output to this file when daemonized. (default: /var/log/pt-stalk.log) +type: string; default: /var/log/pt-stalk.log -=item --match PATTERN +Print all output to this file when daemonized. + +=item --match + +type: string Match pattern for C L<"--function">. -=item --notify-by-email EMAILS +=item --notify-by-email + +type: string Send mail to this list of addresses when C triggers. =item --pid FILE -Create a PID file when daemonized. (default: /var/run/pt-stalk.pid) +type: string; default: /var/run/pt-stalk.pid -=item --retention-time DAYS +Create a PID file when daemonized. -Remove samples after this many days. 
(default: 30) +=item --retention-time -=item --run-time SECONDS +type: int; default: 30 -How long to collect statistics data for? (default: 30) +Remove samples after this many days. + +=item --run-time + +type: int; default: 30 + +How long to collect statistics data for? Make sure that this isn't longer than SLEEP. -=item --sleep SECONDS +=item --sleep -How long to sleep after collecting? (default: 300) +type: int; default: 300 + +How long to sleep after collecting? =item --threshold N -Max number of C to tolerate. (default: 25) +type: int; default: 25 + +Max number of C to tolerate. =item --variable NAME -This is the thing to check for. (default: Threads_running) +type: string; default: Threads_running + +This is the thing to check for. =item --version From 2e1a5e1139d04d4057787342d4bbe1be2508664e Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 6 Dec 2011 15:18:19 -0700 Subject: [PATCH 10/71] Do collect in pt-stalk. Add --prefix, set default --dest. Re-implement --notify-by-email and --rention-time. --- bin/pt-stalk | 323 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 284 insertions(+), 39 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 7c7c520f..503b57cd 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -47,7 +47,7 @@ die() { declare -a ARGV # non-option args (probably input files) declare EXT_ARGV # everything after -- (args for an external command) -OPT_ERR=${OPT_ERR:""} +OPT_ERR=${OPT_ERR:-""} usage() { local file=$1 @@ -244,11 +244,247 @@ rm_tmpdir() { # End tmpdir package # ########################################################################### +# ########################################################################### +# alt_cmds package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/alt_cmds.sh +# t/lib/bash/alt_cmds.sh +# See https://launchpad.net/percona-toolkit for more information. 
+# ########################################################################### + +_seq() { + local i=$1 + awk "BEGIN { for(i=1; i<=$i; i++) print i; }" +} + +# ########################################################################### +# End alt_cmds package +# ########################################################################### + +# ########################################################################### +# safeguards package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/safeguards.sh +# t/lib/bash/safeguards.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +disk_space() { + local filesystem=${1:-"$PWD"} + df -m -P $filesystem +} + +check_disk_space() { + local file=$1 + local mb=${2:-"0"} + local pct=${3:-"0"} + + local avail=$(cat $file | awk '/^\//{print $4}'); + local full=$(cat $file | awk '/^\//{print $5}' | sed -e 's/%//g'); + if [ "${avail}" -le "$mb" -o "$full" -le "$pct" ]; then + echo "Not enough free space (${full}% full, ${avail}MB free)" + echo "Wanted less than ${pct}% full and more than ${mb}MB" + return 1 + fi + return 0 +} + +# ########################################################################### +# End safeguards package +# ########################################################################### + +# ########################################################################### +# collect package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/collect.sh +# t/lib/bash/collect.sh +# See https://launchpad.net/percona-toolkit for more information. 
+# ########################################################################### + +CMD_GDB=${CMD_GDB:-"gdb"} +CMD_IOSTAT=${CMD_IOSTAT:-"iostat"} +CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"} +CMD_MYSQL=${CMD_MSSQL:-"mysql"} +CMD_MYSQLADMIN=${CMD_MYSQL_ADMIN:-"mysqladmin"} +CMD_OPCONTROL=${CMD_OPCONTROL:-"opcontrol"} +CMD_OPREPORT=${CMD_OPREPORT:-"opreport"} +CMD_PMAP=${CMD_PMAP:-"pmap"} +CMD_STRACE=${CMD_STRACE:-"strace"} +CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"} +CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"} + +collect() { + local d=$1 # directory to save results in + local p=$2 # prefix for each result file + + local mysqld_pid=$(pidof -s mysqld); + if [ -z "$mysqld_pid" ]; then + mysqld_pid=$(pgrep -o -x mysqld); + fi + if [ -z "$mysqld_pid" ]; then + mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); + fi + + if [ -x "$CMD_PMAP" -a "$mysqld_pid" ]; then + if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then + $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" + else + $CMD_PMAP $mysqld_pid > "$d/$p-pmap" + fi + fi + + if [ "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then + $CMD_GDB \ + -ex "set pagination 0" \ + -ex "thread apply all bt" \ + --batch -p $mysqld_pid \ + >> "$d/$p-stacktrace" + else + echo "GDB (--collect-gdb) was not enabled" >> "$d/$p-stacktrace" + fi + + $CMD_MYSQL "$EXT_ARGV" -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 & + sleep .2 + + local mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")" + + local mysql_error_log="$(awk '/log_error/{print $2}' "$d/$p-variables")" + if [ -z "$mysql_error_log" -a "$mysqld_pid" ]; then + mysql_error_log="$(ls -l /proc/$mysqld_pid/fd | awk '/ 2 ->/{print $NF}')" + fi + + local tail_error_log_pid="" + if [ "$mysql_error_log" ]; then + echo "The MySQL error log seems to be ${mysql_error_log}" + tail -f "$mysql_error_log" >"$d/$p-log_error" 2>&1 & + tail_error_log_pid=$! 
+ $CMD_MYSQLADMIN "$EXT_ARGV" debug + else + echo "Could not find the MySQL error log" + fi + + local innostat="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" + local proclist="SHOW FULL PROCESSLIST\G" + if [ "${mysql_version}" '>' "5.1" ]; then + local mutex="SHOW ENGINE INNODB MUTEX" + else + local mutex="SHOW MUTEX STATUS" + fi + $CMD_MYSQL "$EXT_ARGV" -e "$innostat" >> "$d/$p-innodbstatus1" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$proclist" >> "$d/$p-processlist1" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e 'SHOW OPEN TABLES' >> "$d/$p-opentables1" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$mutex" >> "$d/$p-mutex-status1" 2>&1 & + + local tcpdump_pid="" + if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then + local port=$(awk '/^port/{print $2}' "$d/$p-variables") + if [ "$port" ]; then + $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & + tcpdump_pid=$! + fi + fi + + local have_oprofile="no" + if [ "$OPT_COLLECT_OPROFILE" = "yes" ]; then + if $CMD_OPCONTROL --init; then + $CMD_OPCONTROL --start --no-vmlinux + have_oprofile="yes" + fi + elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & + local strace_pid=$! + fi + + ps -eaf >> "$d/$p-ps" 2>&1 & + sysctl -a >> "$d/$p-sysctl" 2>&1 & + top -bn1 >> "$d/$p-top" 2>&1 & + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & + lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + $CMD_MYSQLADMIN "$EXT_ARGV" ext -i1 -c$OPT_INTERVAL >> "$d/$p-mysqladmin" 2>&1 & + local mysqladmin_pid=$! 
+ + echo "Loop start: $(date +'TS %s.%N %F %T')" + for a in $(_seq $OPT_RUN_TIME); do + disk_space $d > $d/$p-disk-space + check_disk_space \ + $d/$p-disk-space \ + "$OPT_DISK_BYTE_LIMIT" \ + "$OPT_DISK_PCT_LIMIT" \ + || break + + sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') + local ts="$(date +"TS %s.%N %F %T")" + + (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & + (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & + (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & + (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & + (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & + (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & + (df -h 2>&1; echo $ts) >> "$d/$p-df" & + (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & + (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & + done + echo "Loop end: $(date +'TS %s.%N %F %T')" + + if [ "$have_oprofile" = "yes" ]; then + $CMD_OPCONTROL --stop + $CMD_OPCONTROL --dump + kill $(pidof oprofiled); # TODO: what if system doesn't have pidof? 
+ $CMD_OPCONTROL --save=pt_collect_$p + + local mysqld_path=$(which mysqld); + if [ "$mysqld_path" -a -f "$mysqld_path" ]; then + $CMD_OPREPORT \ + --demangle=smart \ + --symbols \ + --merge tgid \ + session:pt_collect_$p \ + "$mysqld_path" \ + > "$d/$p-opreport" + else + echo "oprofile data saved to pt_collect_$p; you should be able" \ + "to get a report by running something like 'opreport" \ + "--demangle=smart --symbols --merge tgid session:pt_collect_$p" \ + "/path/to/mysqld'" \ + > "$d/$p-opreport" + fi + elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + kill -s 2 $strace_pid + sleep 1 + kill -s 15 $strace_pid + kill -s 18 $mysqld_pid + fi + + $CMD_MYSQL "$EXT_ARGV" -e "$innostat" >> "$d/$p-innodbstatus2" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$proclist" >> "$d/$p-processlist2" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e 'SHOW OPEN TABLES' >> "$d/$p-opentables2" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$mutex" >> "$d/$p-mutex-status2" 2>&1 & + + kill $mysqladmin_pid + [ "$tail_error_log_pid" ] && kill $tail_error_log_pid + [ "$tcpdump_pid" ] && kill $tcpdump_pid + + hostname > "$d/$p-hostname" +} + +# ########################################################################### +# End collect package +# ########################################################################### + # ########################################################################### # Global variables # ########################################################################### OKTORUN=1 -ITER=0 +ITER=1 # ########################################################################### # Subroutines @@ -341,21 +577,22 @@ oktorun() { sleep_ok() { local seconds=$1 - local msg=${2:""} + local msg=${2:-""} if oktorun; then if [ -n "$msg" ]; then - log $msg + log "$msg" fi sleep $seconds fi } purge_samples() { - # Delete things more than $PURGE days old - #find "$OPT_DEST" -type f -mtime +$OPT_PURGE -exec rm -f '{}' \; - #find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ - # -depth -mtime +$OPT_PURGE 
-exec rm -f '{}' \; - : + # Delete collect files which more than --retention-time days old. + find "$OPT_DEST" -type f -mtime +$OPT_RETENTION_TIME -exec rm -f '{}' \; + if [ -d "/var/lib/oprofile/samples" ]; then + find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ + -depth -mtime +$OPT_RETENTION_TIME -exec rm -f '{}' \; + fi } sigtrap() { @@ -368,25 +605,6 @@ sigtrap() { fi } -collect() { - log "$OPT_COLLECT triggered" - ITER=$((ITER + 1)) - - # PREFIX="$(date +%F-%T | tr :- _)" - # echo "${NOTE}" > "${DEST}/${PREFIX}-trigger" - - # Run pt-collect. - $OPT_EXECUTE_COMMAND \ - -i "$OPT_RUN_TIME" \ - -g "$OPT_COLLECT_GDB" \ - -o "$OPT_COLLECT_OPROFILE" \ - -s "$OPT_COLLECT_STRACE" \ - -t "$OPT_COLLECT_TCPDUMP" \ - -f "$OPT_DISK_PCT_LIMIT" \ - -m "$OPT_DISK_BYTE_LIMIT" \ - -- "$EXT_ARGV" -} - stalk() { # We increment this variable every time that the check is true, # and set it to 0 if it's false. @@ -413,11 +631,29 @@ stalk() { cycles_true=0 fi - log "Check results: $OPT_VARIABLE=$value, matched=$matched, cycles_true=$cycles_true" + local msg="Check results: $OPT_VARIABLE=$value, matched=$matched, cycles_true=$cycles_true" + log "$msg" if [ "$matched" = "yes" -a $cycles_true -ge $OPT_CYCLES ]; then - collect - sleep_ok "$OPT_SLEEP" "Sleeping $OPT_SLEEP seconds to avoid DOS attack" + local prefix=${OPT_PREFIX:-"$(date +%F-%T | tr :- _)"} + + log "Collect triggered" + log "$msg" >> "$OPT_DEST/$prefix-trigger" + + if [ "$OPT_NOTIFY_BY_EMAIL" ]; then + echo "$msg on $(hostname)" \ + | mail -s "Collect triggered on $(hostname)" "$OPT_NOTIFY_BY_EMAIL" + fi + + # Fork collect subroutine which should run for --run-time seconds. + ( + flock 200 + collect $OPT_DEST $prefix + ) 200>/tmp/percona-toolkit-collect-lockfile \ + >> "$OPT_DEST/$prefix-output" 2>&1 & + + ITER=$((ITER + 1)) + sleep_ok "$OPT_SLEEP" "Sleeping $OPT_SLEEP seconds after collect" else sleep_ok "$OPT_INTERVAL" fi @@ -441,10 +677,10 @@ main() { # Make a secure tmpdir. 
mk_tmpdir - # Make the collection location - # mkdir -p "$OPT_DEST" || die "Can't make the destination directory" - # test -d "$OPT_DEST" || die "$OPT_DEST isn't a directory" - # test -w "$OPT_DEST" || die "$OPT_DEST isn't writable" + # Make the collection dir exists. + mkdir -p "$OPT_DEST" || die "Can't make the destination directory" + test -d "$OPT_DEST" || die "$OPT_DEST isn't a directory" + test -w "$OPT_DEST" || die "$OPT_DEST isn't writable" # Test if we have root; warn if not, but it isn't critical. if [ "$(id -u)" != "0" ]; then @@ -603,7 +839,7 @@ Daemonize the tool. =item --dest -type: string +type: string; default: ${HOME}/collected Where to store collected data. @@ -710,12 +946,21 @@ type: string Send mail to this list of addresses when C triggers. -=item --pid FILE +=item --pid type: string; default: /var/run/pt-stalk.pid Create a PID file when daemonized. +=item --prefix + +type: string + +Collect file prefix. + +If not specified, the current local time is used like C<2011_12_06_14_02_02>, +which is December 6, 2011 at 14:02:02. + =item --retention-time type: int; default: 30 @@ -736,13 +981,13 @@ type: int; default: 300 How long to sleep after collecting? -=item --threshold N +=item --threshold type: int; default: 25 Max number of C to tolerate. -=item --variable NAME +=item --variable type: string; default: Threads_running From 356526594ca03047cd0c69876723fe48fa85b917 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 6 Dec 2011 15:21:00 -0700 Subject: [PATCH 11/71] Remove pt-collect. 
--- bin/pt-collect | 450 -------------------------------------- t/pt-collect/pt-collect.t | 25 --- 2 files changed, 475 deletions(-) delete mode 100755 bin/pt-collect delete mode 100644 t/pt-collect/pt-collect.t diff --git a/bin/pt-collect b/bin/pt-collect deleted file mode 100755 index 6439c8bd..00000000 --- a/bin/pt-collect +++ /dev/null @@ -1,450 +0,0 @@ -#!/usr/bin/env bash - -# This program is part of Percona Toolkit: http://www.percona.com/software/ -# See "COPYRIGHT, LICENSE, AND WARRANTY" at the end of this file for legal -# notices and disclaimers. - -usage() { - if [ "${OPT_ERR}" ]; then - echo "${OPT_ERR}" >&2 - fi - echo "Usage: pt-collect -d -g -i -o -s [OPTIONS] [-- MYSQL-OPTIONS]" >&2 - echo "For more information, 'man pt-collect' or 'perldoc $0'." >&2 - exit 1 -} - -# Make sure the disk isn't getting too full. Exit if the disk is more than $1 -# percent full, or there is less than $2 megabytes of free space on $3 drive. -check_disk_space() { - PCT=${1:-"100"} - MB=${2:-"0"} - DEST="$3" - avail=$(df -m -P "${DEST}" | awk '/^\//{print $4}'); - full=$(df -m -P "${DEST}" | awk '/^\//{print $5}' | sed -e 's/%//g'); - if [ "${avail}" -le "${MB}" -o "${full}" -ge "${PCT}" ]; then - echo "Not enough free space (${full}% full, ${avail}MB free)" - echo "Wanted less than ${PCT}% full and more than ${MB}MB" - return 1 - fi - return 0 -} - -for o; do - case "${o}" in - --) - shift; break; - ;; - --help) - usage; - ;; - -d) - shift; OPT_d="${1}"; shift; - ;; - -f) - shift; OPT_f="${1}"; shift; - ;; - -i) - shift; OPT_i="${1}"; shift; - ;; - -g) - shift; OPT_g="${1}"; shift; - ;; - -m) - shift; OPT_m="${1}"; shift; - ;; - -o) - shift; OPT_o="${1}"; shift; - ;; - -p) - shift; OPT_p="${1}"; shift; - ;; - -s) - shift; OPT_s="${1}"; shift; - ;; - -t) - shift; OPT_t="${1}"; shift; - ;; - esac -done - - -if [ -z "${OPT_d}" -o -z "${OPT_i}" -o -z "${OPT_o}" -o -z "${OPT_g}" -o -z "${OPT_s}" ]; then - OPT_ERR="Missing command-line argument." 
- usage -fi - -if [ "${OPT_p}" ]; then - d="${OPT_p}" -else - d=$(date +%F-%T | tr :- _); -fi - -# Check disk space up-front. -check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || exit 1 - -echo "Gathering info for $d" - -# Make sure there's only one of me. -( - flock 200 - - # Get pidof mysqld; pidof doesn't exist on some systems. We try our best... - p=$(pidof -s mysqld); - if [ -z "${p}" ]; then - p=$(pgrep -o -x mysqld); - fi - if [ -z "${p}" ]; then - p=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); - fi - - # Get memory allocation info before anything else. - if [ "${p}" ]; then - if pmap --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then - pmap -x $p > "$OPT_d/$d-pmap" - else - # Some pmap's apparently don't support -x (issue 116). - pmap $p > "$OPT_d/$d-pmap" - fi - fi - - # Getting a GDB stacktrace can be an intensive operation, so do this only if - # necessary. - if [ "${OPT_g}" = "yes" -a "${p}" ]; then - gdb -ex "set pagination 0" -ex "thread apply all bt" --batch -p $p >> "$OPT_d/$d-stacktrace" - else - echo "GDB (-g) was not enabled" >> "$OPT_d/$d-stacktrace" - fi - - # Get MySQL's variables if possible. Then sleep long enough that we probably - # complete SHOW VARIABLES if all's well. (We don't want to run mysql in the - # foreground, because it could hang.) - mysql "$@" -e 'SHOW GLOBAL VARIABLES' >> "$OPT_d/$d-variables" 2>&1 & - sleep .2 - - # Get the major.minor version number. Version 3.23 doesn't matter for our - # purposes, and other releases have x.x.x* version conventions so far. - VER="$(awk '/^version[^_]/{print substr($2,1,3)}' "$OPT_d/$d-variables")" - - # Is MySQL logging its errors to a file? If so, tail that file. - errfile="$(awk '/log_error/{print $2}' "$OPT_d/$d-variables")" - if [ -z "${errfile}" -a "${p}" ]; then - # Try getting it from the open filehandle... 
- errfile="$(ls -l /proc/${p}/fd | awk '/ 2 ->/{print $NF}')" - fi - - if [ "${errfile}" ]; then - echo "The error file seems to be ${errfile}" - tail -f "${errfile}" >"$OPT_d/$d-log_error" 2>&1 & - error_pid=$! - # Send a mysqladmin debug to the server so we can potentially learn about - # locking etc. - mysqladmin debug "$@" - else - echo "Could not detect error file; will not tail MySQL's log file" - fi - - # Get a sample of these right away, so we can get these without interaction - # with the other commands we're about to run. - INNOSTAT="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" - mysql "$@" -e "${INNOSTAT}" >> "$OPT_d/$d-innodbstatus1" 2>&1 & - mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist1" 2>&1 & - mysql "$@" -e 'SHOW OPEN TABLES' >> "$OPT_d/$d-opentables1" 2>&1 & - if [ "${VER}" '>' "5.1" ]; then - mysql "$@" -e 'SHOW ENGINE INNODB MUTEX' >> "$OPT_d/$d-mutex-status1" 2>&1 & - else - mysql "$@" -e 'SHOW MUTEX STATUS' >> "$OPT_d/$d-mutex-status1" 2>&1 & - fi - - # If TCP dumping is specified, start that on the server's port. - if [ "${OPT_t}" = "yes" ]; then - port=$(awk '/^port/{print $2}' "$OPT_d/$d-variables") - if [ "${port}" ]; then - tcpdump -i any -s 4096 -w "$OPT_d/$d-tcpdump" port ${port} & - tcpdump_pid=$! - fi - fi - - # Next, start oprofile gathering data during the whole rest of this process. - # The --init should be a no-op if it has already been init-ed. - if [ "${OPT_o}" = "yes" ]; then - if opcontrol --init; then - opcontrol --start --no-vmlinux - else - OPT_o="no" - fi - elif [ "${OPT_s}" = "yes" ]; then - # Don't run oprofile and strace at the same time. - strace -T -s 0 -f -p $p > "${DEST}/$d-strace" 2>&1 & - strace_pid=$! - fi - - # Grab a few general things first. Background all of these so we can start - # them all up as quickly as possible. 
We use mysqladmin -c even though it is - # buggy and won't stop on its own in 5.1 and newer, because there is a chance - # that we will get and keep a connection to the database; in troubled times - # the database tends to exceed max_connections, so reconnecting in the loop - # tends not to work very well. - ps -eaf >> "$OPT_d/$d-ps" 2>&1 & - sysctl -a >> "$OPT_d/$d-sysctl" 2>&1 & - top -bn1 >> "$OPT_d/$d-top" 2>&1 & - vmstat 1 $OPT_i >> "$OPT_d/$d-vmstat" 2>&1 & - vmstat $OPT_i 2 >> "$OPT_d/$d-vmstat-overall" 2>&1 & - iostat -dx 1 $OPT_i >> "$OPT_d/$d-iostat" 2>&1 & - iostat -dx $OPT_i 2 >> "$OPT_d/$d-iostat-overall" 2>&1 & - mpstat -P ALL 1 $OPT_i >> "$OPT_d/$d-mpstat" 2>&1 & - mpstat -P ALL $OPT_i 1 >> "$OPT_d/$d-mpstat-overall" 2>&1 & - lsof -nP -p $p -bw >> "$OPT_d/$d-lsof" 2>&1 & - mysqladmin "$@" ext -i1 -c$OPT_i >> "$OPT_d/$d-mysqladmin" 2>&1 & - mysqladmin_pid=$! - - # This loop gathers data for the rest of the duration, and defines the time - # of the whole job. - echo "Loop start: $(date +'TS %s.%N %F %T')" - for a in `seq 1 $OPT_i`; do - # We check the disk, but don't exit, because we need to stop jobs if we - # need to exit. 
- check_disk_space "${OPT_f}" "${OPT_m}" "${OPT_d}" || break - - # Synchronize ourselves onto the clock tick, so the sleeps are 1-second - sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') - ts="$(date +"TS %s.%N %F %T")" - - # Collect the stuff for this cycle - (cat /proc/diskstats 2>&1; echo $ts) >> "$OPT_d/$d-diskstats" & - (cat /proc/stat 2>&1; echo $ts) >> "$OPT_d/$d-procstat" & - (cat /proc/vmstat 2>&1; echo $ts) >> "$OPT_d/$d-procvmstat" & - (cat /proc/meminfo 2>&1; echo $ts) >> "$OPT_d/$d-meminfo" & - (cat /proc/slabinfo 2>&1; echo $ts) >> "$OPT_d/$d-slabinfo" & - (cat /proc/interrupts 2>&1; echo $ts) >> "$OPT_d/$d-interrupts" & - (df -h 2>&1; echo $ts) >> "$OPT_d/$d-df" & - (netstat -antp 2>&1; echo $ts) >> "$OPT_d/$d-netstat" & - (netstat -s 2>&1; echo $ts) >> "$OPT_d/$d-netstat_s" & - done - echo "Loop end: $(date +'TS %s.%N %F %T')" - - if [ "${OPT_o}" = "yes" ]; then - opcontrol --stop - opcontrol --dump - kill $(pidof oprofiled); - opcontrol --save=pt_collect_$d - - # Attempt to generate a report; if this fails, then just tell the user how - # to generate the report. - path_to_binary=$(which mysqld); - if [ "${path_to_binary}" -a -f "${path_to_binary}" ]; then - opreport --demangle=smart --symbols --merge tgid session:pt_collect_$d "${path_to_binary}" > "$OPT_d/$d-opreport" - else - echo "oprofile data saved to pt_collect_$d; you should now be able to get a report" > "$OPT_d/$d-opreport" - echo "by running something like" >> "$OPT_d/$d-opreport" - echo "opreport --demangle=smart --symbols --merge tgid session:pt_collect_$d /path/to/mysqld" >> "$OPT_d/$d-opreport" - fi - elif [ "${OPT_s}" = "yes" ]; then - kill -s 2 ${strace_pid} - sleep 1 - kill -s 15 ${strace_pid} - # Sometimes strace leaves threads/processes in T status. 
- kill -s 18 $p - fi - - mysql "$@" -e "${INNOSTAT}" >> "$OPT_d/$d-innodbstatus2" 2>&1 & - mysql "$@" -e 'SHOW FULL PROCESSLIST\G' >> "$OPT_d/$d-processlist2" 2>&1 & - mysql "$@" -e 'SHOW OPEN TABLES' >> "$OPT_d/$d-opentables2" 2>&1 & - if [ "${VER}" '>' "5.1" ]; then - mysql "$@" -e 'SHOW ENGINE INNODB MUTEX' >> "$OPT_d/$d-mutex-status2" 2>&1 & - else - mysql "$@" -e 'SHOW MUTEX STATUS' >> "$OPT_d/$d-mutex-status2" 2>&1 & - fi - - # Kill backgrounded tasks. - kill $mysqladmin_pid - [ "$error_pid" ] && kill $error_pid - [ "$tcpdump_pid" ] && kill $tcpdump_pid - - # Finally, record what system we collected this data from. - hostname > "$OPT_d/$d-hostname" -)200>/tmp/percona-toolkit-collect-lockfile >> "$OPT_d/$d-output" 2>&1 - -# ############################################################################ -# Documentation -# ############################################################################ -:<<'DOCUMENTATION' -=pod - -=head1 NAME - -pt-collect - Collect information from a server for some period of time. - -=head1 SYNOPSIS - -Usage: pt-collect -d -g -i -o -s [OPTIONS] [-- MYSQL-OPTIONS] - -pt-collect tool gathers a variety of information about a system for a period -of time. It is typically executed when the stalk tool detects a condition -and wants to collect information to assist in diagnosis. Four options -must be specified on the command line: -dgios. - -=head1 RISKS - -The following section is included to inform users about the potential risks, -whether known or unknown, of using this tool. The two main categories of risks -are those created by the nature of the tool (e.g. read-only tools vs. read-write -tools) and those created by bugs. - -pt-collect is a read-only tool. It should be very low-risk. - -At the time of this release, we know of no bugs that could cause serious harm -to users. - -The authoritative source for updated information is always the online issue -tracking system. Issues that affect this tool will be marked as such. 
You can -see a list of such issues at the following URL: -L. - -See also L<"BUGS"> for more information on filing bugs and getting help. - -=head1 DESCRIPTION - -pt-collect creates a lock to ensure that only one instance runs at a time, -and then saves a variety of performance and status data into files in the -configured directory. Files are named with a timestamp so they can be -grouped together. The tool is MySQL-centric by default, and gathers quite -a bit of diagnostic data that's useful for understanding the behavior of -a MySQL database server. - -Options after C<--> are passed to C and C. - -=head1 OPTIONS - -=over - -=item -d (required) - -DESTINATION Where to store the resulting data; must already exist. - -=item -g (required) - -Collect GDB stack traces. - -=item -i INTERVAL (required) - -How many seconds to collect data. - -=item -o (required) - -Collect oprofile data; disables -s. - -=item -s (required) - -Collect strace data. - -=item -f PERCENT - -Exit if the disk is more than this percent full (default 100). - -=item -m MEGABYTES - -Exit if there are less than this many megabytes free disk space (default 0). - -=item -p PREFIX - -Store the data into files with this prefix (optional). - -=item -t - -Collect tcpdump data. - -=back - -=head1 ENVIRONMENT - -This tool does not use any environment variables. - -=head1 SYSTEM REQUIREMENTS - -This tool requires Bash v3 or newer and assumes that these programs -are installed, in the PATH, and executable: sysctl, top, vmstat, iostat, -mpstat, lsof, mysql, mysqladmin, df, netstat, pidof, flock, and others -depending on what command-line options are specified. If some of those -programs are not available, the tool will still run but may print warnings. - -=head1 BUGS - -For a list of known bugs, see L. - -Please report bugs at L. 
-Include the following information in your bug report: - -=over - -=item * Complete command-line used to run the tool - -=item * Tool L<"--version"> - -=item * MySQL version of all servers involved - -=item * Output from the tool including STDERR - -=item * Input files (log/dump/config files, etc.) - -=back - -If possible, include debugging output by running the tool with C; -see L<"ENVIRONMENT">. - -=head1 DOWNLOADING - -Visit L to download the -latest release of Percona Toolkit. Or, get the latest release from the -command line: - - wget percona.com/get/percona-toolkit.tar.gz - - wget percona.com/get/percona-toolkit.rpm - - wget percona.com/get/percona-toolkit.deb - -You can also get individual tools from the latest release: - - wget percona.com/get/TOOL - -Replace C with the name of any tool. - -=head1 AUTHORS - -Baron Schwartz - -=head1 ABOUT PERCONA TOOLKIT - -This tool is part of Percona Toolkit, a collection of advanced command-line -tools developed by Percona for MySQL support and consulting. Percona Toolkit -was forked from two projects in June, 2011: Maatkit and Aspersa. Those -projects were created by Baron Schwartz and developed primarily by him and -Daniel Nichter, both of whom are employed by Percona. Visit -L for more software developed by Percona. - -=head1 COPYRIGHT, LICENSE, AND WARRANTY - -This program is copyright 2010-2011 Baron Schwartz, 2011 Percona Inc. -Feedback and improvements are welcome. - -THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation, version 2; OR the Perl Artistic License. On UNIX and similar -systems, you can issue `man perlgpl' or `man perlartistic' to read these -licenses. 
- -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA. - -=head1 VERSION - -pt-collect 1.0.1 - -=cut - -DOCUMENTATION diff --git a/t/pt-collect/pt-collect.t b/t/pt-collect/pt-collect.t deleted file mode 100644 index e9384f1d..00000000 --- a/t/pt-collect/pt-collect.t +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env perl - -BEGIN { - die "The PERCONA_TOOLKIT_BRANCH environment variable is not set.\n" - unless $ENV{PERCONA_TOOLKIT_BRANCH} && -d $ENV{PERCONA_TOOLKIT_BRANCH}; - unshift @INC, "$ENV{PERCONA_TOOLKIT_BRANCH}/lib"; -}; - -use strict; -use warnings FATAL => 'all'; -use English qw(-no_match_vars); -use Test::More tests => 1; - -use PerconaTest; - -like( - `$trunk/bin/pt-collect --help 2>&1`, - qr/Usage:/, - 'It runs' -); - -# ############################################################################# -# Done. -# ############################################################################# -exit; From ba34364cf9e3b84b9af6bfdc7523f94e7412973c Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 7 Dec 2011 10:58:51 -0700 Subject: [PATCH 12/71] Use daemon.sh to check PID file. --- bin/pt-stalk | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 73 insertions(+), 1 deletion(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 503b57cd..bd3ff2a4 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -15,6 +15,9 @@ set -u # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### + +set -u + EXIT_STATUS=0 log() { @@ -45,6 +48,9 @@ die() { # See https://launchpad.net/percona-toolkit for more information. 
# ########################################################################### + +set -u + declare -a ARGV # non-option args (probably input files) declare EXT_ARGV # everything after -- (args for an external command) OPT_ERR=${OPT_ERR:-""} @@ -216,6 +222,9 @@ parse_options() { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### + +set -u + TMPDIR="" OPT_TMPDIR=${OPT_TMPDIR:""} @@ -253,6 +262,9 @@ rm_tmpdir() { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### + +set -u + _seq() { local i=$1 awk "BEGIN { for(i=1; i<=$i; i++) print i; }" @@ -271,6 +283,9 @@ _seq() { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### + +set -u + disk_space() { local filesystem=${1:-"$PWD"} df -m -P $filesystem @@ -291,10 +306,54 @@ check_disk_space() { return 0 } + # ########################################################################### # End safeguards package # ########################################################################### +# ########################################################################### +# daemon package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/daemon.sh +# t/lib/bash/daemon.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +make_pid_file() { + local file=$1 + local pid=$2 + + + if [ -f "$file" ]; then + local old_pid=$(cat $file) + if [ -z "$old_pid" ]; then + die "PID file $file already exists but it is empty" + else + kill -0 $old_pid 2>/dev/null + if [ $? 
-eq 0 ]; then + die "PID file $file already exists and its PID ($old_pid) is running" + else + echo "Overwriting PID file $file because its PID ($old_pid)" \ + "is not running" + fi + fi + fi + + echo "$pid" > $file +} + +remove_pid_file() { + local file=$1 + if [ -f "$file" ]; then + rm $file + fi +} + +# ########################################################################### +# End daemon package +# ########################################################################### + # ########################################################################### # collect package # This package is a copy without comments from the original. The original @@ -304,6 +363,9 @@ check_disk_space() { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### + +set -u + CMD_GDB=${CMD_GDB:-"gdb"} CMD_IOSTAT=${CMD_IOSTAT:-"iostat"} CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"} @@ -693,8 +755,9 @@ main() { # Stalk while oktorun. stalk - # Remove the secure tmpdir. + # Clean up. rm_tmpdir + remove_pid_file "$OPT_PID" log "$0 exit status $EXIT_STATUS" exit $EXIT_STATUS @@ -712,8 +775,17 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ rm_tmpdir if [ "$OPT_DAEMONIZE" = "yes" ]; then + # The PID file will at first have our (parent) PID. + # This is fine for ensuring that only one of us is + # running, but it's not fine if the user wants to use + # the PID in the PID file to check or kill the child + # process. So we'll need to update the PID file with + # the child's PID. + make_pid_file $OPT_PID $$ + main "$@" >$OPT_LOG 2>&1 & + # Update PID file with the child's PID. # The child PID is $BASHPID but that special var is only # in Bash 4+, so we can't rely on it. Consequently, we # use $! to get the PID of the child we just forked. From b91020a007c4b9579fb4872491d4ef36ac2d9ddc Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 8 Dec 2011 13:35:48 -0700 Subject: [PATCH 13/71] Update modules in pt-stalk. 
--- bin/pt-stalk | 100 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 79 insertions(+), 21 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 17df8d02..9730ca3d 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -324,6 +324,9 @@ check_disk_space() { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### + +set -u + make_pid_file() { local file=$1 local pid=$2 @@ -433,16 +436,14 @@ collect() { fi local innostat="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" - local proclist="SHOW FULL PROCESSLIST\G" if [ "${mysql_version}" '>' "5.1" ]; then local mutex="SHOW ENGINE INNODB MUTEX" else local mutex="SHOW MUTEX STATUS" fi - $CMD_MYSQL "$EXT_ARGV" -e "$innostat" >> "$d/$p-innodbstatus1" 2>&1 & - $CMD_MYSQL "$EXT_ARGV" -e "$proclist" >> "$d/$p-processlist1" 2>&1 & - $CMD_MYSQL "$EXT_ARGV" -e 'SHOW OPEN TABLES' >> "$d/$p-opentables1" 2>&1 & - $CMD_MYSQL "$EXT_ARGV" -e "$mutex" >> "$d/$p-mutex-status1" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$innostat" >> "$d/$p-innodbstatus1" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$mutex" >> "$d/$p-mutex-status1" 2>&1 & + open_tables >> "$d/$p-opentables1" 2>&1 & local tcpdump_pid="" if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then @@ -464,21 +465,30 @@ collect() { local strace_pid=$! 
fi - ps -eaf >> "$d/$p-ps" 2>&1 & - sysctl -a >> "$d/$p-sysctl" 2>&1 & - top -bn1 >> "$d/$p-top" 2>&1 & - $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & - $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & - $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & - $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & - $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & - $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & - lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & - $CMD_MYSQLADMIN "$EXT_ARGV" ext -i1 -c$OPT_INTERVAL >> "$d/$p-mysqladmin" 2>&1 & + ps -eaf >> "$d/$p-ps" 2>&1 & + sysctl -a >> "$d/$p-sysctl" 2>&1 & + top -bn1 >> "$d/$p-top" 2>&1 & + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & + lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + + $CMD_MYSQLADMIN "$EXT_ARGV" ext -i1 -c$OPT_RUN_TIME \ + >> "$d/$p-mysqladmin" 2>&1 & local mysqladmin_pid=$! + local have_lock_waits_table=0 + $MYSQL_CMD "$EXT_ARGV" -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ + | grep -qi "INNODB_LOCK_WAITS" + if [ $? 
-eq 0 ]; then + have_lock_waits_table=1 + fi + echo "Loop start: $(date +'TS %s.%N %F %T')" - for a in $(_seq $OPT_RUN_TIME); do + for loopno in $(_seq $OPT_RUN_TIME); do disk_space $d > $d/$p-disk-space check_disk_space \ $d/$p-disk-space \ @@ -498,6 +508,13 @@ collect() { (df -h 2>&1; echo $ts) >> "$d/$p-df" & (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & + + ($CMD_MYSQL "$EXT_ARGV" -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \ + >> "$d/$p-processlist" + + if [ $have_lock_waits_table -eq 1 ]; then + (lock_waits 2>&1; echo $ts) >>"$d/$p-lock-waits" + fi done echo "Loop end: $(date +'TS %s.%N %F %T')" @@ -530,10 +547,9 @@ collect() { kill -s 18 $mysqld_pid fi - $CMD_MYSQL "$EXT_ARGV" -e "$innostat" >> "$d/$p-innodbstatus2" 2>&1 & - $CMD_MYSQL "$EXT_ARGV" -e "$proclist" >> "$d/$p-processlist2" 2>&1 & - $CMD_MYSQL "$EXT_ARGV" -e 'SHOW OPEN TABLES' >> "$d/$p-opentables2" 2>&1 & - $CMD_MYSQL "$EXT_ARGV" -e "$mutex" >> "$d/$p-mutex-status2" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$innostat" >> "$d/$p-innodbstatus2" 2>&1 & + $CMD_MYSQL "$EXT_ARGV" -e "$mutex" >> "$d/$p-mutex-status2" 2>&1 & + open_tables >> "$d/$p-opentables2" 2>&1 & kill $mysqladmin_pid [ "$tail_error_log_pid" ] && kill $tail_error_log_pid @@ -542,6 +558,48 @@ collect() { hostname > "$d/$p-hostname" } +open_tables() { + local open_tables=$($CMD_MYSQLADMIN "$EXT_ARGV" ext | grep "Open_tables" | awk '{print $4}') + if [ -n "$open_tables" -a $open_tables -le 1000 ]; then + $CMD_MYSQL "$EXT_ARGV" -e 'SHOW OPEN TABLES' 2>&1 & + else + echo "Too many open tables: $open_tables" + fi +} + +lock_waits() { + local sql1="SELECT + CONCAT('thread ', b.trx_mysql_thread_id, ' from ', p.host) AS who_blocks, + IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx, + MAX(TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP)) AS max_wait_time, + COUNT(*) AS num_waiters + FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS b 
ON b.trx_id = w.blocking_trx_id + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id + LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id + GROUP BY who_blocks ORDER BY num_waiters DESC\G" + $CMD_MYSQL "$EXT_ARGV" -e "$sql1" + + local sql2="SELECT + r.trx_id AS waiting_trx_id, + r.trx_mysql_thread_id AS waiting_thread, + TIMESTAMPDIFF(SECOND, r.trx_wait_started, CURRENT_TIMESTAMP) AS wait_time, + r.trx_query AS waiting_query, + l.lock_table AS waiting_table_lock, + b.trx_id AS blocking_trx_id, b.trx_mysql_thread_id AS blocking_thread, + SUBSTRING(p.host, 1, INSTR(p.host, ':') - 1) AS blocking_host, + SUBSTRING(p.host, INSTR(p.host, ':') +1) AS blocking_port, + IF(p.command = \"Sleep\", p.time, 0) AS idle_in_trx, + b.trx_query AS blocking_query + FROM INFORMATION_SCHEMA.INNODB_LOCK_WAITS AS w + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS b ON b.trx_id = w.blocking_trx_id + INNER JOIN INFORMATION_SCHEMA.INNODB_TRX AS r ON r.trx_id = w.requesting_trx_id + INNER JOIN INFORMATION_SCHEMA.INNODB_LOCKS AS l ON w.requested_lock_id = l.lock_id + LEFT JOIN INFORMATION_SCHEMA.PROCESSLIST AS p ON p.id = b.trx_mysql_thread_id + ORDER BY wait_time DESC\G" + $CMD_MYSQL "$EXT_ARGV" -e "$sql2" +} + # ########################################################################### # End collect package # ########################################################################### From 2c8021e7d6256c48698667678c3199e026e14bb9 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 8 Dec 2011 14:26:10 -0700 Subject: [PATCH 14/71] Check -x mysql, mysqladmin, and that MySQL is alive before starting. 
--- bin/pt-stalk | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index ab6dbc15..9ccadca5 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -669,13 +669,13 @@ set_trg_func() { trg_status() { local var=$1 - mysqladmin "$EXT_ARGV" extended-status | grep "$OPT_VARIABLE" | awk '{print $4}' + mysqladmin $EXT_ARGV extended-status | grep "$OPT_VARIABLE" | awk '{print $4}' } trg_processlist() { local var=$1 local tmpfile="$TMPDIR/processlist" - mysqladmin "$EXT_ARGV" processlist > $tmpfile-1 + mysqladmin $EXT_ARGV processlist > $tmpfile-1 grep_processlist $tmpfile-1 $var $OPT_MATCH 0 0 > $tmpfile-2 wc -l $tmpfile-2 | awk '{print $1}' rm -rf $tmpfile* @@ -830,12 +830,25 @@ main() { if [ "$(basename "$0")" = "pt-stalk" ] \ || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then + # Check that mysql and mysqladmin are in PATH. If not, we're + # already dead in the water, so don't bother with cmd line opts, + # just error and exit. + [ -n "$(mysql --help)" ] \ + || die "Cannot execute mysql. Check that it is in PATH." + [ -n "$(mysqladmin --help)" ] \ + || die "Cannot execute mysqladmin. Check that it is in PATH." + # Parse command line options. We must do this first so we can # see if --daemonize was specified. mk_tmpdir parse_options $0 "$@" rm_tmpdir + # Now that we have the cmd line opts, check that we can actually + # connect to MySQL. + [ -n "$(mysql $EXT_ARGV -e 'SELECT 1')" ] \ + || die "Cannot connect to MySQL. Check that MySQL is running and that the options after -- are correct." + if [ "$OPT_DAEMONIZE" = "yes" ]; then # The PID file will at first have our (parent) PID. # This is fine for ensuring that only one of us is From c33fe97cf7b84434cee07b638e450991c5526414 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 15 Dec 2011 12:02:47 -0700 Subject: [PATCH 15/71] Check disk space+margin before collecting. More code comments. 
--- bin/pt-stalk | 70 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index cc82e773..9500f4cc 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -741,15 +741,14 @@ sigtrap() { } stalk() { - # We increment this variable every time that the check is true, - # and set it to 0 if it's false. - local cycles_true=0 - local matched="no" + local cycles_true=0 # increment each time check is true, else set to 0 + local matched="no" # set to "yes" when check is true + local last_prefix="" # prefix of last collection while oktorun; do - # This is where we decide whether to execute 'collect'. - # The idea is to generate a number and store into $detected, - # and if $detected > $OPT_THRESHOLD, then we'll execute pt-collect. + # Run the trigger which returns the value of whatever is being + # checked. When the value is > --threshold for at least --cycle + # consecutive times, start collecting. local value=$($TRIGGER_FUNCTION $OPT_VARIABLE) local trg_exit_status=$? @@ -770,29 +769,62 @@ stalk() { log "$msg" if [ "$matched" = "yes" -a $cycles_true -ge $OPT_CYCLES ]; then + # ################################################################## + # Start collecting, maybe. + # ################################################################## local prefix=${OPT_PREFIX:-"$(date +%F-%T | tr :- _)"} - log "Collect triggered" - log "$msg" >> "$OPT_DEST/$prefix-trigger" - if [ "$OPT_NOTIFY_BY_EMAIL" ]; then - echo "$msg on $(hostname)" \ - | mail -s "Collect triggered on $(hostname)" "$OPT_NOTIFY_BY_EMAIL" + # Check if we'll have enough disk space to collect. Disk space + # is also checked every interval while collecting. + local margin="20" # default 20M margin, unless: + if [ -n "$last_prefix" ]; then + margin=$(du -mc $d/$last_prefix-* | tail -n 1 | awk '{print $1'}) + fi + disk_space $d > $d/$p-disk-space # Get real disk space. + check_disk_space \ # Then check it plus... 
+ $d/$p-disk-space \ + "$OPT_DISK_BYTE_LIMIT" \ + "$OPT_DISK_PCT_LIMIT" \ + "$margin" \ # ... the margin. + + if [ $? -eq 0 ]; then + # There should be enough disk space, so collect. + log "$msg" >> "$OPT_DEST/$prefix-trigger" + last_prefix="$prefix" + + # Send email to whomever that collect has been triggered. + if [ "$OPT_NOTIFY_BY_EMAIL" ]; then + echo "$msg on $(hostname)" \ + | mail -s "Collect triggered on $(hostname)" \ + "$OPT_NOTIFY_BY_EMAIL" + fi + + # Fork and background the collect subroutine which will + # run for --run-time seconds. We (the parent) sleep + # while its collecting (hopefully --sleep is longer than + # --run-time). + ( + flock 200 + collect $OPT_DEST $prefix + ) 200>/tmp/percona-toolkit-collect-lockfile \ + >> "$OPT_DEST/$prefix-output" 2>&1 & + else + # There will not be enough disk space, so do not collect. + warn "Collect canceled because there will not be enough disk space after collecting another $margin MB" fi - # Fork collect subroutine which should run for --run-time seconds. - ( - flock 200 - collect $OPT_DEST $prefix - ) 200>/tmp/percona-toolkit-collect-lockfile \ - >> "$OPT_DEST/$prefix-output" 2>&1 & - + # ################################################################## + # Done collecting. + # ################################################################## ITER=$((ITER + 1)) sleep_ok "$OPT_SLEEP" "Sleeping $OPT_SLEEP seconds after collect" else + # Trigger/check/value is ok, sleep until next check. sleep_ok "$OPT_INTERVAL" fi + # Purge old collect file between checks. purge_samples done } From 78603f5d03a0d2fa9f7dfb897ed28e8abf9cecf9 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 15 Dec 2011 14:28:10 -0700 Subject: [PATCH 16/71] Test pt-stalk. Fix disk space+margin check. Fix checking --iterations. Add EXIT_REASON. Fix grepping status var. 
--- bin/pt-stalk | 23 ++++--- t/pt-stalk/pt-stalk.t | 156 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 164 insertions(+), 15 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index ba4fa15f..9af42a2f 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -646,6 +646,7 @@ lock_waits() { # ########################################################################### # Global variables # ########################################################################### +EXIT_REASON="" TOOL=`basename $0` OKTORUN=1 ITER=1 @@ -709,7 +710,7 @@ set_trg_func() { trg_status() { local var=$1 - mysqladmin $EXT_ARGV extended-status | grep "$OPT_VARIABLE" | awk '{print $4}' + mysqladmin $EXT_ARGV extended-status | grep "$OPT_VARIABLE " | awk '{print $4}' } trg_processlist() { @@ -729,10 +730,12 @@ trg_magic() { oktorun() { if [ $OKTORUN -eq 0 ]; then + EXIT_REASON="OKTORUN is false, exiting" return 1 # stop running fi - if [ -n "$OPT_ITERATIONS" ] && [ $ITER -ge $OPT_ITERATIONS ]; then + if [ -n "$OPT_ITERATIONS" ] && [ $ITER -gt $OPT_ITERATIONS ]; then + EXIT_REASON="No more iterations, exiting" return 1 # stop running fi @@ -808,15 +811,14 @@ stalk() { # is also checked every interval while collecting. local margin="20" # default 20M margin, unless: if [ -n "$last_prefix" ]; then - margin=$(du -mc $d/$last_prefix-* | tail -n 1 | awk '{print $1'}) + margin=$(du -mc $OPT_DEST/$last_prefix-* | tail -n 1 | awk '{print $1'}) fi - disk_space $d > $d/$p-disk-space # Get real disk space. - check_disk_space \ # Then check it plus... - $d/$p-disk-space \ - "$OPT_DISK_BYTE_LIMIT" \ - "$OPT_DISK_PCT_LIMIT" \ - "$margin" \ # ... the margin. - + disk_space $OPT_DEST > $OPT_DEST/$prefix-disk-space + check_disk_space \ + $OPT_DEST/$prefix-disk-space \ + "$OPT_DISK_BYTE_LIMIT" \ + "$OPT_DISK_PCT_LIMIT" \ + "$margin" # real used MB + margin MB if [ $? -eq 0 ]; then # There should be enough disk space, so collect. 
log "$msg" >> "$OPT_DEST/$prefix-trigger" @@ -893,6 +895,7 @@ main() { rm_tmpdir remove_pid_file "$OPT_PID" + log "$EXIT_REASON" log "$0 exit status $EXIT_STATUS" exit $EXIT_STATUS } diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index 869b2513..90b04da4 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -9,16 +9,162 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 1; +use Test::More; use PerconaTest; +use DSNParser; +use Sandbox; -TODO: { - local $TODO = "Test pt-stalk"; - ok(1, 'ok'); -}; +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $dbh = $sb->get_dbh_for('master'); + +if ( !$dbh ) { + plan skip_all => 'Cannot connect to sandbox master'; +} +else { + plan tests => 14; +} + +my $cnf = "/tmp/12345/my.sandbox.cnf"; +my $pid_file = "/tmp/pt-stalk.pid.$PID"; +my $log_file = "/tmp/pt-stalk.log.$PID"; +my $dest = "/tmp/pt-stalk.collect.$PID"; +my $pid; + +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm -rf $dest 2>/dev/null`); + +# ########################################################################### +# Test that it won't run if can't connect to MySQL. +# ########################################################################### + +my $retval = system("$trunk/bin/pt-stalk >$log_file 2>&1"); +my $output = `cat $log_file`; + +like( + $output, + qr/Cannot connect to MySQL/, + "Cannot connect to MySQL" +); + +is( + $retval >> 8, + 1, + "Exit 1" +); + +# ########################################################################### +# Test that it runs and dies normally. 
+# ########################################################################### +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm -rf $dest 2>/dev/null`); + +$retval = system("$trunk/bin/pt-stalk --pid $pid_file --log $log_file --dest $dest -- --defaults-file=$cnf"); + +is( + $retval >> 8, + 0, + "Parent exit 0" +); + +PerconaTest::wait_for_files($pid_file, $log_file); +ok( + -f $pid_file, + "Creates PID file" +); + +ok( + -f $log_file, + "Creates log file" +); + +sleep 1; + +ok( + -d $dest, + "Creates --dest (collect) dir" +); + +chomp($pid = `cat $pid_file`); +$retval = system("kill -0 $pid"); +is( + $retval >> 0, + 0, + "pt-stalk is running ($pid)" +); + +$output = `cat $log_file`; +like( + $output, + qr/Check results: Threads_running=\d+, matched=no, cycles_true=0/, + "Check results logged" +); + +$retval = system("kill $pid 2>/dev/null"); +is( + $retval >> 0, + 0, + "Killed pt-stalk" +); + +sleep 1; + +ok( + ! -f $pid_file, + "Removes PID file" +); + +$output = `cat $log_file`; +like( + $output, + qr/Caught signal, exiting/, + "Caught signal logged" +); + +# ########################################################################### +# Test collect. +# ########################################################################### +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm $dest/* 2>/dev/null`); + +# We'll have to watch Uptime since it's the only status var that's going +# to be predictable. 
+my (undef, $uptime) = $dbh->selectrow_array("SHOW STATUS LIKE 'Uptime'"); +my $threshold = $uptime + 2; + +$retval = system("$trunk/bin/pt-stalk --no-daemonize --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 -- --defaults-file=$cnf >$log_file 2>&1"); + +sleep 3; + +$output = `cat $dest/*-trigger`; +like( + $output, + qr/Check results: Uptime=\d+, matched=yes, cycles_true=2/, + "Collect triggered" +); + +chomp($output = `cat $dest/*-df | grep -c '^TS'`); +is( + $output, + 2, + "Collect ran for --run-time" +); + +$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --no-daemonize --iterations 1 --dest $dest'`; +is( + $output, + "", + "pt-stalk is not running" +); # ############################################################################# # Done. # ############################################################################# +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm -rf $dest 2>/dev/null`); exit; From 09467de2b3a595fe7844da21f931ceda03de0801 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 15 Dec 2011 14:31:23 -0700 Subject: [PATCH 17/71] Remove --execute-command. --- bin/pt-stalk | 6 ------ 1 file changed, 6 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 9af42a2f..4e381660 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1078,12 +1078,6 @@ type: int; default: 5 Exit if the disk is less than this %full. -=item --execute-command - -type: string; default: pt-collect - -Location of the C tool. - =item --function type: string; default: status From 5ce6fba919686c5e3649e8850fddda0a5bbf9384 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 19 Dec 2011 11:10:02 -0700 Subject: [PATCH 18/71] Do not daemonize by default. 
--- bin/pt-stalk | 2 -- t/pt-stalk/pt-stalk.t | 6 +++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index d482ab13..7615b364 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1059,8 +1059,6 @@ Number of times condition must be met before triggering collection. =item --daemonize -default: yes; negatable: yes - Daemonize the tool. =item --dest diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index 90b04da4..eb57d5c5 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -62,7 +62,7 @@ diag(`rm $pid_file 2>/dev/null`); diag(`rm $log_file 2>/dev/null`); diag(`rm -rf $dest 2>/dev/null`); -$retval = system("$trunk/bin/pt-stalk --pid $pid_file --log $log_file --dest $dest -- --defaults-file=$cnf"); +$retval = system("$trunk/bin/pt-stalk --daemonize --pid $pid_file --log $log_file --dest $dest -- --defaults-file=$cnf"); is( $retval >> 8, @@ -136,7 +136,7 @@ diag(`rm $dest/* 2>/dev/null`); my (undef, $uptime) = $dbh->selectrow_array("SHOW STATUS LIKE 'Uptime'"); my $threshold = $uptime + 2; -$retval = system("$trunk/bin/pt-stalk --no-daemonize --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 -- --defaults-file=$cnf >$log_file 2>&1"); +$retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 -- --defaults-file=$cnf >$log_file 2>&1"); sleep 3; @@ -154,7 +154,7 @@ is( "Collect ran for --run-time" ); -$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --no-daemonize --iterations 1 --dest $dest'`; +$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`; is( $output, "", From 482875e3306f0bdf172777cab1720470f65592e9 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 19 Dec 2011 12:59:38 -0700 Subject: [PATCH 19/71] Don't rm po before --help output. 
--- bin/pt-stalk | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 674e8805..2276307a 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -928,8 +928,12 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ # see if --daemonize was specified. mk_tmpdir parse_options $0 "$@" + usage_or_errors $0 + po_status=$? rm_tmpdir - usage_or_errors $0 || exit $? + if [ $po_status -ne 0 ]; then + exit $po_status + fi # Now that we have the cmd line opts, check that we can actually # connect to MySQL. From 650641b052c80862fd4a81f794372f4252f125ff Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 19 Dec 2011 13:44:40 -0700 Subject: [PATCH 20/71] Check --pid even if not --daemonize. --- bin/pt-stalk | 1 + 1 file changed, 1 insertion(+) diff --git a/bin/pt-stalk b/bin/pt-stalk index 2276307a..fc93ba23 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -957,6 +957,7 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ # use $! to get the PID of the child we just forked. echo "$!" > $OPT_PID else + make_pid_file $OPT_PID $$ main "$@" fi fi From 0ca763c81b70ea5b0785be47ed022cbcc46ebf08 Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 3 Jan 2012 14:52:57 -0300 Subject: [PATCH 21/71] pt-mysql-summary: Use mktemp through the tmpdir package. --- bin/pt-mysql-summary | 194 ++++++++++++++++----------- t/pt-mysql-summary/get_mysql_info.sh | 6 +- 2 files changed, 122 insertions(+), 78 deletions(-) diff --git a/bin/pt-mysql-summary b/bin/pt-mysql-summary index 054b4fe8..db01f10e 100755 --- a/bin/pt-mysql-summary +++ b/bin/pt-mysql-summary @@ -13,6 +13,47 @@ usage() { exit 1 } +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. 
+# ########################################################################### + +# pt-mysql-summary isn't ready for this yet. +# set -u + +TMPDIR="" + +mk_tmpdir() { + local dir=${1:-""} + + if [ -n "$dir" ]; then + if [ ! -d "$dir" ]; then + mkdir $dir || die "Cannot make tmpdir $dir" + fi + TMPDIR="$dir" + else + local tool=`basename $0` + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf $TMPDIR + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + # ######################################################################## # Some global setup is necessary for cross-platform compatibility, even # when sourcing this script for testing purposes. @@ -62,9 +103,9 @@ fuzzy_formula=' # symlink them to /etc/passwd and then run this program as root. Call this # function with "rm" or "touch" as an argument. temp_files() { - for file in /tmp/percona-toolkit{,-mysql-variables,-mysql-status,-innodb-status} \ - /tmp/percona-toolkit{2,-mysql-databases,-mysql-processlist,-noncounters} \ - /tmp/percona-toolkit-mysql{dump,-slave}; + for file in $TMPDIR/percona-toolkit{,-mysql-variables,-mysql-status,-innodb-status} \ + $TMPDIR/percona-toolkit{2,-mysql-databases,-mysql-processlist,-noncounters} \ + $TMPDIR/percona-toolkit-mysql{dump,-slave}; do case "$1" in touch) @@ -127,16 +168,16 @@ secs_to_time () { }' } -# gets a value from /tmp/percona-toolkit-mysql-variables. Returns zero if it doesn't +# gets a value from $TMPDIR/percona-toolkit-mysql-variables. Returns zero if it doesn't # exist. 
get_var () { - v="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" /tmp/percona-toolkit-mysql-variables)" + v="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" $TMPDIR/percona-toolkit-mysql-variables)" echo "${v:-0}" } # Returns true if a variable exists var_exists () { - $AP_GREP "$1" /tmp/percona-toolkit-mysql-variables >/dev/null 2>&1; + $AP_GREP "$1" $TMPDIR/percona-toolkit-mysql-variables >/dev/null 2>&1; } # Returns "Enabled", "Disabled", or "Not Supported" depending on whether the @@ -145,7 +186,7 @@ var_exists () { # (string equal) to some value. feat_on() { if var_exists $1 ; then - var="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" /tmp/percona-toolkit-mysql-variables)" + var="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" $TMPDIR/percona-toolkit-mysql-variables)" if [ "${var}" = "ON" ]; then echo "Enabled" elif [ "${var}" = "OFF" -o "${var}" = "0" -o -z "${var}" ]; then @@ -172,10 +213,10 @@ feat_on() { fi } -# gets a value from /tmp/percona-toolkit-mysql-status. Returns zero if it doesn't +# gets a value from $TMPDIR/percona-toolkit-mysql-status. Returns zero if it doesn't # exist. get_stat () { - v="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" /tmp/percona-toolkit-mysql-status)" + v="$($AP_AWK "\$1 ~ /^$1$/ { print \$2 }" $TMPDIR/percona-toolkit-mysql-status)" echo "${v:-0}" } @@ -195,12 +236,12 @@ fuzzy_pct () { # Functions for parsing specific files and getting desired info from them. # These are called from within main() and are separated so they can be tested # easily. The calling convention is that the data they need to run is prepared -# first by putting it into /tmp/percona-toolkit. Then code that's testing just needs to -# put sample data into /tmp/percona-toolkit and call it. +# first by putting it into $TMPDIR/percona-toolkit. Then code that's testing +# just needs to put sample data into $TMPDIR/percona-toolkit and call it. # ############################################################################## # Parses the output of 'ps -e -o args | $AP_GREP mysqld' or 'ps auxww...' 
-# which should be in /tmp/percona-toolkit. +# which should be in $TMPDIR/percona-toolkit. parse_mysqld_instances () { local file=$1 echo " Port Data Directory Socket" @@ -224,7 +265,7 @@ parse_mysqld_instances () { } # Tries to find the my.cnf file by examining 'ps' output, which should be in -# /tmp/percona-toolkit. You have to specify the port for the instance you are +# $TMPDIR/percona-toolkit. You have to specify the port for the instance you are # interested in, in case there are multiple instances. find_my_cnf_file() { local file=$1 @@ -240,7 +281,7 @@ find_my_cnf_file() { fi } -# Gets the MySQL system time. Uses input from /tmp/percona-toolkit-mysql-variables. +# Gets the MySQL system time. Uses input from $TMPDIR/percona-toolkit-mysql-variables. get_mysql_timezone () { tz="$(get_var time_zone)" if [ "${tz}" = "SYSTEM" ]; then @@ -249,14 +290,14 @@ get_mysql_timezone () { echo "${tz}" } -# Gets the MySQL system version. Uses input from /tmp/percona-toolkit-mysql-variables. +# Gets the MySQL system version. Uses input from $TMPDIR/percona-toolkit-mysql-variables. get_mysql_version () { name_val Version "$(get_var version) $(get_var version_comment)" name_val "Built On" "$(get_var version_compile_os) $(get_var version_compile_machine)" } # Gets the system start and uptime in human readable format. Last restart date -# should be in /tmp/percona-toolkit. +# should be in $TMPDIR/percona-toolkit. get_mysql_uptime () { local file=$1 restart="$(cat $file)" @@ -265,7 +306,7 @@ get_mysql_uptime () { echo "${restart} (up ${uptime})" } -# Summarizes the output of SHOW MASTER LOGS, which is in /tmp/percona-toolkit +# Summarizes the output of SHOW MASTER LOGS, which is in $TMPDIR/percona-toolkit summarize_binlogs () { local file=$1 name_val "Binlogs" $(wc -l $file) @@ -282,7 +323,7 @@ format_binlog_filters () { } # Takes as input a file that has two samples of SHOW STATUS, columnized next to -# each other. These should be in /tmp/percona-toolkit. 
Outputs fuzzy-ed numbers: +# each other. These should be in $TMPDIR/percona-toolkit. Outputs fuzzy-ed numbers: # absolute, all-time per second, and per-second over the interval between the # samples. Omits any rows that are all zeroes. format_status_variables () { @@ -387,7 +428,7 @@ summarize_processlist () { echo } -# Pretty-prints the my.cnf file, which should be in /tmp/percona-toolkit. It's super +# Pretty-prints the my.cnf file, which should be in $TMPDIR/percona-toolkit. It's super # annoying, but some *modern* versions of awk don't support POSIX character # sets in regular expressions, like [[:space:]] (looking at you, Debian). So # the below patterns contain [] and must remain that way. @@ -545,8 +586,8 @@ format_innodb_status () { name_val "Pending I/O Writes" "$(find_pending_io_writes "${file}")" name_val "Pending I/O Flushes" "$(find_pending_io_flushes "${file}")" $AP_AWK -F, '/^---TRANSACTION/{print $2}' "${file}" \ - | $AP_SED -e 's/ [0-9]* sec.*//' | sort | uniq -c > /tmp/percona-toolkit2 - name_val "Transaction States" "$(group_concat /tmp/percona-toolkit2)" + | $AP_SED -e 's/ [0-9]* sec.*//' | sort | uniq -c > $TMPDIR/percona-toolkit2 + name_val "Transaction States" "$(group_concat $TMPDIR/percona-toolkit2)" if $AP_GREP 'TABLE LOCK table' "${file}" >/dev/null ; then echo "Tables Locked" $AP_AWK '/^TABLE LOCK table/{print $4}' "${file}" \ @@ -633,9 +674,9 @@ format_overall_db_stats () { printf fmt, db, counts[db ",tables"], counts[db ",views"], counts[db ",sps"], counts[db ",trg"], counts[db ",func"], counts[db ",fk"], counts[db ",partn"]; } } - ' $file > /tmp/percona-toolkit - head -n2 /tmp/percona-toolkit - tail -n +3 /tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + head -n2 $TMPDIR/percona-toolkit + tail -n +3 $TMPDIR/percona-toolkit | sort echo # Now do the summary of engines per DB @@ -693,9 +734,9 @@ format_overall_db_stats () { print ""; } } - ' $file > /tmp/percona-toolkit - head -n1 /tmp/percona-toolkit - tail -n +2 
/tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + head -n1 $TMPDIR/percona-toolkit + tail -n +2 $TMPDIR/percona-toolkit | sort echo # Now do the summary of index types per DB. Careful -- index is a reserved @@ -766,9 +807,9 @@ format_overall_db_stats () { print ""; } } - ' $file > /tmp/percona-toolkit - head -n1 /tmp/percona-toolkit - tail -n +2 /tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + head -n1 $TMPDIR/percona-toolkit + tail -n +2 $TMPDIR/percona-toolkit | sort echo # Now do the summary of datatypes per DB @@ -857,10 +898,10 @@ format_overall_db_stats () { print ""; } } - ' $file > /tmp/percona-toolkit - hdr=$($AP_GREP -n Database /tmp/percona-toolkit | cut -d: -f1); - head -n${hdr} /tmp/percona-toolkit - tail -n +$((${hdr} + 1)) /tmp/percona-toolkit | sort + ' $file > $TMPDIR/percona-toolkit + hdr=$($AP_GREP -n Database $TMPDIR/percona-toolkit | cut -d: -f1); + head -n${hdr} $TMPDIR/percona-toolkit + tail -n +$((${hdr} + 1)) $TMPDIR/percona-toolkit | sort echo } @@ -878,6 +919,7 @@ main() { export PATH="/usr/gnu/bin/:/usr/xpg4/bin/:${PATH}" # Set up temporary files. + mk_tmpdir temp_files "rm" temp_files "touch" @@ -887,25 +929,26 @@ main() { section Percona_Toolkit_MySQL_Summary_Report name_val "System time" "`date -u +'%F %T UTC'` (local TZ: `date +'%Z %z'`)" section Instances - ps auxww 2>/dev/null | $AP_GREP mysqld > /tmp/percona-toolkit - parse_mysqld_instances /tmp/percona-toolkit + ps auxww 2>/dev/null | $AP_GREP mysqld > $TMPDIR/percona-toolkit + parse_mysqld_instances $TMPDIR/percona-toolkit # ######################################################################## # Fetch some basic info so we can start # ######################################################################## - mysql "$@" -ss -e 'SELECT CURRENT_USER()' > /tmp/percona-toolkit + mysql "$@" -ss -e 'SELECT CURRENT_USER()' > $TMPDIR/percona-toolkit if [ "$?" != "0" ]; then echo "Cannot connect to mysql, please specify command-line options." 
temp_files "rm" + rm_tmpdir exit 1 fi - user="$(cat /tmp/percona-toolkit)"; - mysql "$@" -ss -e 'SHOW /*!40100 GLOBAL*/ VARIABLES' > /tmp/percona-toolkit-mysql-variables - mysql "$@" -ss -e 'SHOW /*!50000 GLOBAL*/ STATUS' > /tmp/percona-toolkit-mysql-status - mysql "$@" -ss -e 'SHOW DATABASES' > /tmp/percona-toolkit-mysql-databases 2>/dev/null - mysql "$@" -ssE -e 'SHOW SLAVE STATUS' > /tmp/percona-toolkit-mysql-slave 2>/dev/null - mysql "$@" -ssE -e 'SHOW /*!50000 ENGINE*/ INNODB STATUS' > /tmp/percona-toolkit-innodb-status 2>/dev/null - mysql "$@" -ssE -e 'SHOW FULL PROCESSLIST' > /tmp/percona-toolkit-mysql-processlist 2>/dev/null + user="$(cat $TMPDIR/percona-toolkit)"; + mysql "$@" -ss -e 'SHOW /*!40100 GLOBAL*/ VARIABLES' > $TMPDIR/percona-toolkit-mysql-variables + mysql "$@" -ss -e 'SHOW /*!50000 GLOBAL*/ STATUS' > $TMPDIR/percona-toolkit-mysql-status + mysql "$@" -ss -e 'SHOW DATABASES' > $TMPDIR/percona-toolkit-mysql-databases 2>/dev/null + mysql "$@" -ssE -e 'SHOW SLAVE STATUS' > $TMPDIR/percona-toolkit-mysql-slave 2>/dev/null + mysql "$@" -ssE -e 'SHOW /*!50000 ENGINE*/ INNODB STATUS' > $TMPDIR/percona-toolkit-innodb-status 2>/dev/null + mysql "$@" -ssE -e 'SHOW FULL PROCESSLIST' > $TMPDIR/percona-toolkit-mysql-processlist 2>/dev/null now="$(mysql "$@" -ss -e 'SELECT NOW()')" port="$(get_var port)" @@ -920,16 +963,16 @@ main() { uptime="$(get_stat Uptime)" mysql "$@" -ss -e "SELECT LEFT(NOW() - INTERVAL ${uptime} SECOND, 16)" \ - > /tmp/percona-toolkit - name_val Started "$(get_mysql_uptime /tmp/percona-toolkit)" + > $TMPDIR/percona-toolkit + name_val Started "$(get_mysql_uptime $TMPDIR/percona-toolkit)" - name_val Databases "$($AP_GREP -c . /tmp/percona-toolkit-mysql-databases)" + name_val Databases "$($AP_GREP -c . 
$TMPDIR/percona-toolkit-mysql-databases)" name_val Datadir "$(get_var datadir)" procs="$(get_stat Threads_connected)" procr="$(get_stat Threads_running)" name_val Processes "$(fuzz ${procs}) connected, $(fuzz ${procr}) running" - if [ -s /tmp/percona-toolkit-mysql-slave ]; then slave=""; else slave="not "; fi - slavecount=$($AP_GREP -c 'Binlog Dump' /tmp/percona-toolkit-mysql-processlist) + if [ -s $TMPDIR/percona-toolkit-mysql-slave ]; then slave=""; else slave="not "; fi + slavecount=$($AP_GREP -c 'Binlog Dump' $TMPDIR/percona-toolkit-mysql-processlist) name_val Replication "Is ${slave}a slave, has ${slavecount} slaves connected" # TODO move this into a section with other files: error log, slow log and @@ -942,7 +985,7 @@ main() { # Processlist, sliced several different ways # ######################################################################## section Processlist - summarize_processlist /tmp/percona-toolkit-mysql-processlist + summarize_processlist $TMPDIR/percona-toolkit-mysql-processlist # ######################################################################## # Queries and query plans @@ -951,7 +994,7 @@ main() { sleep 10 # TODO: gather this data in the same format as normal: stats, TS line mysql "$@" -ss -e 'SHOW /*!50000 GLOBAL*/ STATUS' \ - | join /tmp/percona-toolkit-mysql-status - > /tmp/percona-toolkit + | join $TMPDIR/percona-toolkit-mysql-status - > $TMPDIR/percona-toolkit # Make a file with a list of things we want to omit because they aren't # counters, they are gauges (in RRDTool terminology). Gauges are shown # elsewhere in the output. 
@@ -975,9 +1018,9 @@ main() { Threads_cached Threads_connected Threads_running \ Uptime_since_flush_status; do - echo "${var}" >> /tmp/percona-toolkit-noncounters + echo "${var}" >> $TMPDIR/percona-toolkit-noncounters done - format_status_variables /tmp/percona-toolkit | $AP_GREP -v -f /tmp/percona-toolkit-noncounters + format_status_variables $TMPDIR/percona-toolkit | $AP_GREP -v -f $TMPDIR/percona-toolkit-noncounters # ######################################################################## # Table cache @@ -1054,22 +1097,22 @@ main() { trg_arg="${trg_arg} ${triggers}"; fi # Find out which databases to dump - num_dbs="$($AP_GREP -c . /tmp/percona-toolkit-mysql-databases)" + num_dbs="$($AP_GREP -c . $TMPDIR/percona-toolkit-mysql-databases)" echo "There are ${num_dbs} databases. Would you like to dump all, or just one?" echo -n "Type the name of the database, or press Enter to dump all of them. " read dbtodump mysqldump "$@" --no-data --skip-comments \ --skip-add-locks --skip-add-drop-table --compact \ --skip-lock-all-tables --skip-lock-tables --skip-set-charset \ - ${trg_arg} ${dbtodump:---all-databases} > /tmp/percona-toolkit-mysqldump + ${trg_arg} ${dbtodump:---all-databases} > $TMPDIR/percona-toolkit-mysqldump # Test the result by checking the file, not by the exit status, because we # might get partway through and then die, and the info is worth analyzing # anyway. 
- if $AP_GREP 'CREATE TABLE' /tmp/percona-toolkit-mysqldump >/dev/null 2>&1; then - format_overall_db_stats /tmp/percona-toolkit-mysqldump + if $AP_GREP 'CREATE TABLE' $TMPDIR/percona-toolkit-mysqldump >/dev/null 2>&1; then + format_overall_db_stats $TMPDIR/percona-toolkit-mysqldump else echo "Skipping schema analysis due to apparent error in dump file" - rm -f /tmp/percona-toolkit-mysqldump + rm -f $TMPDIR/percona-toolkit-mysqldump fi else echo "Skipping schema analysis" @@ -1079,23 +1122,23 @@ main() { # Noteworthy Technologies # ######################################################################## section Noteworthy_Technologies - if [ -e /tmp/percona-toolkit-mysqldump ]; then - if $AP_GREP FULLTEXT /tmp/percona-toolkit-mysqldump > /dev/null; then + if [ -e $TMPDIR/percona-toolkit-mysqldump ]; then + if $AP_GREP FULLTEXT $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Full Text Indexing" Yes else name_val "Full Text Indexing" No fi - if $AP_GREP 'GEOMETRY\|POINT\|LINESTRING\|POLYGON' /tmp/percona-toolkit-mysqldump > /dev/null; then + if $AP_GREP 'GEOMETRY\|POINT\|LINESTRING\|POLYGON' $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Geospatial Types" Yes else name_val "Geospatial Types" No fi - if $AP_GREP 'FOREIGN KEY' /tmp/percona-toolkit-mysqldump > /dev/null; then + if $AP_GREP 'FOREIGN KEY' $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Foreign Keys" Yes else name_val "Foreign Keys" No fi - if $AP_GREP 'PARTITION BY' /tmp/percona-toolkit-mysqldump > /dev/null; then + if $AP_GREP 'PARTITION BY' $TMPDIR/percona-toolkit-mysqldump > /dev/null; then name_val "Partitioning" Yes else name_val "Partitioning" No @@ -1175,8 +1218,8 @@ main() { name_val "Adaptive Flushing" $(get_var innodb_adaptive_flushing) name_val "Adaptive Checkpoint" $(get_var innodb_adaptive_checkpoint) - if [ -s /tmp/percona-toolkit-innodb-status ]; then - format_innodb_status /tmp/percona-toolkit-innodb-status + if [ -s 
$TMPDIR/percona-toolkit-innodb-status ]; then + format_innodb_status $TMPDIR/percona-toolkit-innodb-status fi fi @@ -1211,15 +1254,15 @@ main() { section Binary_Logging binlog=$(get_var log_bin) if [ "${binlog}" ]; then - mysql "$@" -ss -e 'SHOW MASTER LOGS' > /tmp/percona-toolkit 2>/dev/null - summarize_binlogs /tmp/percona-toolkit + mysql "$@" -ss -e 'SHOW MASTER LOGS' > $TMPDIR/percona-toolkit 2>/dev/null + summarize_binlogs $TMPDIR/percona-toolkit format="$(get_var binlog_format)" name_val binlog_format "${format:-STATEMENT}" name_val expire_logs_days $(get_var expire_logs_days) name_val sync_binlog $(get_var sync_binlog) name_val server_id $(get_var server_id) - mysql "$@" -ss -e 'SHOW MASTER STATUS' > /tmp/percona-toolkit 2>/dev/null - format_binlog_filters /tmp/percona-toolkit + mysql "$@" -ss -e 'SHOW MASTER STATUS' > $TMPDIR/percona-toolkit 2>/dev/null + format_binlog_filters $TMPDIR/percona-toolkit fi # Replication: seconds behind, running, filters, skip_slave_start, skip_errors, @@ -1252,8 +1295,8 @@ main() { # If there is a my.cnf in a standard location, see if we can pretty-print it. # ######################################################################## section Configuration_File - ps auxww 2>/dev/null | $AP_GREP mysqld > /tmp/percona-toolkit - cnf_file=$(find_my_cnf_file /tmp/percona-toolkit ${port}); + ps auxww 2>/dev/null | $AP_GREP mysqld > $TMPDIR/percona-toolkit + cnf_file=$(find_my_cnf_file $TMPDIR/percona-toolkit ${port}); if [ ! 
-e "${cnf_file}" ]; then name_val "Config File" "Cannot autodetect, trying common locations" cnf_file="/etc/my.cnf"; @@ -1266,13 +1309,14 @@ main() { fi if [ -e "${cnf_file}" ]; then name_val "Config File" "${cnf_file}" - cat "${cnf_file}" > /tmp/percona-toolkit - pretty_print_cnf_file /tmp/percona-toolkit + cat "${cnf_file}" > $TMPDIR/percona-toolkit + pretty_print_cnf_file $TMPDIR/percona-toolkit else name_val "Config File" "Cannot autodetect or find, giving up" fi temp_files "rm" + rm_tmpdir # Make sure that we signal the end of the tool's output. section The_End @@ -1325,8 +1369,8 @@ See also L<"BUGS"> for more information on filing bugs and getting help. pt-mysql-summary works by connecting to a MySQL database server and querying it for status and configuration information. It saves these bits of data -into files in /tmp, and then formats them neatly with awk and other scripting -languages. +into files in a temporary directory, and then formats them neatly with awk +and other scripting languages. To use, simply execute it. Optionally add the same command-line options you would use to connect to MySQL, like C. 
diff --git a/t/pt-mysql-summary/get_mysql_info.sh b/t/pt-mysql-summary/get_mysql_info.sh index 0accdf52..78a181c9 100644 --- a/t/pt-mysql-summary/get_mysql_info.sh +++ b/t/pt-mysql-summary/get_mysql_info.sh @@ -3,14 +3,14 @@ TESTS=3 TEST_NAME="get_mysql_timezone" -cp samples/mysql-variables-001.txt /tmp/percona-toolkit-mysql-variables +cp samples/mysql-variables-001.txt $TMPDIR/percona-toolkit-mysql-variables is $(get_mysql_timezone) "EDT" TEST_NAME="get_mysql_uptime" cat < $TMPDIR/expected 2010-05-27 11:38 (up 0+02:08:52) EOF -cp samples/mysql-status-001.txt /tmp/percona-toolkit-mysql-status +cp samples/mysql-status-001.txt $TMPDIR/percona-toolkit-mysql-status echo "2010-05-27 11:38" > $TMPDIR/in get_mysql_uptime $TMPDIR/in > $TMPDIR/got no_diff $TMPDIR/got $TMPDIR/expected @@ -20,6 +20,6 @@ cat < $TMPDIR/expected Version | 5.0.51a-24+lenny2 (Debian) Built On | debian-linux-gnu i486 EOF -cp samples/mysql-variables-001.txt /tmp/percona-toolkit-mysql-variables +cp samples/mysql-variables-001.txt $TMPDIR/percona-toolkit-mysql-variables get_mysql_version > $TMPDIR/got no_diff $TMPDIR/got $TMPDIR/expected From e1f31e99f1b7d8ed7878fdce895f293229e7f36b Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 3 Jan 2012 14:54:15 -0300 Subject: [PATCH 22/71] pt-mext: Use mktemp through the tmpdir package. --- bin/pt-mext | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/bin/pt-mext b/bin/pt-mext index 63eecd23..cf5da66e 100755 --- a/bin/pt-mext +++ b/bin/pt-mext @@ -17,7 +17,50 @@ if [ -z "$1" ]; then usage; fi -FILE=/tmp/mext_temp_file; +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. 
+# ########################################################################### + +# pt-mext isn't ready for this yet +#set -u + +TMPDIR="" + +mk_tmpdir() { + local dir=${1:-""} + + if [ -n "$dir" ]; then + if [ ! -d "$dir" ]; then + mkdir $dir || die "Cannot make tmpdir $dir" + fi + TMPDIR="$dir" + else + local tool=`basename $0` + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf $TMPDIR + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + +mk_tmpdir + +FILE=`mktemp -p $TMPDIR mext_temp_file.XXXXXX`; NUM=0; REL=0; rm -f $FILE*; From f6382f03d8d55198c7368bb79f2aa2731e01343d Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 3 Jan 2012 14:54:27 -0300 Subject: [PATCH 23/71] pt-sift: Use mktemp through the tmpdir package. --- bin/pt-sift | 67 ++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/bin/pt-sift b/bin/pt-sift index 21d723bf..8282ce4f 100755 --- a/bin/pt-sift +++ b/bin/pt-sift @@ -13,6 +13,47 @@ usage() { exit 1 } +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +# pt-sift isn't ready for this yet. +#set -u + +TMPDIR="" + +mk_tmpdir() { + local dir=${1:-""} + + if [ -n "$dir" ]; then + if [ ! 
-d "$dir" ]; then + mkdir $dir || die "Cannot make tmpdir $dir" + fi + TMPDIR="$dir" + else + local tool=`basename $0` + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf $TMPDIR + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + # Show current help and settings print_help() { cat <<-HELP @@ -89,19 +130,22 @@ main() { fi done + # Make a secure tmpdir. + mk_tmpdir + # We need to generate a list of timestamps, and ask the user to choose one if # there is no PREFIX yet. NOTE: we rely on the "-df" files here. - ls "${BASEDIR}" | grep -- '-df$' | cut -d- -f1 | sort > /tmp/pt-sift.prefixes + ls "${BASEDIR}" | grep -- '-df$' | cut -d- -f1 | sort > $TMPDIR/pt-sift.prefixes if [ -z "${PREFIX}" ]; then - if [ "$(grep -c . /tmp/pt-sift.prefixes)" = "1" ]; then + if [ "$(grep -c . $TMPDIR/pt-sift.prefixes)" = "1" ]; then # If there is only one sample, we use it as the prefix. - PREFIX="$(cat /tmp/pt-sift.prefixes)" + PREFIX="$(cat $TMPDIR/pt-sift.prefixes)" fi fi if [ -z "${PREFIX}" ]; then echo i=0 - cat /tmp/pt-sift.prefixes | while read line; do + cat $TMPDIR/pt-sift.prefixes | while read line; do i=$(($i + 1)) echo -n " $line" if [ "${i}" = "3" ]; then @@ -111,14 +155,14 @@ main() { done # We might have ended mid-line or we might have printed a newline; print a # newline if required to end the list of timestamp prefixes. 
- awk 'BEGIN { i = 0 } { i++ } END { if ( i % 3 != 0 ) { print "" } }' /tmp/pt-sift.prefixes + awk 'BEGIN { i = 0 } { i++ } END { if ( i % 3 != 0 ) { print "" } }' $TMPDIR/pt-sift.prefixes echo - while [ -z "${PREFIX}" -o "$(grep -c "${PREFIX}" /tmp/pt-sift.prefixes)" -ne 1 ]; do - DEFAULT="$(tail -1 /tmp/pt-sift.prefixes)" + while [ -z "${PREFIX}" -o "$(grep -c "${PREFIX}" $TMPDIR/pt-sift.prefixes)" -ne 1 ]; do + DEFAULT="$(tail -1 $TMPDIR/pt-sift.prefixes)" read -e -p "Select a timestamp from the list [${DEFAULT}] " ARG ARG="${ARG:-${DEFAULT}}" - if [ "$(grep -c "${ARG}" /tmp/pt-sift.prefixes)" -eq 1 ]; then - PREFIX="$(grep "${ARG}" /tmp/pt-sift.prefixes)" + if [ "$(grep -c "${ARG}" $TMPDIR/pt-sift.prefixes)" -eq 1 ]; then + PREFIX="$(grep "${ARG}" $TMPDIR/pt-sift.prefixes)" fi done fi @@ -130,7 +174,7 @@ main() { if [ "${ACTION}" != "INVALID" ]; then # Print the current host, timestamp and action. Figure out if we're at # the first or last sample, to make it easy to navigate. - PAGE="$(awk "/./{i++} /${PREFIX}/{c=i} END{print c, \"of\", i}" /tmp/pt-sift.prefixes)" + PAGE="$(awk "/./{i++} /${PREFIX}/{c=i} END{print c, \"of\", i}" $TMPDIR/pt-sift.prefixes)" HOST="$(cat "${BASEDIR}/${PREFIX}-hostname" 2>/dev/null)" echo -e "======== ${HOST:-unknown} at \033[34m${PREFIX} \033[31m${ACTION}\033[0m (${PAGE}) ========" fi @@ -438,7 +482,7 @@ main() { if ( printed == 0 ) { print \"${PREFIX}\"; } - }" /tmp/pt-sift.prefixes)" + }" $TMPDIR/pt-sift.prefixes)" ;; 1) ACTION="DEFAULT" @@ -475,6 +519,7 @@ main() { esac done + rm_tmpdir } # Execute the program if it was not included from another file. This makes it From 9facff62af82131217e4732effdd946017590519 Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 3 Jan 2012 14:54:38 -0300 Subject: [PATCH 24/71] pt-summary: Use mktemp through the tmpdir package. 
--- bin/pt-summary | 199 ++++++++++++++++++++++++++++++------------------- 1 file changed, 121 insertions(+), 78 deletions(-) diff --git a/bin/pt-summary b/bin/pt-summary index af8560e1..9b7f62cf 100755 --- a/bin/pt-summary +++ b/bin/pt-summary @@ -44,13 +44,54 @@ fuzz () { echo $1 | $AP_AWK "{fuzzy_var=\$1; ${fuzzy_formula} print fuzzy_var;}" } +# ########################################################################### +# tmpdir package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/tmpdir.sh +# t/lib/bash/tmpdir.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + +# pt-summary isn't ready for this yet. +#set -u + +TMPDIR="" + +mk_tmpdir() { + local dir=${1:-""} + + if [ -n "$dir" ]; then + if [ ! -d "$dir" ]; then + mkdir $dir || die "Cannot make tmpdir $dir" + fi + TMPDIR="$dir" + else + local tool=`basename $0` + local pid="$$" + TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ + || die "Cannot make secure tmpdir" + fi +} + +rm_tmpdir() { + if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then + rm -rf $TMPDIR + fi + TMPDIR="" +} + +# ########################################################################### +# End tmpdir package +# ########################################################################### + # The temp files are for storing working results so we don't call commands many # times (gives inconsistent results, maybe adds load on things I don't want to # such as RAID controllers). They must not exist -- if they did, someone would # symlink them to /etc/passwd and then run this program as root. Call this # function with "rm" or "touch" as an argument. temp_files() { - for file in /tmp/percona-toolkit /tmp/percona-toolkit2; do + for file in $TMPDIR/percona-toolkit $TMPDIR/percona-toolkit2; do case "$1" in touch) if ! 
touch "${file}"; then @@ -128,12 +169,12 @@ group_concat () { # Functions for parsing specific files and getting desired info from them. # These are called from within main() and are separated so they can be tested # easily. The calling convention is that the data they need to run is prepared -# first by putting it into /tmp/percona-toolkit. Then code that's testing just needs to -# put sample data into /tmp/percona-toolkit and call it. +# first by putting it into $TMPDIR/percona-toolkit. Then code that's testing +# just needs to put sample data into $TMPDIR/percona-toolkit and call it. # ############################################################################## # ############################################################################## -# Parse Linux's /proc/cpuinfo, which should be stored in /tmp/percona-toolkit. +# Parse Linux's /proc/cpuinfo, which should be stored in $TMPDIR/percona-toolkit. # ############################################################################## parse_proc_cpuinfo () { local file=$1 @@ -189,8 +230,8 @@ parse_psrinfo_cpus() { start = index($0, " at ") + 4; end = length($0) - start - 4 print substr($0, start, end); - }' "$1" | sort | uniq -c > /tmp/percona-toolkit2 - name_val "Speeds" "$(group_concat /tmp/percona-toolkit2)" + }' "$1" | sort | uniq -c > $TMPDIR/percona-toolkit2 + name_val "Speeds" "$(group_concat $TMPDIR/percona-toolkit2)" } # ############################################################################## @@ -292,7 +333,7 @@ parse_ip_s_link () { } # ############################################################################## -# Parse the output of 'netstat -antp' which should be in /tmp/percona-toolkit. +# Parse the output of 'netstat -antp' which should be in $TMPDIR/percona-toolkit. 
# ############################################################################## parse_netstat () { local file=$1 @@ -397,7 +438,7 @@ parse_filesystems () { } # ############################################################################## -# Parse the output of fdisk -l, which should be in /tmp/percona-toolkit; there might be +# Parse the output of fdisk -l, which should be in $TMPDIR/percona-toolkit; there might be # multiple fdisk -l outputs in the file. # ############################################################################## parse_fdisk () { @@ -431,7 +472,7 @@ parse_fdisk () { } # ############################################################################## -# Parse the output of dmesg, which should be in /tmp/percona-toolkit, and detect +# Parse the output of dmesg, which should be in $TMPDIR/percona-toolkit, and detect # virtualization. # ############################################################################## parse_virtualization_dmesg () { @@ -463,7 +504,7 @@ parse_virtualization_generic() { } # ############################################################################## -# Parse the output of lspci, which should be in /tmp/percona-toolkit, and detect +# Parse the output of lspci, which should be in $TMPDIR/percona-toolkit, and detect # Ethernet cards. # ############################################################################## parse_ethernet_controller_lspci () { @@ -474,7 +515,7 @@ parse_ethernet_controller_lspci () { } # ############################################################################## -# Parse the output of lspci, which should be in /tmp/percona-toolkit, and detect RAID +# Parse the output of lspci, which should be in $TMPDIR/percona-toolkit, and detect RAID # controllers. 
# ############################################################################## parse_raid_controller_lspci () { @@ -497,7 +538,7 @@ parse_raid_controller_lspci () { } # ############################################################################## -# Parse the output of dmesg, which should be in /tmp/percona-toolkit, and detect RAID +# Parse the output of dmesg, which should be in $TMPDIR/percona-toolkit, and detect RAID # controllers. # ############################################################################## parse_raid_controller_dmesg () { @@ -516,7 +557,7 @@ parse_raid_controller_dmesg () { # ############################################################################## # Parse the output of "hpacucli ctrl all show config", which should be stored in -# /tmp/percona-toolkit +# $TMPDIR/percona-toolkit # ############################################################################## parse_hpacucli () { local file=$1 @@ -524,7 +565,7 @@ parse_hpacucli () { } # ############################################################################## -# Parse the output of arcconf, which should be stored in /tmp/percona-toolkit +# Parse the output of arcconf, which should be stored in $TMPDIR/percona-toolkit # ############################################################################## parse_arcconf () { local file=$1 @@ -634,7 +675,7 @@ parse_fusionmpt_lsiutil () { } # ############################################################################## -# Parse the output of MegaCli64 -AdpAllInfo -aALL from /tmp/percona-toolkit. +# Parse the output of MegaCli64 -AdpAllInfo -aALL from $TMPDIR/percona-toolkit. 
# ############################################################################## parse_lsi_megaraid_adapter_info () { local file=$1 @@ -653,7 +694,7 @@ parse_lsi_megaraid_adapter_info () { } # ############################################################################## -# Parse the output (saved in /tmp/percona-toolkit) of +# Parse the output (saved in $TMPDIR/percona-toolkit) of # /opt/MegaRAID/MegaCli/MegaCli64 -AdpBbuCmd -GetBbuStatus -aALL # ############################################################################## parse_lsi_megaraid_bbu_status () { @@ -665,7 +706,7 @@ parse_lsi_megaraid_bbu_status () { } # ############################################################################## -# Parse physical devices from the output (saved in /tmp/percona-toolkit) of +# Parse physical devices from the output (saved in $TMPDIR/percona-toolkit) of # /opt/MegaRAID/MegaCli/MegaCli64 -LdPdInfo -aALL # OR, it will also work with the output of # /opt/MegaRAID/MegaCli/MegaCli64 -PDList -aALL @@ -694,7 +735,7 @@ parse_lsi_megaraid_devices () { } # ############################################################################## -# Parse virtual devices from the output (saved in /tmp/percona-toolkit) of +# Parse virtual devices from the output (saved in $TMPDIR/percona-toolkit) of # /opt/MegaRAID/MegaCli/MegaCli64 -LdPdInfo -aALL # OR, it will also work with the output of # /opt/MegaRAID/MegaCli/MegaCli64 -LDInfo -Lall -aAll @@ -826,6 +867,7 @@ main () { export PATH="${PATH}:/usr/StorMan/:/opt/MegaRAID/MegaCli/"; # Set up temporary files. + mk_tmpdir temp_files "rm" temp_files "touch" section Percona_Toolkit_System_Summary_Report @@ -833,7 +875,7 @@ main () { # ######################################################################## # Grab a bunch of stuff and put it into temp files for later. 
# ######################################################################## - sysctl -a > /tmp/percona-toolkit.sysctl 2>/dev/null + sysctl -a > $TMPDIR/percona-toolkit.sysctl 2>/dev/null # ######################################################################## # General date, time, load, etc @@ -939,19 +981,19 @@ main () { # available to non-root users and usually has telltale signs. It's most # reliable to look at /var/log/dmesg if possible. There are a number of # other ways to find out if a system is virtualized. - cat /var/log/dmesg > /tmp/percona-toolkit 2>/dev/null - if [ ! -s /tmp/percona-toolkit ]; then - dmesg > /tmp/percona-toolkit 2>/dev/null + cat /var/log/dmesg > $TMPDIR/percona-toolkit 2>/dev/null + if [ ! -s $TMPDIR/percona-toolkit ]; then + dmesg > $TMPDIR/percona-toolkit 2>/dev/null fi - if [ -s /tmp/percona-toolkit ]; then - virt="$(parse_virtualization_dmesg /tmp/percona-toolkit)" + if [ -s $TMPDIR/percona-toolkit ]; then + virt="$(parse_virtualization_dmesg $TMPDIR/percona-toolkit)" fi if [ -z "${virt}" ]; then if which lspci >/dev/null 2>&1; then - lspci > /tmp/percona-toolkit 2>/dev/null - if grep -qi virtualbox /tmp/percona-toolkit; then + lspci > $TMPDIR/percona-toolkit 2>/dev/null + if grep -qi virtualbox $TMPDIR/percona-toolkit; then virt=VirtualBox - elif grep -qi vmware /tmp/percona-toolkit; then + elif grep -qi vmware $TMPDIR/percona-toolkit; then virt=VMWare elif [ -e /proc/user_beancounters ]; then virt="OpenVZ/Virtuozzo" @@ -962,10 +1004,10 @@ main () { virt="FreeBSD Jail" fi elif [ "${platform}" = "SunOS" ]; then - if which prtdiag >/dev/null 2>&1 && prtdiag > /tmp/percona-toolkit.prtdiag 2>/dev/null; then - virt="$(parse_virtualization_generic /tmp/percona-toolkit.prtdiag)" - elif which smbios >/dev/null 2>&1 && smbios > /tmp/percona-toolkit.smbios 2>/dev/null; then - virt="$(parse_virtualization_generic /tmp/percona-toolkit.smbios)" + if which prtdiag >/dev/null 2>&1 && prtdiag > $TMPDIR/percona-toolkit.prtdiag 2>/dev/null; then + 
virt="$(parse_virtualization_generic $TMPDIR/percona-toolkit.prtdiag)" + elif which smbios >/dev/null 2>&1 && smbios > $TMPDIR/percona-toolkit.smbios 2>/dev/null; then + virt="$(parse_virtualization_generic $TMPDIR/percona-toolkit.smbios)" fi fi name_val Virtualized "${virt:-No virtualization detected}" @@ -975,23 +1017,23 @@ main () { # ######################################################################## section Processor if [ -f /proc/cpuinfo ]; then - cat /proc/cpuinfo > /tmp/percona-toolkit 2>/dev/null - parse_proc_cpuinfo /tmp/percona-toolkit + cat /proc/cpuinfo > $TMPDIR/percona-toolkit 2>/dev/null + parse_proc_cpuinfo $TMPDIR/percona-toolkit elif [ "${platform}" = "FreeBSD" ]; then - parse_sysctl_cpu_freebsd /tmp/percona-toolkit.sysctl + parse_sysctl_cpu_freebsd $TMPDIR/percona-toolkit.sysctl elif [ "${platform}" = "SunOS" ]; then - psrinfo -v > /tmp/percona-toolkit - parse_psrinfo_cpus /tmp/percona-toolkit + psrinfo -v > $TMPDIR/percona-toolkit + parse_psrinfo_cpus $TMPDIR/percona-toolkit # TODO: prtconf -v actually prints the CPU model name etc. 
fi section Memory if [ "${platform}" = "Linux" ]; then - free -b > /tmp/percona-toolkit - cat /proc/meminfo >> /tmp/percona-toolkit - parse_free_minus_b /tmp/percona-toolkit + free -b > $TMPDIR/percona-toolkit + cat /proc/meminfo >> $TMPDIR/percona-toolkit + parse_free_minus_b $TMPDIR/percona-toolkit elif [ "${platform}" = "FreeBSD" ]; then - parse_memory_sysctl_freebsd /tmp/percona-toolkit.sysctl + parse_memory_sysctl_freebsd $TMPDIR/percona-toolkit.sysctl elif [ "${platform}" = "SunOS" ]; then name_val Memory "$(prtconf | awk -F: '/Memory/{print $2}')" fi @@ -1007,8 +1049,8 @@ main () { fi fi - if which dmidecode >/dev/null 2>&1 && dmidecode > /tmp/percona-toolkit 2>/dev/null; then - parse_dmidecode_mem_devices /tmp/percona-toolkit + if which dmidecode >/dev/null 2>&1 && dmidecode > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_dmidecode_mem_devices $TMPDIR/percona-toolkit fi # ######################################################################## @@ -1023,25 +1065,25 @@ main () { if [ "${platform}" = "Linux" ]; then cmd="df -h -P" fi - $cmd | sort > /tmp/percona-toolkit2 - mount | sort | join /tmp/percona-toolkit2 - > /tmp/percona-toolkit - parse_filesystems /tmp/percona-toolkit "${platform}" + $cmd | sort > $TMPDIR/percona-toolkit2 + mount | sort | join $TMPDIR/percona-toolkit2 - > $TMPDIR/percona-toolkit + parse_filesystems $TMPDIR/percona-toolkit "${platform}" fi fi if [ "${platform}" = "Linux" ]; then section "Disk_Schedulers_And_Queue_Size" - echo "" > /tmp/percona-toolkit + echo "" > $TMPDIR/percona-toolkit for disk in $(ls /sys/block/ | grep -v -e ram -e loop -e 'fd[0-9]'); do if [ -e "/sys/block/${disk}/queue/scheduler" ]; then name_val "${disk}" "$(cat /sys/block/${disk}/queue/scheduler | grep -o '\[.*\]') $(cat /sys/block/${disk}/queue/nr_requests)" - fdisk -l "/dev/${disk}" >> /tmp/percona-toolkit 2>/dev/null + fdisk -l "/dev/${disk}" >> $TMPDIR/percona-toolkit 2>/dev/null fi done - # Relies on /tmp/percona-toolkit having data from the Disk 
Schedulers loop. + # Relies on $TMPDIR/percona-toolkit having data from the Disk Schedulers loop. section "Disk_Partioning" - parse_fdisk /tmp/percona-toolkit + parse_fdisk $TMPDIR/percona-toolkit section "Kernel_Inode_State" for file in dentry-state file-nr inode-nr; do @@ -1064,15 +1106,15 @@ main () { # often available to non-root users. It's most reliable to look at # /var/log/dmesg if possible. # ######################################################################## - if which lspci >/dev/null 2>&1 && lspci > /tmp/percona-toolkit 2>/dev/null; then - controller="$(parse_raid_controller_lspci /tmp/percona-toolkit)" + if which lspci >/dev/null 2>&1 && lspci > $TMPDIR/percona-toolkit 2>/dev/null; then + controller="$(parse_raid_controller_lspci $TMPDIR/percona-toolkit)" fi if [ -z "${controller}" ]; then - cat /var/log/dmesg > /tmp/percona-toolkit 2>/dev/null - if [ ! -s /tmp/percona-toolkit ]; then - dmesg > /tmp/percona-toolkit 2>/dev/null + cat /var/log/dmesg > $TMPDIR/percona-toolkit 2>/dev/null + if [ ! -s $TMPDIR/percona-toolkit ]; then + dmesg > $TMPDIR/percona-toolkit 2>/dev/null fi - controller="$(parse_raid_controller_dmesg /tmp/percona-toolkit)" + controller="$(parse_raid_controller_dmesg $TMPDIR/percona-toolkit)" fi name_val Controller "${controller:-No RAID controller detected}" @@ -1085,29 +1127,29 @@ main () { # ######################################################################## notfound="" if [ "${controller}" = "AACRAID" ]; then - if arcconf getconfig 1 > /tmp/percona-toolkit 2>/dev/null; then - parse_arcconf /tmp/percona-toolkit + if arcconf getconfig 1 > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_arcconf $TMPDIR/percona-toolkit elif ! which arcconf >/dev/null 2>&1; then notfound="e.g. 
http://www.adaptec.com/en-US/support/raid/scsi_raid/ASR-2120S/" fi elif [ "${controller}" = "HP Smart Array" ]; then - if hpacucli ctrl all show config > /tmp/percona-toolkit 2>/dev/null; then - parse_hpacucli /tmp/percona-toolkit + if hpacucli ctrl all show config > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_hpacucli $TMPDIR/percona-toolkit elif ! which hpacucli >/dev/null 2>&1; then notfound="your package repository or the manufacturer's website" fi elif [ "${controller}" = "LSI Logic MegaRAID SAS" ]; then - if MegaCli64 -AdpAllInfo -aALL -NoLog > /tmp/percona-toolkit 2>/dev/null; then - parse_lsi_megaraid_adapter_info /tmp/percona-toolkit + if MegaCli64 -AdpAllInfo -aALL -NoLog > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_lsi_megaraid_adapter_info $TMPDIR/percona-toolkit elif ! which MegaCli64 >/dev/null 2>&1; then notfound="your package repository or the manufacturer's website" fi - if MegaCli64 -AdpBbuCmd -GetBbuStatus -aALL -NoLog > /tmp/percona-toolkit 2>/dev/null; then - parse_lsi_megaraid_bbu_status /tmp/percona-toolkit + if MegaCli64 -AdpBbuCmd -GetBbuStatus -aALL -NoLog > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_lsi_megaraid_bbu_status $TMPDIR/percona-toolkit fi - if MegaCli64 -LdPdInfo -aALL -NoLog > /tmp/percona-toolkit 2>/dev/null; then - parse_lsi_megaraid_virtual_devices /tmp/percona-toolkit - parse_lsi_megaraid_devices /tmp/percona-toolkit + if MegaCli64 -LdPdInfo -aALL -NoLog > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_lsi_megaraid_virtual_devices $TMPDIR/percona-toolkit + parse_lsi_megaraid_devices $TMPDIR/percona-toolkit fi fi @@ -1122,8 +1164,8 @@ main () { # ##################################################################### if [ "${platform}" = "Linux" ]; then section Network_Config - if which lspci > /dev/null 2>&1 && lspci > /tmp/percona-toolkit 2>/dev/null; then - parse_ethernet_controller_lspci /tmp/percona-toolkit + if which lspci > /dev/null 2>&1 && lspci > $TMPDIR/percona-toolkit 2>/dev/null; then + 
parse_ethernet_controller_lspci $TMPDIR/percona-toolkit fi if sysctl net.ipv4.tcp_fin_timeout > /dev/null 2>&1; then name_val "FIN Timeout" "$(sysctl net.ipv4.tcp_fin_timeout)" @@ -1135,15 +1177,15 @@ main () { # /proc/sys/net/netfilter/nf_conntrack_max or /proc/sys/net/nf_conntrack_max # in new kernels like Fedora 12? - if which ip >/dev/null 2>&1 && ip -s link > /tmp/percona-toolkit 2>/dev/null; then + if which ip >/dev/null 2>&1 && ip -s link > $TMPDIR/percona-toolkit 2>/dev/null; then section Interface_Statistics - parse_ip_s_link /tmp/percona-toolkit + parse_ip_s_link $TMPDIR/percona-toolkit fi if [ "${platform}" = "Linux" ]; then section Network_Connections - if netstat -antp > /tmp/percona-toolkit 2>/dev/null; then - parse_netstat /tmp/percona-toolkit + if netstat -antp > $TMPDIR/percona-toolkit 2>/dev/null; then + parse_netstat $TMPDIR/percona-toolkit fi fi fi @@ -1164,12 +1206,12 @@ main () { fi if which vmstat > /dev/null 2>&1 ; then section "Simplified_and_fuzzy_rounded_vmstat_(wait_please)" - vmstat 1 5 > /tmp/percona-toolkit + vmstat 1 5 > $TMPDIR/percona-toolkit if [ "${platform}" = "Linux" ]; then - format_vmstat /tmp/percona-toolkit + format_vmstat $TMPDIR/percona-toolkit else # TODO: simplify/format for other platforms - cat /tmp/percona-toolkit + cat $TMPDIR/percona-toolkit fi fi fi @@ -1179,6 +1221,7 @@ main () { # ######################################################################## temp_files "rm" temp_files "check" + rm_tmpdir section The_End } @@ -1238,9 +1281,9 @@ See also L<"BUGS"> for more information on filing bugs and getting help. =head1 DESCRIPTION pt-summary runs a large variety of commands to inspect system status and -configuration, saves the output into files in /tmp, and then runs Unix -commands on these results to format them nicely. 
It works best when -executed as a privileged user, but will also work without privileges, +configuration, saves the output into files in a temporary directory, and +then runs Unix commands on these results to format them nicely. It works +best when executed as a privileged user, but will also work without privileges, although some output might not be possible to generate without root. =head1 OPTIONS From f4b8e8356b6a2a1d0250b0c4beb4714b5b366a73 Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 3 Jan 2012 14:59:55 -0300 Subject: [PATCH 25/71] test-bash-functions: test_name is optional, so avoid it stopping us if set -u --- util/test-bash-functions | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/util/test-bash-functions b/util/test-bash-functions index 1a14a0de..c6c0cd61 100755 --- a/util/test-bash-functions +++ b/util/test-bash-functions @@ -93,7 +93,7 @@ run_test() { # Print a TAP-style test result. result() { local result=$1 - local test_name=$2 + local test_name=${2:-""} if [ $result -eq 0 ]; then echo "ok $testno - $TEST_FILE $test_name" else @@ -115,7 +115,7 @@ result() { no_diff() { local got=$1 local expected=$2 - local test_name=$3 + local test_name=${3:-""} test_command="diff $got $expected" eval $test_command > $TEST_TMPDIR/failed_result 2>&1 result $? "$test_name" @@ -124,7 +124,7 @@ no_diff() { is() { local got=$1 local expected=$2 - local test_name=$3 + local test_name=${3:-""} test_command="\"$got\" == \"$expected\"" test "$got" = "$expected" result $? "$test_name" @@ -132,7 +132,7 @@ is() { cmd_ok() { local test_command=$1 - local test_name=$2 + local test_name=${2:-""} eval $test_command result $? 
"$test_name" } From 96aabd299900f77645e21014af93dbfe54a9bf73 Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 3 Jan 2012 15:01:09 -0300 Subject: [PATCH 26/71] bin/pt-summary: Enable set -u --- bin/pt-summary | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bin/pt-summary b/bin/pt-summary index 9b7f62cf..1c8b16c9 100755 --- a/bin/pt-summary +++ b/bin/pt-summary @@ -53,8 +53,7 @@ fuzz () { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### -# pt-summary isn't ready for this yet. -#set -u +set -u TMPDIR="" From d6b15f491691b9f49eb28728954b97a988cf0c4e Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Tue, 3 Jan 2012 15:10:16 -0300 Subject: [PATCH 27/71] bin/pt-mysql-summary: Enable set -u --- bin/pt-mysql-summary | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/bin/pt-mysql-summary b/bin/pt-mysql-summary index db01f10e..441fb3af 100755 --- a/bin/pt-mysql-summary +++ b/bin/pt-mysql-summary @@ -22,8 +22,7 @@ usage() { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### -# pt-mysql-summary isn't ready for this yet. -# set -u +set -u TMPDIR="" @@ -244,6 +243,9 @@ fuzzy_pct () { # which should be in $TMPDIR/percona-toolkit. parse_mysqld_instances () { local file=$1 + local socket=${socket:-""} + local port=${port:-""} + local datadir=${datadir:-""} echo " Port Data Directory Socket" echo " ===== ========================== ======" $AP_GREP '/mysqld ' $file | while read line; do @@ -269,7 +271,7 @@ parse_mysqld_instances () { # interested in, in case there are multiple instances. 
find_my_cnf_file() { local file=$1 - local port=$2 + local port=${2:-""} if test -n "$port" && $AP_GREP -- "/mysqld.*--port=$port" $file >/dev/null 2>&1 ; then $AP_GREP -- "/mysqld.*--port=$port" $file \ | $AP_AWK 'BEGIN{RS=" "; FS="=";} $1 ~ /--defaults-file/ { print $2; }' \ From a9afea4506f7b0527a904131e7a5ebd6815455fe Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 11 Jan 2012 11:31:41 -0700 Subject: [PATCH 28/71] Quote vals in Bash libs. Add tests for log_warn_die.sh and alt_cmds.sh. Make --help test path independent. --- lib/bash/alt_cmds.sh | 2 +- lib/bash/daemon.sh | 12 +++++------ lib/bash/log_warn_die.sh | 8 +++---- lib/bash/parse_options.sh | 30 +++++++++++++------------- t/lib/bash/alt_cmds.sh | 15 +++++++++++++ t/lib/bash/log_warn_die.sh | 39 ++++++++++++++++++++++++++++++++++ t/lib/bash/parse_options.sh | 5 ++--- t/lib/samples/bash/help001.txt | 30 -------------------------- t/lib/samples/bash/seq1.txt | 5 +++++ 9 files changed, 87 insertions(+), 59 deletions(-) create mode 100644 t/lib/bash/alt_cmds.sh create mode 100644 t/lib/bash/log_warn_die.sh delete mode 100644 t/lib/samples/bash/help001.txt create mode 100644 t/lib/samples/bash/seq1.txt diff --git a/lib/bash/alt_cmds.sh b/lib/bash/alt_cmds.sh index 55a41e3a..c5a88f5e 100644 --- a/lib/bash/alt_cmds.sh +++ b/lib/bash/alt_cmds.sh @@ -25,7 +25,7 @@ set -u # seq N, return 1, ..., 5 _seq() { - local i=$1 + local i="$1" awk "BEGIN { for(i=1; i<=$i; i++) print i; }" } diff --git a/lib/bash/daemon.sh b/lib/bash/daemon.sh index 1fe823b8..9abe8b3e 100644 --- a/lib/bash/daemon.sh +++ b/lib/bash/daemon.sh @@ -30,8 +30,8 @@ set -u # file - File to write PID to. # pid - PID to write into file. make_pid_file() { - local file=$1 - local pid=$2 + local file="$1" + local pid="$2" # Yes there's a race condition here, between checking if the file exists # and creating it, but it's not important enough to handle. @@ -39,7 +39,7 @@ make_pid_file() { if [ -f "$file" ]; then # PID file already exists. 
See if the pid it contains is still running. # If yes, then die. Else, the pid file is stale and we can reclaim it. - local old_pid=$(cat $file) + local old_pid=$(cat "$file") if [ -z "$old_pid" ]; then # PID file is empty, so be safe and die since we can't check a # non-existent pid. @@ -56,13 +56,13 @@ make_pid_file() { fi # PID file doesn't exist, or it does but its pid is stale. - echo "$pid" > $file + echo "$pid" > "$file" } remove_pid_file() { - local file=$1 + local file="$1" if [ -f "$file" ]; then - rm $file + rm "$file" fi } diff --git a/lib/bash/log_warn_die.sh b/lib/bash/log_warn_die.sh index 6d5ca2e4..eaa7fed3 100644 --- a/lib/bash/log_warn_die.sh +++ b/lib/bash/log_warn_die.sh @@ -28,16 +28,16 @@ EXIT_STATUS=0 log() { TS=$(date +%F-%T | tr :- _); - echo "$TS $1" + echo "$TS $*" } warn() { - log "$1" >&2 - EXIT_STATUS=$((EXIT_STATUS | 1)) + log "$*" >&2 + EXIT_STATUS=1 } die() { - warn "$1" + warn "$*" exit 1 } diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index e4915247..4fde19c1 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -45,19 +45,19 @@ OPT_HELP="no" # If --help was specified # Optional Global Variables: # OPT_ERR - Command line option error message. usage() { - local file=$1 + local file="$1" - local usage=$(grep '^Usage: ' $file) + local usage=$(grep '^Usage: ' "$file") echo $usage >&2 echo >&2 echo "For more information, 'man $TOOL' or 'perldoc $file'." >&2 } usage_or_errors() { - local file=$1 + local file="$1" if [ "$OPT_VERSION" = "yes" ]; then - local version=$(grep '^pt-[^ ]\+ [0-9]' $file) + local version=$(grep '^pt-[^ ]\+ [0-9]' "$file") echo "$version" return 1 fi @@ -78,7 +78,7 @@ usage_or_errors() { if [ $OPT_ERRS -gt 0 ]; then echo >&2 - usage $file + usage "$file" return 1 fi @@ -100,7 +100,7 @@ usage_or_errors() { # option, removing the option's leading --, changing all - to _, and # prefixing with "OPT_". E.g. --foo-bar becomes OPT_FOO_BAR. 
parse_options() { - local file=$1 + local file="$1" shift # Parse the program options (po) from the POD. Each option has @@ -111,11 +111,11 @@ parse_options() { # default=foo # That's the spec for --string-opt2. Each line is a key:value pair # from the option's POD line like "type: string; default: foo". - mkdir $TMPDIR/po/ 2>/dev/null - rm -rf $TMPDIR/po/* + mkdir "$TMPDIR/po/" 2>/dev/null + rm -rf "$TMPDIR"/po/* ( export PO_DIR="$TMPDIR/po" - cat $file | perl -ne ' + cat "$file" | perl -ne ' BEGIN { $/ = ""; } next unless $_ =~ m/^=head1 OPTIONS/; while ( defined(my $para = <>) ) { @@ -149,13 +149,13 @@ parse_options() { # Evaluate the program options into existence as global variables # transformed like --my-op == $OPT_MY_OP. If an option has a default # value, it's assigned that value. Else, it's value is an empty string. - for opt_spec in $(ls $TMPDIR/po/); do + for opt_spec in $(ls "$TMPDIR/po/"); do local opt="" local default_val="" local neg=0 while read line; do - local key=`echo $line | cut -d ':' -f 1` - local val=`echo $line | cut -d ':' -f 2` + local key=$(echo $line | cut -d ':' -f 1) + local val=$(echo $line | cut -d ':' -f 2) case "$key" in long) opt=$(echo $val | sed 's/-/_/g' | tr [:lower:] [:upper:]) @@ -178,7 +178,7 @@ parse_options() { echo "Invalid attribute in $TMPDIR/po/$opt_spec: $line" >&2 exit 1 esac - done < $TMPDIR/po/$opt_spec + done < "$TMPDIR/po/$opt_spec" if [ -z "$opt" ]; then echo "No long attribute in option spec $TMPDIR/po/$opt_spec" >&2 @@ -243,7 +243,7 @@ parse_options() { if [ -f "$TMPDIR/po/$opt" ]; then spec="$TMPDIR/po/$opt" else - spec=$(grep "^short form:-$opt\$" $TMPDIR/po/* | cut -d ':' -f 1) + spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) if [ -z "$spec" ]; then OPT_ERRS=$(($OPT_ERRS + 1)) echo "Unknown option: $real_opt" >&2 @@ -255,7 +255,7 @@ parse_options() { # says it has a type, then it requires a value and that value should # be the next item ($1). 
Else, typeless options (like --version) are # either "yes" if specified, else "no" if negatable and --no-opt. - required_arg=$(cat $spec | grep '^type:' | cut -d':' -f2) + local required_arg=$(cat $spec | awk -F: '/^type:/{print $2}') if [ -n "$required_arg" ]; then if [ $# -eq 0 ]; then OPT_ERRS=$(($OPT_ERRS + 1)) diff --git a/t/lib/bash/alt_cmds.sh b/t/lib/bash/alt_cmds.sh new file mode 100644 index 00000000..5d674bc0 --- /dev/null +++ b/t/lib/bash/alt_cmds.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +TESTS=1 + +source "$LIB_DIR/alt_cmds.sh" + +_seq 5 > $TEST_TMPDIR/out +no_diff \ + $TEST_TMPDIR/out \ + $T_LIB_DIR/samples/bash/seq1.txt \ + "_seq 5" + +# ########################################################################### +# Done +# ########################################################################### diff --git a/t/lib/bash/log_warn_die.sh b/t/lib/bash/log_warn_die.sh new file mode 100644 index 00000000..c3f4ed74 --- /dev/null +++ b/t/lib/bash/log_warn_die.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash + +TESTS=6 + +source "$LIB_DIR/log_warn_die.sh" + +log "Hello world!" > $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' $TEST_TMPDIR/log" \ + "log msg" + +log "Hello" "world!" > $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' $TEST_TMPDIR/log" \ + "log msg msg" + +is \ + "$EXIT_STATUS" \ + "0" \ + "Exit status 0" + +warn "Hello world!" 2> $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' $TEST_TMPDIR/log" \ + "warn msg" + +warn "Hello" "world!" 2> $TEST_TMPDIR/log +cmd_ok \ + "grep -q 'Hello world!' 
$TEST_TMPDIR/log" \ + "warn msg msg" + +is \ + "$EXIT_STATUS" \ + "1" \ + "Exit status 1" + +# ########################################################################### +# Done +# ########################################################################### diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 176c824c..482bc46d 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -77,9 +77,8 @@ is "$err" "1" "Non-zero exit on unknown option" # ########################################################################### parse_options "$T_LIB_DIR/samples/bash/po001.sh" --help usage_or_errors "$T_LIB_DIR/samples/bash/po001.sh" >$TMPFILE 2>&1 -no_diff \ - "$TMPFILE" \ - "$T_LIB_DIR/samples/bash/help001.txt" \ +cmd_ok \ + "grep -q \"For more information, 'man pt-stalk' or 'perldoc\" $TMPFILE" \ "--help" # ############################################################################ diff --git a/t/lib/samples/bash/help001.txt b/t/lib/samples/bash/help001.txt deleted file mode 100644 index 65dc71b2..00000000 --- a/t/lib/samples/bash/help001.txt +++ /dev/null @@ -1,30 +0,0 @@ -Usage: pt-stalk [OPTIONS] [-- MYSQL_OPTIONS] - -For more information, 'man pt-stalk' or 'perldoc /Users/daniel/p/bash-tool-libs/t/lib/samples/bash/po001.sh'. - -Command line options: - ---help - Print help and exit. - ---int-opt - Int option without a default. - ---int-opt2 - Int option with a default. - ---noption - Negatable option. - ---string-opt - String option without a default. - ---string-opt2 - String option with a default. - ---typeless-option - Just an option. - ---version - Print tool's version and exit. 
- diff --git a/t/lib/samples/bash/seq1.txt b/t/lib/samples/bash/seq1.txt new file mode 100644 index 00000000..8a1218a1 --- /dev/null +++ b/t/lib/samples/bash/seq1.txt @@ -0,0 +1,5 @@ +1 +2 +3 +4 +5 From cfce6e6eb2478ae628f47dcfd4de096aba349ba4 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Fri, 13 Jan 2012 10:02:17 -0700 Subject: [PATCH 29/71] More quoting and tweaking re Baron's review. --- lib/bash/collect.sh | 8 ++++---- lib/bash/safeguards.sh | 18 +++++++++--------- t/lib/samples/bash/collect001.txt | 1 + 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index 9acf2a0c..40d3e194 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -37,8 +37,8 @@ CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"} CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"} collect() { - local d=$1 # directory to save results in - local p=$2 # prefix for each result file + local d="$1" # directory to save results in + local p="$2" # prefix for each result file # Get pidof mysqld; pidof doesn't exist on some systems. We try our best... local mysqld_pid=$(pidof -s mysqld); @@ -50,7 +50,7 @@ collect() { fi # Get memory allocation info before anything else. - if [ -x "$CMD_PMAP" -a "$mysqld_pid" ]; then + if [ "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" else @@ -160,7 +160,7 @@ collect() { local have_lock_waits_table=0 $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ - | grep -qi "INNODB_LOCK_WAITS" + | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1 if [ $? 
-eq 0 ]; then have_lock_waits_table=1 fi diff --git a/lib/bash/safeguards.sh b/lib/bash/safeguards.sh index 1e6df342..97db3842 100644 --- a/lib/bash/safeguards.sh +++ b/lib/bash/safeguards.sh @@ -24,10 +24,10 @@ set -u disk_space() { - local filesystem=${1:-"$PWD"} + local filesystem="${1:-$PWD}" # Filesystem 1024-blocks Used Available Capacity Mounted on # /dev/disk0s2 118153176 94409664 23487512 81% / - df -P -k $filesystem + df -P -k "$filesystem" } # Sub: check_disk_space @@ -44,18 +44,18 @@ disk_space() { # Returns: # 0 if there is/will be enough disk space, else 1. check_disk_space() { - local file=$1 - local mb=${2:-"0"} - local pc=${3:-"0"} - local mb_margin=${4:-"0"} + local file="$1" + local mb="${2:-0}" + local pc="${3:-0}" + local mb_margin="${4:-0}" # Convert MB to KB because the df output should be in 1k blocks. local kb=$(($mb * 1024)) local kb_margin=$(($mb_margin * 1024)) - local kb_used=$(cat $file | awk '/^\//{print $3}'); - local kb_free=$(cat $file | awk '/^\//{print $4}'); - local pc_used=$(cat $file | awk '/^\//{print $5}' | sed -e 's/%//g'); + local kb_used=$(cat "$file" | awk '/^\//{print $3}'); + local kb_free=$(cat "$file" | awk '/^\//{print $4}'); + local pc_used=$(cat "$file" | awk '/^\//{print $5}' | sed -e 's/%//g'); if [ "$kb_margin" -gt "0" ]; then local kb_total=$(($kb_used + $kb_free)) diff --git a/t/lib/samples/bash/collect001.txt b/t/lib/samples/bash/collect001.txt index f76b8ecb..8c27bdc1 100644 --- a/t/lib/samples/bash/collect001.txt +++ b/t/lib/samples/bash/collect001.txt @@ -20,6 +20,7 @@ 2011_12_05-opentables1 2011_12_05-opentables2 2011_12_05-output +2011_12_05-pmap 2011_12_05-processlist 2011_12_05-procstat 2011_12_05-procvmstat From 894c492c6d43a1db6183a5ac5f82629ee7cfc774 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Fri, 13 Jan 2012 10:23:45 -0700 Subject: [PATCH 30/71] Update Bash libs. Quote stuff in tool's code. Remove flock. 
--- bin/pt-stalk | 151 +++++++++++++++++++++++++++------------------------ 1 file changed, 79 insertions(+), 72 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index fc93ba23..1b446954 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -22,16 +22,16 @@ EXIT_STATUS=0 log() { TS=$(date +%F-%T | tr :- _); - echo "$TS $1" + echo "$TS $*" } warn() { - log "$1" >&2 - EXIT_STATUS=$((EXIT_STATUS | 1)) + log "$*" >&2 + EXIT_STATUS=1 } die() { - warn "$1" + warn "$*" exit 1 } @@ -58,19 +58,19 @@ OPT_VERSION="no" # If --version was specified OPT_HELP="no" # If --help was specified usage() { - local file=$1 + local file="$1" - local usage=$(grep '^Usage: ' $file) + local usage=$(grep '^Usage: ' "$file") echo $usage >&2 echo >&2 echo "For more information, 'man $TOOL' or 'perldoc $file'." >&2 } usage_or_errors() { - local file=$1 + local file="$1" if [ "$OPT_VERSION" = "yes" ]; then - local version=$(grep '^pt-[^ ]\+ [0-9]' $file) + local version=$(grep '^pt-[^ ]\+ [0-9]' "$file") echo "$version" return 1 fi @@ -91,7 +91,7 @@ usage_or_errors() { if [ $OPT_ERRS -gt 0 ]; then echo >&2 - usage $file + usage "$file" return 1 fi @@ -99,14 +99,14 @@ usage_or_errors() { } parse_options() { - local file=$1 + local file="$1" shift - mkdir $TMPDIR/po/ 2>/dev/null - rm -rf $TMPDIR/po/* + mkdir "$TMPDIR/po/" 2>/dev/null + rm -rf "$TMPDIR"/po/* ( export PO_DIR="$TMPDIR/po" - cat $file | perl -ne ' + cat "$file" | perl -ne ' BEGIN { $/ = ""; } next unless $_ =~ m/^=head1 OPTIONS/; while ( defined(my $para = <>) ) { @@ -137,13 +137,13 @@ parse_options() { ' ) - for opt_spec in $(ls $TMPDIR/po/); do + for opt_spec in $(ls "$TMPDIR/po/"); do local opt="" local default_val="" local neg=0 while read line; do - local key=`echo $line | cut -d ':' -f 1` - local val=`echo $line | cut -d ':' -f 2` + local key=$(echo $line | cut -d ':' -f 1) + local val=$(echo $line | cut -d ':' -f 2) case "$key" in long) opt=$(echo $val | sed 's/-/_/g' | tr [:lower:] [:upper:]) @@ -166,7 +166,7 @@ parse_options() { 
echo "Invalid attribute in $TMPDIR/po/$opt_spec: $line" >&2 exit 1 esac - done < $TMPDIR/po/$opt_spec + done < "$TMPDIR/po/$opt_spec" if [ -z "$opt" ]; then echo "No long attribute in option spec $TMPDIR/po/$opt_spec" >&2 @@ -216,7 +216,7 @@ parse_options() { if [ -f "$TMPDIR/po/$opt" ]; then spec="$TMPDIR/po/$opt" else - spec=$(grep "^short form:-$opt\$" $TMPDIR/po/* | cut -d ':' -f 1) + spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) if [ -z "$spec" ]; then OPT_ERRS=$(($OPT_ERRS + 1)) echo "Unknown option: $real_opt" >&2 @@ -224,7 +224,7 @@ parse_options() { fi fi - required_arg=$(cat $spec | grep '^type:' | cut -d':' -f2) + local required_arg=$(cat $spec | awk -F: '/^type:/{print $2}') if [ -n "$required_arg" ]; then if [ $# -eq 0 ]; then OPT_ERRS=$(($OPT_ERRS + 1)) @@ -306,7 +306,7 @@ rm_tmpdir() { set -u _seq() { - local i=$1 + local i="$1" awk "BEGIN { for(i=1; i<=$i; i++) print i; }" } @@ -327,22 +327,22 @@ _seq() { set -u disk_space() { - local filesystem=${1:-"$PWD"} - df -P -k $filesystem + local filesystem="${1:-$PWD}" + df -P -k "$filesystem" } check_disk_space() { - local file=$1 - local mb=${2:-"0"} - local pc=${3:-"0"} - local mb_margin=${4:-"0"} + local file="$1" + local mb="${2:-0}" + local pc="${3:-0}" + local mb_margin="${4:-0}" local kb=$(($mb * 1024)) local kb_margin=$(($mb_margin * 1024)) - local kb_used=$(cat $file | awk '/^\//{print $3}'); - local kb_free=$(cat $file | awk '/^\//{print $4}'); - local pc_used=$(cat $file | awk '/^\//{print $5}' | sed -e 's/%//g'); + local kb_used=$(cat "$file" | awk '/^\//{print $3}'); + local kb_free=$(cat "$file" | awk '/^\//{print $4}'); + local pc_used=$(cat "$file" | awk '/^\//{print $5}' | sed -e 's/%//g'); if [ "$kb_margin" -gt "0" ]; then local kb_total=$(($kb_used + $kb_free)) @@ -379,12 +379,12 @@ check_disk_space() { set -u make_pid_file() { - local file=$1 - local pid=$2 + local file="$1" + local pid="$2" if [ -f "$file" ]; then - local old_pid=$(cat $file) + local 
old_pid=$(cat "$file") if [ -z "$old_pid" ]; then die "PID file $file already exists but it is empty" else @@ -398,13 +398,13 @@ make_pid_file() { fi fi - echo "$pid" > $file + echo "$pid" > "$file" } remove_pid_file() { - local file=$1 + local file="$1" if [ -f "$file" ]; then - rm $file + rm "$file" fi } @@ -437,8 +437,8 @@ CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"} CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"} collect() { - local d=$1 # directory to save results in - local p=$2 # prefix for each result file + local d="$1" # directory to save results in + local p="$2" # prefix for each result file local mysqld_pid=$(pidof -s mysqld); if [ -z "$mysqld_pid" ]; then @@ -448,7 +448,7 @@ collect() { mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); fi - if [ -x "$CMD_PMAP" -a "$mysqld_pid" ]; then + if [ "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" else @@ -532,7 +532,7 @@ collect() { local have_lock_waits_table=0 $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ - | grep -qi "INNODB_LOCK_WAITS" + | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1 if [ $? 
-eq 0 ]; then have_lock_waits_table=1 fi @@ -658,7 +658,7 @@ lock_waits() { # Global variables # ########################################################################### EXIT_REASON="" -TOOL=`basename $0` +TOOL=$(basename $0) OKTORUN=1 ITER=1 @@ -667,11 +667,11 @@ ITER=1 # ########################################################################### grep_processlist() { - local file=$1 - local col=$2 - local pat=${3:-""} - local gt=${4:-0} - local quiet=${5:-0} + local file="$1" + local col="$2" + local pat="${3:-""}" + local gt="${4:-0}" + local quiet="${5:-0}" awk " BEGIN { @@ -720,12 +720,14 @@ set_trg_func() { } trg_status() { - local var=$1 - mysqladmin $EXT_ARGV extended-status | grep "$OPT_VARIABLE " | awk '{print $4}' + local var="$1" + mysqladmin $EXT_ARGV extended-status \ + | grep "$OPT_VARIABLE " \ + | awk '{print $4}' } trg_processlist() { - local var=$1 + local var="$1" local tmpfile="$TMPDIR/processlist" mysqladmin $EXT_ARGV processlist > $tmpfile-1 grep_processlist $tmpfile-1 $var $OPT_MATCH 0 0 > $tmpfile-2 @@ -754,8 +756,8 @@ oktorun() { } sleep_ok() { - local seconds=$1 - local msg=${2:-""} + local seconds="$1" + local msg="${2:-""}" if oktorun; then if [ -n "$msg" ]; then log "$msg" @@ -765,11 +767,18 @@ sleep_ok() { } purge_samples() { + local dir="$1" + local retention_time="$2" + # Delete collect files which more than --retention-time days old. - find "$OPT_DEST" -type f -mtime +$OPT_RETENTION_TIME -exec rm -f '{}' \; - if [ -d "/var/lib/oprofile/samples" ]; then - find "/var/lib/oprofile/samples" -type d -name 'pt_collect_*' \ - -depth -mtime +$OPT_RETENTION_TIME -exec rm -f '{}' \; + find "$dir" -type f -mtime +$retention_time -exec rm -f '{}' \; + + local oprofile_dir="/var/lib/oprofile/samples" + if [ -d "$oprofile_dir" ]; then + # "pt_collect_" here needs to match $CMD_OPCONTROL --save=pt_collect_$p + # in collect(). 
TODO: fix this + find "$oprofile_dir" -type d -name 'pt_collect_*' \ + -depth -mtime +$retention_time -exec rm -f '{}' \; fi } @@ -815,20 +824,20 @@ stalk() { # ################################################################## # Start collecting, maybe. # ################################################################## - local prefix=${OPT_PREFIX:-"$(date +%F-%T | tr :- _)"} + local prefix="${OPT_PREFIX:-$(date +%F-%T | tr :- _)}" log "Collect triggered" # Check if we'll have enough disk space to collect. Disk space # is also checked every interval while collecting. local margin="20" # default 20M margin, unless: if [ -n "$last_prefix" ]; then - margin=$(du -mc $OPT_DEST/$last_prefix-* | tail -n 1 | awk '{print $1'}) + margin=$(du -mc "$OPT_DEST"/"$last_prefix"-* | tail -n 1 | awk '{print $1'}) fi - disk_space $OPT_DEST > $OPT_DEST/$prefix-disk-space - check_disk_space \ - $OPT_DEST/$prefix-disk-space \ - "$OPT_DISK_BYTE_LIMIT" \ - "$OPT_DISK_PCT_LIMIT" \ + disk_space "$OPT_DEST" > "$OPT_DEST/$prefix-disk-space" + check_disk_space \ + "$OPT_DEST/$prefix-disk-space" \ + "$OPT_DISK_BYTE_LIMIT" \ + "$OPT_DISK_PCT_LIMIT" \ "$margin" # real used MB + margin MB if [ $? -eq 0 ]; then # There should be enough disk space, so collect. @@ -847,10 +856,8 @@ stalk() { # while its collecting (hopefully --sleep is longer than # --run-time). ( - flock 200 - collect $OPT_DEST $prefix - ) 200>/tmp/percona-toolkit-collect-lockfile \ - >> "$OPT_DEST/$prefix-output" 2>&1 & + collect "$OPT_DEST" "$prefix" + ) >> "$OPT_DEST/$prefix-output" 2>&1 & else # There will not be enough disk space, so do not collect. warn "Collect canceled because there will not be enough disk space after collecting another $margin MB" @@ -867,7 +874,7 @@ stalk() { fi # Purge old collect file between checks. - purge_samples + purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME" done } @@ -927,8 +934,8 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ # Parse command line options. 
We must do this first so we can # see if --daemonize was specified. mk_tmpdir - parse_options $0 "$@" - usage_or_errors $0 + parse_options "$0" "$@" + usage_or_errors "$0" po_status=$? rm_tmpdir if [ $po_status -ne 0 ]; then @@ -947,17 +954,17 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ # the PID in the PID file to check or kill the child # process. So we'll need to update the PID file with # the child's PID. - make_pid_file $OPT_PID $$ + make_pid_file "$OPT_PID" $$ - main "$@" >$OPT_LOG 2>&1 & + main "$@" >"$OPT_LOG" 2>&1 & # Update PID file with the child's PID. # The child PID is $BASHPID but that special var is only # in Bash 4+, so we can't rely on it. Consequently, we # use $! to get the PID of the child we just forked. - echo "$!" > $OPT_PID + echo "$!" > "$OPT_PID" else - make_pid_file $OPT_PID $$ + make_pid_file "$OPT_PID" $$ main "$@" fi fi From b2bac5c7654f15d845dc42fb2be45f41144d6b3c Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 17 Jan 2012 12:15:49 -0700 Subject: [PATCH 31/71] Quote in tmpdir.sh and fix typo in doc. --- bin/pt-stalk | 6 +++--- lib/bash/tmpdir.sh | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 1b446954..3de794d7 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -267,7 +267,7 @@ set -u TMPDIR="" mk_tmpdir() { - local dir=${1:-""} + local dir="${1:-""}" if [ -n "$dir" ]; then if [ ! -d "$dir" ]; then @@ -284,7 +284,7 @@ mk_tmpdir() { rm_tmpdir() { if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then - rm -rf $TMPDIR + rm -rf "$TMPDIR" fi TMPDIR="" } @@ -1176,7 +1176,7 @@ Print all output to this file when daemonized. type: string -Match pattern for C L<"--function">. +Match pattern for C L<"--function">. =item --notify-by-email diff --git a/lib/bash/tmpdir.sh b/lib/bash/tmpdir.sh index d1b9e1b7..fdd6c8f3 100644 --- a/lib/bash/tmpdir.sh +++ b/lib/bash/tmpdir.sh @@ -35,7 +35,7 @@ TMPDIR="" # Set Global Variables: # TMPDIR - Absolute path of secure temp directory. 
mk_tmpdir() { - local dir=${1:-""} + local dir="${1:-""}" if [ -n "$dir" ]; then if [ ! -d "$dir" ]; then @@ -60,7 +60,7 @@ mk_tmpdir() { # TMPDIR - Set to "". rm_tmpdir() { if [ -n "$TMPDIR" ] && [ -d "$TMPDIR" ]; then - rm -rf $TMPDIR + rm -rf "$TMPDIR" fi TMPDIR="" } From 6e2b670af66dce1dc4afeb3448994858e5ecabe5 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 17 Jan 2012 13:12:08 -0700 Subject: [PATCH 32/71] Die if --log, --pid, or --dest aren't accessible. Also die if po dir isn't accessible. --- bin/pt-stalk | 49 +++++++++++++++++++++++++++++++++------ lib/bash/daemon.sh | 3 +++ lib/bash/parse_options.sh | 14 ++++++++++- t/lib/bash/daemon.sh | 18 +++++++++++++- t/pt-stalk/pt-stalk.t | 2 +- 5 files changed, 76 insertions(+), 10 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 3de794d7..312858bd 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -102,8 +102,20 @@ parse_options() { local file="$1" shift - mkdir "$TMPDIR/po/" 2>/dev/null + if [ ! -d "$TMPDIR/po/" ]; then + mkdir "$TMPDIR/po/" + if [ $? -ne 0 ]; then + echo "Cannot mkdir $TMPDIR/po/" >&2 + exit 1 + fi + fi + rm -rf "$TMPDIR"/po/* + if [ $? -ne 0 ]; then + echo "Cannot rm -rf $TMPDIR/po/*" >&2 + exit 1 + fi + ( export PO_DIR="$TMPDIR/po" cat "$file" | perl -ne ' @@ -399,6 +411,9 @@ make_pid_file() { fi echo "$pid" > "$file" + if [ $? -ne 0 ]; then + die "Cannot create or write PID file $file" + fi } remove_pid_file() { @@ -890,19 +905,30 @@ main() { # we don't know our own PID. See the usage of $! below. log "$0 started" - # Make a secure tmpdir. - mk_tmpdir - # Make the collection dir exists. - mkdir -p "$OPT_DEST" || die "Can't make the destination directory" - test -d "$OPT_DEST" || die "$OPT_DEST isn't a directory" - test -w "$OPT_DEST" || die "$OPT_DEST isn't writable" + if [ ! -d "$OPT_DEST" ]; then + mkdir -p "$OPT_DEST" || die "Cannot make --dest $OPT_DEST" + fi + # Check access to the --dest dir. 
By setting -e in the subshell, + # if either command fails, the subshell will exit immediately and + # $? will be non-zero. + ( + set -e + touch "$OPT_DEST/test" + rm "$OPT_DEST/test" + ) + if [ $? -ne 0 ]; then + die "Cannot read and write files to --dest $OPT_DEST" + fi # Test if we have root; warn if not, but it isn't critical. if [ "$(id -u)" != "0" ]; then log 'Not running with root privileges!'; fi + # Make a secure tmpdir. + mk_tmpdir + # Set TRIGGER_FUNCTION based on --function. set_trg_func @@ -948,6 +974,15 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ || die "Cannot connect to MySQL. Check that MySQL is running and that the options after -- are correct." if [ "$OPT_DAEMONIZE" = "yes" ]; then + # Check access to the --log file. + ( + set -e + touch "$OPT_LOG" + ) + if [ $? -ne 0 ]; then + die "Cannot write to --log $OPT_LOG" + fi + # The PID file will at first have our (parent) PID. # This is fine for ensuring that only one of us is # running, but it's not fine if the user wants to use diff --git a/lib/bash/daemon.sh b/lib/bash/daemon.sh index 9abe8b3e..663cb8b3 100644 --- a/lib/bash/daemon.sh +++ b/lib/bash/daemon.sh @@ -57,6 +57,9 @@ make_pid_file() { # PID file doesn't exist, or it does but its pid is stale. echo "$pid" > "$file" + if [ $? -ne 0 ]; then + die "Cannot create or write PID file $file" + fi } remove_pid_file() { diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 4fde19c1..21d580d2 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -111,8 +111,20 @@ parse_options() { # default=foo # That's the spec for --string-opt2. Each line is a key:value pair # from the option's POD line like "type: string; default: foo". - mkdir "$TMPDIR/po/" 2>/dev/null + if [ ! -d "$TMPDIR/po/" ]; then + mkdir "$TMPDIR/po/" + if [ $? -ne 0 ]; then + echo "Cannot mkdir $TMPDIR/po/" >&2 + exit 1 + fi + fi + rm -rf "$TMPDIR"/po/* + if [ $? 
-ne 0 ]; then + echo "Cannot rm -rf $TMPDIR/po/*" >&2 + exit 1 + fi + ( export PO_DIR="$TMPDIR/po" cat "$file" | perl -ne ' diff --git a/t/lib/bash/daemon.sh b/t/lib/bash/daemon.sh index 76ec4d0b..0ffad457 100644 --- a/t/lib/bash/daemon.sh +++ b/t/lib/bash/daemon.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=7 +TESTS=9 TMPDIR="$TEST_TMPDIR" local file="$TMPDIR/pid-file" @@ -60,6 +60,22 @@ is \ rm $file rm $TMPDIR/output +# ########################################################################### +# Die if pid file can't be created. +# ########################################################################### +( + make_pid_file "/root/pid" $$ >$TMPDIR/output 2>&1 +) + +is \ + "$?" \ + "1" \ + "Exit 1 if PID file can't be created" + +cmd_ok \ + "grep -q 'Cannot create or write PID file /root/pid' $TMPDIR/output" \ + "Error that PID file can't be created" + # ########################################################################### # Done. # ########################################################################### diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index eb57d5c5..a8e7da87 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -136,7 +136,7 @@ diag(`rm $dest/* 2>/dev/null`); my (undef, $uptime) = $dbh->selectrow_array("SHOW STATUS LIKE 'Uptime'"); my $threshold = $uptime + 2; -$retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 -- --defaults-file=$cnf >$log_file 2>&1"); +$retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1"); sleep 3; From 2412e01aa0caba0c883bd05e3152c3c86bb4aba2 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 19 Jan 2012 10:46:48 -0700 Subject: [PATCH 33/71] Use which to get programs. Don't create file unless the program or file exists. 
--- lib/bash/collect.sh | 98 ++++++++++++++++++------------- t/lib/bash/collect.sh | 23 ++++++-- t/lib/samples/bash/collect001.txt | 34 ----------- 3 files changed, 75 insertions(+), 80 deletions(-) delete mode 100644 t/lib/samples/bash/collect001.txt diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index 40d3e194..635de955 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -1,4 +1,4 @@ -# This program is copyright 2011 Percona Inc. +# This program is copyright 2011-2012 Percona Inc. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED @@ -24,17 +24,17 @@ set -u # Global variables. -CMD_GDB=${CMD_GDB:-"gdb"} -CMD_IOSTAT=${CMD_IOSTAT:-"iostat"} -CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"} -CMD_MYSQL=${CMD_MSSQL:-"mysql"} -CMD_MYSQLADMIN=${CMD_MYSQL_ADMIN:-"mysqladmin"} -CMD_OPCONTROL=${CMD_OPCONTROL:-"opcontrol"} -CMD_OPREPORT=${CMD_OPREPORT:-"opreport"} -CMD_PMAP=${CMD_PMAP:-"pmap"} -CMD_STRACE=${CMD_STRACE:-"strace"} -CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"} -CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"} +CMD_GDB="$(which gdb)" +CMD_IOSTAT="$(which iostat)" +CMD_MPSTAT="$(which mpstat)" +CMD_MYSQL="$(which mysql)" +CMD_MYSQLADMIN="$(which mysqladmin)" +CMD_OPCONTROL="$(which opcontrol)" +CMD_OPREPORT="$(which opreport)" +CMD_PMAP="$(which pmap)" +CMD_STRACE="$(which strace)" +CMD_TCPDUMP="$(which tcpdump)" +CMD_VMSTAT="$(which vmstat)" collect() { local d="$1" # directory to save results in @@ -50,7 +50,7 @@ collect() { fi # Get memory allocation info before anything else. - if [ "$mysqld_pid" ]; then + if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" else @@ -60,15 +60,13 @@ collect() { fi # Getting a GDB stacktrace can be an intensive operation, - # so do this only if necessary. - if [ "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then + # so do this only if necessary (and possible). 
+ if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then $CMD_GDB \ -ex "set pagination 0" \ -ex "thread apply all bt" \ --batch -p $mysqld_pid \ >> "$d/$p-stacktrace" - else - echo "GDB (--collect-gdb) was not enabled" >> "$d/$p-stacktrace" fi # Get MySQL's variables if possible. Then sleep long enough that we probably @@ -114,7 +112,7 @@ collect() { # If TCP dumping is specified, start that on the server's port. local tcpdump_pid="" - if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then + if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" = "yes" ]; then local port=$(awk '/^port/{print $2}' "$d/$p-variables") if [ "$port" ]; then $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & @@ -125,12 +123,12 @@ collect() { # Next, start oprofile gathering data during the whole rest of this process. # The --init should be a no-op if it has already been init-ed. local have_oprofile="no" - if [ "$OPT_COLLECT_OPROFILE" = "yes" ]; then + if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" = "yes" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then # Don't run oprofile and strace at the same time. $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & local strace_pid=$! @@ -138,16 +136,22 @@ collect() { # Grab a few general things first. Background all of these so we can start # them all up as quickly as possible. 
- ps -eaf >> "$d/$p-ps" 2>&1 & - sysctl -a >> "$d/$p-sysctl" 2>&1 & - top -bn1 >> "$d/$p-top" 2>&1 & - $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & - $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & - $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & - $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & - $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & - $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & - lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + ps -eaf >> "$d/$p-ps" 2>&1 & + sysctl -a >> "$d/$p-sysctl" 2>&1 & + top -bn1 >> "$d/$p-top" 2>&1 & + lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + if [ "$CMD_VMSTAT" ]; then + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & + fi + if [ "$CMD_IOSTAT" ]; then + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & + fi + if [ "$CMD_MPSTAT" ]; then + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & + fi # Collect multiple snapshots of the status variables. 
We use # mysqladmin -c even though it is buggy and won't stop on its @@ -183,15 +187,29 @@ collect() { local ts="$(date +"TS %s.%N %F %T")" # Collect the stuff for this cycle - (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & - (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & - (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & - (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & - (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & - (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & - (df -h 2>&1; echo $ts) >> "$d/$p-df" & - (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & - (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & + if [ -d "/proc" ]; then + if [ -f "/proc/diskstats" ]; then + (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & + fi + if [ -f "/proc/stat" ]; then + (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & + fi + if [ -f "/proc/vmstat" ]; then + (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & + fi + if [ -f "/proc/meminfo" ]; then + (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & + fi + if [ -f "/proc/slabinfo" ]; then + (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & + fi + if [ -f "/proc/interrupts" ]; then + (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & + fi + fi + (df -h 2>&1; echo $ts) >> "$d/$p-df" & + (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & + (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & ($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \ >> "$d/$p-processlist" @@ -226,7 +244,7 @@ collect() { "/path/to/mysqld'" \ > "$d/$p-opreport" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index 9d9249fa..ed622b9b 100644 --- a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=18 +TESTS=19 
TMPFILE="$TEST_TMPDIR/parse-opts-output" TMPDIR="$TEST_TMPDIR" @@ -23,12 +23,23 @@ local p="$TMPDIR/collect/2011_12_05" collect "$TMPDIR/collect" "2011_12_05" > $p-output 2>&1 # Even if this system doesn't have all the cmds, collect should still -# create all the default files. +# have created some files for cmds that (hopefully) all systems have. ls -1 $TMPDIR/collect | sort > $TMPDIR/collect-files -no_diff \ - $TMPDIR/collect-files \ - $T_LIB_DIR/samples/bash/collect001.txt \ - "Default collect files" + +# If this system has /proc, then some files should be collected. +# Else, those files should not exist. +if [ -f /proc/diskstats ]; then + cmd_ok \ + "grep '[0-9]' $TMPDIR/collect/2011_12_05-diskstats" \ + "/proc/diskstats" +else + test -f $TMPDIR/collect/2011_12_05-diskstats + is "$?" "1" "No /proc/diskstats" +fi + +cmd_ok \ + "grep -q '\-hostname\$' $TMPDIR/collect-files" \ + "Collected hostname" cmd_ok \ "grep -q 'Avail' $p-df" \ diff --git a/t/lib/samples/bash/collect001.txt b/t/lib/samples/bash/collect001.txt deleted file mode 100644 index 8c27bdc1..00000000 --- a/t/lib/samples/bash/collect001.txt +++ /dev/null @@ -1,34 +0,0 @@ -2011_12_05-df -2011_12_05-disk-space -2011_12_05-diskstats -2011_12_05-hostname -2011_12_05-innodbstatus1 -2011_12_05-innodbstatus2 -2011_12_05-interrupts -2011_12_05-iostat -2011_12_05-iostat-overall -2011_12_05-log_error -2011_12_05-lsof -2011_12_05-meminfo -2011_12_05-mpstat -2011_12_05-mpstat-overall -2011_12_05-mutex-status1 -2011_12_05-mutex-status2 -2011_12_05-mysqladmin -2011_12_05-netstat -2011_12_05-netstat_s -2011_12_05-opentables1 -2011_12_05-opentables2 -2011_12_05-output -2011_12_05-pmap -2011_12_05-processlist -2011_12_05-procstat -2011_12_05-procvmstat -2011_12_05-ps -2011_12_05-slabinfo -2011_12_05-stacktrace -2011_12_05-sysctl -2011_12_05-top -2011_12_05-variables -2011_12_05-vmstat -2011_12_05-vmstat-overall From 9e29cc205a7a2ddecca08474bccf4a631fa84fc4 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 19 
Jan 2012 11:13:01 -0700 Subject: [PATCH 34/71] Make test quiet. --- t/lib/bash/collect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index ed622b9b..a9138dd7 100644 --- a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -30,7 +30,7 @@ ls -1 $TMPDIR/collect | sort > $TMPDIR/collect-files # Else, those files should not exist. if [ -f /proc/diskstats ]; then cmd_ok \ - "grep '[0-9]' $TMPDIR/collect/2011_12_05-diskstats" \ + "grep -q '[0-9]' $TMPDIR/collect/2011_12_05-diskstats" \ "/proc/diskstats" else test -f $TMPDIR/collect/2011_12_05-diskstats From ba4b4f3059ff2e69c78ec676ac7c2e180edaf75b Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 19 Jan 2012 11:51:24 -0700 Subject: [PATCH 35/71] Log how pt-stalk was ran. Update modules in tool. Tweak 'Starting' and 'Exiting' log lines. --- bin/pt-stalk | 107 +++++++++++++++++++++++++----------------- t/pt-stalk/pt-stalk.t | 9 +++- 2 files changed, 72 insertions(+), 44 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 312858bd..2fb45bfa 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -439,17 +439,17 @@ remove_pid_file() { set -u -CMD_GDB=${CMD_GDB:-"gdb"} -CMD_IOSTAT=${CMD_IOSTAT:-"iostat"} -CMD_MPSTAT=${CMD_MPSTAT:-"mpstat"} -CMD_MYSQL=${CMD_MSSQL:-"mysql"} -CMD_MYSQLADMIN=${CMD_MYSQL_ADMIN:-"mysqladmin"} -CMD_OPCONTROL=${CMD_OPCONTROL:-"opcontrol"} -CMD_OPREPORT=${CMD_OPREPORT:-"opreport"} -CMD_PMAP=${CMD_PMAP:-"pmap"} -CMD_STRACE=${CMD_STRACE:-"strace"} -CMD_TCPDUMP=${CMD_TCPDUMP:-"tcpdump"} -CMD_VMSTAT=${CMD_VMSTAT:-"vmstat"} +CMD_GDB="$(which gdb)" +CMD_IOSTAT="$(which iostat)" +CMD_MPSTAT="$(which mpstat)" +CMD_MYSQL="$(which mysql)" +CMD_MYSQLADMIN="$(which mysqladmin)" +CMD_OPCONTROL="$(which opcontrol)" +CMD_OPREPORT="$(which opreport)" +CMD_PMAP="$(which pmap)" +CMD_STRACE="$(which strace)" +CMD_TCPDUMP="$(which tcpdump)" +CMD_VMSTAT="$(which vmstat)" collect() { local d="$1" # directory to save results in @@ -463,7 +463,7 @@ 
collect() { mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); fi - if [ "$mysqld_pid" ]; then + if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then $CMD_PMAP -x $mysqld_pid > "$d/$p-pmap" else @@ -471,14 +471,12 @@ collect() { fi fi - if [ "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then + if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then $CMD_GDB \ -ex "set pagination 0" \ -ex "thread apply all bt" \ --batch -p $mysqld_pid \ >> "$d/$p-stacktrace" - else - echo "GDB (--collect-gdb) was not enabled" >> "$d/$p-stacktrace" fi $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 & @@ -512,7 +510,7 @@ collect() { open_tables >> "$d/$p-opentables1" 2>&1 & local tcpdump_pid="" - if [ "$OPT_COLLECT_TCPDUMP" = "yes" ]; then + if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" = "yes" ]; then local port=$(awk '/^port/{print $2}' "$d/$p-variables") if [ "$port" ]; then $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & @@ -521,26 +519,32 @@ collect() { fi local have_oprofile="no" - if [ "$OPT_COLLECT_OPROFILE" = "yes" ]; then + if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" = "yes" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & local strace_pid=$! 
fi - ps -eaf >> "$d/$p-ps" 2>&1 & - sysctl -a >> "$d/$p-sysctl" 2>&1 & - top -bn1 >> "$d/$p-top" 2>&1 & - $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & - $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & - $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & - $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & - $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & - $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & - lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + ps -eaf >> "$d/$p-ps" 2>&1 & + sysctl -a >> "$d/$p-sysctl" 2>&1 & + top -bn1 >> "$d/$p-top" 2>&1 & + lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + if [ "$CMD_VMSTAT" ]; then + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & + fi + if [ "$CMD_IOSTAT" ]; then + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & + fi + if [ "$CMD_MPSTAT" ]; then + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & + fi $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" 2>&1 & local mysqladmin_pid=$! 
@@ -564,15 +568,29 @@ collect() { sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') local ts="$(date +"TS %s.%N %F %T")" - (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & - (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & - (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & - (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & - (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & - (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & - (df -h 2>&1; echo $ts) >> "$d/$p-df" & - (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & - (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & + if [ -d "/proc" ]; then + if [ -f "/proc/diskstats" ]; then + (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & + fi + if [ -f "/proc/stat" ]; then + (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & + fi + if [ -f "/proc/vmstat" ]; then + (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & + fi + if [ -f "/proc/meminfo" ]; then + (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & + fi + if [ -f "/proc/slabinfo" ]; then + (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & + fi + if [ -f "/proc/interrupts" ]; then + (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & + fi + fi + (df -h 2>&1; echo $ts) >> "$d/$p-df" & + (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & + (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & ($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \ >> "$d/$p-processlist" @@ -605,7 +623,7 @@ collect() { "/path/to/mysqld'" \ > "$d/$p-opreport" fi - elif [ "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid @@ -672,6 +690,7 @@ lock_waits() { # ########################################################################### # Global variables # ########################################################################### +RAN_WITH="" EXIT_REASON="" TOOL=$(basename $0) 
OKTORUN=1 @@ -758,12 +777,12 @@ trg_magic() { oktorun() { if [ $OKTORUN -eq 0 ]; then - EXIT_REASON="OKTORUN is false, exiting" + EXIT_REASON="OKTORUN is false" return 1 # stop running fi if [ -n "$OPT_ITERATIONS" ] && [ $ITER -gt $OPT_ITERATIONS ]; then - EXIT_REASON="No more iterations, exiting" + EXIT_REASON="no more iterations" return 1 # stop running fi @@ -856,7 +875,8 @@ stalk() { "$margin" # real used MB + margin MB if [ $? -eq 0 ]; then # There should be enough disk space, so collect. - log "$msg" >> "$OPT_DEST/$prefix-trigger" + log "$msg" >> "$OPT_DEST/$prefix-trigger" + log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger" last_prefix="$prefix" # Send email to whomever that collect has been triggered. @@ -903,7 +923,8 @@ main() { # Note: $$ is the parent's PID, but we're a child proc. # Bash 4 has $BASHPID but we can't rely on that. Consequently, # we don't know our own PID. See the usage of $! below. - log "$0 started" + RAN_WITH="--function=$OPT_FUNCTION --variable=$OPT_VARIABLE --threshold=$OPT_THRESHOLD --match=$OPT_MATCH --cycles=$OPT_CYCLES --interval=$OPT_INTERVAL --iterations=$OPT_ITERATIONS --run-time=$OPT_RUN_TIME --sleep=$OPT_SLEEP --dest=$OPT_DEST --prefix=$OPT_PREFIX --notify-by-email=$OPT_NOTIFY_BY_EMAIL --log=$OPT_LOG --pid=$OPT_PID" + log "Starting $0 $RAN_WITH" # Make the collection dir exists. if [ ! 
-d "$OPT_DEST" ]; then @@ -939,7 +960,7 @@ main() { rm_tmpdir remove_pid_file "$OPT_PID" - log "$EXIT_REASON" + log "Exiting because $EXIT_REASON" log "$0 exit status $EXIT_STATUS" exit $EXIT_STATUS } diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index a8e7da87..d1ac37a2 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -23,7 +23,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 14; + plan tests => 15; } my $cnf = "/tmp/12345/my.sandbox.cnf"; @@ -161,6 +161,13 @@ is( "pt-stalk is not running" ); +$output = `cat $dest/*-trigger`; +like( + $output, + qr/pt-stalk ran with --function=status --variable=Uptime --threshold=$threshold/, + "Trigger file logs how pt-stalk was ran" +); + # ############################################################################# # Done. # ############################################################################# From 51b93a623559a203207360733348b5fb66c620fc Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 19 Jan 2012 12:20:33 -0700 Subject: [PATCH 36/71] Fix --help description interpolation. 
--- bin/pt-stalk | 6 +++--- lib/bash/parse_options.sh | 6 +++--- t/lib/bash.t | 1 + t/lib/bash/parse_options.sh | 10 +++++++++- t/lib/samples/bash/po003.sh | 37 +++++++++++++++++++++++++++++++++++++ 5 files changed, 53 insertions(+), 7 deletions(-) create mode 100644 t/lib/samples/bash/po003.sh diff --git a/bin/pt-stalk b/bin/pt-stalk index 2fb45bfa..1968cb85 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -128,20 +128,20 @@ parse_options() { my $opt = $1; my $file = "$ENV{PO_DIR}/$opt"; open my $opt_fh, ">", $file or die "Cannot open $file: $!"; - printf $opt_fh "long:$opt\n"; + print $opt_fh "long:$opt\n"; $para = <>; chomp; if ( $para =~ m/^[a-z ]+:/ ) { map { chomp; my ($attrib, $val) = split(/: /, $_); - printf $opt_fh "$attrib:$val\n"; + print $opt_fh "$attrib:$val\n"; } split(/; /, $para); $para = <>; chomp; } my ($desc) = $para =~ m/^([^?.]+)/; - printf $opt_fh "desc:$desc.\n"; + print $opt_fh "desc:$desc.\n"; close $opt_fh; } } diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 21d580d2..293d4f76 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -137,20 +137,20 @@ parse_options() { my $opt = $1; my $file = "$ENV{PO_DIR}/$opt"; open my $opt_fh, ">", $file or die "Cannot open $file: $!"; - printf $opt_fh "long:$opt\n"; + print $opt_fh "long:$opt\n"; $para = <>; chomp; if ( $para =~ m/^[a-z ]+:/ ) { map { chomp; my ($attrib, $val) = split(/: /, $_); - printf $opt_fh "$attrib:$val\n"; + print $opt_fh "$attrib:$val\n"; } split(/; /, $para); $para = <>; chomp; } my ($desc) = $para =~ m/^([^?.]+)/; - printf $opt_fh "desc:$desc.\n"; + print $opt_fh "desc:$desc.\n"; close $opt_fh; } } diff --git a/t/lib/bash.t b/t/lib/bash.t index aaf1fe28..6b6f3998 100644 --- a/t/lib/bash.t +++ b/t/lib/bash.t @@ -15,6 +15,7 @@ use PerconaTest; my ($tool) = $PROGRAM_NAME =~ m/([\w-]+)\.t$/; push @ARGV, "$trunk/t/lib/bash/*.sh" unless @ARGV; +$ENV{BIN_DIR} = "$trunk/bin"; $ENV{LIB_DIR} = "$trunk/lib/bash"; $ENV{T_LIB_DIR} = 
"$trunk/t/lib"; diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 482bc46d..c65fe24c 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=26 +TESTS=27 TMPFILE="$TEST_TMPDIR/parse-opts-output" @@ -81,6 +81,14 @@ cmd_ok \ "grep -q \"For more information, 'man pt-stalk' or 'perldoc\" $TMPFILE" \ "--help" +# Don't interpolate. +parse_options "$T_LIB_DIR/samples/bash/po003.sh" --help +usage_or_errors "$T_LIB_DIR/samples/bash/po003.sh" >$TMPFILE 2>&1 + +cmd_ok \ + "grep -q 'Exit if the disk is less than this %full.' $TMPFILE" \ + "Don't interpolate --help descriptions" + # ############################################################################ # Done # ############################################################################ diff --git a/t/lib/samples/bash/po003.sh b/t/lib/samples/bash/po003.sh new file mode 100644 index 00000000..a7971cdc --- /dev/null +++ b/t/lib/samples/bash/po003.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +: + +# ############################################################################ +# Documentation +# ############################################################################ +:<<'DOCUMENTATION' +=pod + +=head1 NAME + +pt-stalk - Wait for a condition to occur then begin collecting data. + +=head1 OPTIONS + +=over + +=item --disk-pct-limit + +type: int; default: 5 + +Exit if the disk is less than this %full. + +=item --help + +Print help. + +=back + +=head1 ENVIRONMENT + +No env vars used. + +=cut + +DOCUMENTATION From acbc6b4622dc84ce26a304bb75194fcf9cbc7849 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Fri, 20 Jan 2012 11:34:51 -0700 Subject: [PATCH 37/71] Parse config files. Refactor parse_option.sh. Print --help and --version to STDOUT. 
--- lib/bash/parse_options.sh | 163 ++++++++++++++++++------------ t/lib/bash/parse_options.sh | 43 +++++++- t/lib/samples/bash/config001.conf | 5 + 3 files changed, 143 insertions(+), 68 deletions(-) create mode 100644 t/lib/samples/bash/config001.conf diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 293d4f76..61344f4f 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -22,15 +22,20 @@ # parse_options parses Perl POD options from Bash tools and creates # global variables for each option. +# *********************************************************** +# GLOBAL $TMPDIR AND $TOOL MUST BE SET BEFORE USING THIS LIB! +# *********************************************************** + set -u # Global variables. These must be global because declare inside a # sub will be scoped locally. -ARGV="" # Non-option args (probably input files) -EXT_ARGV="" # Everything after -- (args for an external command) -OPT_ERRS=0 # How many command line option errors -OPT_VERSION="no" # If --version was specified -OPT_HELP="no" # If --help was specified +ARGV="" # Non-option args (probably input files) +EXT_ARGV="" # Everything after -- (args for an external command) +OPT_ERRS=0 # How many command line option errors +OPT_VERSION="no" # If --version was specified +OPT_HELP="no" # If --help was specified +PO_DIR="$TMPDIR/po" # Directory with program option spec files # Sub: usage # Print usage (--help) and list the program's options. @@ -48,9 +53,9 @@ usage() { local file="$1" local usage=$(grep '^Usage: ' "$file") - echo $usage >&2 - echo >&2 - echo "For more information, 'man $TOOL' or 'perldoc $file'." >&2 + echo $usage + echo + echo "For more information, 'man $TOOL' or 'perldoc $file'." 
} usage_or_errors() { @@ -64,20 +69,20 @@ usage_or_errors() { if [ "$OPT_HELP" = "yes" ]; then usage "$file" - echo >&2 - echo "Command line options:" >&2 - echo >&2 + echo + echo "Command line options:" + echo for opt in $(ls $TMPDIR/po/); do local desc=$(cat $TMPDIR/po/$opt | grep '^desc:' | sed -e 's/^desc://') - echo "--$opt" >&2 - echo " $desc" >&2 - echo >&2 + echo "--$opt" + echo " $desc" + echo done return 1 fi if [ $OPT_ERRS -gt 0 ]; then - echo >&2 + echo usage "$file" return 1 fi @@ -103,6 +108,33 @@ parse_options() { local file="$1" shift + # Change --op=val to --op val because _parse_command_line() needs + # a space-separated list of "op val op val" etc. + local opts=$(echo "$@" | perl -ne 's/--(\S+)=/--$1 /g, print') + + if [ ! -d "$PO_DIR" ]; then + mkdir "$PO_DIR" + if [ $? -ne 0 ]; then + echo "Cannot mkdir $PO_DIR" >&2 + exit 1 + fi + fi + + rm -rf "$PO_DIR"/* + if [ $? -ne 0 ]; then + echo "Cannot rm -rf $PO_DIR/*" >&2 + exit 1 + fi + + _parse_pod "$file" + _eval_po + _parse_config_files + _parse_command_line $opts # do NOT quote, we want "--op" "val" not "--op val" +} + +_parse_pod() { + local file="$1" + # Parse the program options (po) from the POD. Each option has # a spec file like: # $ cat po/string-opt2 @@ -111,57 +143,42 @@ parse_options() { # default=foo # That's the spec for --string-opt2. Each line is a key:value pair # from the option's POD line like "type: string; default: foo". - if [ ! -d "$TMPDIR/po/" ]; then - mkdir "$TMPDIR/po/" - if [ $? -ne 0 ]; then - echo "Cannot mkdir $TMPDIR/po/" >&2 - exit 1 - fi - fi - - rm -rf "$TMPDIR"/po/* - if [ $? 
-ne 0 ]; then - echo "Cannot rm -rf $TMPDIR/po/*" >&2 - exit 1 - fi - - ( - export PO_DIR="$TMPDIR/po" - cat "$file" | perl -ne ' - BEGIN { $/ = ""; } - next unless $_ =~ m/^=head1 OPTIONS/; - while ( defined(my $para = <>) ) { - last if $para =~ m/^=head1/; + cat "$file" | PO_DIR="$PO_DIR" perl -ne ' + BEGIN { $/ = ""; } + next unless $_ =~ m/^=head1 OPTIONS/; + while ( defined(my $para = <>) ) { + last if $para =~ m/^=head1/; + chomp; + if ( $para =~ m/^=item --(\S+)/ ) { + my $opt = $1; + my $file = "$ENV{PO_DIR}/$opt"; + open my $opt_fh, ">", $file or die "Cannot open $file: $!"; + print $opt_fh "long:$opt\n"; + $para = <>; chomp; - if ( $para =~ m/^=item --(\S+)/ ) { - my $opt = $1; - my $file = "$ENV{PO_DIR}/$opt"; - open my $opt_fh, ">", $file or die "Cannot open $file: $!"; - print $opt_fh "long:$opt\n"; + if ( $para =~ m/^[a-z ]+:/ ) { + map { + chomp; + my ($attrib, $val) = split(/: /, $_); + print $opt_fh "$attrib:$val\n"; + } split(/; /, $para); $para = <>; chomp; - if ( $para =~ m/^[a-z ]+:/ ) { - map { - chomp; - my ($attrib, $val) = split(/: /, $_); - print $opt_fh "$attrib:$val\n"; - } split(/; /, $para); - $para = <>; - chomp; - } - my ($desc) = $para =~ m/^([^?.]+)/; - print $opt_fh "desc:$desc.\n"; - close $opt_fh; } + my ($desc) = $para =~ m/^([^?.]+)/; + print $opt_fh "desc:$desc.\n"; + close $opt_fh; } - last; - ' - ) + } + last; + ' +} +_eval_po() { # Evaluate the program options into existence as global variables # transformed like --my-op == $OPT_MY_OP. If an option has a default # value, it's assigned that value. Else, it's value is an empty string. 
- for opt_spec in $(ls "$TMPDIR/po/"); do + for opt_spec in $(ls "$PO_DIR"); do local opt="" local default_val="" local neg=0 @@ -187,13 +204,13 @@ parse_options() { fi ;; *) - echo "Invalid attribute in $TMPDIR/po/$opt_spec: $line" >&2 + echo "Invalid attribute in $PO_DIR/$opt_spec: $line" >&2 exit 1 esac - done < "$TMPDIR/po/$opt_spec" + done < "$PO_DIR/$opt_spec" if [ -z "$opt" ]; then - echo "No long attribute in option spec $TMPDIR/po/$opt_spec" >&2 + echo "No long attribute in option spec $PO_DIR/$opt_spec" >&2 exit 1 fi @@ -204,9 +221,29 @@ parse_options() { fi fi + # Eval the option into existence as a global variable. eval "OPT_${opt}"="$default_val" done +} +_parse_config_files() { + local config_files="/etc/percona-toolkit/percona-toolkit.conf /etc/percona-toolkit/$TOOL.conf $HOME/.percona-toolkit.conf $HOME/.$TOOL.conf" + for config_file in $config_files; do + test -f "$config_file" || continue + + # The config file syntax is just like a command line except there + # is one option per line. In Bash, --foo --bar is the same as + # --foo + # --bar + # So we can simply cat the config file into/as the command line. + # The Perl changes --foo=bar to --foo bar because _parse_command_line() + # needs a space-separated list of "opt val opt val" etc. + _parse_command_line \ + $(cat "$config_file" | perl -ne 's/--(\S+)=/--$1 /g, print') + done +} + +_parse_command_line() { # Parse the command line options. Anything after -- is put into # EXT_ARGV. Options must begin with one or two hyphens (--help or -h), # else the item is put into ARGV (it's probably a filename, directory, @@ -217,7 +254,7 @@ parse_options() { # a default value 100, then $OPT_FOO=100 already, but if --foo=500 is # specified on the command line, then we re-eval $OPT_FOO=500 to update # $OPT_FOO. 
- for opt; do + for opt in "$@"; do if [ $# -eq 0 ]; then break # no more opts fi @@ -267,7 +304,7 @@ parse_options() { # says it has a type, then it requires a value and that value should # be the next item ($1). Else, typeless options (like --version) are # either "yes" if specified, else "no" if negatable and --no-opt. - local required_arg=$(cat $spec | awk -F: '/^type:/{print $2}') + local required_arg=$(cat "$spec" | awk -F: '/^type:/{print $2}') if [ -n "$required_arg" ]; then if [ $# -eq 0 ]; then OPT_ERRS=$(($OPT_ERRS + 1)) @@ -286,7 +323,7 @@ parse_options() { fi # Get and transform the opt's long form. E.g.: -q == --quiet == QUIET. - opt=$(cat $spec | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) # Re-eval the option to update its global variable value. eval "OPT_$opt"="$val" diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index c65fe24c..0436f406 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,8 +1,10 @@ #!/usr/bin/env bash -TESTS=27 +TESTS=37 TMPFILE="$TEST_TMPDIR/parse-opts-output" +TOOL="pt-stalk" +TMPDIR="$TEST_TMPDIR" source "$LIB_DIR/log_warn_die.sh" source "$LIB_DIR/parse_options.sh" @@ -11,8 +13,6 @@ source "$LIB_DIR/parse_options.sh" # Parse options from POD using all default values. 
# ############################################################################ -TOOL="pt-stalk" -TMPDIR="$TEST_TMPDIR" parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" 2>$TMPFILE is "`cat $TMPFILE`" "" "No warnings or errors" @@ -20,7 +20,7 @@ is "`cat $TMPFILE`" "" "No warnings or errors" is "$OPT_STRING_OPT" "" "Default string option" is "$OPT_STRING_OPT2" "foo" "Default string option with default" is "$OPT_TYPELESS_OPTION" "" "Default typeless option" -is "$OPT_NOPTION" "yes" "Defailt neg option" +is "$OPT_NOPTION" "yes" "Default neg option" is "$OPT_INT_OPT" "" "Default int option" is "$OPT_INT_OPT2" "42" "Default int option with default" is "$OPT_VERSION" "" "--version" @@ -39,6 +39,14 @@ is "$OPT_INT_OPT" "50" "Specified int option (spec)" is "$OPT_INT_OPT2" "42" "Default int option with default (spec)" is "$OPT_VERSION" "" "--version (spec)" +# ############################################################################ +# --option=value should work like --option value. +# ############################################################################ + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --int-opt=42 + +is "$OPT_INT_OPT" "42" "Specified int option (--option=value)" + # ############################################################################ # Negate an option like --no-option. # ############################################################################ @@ -46,7 +54,7 @@ is "$OPT_VERSION" "" "--version (spec)" parse_options "$T_LIB_DIR/samples/bash/po001.sh" --no-noption is "$OPT_STRING_OPT" "" "Default string option (neg)" -is "$OPT_STRING_OPT2" "foo" "Default string option with default (net)" +is "$OPT_STRING_OPT2" "foo" "Default string option with default (neg)" is "$OPT_TYPELESS_OPTION" "" "Default typeless option (neg)" is "$OPT_NOPTION" "no" "Negated option (neg)" is "$OPT_INT_OPT" "" "Default int option (neg)" @@ -89,6 +97,31 @@ cmd_ok \ "grep -q 'Exit if the disk is less than this %full.' 
$TMPFILE" \ "Don't interpolate --help descriptions" +# ########################################################################### +# Config files. +# ########################################################################### +TOOL="pt-test" +cp "$T_LIB_DIR/samples/bash/config001.conf" "$HOME/.$TOOL.conf" + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" + +is "$OPT_STRING_OPT" "abc" "Default string option (conf)" +is "$OPT_STRING_OPT2" "foo" "Default string option with default (conf)" +is "$OPT_TYPELESS_OPTION" "yes" "Default typeless option (conf)" +is "$OPT_NOPTION" "yes" "Default neg option (conf)" +is "$OPT_INT_OPT" "" "Default int option (conf)" +is "$OPT_INT_OPT2" "42" "Default int option with default (conf)" +is "$OPT_VERSION" "" "--version (conf)" +is "$EXT_ARGV" "--host 127.1 --user daniel" "External ARGV (conf)" + +# Command line should override config file. +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt zzz + +is "$OPT_STRING_OPT" "zzz" "Command line overrides config file" + +rm "$HOME/.$TOOL.conf" +TOOL="pt-stalk" + # ############################################################################ # Done # ############################################################################ diff --git a/t/lib/samples/bash/config001.conf b/t/lib/samples/bash/config001.conf new file mode 100644 index 00000000..7681362d --- /dev/null +++ b/t/lib/samples/bash/config001.conf @@ -0,0 +1,5 @@ +--string-opt=abc +--typeless-option +-- +--host=127.1 +--user=daniel From 63ea85e75593f154663d543fb0cc9b87a7977a05 Mon Sep 17 00:00:00 2001 From: "baron@percona.com" <> Date: Sat, 21 Jan 2012 09:15:45 -0500 Subject: [PATCH 38/71] docs --- bin/pt-stalk | 130 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 87 insertions(+), 43 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 1968cb85..3070f636 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -926,7 +926,7 @@ main() { RAN_WITH="--function=$OPT_FUNCTION --variable=$OPT_VARIABLE 
--threshold=$OPT_THRESHOLD --match=$OPT_MATCH --cycles=$OPT_CYCLES --interval=$OPT_INTERVAL --iterations=$OPT_ITERATIONS --run-time=$OPT_RUN_TIME --sleep=$OPT_SLEEP --dest=$OPT_DEST --prefix=$OPT_PREFIX --notify-by-email=$OPT_NOTIFY_BY_EMAIL --log=$OPT_LOG --pid=$OPT_PID" log "Starting $0 $RAN_WITH" - # Make the collection dir exists. + # Make sure the collection dir exists. if [ ! -d "$OPT_DEST" ]; then mkdir -p "$OPT_DEST" || die "Cannot make --dest $OPT_DEST" fi @@ -1033,16 +1033,17 @@ fi =head1 NAME -pt-stalk - Wait for a condition to occur then begin collecting data. +pt-stalk - Gather forensic data about MySQL when a problem occurs. =head1 SYNOPSIS Usage: pt-stalk [OPTIONS] [-- MYSQL OPTIONS] -pt-stalk watches for a condition to become true, and when it does, executes -a script. By default it executes L<pt-collect>, but that can be customized. -This tool is useful for gathering diagnostic data when an infrequent event -occurs, so an expert person can review the data later. +pt-stalk watches for a trigger condition to become true, and then collects data +to help in diagnosing problems. It is designed to run as a daemon so that you +can diagnose intermittent problems that you cannot observe directly. You can +also use it to execute a custom command, or to gather the data on demand without +waiting for the trigger to happen. =head1 RISKS @@ -1051,7 +1052,9 @@ whether known or unknown, of using this tool. The two main categories of risks are those created by the nature of the tool (e.g. read-only tools vs. read-write tools) and those created by bugs. -pt-stalk is a read-only tool. It should be very low-risk. +pt-stalk is a read-only tool. It should be very low-risk. Some of the options +can cause intrusive data collection to be performed, however, so if you enable +any non-default options, you should read their documentation carefully. At the time of this release, we know of no bugs that could cause serious harm to users. 
@@ -1065,37 +1068,42 @@ See also L<"BUGS"> for more information on filing bugs and getting help. =head1 DESCRIPTION -Although pt-stalk comes pre-configured to do a specific thing, in general -this tool is just a skeleton script for the following flow of actions: +Sometimes a problem happens infrequently and for a short time, giving you no +chance to see the system when it happens. How do you solve intermittent MySQL +problems when you can't observe them? That's why pt-stalk exists. In addition to +using it when there's a known problem on your servers, it is a good idea to run +pt-stalk all the time, even when you think nothing is wrong. You will +appreciate the data it gathers when a problem occurs, because problems such as +MySQL lockups or spikes of activity typically leave no evidence to use in root +cause analysis. -=over +This tool does two things: it watches a server (typically MySQL) for a trigger +to occur, and it gathers diagnostic data. To use it effectively, you need to +define a good trigger condition. A good trigger is sensitive enough to fire +reliably when a problem occurs, so that you don't miss a chance to solve +problems. On the other hand, a good trigger isn't prone to false positives, so +you don't gather information when the server is functioning normally. -=item 1. +The most reliable triggers for MySQL tend to be the number of connections to the +server, and the number of queries running concurrently. These are available in +the SHOW GLOBAL STATUS command as Threads_connected and Threads_running. +Sometimes Threads_connected is not a reliable indicator of trouble, but +Threads_running usually is. Your job, as the tool's user, is to define an +appropriate trigger condition for the tool. Choose carefully, because the +quality of your results will depend on the trigger you choose. -Loop infinitely, sleeping between iterations. +The pt-stalk tool, by default, simply watches MySQL repeatedly until the trigger +becomes true. 
It then gathers diagnostics for a while, and sleeps afterwards for +some time to prevent repeatedly gathering data if the condition remains true. -=item 2. +The diagnostic data is written to files whose names begin with a timestamp, so +you can distinguish samples from each other in case the tool collects data +multiple times. The pt-sift tool is designed to help you browse and analyze the +resulting samples of data. -In each iteration, run some command and get the output. - -=item 3. - -If the command fails or the output is larger than the threshold, -execute the collection script; but do not execute if the destination disk -is too full. - -=back - -By default, the tool is configured to execute mysqladmin extended-status and -extract the value of the Threads_running variable; if this is greater than -25, it runs the collection script. This is really just placeholder code, -and almost certainly needs to be customized! - -If the tool does execute the collection script, it will wait for a while -before checking and executing again. This is to prevent a continuous -condition from causing a huge number of executions to fire off. - -The name 'stalk' is because 'watch' is already taken, and 'stalk' is fun. +Although this sounds simple enough, in practice there are a number of +subtleties, such as detecting when the disk is beginning to fill up so that the +tool doesn't cause the server to run out of disk space. =head1 CONFIGURING @@ -1109,51 +1117,87 @@ TODO default: yes; negatable: yes -Collect system information. +Collect system information. You can negate this option to make the tool watch +the system but not actually gather any diagnostic data. =item --collect-gdb -Collect GDB stacktraces. +Collect GDB stacktraces. This is achieved by attaching to MySQL and printing +stack traces from all threads. This will freeze the server for some period of +time, ranging from a second or so to much longer on very busy systems with a lot +of memory and many threads in the server. 
For this reason, it is disabled by +default. However, if you are trying to diagnose a server stall or lockup, +freezing the server causes no additional harm, and the stack traces can be vital +for diagnosis. + +In addition to freezing the server, there is also some risk of the server +crashing or performing badly after GDB detaches from it. =item --collect-oprofile -Collect oprofile data. +Collect oprofile data. This is achieved by starting an oprofile session, +letting it run for the collection time, and then stopping and saving the +resulting profile data in the system's default location. Please read your +system's oprofile documentation to learn more about this. =item --collect-strace -Collect strace data. +Collect strace data. This is achieved by attaching strace to the server, which +will make it run very slowly until strace detaches. The same cautions apply as +those listed in --collect-gdb. You should not enable this option together with +--collect-gdb, because GDB and strace can't attach to the server process +simultaneously. =item --collect-tcpdump -Collect tcpdump data. +Collect tcpdump data. This option causes tcpdump to capture all traffic on all +interfaces for the port on which MySQL is listening. You can later use +pt-query-digest to decode the MySQL protocol and extract a log of query traffic +from it. =item --cycles type: int; default: 5 -Number of times condition must be met before triggering collection. +The number of times the trigger condition must be true before collecting data. +This helps prevent false positives and make the trigger condition less +susceptible to firing when the condition recovers quickly. =item --daemonize -Daemonize the tool. +Daemonize the tool. This causes the tool to fork into the background and log +its output as specified in --log. =item --dest type: string; default: ${HOME}/collected -Where to store collected data. +Where to store the diagnostic data. 
Each time the tool collects data, it writes +to a new set of files, which are named with the current system timestamp. =item --disk-byte-limit type: int; default: 100 -Exit if the disk has less than this many MB free. +Don't collect data unless the destination disk has this much free space. This +prevents the tool from filling up the disk with diagnostic data. + +If the destination directory contains a previously captured sample of data, the +tool will measure its size and use that as an estimate of how much data is +likely to be gathered this time, too. It will then be even more pessimistic, +and will refuse to collect data unless the disk has enough free space to hold +the sample and still have the desired amount of free space. For example, if +you'd like 100MB of free space and the previous diagnostic sample consumed +100MB, the tool won't collect any data unless the disk has 200MB free. =item --disk-pct-limit type: int; default: 5 -Exit if the disk is less than this %full. +Don't collect data unless the disk has at least this percent free space. This +option works similarly to --disk-byte-limit, but specifies a percentage margin +of safety instead of a byte margin of safety. The tool honors both options, and +will not collect any data unless both margins are satisfied. =item --function From 65a3ab515767a52e9e3c156259c2fa1be9741fee Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Sat, 21 Jan 2012 13:59:02 -0700 Subject: [PATCH 39/71] Handle values with spaces. Still needs work; one test is failing. 
--- lib/bash/parse_options.sh | 196 ++++++++++++++++++------------ t/lib/bash/parse_options.sh | 28 ++++- t/lib/samples/bash/config002.conf | 1 + 3 files changed, 144 insertions(+), 81 deletions(-) create mode 100644 t/lib/samples/bash/config002.conf diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 61344f4f..a9286d6a 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -108,9 +108,13 @@ parse_options() { local file="$1" shift - # Change --op=val to --op val because _parse_command_line() needs - # a space-separated list of "op val op val" etc. - local opts=$(echo "$@" | perl -ne 's/--(\S+)=/--$1 /g, print') + # Reset the globals (mostly for testing). + ARGV="" + EXT_ARGV="" + OPT_ERRS=0 + OPT_VERSION="no" + OPT_HELP="no" + PO_DIR="$TMPDIR/po" if [ ! -d "$PO_DIR" ]; then mkdir "$PO_DIR" @@ -129,7 +133,7 @@ parse_options() { _parse_pod "$file" _eval_po _parse_config_files - _parse_command_line $opts # do NOT quote, we want "--op" "val" not "--op val" + _parse_command_line "$@" } _parse_pod() { @@ -230,16 +234,22 @@ _parse_config_files() { local config_files="/etc/percona-toolkit/percona-toolkit.conf /etc/percona-toolkit/$TOOL.conf $HOME/.percona-toolkit.conf $HOME/.$TOOL.conf" for config_file in $config_files; do test -f "$config_file" || continue - - # The config file syntax is just like a command line except there - # is one option per line. In Bash, --foo --bar is the same as - # --foo - # --bar - # So we can simply cat the config file into/as the command line. - # The Perl changes --foo=bar to --foo bar because _parse_command_line() - # needs a space-separated list of "opt val opt val" etc. - _parse_command_line \ - $(cat "$config_file" | perl -ne 's/--(\S+)=/--$1 /g, print') + local dashdash="" + for conf_opt in $(grep '^[^# ]' "$config_file"); do + if [ "$dashdash" ]; then + if [ "$EXT_ARGV" ]; then + EXT_ARGV="$EXT_ARGV $conf_opt" + else + EXT_ARGV="$conf_opt" + fi + else + _parse_command_line "$conf_opt" + if [ $? 
-eq 1 ]; then + dashdash=1 + EXT_ARGV="" + fi + fi + done done } @@ -254,79 +264,113 @@ _parse_command_line() { # a default value 100, then $OPT_FOO=100 already, but if --foo=500 is # specified on the command line, then we re-eval $OPT_FOO=500 to update # $OPT_FOO. + local opt="" + local val="" + local next_opt_is_val="" + local opt_is_ok="" + local opt_is_negated="" + local real_opt="" + local required_arg="" + for opt in "$@"; do - if [ $# -eq 0 ]; then - break # no more opts - fi - opt=$1 - if [ "$opt" = "--" ]; then - shift - EXT_ARGV="$@" - break - fi - shift - if [ $(expr "$opt" : "-") -eq 0 ]; then - # Option does not begin with a hyphen (-), so treat it as - # a filename, directory, etc. - if [ -z "$ARGV" ]; then - ARGV="$opt" - else - ARGV="$ARGV $opt" - fi - continue - fi - - # Save real opt from cmd line for error messages. - local real_opt="$opt" - - # Strip leading -- or --no- from option. - if $(echo $opt | grep -q '^--no-'); then - neg=1 - opt=$(echo $opt | sed 's/^--no-//') - else - neg=0 - opt=$(echo $opt | sed 's/^-*//') - fi - - # Find the option's spec file. - if [ -f "$TMPDIR/po/$opt" ]; then - spec="$TMPDIR/po/$opt" - else - spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) - if [ -z "$spec" ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "Unknown option: $real_opt" >&2 - continue - fi - fi - - # Get the value specified for the option, if any. If the opt's spec - # says it has a type, then it requires a value and that value should - # be the next item ($1). Else, typeless options (like --version) are - # either "yes" if specified, else "no" if negatable and --no-opt. 
- local required_arg=$(cat "$spec" | awk -F: '/^type:/{print $2}') - if [ -n "$required_arg" ]; then - if [ $# -eq 0 ]; then + if [ "$next_opt_is_val" ]; then + next_opt_is_val="" + if [ $# -eq 0 ] || [ $(expr "$opt" : "-") -eq 1 ]; then OPT_ERRS=$(($OPT_ERRS + 1)) echo "$real_opt requires a $required_arg argument" >&2 continue - else - val="$1" - shift fi + val="$opt" + opt_is_ok=1 else - if [ $neg -eq 0 ]; then - val="yes" + if [ "$opt" = "--" ]; then + EXT_ARGV="$@" + return 1 + fi + + # If option does not begin with a hyphen (-), it's a filename, etc. + if [ $(expr "$opt" : "-") -eq 0 ]; then + if [ -z "$ARGV" ]; then + ARGV="$opt" + else + ARGV="$ARGV $opt" + fi + continue + fi + + # Save real opt from cmd line for error messages. + real_opt="$opt" + + # Strip leading -- or --no- from option. + if $(echo $opt | grep -q '^--no-'); then + opt_is_negated=1 + opt=$(echo $opt | sed 's/^--no-//') else - val="no" + opt_is_negated="" + opt=$(echo $opt | sed 's/^-*//') + fi + + # Split opt=val pair. + if $(echo $opt | grep '^[a-z-][a-z-]*=' >/dev/null 2>&1); then + val="$(echo $opt | awk -F= '{print $2}')" + opt="$(echo $opt | awk -F= '{print $1}')" + fi + + # Find the option's spec file. + if [ -f "$TMPDIR/po/$opt" ]; then + spec="$TMPDIR/po/$opt" + else + spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) + if [ -z "$spec" ]; then + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "Unknown option: $real_opt" >&2 + continue + fi + fi + + # Get the value specified for the option, if any. If the opt's spec + # says it has a type, then it requires a value and that value should + # be the next item ($1). Else, typeless options (like --version) are + # either "yes" if specified, else "no" if negatable and --no-opt. + required_arg=$(cat "$spec" | awk -F: '/^type:/{print $2}') + if [ "$required_arg" ]; then + # Option takes a value. + if [ "$val" ]; then + opt_is_ok=1 + else + next_opt_is_val=1 + fi + else + # Option does not take a value. 
+ if [ "$val" ]; then + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "Option $real_opt does not take a value" >&2 + continue + fi + if [ "$opt_is_negated" ]; then + val="" + else + val="yes" + fi + opt_is_ok=1 fi fi - # Get and transform the opt's long form. E.g.: -q == --quiet == QUIET. - opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + if [ "$opt_is_ok" ]; then + # Get and transform the opt's long form. E.g.: -q == --quiet == QUIET. + opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) - # Re-eval the option to update its global variable value. - eval "OPT_$opt"="$val" + # Re-eval the option to update its global variable value. + eval "OPT_$opt"="'$val'" + + opt="" + val="" + next_opt_is_val="" + opt_is_ok="" + opt_is_negated="" + real_opt="" + required_arg="" + fi done } diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 0436f406..9938dc73 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=37 +TESTS=44 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -13,7 +13,7 @@ source "$LIB_DIR/parse_options.sh" # Parse options from POD using all default values. # ############################################################################ -parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" 2>$TMPFILE +parse_options "$T_LIB_DIR/samples/bash/po001.sh" 2>$TMPFILE is "`cat $TMPFILE`" "" "No warnings or errors" @@ -38,6 +38,8 @@ is "$OPT_NOPTION" "yes" "Default neg option (spec)" is "$OPT_INT_OPT" "50" "Specified int option (spec)" is "$OPT_INT_OPT2" "42" "Default int option with default (spec)" is "$OPT_VERSION" "" "--version (spec)" +is "$ARGV" "" "ARGV" +is "$EXT_ARGV" "" "External ARGV" # ############################################################################ # --option=value should work like --option value. 
@@ -56,7 +58,7 @@ parse_options "$T_LIB_DIR/samples/bash/po001.sh" --no-noption is "$OPT_STRING_OPT" "" "Default string option (neg)" is "$OPT_STRING_OPT2" "foo" "Default string option with default (neg)" is "$OPT_TYPELESS_OPTION" "" "Default typeless option (neg)" -is "$OPT_NOPTION" "no" "Negated option (neg)" +is "$OPT_NOPTION" "" "Negated option (neg)" is "$OPT_INT_OPT" "" "Default int option (neg)" is "$OPT_INT_OPT2" "42" "Default int option with default (neg)" is "$OPT_VERSION" "" "--version (neg)" @@ -103,7 +105,7 @@ cmd_ok \ TOOL="pt-test" cp "$T_LIB_DIR/samples/bash/config001.conf" "$HOME/.$TOOL.conf" -parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" +parse_options "$T_LIB_DIR/samples/bash/po001.sh" is "$OPT_STRING_OPT" "abc" "Default string option (conf)" is "$OPT_STRING_OPT2" "foo" "Default string option with default (conf)" @@ -112,16 +114,32 @@ is "$OPT_NOPTION" "yes" "Default neg option (conf)" is "$OPT_INT_OPT" "" "Default int option (conf)" is "$OPT_INT_OPT2" "42" "Default int option with default (conf)" is "$OPT_VERSION" "" "--version (conf)" -is "$EXT_ARGV" "--host 127.1 --user daniel" "External ARGV (conf)" +is "$ARGV" "" "ARGV (conf)" +is "$EXT_ARGV" "--host=127.1 --user=daniel" "External ARGV (conf)" # Command line should override config file. parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt zzz is "$OPT_STRING_OPT" "zzz" "Command line overrides config file" +# ############################################################################ +# Option values with spaces. 
+# ############################################################################ + +# Config file +cp "$T_LIB_DIR/samples/bash/config002.conf" "$HOME/.$TOOL.conf" +parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" +is "$OPT_STRING_OPT" "hello world" "Option value with space (conf)" + rm "$HOME/.$TOOL.conf" TOOL="pt-stalk" +# Command line +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt "hello world" +is "$OPT_STRING_OPT" "hello world" "Option value with space (cmd line)" +is "$ARGV" "" "ARGV (cmd line)" +is "$EXT_ARGV" "" "External ARGV (cmd line)" + # ############################################################################ # Done # ############################################################################ diff --git a/t/lib/samples/bash/config002.conf b/t/lib/samples/bash/config002.conf new file mode 100644 index 00000000..0321a4d3 --- /dev/null +++ b/t/lib/samples/bash/config002.conf @@ -0,0 +1 @@ +--string-opt "hello world" From 0d348ce28965b0c4d54c238e46243962048bffc0 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 23 Jan 2012 10:30:42 -0700 Subject: [PATCH 40/71] Make parse_options work with everything: cmd line, config files, values with spaces, op val, op=val, etc. --- docs/percona-toolkit.pod | 24 ++++++++++-- lib/bash/parse_options.sh | 61 +++++++++++++++++++++++++++---- t/lib/bash/parse_options.sh | 9 ++++- t/lib/samples/bash/config001.conf | 4 +- t/lib/samples/bash/config002.conf | 6 ++- 5 files changed, 89 insertions(+), 15 deletions(-) diff --git a/docs/percona-toolkit.pod b/docs/percona-toolkit.pod index 42433fb7..3a677489 100644 --- a/docs/percona-toolkit.pod +++ b/docs/percona-toolkit.pod @@ -182,8 +182,8 @@ The syntax of the configuration files is as follows: =item * -Whitespace followed by a hash (#) sign signifies that the rest of the line is a -comment. This is deleted. +Whitespace followed by a hash sign (#) signifies that the rest of the line is a +comment. This is deleted. 
For example: =item * @@ -200,7 +200,9 @@ Each line is permitted to be in either of the following formats: option option=value -Whitespace around the equals sign is deleted during processing. +Do not prefix the option with C<-->. Do not quote the values, even if +they have spaces; values are literal. Whitespace around the equals sign is +deleted during processing. =item * @@ -214,6 +216,22 @@ program. =back +=head2 EXAMPLE + +This config file for pt-stalk, + + # Config for pt-stalk + variable=Threads_connected + cycles=2 # trigger if problem seen twice in a row + -- + --user daniel + +is equivalent to this command line: + + pt-stalk --variable Threads_connected --cycles 2 -- --user daniel + +Options after C<--> are passed literally to mysql and mysqladmin. + =head2 READ ORDER The tools read several configuration files in order: diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index a9286d6a..2c8e9290 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -26,6 +26,24 @@ # GLOBAL $TMPDIR AND $TOOL MUST BE SET BEFORE USING THIS LIB! # *********************************************************** +# Parsing command line options with Bash is easy until we have to deal +# with values that have spaces, e.g. --option="hello world". This is +# further complicated by command line vs. config file. From the command +# line, <--option "hello world"> is put into $@ as "--option", "hello world", +# i.e. 2 args. From a config file, <option=hello world> is either 2 args +# split on the space, or 1 arg as a whole line. It needs to be 2 args +# split on the = but this isn't possible; see the note before while read +# in _parse_config_files(). Perl tool config files do not work when the +# value is quoted, so we can't quote it either. And in any case, that +# wouldn't work because then the value would include the literal quotes +# because it's a line from a file, not a command line where Bash will +# interpret the quotes and return a single value in the code. So... 
+ +# *************************************************** +# BE CAREFUL MAKING CHANGES TO THIS LIB AND MAKE SURE +# t/lib/bash/parse_options.sh STILL PASSES! +# *************************************************** + set -u # Global variables. These must be global because declare inside a @@ -231,25 +249,52 @@ _eval_po() { } _parse_config_files() { - local config_files="/etc/percona-toolkit/percona-toolkit.conf /etc/percona-toolkit/$TOOL.conf $HOME/.percona-toolkit.conf $HOME/.$TOOL.conf" - for config_file in $config_files; do + for config_file in "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" + do + # Next config file if this one doesn't exist. test -f "$config_file" || continue + + # We've hit a -- in the config file, so just append everything + # else to EXT_ARGV. local dashdash="" - for conf_opt in $(grep '^[^# ]' "$config_file"); do + + # We must use while read because values can contain spaces. + # Else, if we for $(grep ...) then a line like "op=hello world" + # will return 2 values: "op=hello" and "world". If we quote + # the command like for "$(grep ...)" then the entire config + # file is returned as 1 value like "opt=hello world\nop2=42". + while read config_opt; do + + # Skip the line if it begins with a # or is blank. + echo "$config_opt" | grep '^[ ]*[^#]' >/dev/null 2>&1 || continue + + # Strip leading and trailing spaces, and spaces around the first =, + # and end-of-line # comments. + config_opt="$(echo "$config_opt" | sed -e 's/^[ ]*//' -e 's/[ ]*\$//' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + if [ "$dashdash" ]; then + # Previous line was -- so this and subsequent options are + # really external argvs. 
if [ "$EXT_ARGV" ]; then - EXT_ARGV="$EXT_ARGV $conf_opt" + EXT_ARGV="$EXT_ARGV $config_opt" else - EXT_ARGV="$conf_opt" + EXT_ARGV="$config_opt" fi else - _parse_command_line "$conf_opt" + # Options in a config file are not prefixed with --, + # but command line options are, so one or the other has + # to add or remove the -- prefix. We add it for config + # files rather than trying to strip it from command line + # options because it's a simpler operation here. + _parse_command_line "--$config_opt" + + # _parse_command_line() returns 1 when it sees --. if [ $? -eq 1 ]; then dashdash=1 EXT_ARGV="" fi fi - done + done < "$config_file" done } @@ -283,7 +328,7 @@ _parse_command_line() { val="$opt" opt_is_ok=1 else - if [ "$opt" = "--" ]; then + if [ "$opt" = "--" -o "$opt" = "----" ]; then EXT_ARGV="$@" return 1 fi diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 9938dc73..4a734533 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=44 +TESTS=46 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -49,6 +49,10 @@ parse_options "$T_LIB_DIR/samples/bash/po001.sh" --int-opt=42 is "$OPT_INT_OPT" "42" "Specified int option (--option=value)" +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt="hello world" + +is "$OPT_STRING_OPT" "hello world" "Specified int option (--option=\"value\")" + # ############################################################################ # Negate an option like --no-option. 
# ############################################################################ @@ -128,8 +132,11 @@ is "$OPT_STRING_OPT" "zzz" "Command line overrides config file" # Config file cp "$T_LIB_DIR/samples/bash/config002.conf" "$HOME/.$TOOL.conf" + parse_options "$T_LIB_DIR/samples/bash/po001.sh" "" + is "$OPT_STRING_OPT" "hello world" "Option value with space (conf)" +is "$OPT_INT_OPT" "100" "Option = value # comment (conf)" rm "$HOME/.$TOOL.conf" TOOL="pt-stalk" diff --git a/t/lib/samples/bash/config001.conf b/t/lib/samples/bash/config001.conf index 7681362d..f68cf974 100644 --- a/t/lib/samples/bash/config001.conf +++ b/t/lib/samples/bash/config001.conf @@ -1,5 +1,5 @@ ---string-opt=abc ---typeless-option +string-opt=abc +typeless-option -- --host=127.1 --user=daniel diff --git a/t/lib/samples/bash/config002.conf b/t/lib/samples/bash/config002.conf index 0321a4d3..d4a76af5 100644 --- a/t/lib/samples/bash/config002.conf +++ b/t/lib/samples/bash/config002.conf @@ -1 +1,5 @@ ---string-opt "hello world" +# Line comment. +string-opt=hello world + + +int-opt = 100 # Inline comment. From 034f76d77e147dac11c367a58e0e94c9dd068477 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 23 Jan 2012 10:53:41 -0700 Subject: [PATCH 41/71] Fix handling EXT_ARGV. --- lib/bash/parse_options.sh | 58 ++++++++++++++++++------------------- t/lib/bash/parse_options.sh | 12 +++++++- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 2c8e9290..0576bcd7 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -50,6 +50,7 @@ set -u # sub will be scoped locally. 
ARGV="" # Non-option args (probably input files) EXT_ARGV="" # Everything after -- (args for an external command) +HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV OPT_ERRS=0 # How many command line option errors OPT_VERSION="no" # If --version was specified OPT_HELP="no" # If --help was specified @@ -129,6 +130,7 @@ parse_options() { # Reset the globals (mostly for testing). ARGV="" EXT_ARGV="" + HAVE_EXT_ARGV="" OPT_ERRS=0 OPT_VERSION="no" OPT_HELP="no" @@ -254,10 +256,6 @@ _parse_config_files() { # Next config file if this one doesn't exist. test -f "$config_file" || continue - # We've hit a -- in the config file, so just append everything - # else to EXT_ARGV. - local dashdash="" - # We must use while read because values can contain spaces. # Else, if we for $(grep ...) then a line like "op=hello world" # will return 2 values: "op=hello" and "world". If we quote @@ -272,29 +270,19 @@ _parse_config_files() { # and end-of-line # comments. config_opt="$(echo "$config_opt" | sed -e 's/^[ ]*//' -e 's/[ ]*\$//' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" - if [ "$dashdash" ]; then - # Previous line was -- so this and subsequent options are - # really external argvs. - if [ "$EXT_ARGV" ]; then - EXT_ARGV="$EXT_ARGV $config_opt" - else - EXT_ARGV="$config_opt" - fi - else - # Options in a config file are not prefixed with --, - # but command line options are, so one or the other has - # to add or remove the -- prefix. We add it for config - # files rather than trying to strip it from command line - # options because it's a simpler operation here. - _parse_command_line "--$config_opt" - - # _parse_command_line() returns 1 when it sees --. - if [ $? -eq 1 ]; then - dashdash=1 - EXT_ARGV="" - fi + # Options in a config file are not prefixed with --, + # but command line options are, so one or the other has + # to add or remove the -- prefix. 
We add it for config + # files rather than trying to strip it from command line + # options because it's a simpler operation here. + if ! [ "$HAVE_EXT_ARGV" ]; then + config_opt="--$config_opt" fi + _parse_command_line "$config_opt" done < "$config_file" + + HAVE_EXT_ARGV="" # reset for each file + done } @@ -318,6 +306,21 @@ _parse_command_line() { local required_arg="" for opt in "$@"; do + if [ "$opt" = "--" -o "$opt" = "----" ]; then + HAVE_EXT_ARGV=1 + continue + fi + if [ "$HAVE_EXT_ARGV" ]; then + # Previous line was -- so this and subsequent options are + # really external argvs. + if [ "$EXT_ARGV" ]; then + EXT_ARGV="$EXT_ARGV $opt" + else + EXT_ARGV="$opt" + fi + continue + fi + if [ "$next_opt_is_val" ]; then next_opt_is_val="" if [ $# -eq 0 ] || [ $(expr "$opt" : "-") -eq 1 ]; then @@ -328,11 +331,6 @@ _parse_command_line() { val="$opt" opt_is_ok=1 else - if [ "$opt" = "--" -o "$opt" = "----" ]; then - EXT_ARGV="$@" - return 1 - fi - # If option does not begin with a hyphen (-), it's a filename, etc. if [ $(expr "$opt" : "-") -eq 0 ]; then if [ -z "$ARGV" ]; then diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 4a734533..e60ead5a 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=46 +TESTS=49 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -74,6 +74,16 @@ is "$OPT_VERSION" "" "--version (neg)" parse_options "$T_LIB_DIR/samples/bash/po001.sh" -v is "$OPT_VERSION" "yes" "Short form" +# ############################################################################ +# Command line options plus externals args. 
+# ############################################################################ + +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --no-noption -- --foo + +is "$OPT_NOPTION" "" "Negated option (--)" +is "$ARGV" "" "ARGV (--)" +is "$EXT_ARGV" "--foo" "External ARGV (--)" + # ############################################################################ # An unknown option should produce an error. # ############################################################################ From cab3bb6eab5ee415821007d0677422934c2a1570 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 23 Jan 2012 10:54:43 -0700 Subject: [PATCH 42/71] Update parse_options.sh in pt-stalk. --- bin/pt-stalk | 281 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 183 insertions(+), 98 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 1968cb85..1c4fdbbd 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -49,21 +49,26 @@ die() { # ########################################################################### + + + set -u -ARGV="" # Non-option args (probably input files) -EXT_ARGV="" # Everything after -- (args for an external command) -OPT_ERRS=0 # How many command line option errors -OPT_VERSION="no" # If --version was specified -OPT_HELP="no" # If --help was specified +ARGV="" # Non-option args (probably input files) +EXT_ARGV="" # Everything after -- (args for an external command) +HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV +OPT_ERRS=0 # How many command line option errors +OPT_VERSION="no" # If --version was specified +OPT_HELP="no" # If --help was specified +PO_DIR="$TMPDIR/po" # Directory with program option spec files usage() { local file="$1" local usage=$(grep '^Usage: ' "$file") - echo $usage >&2 - echo >&2 - echo "For more information, 'man $TOOL' or 'perldoc $file'." >&2 + echo $usage + echo + echo "For more information, 'man $TOOL' or 'perldoc $file'." 
} usage_or_errors() { @@ -77,20 +82,20 @@ usage_or_errors() { if [ "$OPT_HELP" = "yes" ]; then usage "$file" - echo >&2 - echo "Command line options:" >&2 - echo >&2 + echo + echo "Command line options:" + echo for opt in $(ls $TMPDIR/po/); do local desc=$(cat $TMPDIR/po/$opt | grep '^desc:' | sed -e 's/^desc://') - echo "--$opt" >&2 - echo " $desc" >&2 - echo >&2 + echo "--$opt" + echo " $desc" + echo done return 1 fi if [ $OPT_ERRS -gt 0 ]; then - echo >&2 + echo usage "$file" return 1 fi @@ -102,54 +107,70 @@ parse_options() { local file="$1" shift - if [ ! -d "$TMPDIR/po/" ]; then - mkdir "$TMPDIR/po/" + ARGV="" + EXT_ARGV="" + HAVE_EXT_ARGV="" + OPT_ERRS=0 + OPT_VERSION="no" + OPT_HELP="no" + PO_DIR="$TMPDIR/po" + + if [ ! -d "$PO_DIR" ]; then + mkdir "$PO_DIR" if [ $? -ne 0 ]; then - echo "Cannot mkdir $TMPDIR/po/" >&2 + echo "Cannot mkdir $PO_DIR" >&2 exit 1 fi fi - rm -rf "$TMPDIR"/po/* + rm -rf "$PO_DIR"/* if [ $? -ne 0 ]; then - echo "Cannot rm -rf $TMPDIR/po/*" >&2 + echo "Cannot rm -rf $PO_DIR/*" >&2 exit 1 fi - - ( - export PO_DIR="$TMPDIR/po" - cat "$file" | perl -ne ' - BEGIN { $/ = ""; } - next unless $_ =~ m/^=head1 OPTIONS/; - while ( defined(my $para = <>) ) { - last if $para =~ m/^=head1/; + + _parse_pod "$file" + _eval_po + _parse_config_files + _parse_command_line "$@" +} + +_parse_pod() { + local file="$1" + + cat "$file" | PO_DIR="$PO_DIR" perl -ne ' + BEGIN { $/ = ""; } + next unless $_ =~ m/^=head1 OPTIONS/; + while ( defined(my $para = <>) ) { + last if $para =~ m/^=head1/; + chomp; + if ( $para =~ m/^=item --(\S+)/ ) { + my $opt = $1; + my $file = "$ENV{PO_DIR}/$opt"; + open my $opt_fh, ">", $file or die "Cannot open $file: $!"; + print $opt_fh "long:$opt\n"; + $para = <>; chomp; - if ( $para =~ m/^=item --(\S+)/ ) { - my $opt = $1; - my $file = "$ENV{PO_DIR}/$opt"; - open my $opt_fh, ">", $file or die "Cannot open $file: $!"; - print $opt_fh "long:$opt\n"; + if ( $para =~ m/^[a-z ]+:/ ) { + map { + chomp; + my ($attrib, $val) = split(/: 
/, $_); + print $opt_fh "$attrib:$val\n"; + } split(/; /, $para); $para = <>; chomp; - if ( $para =~ m/^[a-z ]+:/ ) { - map { - chomp; - my ($attrib, $val) = split(/: /, $_); - print $opt_fh "$attrib:$val\n"; - } split(/; /, $para); - $para = <>; - chomp; - } - my ($desc) = $para =~ m/^([^?.]+)/; - print $opt_fh "desc:$desc.\n"; - close $opt_fh; } + my ($desc) = $para =~ m/^([^?.]+)/; + print $opt_fh "desc:$desc.\n"; + close $opt_fh; } - last; - ' - ) + } + last; + ' +} - for opt_spec in $(ls "$TMPDIR/po/"); do +_eval_po() { + for opt_spec in $(ls "$PO_DIR"); do local opt="" local default_val="" local neg=0 @@ -175,13 +196,13 @@ parse_options() { fi ;; *) - echo "Invalid attribute in $TMPDIR/po/$opt_spec: $line" >&2 + echo "Invalid attribute in $PO_DIR/$opt_spec: $line" >&2 exit 1 esac - done < "$TMPDIR/po/$opt_spec" + done < "$PO_DIR/$opt_spec" if [ -z "$opt" ]; then - echo "No long attribute in option spec $TMPDIR/po/$opt_spec" >&2 + echo "No long attribute in option spec $PO_DIR/$opt_spec" >&2 exit 1 fi @@ -194,69 +215,133 @@ parse_options() { eval "OPT_${opt}"="$default_val" done +} - for opt; do - if [ $# -eq 0 ]; then - break # no more opts +_parse_config_files() { + for config_file in "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" + do + test -f "$config_file" || continue + + while read config_opt; do + + echo "$config_opt" | grep '^[ ]*[^#]' >/dev/null 2>&1 || continue + + config_opt="$(echo "$config_opt" | sed -e 's/^[ ]*//' -e 's/[ ]*\$//' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + + if ! 
[ "$HAVE_EXT_ARGV" ]; then + config_opt="--$config_opt" + fi + _parse_command_line "$config_opt" + done < "$config_file" + + HAVE_EXT_ARGV="" # reset for each file + + done +} + +_parse_command_line() { + local opt="" + local val="" + local next_opt_is_val="" + local opt_is_ok="" + local opt_is_negated="" + local real_opt="" + local required_arg="" + + for opt in "$@"; do + if [ "$opt" = "--" -o "$opt" = "----" ]; then + HAVE_EXT_ARGV=1 + continue fi - opt=$1 - if [ "$opt" = "--" ]; then - shift - EXT_ARGV="$@" - break - fi - shift - if [ $(expr "$opt" : "-") -eq 0 ]; then - if [ -z "$ARGV" ]; then - ARGV="$opt" + if [ "$HAVE_EXT_ARGV" ]; then + if [ "$EXT_ARGV" ]; then + EXT_ARGV="$EXT_ARGV $opt" else - ARGV="$ARGV $opt" + EXT_ARGV="$opt" fi continue fi - local real_opt="$opt" - - if $(echo $opt | grep -q '^--no-'); then - neg=1 - opt=$(echo $opt | sed 's/^--no-//') - else - neg=0 - opt=$(echo $opt | sed 's/^-*//') - fi - - if [ -f "$TMPDIR/po/$opt" ]; then - spec="$TMPDIR/po/$opt" - else - spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) - if [ -z "$spec" ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "Unknown option: $real_opt" >&2 - continue - fi - fi - - local required_arg=$(cat $spec | awk -F: '/^type:/{print $2}') - if [ -n "$required_arg" ]; then - if [ $# -eq 0 ]; then + if [ "$next_opt_is_val" ]; then + next_opt_is_val="" + if [ $# -eq 0 ] || [ $(expr "$opt" : "-") -eq 1 ]; then OPT_ERRS=$(($OPT_ERRS + 1)) echo "$real_opt requires a $required_arg argument" >&2 continue - else - val="$1" - shift fi + val="$opt" + opt_is_ok=1 else - if [ $neg -eq 0 ]; then - val="yes" + if [ $(expr "$opt" : "-") -eq 0 ]; then + if [ -z "$ARGV" ]; then + ARGV="$opt" + else + ARGV="$ARGV $opt" + fi + continue + fi + + real_opt="$opt" + + if $(echo $opt | grep -q '^--no-'); then + opt_is_negated=1 + opt=$(echo $opt | sed 's/^--no-//') else - val="no" + opt_is_negated="" + opt=$(echo $opt | sed 's/^-*//') + fi + + if $(echo $opt | grep '^[a-z-][a-z-]*=' 
>/dev/null 2>&1); then + val="$(echo $opt | awk -F= '{print $2}')" + opt="$(echo $opt | awk -F= '{print $1}')" + fi + + if [ -f "$TMPDIR/po/$opt" ]; then + spec="$TMPDIR/po/$opt" + else + spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) + if [ -z "$spec" ]; then + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "Unknown option: $real_opt" >&2 + continue + fi + fi + + required_arg=$(cat "$spec" | awk -F: '/^type:/{print $2}') + if [ "$required_arg" ]; then + if [ "$val" ]; then + opt_is_ok=1 + else + next_opt_is_val=1 + fi + else + if [ "$val" ]; then + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "Option $real_opt does not take a value" >&2 + continue + fi + if [ "$opt_is_negated" ]; then + val="" + else + val="yes" + fi + opt_is_ok=1 fi fi - opt=$(cat $spec | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + if [ "$opt_is_ok" ]; then + opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) - eval "OPT_$opt"="$val" + eval "OPT_$opt"="'$val'" + + opt="" + val="" + next_opt_is_val="" + opt_is_ok="" + opt_is_negated="" + real_opt="" + required_arg="" + fi done } From 4905e3503c569c8ed6b4a1d815098920ae3818d9 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 11:18:41 -0700 Subject: [PATCH 43/71] Handle --config FILE[,FILE,...] in parse_options.sh. --- lib/bash/parse_options.sh | 69 +++++++++++++++++++++---------- t/lib/bash/parse_options.sh | 24 ++++++++++- t/lib/samples/bash/config003.conf | 2 + 3 files changed, 72 insertions(+), 23 deletions(-) create mode 100644 t/lib/samples/bash/config003.conf diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 0576bcd7..9a71034a 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -1,4 +1,4 @@ -# This program is copyright 2011 Percona Inc. +# This program is copyright 2011-2012 Percona Inc. # Feedback and improvements are welcome. 
# # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED @@ -22,9 +22,9 @@ # parse_options parses Perl POD options from Bash tools and creates # global variables for each option. -# *********************************************************** +# XXX # GLOBAL $TMPDIR AND $TOOL MUST BE SET BEFORE USING THIS LIB! -# *********************************************************** +# XXX # Parsing command line options with Bash is easy until we have to dealt # with values that have spaces, e.g. --option="hello world". This is @@ -39,21 +39,21 @@ # because it's a line from a file, not a command line where Bash will # interpret the quotes and return a single value in the code. So... -# *************************************************** +# XXX # BE CAREFUL MAKING CHANGES TO THIS LIB AND MAKE SURE # t/lib/bash/parse_options.sh STILL PASSES! -# *************************************************** +# XXX set -u # Global variables. These must be global because declare inside a # sub will be scoped locally. 
-ARGV="" # Non-option args (probably input files) -EXT_ARGV="" # Everything after -- (args for an external command) -HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV -OPT_ERRS=0 # How many command line option errors -OPT_VERSION="no" # If --version was specified -OPT_HELP="no" # If --help was specified +ARGV="" # Non-option args (probably input files) +EXT_ARGV="" # Everything after -- (args for an external command) +HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV +OPT_ERRS=0 # How many command line option errors +OPT_VERSION="" # If --version was specified +OPT_HELP="" # If --help was specified PO_DIR="$TMPDIR/po" # Directory with program option spec files # Sub: usage @@ -80,13 +80,13 @@ usage() { usage_or_errors() { local file="$1" - if [ "$OPT_VERSION" = "yes" ]; then + if [ "$OPT_VERSION" ]; then local version=$(grep '^pt-[^ ]\+ [0-9]' "$file") echo "$version" return 1 fi - if [ "$OPT_HELP" = "yes" ]; then + if [ "$OPT_HELP" ]; then usage "$file" echo echo "Command line options:" @@ -127,15 +127,18 @@ parse_options() { local file="$1" shift - # Reset the globals (mostly for testing). + # XXX + # Reset all globals else t/lib/bash/parse_options.sh will fail. + # XXX ARGV="" EXT_ARGV="" HAVE_EXT_ARGV="" OPT_ERRS=0 - OPT_VERSION="no" - OPT_HELP="no" + OPT_VERSION="" + OPT_HELP="" PO_DIR="$TMPDIR/po" + # Ready the directory for the program option (po) spec files. if [ ! -d "$PO_DIR" ]; then mkdir "$PO_DIR" if [ $? -ne 0 ]; then @@ -150,9 +153,26 @@ parse_options() { exit 1 fi - _parse_pod "$file" - _eval_po - _parse_config_files + _parse_pod "$file" # Parse POD into program option (po) spec files + _eval_po # Eval po into existence with default values + + # If the first option is --config FILES, then remove it and use + # those files instead of the default config files. 
+ if [ $# -ge 2 ] && [ "$1" = "--config" ]; then + shift # --config + local user_config_files="$1" + shift # that ^ + local old_ifs="$IFS" + IFS="," + for user_config_file in $user_config_files; do + _parse_config_files "$user_config_file" + done + IFS="$old_ifs" + else + _parse_config_files "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" + fi + + # Finally, parse the command line. _parse_command_line "$@" } @@ -251,8 +271,8 @@ _eval_po() { } _parse_config_files() { - for config_file in "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" - do + + for config_file in "$@"; do # Next config file if this one doesn't exist. test -f "$config_file" || continue @@ -270,6 +290,9 @@ _parse_config_files() { # and end-of-line # comments. config_opt="$(echo "$config_opt" | sed -e 's/^[ ]*//' -e 's/[ ]*\$//' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + # Skip blank lines. + [ "$config_opt" = "" ] && continue + # Options in a config file are not prefixed with --, # but command line options are, so one or the other has # to add or remove the -- prefix. We add it for config @@ -278,9 +301,11 @@ _parse_config_files() { if ! 
[ "$HAVE_EXT_ARGV" ]; then config_opt="--$config_opt" fi + _parse_command_line "$config_opt" + done < "$config_file" - + HAVE_EXT_ARGV="" # reset for each file done diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index e60ead5a..8ecfe69d 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=49 +TESTS=63 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -136,6 +136,28 @@ parse_options "$T_LIB_DIR/samples/bash/po001.sh" --string-opt zzz is "$OPT_STRING_OPT" "zzz" "Command line overrides config file" +# User-specified --config +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --config "$T_LIB_DIR/samples/bash/config003.conf" --string-opt bar + +is "$OPT_STRING_OPT" "bar" "--config string option" +is "$OPT_STRING_OPT2" "foo" "--config string option2" +is "$OPT_TYPELESS_OPTION" "" "--config typeless option" +is "$OPT_NOPTION" "yes" "--config negatable option" +is "$OPT_INT_OPT" "123" "--config int option" +is "$OPT_INT_OPT2" "42" "--config int option2" +is "$OPT_VERSION" "" "--config version option" +is "$ARGV" "" "--config ARGV" +is "$EXT_ARGV" "" "--config External ARGV" + +# Multiple --config files, last should take precedence. +parse_options "$T_LIB_DIR/samples/bash/po001.sh" --config $T_LIB_DIR/samples/bash/config001.conf,$T_LIB_DIR/samples/bash/config002.conf + +is "$OPT_STRING_OPT" "hello world" "Two --config string option" +is "$OPT_TYPELESS_OPTION" "yes" "Two --config typeless option" +is "$OPT_INT_OPT" "100" "Two --config int option" +is "$ARGV" "" "Two --config ARGV" +is "$EXT_ARGV" "--host=127.1 --user=daniel" "Two--config External ARGV" + # ############################################################################ # Option values with spaces. 
# ############################################################################ diff --git a/t/lib/samples/bash/config003.conf b/t/lib/samples/bash/config003.conf new file mode 100644 index 00000000..0ac5a1d5 --- /dev/null +++ b/t/lib/samples/bash/config003.conf @@ -0,0 +1,2 @@ +string-opt=from config file +int-opt=123 From 3c97ae27d1c8abefe64fcea0341b54950e67155f Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 11:50:48 -0700 Subject: [PATCH 44/71] Add and test --config to pt-stalk. --- bin/pt-stalk | 57 ++++++++++++++++++++++--------- t/pt-stalk/pt-stalk.t | 31 ++++++++++++++++- t/pt-stalk/samples/config001.conf | 8 +++++ 3 files changed, 79 insertions(+), 17 deletions(-) create mode 100644 t/pt-stalk/samples/config001.conf diff --git a/bin/pt-stalk b/bin/pt-stalk index 1c4fdbbd..2f0076a2 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -54,12 +54,12 @@ die() { set -u -ARGV="" # Non-option args (probably input files) -EXT_ARGV="" # Everything after -- (args for an external command) -HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV -OPT_ERRS=0 # How many command line option errors -OPT_VERSION="no" # If --version was specified -OPT_HELP="no" # If --help was specified +ARGV="" # Non-option args (probably input files) +EXT_ARGV="" # Everything after -- (args for an external command) +HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV +OPT_ERRS=0 # How many command line option errors +OPT_VERSION="" # If --version was specified +OPT_HELP="" # If --help was specified PO_DIR="$TMPDIR/po" # Directory with program option spec files usage() { @@ -74,13 +74,13 @@ usage() { usage_or_errors() { local file="$1" - if [ "$OPT_VERSION" = "yes" ]; then + if [ "$OPT_VERSION" ]; then local version=$(grep '^pt-[^ ]\+ [0-9]' "$file") echo "$version" return 1 fi - if [ "$OPT_HELP" = "yes" ]; then + if [ "$OPT_HELP" ]; then usage "$file" echo echo "Command line options:" @@ -111,8 +111,8 @@ parse_options() { EXT_ARGV="" HAVE_EXT_ARGV="" 
OPT_ERRS=0 - OPT_VERSION="no" - OPT_HELP="no" + OPT_VERSION="" + OPT_HELP="" PO_DIR="$TMPDIR/po" if [ ! -d "$PO_DIR" ]; then @@ -129,9 +129,23 @@ parse_options() { exit 1 fi - _parse_pod "$file" - _eval_po - _parse_config_files + _parse_pod "$file" # Parse POD into program option (po) spec files + _eval_po # Eval po into existence with default values + + if [ $# -ge 2 ] && [ "$1" = "--config" ]; then + shift # --config + local user_config_files="$1" + shift # that ^ + local old_ifs="$IFS" + IFS="," + for user_config_file in $user_config_files; do + _parse_config_files "$user_config_file" + done + IFS="$old_ifs" + else + _parse_config_files "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" + fi + _parse_command_line "$@" } @@ -218,8 +232,8 @@ _eval_po() { } _parse_config_files() { - for config_file in "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" - do + + for config_file in "$@"; do test -f "$config_file" || continue while read config_opt; do @@ -228,12 +242,16 @@ _parse_config_files() { config_opt="$(echo "$config_opt" | sed -e 's/^[ ]*//' -e 's/[ ]*\$//' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + [ "$config_opt" = "" ] && continue + if ! [ "$HAVE_EXT_ARGV" ]; then config_opt="--$config_opt" fi + _parse_command_line "$config_opt" + done < "$config_file" - + HAVE_EXT_ARGV="" # reset for each file done @@ -1212,6 +1230,13 @@ Collect strace data. Collect tcpdump data. +=item --config + +type: string + +Read this comma-separated list of config files. If specified, this must be the +first option on the command line. 
+ =item --cycles type: int; default: 5 diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index d1ac37a2..de18249a 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -10,6 +10,7 @@ use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); use Test::More; +use Time::HiRes qw(sleep); use PerconaTest; use DSNParser; @@ -23,7 +24,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 15; + plan tests => 17; } my $cnf = "/tmp/12345/my.sandbox.cnf"; @@ -168,6 +169,34 @@ like( "Trigger file logs how pt-stalk was ran" ); + +# ############################################################################# +# --config +# ############################################################################# + +diag(`cp $ENV{HOME}/.pt-stalk.conf $ENV{HOME}/.pt-stalk.conf.original 2>/dev/null`); +diag(`cp $trunk/t/pt-stalk/samples/config001.conf $ENV{HOME}/.pt-stalk.conf`); + +system "$trunk/bin/pt-stalk --dest $dest --pid $pid_file >$log_file 2>&1 &"; +sleep 1; +chomp($pid = `cat $pid_file`); +$retval = system("kill $pid 2>/dev/null"); +is( + $retval >> 0, + 0, + "Killed pt-stalk" +); + +$output = `cat $log_file`; +like( + $output, + qr/Check results: Aborted_connects=|variable=Aborted_connects/, + "Read default config file" +); + +diag(`rm $ENV{HOME}/.pt-stalk.conf`); +diag(`cp $ENV{HOME}/.pt-stalk.conf.original $ENV{HOME}/.pt-stalk.conf 2>/dev/null`); + # ############################################################################# # Done. 
# ############################################################################# diff --git a/t/pt-stalk/samples/config001.conf b/t/pt-stalk/samples/config001.conf new file mode 100644 index 00000000..c4ad24e8 --- /dev/null +++ b/t/pt-stalk/samples/config001.conf @@ -0,0 +1,8 @@ +--iterations=1 +--variable=Aborted_connects +--threshold=999999 +-- +-umsandbox +-pmsandbox +--host 127.1 +--port 12345 From c965d7c172b35a1b441f16d22680610be4ec1424 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 12:07:42 -0700 Subject: [PATCH 45/71] Fix POD syntax error. --- bin/pt-stalk | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 7b692fab..b742d071 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1356,8 +1356,6 @@ options: --trigger processlist --variable State --match statistics --threshold 10 -=back - In addition, you can specify a file that contains your custom trigger function, written in Unix shell script. This can be a wrapper that executes anything you wish. If the argument to --function is a file, then it takes precedence over From 36c14f492a481720286659a96112d70573b1c4f5 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 12:15:29 -0700 Subject: [PATCH 46/71] Don't use grep -q. --- bin/pt-stalk | 2 +- lib/bash/parse_options.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index b742d071..404187ed 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -301,7 +301,7 @@ _parse_command_line() { real_opt="$opt" - if $(echo $opt | grep -q '^--no-'); then + if $(echo $opt | grep '^--no-' >/dev/null); then opt_is_negated=1 opt=$(echo $opt | sed 's/^--no-//') else diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 9a71034a..23443bb4 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -370,7 +370,7 @@ _parse_command_line() { real_opt="$opt" # Strip leading -- or --no- from option. 
- if $(echo $opt | grep -q '^--no-'); then + if $(echo $opt | grep '^--no-' >/dev/null); then opt_is_negated=1 opt=$(echo $opt | sed 's/^--no-//') else From 42a8e3963569ae581792aa1b57c4f2fa07b81057 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 12:28:41 -0700 Subject: [PATCH 47/71] Use better Bash. --- lib/bash/parse_options.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 23443bb4..bfe3e489 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -222,13 +222,13 @@ _eval_po() { # Evaluate the program options into existence as global variables # transformed like --my-op == $OPT_MY_OP. If an option has a default # value, it's assigned that value. Else, it's value is an empty string. - for opt_spec in $(ls "$PO_DIR"); do + local old_ifs="$IFS" + IFS=":" + for opt_spec in "$PO_DIR"/*; do local opt="" local default_val="" local neg=0 - while read line; do - local key=$(echo $line | cut -d ':' -f 1) - local val=$(echo $line | cut -d ':' -f 2) + while read key val; do case "$key" in long) opt=$(echo $val | sed 's/-/_/g' | tr [:lower:] [:upper:]) @@ -248,13 +248,13 @@ _eval_po() { fi ;; *) - echo "Invalid attribute in $PO_DIR/$opt_spec: $line" >&2 + echo "Invalid attribute in $opt_spec: $line" >&2 exit 1 esac - done < "$PO_DIR/$opt_spec" + done < "$opt_spec" if [ -z "$opt" ]; then - echo "No long attribute in option spec $PO_DIR/$opt_spec" >&2 + echo "No long attribute in option spec $opt_spec" >&2 exit 1 fi @@ -268,6 +268,8 @@ _eval_po() { # Eval the option into existence as a global variable. eval "OPT_${opt}"="$default_val" done + + IFS="$old_ifs" } _parse_config_files() { From c2fd3f54c2d8becfa6513abefd5ad5e114f6e83e Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 12:49:18 -0700 Subject: [PATCH 48/71] Use "yes" for true, "" for false. Use $PO_DIR instead of $TMP_DIR/po. 
Add and test Baron's code for 'Options and values after processing arguments'. Make --help exit 0 unless there were errors. --- bin/pt-stalk | 51 ++++++++++++++++++++++--------------- lib/bash/collect.sh | 10 ++++---- lib/bash/parse_options.sh | 10 +++++++- t/lib/bash/collect.sh | 1 + t/lib/bash/parse_options.sh | 6 ++++- 5 files changed, 51 insertions(+), 27 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 404187ed..c2748475 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -85,12 +85,20 @@ usage_or_errors() { echo echo "Command line options:" echo - for opt in $(ls $TMPDIR/po/); do + for opt in $(ls "$PO_DIR"); do local desc=$(cat $TMPDIR/po/$opt | grep '^desc:' | sed -e 's/^desc://') echo "--$opt" echo " $desc" echo done + echo "Options and values after processing arguments:" + echo + for opt in $(ls "$PO_DIR"); do + local varname="OPT_$(echo "$opt" | tr a-z- A-Z_)" + local varvalue="${!varname}" + printf -- " --%-30s %s" "$opt" "${varvalue:-(No value)}" + echo + done return 1 fi @@ -184,13 +192,13 @@ _parse_pod() { } _eval_po() { - for opt_spec in $(ls "$PO_DIR"); do + local old_ifs="$IFS" + IFS=":" + for opt_spec in "$PO_DIR"/*; do local opt="" local default_val="" local neg=0 - while read line; do - local key=$(echo $line | cut -d ':' -f 1) - local val=$(echo $line | cut -d ':' -f 2) + while read key val; do case "$key" in long) opt=$(echo $val | sed 's/-/_/g' | tr [:lower:] [:upper:]) @@ -210,13 +218,13 @@ _eval_po() { fi ;; *) - echo "Invalid attribute in $PO_DIR/$opt_spec: $line" >&2 + echo "Invalid attribute in $opt_spec: $line" >&2 exit 1 esac - done < "$PO_DIR/$opt_spec" + done < "$opt_spec" if [ -z "$opt" ]; then - echo "No long attribute in option spec $PO_DIR/$opt_spec" >&2 + echo "No long attribute in option spec $opt_spec" >&2 exit 1 fi @@ -229,6 +237,8 @@ _eval_po() { eval "OPT_${opt}"="$default_val" done + + IFS="$old_ifs" } _parse_config_files() { @@ -574,7 +584,7 @@ collect() { fi fi - if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" = "yes" -a 
"$mysqld_pid" ]; then + if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" -a "$mysqld_pid" ]; then $CMD_GDB \ -ex "set pagination 0" \ -ex "thread apply all bt" \ @@ -613,7 +623,7 @@ collect() { open_tables >> "$d/$p-opentables1" 2>&1 & local tcpdump_pid="" - if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" = "yes" ]; then + if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" ]; then local port=$(awk '/^port/{print $2}' "$d/$p-variables") if [ "$port" ]; then $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & @@ -622,12 +632,12 @@ collect() { fi local have_oprofile="no" - if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" = "yes" ]; then + if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & local strace_pid=$! fi @@ -726,7 +736,7 @@ collect() { "/path/to/mysqld'" \ > "$d/$p-opreport" fi - elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid @@ -931,7 +941,7 @@ sigtrap() { stalk() { local cycles_true=0 # increment each time check is true, else set to 0 - local matched="no" # set to "yes" when check is true + local matched="" # set to "yes" when check is true local last_prefix="" # prefix of last collection while oktorun; do @@ -944,20 +954,20 @@ stalk() { if [ -z "$value" ]; then # No value. Maybe we failed to connect to MySQL? warn "Detected value is empty; something failed? 
Trigger exit status: $trg_exit_status" - matched="no" + matched="" cycles_true=0 elif [ $value -gt $OPT_THRESHOLD ]; then matched="yes" cycles_true=$(($cycles_true + 1)) else - matched="no" + matched="" cycles_true=0 fi - local msg="Check results: $OPT_VARIABLE=$value, matched=$matched, cycles_true=$cycles_true" + local msg="Check results: $OPT_VARIABLE=$value, matched=${matched:-no}, cycles_true=$cycles_true" log "$msg" - if [ "$matched" = "yes" -a $cycles_true -ge $OPT_CYCLES ]; then + if [ "$matched" -a $cycles_true -ge $OPT_CYCLES ]; then # ################################################################## # Start collecting, maybe. # ################################################################## @@ -1089,7 +1099,8 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ po_status=$? rm_tmpdir if [ $po_status -ne 0 ]; then - exit $po_status + [ $OPT_ERRS -gt 0 ] && exit 1 + exit 0 fi # Now that we have the cmd line opts, check that we can actually @@ -1097,7 +1108,7 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ [ -n "$(mysql $EXT_ARGV -e 'SELECT 1')" ] \ || die "Cannot connect to MySQL. Check that MySQL is running and that the options after -- are correct." - if [ "$OPT_DAEMONIZE" = "yes" ]; then + if [ "$OPT_DAEMONIZE" ]; then # Check access to the --log file. ( set -e diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index 635de955..75f3d69d 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -61,7 +61,7 @@ collect() { # Getting a GDB stacktrace can be an intensive operation, # so do this only if necessary (and possible). - if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" = "yes" -a "$mysqld_pid" ]; then + if [ "$CMD_GDB" -a "$OPT_COLLECT_GDB" -a "$mysqld_pid" ]; then $CMD_GDB \ -ex "set pagination 0" \ -ex "thread apply all bt" \ @@ -112,7 +112,7 @@ collect() { # If TCP dumping is specified, start that on the server's port. 
local tcpdump_pid="" - if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" = "yes" ]; then + if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" ]; then local port=$(awk '/^port/{print $2}' "$d/$p-variables") if [ "$port" ]; then $CMD_TCPDUMP -i any -s 4096 -w "$d/$p-tcpdump" port ${port} & @@ -123,12 +123,12 @@ collect() { # Next, start oprofile gathering data during the whole rest of this process. # The --init should be a no-op if it has already been init-ed. local have_oprofile="no" - if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" = "yes" ]; then + if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then # Don't run oprofile and strace at the same time. $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & local strace_pid=$! @@ -244,7 +244,7 @@ collect() { "/path/to/mysqld'" \ > "$d/$p-opreport" fi - elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" = "yes" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index bfe3e489..3689f82d 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -91,12 +91,20 @@ usage_or_errors() { echo echo "Command line options:" echo - for opt in $(ls $TMPDIR/po/); do + for opt in $(ls "$PO_DIR"); do local desc=$(cat $TMPDIR/po/$opt | grep '^desc:' | sed -e 's/^desc://') echo "--$opt" echo " $desc" echo done + echo "Options and values after processing arguments:" + echo + for opt in $(ls "$PO_DIR"); do + local varname="OPT_$(echo "$opt" | tr a-z- A-Z_)" + local varvalue="${!varname}" + printf -- " --%-30s %s" "$opt" "${varvalue:-(No value)}" + echo + done return 1 fi diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index a9138dd7..6ae070d7 100644 --- 
a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -5,6 +5,7 @@ TESTS=19 TMPFILE="$TEST_TMPDIR/parse-opts-output" TMPDIR="$TEST_TMPDIR" PATH="$PATH:$PERCONA_TOOLKIT_SANDBOX/bin" +TOOL="pt-stalk" mkdir "$TMPDIR/collect" 2>/dev/null diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 8ecfe69d..9c124858 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=63 +TESTS=64 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -105,6 +105,10 @@ cmd_ok \ "grep -q \"For more information, 'man pt-stalk' or 'perldoc\" $TMPFILE" \ "--help" +cmd_ok \ + "grep -q '\-\-string-opt[ ]*(No value)' $TMPFILE" \ + "Options and values after processing arguments" + # Don't interpolate. parse_options "$T_LIB_DIR/samples/bash/po003.sh" --help usage_or_errors "$T_LIB_DIR/samples/bash/po003.sh" >$TMPFILE 2>&1 From b47679cf97df1f18d18cc4e5219c8c134f3e1314 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 13:20:07 -0700 Subject: [PATCH 49/71] Implement --[no]collect. --- bin/pt-stalk | 72 +++++++++++++++++++++++-------------------- t/pt-stalk/pt-stalk.t | 34 +++++++++++++++++++- 2 files changed, 71 insertions(+), 35 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index c2748475..241457e5 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -971,44 +971,48 @@ stalk() { # ################################################################## # Start collecting, maybe. # ################################################################## - local prefix="${OPT_PREFIX:-$(date +%F-%T | tr :- _)}" log "Collect triggered" - # Check if we'll have enough disk space to collect. Disk space - # is also checked every interval while collecting. 
- local margin="20" # default 20M margin, unless: - if [ -n "$last_prefix" ]; then - margin=$(du -mc "$OPT_DEST"/"$last_prefix"-* | tail -n 1 | awk '{print $1'}) - fi - disk_space "$OPT_DEST" > "$OPT_DEST/$prefix-disk-space" - check_disk_space \ - "$OPT_DEST/$prefix-disk-space" \ - "$OPT_DISK_BYTE_LIMIT" \ - "$OPT_DISK_PCT_LIMIT" \ - "$margin" # real used MB + margin MB - if [ $? -eq 0 ]; then - # There should be enough disk space, so collect. - log "$msg" >> "$OPT_DEST/$prefix-trigger" - log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger" - last_prefix="$prefix" + # Send email to whomever that collect has been triggered. + if [ "$OPT_NOTIFY_BY_EMAIL" ]; then + echo "$msg on $(hostname)" \ + | mail -s "Collect triggered on $(hostname)" \ + "$OPT_NOTIFY_BY_EMAIL" + fi - # Send email to whomever that collect has been triggered. - if [ "$OPT_NOTIFY_BY_EMAIL" ]; then - echo "$msg on $(hostname)" \ - | mail -s "Collect triggered on $(hostname)" \ - "$OPT_NOTIFY_BY_EMAIL" + if [ "$OPT_COLLECT" ]; then + local prefix="${OPT_PREFIX:-$(date +%F-%T | tr :- _)}" + + # Check if we'll have enough disk space to collect. Disk space + # is also checked every interval while collecting. + local margin="20" # default 20M margin, unless: + if [ -n "$last_prefix" ]; then + margin=$(du -mc "$OPT_DEST"/"$last_prefix"-* | tail -n 1 | awk '{print $1'}) + fi + disk_space "$OPT_DEST" > "$OPT_DEST/$prefix-disk-space" + check_disk_space \ + "$OPT_DEST/$prefix-disk-space" \ + "$OPT_DISK_BYTE_LIMIT" \ + "$OPT_DISK_PCT_LIMIT" \ + "$margin" # real used MB + margin MB + if [ $? -eq 0 ]; then + # There should be enough disk space, so collect. + log "$msg" >> "$OPT_DEST/$prefix-trigger" + log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger" + last_prefix="$prefix" + + + # Fork and background the collect subroutine which will + # run for --run-time seconds. We (the parent) sleep + # while its collecting (hopefully --sleep is longer than + # --run-time). 
+ ( + collect "$OPT_DEST" "$prefix" + ) >> "$OPT_DEST/$prefix-output" 2>&1 & + else + # There will not be enough disk space, so do not collect. + warn "Collect canceled because there will not be enough disk space after collecting another $margin MB" fi - - # Fork and background the collect subroutine which will - # run for --run-time seconds. We (the parent) sleep - # while its collecting (hopefully --sleep is longer than - # --run-time). - ( - collect "$OPT_DEST" "$prefix" - ) >> "$OPT_DEST/$prefix-output" 2>&1 & - else - # There will not be enough disk space, so do not collect. - warn "Collect canceled because there will not be enough disk space after collecting another $margin MB" fi # ################################################################## diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index eb491302..6a054899 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -24,7 +24,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 17; + plan tests => 20; } my $cnf = "/tmp/12345/my.sandbox.cnf"; @@ -169,6 +169,38 @@ like( "Trigger file logs how pt-stalk was ran" ); +# ########################################################################### +# Triggered but --no-collect. +# ########################################################################### +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm $dest/* 2>/dev/null`); + +(undef, $uptime) = $dbh->selectrow_array("SHOW STATUS LIKE 'Uptime'"); +$threshold = $uptime + 2; + +$retval = system("$trunk/bin/pt-stalk --no-collect --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 1 --run-time 1 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1"); + +sleep 2; + +$output = `cat $log_file`; +like( + $output, + qr/Collect triggered/, + "Collect triggered" +); + +ok( + ! 
-f "$dest/*", + "No files collected" +); + +$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`; +is( + $output, + "", + "pt-stalk is not running" +); # ############################################################################# # --config From 77e5e4c3f20961d3ceda0b60e210f79cd8db2d20 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 13:24:07 -0700 Subject: [PATCH 50/71] Simpler test for +w access to --log. --- bin/pt-stalk | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 241457e5..dc52efde 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1114,13 +1114,7 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ if [ "$OPT_DAEMONIZE" ]; then # Check access to the --log file. - ( - set -e - touch "$OPT_LOG" - ) - if [ $? -ne 0 ]; then - die "Cannot write to --log $OPT_LOG" - fi + touch "$OPT_LOG" || die "Cannot write to --log $OPT_LOG" # The PID file will at first have our (parent) PID. # This is fine for ensuring that only one of us is From 73bc7cdcf83245452c741f8411990b45ad6e6b0a Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 24 Jan 2012 14:22:19 -0700 Subject: [PATCH 51/71] Change default --dest to /var/lib/pt-stalk. Don't check --dest unless --collect. Fix POD formatting for --function. --- bin/pt-stalk | 51 ++++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index dc52efde..3300101f 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -883,11 +883,6 @@ trg_processlist() { return } -trg_magic() { - echo "TODO" - return -} - oktorun() { if [ $OKTORUN -eq 0 ]; then EXIT_REASON="OKTORUN is false" @@ -1026,7 +1021,9 @@ stalk() { fi # Purge old collect file between checks. 
- purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME" + if [ -d "$OPT_DEST" ]; then + purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME" + fi done } @@ -1041,23 +1038,8 @@ main() { # Bash 4 has $BASHPID but we can't rely on that. Consequently, # we don't know our own PID. See the usage of $! below. RAN_WITH="--function=$OPT_FUNCTION --variable=$OPT_VARIABLE --threshold=$OPT_THRESHOLD --match=$OPT_MATCH --cycles=$OPT_CYCLES --interval=$OPT_INTERVAL --iterations=$OPT_ITERATIONS --run-time=$OPT_RUN_TIME --sleep=$OPT_SLEEP --dest=$OPT_DEST --prefix=$OPT_PREFIX --notify-by-email=$OPT_NOTIFY_BY_EMAIL --log=$OPT_LOG --pid=$OPT_PID" - log "Starting $0 $RAN_WITH" - # Make sure the collection dir exists. - if [ ! -d "$OPT_DEST" ]; then - mkdir -p "$OPT_DEST" || die "Cannot make --dest $OPT_DEST" - fi - # Check access to the --dest dir. By setting -x in the subshell, - # if either command fails, the subshell will exit immediately and - # $? will be non-zero. - ( - set -e - touch "$OPT_DEST/test" - rm "$OPT_DEST/test" - ) - if [ $? -ne 0 ]; then - die "Cannot read and write files to --dest $OPT_DEST" - fi + log "Starting $0 $RAN_WITH" # Test if we have root; warn if not, but it isn't critical. if [ "$(id -u)" != "0" ]; then @@ -1112,6 +1094,25 @@ if [ "$(basename "$0")" = "pt-stalk" ] \ [ -n "$(mysql $EXT_ARGV -e 'SELECT 1')" ] \ || die "Cannot connect to MySQL. Check that MySQL is running and that the options after -- are correct." + # Check existence and access to the --dest dir if we're collecting. + if [ "$OPT_COLLECT" ]; then + if [ ! -d "$OPT_DEST" ]; then + mkdir -p "$OPT_DEST" || die "Cannot make --dest $OPT_DEST" + fi + + # Check access to the --dest dir. By setting -x in the subshell, + # if either command fails, the subshell will exit immediately and + # $? will be non-zero. + ( + set -e + touch "$OPT_DEST/test" + rm "$OPT_DEST/test" + ) + if [ $? 
-ne 0 ]; then + die "Cannot read and write files to --dest $OPT_DEST" + fi + fi + if [ "$OPT_DAEMONIZE" ]; then # Check access to the --log file. touch "$OPT_LOG" || die "Cannot write to --log $OPT_LOG" @@ -1307,7 +1308,7 @@ its output as specified in --log. =item --dest -type: string; default: ${HOME}/collected +type: string; default: /var/lib/pt-stalk Where to store the diagnostic data. Each time the tool collects data, it writes to a new set of files, which are named with the current system timestamp. @@ -1365,6 +1366,8 @@ options: --trigger processlist --variable State --match statistics --threshold 10 +=back + In addition, you can specify a file that contains your custom trigger function, written in Unix shell script. This can be a wrapper that executes anything you wish. If the argument to --function is a file, then it takes precedence over @@ -1388,8 +1391,6 @@ MySQL options mentioned in the L<"SYNOPSIS"> above. The plugin should not alter the tool's existing global variables. Prefix any plugin-specific global variables with "PLUGIN_" or make them local. -=back - =item --help Print help and exit. From 81caf6addcdd467fc1c975e5c5e8764bc3ba2d7b Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 26 Jan 2012 10:44:55 -0700 Subject: [PATCH 52/71] Convert size \d+[KMGT] opts in parse_options.sh. 
--- lib/bash/parse_options.sh | 19 +++++++++++++++++++ t/lib/bash/parse_options.sh | 30 +++++++++++++++++++++++++++++- t/lib/samples/bash/po004.sh | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 t/lib/samples/bash/po004.sh diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 3689f82d..a33e68f8 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -236,6 +236,7 @@ _eval_po() { local opt="" local default_val="" local neg=0 + local size=0 while read key val; do case "$key" in long) @@ -247,6 +248,7 @@ _eval_po() { "short form") ;; type) + [ "$val" = "size" ] && size=1 ;; desc) ;; @@ -273,6 +275,11 @@ _eval_po() { fi fi + # Convert sizes. + if [ $size -eq 1 -a -n "$default_val" ]; then + default_val=$(size_to_bytes $default_val) + fi + # Eval the option into existence as a global variable. eval "OPT_${opt}"="$default_val" done @@ -339,6 +346,7 @@ _parse_command_line() { local opt_is_negated="" local real_opt="" local required_arg="" + local spec="" for opt in "$@"; do if [ "$opt" = "--" -o "$opt" = "----" ]; then @@ -438,6 +446,11 @@ _parse_command_line() { # Get and transform the opt's long form. E.g.: -q == --quiet == QUIET. opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + # Convert sizes. + if grep "^type:size" "$spec" >/dev/null; then + val=$(size_to_bytes $val) + fi + # Re-eval the option to update its global variable value. 
eval "OPT_$opt"="'$val'" @@ -448,10 +461,16 @@ _parse_command_line() { opt_is_negated="" real_opt="" required_arg="" + spec="" fi done } +size_to_bytes() { + local size="$1" + echo $size | perl -ne '%f=(B=>1, K=>1_024, M=>1_048_576, G=>1_073_741_824, T=>1_099_511_627_776); m/^(\d+)([kMGT])?/i; print $1 * $f{uc($2 || "B")};' +} + # ########################################################################### # End parse_options package # ########################################################################### diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 9c124858..b66f8584 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=64 +TESTS=72 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -183,6 +183,34 @@ is "$OPT_STRING_OPT" "hello world" "Option value with space (cmd line)" is "$ARGV" "" "ARGV (cmd line)" is "$EXT_ARGV" "" "External ARGV (cmd line)" +# ############################################################################ +# Size options. 
+# ############################################################################ + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1T +is "$OPT_DISK_BYTES_FREE" "1099511627776" "Size: 1T" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1G +is "$OPT_DISK_BYTES_FREE" "1073741824" "Size: 1G" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1M +is "$OPT_DISK_BYTES_FREE" "1048576" "Size: 1M" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1K +is "$OPT_DISK_BYTES_FREE" "1024" "Size: 1K" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1k +is "$OPT_DISK_BYTES_FREE" "1024" "Size: 1k" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 1 +is "$OPT_DISK_BYTES_FREE" "1" "Size: 1" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" --disk-bytes-free 100M +is "$OPT_DISK_BYTES_FREE" "104857600" "Size: 100M" + +parse_options "$T_LIB_DIR/samples/bash/po004.sh" +is "$OPT_DISK_BYTES_FREE" "104857600" "Size: 100M default" + # ############################################################################ # Done # ############################################################################ diff --git a/t/lib/samples/bash/po004.sh b/t/lib/samples/bash/po004.sh new file mode 100644 index 00000000..0574f221 --- /dev/null +++ b/t/lib/samples/bash/po004.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +: + +# ############################################################################ +# Documentation +# ############################################################################ +:<<'DOCUMENTATION' +=pod + +=head1 NAME + +pt-stalk - Wait for a condition to occur then begin collecting data. + +=head1 OPTIONS + +=over + +=item --disk-bytes-free + +type: size; default: 100M + +Fall apart if there's less than this many bytes free on the disk. + +=item --help + +Print help. + +=back + +=head1 ENVIRONMENT + +No env vars used. 
+ +=cut + +DOCUMENTATION From d7d5381863b1509b585929e8181acab61e7181ff Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 26 Jan 2012 12:19:47 -0700 Subject: [PATCH 53/71] Make check_disk_space() use bytes (given new size type options with are converted to bytes, 1k=>1024). --- lib/bash/safeguards.sh | 57 ++++++++++++++++++++++------------------ t/lib/bash/safeguards.sh | 35 +++++++++++++++++------- 2 files changed, 57 insertions(+), 35 deletions(-) diff --git a/lib/bash/safeguards.sh b/lib/bash/safeguards.sh index 97db3842..91402bb6 100644 --- a/lib/bash/safeguards.sh +++ b/lib/bash/safeguards.sh @@ -1,4 +1,4 @@ -# This program is copyright 2011 Percona Inc. +# This program is copyright 2011-2012 Percona Inc. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED @@ -33,46 +33,53 @@ disk_space() { # Sub: check_disk_space # Check if there is or will be enough disk space. Input is a file # with output from , i.e. `df -P -k`. The df output -# must use 1k blocks, but the mb arg from the user is in MB. +# must use 1k blocks, which should be POSIX standard. # # Arguments: -# file - File with output from . -# mb - Minimum MB free. -# pc - Minimum percent free. -# mb_margin - Add this many MB to the real MB used. +# file - File with output from . +# min_free_bytes - Minimum free bytes. +# min_free_pct - Minimum free percentage. +# bytes_margin - Add this many bytes to the real bytes used. # # Returns: # 0 if there is/will be enough disk space, else 1. check_disk_space() { local file="$1" - local mb="${2:-0}" - local pc="${3:-0}" - local mb_margin="${4:-0}" + local min_free_bytes="${2:-0}" + local min_free_pct="${3:-0}" + local bytes_margin="${4:-0}" - # Convert MB to KB because the df output should be in 1k blocks. - local kb=$(($mb * 1024)) - local kb_margin=$(($mb_margin * 1024)) + # Real/actual bytes used and bytes free. 
+ local used_bytes=$(cat "$file" | awk '/^\//{print $3 * 1024}'); + local free_bytes=$(cat "$file" | awk '/^\//{print $4 * 1024}'); + local pct_used=$(cat "$file" | awk '/^\//{print $5}' | sed -e 's/%//g'); + local pct_free=$((100 - $pct_used)) - local kb_used=$(cat "$file" | awk '/^\//{print $3}'); - local kb_free=$(cat "$file" | awk '/^\//{print $4}'); - local pc_used=$(cat "$file" | awk '/^\//{print $5}' | sed -e 's/%//g'); + # Report the real values to the user. + local real_free_bytes=$free_bytes + local real_pct_free=$pct_free - if [ "$kb_margin" -gt "0" ]; then - local kb_total=$(($kb_used + $kb_free)) + # If there's a margin, we need to adjust the real values. + if [ $bytes_margin -gt 0 ]; then + used_bytes=$(($used_bytes + $bytes_margin)) + free_bytes=$(($free_bytes - $bytes_margin)) + pct_used=$(awk "BEGIN { printf(\"%d\", ($used_bytes/($used_bytes + $free_bytes)) * 100) }") - kb_used=$(($kb_used + $kb_margin)) - kb_free=$(($kb_free - $kb_margin)) - pc_used=$(awk "BEGIN { printf(\"%d\", $kb_used/$kb_total * 100) }") + pct_free=$((100 - $pct_used)) fi - local pc_free=$((100 - $pc_used)) + if [ $free_bytes -lt $min_free_bytes -o $pct_free -lt $min_free_pct ]; then + warn "Not enough free disk space: + Limit: ${min_free_pct}% free, ${min_free_bytes} bytes free + Actual: ${real_pct_free}% free, ${real_free_bytes} bytes free (- $bytes_margin bytes margin) +" + # Print the df that we used. 
+ cat "$file" >&2 - if [ "$kb_free" -le "$kb" -o "$pc_free" -le "$pc" ]; then - warn "Not enough free disk space: ${pc_free}% free, ${kb_free} KB free; wanted more than ${pc}% free or ${kb} KB free" - return 1 + return 1 # not enough disk space fi - return 0 + return 0 # disk space is OK } # ########################################################################### diff --git a/t/lib/bash/safeguards.sh b/t/lib/bash/safeguards.sh index c874498f..cf678d90 100644 --- a/t/lib/bash/safeguards.sh +++ b/t/lib/bash/safeguards.sh @@ -18,36 +18,51 @@ is \ "2" \ "2-line df output" -check_disk_space "$SAMPLE/diskspace001.txt" 22000 18 >$TMPDIR/out 2>&1 +# Filesystem 1024-blocks Used Available Capacity Mounted on +# /dev/disk0s2 118153176 94409664 23487512 81% / +# +# Those values are in Kb, so: +# used = 94409664 (94.4G) = 96_675_495_936 bytes +# free = 23487512 (23.4G) = 24_051_212_288 bytes +# pct free = 100 - 81 = 19 % + +# want free - 100, 18 < 19, so this should be ok. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 18 >$TMPDIR/out 2>&1 is "$?" "0" "Enough disk space" is \ "`cat $TMPDIR/out`" \ "" \ "No output if enough disk space" -check_disk_space "$SAMPLE/diskspace001.txt" 24000 18 >$TMPDIR/out 2>&1 +# want free - 100 is ok, but 20 < 19 is not. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 20 >$TMPDIR/out 2>&1 +is "$?" "1" "Not enough % free" + +# want free + 100, so this should fail +# (real free is 100 bytes under what we want) +check_disk_space "$SAMPLE/diskspace001.txt" 24051212388 18 >$TMPDIR/out 2>&1 is "$?" "1" "Not enough MB free" cmd_ok \ - "grep -q '19% free, 23487512 KB free; wanted more than 18% free or 24576000 KB free' $TMPDIR/out" \ + "grep -q 'Actual: 19% free, 24051212288 bytes free (- 0 bytes margin)' $TMPDIR/out" \ "Warning if not enough disk space" -check_disk_space "$SAMPLE/diskspace001.txt" 22000 19 >$TMPDIR/out 2>&1 -is "$?" 
"1" "Not enough % free" - # ########################################################################### # Check with a margin (amount we plan to use in the future). # ########################################################################### -check_disk_space "$SAMPLE/diskspace001.txt" 22000 18 100 +# want free - 100 + 50 margin, so effectively want free - 50 is ok. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 18 50 is "$?" "0" "Enough disk space with margin" -check_disk_space "$SAMPLE/diskspace001.txt" 23000 18 100 >$TMPDIR/out 2>&1 +# want free - 100 + 101 margin, so real free is 1 byte under what we want. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 18 101 >$TMPDIR/out 2>&1 is "$?" "1" "Not enough MB free with margin" -check_disk_space "$SAMPLE/diskspace001.txt" 100 5 20000 >$TMPDIR/out 2>&1 +# want free - 100 + 50 margin ok but %free will be 19 which is < 25. +check_disk_space "$SAMPLE/diskspace001.txt" 24051212188 25 50 >$TMPDIR/out 2>&1 is "$?" "1" "Not enough % free with margin" cmd_ok \ - "grep -q '3% free,' $TMPDIR/out" \ + "grep -q 'Actual:[ ]*19% free,' $TMPDIR/out" \ "Calculates % free with margin" # ########################################################################### From 05ac0be1361ed344296aa328947462125a251fe0 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 26 Jan 2012 12:45:44 -0700 Subject: [PATCH 54/71] Change --disk-byte-limit to --disk-bytes-free and --disk-pct-limit to --disk-pct-free. 
--- bin/pt-stalk | 96 ++++++++++------ lib/bash/collect.sh | 4 +- t/lib/bash/collect.sh | 4 +- t/lib/samples/bash/po002.sh | 212 ------------------------------------ 4 files changed, 65 insertions(+), 251 deletions(-) delete mode 100644 t/lib/samples/bash/po002.sh diff --git a/bin/pt-stalk b/bin/pt-stalk index 3300101f..a431e1a9 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -198,6 +198,7 @@ _eval_po() { local opt="" local default_val="" local neg=0 + local size=0 while read key val; do case "$key" in long) @@ -209,6 +210,7 @@ _eval_po() { "short form") ;; type) + [ "$val" = "size" ] && size=1 ;; desc) ;; @@ -235,6 +237,10 @@ _eval_po() { fi fi + if [ $size -eq 1 -a -n "$default_val" ]; then + default_val=$(size_to_bytes $default_val) + fi + eval "OPT_${opt}"="$default_val" done @@ -275,6 +281,7 @@ _parse_command_line() { local opt_is_negated="" local real_opt="" local required_arg="" + local spec="" for opt in "$@"; do if [ "$opt" = "--" -o "$opt" = "----" ]; then @@ -360,6 +367,10 @@ _parse_command_line() { if [ "$opt_is_ok" ]; then opt=$(cat "$spec" | grep '^long:' | cut -d':' -f2 | sed 's/-/_/g' | tr [:lower:] [:upper:]) + if grep "^type:size" "$spec" >/dev/null; then + val=$(size_to_bytes $val) + fi + eval "OPT_$opt"="'$val'" opt="" @@ -369,10 +380,16 @@ _parse_command_line() { opt_is_negated="" real_opt="" required_arg="" + spec="" fi done } +size_to_bytes() { + local size="$1" + echo $size | perl -ne '%f=(B=>1, K=>1_024, M=>1_048_576, G=>1_073_741_824, T=>1_099_511_627_776); m/^(\d+)([kMGT])?/i; print $1 * $f{uc($2 || "B")};' +} + # ########################################################################### # End parse_options package # ########################################################################### @@ -458,33 +475,37 @@ disk_space() { check_disk_space() { local file="$1" - local mb="${2:-0}" - local pc="${3:-0}" - local mb_margin="${4:-0}" + local min_free_bytes="${2:-0}" + local min_free_pct="${3:-0}" + local bytes_margin="${4:-0}" - local 
kb=$(($mb * 1024)) - local kb_margin=$(($mb_margin * 1024)) + local used_bytes=$(cat "$file" | awk '/^\//{print $3 * 1024}'); + local free_bytes=$(cat "$file" | awk '/^\//{print $4 * 1024}'); + local pct_used=$(cat "$file" | awk '/^\//{print $5}' | sed -e 's/%//g'); + local pct_free=$((100 - $pct_used)) - local kb_used=$(cat "$file" | awk '/^\//{print $3}'); - local kb_free=$(cat "$file" | awk '/^\//{print $4}'); - local pc_used=$(cat "$file" | awk '/^\//{print $5}' | sed -e 's/%//g'); + local real_free_bytes=$free_bytes + local real_pct_free=$pct_free - if [ "$kb_margin" -gt "0" ]; then - local kb_total=$(($kb_used + $kb_free)) + if [ $bytes_margin -gt 0 ]; then + used_bytes=$(($used_bytes + $bytes_margin)) + free_bytes=$(($free_bytes - $bytes_margin)) + pct_used=$(awk "BEGIN { printf(\"%d\", ($used_bytes/($used_bytes + $free_bytes)) * 100) }") - kb_used=$(($kb_used + $kb_margin)) - kb_free=$(($kb_free - $kb_margin)) - pc_used=$(awk "BEGIN { printf(\"%d\", $kb_used/$kb_total * 100) }") + pct_free=$((100 - $pct_used)) fi - local pc_free=$((100 - $pc_used)) + if [ $free_bytes -lt $min_free_bytes -o $pct_free -lt $min_free_pct ]; then + warn "Not enough free disk space: + Limit: ${min_free_pct}% free, ${min_free_bytes} bytes free + Actual: ${real_pct_free}% free, ${real_free_bytes} bytes free (- $bytes_margin bytes margin) +" + cat "$file" >&2 - if [ "$kb_free" -le "$kb" -o "$pc_free" -le "$pc" ]; then - warn "Not enough free disk space: ${pc_free}% free, ${kb_free} KB free; wanted more than ${pc}% free or ${kb} KB free" - return 1 + return 1 # not enough disk space fi - return 0 + return 0 # disk space is OK } # ########################################################################### @@ -674,8 +695,8 @@ collect() { disk_space $d > $d/$p-disk-space check_disk_space \ $d/$p-disk-space \ - "$OPT_DISK_BYTE_LIMIT" \ - "$OPT_DISK_PCT_LIMIT" \ + "$OPT_DISK_BYTES_FREE" \ + "$OPT_DISK_PCT_FREE" \ || break sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') @@ -980,16 
+1001,16 @@ stalk() { # Check if we'll have enough disk space to collect. Disk space # is also checked every interval while collecting. - local margin="20" # default 20M margin, unless: + local margin="20971520" # default 20M margin, unless: if [ -n "$last_prefix" ]; then margin=$(du -mc "$OPT_DEST"/"$last_prefix"-* | tail -n 1 | awk '{print $1'}) fi disk_space "$OPT_DEST" > "$OPT_DEST/$prefix-disk-space" check_disk_space \ "$OPT_DEST/$prefix-disk-space" \ - "$OPT_DISK_BYTE_LIMIT" \ - "$OPT_DISK_PCT_LIMIT" \ - "$margin" # real used MB + margin MB + "$OPT_DISK_BYTES_FREE" \ + "$OPT_DISK_PCT_FREE" \ + "$margin" if [ $? -eq 0 ]; then # There should be enough disk space, so collect. log "$msg" >> "$OPT_DEST/$prefix-trigger" @@ -1313,29 +1334,34 @@ type: string; default: /var/lib/pt-stalk Where to store the diagnostic data. Each time the tool collects data, it writes to a new set of files, which are named with the current system timestamp. -=item --disk-byte-limit +=item --disk-bytes-free -type: int; default: 100 +type: size; default: 100M -Don't collect data unless the destination disk has this much free space. This -prevents the tool from filling up the disk with diagnostic data. +Don't collect data if the disk has less than this much free space. +This prevents the tool from filling up the disk with diagnostic data. -If the destination directory contains a previously captured sample of data, the -tool will measure its size and use that as an estimate of how much data is +If the L<"--dest"> directory contains a previously captured sample of data, +the tool will measure its size and use that as an estimate of how much data is likely to be gathered this time, too. It will then be even more pessimistic, and will refuse to collect data unless the disk has enough free space to hold the sample and still have the desired amount of free space. 
For example, if you'd like 100MB of free space and the previous diagnostic sample consumed 100MB, the tool won't collect any data unless the disk has 200MB free. -=item --disk-pct-limit +Valid size value suffixes are k, M, G, and T. + +=item --disk-pct-free type: int; default: 5 -Don't collect data unless the disk has at least this percent free space. This -option works similarly to --disk-byte-limit, but specifies a percentage margin -of safety instead of a byte margin of safety. The tool honors both options, and -will not collect any data unless both margins are satisfied. +Don't collect data if the disk has less than this percent free space. +This prevents the tool from filling up the disk with diagnostic data. + +This option works similarly to L<"--disk-bytes-free"> but specifies a +percentage margin of safety instead of a bytes margin of safety. +The tool honors both options, and will not collect any data unless both +margins are satisfied. =item --function diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index 75f3d69d..41d43052 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -178,8 +178,8 @@ collect() { disk_space $d > $d/$p-disk-space check_disk_space \ $d/$p-disk-space \ - "$OPT_DISK_BYTE_LIMIT" \ - "$OPT_DISK_PCT_LIMIT" \ + "$OPT_DISK_BYTES_FREE" \ + "$OPT_DISK_PCT_FREE" \ || break # Synchronize ourselves onto the clock tick, so the sleeps are 1-second diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index 6ae070d7..b944fa3b 100644 --- a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -15,7 +15,7 @@ source "$LIB_DIR/safeguards.sh" source "$LIB_DIR/alt_cmds.sh" source "$LIB_DIR/collect.sh" -parse_options "$T_LIB_DIR/samples/bash/po002.sh" --run-time 1 -- --defaults-file=/tmp/12345/my.sandbox.cnf +parse_options "$BIN_DIR/pt-stalk" --run-time 1 -- --defaults-file=/tmp/12345/my.sandbox.cnf # Prefix (with path) for the collect files. 
local p="$TMPDIR/collect/2011_12_05" @@ -112,7 +112,7 @@ is "$iters" "1" "1 iteration/1s run time" # Try longer run time. # ########################################################################### -parse_options "$T_LIB_DIR/samples/bash/po002.sh" --run-time 2 -- --defaults-file=/tmp/12345/my.sandbox.cnf +parse_options "$BIN_DIR/pt-stalk" --run-time 2 -- --defaults-file=/tmp/12345/my.sandbox.cnf rm $TMPDIR/collect/* diff --git a/t/lib/samples/bash/po002.sh b/t/lib/samples/bash/po002.sh deleted file mode 100644 index 63a8672b..00000000 --- a/t/lib/samples/bash/po002.sh +++ /dev/null @@ -1,212 +0,0 @@ -#!/usr/bin/env bash - -: - -# ############################################################################ -# Documentation -# ############################################################################ -:<<'DOCUMENTATION' -=pod - -=head1 NAME - -pt-stalk - Wait for a condition to occur then begin collecting data. - -=head1 OPTIONS - -=over - -=item --collect - -default: yes; negatable: yes - -Collect system information. - -=item --collect-gdb - -Collect GDB stacktraces. - -=item --collect-oprofile - -Collect oprofile data. - -=item --collect-strace - -Collect strace data. - -=item --collect-tcpdump - -Collect tcpdump data. - -=item --cycles - -type: int; default: 5 - -Number of times condition must be met before triggering collection. - -=item --daemonize - -default: yes; negatable: yes - -Daemonize the tool. - -=item --dest - -type: string - -Where to store collected data. - -=item --disk-byte-limit - -type: int; default: 100 - -Exit if the disk has less than this many MB free. - -=item --disk-pct-limit - -type: int; default: 5 - -Exit if the disk is less than this %full. - -=item --execute-command - -type: string; default: pt-collect - -Location of the C tool. - -=item --function - -type: string; default: status - -Built-in function name or plugin file name which returns the value of C. 
- -Possible values are: - -=over - -=item * status - -Grep the value of C from C. - -=item * processlist - -Count the number of processes in C whose -C column matches C. For example: - - TRIGGER_FUNCTION="processlist" \ - VARIABLE="State" \ - MATCH="statistics" \ - THRESHOLD="10" - -The above triggers when more than 10 processes are in the "statistics" state. -C must be specified for this trigger function. - -=item * magic - -TODO - -=item * plugin file name - -A plugin file allows you to specify a custom trigger function. The plugin -file must contain a function called C. For example: - - trg_plugin() { - # Do some stuff. - echo "$value" - } - -The last output if the function (its "return value") must be a number. -This number is compared to C. All L<"ENVIRONMENT"> variables -are available to the function. - -Do not alter the tool's existing global variables. Prefix any plugin-specific -global variables with "PLUGIN_". - -=back - -=item --help - -Print help and exit. - -=item --interval - -type: int; default: 1 - -Interval between checks. - -=item --iterations - -type: int - -Exit after triggering C this many times. By default, the tool -will collect as many times as it's triggered. - -=item --log - -type: string; default: /var/log/pt-stalk.log - -Print all output to this file when daemonized. - -=item --match - -type: string - -Match pattern for C L<"--function">. - -=item --notify-by-email - -type: string - -Send mail to this list of addresses when C triggers. - -=item --pid FILE - -type: string; default: /var/run/pt-stalk.pid - -Create a PID file when daemonized. - -=item --retention-time - -type: int; default: 30 - -Remove samples after this many days. - -=item --run-time - -type: int; default: 30 - -How long to collect statistics data for? - -Make sure that this isn't longer than SLEEP. - -=item --sleep - -type: int; default: 300 - -How long to sleep after collecting? - -=item --threshold N - -type: int; default: 25 - -Max number of C to tolerate. 
- -=item --variable NAME - -type: string; default: Threads_running - -This is the thing to check for. - -=item --version - -Print tool's version and exit. - -=back - -=head1 ENVIRONMENT - -No env vars used. - -=cut - -DOCUMENTATION From 6b017da6fee7fc06cf82f76ae237188f78f81141 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 26 Jan 2012 12:59:48 -0700 Subject: [PATCH 55/71] Use new pretty --help. --- bin/pt-stalk | 34 ++++++++++++++++++++++++++++------ lib/bash/parse_options.sh | 34 ++++++++++++++++++++++++++++------ t/lib/bash/parse_options.sh | 6 +++++- 3 files changed, 61 insertions(+), 13 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index a431e1a9..97d4ba59 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -85,12 +85,34 @@ usage_or_errors() { echo echo "Command line options:" echo - for opt in $(ls "$PO_DIR"); do - local desc=$(cat $TMPDIR/po/$opt | grep '^desc:' | sed -e 's/^desc://') - echo "--$opt" - echo " $desc" - echo - done + perl -e ' + use strict; + use warnings FATAL => qw(all); + my $lcol = 20; # Allow this much space for option names. + my $rcol = 80 - $lcol; # The terminal is assumed to be 80 chars wide. + my $name; + while ( <> ) { + my $line = $_; + chomp $line; + if ( $line =~ s/^long:/ --/ ) { + $name = $line; + } + elsif ( $line =~ s/^desc:// ) { + $line =~ s/ +$//mg; + my @lines = grep { $_ } + $line =~ m/(.{0,$rcol})(?:\s+|\Z)/g; + if ( length($name) >= $lcol ) { + print $name, "\n", (q{ } x $lcol); + } + else { + printf "%-${lcol}s", $name; + } + print join("\n" . 
(q{ } x $lcol), @lines); + print "\n"; + } + } + ' "$PO_DIR"/* + echo echo "Options and values after processing arguments:" echo for opt in $(ls "$PO_DIR"); do diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index a33e68f8..6320c0f3 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -91,12 +91,34 @@ usage_or_errors() { echo echo "Command line options:" echo - for opt in $(ls "$PO_DIR"); do - local desc=$(cat $TMPDIR/po/$opt | grep '^desc:' | sed -e 's/^desc://') - echo "--$opt" - echo " $desc" - echo - done + perl -e ' + use strict; + use warnings FATAL => qw(all); + my $lcol = 20; # Allow this much space for option names. + my $rcol = 80 - $lcol; # The terminal is assumed to be 80 chars wide. + my $name; + while ( <> ) { + my $line = $_; + chomp $line; + if ( $line =~ s/^long:/ --/ ) { + $name = $line; + } + elsif ( $line =~ s/^desc:// ) { + $line =~ s/ +$//mg; + my @lines = grep { $_ } + $line =~ m/(.{0,$rcol})(?:\s+|\Z)/g; + if ( length($name) >= $lcol ) { + print $name, "\n", (q{ } x $lcol); + } + else { + printf "%-${lcol}s", $name; + } + print join("\n" . (q{ } x $lcol), @lines); + print "\n"; + } + } + ' "$PO_DIR"/* + echo echo "Options and values after processing arguments:" echo for opt in $(ls "$PO_DIR"); do diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index b66f8584..5afa53b5 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=72 +TESTS=73 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -105,6 +105,10 @@ cmd_ok \ "grep -q \"For more information, 'man pt-stalk' or 'perldoc\" $TMPFILE" \ "--help" +cmd_ok \ + "grep -q ' --string-opt2[ ]*String option with a default.' 
$TMPFILE" \ + "Command line options" + cmd_ok \ "grep -q '\-\-string-opt[ ]*(No value)' $TMPFILE" \ "Options and values after processing arguments" From 81ae556f8b9bd40b61912f2e7ddebcd80b1b674c Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 26 Jan 2012 13:04:23 -0700 Subject: [PATCH 56/71] Use local IFS. --- bin/pt-stalk | 9 ++------- lib/bash/parse_options.sh | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 97d4ba59..cb6d872c 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -166,12 +166,10 @@ parse_options() { shift # --config local user_config_files="$1" shift # that ^ - local old_ifs="$IFS" - IFS="," + local IFS="," for user_config_file in $user_config_files; do _parse_config_files "$user_config_file" done - IFS="$old_ifs" else _parse_config_files "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" fi @@ -214,8 +212,7 @@ _parse_pod() { } _eval_po() { - local old_ifs="$IFS" - IFS=":" + local IFS=":" for opt_spec in "$PO_DIR"/*; do local opt="" local default_val="" @@ -265,8 +262,6 @@ _eval_po() { eval "OPT_${opt}"="$default_val" done - - IFS="$old_ifs" } _parse_config_files() { diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 6320c0f3..3c2ac8c9 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -192,12 +192,10 @@ parse_options() { shift # --config local user_config_files="$1" shift # that ^ - local old_ifs="$IFS" - IFS="," + local IFS="," for user_config_file in $user_config_files; do _parse_config_files "$user_config_file" done - IFS="$old_ifs" else _parse_config_files "/etc/percona-toolkit/percona-toolkit.conf" "/etc/percona-toolkit/$TOOL.conf" "$HOME/.percona-toolkit.conf" "$HOME/.$TOOL.conf" fi @@ -252,8 +250,7 @@ _eval_po() { # Evaluate the program options into existence as global variables # transformed like --my-op == $OPT_MY_OP. 
If an option has a default # value, it's assigned that value. Else, it's value is an empty string. - local old_ifs="$IFS" - IFS=":" + local IFS=":" for opt_spec in "$PO_DIR"/*; do local opt="" local default_val="" @@ -305,8 +302,6 @@ _eval_po() { # Eval the option into existence as a global variable. eval "OPT_${opt}"="$default_val" done - - IFS="$old_ifs" } _parse_config_files() { From 950a386a72340533c5adc9908d65179f039e969b Mon Sep 17 00:00:00 2001 From: "baron@percona.com" <> Date: Thu, 26 Jan 2012 16:07:02 -0500 Subject: [PATCH 57/71] small docs tweaks --- bin/pt-stalk | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index cb6d872c..5c0a67b3 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1191,10 +1191,10 @@ pt-stalk - Gather forensic data about MySQL when a problem occurs. Usage: pt-stalk [OPTIONS] [-- MYSQL OPTIONS] pt-stalk watches for a trigger condition to become true, and then collects data -to help in diagnosing problems. It is designed to run as a daemon so that you -can diagnose intermittent problems that you cannot observe directly. You can -also use it to execute a custom command, or to gather the data on demand without -waiting for the trigger to happen. +to help in diagnosing problems. It is designed to run as a daemon with root +privileges, so that you can diagnose intermittent problems that you cannot +observe directly. You can also use it to execute a custom command, or to gather +the data on demand without waiting for the trigger to happen. =head1 RISKS @@ -1276,7 +1276,19 @@ tool doesn't cause the server to run out of disk space. =head1 CONFIGURING -TODO +You can use standard Percona Toolkit configuration files to set commandline +options. + +You will probably want to run the tool as a daemon and customize at least the +diagnostic threshold. 
Here's a sample configuration file for triggering when +there are more than 20 queries running at once: + + daemonize + threshold=20 + +If you're not running the tool as it's designed (as a root user, daemonized) +then you'll need to set several options, such as L<"--dest">, to locations that +are writable by non-root users. =head1 OPTIONS From e954505dd22df1b393c434e7db91019c1057f05a Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Fri, 27 Jan 2012 17:37:59 -0700 Subject: [PATCH 58/71] Don't print errors in data files; let the general -output file catch them. Print TS lines before data. Try to find sysctl manually. Add _pidof() to alt_cmds.sh. --- bin/pt-stalk | 122 ++++++++++++++++++++++++++---------------- lib/bash/alt_cmds.sh | 19 +++++++ lib/bash/collect.sh | 110 ++++++++++++++++++++++--------------- t/lib/bash/collect.sh | 16 +++++- 4 files changed, 177 insertions(+), 90 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index cb6d872c..3e70c8a0 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -464,11 +464,29 @@ rm_tmpdir() { set -u +CMD_PIDOF="$(which pidof)" +CMD_PGREP="$(which pgrep)" + _seq() { local i="$1" awk "BEGIN { for(i=1; i<=$i; i++) print i; }" } +_pidof() { + local proc="$1" # process name + local pat="${2:-""}" # pattern in case we must grep for proc + + local pid="" + + [ "$CMD_PIDOF" ] && pid=$(pidof -s "$proc"); + + [ -z "$pid" ] && [ "$CMD_PGREP" ] && pid=$(pgrep -o -x "$proc"); + + [ -z "$pid" ] && pid=$(ps -eaf | grep "$pat" | grep -v mysqld_safe | awk '{print $2}' | head -n1) + + echo $pid +} + # ########################################################################### # End alt_cmds package # ########################################################################### @@ -592,6 +610,7 @@ set -u CMD_GDB="$(which gdb)" CMD_IOSTAT="$(which iostat)" +CMD_LSOF="$(which lsof)" CMD_MPSTAT="$(which mpstat)" CMD_MYSQL="$(which mysql)" CMD_MYSQLADMIN="$(which mysqladmin)" @@ -599,20 +618,17 @@ CMD_OPCONTROL="$(which opcontrol)" 
CMD_OPREPORT="$(which opreport)" CMD_PMAP="$(which pmap)" CMD_STRACE="$(which strace)" +CMD_SYSCTL="$(which sysctl)" CMD_TCPDUMP="$(which tcpdump)" CMD_VMSTAT="$(which vmstat)" +[ -z "$CMD_SYSCTL" -a -x "/sbin/sysctl" ] && CMD_SYSCTL="/sbin/sysctl" + collect() { local d="$1" # directory to save results in local p="$2" # prefix for each result file - local mysqld_pid=$(pidof -s mysqld); - if [ -z "$mysqld_pid" ]; then - mysqld_pid=$(pgrep -o -x mysqld); - fi - if [ -z "$mysqld_pid" ]; then - mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); - fi + local mysqld_pid=$(_pidof mysqld mysql[d]); if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then @@ -630,7 +646,7 @@ collect() { >> "$d/$p-stacktrace" fi - $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" & sleep .2 local mysql_version="$(awk '/^version[^_]/{print substr($2,1,3)}' "$d/$p-variables")" @@ -643,11 +659,12 @@ collect() { local tail_error_log_pid="" if [ "$mysql_error_log" ]; then echo "The MySQL error log seems to be ${mysql_error_log}" - tail -f "$mysql_error_log" >"$d/$p-log_error" 2>&1 & + tail -f "$mysql_error_log" >"$d/$p-log_error" & tail_error_log_pid=$! 
+ $CMD_MYSQLADMIN $EXT_ARGV debug else - echo "Could not find the MySQL error log" + echo "Could not find the MySQL error log" >&2 fi local innostat="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" @@ -656,9 +673,9 @@ collect() { else local mutex="SHOW MUTEX STATUS" fi - $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" 2>&1 & - $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" 2>&1 & - open_tables >> "$d/$p-opentables1" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" & + $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" & + open_tables >> "$d/$p-opentables1" & local tcpdump_pid="" if [ "$CMD_TCPDUMP" -a "$OPT_COLLECT_TCPDUMP" ]; then @@ -676,28 +693,33 @@ collect() { have_oprofile="yes" fi elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then - $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & + $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" & local strace_pid=$! fi - ps -eaf >> "$d/$p-ps" 2>&1 & - sysctl -a >> "$d/$p-sysctl" 2>&1 & - top -bn1 >> "$d/$p-top" 2>&1 & - lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + ps -eaf >> "$d/$p-ps" & + top -bn1 >> "$d/$p-top" & + + if [ "$CMD_LSOF" ]; then + $CMD_LSOF -nP -p $mysqld_pid -bw >> "$d/$p-lsof" & + fi + if [ "$CMD_SYSCTL" ]; then + $CMD_SYSCTL -a >> "$d/$p-sysctl" & + fi if [ "$CMD_VMSTAT" ]; then - $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & - $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" & fi if [ "$CMD_IOSTAT" ]; then - $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & - $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" & fi if [ "$CMD_MPSTAT" ]; then - $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & - $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 
2>&1 & + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" & fi - $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" 2>&1 & + $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" & local mysqladmin_pid=$! local have_lock_waits_table=0 @@ -719,35 +741,38 @@ collect() { sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') local ts="$(date +"TS %s.%N %F %T")" + if [ -d "/proc" ]; then if [ -f "/proc/diskstats" ]; then - (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & + (echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" & fi if [ -f "/proc/stat" ]; then - (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & + (echo $ts; cat /proc/stat) >> "$d/$p-procstat" & fi if [ -f "/proc/vmstat" ]; then - (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & + (echo $ts; cat /proc/vmstat) >> "$d/$p-procvmstat" & fi if [ -f "/proc/meminfo" ]; then - (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & + (echo $ts; cat /proc/meminfo) >> "$d/$p-meminfo" & fi if [ -f "/proc/slabinfo" ]; then - (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & + (echo $ts; cat /proc/slabinfo) >> "$d/$p-slabinfo" & fi if [ -f "/proc/interrupts" ]; then - (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & + (echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" & fi fi - (df -h 2>&1; echo $ts) >> "$d/$p-df" & - (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & - (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & - ($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \ - >> "$d/$p-processlist" + (echo $ts; df -h) >> "$d/$p-df" & + + (echo $ts; netstat -antp) >> "$d/$p-netstat" & + (echo $ts; netstat -s) >> "$d/$p-netstat_s" & + + (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \ + >> "$d/$p-processlist" & if [ $have_lock_waits_table -eq 1 ]; then - (lock_waits 2>&1; echo $ts) >>"$d/$p-lock-waits" + (echo $ts; lock_waits) 
>>"$d/$p-lock-waits" & fi done echo "Loop end: $(date +'TS %s.%N %F %T')" @@ -781,21 +806,28 @@ collect() { kill -s 18 $mysqld_pid fi - $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" 2>&1 & - $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" 2>&1 & - open_tables >> "$d/$p-opentables2" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" & + $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" & + open_tables >> "$d/$p-opentables2" & kill $mysqladmin_pid [ "$tail_error_log_pid" ] && kill $tail_error_log_pid - [ "$tcpdump_pid" ] && kill $tcpdump_pid + [ "$tcpdump_pid" ] && kill $tcpdump_pid hostname > "$d/$p-hostname" + + for file in "$d/$p-"*; do + if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then + log "Removing empty file $file"; + rm "$file" + fi + done } open_tables() { local open_tables=$($CMD_MYSQLADMIN $EXT_ARGV ext | grep "Open_tables" | awk '{print $4}') if [ -n "$open_tables" -a $open_tables -le 1000 ]; then - $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' 2>&1 & + $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' & else echo "Too many open tables: $open_tables" fi @@ -843,7 +875,7 @@ lock_waits() { # ########################################################################### RAN_WITH="" EXIT_REASON="" -TOOL=$(basename $0) +TOOL="pt-stalk" OKTORUN=1 ITER=1 @@ -1104,8 +1136,8 @@ main() { # Execute the program if it was not included from another file. # This makes it possible to include without executing, and thus test. -if [ "$(basename "$0")" = "pt-stalk" ] \ - || [ "$(basename "$0")" = "bash" -a "$_" = "$0" ]; then +if [ "${0##*/}" = "$TOOL" ] \ + || [ "${0##*/}" = "bash" -a "$_" = "$0" ]; then # Check that mysql and mysqladmin are in PATH. If not, we're # already dead in the water, so don't bother with cmd line opts, diff --git a/lib/bash/alt_cmds.sh b/lib/bash/alt_cmds.sh index c5a88f5e..1ae378a7 100644 --- a/lib/bash/alt_cmds.sh +++ b/lib/bash/alt_cmds.sh @@ -23,12 +23,31 @@ set -u +# Global variables. 
+CMD_PIDOF="$(which pidof)" +CMD_PGREP="$(which pgrep)" + # seq N, return 1, ..., 5 _seq() { local i="$1" awk "BEGIN { for(i=1; i<=$i; i++) print i; }" } +_pidof() { + local proc="$1" # process name + local pat="${2:-""}" # pattern in case we must grep for proc + + local pid="" + + [ "$CMD_PIDOF" ] && pid=$(pidof -s "$proc"); + + [ -z "$pid" ] && [ "$CMD_PGREP" ] && pid=$(pgrep -o -x "$proc"); + + [ -z "$pid" ] && pid=$(ps -eaf | grep "$pat" | grep -v mysqld_safe | awk '{print $2}' | head -n1) + + echo $pid +} + # ########################################################################### # End alt_cmds package # ########################################################################### diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index 41d43052..cc774103 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -26,6 +26,7 @@ set -u # Global variables. CMD_GDB="$(which gdb)" CMD_IOSTAT="$(which iostat)" +CMD_LSOF="$(which lsof)" CMD_MPSTAT="$(which mpstat)" CMD_MYSQL="$(which mysql)" CMD_MYSQLADMIN="$(which mysqladmin)" @@ -33,21 +34,19 @@ CMD_OPCONTROL="$(which opcontrol)" CMD_OPREPORT="$(which opreport)" CMD_PMAP="$(which pmap)" CMD_STRACE="$(which strace)" +CMD_SYSCTL="$(which sysctl)" CMD_TCPDUMP="$(which tcpdump)" CMD_VMSTAT="$(which vmstat)" +# Try to find command manually. +[ -z "$CMD_SYSCTL" -a -x "/sbin/sysctl" ] && CMD_SYSCTL="/sbin/sysctl" + collect() { local d="$1" # directory to save results in local p="$2" # prefix for each result file - # Get pidof mysqld; pidof doesn't exist on some systems. We try our best... - local mysqld_pid=$(pidof -s mysqld); - if [ -z "$mysqld_pid" ]; then - mysqld_pid=$(pgrep -o -x mysqld); - fi - if [ -z "$mysqld_pid" ]; then - mysqld_pid=$(ps -eaf | grep 'mysql[d]' | grep -v mysqld_safe | awk '{print $2}' | head -n1); - fi + # Get pidof mysqld. + local mysqld_pid=$(_pidof mysqld mysql[d]); # Get memory allocation info before anything else. 
if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then @@ -72,7 +71,7 @@ collect() { # Get MySQL's variables if possible. Then sleep long enough that we probably # complete SHOW VARIABLES if all's well. (We don't want to run mysql in the # foreground, because it could hang.) - $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e 'SHOW GLOBAL VARIABLES' >> "$d/$p-variables" & sleep .2 # Get the major.minor version number. Version 3.23 doesn't matter for our @@ -89,13 +88,14 @@ collect() { local tail_error_log_pid="" if [ "$mysql_error_log" ]; then echo "The MySQL error log seems to be ${mysql_error_log}" - tail -f "$mysql_error_log" >"$d/$p-log_error" 2>&1 & + tail -f "$mysql_error_log" >"$d/$p-log_error" & tail_error_log_pid=$! + # Send a mysqladmin debug to the server so we can potentially learn about # locking etc. $CMD_MYSQLADMIN $EXT_ARGV debug else - echo "Could not find the MySQL error log" + echo "Could not find the MySQL error log" >&2 fi # Get a sample of these right away, so we can get these without interaction @@ -106,9 +106,9 @@ collect() { else local mutex="SHOW MUTEX STATUS" fi - $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" 2>&1 & - $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" 2>&1 & - open_tables >> "$d/$p-opentables1" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus1" & + $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status1" & + open_tables >> "$d/$p-opentables1" & # If TCP dumping is specified, start that on the server's port. local tcpdump_pid="" @@ -130,27 +130,32 @@ collect() { fi elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then # Don't run oprofile and strace at the same time. - $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" 2>&1 & + $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" & local strace_pid=$! fi # Grab a few general things first. Background all of these so we can start # them all up as quickly as possible. 
- ps -eaf >> "$d/$p-ps" 2>&1 & - sysctl -a >> "$d/$p-sysctl" 2>&1 & - top -bn1 >> "$d/$p-top" 2>&1 & - lsof -nP -p $mysqld_pid -bw >> "$d/$p-lsof" 2>&1 & + ps -eaf >> "$d/$p-ps" & + top -bn1 >> "$d/$p-top" & + + if [ "$CMD_LSOF" ]; then + $CMD_LSOF -nP -p $mysqld_pid -bw >> "$d/$p-lsof" & + fi + if [ "$CMD_SYSCTL" ]; then + $CMD_SYSCTL -a >> "$d/$p-sysctl" & + fi if [ "$CMD_VMSTAT" ]; then - $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" 2>&1 & - $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" 2>&1 & + $CMD_VMSTAT 1 $OPT_INTERVAL >> "$d/$p-vmstat" & + $CMD_VMSTAT $OPT_INTERVAL 2 >> "$d/$p-vmstat-overall" & fi if [ "$CMD_IOSTAT" ]; then - $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" 2>&1 & - $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" 2>&1 & + $CMD_IOSTAT -dx 1 $OPT_INTERVAL >> "$d/$p-iostat" & + $CMD_IOSTAT -dx $OPT_INTERVAL 2 >> "$d/$p-iostat-overall" & fi if [ "$CMD_MPSTAT" ]; then - $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" 2>&1 & - $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" 2>&1 & + $CMD_MPSTAT -P ALL 1 $OPT_INTERVAL >> "$d/$p-mpstat" & + $CMD_MPSTAT -P ALL $OPT_INTERVAL 1 >> "$d/$p-mpstat-overall" & fi # Collect multiple snapshots of the status variables. We use @@ -159,7 +164,7 @@ collect() { # get and keep a connection to the database; in troubled times # the database tends to exceed max_connections, so reconnecting # in the loop tends not to work very well. - $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" 2>&1 & + $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" & local mysqladmin_pid=$! local have_lock_waits_table=0 @@ -186,36 +191,41 @@ collect() { sleep $(date +%s.%N | awk '{print 1 - ($1 % 1)}') local ts="$(date +"TS %s.%N %F %T")" - # Collect the stuff for this cycle + # ##################################################################### + # Collect data for this cycle. 
+ # ##################################################################### + if [ -d "/proc" ]; then if [ -f "/proc/diskstats" ]; then - (cat /proc/diskstats 2>&1; echo $ts) >> "$d/$p-diskstats" & + (echo $ts; cat /proc/diskstats) >> "$d/$p-diskstats" & fi if [ -f "/proc/stat" ]; then - (cat /proc/stat 2>&1; echo $ts) >> "$d/$p-procstat" & + (echo $ts; cat /proc/stat) >> "$d/$p-procstat" & fi if [ -f "/proc/vmstat" ]; then - (cat /proc/vmstat 2>&1; echo $ts) >> "$d/$p-procvmstat" & + (echo $ts; cat /proc/vmstat) >> "$d/$p-procvmstat" & fi if [ -f "/proc/meminfo" ]; then - (cat /proc/meminfo 2>&1; echo $ts) >> "$d/$p-meminfo" & + (echo $ts; cat /proc/meminfo) >> "$d/$p-meminfo" & fi if [ -f "/proc/slabinfo" ]; then - (cat /proc/slabinfo 2>&1; echo $ts) >> "$d/$p-slabinfo" & + (echo $ts; cat /proc/slabinfo) >> "$d/$p-slabinfo" & fi if [ -f "/proc/interrupts" ]; then - (cat /proc/interrupts 2>&1; echo $ts) >> "$d/$p-interrupts" & + (echo $ts; cat /proc/interrupts) >> "$d/$p-interrupts" & fi fi - (df -h 2>&1; echo $ts) >> "$d/$p-df" & - (netstat -antp 2>&1; echo $ts) >> "$d/$p-netstat" & - (netstat -s 2>&1; echo $ts) >> "$d/$p-netstat_s" & - ($CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G" 2>&1; echo $ts) \ - >> "$d/$p-processlist" + (echo $ts; df -h) >> "$d/$p-df" & + + (echo $ts; netstat -antp) >> "$d/$p-netstat" & + (echo $ts; netstat -s) >> "$d/$p-netstat_s" & + + (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \ + >> "$d/$p-processlist" & if [ $have_lock_waits_table -eq 1 ]; then - (lock_waits 2>&1; echo $ts) >>"$d/$p-lock-waits" + (echo $ts; lock_waits) >>"$d/$p-lock-waits" & fi done echo "Loop end: $(date +'TS %s.%N %F %T')" @@ -252,23 +262,35 @@ collect() { kill -s 18 $mysqld_pid fi - $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" 2>&1 & - $CMD_MYSQL $EXT_ARGV -e "$mutex" >> "$d/$p-mutex-status2" 2>&1 & - open_tables >> "$d/$p-opentables2" 2>&1 & + $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" & + $CMD_MYSQL $EXT_ARGV 
-e "$mutex" >> "$d/$p-mutex-status2" & + open_tables >> "$d/$p-opentables2" & # Kill backgrounded tasks. kill $mysqladmin_pid [ "$tail_error_log_pid" ] && kill $tail_error_log_pid - [ "$tcpdump_pid" ] && kill $tcpdump_pid + [ "$tcpdump_pid" ] && kill $tcpdump_pid # Finally, record what system we collected this data from. hostname > "$d/$p-hostname" + + # Remove "empty" files, i.e. ones that are truly empty or + # just contain timestamp lines. When a command above fails, + # it may leave an empty file. + for file in "$d/$p-"*; do + # If there's not at least 1 line that's not a TS, + # then the file is empty. + if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then + log "Removing empty file $file"; + rm "$file" + fi + done } open_tables() { local open_tables=$($CMD_MYSQLADMIN $EXT_ARGV ext | grep "Open_tables" | awk '{print $4}') if [ -n "$open_tables" -a $open_tables -le 1000 ]; then - $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' 2>&1 & + $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' & else echo "Too many open tables: $open_tables" fi diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index b944fa3b..0564c666 100644 --- a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=19 +TESTS=20 TMPFILE="$TEST_TMPDIR/parse-opts-output" TMPDIR="$TEST_TMPDIR" @@ -108,6 +108,20 @@ cmd_ok \ local iters=$(cat $p-df | grep -c '^TS ') is "$iters" "1" "1 iteration/1s run time" +empty_files=0 +for file in $p-*; do + if ! [ -s $file ]; then + empty_files=1 + break + fi + if [ -z "$(grep -v '^TS ' --max-count 1 $file)" ]; then + empty_files=1 + break + fi +done + +is "$empty_files" "0" "No empty files" + # ########################################################################### # Try longer run time. 
# ########################################################################### From d1a684b55b71d3b61351f726da6ee9539a9e4847 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 30 Jan 2012 08:59:19 -0700 Subject: [PATCH 59/71] Use _pidof() and _lsof() from Percona Monitoring Plugins. Use "" and "yes" exclusively in collect(). Don't exec cmds that need mysqld_pid unless mysqld_pid is defined. Use log() instead of echo. --- lib/bash/alt_cmds.sh | 21 ++++++++++----------- lib/bash/collect.sh | 43 ++++++++++++++++++++++++------------------- 2 files changed, 34 insertions(+), 30 deletions(-) diff --git a/lib/bash/alt_cmds.sh b/lib/bash/alt_cmds.sh index 1ae378a7..e54112e4 100644 --- a/lib/bash/alt_cmds.sh +++ b/lib/bash/alt_cmds.sh @@ -34,18 +34,17 @@ _seq() { } _pidof() { - local proc="$1" # process name - local pat="${2:-""}" # pattern in case we must grep for proc - - local pid="" + local cmd="$1" + if ! pidof "$cmd" 2>/dev/null; then + ps -eo pid,ucomm | awk -v comm="$cmd" '$2 == comm { print $1 }' + fi +} - [ "$CMD_PIDOF" ] && pid=$(pidof -s "$proc"); - - [ -z "$pid" ] && [ "$CMD_PGREP" ] && pid=$(pgrep -o -x "$proc"); - - [ -z "$pid" ] && pid=$(ps -eaf | grep "$pat" | grep -v mysqld_safe | awk '{print $2}' | head -n1) - - echo $pid +_lsof() { + local pid="$1" + if ! lsof -p $pid 2>/dev/null; then + /bin/ls -l /proc/$pid/fd 2>/dev/null + fi } # ########################################################################### diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index cc774103..472e5e3c 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -26,7 +26,6 @@ set -u # Global variables. CMD_GDB="$(which gdb)" CMD_IOSTAT="$(which iostat)" -CMD_LSOF="$(which lsof)" CMD_MPSTAT="$(which mpstat)" CMD_MYSQL="$(which mysql)" CMD_MYSQLADMIN="$(which mysqladmin)" @@ -46,7 +45,7 @@ collect() { local p="$2" # prefix for each result file # Get pidof mysqld. 
- local mysqld_pid=$(_pidof mysqld mysql[d]); + local mysqld_pid=$(_pidof mysqld | head -n1) # Get memory allocation info before anything else. if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then @@ -87,7 +86,7 @@ collect() { local tail_error_log_pid="" if [ "$mysql_error_log" ]; then - echo "The MySQL error log seems to be ${mysql_error_log}" + log "The MySQL error log seems to be $mysql_error_log" tail -f "$mysql_error_log" >"$d/$p-log_error" & tail_error_log_pid=$! @@ -95,7 +94,7 @@ collect() { # locking etc. $CMD_MYSQLADMIN $EXT_ARGV debug else - echo "Could not find the MySQL error log" >&2 + log "Could not find the MySQL error log" fi # Get a sample of these right away, so we can get these without interaction @@ -122,13 +121,13 @@ collect() { # Next, start oprofile gathering data during the whole rest of this process. # The --init should be a no-op if it has already been init-ed. - local have_oprofile="no" + local have_oprofile="" if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" -a "$mysqld_pid" ]; then # Don't run oprofile and strace at the same time. $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" & local strace_pid=$! @@ -139,9 +138,8 @@ collect() { ps -eaf >> "$d/$p-ps" & top -bn1 >> "$d/$p-top" & - if [ "$CMD_LSOF" ]; then - $CMD_LSOF -nP -p $mysqld_pid -bw >> "$d/$p-lsof" & - fi + [ "$mysqld_pid" ] && _lsof $mysqld_pid >> "$d/$p-lsof" & + if [ "$CMD_SYSCTL" ]; then $CMD_SYSCTL -a >> "$d/$p-sysctl" & fi @@ -167,16 +165,16 @@ collect() { $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" & local mysqladmin_pid=$! - local have_lock_waits_table=0 + local have_lock_waits_table="" $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1 if [ $? 
-eq 0 ]; then - have_lock_waits_table=1 + have_lock_waits_table="yes" fi # This loop gathers data for the rest of the duration, and defines the time # of the whole job. - echo "Loop start: $(date +'TS %s.%N %F %T')" + log "Loop start: $(date +'TS %s.%N %F %T')" for loopno in $(_seq $OPT_RUN_TIME); do # We check the disk, but don't exit, because we need to stop jobs if we # need to exit. @@ -224,16 +222,23 @@ collect() { (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \ >> "$d/$p-processlist" & - if [ $have_lock_waits_table -eq 1 ]; then + if [ "$have_lock_waits_table" ]; then (echo $ts; lock_waits) >>"$d/$p-lock-waits" & fi done - echo "Loop end: $(date +'TS %s.%N %F %T')" + log "Loop end: $(date +'TS %s.%N %F %T')" - if [ "$have_oprofile" = "yes" ]; then + if [ "$have_oprofile" ]; then $CMD_OPCONTROL --stop $CMD_OPCONTROL --dump - kill $(pidof oprofiled); # TODO: what if system doesn't have pidof? + + local oprofiled_pid=$(_pidof oprofiled) + if [ "$oprofiled_pid" ]; then + kill $oprofiled_pid + else + warn "Cannot kill oprofiled because its PID cannot be determined" + fi + $CMD_OPCONTROL --save=pt_collect_$p # Attempt to generate a report; if this fails, then just tell the user @@ -248,7 +253,7 @@ collect() { "$mysqld_path" \ > "$d/$p-opreport" else - echo "oprofile data saved to pt_collect_$p; you should be able" \ + log "oprofile data saved to pt_collect_$p; you should be able" \ "to get a report by running something like 'opreport" \ "--demangle=smart --symbols --merge tgid session:pt_collect_$p" \ "/path/to/mysqld'" \ @@ -259,7 +264,7 @@ collect() { sleep 1 kill -s 15 $strace_pid # Sometimes strace leaves threads/processes in T status. 
- kill -s 18 $mysqld_pid + [ "$mysqld_pid" ] && kill -s 18 $mysqld_pid fi $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" & @@ -292,7 +297,7 @@ open_tables() { if [ -n "$open_tables" -a $open_tables -le 1000 ]; then $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' & else - echo "Too many open tables: $open_tables" + log "Too many open tables: $open_tables" fi } From 8389da4c07cf115b1462197581490d291a058265 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 30 Jan 2012 09:22:25 -0700 Subject: [PATCH 60/71] Use . instead of source. Quote func file. Verify --function. Add option_error() to parse_options.sh. Update libs in pt-stalk. --- bin/pt-stalk | 105 +++++++++++++++++++++++--------------- lib/bash/parse_options.sh | 18 +++---- 2 files changed, 72 insertions(+), 51 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 3e70c8a0..80c3484b 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -133,6 +133,12 @@ usage_or_errors() { return 0 } +option_error() { + local err="$1" + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "$err" >&2 +} + parse_options() { local file="$1" shift @@ -317,8 +323,7 @@ _parse_command_line() { if [ "$next_opt_is_val" ]; then next_opt_is_val="" if [ $# -eq 0 ] || [ $(expr "$opt" : "-") -eq 1 ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "$real_opt requires a $required_arg argument" >&2 + option_error "$real_opt requires a $required_arg argument" continue fi val="$opt" @@ -353,8 +358,7 @@ _parse_command_line() { else spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) if [ -z "$spec" ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "Unknown option: $real_opt" >&2 + option_error "Unknown option: $real_opt" continue fi fi @@ -368,8 +372,7 @@ _parse_command_line() { fi else if [ "$val" ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "Option $real_opt does not take a value" >&2 + option_error "Option $real_opt does not take a value" continue fi if [ "$opt_is_negated" ]; then @@ -473,18 +476,17 @@ _seq() { } _pidof() { - local proc="$1" # 
process name - local pat="${2:-""}" # pattern in case we must grep for proc - - local pid="" + local cmd="$1" + if ! pidof "$cmd" 2>/dev/null; then + ps -eo pid,ucomm | awk -v comm="$cmd" '$2 == comm { print $1 }' + fi +} - [ "$CMD_PIDOF" ] && pid=$(pidof -s "$proc"); - - [ -z "$pid" ] && [ "$CMD_PGREP" ] && pid=$(pgrep -o -x "$proc"); - - [ -z "$pid" ] && pid=$(ps -eaf | grep "$pat" | grep -v mysqld_safe | awk '{print $2}' | head -n1) - - echo $pid +_lsof() { + local pid="$1" + if ! lsof -p $pid 2>/dev/null; then + /bin/ls -l /proc/$pid/fd 2>/dev/null + fi } # ########################################################################### @@ -610,7 +612,6 @@ set -u CMD_GDB="$(which gdb)" CMD_IOSTAT="$(which iostat)" -CMD_LSOF="$(which lsof)" CMD_MPSTAT="$(which mpstat)" CMD_MYSQL="$(which mysql)" CMD_MYSQLADMIN="$(which mysqladmin)" @@ -628,7 +629,7 @@ collect() { local d="$1" # directory to save results in local p="$2" # prefix for each result file - local mysqld_pid=$(_pidof mysqld mysql[d]); + local mysqld_pid=$(_pidof mysqld | head -n1) if [ "$CMD_PMAP" -a "$mysqld_pid" ]; then if $CMD_PMAP --help 2>&1 | grep -- -x >/dev/null 2>&1 ; then @@ -658,13 +659,13 @@ collect() { local tail_error_log_pid="" if [ "$mysql_error_log" ]; then - echo "The MySQL error log seems to be ${mysql_error_log}" + log "The MySQL error log seems to be $mysql_error_log" tail -f "$mysql_error_log" >"$d/$p-log_error" & tail_error_log_pid=$! 
$CMD_MYSQLADMIN $EXT_ARGV debug else - echo "Could not find the MySQL error log" >&2 + log "Could not find the MySQL error log" fi local innostat="SHOW /*!40100 ENGINE*/ INNODB STATUS\G" @@ -686,13 +687,13 @@ collect() { fi fi - local have_oprofile="no" + local have_oprofile="" if [ "$CMD_OPCONTROL" -a "$OPT_COLLECT_OPROFILE" ]; then if $CMD_OPCONTROL --init; then $CMD_OPCONTROL --start --no-vmlinux have_oprofile="yes" fi - elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" ]; then + elif [ "$CMD_STRACE" -a "$OPT_COLLECT_STRACE" -a "$mysqld_pid" ]; then $CMD_STRACE -T -s 0 -f -p $mysqld_pid > "${DEST}/$d-strace" & local strace_pid=$! fi @@ -700,9 +701,8 @@ collect() { ps -eaf >> "$d/$p-ps" & top -bn1 >> "$d/$p-top" & - if [ "$CMD_LSOF" ]; then - $CMD_LSOF -nP -p $mysqld_pid -bw >> "$d/$p-lsof" & - fi + [ "$mysqld_pid" ] && _lsof $mysqld_pid >> "$d/$p-lsof" & + if [ "$CMD_SYSCTL" ]; then $CMD_SYSCTL -a >> "$d/$p-sysctl" & fi @@ -722,14 +722,14 @@ collect() { $CMD_MYSQLADMIN $EXT_ARGV ext -i1 -c$OPT_RUN_TIME >>"$d/$p-mysqladmin" & local mysqladmin_pid=$! - local have_lock_waits_table=0 + local have_lock_waits_table="" $CMD_MYSQL $EXT_ARGV -e "SHOW TABLES FROM INFORMATION_SCHEMA" \ | grep -i "INNODB_LOCK_WAITS" >/dev/null 2>&1 if [ $? 
-eq 0 ]; then - have_lock_waits_table=1 + have_lock_waits_table="yes" fi - echo "Loop start: $(date +'TS %s.%N %F %T')" + log "Loop start: $(date +'TS %s.%N %F %T')" for loopno in $(_seq $OPT_RUN_TIME); do disk_space $d > $d/$p-disk-space check_disk_space \ @@ -771,16 +771,23 @@ collect() { (echo $ts; $CMD_MYSQL $EXT_ARGV -e "SHOW FULL PROCESSLIST\G") \ >> "$d/$p-processlist" & - if [ $have_lock_waits_table -eq 1 ]; then + if [ "$have_lock_waits_table" ]; then (echo $ts; lock_waits) >>"$d/$p-lock-waits" & fi done - echo "Loop end: $(date +'TS %s.%N %F %T')" + log "Loop end: $(date +'TS %s.%N %F %T')" - if [ "$have_oprofile" = "yes" ]; then + if [ "$have_oprofile" ]; then $CMD_OPCONTROL --stop $CMD_OPCONTROL --dump - kill $(pidof oprofiled); # TODO: what if system doesn't have pidof? + + local oprofiled_pid=$(_pidof oprofiled) + if [ "$oprofiled_pid" ]; then + kill $oprofiled_pid + else + warn "Cannot kill oprofiled because its PID cannot be determined" + fi + $CMD_OPCONTROL --save=pt_collect_$p local mysqld_path=$(which mysqld); @@ -793,7 +800,7 @@ collect() { "$mysqld_path" \ > "$d/$p-opreport" else - echo "oprofile data saved to pt_collect_$p; you should be able" \ + log "oprofile data saved to pt_collect_$p; you should be able" \ "to get a report by running something like 'opreport" \ "--demangle=smart --symbols --merge tgid session:pt_collect_$p" \ "/path/to/mysqld'" \ @@ -803,7 +810,7 @@ collect() { kill -s 2 $strace_pid sleep 1 kill -s 15 $strace_pid - kill -s 18 $mysqld_pid + [ "$mysqld_pid" ] && kill -s 18 $mysqld_pid fi $CMD_MYSQL $EXT_ARGV -e "$innostat" >> "$d/$p-innodbstatus2" & @@ -829,7 +836,7 @@ open_tables() { if [ -n "$open_tables" -a $open_tables -le 1000 ]; then $CMD_MYSQL $EXT_ARGV -e 'SHOW OPEN TABLES' & else - echo "Too many open tables: $open_tables" + log "Too many open tables: $open_tables" fi } @@ -873,6 +880,7 @@ lock_waits() { # ########################################################################### # Global variables # 
########################################################################### +TRIGGER_FUNCTION="" RAN_WITH="" EXIT_REASON="" TOOL="pt-stalk" @@ -928,12 +936,21 @@ grep_processlist() { } set_trg_func() { - if [ -f "$OPT_FUNCTION" ]; then - source $OPT_FUNCTION + local func="$1" + if [ -f "$func" ]; then + # Trigger function is a file with Bash code; source it. + . "$func" TRIGGER_FUNCTION="trg_plugin" + return 0 # success else - TRIGGER_FUNCTION="trg_$OPT_FUNCTION" + # Trigger function is name of a built-in function. + func=$(echo "$func" | tr [:upper:] [:lower:]) + if [ "$func" = "status" -o "$func" = "processlist" ]; then + TRIGGER_FUNCTION="trg_$func" + return 0 # success + fi fi + return 1 # error } trg_status() { @@ -1119,9 +1136,6 @@ main() { # Make a secure tmpdir. mk_tmpdir - # Set TRIGGER_FUNCTION based on --function. - set_trg_func - # Stalk while oktorun. stalk @@ -1147,10 +1161,17 @@ if [ "${0##*/}" = "$TOOL" ] \ [ -n "$(mysqladmin --help)" ] \ || die "Cannot execute mysqladmin. Check that it is in PATH." + # Parse command line options. We must do this first so we can # see if --daemonize was specified. mk_tmpdir parse_options "$0" "$@" + + # Verify and set TRIGGER_FUNCTION based on --function. + if ! set_trg_func "$OPT_FUNCTION"; then + option_error "Invalid --function value: $OPT_FUNCTION" + fi + usage_or_errors "$0" po_status=$? rm_tmpdir diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 3c2ac8c9..59aff04a 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -65,9 +65,6 @@ PO_DIR="$TMPDIR/po" # Directory with program option spec files # Required Global Variables: # TIMDIR - Temp directory set by . # TOOL - Tool's name. -# -# Optional Global Variables: -# OPT_ERR - Command line option error message. 
usage() { local file="$1" @@ -140,6 +137,12 @@ usage_or_errors() { return 0 } +option_error() { + local err="$1" + OPT_ERRS=$(($OPT_ERRS + 1)) + echo "$err" >&2 +} + # Sub: parse_options # Parse Perl POD options from a program file. # @@ -384,8 +387,7 @@ _parse_command_line() { if [ "$next_opt_is_val" ]; then next_opt_is_val="" if [ $# -eq 0 ] || [ $(expr "$opt" : "-") -eq 1 ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "$real_opt requires a $required_arg argument" >&2 + option_error "$real_opt requires a $required_arg argument" continue fi val="$opt" @@ -425,8 +427,7 @@ _parse_command_line() { else spec=$(grep "^short form:-$opt\$" "$TMPDIR"/po/* | cut -d ':' -f 1) if [ -z "$spec" ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "Unknown option: $real_opt" >&2 + option_error "Unknown option: $real_opt" continue fi fi @@ -446,8 +447,7 @@ _parse_command_line() { else # Option does not take a value. if [ "$val" ]; then - OPT_ERRS=$(($OPT_ERRS + 1)) - echo "Option $real_opt does not take a value" >&2 + option_error "Option $real_opt does not take a value" continue fi if [ "$opt_is_negated" ]; then From caa91f39fa2f3af563ebf537e6fe76096dfab820 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 30 Jan 2012 10:05:35 -0700 Subject: [PATCH 61/71] Log collector PID. --- bin/pt-stalk | 1 + t/pt-stalk/pt-stalk.t | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index d99991f3..5ee21363 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1091,6 +1091,7 @@ stalk() { ( collect "$OPT_DEST" "$prefix" ) >> "$OPT_DEST/$prefix-output" 2>&1 & + log "Collector PID $!" else # There will not be enough disk space, so do not collect. 
warn "Collect canceled because there will not be enough disk space after collecting another $margin MB" diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index 6a054899..83d180d7 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -24,7 +24,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 20; + plan tests => 21; } my $cnf = "/tmp/12345/my.sandbox.cnf"; @@ -169,6 +169,13 @@ like( "Trigger file logs how pt-stalk was ran" ); +chomp($output = `cat $log_file | grep 'Collector PID'`); +like( + $output, + qr/Collector PID \d+/, + "Collector PID logged" +); + # ########################################################################### # Triggered but --no-collect. # ########################################################################### From 89242d9fdea077a6f86744fcb4749023cd20ee4a Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 30 Jan 2012 10:20:31 -0700 Subject: [PATCH 62/71] Don't init a global with a global. 
--- bin/pt-stalk | 2 +- lib/bash/parse_options.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 5ee21363..aee02abf 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -60,7 +60,7 @@ HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV OPT_ERRS=0 # How many command line option errors OPT_VERSION="" # If --version was specified OPT_HELP="" # If --help was specified -PO_DIR="$TMPDIR/po" # Directory with program option spec files +PO_DIR="" # Directory with program option spec files usage() { local file="$1" diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 59aff04a..805b82f5 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -54,7 +54,7 @@ HAVE_EXT_ARGV="" # Got --, everything else is put into EXT_ARGV OPT_ERRS=0 # How many command line option errors OPT_VERSION="" # If --version was specified OPT_HELP="" # If --help was specified -PO_DIR="$TMPDIR/po" # Directory with program option spec files +PO_DIR="" # Directory with program option spec files # Sub: usage # Print usage (--help) and list the program's options. From be8c0e6e18b87f3a543854459836d5fc3417efe3 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 30 Jan 2012 11:15:28 -0700 Subject: [PATCH 63/71] Fix parsing opts from config file with leading and trailing space. 
--- bin/pt-stalk | 2 +- lib/bash/parse_options.sh | 2 +- t/lib/bash/parse_options.sh | 11 ++++++++++- t/lib/samples/bash/config004.conf | 3 +++ 4 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 t/lib/samples/bash/config004.conf diff --git a/bin/pt-stalk b/bin/pt-stalk index aee02abf..17092df6 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -279,7 +279,7 @@ _parse_config_files() { echo "$config_opt" | grep '^[ ]*[^#]' >/dev/null 2>&1 || continue - config_opt="$(echo "$config_opt" | sed -e 's/^[ ]*//' -e 's/[ ]*\$//' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + config_opt="$(echo "$config_opt" | sed -e 's/^ *//g' -e 's/ *$//g' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" [ "$config_opt" = "" ] && continue diff --git a/lib/bash/parse_options.sh b/lib/bash/parse_options.sh index 805b82f5..3bb2762f 100644 --- a/lib/bash/parse_options.sh +++ b/lib/bash/parse_options.sh @@ -325,7 +325,7 @@ _parse_config_files() { # Strip leading and trailing spaces, and spaces around the first =, # and end-of-line # comments. - config_opt="$(echo "$config_opt" | sed -e 's/^[ ]*//' -e 's/[ ]*\$//' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" + config_opt="$(echo "$config_opt" | sed -e 's/^ *//g' -e 's/ *$//g' -e 's/[ ]*=[ ]*/=/' -e 's/[ ]*#.*$//')" # Skip blank lines. [ "$config_opt" = "" ] && continue diff --git a/t/lib/bash/parse_options.sh b/t/lib/bash/parse_options.sh index 5afa53b5..8c462eff 100644 --- a/t/lib/bash/parse_options.sh +++ b/t/lib/bash/parse_options.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash -TESTS=73 +TESTS=78 TMPFILE="$TEST_TMPDIR/parse-opts-output" TOOL="pt-stalk" @@ -166,6 +166,15 @@ is "$OPT_INT_OPT" "100" "Two --config int option" is "$ARGV" "" "Two --config ARGV" is "$EXT_ARGV" "--host=127.1 --user=daniel" "Two--config External ARGV" +# Spaces before and after the option[=value] lines. 
+parse_options "$T_LIB_DIR/samples/bash/po001.sh" --config $T_LIB_DIR/samples/bash/config004.conf + +is "$OPT_STRING_OPT" "foo" "Default string option (spacey)" +is "$OPT_TYPELESS_OPTION" "yes" "Default typeless option (spacey)" +is "$OPT_INT_OPT" "123" "Default int option (spacey)" +is "$ARGV" "" "ARGV (spacey)" +is "$EXT_ARGV" "" "External ARGV (spacey)" + # ############################################################################ # Option values with spaces. # ############################################################################ diff --git a/t/lib/samples/bash/config004.conf b/t/lib/samples/bash/config004.conf new file mode 100644 index 00000000..eb023329 --- /dev/null +++ b/t/lib/samples/bash/config004.conf @@ -0,0 +1,3 @@ + typeless-option + int-opt=123 +string-opt=foo From afd0bf8982e2cacc2e7045f7cd02e27fbe7655cd Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 31 Jan 2012 09:16:18 -0700 Subject: [PATCH 64/71] rm -rf the oprofile dirs. --- bin/pt-stalk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 17092df6..b54f880e 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1007,7 +1007,7 @@ purge_samples() { # "pt_collect_" here needs to match $CMD_OPCONTROL --save=pt_collect_$p # in collect(). TODO: fix this find "$oprofile_dir" -type d -name 'pt_collect_*' \ - -depth -mtime +$retention_time -exec rm -f '{}' \; + -depth -mtime +$retention_time -exec rm -rf '{}' \; fi } From 37df5da4d637fc8a8391aa85c775b14ba11a2c8c Mon Sep 17 00:00:00 2001 From: "baron@percona.com" <> Date: Tue, 31 Jan 2012 12:01:32 -0500 Subject: [PATCH 65/71] doc tweaks --- bin/pt-stalk | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index b54f880e..355bf180 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1257,7 +1257,8 @@ whether known or unknown, of using this tool. The two main categories of risks are those created by the nature of the tool (e.g. 
read-only tools vs. read-write tools) and those created by bugs. -pt-stalk is a read-only tool. It should be very low-risk. Some of the options +pt-stalk is a read-write tool; it collects data from the system and writes it +into a series of files. It should be very low-risk. Some of the options can cause intrusive data collection to be performed, however, so if you enable any non-default options, you should read their documentation carefully. @@ -1326,7 +1327,10 @@ resulting samples of data. Although this sounds simple enough, in practice there are a number of subtleties, such as detecting when the disk is beginning to fill up so that the -tool doesn't cause the server to run out of disk space. +tool doesn't cause the server to run out of disk space. This tool handles these +types of potential problems, so it's a good idea to use this tool instead of +writing something from scratch and possibly experiencing some of the hazards +this tool is designed to prevent. =head1 CONFIGURING @@ -1402,8 +1406,8 @@ first option on the command line. type: int; default: 5 The number of times the trigger condition must be true before collecting data. -This helps prevent false positives and make the trigger condition less -susceptible to firing when the condition recovers quickly. +This helps prevent false positives, and makes the trigger condition less likely +to fire when the problem recovers quickly. =item --daemonize @@ -1473,7 +1477,8 @@ L<"--variable"> column matches the L<"--match"> option. For example, to trigger when more than 10 processes are in the "statistics" state, use the following options: - --trigger processlist --variable State --match statistics --threshold 10 + --trigger processlist --variable State \ + --match statistics --threshold 10 =back @@ -1489,7 +1494,8 @@ simply sources the file and executes the function. 
For example, the function might look like the following: trg_plugin() { - mysql $EXT_ARGV -e "SHOW ENGINE INNODB STATUS" | grep -c "has waited at" + mysql $EXT_ARGV -e "SHOW ENGINE INNODB STATUS" \ + | grep -c "has waited at" } This snippet will count the number of mutex waits inside of InnoDB. It @@ -1597,7 +1603,7 @@ Print tool's version and exit. =head1 ENVIRONMENT -No env vars used. +This tool does not use any environment variables for configuration. =head1 SYSTEM REQUIREMENTS From 62055052632286090279fc5ea5b83c6dabd5905b Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 31 Jan 2012 10:20:37 -0700 Subject: [PATCH 66/71] Don't use basename and quote $dir in tmpdir.sh. --- bin/pt-stalk | 4 ++-- lib/bash/tmpdir.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 355bf180..e9ce1d43 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -433,11 +433,11 @@ mk_tmpdir() { if [ -n "$dir" ]; then if [ ! -d "$dir" ]; then - mkdir $dir || die "Cannot make tmpdir $dir" + mkdir "$dir" || die "Cannot make tmpdir $dir" fi TMPDIR="$dir" else - local tool=`basename $0` + local tool="${0##*/}" local pid="$$" TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ || die "Cannot make secure tmpdir" diff --git a/lib/bash/tmpdir.sh b/lib/bash/tmpdir.sh index fdd6c8f3..0ee045ac 100644 --- a/lib/bash/tmpdir.sh +++ b/lib/bash/tmpdir.sh @@ -39,11 +39,11 @@ mk_tmpdir() { if [ -n "$dir" ]; then if [ ! -d "$dir" ]; then - mkdir $dir || die "Cannot make tmpdir $dir" + mkdir "$dir" || die "Cannot make tmpdir $dir" fi TMPDIR="$dir" else - local tool=`basename $0` + local tool="${0##*/}" local pid="$$" TMPDIR=`mktemp -d /tmp/${tool}.${pid}.XXXXX` \ || die "Cannot make secure tmpdir" From a4880437748b79ef5046ae286caa8d501ffa8ca6 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 31 Jan 2012 13:25:20 -0700 Subject: [PATCH 67/71] Check for mysql after parsing options so --help works even if mysql isn't in PATH. 
--- bin/pt-stalk | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index e9ce1d43..767b1e87 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1154,15 +1154,6 @@ main() { if [ "${0##*/}" = "$TOOL" ] \ || [ "${0##*/}" = "bash" -a "$_" = "$0" ]; then - # Check that mysql and mysqladmin are in PATH. If not, we're - # already dead in the water, so don't bother with cmd line opts, - # just error and exit. - [ -n "$(mysql --help)" ] \ - || die "Cannot execute mysql. Check that it is in PATH." - [ -n "$(mysqladmin --help)" ] \ - || die "Cannot execute mysqladmin. Check that it is in PATH." - - # Parse command line options. We must do this first so we can # see if --daemonize was specified. mk_tmpdir @@ -1181,6 +1172,14 @@ if [ "${0##*/}" = "$TOOL" ] \ exit 0 fi + # Check that mysql and mysqladmin are in PATH. This is done after + # parsing the command line options so that --help still works + # when they are not in PATH. + [ -n "$(mysql --help)" ] \ + || die "Cannot execute mysql. Check that it is in PATH." + [ -n "$(mysqladmin --help)" ] \ + || die "Cannot execute mysqladmin. Check that it is in PATH." + # Now that we have the cmd line opts, check that we can actually # connect to MySQL. [ -n "$(mysql $EXT_ARGV -e 'SELECT 1')" ] \ From cd91b583c1fc81ca029b8fdd6abd3282f3d6dc8a Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 1 Feb 2012 10:03:37 -0700 Subject: [PATCH 68/71] Removed unused global vars. --- lib/bash/alt_cmds.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/bash/alt_cmds.sh b/lib/bash/alt_cmds.sh index e54112e4..e0d96e64 100644 --- a/lib/bash/alt_cmds.sh +++ b/lib/bash/alt_cmds.sh @@ -23,10 +23,6 @@ set -u -# Global variables.
-CMD_PIDOF="$(which pidof)" -CMD_PGREP="$(which pgrep)" - # seq N, return 1, ..., 5 _seq() { local i="$1" From 4e69f68fc8ceb4fc235b58cc4a695369fb6e7d73 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 1 Feb 2012 10:10:08 -0700 Subject: [PATCH 69/71] Quote vars in trg_processlist(). --- bin/pt-stalk | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/bin/pt-stalk b/bin/pt-stalk index 767b1e87..fb7d9791 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -932,7 +932,7 @@ grep_processlist() { exit 0 exit 1 } - " $file + " "$file" } set_trg_func() { @@ -963,11 +963,10 @@ trg_status() { trg_processlist() { local var="$1" local tmpfile="$TMPDIR/processlist" - mysqladmin $EXT_ARGV processlist > $tmpfile-1 - grep_processlist $tmpfile-1 $var $OPT_MATCH 0 0 > $tmpfile-2 - wc -l $tmpfile-2 | awk '{print $1}' - rm -rf $tmpfile* - return + mysqladmin $EXT_ARGV processlist > "$tmpfile-1" + grep_processlist "$tmpfile-1" "$var" "$OPT_MATCH" 0 0 > "$tmpfile-2" + wc -l "$tmpfile-2" | awk '{print $1}' + rm -f "$tmpfile"* } oktorun() { From e64f23a2e3cb1a78d0f7c0b63443d3733853ddde Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 2 Feb 2012 08:44:38 -0700 Subject: [PATCH 70/71] Don't use random file in mk-mext. Quote file names. Remove tmp dir. --- bin/pt-mext | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/bin/pt-mext b/bin/pt-mext index cf5da66e..95d838d1 100755 --- a/bin/pt-mext +++ b/bin/pt-mext @@ -26,9 +26,6 @@ fi # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### -# pt-mext isn't ready for this yet -#set -u - TMPDIR="" mk_tmpdir() { @@ -60,10 +57,9 @@ rm_tmpdir() { mk_tmpdir -FILE=`mktemp -p $TMPDIR mext_temp_file.XXXXXX`; +FILE="$TMPDIR/mext_temp_file"; NUM=0; REL=0; -rm -f $FILE*; # Command-line parsing. 
args=`getopt -u -n mext r "$@"`; @@ -88,15 +84,15 @@ $@ | grep -v '+' | grep -v Variable_name | sed 's/|//g' \ | while read line; do if [ "$line" = "" ]; then NUM=`expr $NUM + 1`; - echo "" > $FILE$NUM; + echo "" > "$FILE$NUM" fi - echo "$line" >> $FILE$NUM; + echo "$line" >> "$FILE$NUM" done # Count how many files there are and prepare to format the output SPEC="%-33s %13d" AWKS="" -NUM=`ls $FILE* | wc -l`; +NUM=`ls "$FILE"* | wc -l`; # The last file will be empty... NUM=`expr $NUM - 3`; @@ -106,19 +102,19 @@ for i in `seq 0 $NUM`; do NEXTFILE=`expr $i + 1`; # Sort each file and eliminate empty lines, so 'join' doesn't complain. - sort $FILE$i | grep . > $FILE$i.tmp; - mv $FILE$i.tmp $FILE$i; - sort $FILE${NEXTFILE} | grep . > $FILE${NEXTFILE}.tmp; - mv $FILE${NEXTFILE}.tmp $FILE${NEXTFILE}; + sort "$FILE$i" | grep . > "$FILE$i.tmp" + mv "$FILE$i.tmp" "$FILE$i" + sort "$FILE${NEXTFILE}" | grep . > "$FILE${NEXTFILE}.tmp" + mv "$FILE${NEXTFILE}.tmp" "$FILE${NEXTFILE}" # Join the files together. This gets slow O(n^2) as we add more files, but # this really shouldn't be performance critical. - join $FILE$i $FILE${NEXTFILE} | grep . > $FILE; + join "$FILE$i" "$FILE${NEXTFILE}" | grep . > "$FILE" # Find the max length of the [numeric only] values in the file so we know how # wide to make the columns - MAXLEN=`awk '{print $2}' $FILE${NEXTFILE} | grep -v '[^0-9]' | awk '{print length($1)}' | sort -rn | head -n1` - mv $FILE $FILE${NEXTFILE}; + MAXLEN=`awk '{print $2}' "$FILE${NEXTFILE}" | grep -v '[^0-9]' | awk '{print length($1)}' | sort -rn | head -n1` + mv "$FILE" "$FILE${NEXTFILE}" SPEC="$SPEC %${MAXLEN}d"; if [ "$REL" = "1" ]; then AWKS="$AWKS, \$`expr $i + 3` - \$`expr $i + 2`"; @@ -129,10 +125,12 @@ done # Print output AWKCMD="printf(\"$SPEC\n\", \$1, \$2$AWKS);"; -awk "{$AWKCMD}" $FILE`expr $NUM + 1`; +awk "{$AWKCMD}" "$FILE`expr $NUM + 1`" -# Remove all temporary files. -rm -f $FILE*; +# Remove all temporary files and the tmp dir. 
+rm_tmpdir + +exit 0 # ############################################################################ # Documentation From 02812c068cd9f7eae1e73e32e65a256f72ef499b Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Thu, 2 Feb 2012 08:46:31 -0700 Subject: [PATCH 71/71] Remove set -u from pt-mysql-summary and rm tmp dir last. --- bin/pt-mysql-summary | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/pt-mysql-summary b/bin/pt-mysql-summary index 441fb3af..4b401bbc 100755 --- a/bin/pt-mysql-summary +++ b/bin/pt-mysql-summary @@ -22,8 +22,6 @@ usage() { # See https://launchpad.net/percona-toolkit for more information. # ########################################################################### -set -u - TMPDIR="" mk_tmpdir() { @@ -1318,10 +1316,11 @@ main() { fi temp_files "rm" - rm_tmpdir # Make sure that we signal the end of the tool's output. section The_End + + rm_tmpdir } # Execute the program if it was not included from another file. This makes it