From f7f63555ee0a57902a6799040ba9f819aeb7305c Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Mon, 4 Mar 2013 17:47:02 -0700 Subject: [PATCH] Add lib/bash/subshell.sh and use in pt-stalk to fix/improve subprocess waiting. --- bin/pt-stalk | 87 ++++++++++++++++++++++++++++--------------- lib/bash/collect.sh | 14 ++----- lib/bash/subshell.sh | 66 ++++++++++++++++++++++++++++++++ t/lib/bash/collect.sh | 1 + 4 files changed, 126 insertions(+), 42 deletions(-) create mode 100644 lib/bash/subshell.sh diff --git a/bin/pt-stalk b/bin/pt-stalk index 634c2cf7..acc35392 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -54,6 +54,58 @@ _d () { # End log_warn_die package # ########################################################################### +# ########################################################################### +# subshell package +# This package is a copy without comments from the original. The original +# with comments and its test file can be found in the Bazaar repository at, +# lib/bash/subshell.sh +# t/lib/bash/subshell.sh +# See https://launchpad.net/percona-toolkit for more information. +# ########################################################################### + + +set -u + +wait_for_subshells() { + local max_wait=$1 + if [ "$(jobs)" ]; then + log "Waiting up to $max_wait seconds for subprocesses to finish..." + local slept=0 + while [ -n "$(jobs)" ]; do + local subprocess_still_running="" + for pid in $(jobs -p); do + if kill -0 $pid >/dev/null 2>&1; then + subprocess_still_running=1 + fi + done + if [ "$subprocess_still_running" ]; then + sleep 1 + slept=$((slept + 1)) + [ $slept -ge $max_wait ] && break + else + break + fi + done + fi +} + +kill_all_subshells() { + if [ "$(jobs)" ]; then + for pid in $(jobs -p); do + if kill -0 $pid >/dev/null 2>&1; then + log "Killing subprocess $pid" + kill $pid >/dev/null 2>&1 + fi + done + else + log "All subprocesses have finished" + fi +} + +# ########################################################################### +# End subshell package +# ########################################################################### + # ########################################################################### # parse_options package # This package is a copy without comments from the original. The original @@ -871,16 +923,8 @@ collect() { hostname > "$d/$p-hostname" - local slept=0 - while [ -n "$(jobs)" -a $slept -lt $OPT_RUN_TIME ]; do - sleep 1 - slept=$((slept + 1)) - done - - for pid in $(jobs -p); do - kill $pid >/dev/null 2>&1 - done - + wait_for_subshells $OPT_RUN_TIME + kill_all_subshells for file in "$d/$p-"*; do if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then log "Removing empty file $file"; @@ -1244,27 +1288,8 @@ stalk() { # we may get in sync with the collector and kill it a microsecond # before it kills itself, thus 3 * run-time. # https://bugs.launchpad.net/percona-toolkit/+bug/1070434 - if [ "$(jobs)" ]; then - local sleep_time=$((OPT_RUN_TIME * 3)) - log "Waiting up to $sleep_time seconds for collectors to finish..." - local slept=0 - while [ -n "$(jobs)" -a $slept -lt $sleep_time ]; do - sleep 1 - slept=$((slept + 1)) - done - - if [ "$(jobs)" ]; then - for pid in $(jobs -p); do - # This isn't an warning (we don't want exit status 1) because - # the system may be running slowly so it's just "natural" that - # a collector may get stuck or run really slowly. - log "Killing collector $pid" - kill $pid >/dev/null 2>&1 - done - else - log "All collectors have finished" - fi - fi + wait_for_subshells $((OPT_RUN_TIME * 3)) + kill_all_subshells } # ########################################################################### diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index 5a8edfc9..69973bce 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -22,7 +22,7 @@ # collect collects system information. # XXX -# THIS LIB REQUIRES log_warn_die.sh, safeguards.sh, and alt_cmds.sh! +# THIS LIB REQUIRES log_warn_die, safeguards, alt_cmds, and subshell! # XXX set -u @@ -289,16 +289,8 @@ collect() { # it may leave an empty file. But first wait another --run-time # seconds for any slow process to finish: # https://bugs.launchpad.net/percona-toolkit/+bug/1047701 - local slept=0 - while [ -n "$(jobs)" -a $slept -lt $OPT_RUN_TIME ]; do - sleep 1 - slept=$((slept + 1)) - done - - for pid in $(jobs -p); do - kill $pid >/dev/null 2>&1 - done - + wait_for_subshells $OPT_RUN_TIME + kill_all_subshells for file in "$d/$p-"*; do # If there's not at least 1 line that's not a TS, # then the file is empty. diff --git a/lib/bash/subshell.sh b/lib/bash/subshell.sh new file mode 100644 index 00000000..741af7f9 --- /dev/null +++ b/lib/bash/subshell.sh @@ -0,0 +1,66 @@ +# This program is copyright 2013 Percona Ireland Ltd. +# Feedback and improvements are welcome. +# +# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED +# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF +# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar +# systems, you can issue `man perlgpl' or `man perlartistic' to read these +# licenses. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., 59 Temple +# Place, Suite 330, Boston, MA 02111-1307 USA. +# ########################################################################### +# subshell package +# ########################################################################### + +# Package: subshell + +set -u + +wait_for_subshells() { + local max_wait=$1 + if [ "$(jobs)" ]; then + log "Waiting up to $max_wait seconds for subprocesses to finish..." + local slept=0 + while [ -n "$(jobs)" ]; do + local subprocess_still_running="" + for pid in $(jobs -p); do + if kill -0 $pid >/dev/null 2>&1; then + subprocess_still_running=1 + fi + done + if [ "$subprocess_still_running" ]; then + sleep 1 + slept=$((slept + 1)) + [ $slept -ge $max_wait ] && break + else + break + fi + done + fi +} + +kill_all_subshells() { + if [ "$(jobs)" ]; then + for pid in $(jobs -p); do + if kill -0 $pid >/dev/null 2>&1; then + # This isn't an warning (we don't want exit status 1) because + # the system may be running slowly so it's just "natural" that + # a collector may get stuck or run really slowly. + log "Killing subprocess $pid" + kill $pid >/dev/null 2>&1 + fi + done + else + log "All subprocesses have finished" + fi +} + +# ########################################################################### +# End subshell package +# ########################################################################### diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index 314f296e..6afd98f2 100644 --- a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -10,6 +10,7 @@ TOOL="pt-stalk" mkdir "$PT_TMPDIR/collect" 2>/dev/null source "$LIB_DIR/log_warn_die.sh" +source "$LIB_DIR/subshell.sh" source "$LIB_DIR/parse_options.sh" source "$LIB_DIR/safeguards.sh" source "$LIB_DIR/alt_cmds.sh"