Add lib/bash/subshell.sh and use in pt-stalk to fix/improve subprocess waiting.

This commit is contained in:
Daniel Nichter
2013-03-04 17:47:02 -07:00
parent 0da15fb083
commit f7f63555ee
4 changed files with 126 additions and 42 deletions

View File

@@ -54,6 +54,58 @@ _d () {
# End log_warn_die package # End log_warn_die package
# ########################################################################### # ###########################################################################
# ###########################################################################
# subshell package
# This package is a copy without comments from the original. The original
# with comments and its test file can be found in the Bazaar repository at,
# lib/bash/subshell.sh
# t/lib/bash/subshell.sh
# See https://launchpad.net/percona-toolkit for more information.
# ###########################################################################
set -u
# wait_for_subshells MAX_WAIT
#
# Wait until no background job of the current shell is alive any more,
# or until MAX_WAIT seconds have been slept, whichever comes first.
# Does nothing (and logs nothing) when `jobs` prints no output.
#
# Arguments:
#   $1  max_wait  Maximum number of seconds to wait (integer).
# Calls:
#   log  (defined outside this package)
# Returns:
#   0
wait_for_subshells() {
   local max_wait=$1
   local slept=0
   local pid=""
   local subprocess_still_running=""

   [ "$(jobs)" ] || return 0

   log "Waiting up to $max_wait seconds for subprocesses to finish..."
   while [ -n "$(jobs)" ]; do
      # The job list alone is not taken as proof that something is still
      # running: probe every listed PID with signal 0, which delivers
      # nothing and only reports whether the process exists.
      subprocess_still_running=""
      # $(jobs -p) is intentionally unquoted: one PID per word.
      for pid in $(jobs -p); do
         if kill -0 "$pid" >/dev/null 2>&1; then
            subprocess_still_running=1
            break
         fi
      done

      [ "$subprocess_still_running" ] || break

      # Check the budget before sleeping so that a max_wait of 0 does
      # not sleep at all.
      [ "$slept" -ge "$max_wait" ] && break
      sleep 1
      slept=$((slept + 1))
   done
   return 0
}
# kill_all_subshells
#
# Send the default kill signal to every background job of the current
# shell that is still alive, logging each PID before it is signalled.
# When nothing had to be killed, log that all subprocesses have finished.
#
# Arguments: none
# Calls:
#   log  (defined outside this package)
kill_all_subshells() {
   local pid=""
   local killed=""

   # $(jobs -p) is intentionally unquoted: one PID per word.
   for pid in $(jobs -p); do
      # Signal 0 only tests for existence, so PIDs that are still listed
      # but have already exited are skipped.
      if kill -0 "$pid" >/dev/null 2>&1; then
         # This isn't a warning (we don't want exit status 1) because
         # the system may be running slowly so it's just "natural" that
         # a collector may get stuck or run really slowly.
         log "Killing subprocess $pid"
         kill "$pid" >/dev/null 2>&1
         killed=1
      fi
   done

   # Also covers the case where the job list is non-empty but every
   # listed PID is already gone.
   [ "$killed" ] || log "All subprocesses have finished"
}
# ###########################################################################
# End subshell package
# ###########################################################################
# ########################################################################### # ###########################################################################
# parse_options package # parse_options package
# This package is a copy without comments from the original. The original # This package is a copy without comments from the original. The original
@@ -871,16 +923,8 @@ collect() {
hostname > "$d/$p-hostname" hostname > "$d/$p-hostname"
local slept=0 wait_for_subshells $OPT_RUN_TIME
while [ -n "$(jobs)" -a $slept -lt $OPT_RUN_TIME ]; do kill_all_subshells
sleep 1
slept=$((slept + 1))
done
for pid in $(jobs -p); do
kill $pid >/dev/null 2>&1
done
for file in "$d/$p-"*; do for file in "$d/$p-"*; do
if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then if [ -z "$(grep -v '^TS ' --max-count 1 "$file")" ]; then
log "Removing empty file $file"; log "Removing empty file $file";
@@ -1244,27 +1288,8 @@ stalk() {
# we may get in sync with the collector and kill it a microsecond # we may get in sync with the collector and kill it a microsecond
# before it kills itself, thus 3 * run-time. # before it kills itself, thus 3 * run-time.
# https://bugs.launchpad.net/percona-toolkit/+bug/1070434 # https://bugs.launchpad.net/percona-toolkit/+bug/1070434
if [ "$(jobs)" ]; then wait_for_subshells $((OPT_RUN_TIME * 3))
local sleep_time=$((OPT_RUN_TIME * 3)) kill_all_subshells
log "Waiting up to $sleep_time seconds for collectors to finish..."
local slept=0
while [ -n "$(jobs)" -a $slept -lt $sleep_time ]; do
sleep 1
slept=$((slept + 1))
done
if [ "$(jobs)" ]; then
for pid in $(jobs -p); do
# This isn't an warning (we don't want exit status 1) because
# the system may be running slowly so it's just "natural" that
# a collector may get stuck or run really slowly.
log "Killing collector $pid"
kill $pid >/dev/null 2>&1
done
else
log "All collectors have finished"
fi
fi
} }
# ########################################################################### # ###########################################################################

View File

@@ -22,7 +22,7 @@
# collect collects system information. # collect collects system information.
# XXX # XXX
# THIS LIB REQUIRES log_warn_die.sh, safeguards.sh, and alt_cmds.sh! # THIS LIB REQUIRES log_warn_die, safeguards, alt_cmds, and subshell!
# XXX # XXX
set -u set -u
@@ -289,16 +289,8 @@ collect() {
# it may leave an empty file. But first wait another --run-time # it may leave an empty file. But first wait another --run-time
# seconds for any slow process to finish: # seconds for any slow process to finish:
# https://bugs.launchpad.net/percona-toolkit/+bug/1047701 # https://bugs.launchpad.net/percona-toolkit/+bug/1047701
local slept=0 wait_for_subshells $OPT_RUN_TIME
while [ -n "$(jobs)" -a $slept -lt $OPT_RUN_TIME ]; do kill_all_subshells
sleep 1
slept=$((slept + 1))
done
for pid in $(jobs -p); do
kill $pid >/dev/null 2>&1
done
for file in "$d/$p-"*; do for file in "$d/$p-"*; do
# If there's not at least 1 line that's not a TS, # If there's not at least 1 line that's not a TS,
# then the file is empty. # then the file is empty.

66
lib/bash/subshell.sh Normal file
View File

@@ -0,0 +1,66 @@
# This program is copyright 2013 Percona Ireland Ltd.
# Feedback and improvements are welcome.
#
# THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# This program is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free Software
# Foundation, version 2; OR the Perl Artistic License. On UNIX and similar
# systems, you can issue `man perlgpl' or `man perlartistic' to read these
# licenses.
#
# You should have received a copy of the GNU General Public License along with
# this program; if not, write to the Free Software Foundation, Inc., 59 Temple
# Place, Suite 330, Boston, MA 02111-1307 USA.
# ###########################################################################
# subshell package
# ###########################################################################
# Package: subshell
# Treat the expansion of an unset variable as an error.
set -u
# wait_for_subshells MAX_WAIT
#
# Wait until no background job of the current shell is alive any more,
# or until MAX_WAIT seconds have been slept, whichever comes first.
# Does nothing (and logs nothing) when `jobs` prints no output.
#
# Arguments:
#   $1  max_wait  Maximum number of seconds to wait (integer).
# Calls:
#   log  (defined outside this package)
# Returns:
#   0
wait_for_subshells() {
   local max_wait=$1
   local slept=0
   local pid=""
   local subprocess_still_running=""

   [ "$(jobs)" ] || return 0

   log "Waiting up to $max_wait seconds for subprocesses to finish..."
   while [ -n "$(jobs)" ]; do
      # The job list alone is not taken as proof that something is still
      # running: probe every listed PID with signal 0, which delivers
      # nothing and only reports whether the process exists.
      subprocess_still_running=""
      # $(jobs -p) is intentionally unquoted: one PID per word.
      for pid in $(jobs -p); do
         if kill -0 "$pid" >/dev/null 2>&1; then
            subprocess_still_running=1
            break
         fi
      done

      [ "$subprocess_still_running" ] || break

      # Check the budget before sleeping so that a max_wait of 0 does
      # not sleep at all.
      [ "$slept" -ge "$max_wait" ] && break
      sleep 1
      slept=$((slept + 1))
   done
   return 0
}
# kill_all_subshells
#
# Send the default kill signal to every background job of the current
# shell that is still alive, logging each PID before it is signalled.
# When nothing had to be killed, log that all subprocesses have finished.
#
# Arguments: none
# Calls:
#   log  (defined outside this package)
kill_all_subshells() {
   local pid=""
   local killed=""

   # $(jobs -p) is intentionally unquoted: one PID per word.
   for pid in $(jobs -p); do
      # Signal 0 only tests for existence, so PIDs that are still listed
      # but have already exited are skipped.
      if kill -0 "$pid" >/dev/null 2>&1; then
         # This isn't a warning (we don't want exit status 1) because
         # the system may be running slowly so it's just "natural" that
         # a collector may get stuck or run really slowly.
         log "Killing subprocess $pid"
         kill "$pid" >/dev/null 2>&1
         killed=1
      fi
   done

   # Also covers the case where the job list is non-empty but every
   # listed PID is already gone.
   [ "$killed" ] || log "All subprocesses have finished"
}
# ###########################################################################
# End subshell package
# ###########################################################################

View File

@@ -10,6 +10,7 @@ TOOL="pt-stalk"
mkdir "$PT_TMPDIR/collect" 2>/dev/null mkdir "$PT_TMPDIR/collect" 2>/dev/null
source "$LIB_DIR/log_warn_die.sh" source "$LIB_DIR/log_warn_die.sh"
source "$LIB_DIR/subshell.sh"
source "$LIB_DIR/parse_options.sh" source "$LIB_DIR/parse_options.sh"
source "$LIB_DIR/safeguards.sh" source "$LIB_DIR/safeguards.sh"
source "$LIB_DIR/alt_cmds.sh" source "$LIB_DIR/alt_cmds.sh"