Wait for collectors before exiting. Use wait_until instead of sleep in pt-stalk.t, and add lots of diag info when tests fail.

This commit is contained in:
Daniel Nichter
2012-10-23 14:42:41 -06:00
parent 4870307a42
commit 8ef6ee451f
2 changed files with 52 additions and 9 deletions

View File

@@ -1210,6 +1210,37 @@ stalk() {
purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME" purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME"
fi fi
done done
# Before exiting, the last collector may still be running.
# Wait for it to finish in case the tool is part of a script,
# or part of a test, so the caller has access to the collected
# data when the tool exits. collect() waits an additional
# --run-time seconds for itself to complete, which means we
# have to wait for 2 * run-time like it does, plus some overhead;
# otherwise we may get in sync with the collector and kill it a
# microsecond before it kills itself. Thus we wait 3 * run-time.
# https://bugs.launchpad.net/percona-toolkit/+bug/1070434
if [ "$(jobs)" ]; then
local sleep_time=$((OPT_RUN_TIME * 3))
log "Waiting up to $sleep_time seconds for collectors to finish..."
local slept=0
while [ -n "$(jobs)" -a $slept -lt $sleep_time ]; do
sleep 1
slept=$((slept + 1))
done
if [ "$(jobs)" ]; then
for pid in $(jobs -p); do
# This isn't a warning (we don't want exit status 1) because
# the system may be running slowly so it's just "natural" that
# a collector may get stuck or run really slowly.
log "Killing collector $pid"
kill $pid >/dev/null 2>&1
done
else
log "All collectors have finished"
fi
fi
} }
# ########################################################################### # ###########################################################################

View File

@@ -136,41 +136,53 @@ my $threshold = $uptime + 2;
$retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1"); $retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1");
sleep 3; PerconaTest::wait_until(sub { !-f $pid_file });
$output = `cat $dest/*-trigger`; $output = `cat $dest/*-trigger 2>/dev/null`;
like( like(
$output, $output,
qr/Check results: Uptime=\d+, matched=yes, cycles_true=2/, qr/Check results: Uptime=\d+, matched=yes, cycles_true=2/,
"Collect triggered" "Collect triggered"
)
or diag(
'output', $output,
'log file', `cat $log_file 2>/dev/null`,
'dest', `ls -l $dest/ 2>/dev/null`,
'df', `cat $dest/*-df 2>/dev/null`,
); );
# There is some nondeterminism here. Sometimes it'll run for 2 samples because # There is some nondeterminism here. Sometimes it'll run for 2 samples because
# the samples may not be precisely 1 second apart. # the samples may not be precisely 1 second apart.
chomp($output = `cat $dest/*-df | grep -c '^TS'`); chomp($output = `cat $dest/*-df 2>/dev/null | grep -c '^TS'`);
ok( ok(
$output >= 1 && $output <= 3, $output >= 1 && $output <= 3,
"Collect ran for --run-time" "Collect ran for --run-time"
) or diag("Run time: $output"); )
or diag(
'output', $output,
'log file', `cat $log_file 2>/dev/null`,
'dest', `ls -l $dest/ 2>/dev/null`,
'df', `cat $dest/*-df 2>/dev/null`,
);
ok( ok(
PerconaTest::not_running("pt-stalk --iterations 1"), PerconaTest::not_running("pt-stalk --iterations 1"),
"pt-stalk is not running" "pt-stalk is not running"
); );
$output = `cat $dest/*-trigger`; $output = `cat $dest/*-trigger 2>/dev/null`;
like( like(
$output, $output,
qr/pt-stalk ran with --function=status --variable=Uptime --threshold=$threshold/, qr/pt-stalk ran with --function=status --variable=Uptime --threshold=$threshold/,
"Trigger file logs how pt-stalk was ran" "Trigger file logs how pt-stalk was ran"
); );
chomp($output = `cat $log_file | grep 'Collector PID'`); chomp($output = `cat $log_file 2>/dev/null | grep 'Collector PID'`);
like( like(
$output, $output,
qr/Collector PID \d+/, qr/Collector PID \d+/,
"Collector PID logged" "Collector PID logged"
); ) or diag('output', $output, 'log file', `cat $log_file 2>/dev/null`);
# ########################################################################### # ###########################################################################
# Triggered but --no-collect. # Triggered but --no-collect.
@@ -184,7 +196,7 @@ $threshold = $uptime + 2;
$retval = system("$trunk/bin/pt-stalk --no-collect --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 1 --run-time 1 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1"); $retval = system("$trunk/bin/pt-stalk --no-collect --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 1 --run-time 1 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1");
sleep 2; PerconaTest::wait_until(sub { !-f $pid_file });
$output = `cat $log_file`; $output = `cat $log_file`;
like( like(