mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-26 23:45:44 +00:00
Wait for collectors before exiting. Use wait_until instead of sleep in pt-stalk.t, and add lots of diag info when tests fail.
This commit is contained in:
31
bin/pt-stalk
31
bin/pt-stalk
@@ -1210,6 +1210,37 @@ stalk() {
|
||||
purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME"
|
||||
fi
|
||||
done
|
||||
|
||||
# Before exiting, the last collector may still be running.
|
||||
# Wait for it to finish in case the tool is part of a script,
|
||||
# or part of a test, so the caller has access to the collected
|
||||
# data when the tool exists. collect() waits an additional
|
||||
# --run-time seconds for itself to complete, which means we
|
||||
# have to wait for 2 * run-time like it plus some overhead else
|
||||
# we may get in sync with the collector and kill it a microsecond
|
||||
# before it kills itself, thus 3 * run-time.
|
||||
# https://bugs.launchpad.net/percona-toolkit/+bug/1070434
|
||||
if [ "$(jobs)" ]; then
|
||||
local sleep_time=$((OPT_RUN_TIME * 3))
|
||||
log "Waiting up to $sleep_time seconds for collectors to finish..."
|
||||
local slept=0
|
||||
while [ -n "$(jobs)" -a $slept -lt $sleep_time ]; do
|
||||
sleep 1
|
||||
slept=$((slept + 1))
|
||||
done
|
||||
|
||||
if [ "$(jobs)" ]; then
|
||||
for pid in $(jobs -p); do
|
||||
# This isn't an warning (we don't want exit status 1) because
|
||||
# the system may be running slowly so it's just "natural" that
|
||||
# a collector may get stuck or run really slowly.
|
||||
log "Killing collector $pid"
|
||||
kill $pid >/dev/null 2>&1
|
||||
done
|
||||
else
|
||||
log "All collectors have finished"
|
||||
fi
|
||||
fi
|
||||
}
|
||||
|
||||
# ###########################################################################
|
||||
|
@@ -134,43 +134,55 @@ diag(`rm $dest/* 2>/dev/null`);
|
||||
my (undef, $uptime) = $dbh->selectrow_array("SHOW STATUS LIKE 'Uptime'");
|
||||
my $threshold = $uptime + 2;
|
||||
|
||||
$retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1");
|
||||
$retval = system("$trunk/bin/pt-stalk --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 2 --run-time 2 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1");
|
||||
|
||||
sleep 3;
|
||||
PerconaTest::wait_until(sub { !-f $pid_file });
|
||||
|
||||
$output = `cat $dest/*-trigger`;
|
||||
$output = `cat $dest/*-trigger 2>/dev/null`;
|
||||
like(
|
||||
$output,
|
||||
qr/Check results: Uptime=\d+, matched=yes, cycles_true=2/,
|
||||
"Collect triggered"
|
||||
)
|
||||
or diag(
|
||||
'output', $output,
|
||||
'log file', `cat $log_file 2>/dev/null`,
|
||||
'dest', `ls -l $dest/ 2>/dev/null`,
|
||||
'df', `cat $dest/*-df 2>/dev/null`,
|
||||
);
|
||||
|
||||
# There is some nondeterminism here. Sometimes it'll run for 2 samples because
|
||||
# the samples may not be precisely 1 second apart.
|
||||
chomp($output = `cat $dest/*-df | grep -c '^TS'`);
|
||||
chomp($output = `cat $dest/*-df 2>/dev/null | grep -c '^TS'`);
|
||||
ok(
|
||||
$output >= 1 && $output <= 3,
|
||||
"Collect ran for --run-time"
|
||||
) or diag("Run time: $output");
|
||||
)
|
||||
or diag(
|
||||
'output', $output,
|
||||
'log file', `cat $log_file 2>/dev/null`,
|
||||
'dest', `ls -l $dest/ 2>/dev/null`,
|
||||
'df', `cat $dest/*-df 2>/dev/null`,
|
||||
);
|
||||
|
||||
ok(
|
||||
PerconaTest::not_running("pt-stalk --iterations 1"),
|
||||
"pt-stalk is not running"
|
||||
);
|
||||
|
||||
$output = `cat $dest/*-trigger`;
|
||||
$output = `cat $dest/*-trigger 2>/dev/null`;
|
||||
like(
|
||||
$output,
|
||||
qr/pt-stalk ran with --function=status --variable=Uptime --threshold=$threshold/,
|
||||
"Trigger file logs how pt-stalk was ran"
|
||||
);
|
||||
|
||||
chomp($output = `cat $log_file | grep 'Collector PID'`);
|
||||
chomp($output = `cat $log_file 2>/dev/null | grep 'Collector PID'`);
|
||||
like(
|
||||
$output,
|
||||
qr/Collector PID \d+/,
|
||||
"Collector PID logged"
|
||||
);
|
||||
) or diag('output', $output, 'log file', `cat $log_file 2>/dev/null`);
|
||||
|
||||
# ###########################################################################
|
||||
# Triggered but --no-collect.
|
||||
@@ -184,7 +196,7 @@ $threshold = $uptime + 2;
|
||||
|
||||
$retval = system("$trunk/bin/pt-stalk --no-collect --iterations 1 --dest $dest --variable Uptime --threshold $threshold --cycles 1 --run-time 1 --pid $pid_file -- --defaults-file=$cnf >$log_file 2>&1");
|
||||
|
||||
sleep 2;
|
||||
PerconaTest::wait_until(sub { !-f $pid_file });
|
||||
|
||||
$output = `cat $log_file`;
|
||||
like(
|
||||
|
Reference in New Issue
Block a user