Merge lp:~percona-toolkit-dev/percona-toolkit/collect-without-stalking.

This commit is contained in:
Daniel Nichter
2012-03-02 08:16:55 -08:00
3 changed files with 105 additions and 22 deletions

View File

@@ -1032,25 +1032,34 @@ stalk() {
# Run the trigger which returns the value of whatever is being
# checked. When the value is > --threshold for at least --cycles
# consecutive times, start collecting.
local value=$($TRIGGER_FUNCTION $OPT_VARIABLE)
local trg_exit_status=$?
if [ "$OPT_STALK" ]; then
local value=$($TRIGGER_FUNCTION $OPT_VARIABLE)
local trg_exit_status=$?
if [ -z "$value" ]; then
# No value. Maybe we failed to connect to MySQL?
warn "Detected value is empty; something failed? Trigger exit status: $trg_exit_status"
matched=""
cycles_true=0
elif [ $value -gt $OPT_THRESHOLD ]; then
matched="yes"
cycles_true=$(($cycles_true + 1))
else
matched=""
cycles_true=0
if [ -z "$value" ]; then
# No value. Maybe we failed to connect to MySQL?
warn "Detected value is empty; something failed? Trigger exit status: $trg_exit_status"
matched=""
cycles_true=0
elif [ $value -gt $OPT_THRESHOLD ]; then
matched="yes"
cycles_true=$(($cycles_true + 1))
else
matched=""
cycles_true=0
fi
local msg="Check results: $OPT_VARIABLE=$value, matched=${matched:-no}, cycles_true=$cycles_true"
log "$msg"
elif [ "$OPT_COLLECT" ]; then
# Make the next if condition true.
matched=1
cycles_true=$OPT_CYCLES
local msg="Not stalking; collect triggered immediately"
log "$msg"
fi
local msg="Check results: $OPT_VARIABLE=$value, matched=${matched:-no}, cycles_true=$cycles_true"
log "$msg"
if [ "$matched" -a $cycles_true -ge $OPT_CYCLES ]; then
# ##################################################################
# Start collecting, maybe.
@@ -1085,7 +1094,6 @@ stalk() {
log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger"
last_prefix="$prefix"
# Fork and background the collect subroutine which will
# run for --run-time seconds. We (the parent) sleep
# while it's collecting (hopefully --sleep is longer than
@@ -1166,6 +1174,14 @@ if [ "${0##*/}" = "$TOOL" ] \
option_error "Invalid --function value: $OPT_FUNCTION"
fi
if [ -z "$OPT_STALK" -a "$OPT_COLLECT" ]; then
# Not stalking; do immediate collect once.
OPT_ITERATIONS=1
OPT_CYCLES=0
OPT_SLEEP=0
OPT_INTERVAL=0
fi
usage_or_errors "$0"
po_status=$?
rm_tmpdir
@@ -1206,7 +1222,7 @@ if [ "${0##*/}" = "$TOOL" ] \
fi
fi
if [ "$OPT_DAEMONIZE" ]; then
if [ "$OPT_STALK" -a "$OPT_DAEMONIZE" ]; then
# Check access to the --log file.
touch "$OPT_LOG" || die "Cannot write to --log $OPT_LOG"
@@ -1226,7 +1242,7 @@ if [ "${0##*/}" = "$TOOL" ] \
# use $! to get the PID of the child we just forked.
echo "$!" > "$OPT_PID"
else
make_pid_file "$OPT_PID" $$
[ "$OPT_STALK" ] && make_pid_file "$OPT_PID" $$
main "$@"
fi
fi
@@ -1360,6 +1376,8 @@ default: yes; negatable: yes
Collect system information. You can negate this option to make the tool watch
the system but not actually gather any diagnostic data.
See also L<"--stalk">.
=item --collect-gdb
Collect GDB stacktraces. This is achieved by attaching to MySQL and printing
@@ -1583,6 +1601,22 @@ continuously, which might be a problem if the collection process is intrusive.
It also prevents filling up the disk or gathering too much data to analyze
reasonably.
=item --stalk
default: yes; negatable: yes
Watch the server and wait for the trigger to occur. You can negate this option
to make the tool immediately gather any diagnostic data once and exit. This is
useful if a problem is already happening, but pt-stalk is not running, so
you only want to collect diagnostic data.
If this option is negated, L<"--daemonize">, L<"--log">, L<"--pid">, and other
stalking-related options have no effect; the tool simply collects diagnostic
data and exits. Safeguard options, like L<"--disk-bytes-free"> and
L<"--disk-pct-free">, are still respected.
See also L<"--collect">.
=item --threshold
type: int; default: 25

View File

@@ -294,6 +294,16 @@ sub wait_for_files {
);
}
# Wait until a shell command succeeds.
#
# Arguments:
#   $cmd - shell command line; it is run with its STDERR sent to /dev/null.
#
# Each time wait_until() invokes the callback, the command is run once and
# the callback yields 1 if the command exited cleanly with status 0, else 0.
#
# Returns whatever wait_until() returns.
sub wait_for_sh {
    my ($cmd) = @_;
    return wait_until(
        sub {
            # system() returns the raw wait status: the exit code is in the
            # high byte, the terminating signal (and core-dump flag) in the
            # low byte, and -1 means the command could not be started.
            # Only a status of exactly 0 is a clean, successful exit;
            # looking at the high byte alone would count a command that was
            # killed by a signal as a success.
            my $status = system("$cmd 2>/dev/null");
            return $status == 0 ? 1 : 0;
        }
    );
}
sub _read {
my ( $fh ) = @_;
return <$fh>;

View File

@@ -24,7 +24,7 @@ if ( !$dbh ) {
plan skip_all => 'Cannot connect to sandbox master';
}
else {
plan tests => 21;
plan tests => 25;
}
my $cnf = "/tmp/12345/my.sandbox.cnf";
@@ -155,7 +155,7 @@ is(
"Collect ran for --run-time"
);
$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`;
$output = `ps x | grep -v grep | grep 'pt-stalk --iterations 1'`;
is(
$output,
"",
@@ -202,7 +202,7 @@ ok(
"No files collected"
);
$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`;
$output = `ps x | grep -v grep | grep 'pt-stalk --no-collect'`;
is(
$output,
"",
@@ -237,6 +237,45 @@ like(
diag(`rm $ENV{HOME}/.pt-stalk.conf`);
diag(`cp $ENV{HOME}/.pt-stalk.conf.original $ENV{HOME}/.pt-stalk.conf 2>/dev/null`);
# #############################################################################
# Don't stalk, just collect.
# #############################################################################
# Start from a clean slate: remove the pid file, the log file, and anything
# in $dest left behind by earlier tests. Errors from rm are discarded.
diag(`rm $pid_file 2>/dev/null`);
diag(`rm $log_file 2>/dev/null`);
diag(`rm $dest/* 2>/dev/null`);
# Run pt-stalk with --no-stalk. There is no trailing "&", so system() does
# not return until the command has finished. STDOUT and STDERR of the tool
# both go to $log_file.
$retval = system("$trunk/bin/pt-stalk --no-stalk --run-time 2 --dest $dest --prefix nostalk -- --defaults-file=$cnf >$log_file 2>&1");
# The trigger file is expected to say that the tool was not stalking.
PerconaTest::wait_for_files("$dest/nostalk-trigger");
$output = `cat $dest/nostalk-trigger`;
like(
$output,
qr/Not stalking/,
"Not stalking, collect triggered"
);
# Wait for the df sample file to exist and to contain at least two lines
# beginning with "TS", then require that it contains exactly two.
# NOTE(review): presumably one "TS" line is written per second of
# --run-time 2 -- confirm against the collect code.
PerconaTest::wait_for_files("$dest/nostalk-df");
PerconaTest::wait_for_sh("test \$(grep -c '^TS' $dest/nostalk-df) -ge 2");
chomp($output = `grep -c '^TS' $dest/nostalk-df`);
is(
$output,
2,
"Not stalking, collect ran for --run-time"
);
# The collected hostname file must match the output of `hostname` here,
# trailing newline included (neither side is chomped).
is(
`cat $dest/nostalk-hostname`,
`hostname`,
"Not stalking, collect gathered data"
);
# No "pt-stalk --no-stalk" process may still show up in the ps listing;
# "grep -v grep" keeps the grep command itself out of the result.
$output = `ps x | grep -v grep | grep 'pt-stalk --no-stalk'`;
is(
$output,
"",
"Not stalking, pt-stalk is not running"
);
# #############################################################################
# Done.
# #############################################################################