diff --git a/bin/pt-stalk b/bin/pt-stalk index 86c59367..81fb387b 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -1032,25 +1032,34 @@ stalk() { # Run the trigger which returns the value of whatever is being # checked. When the value is > --threshold for at least --cycle # consecutive times, start collecting. - local value=$($TRIGGER_FUNCTION $OPT_VARIABLE) - local trg_exit_status=$? + if [ "$OPT_STALK" ]; then + local value; value=$($TRIGGER_FUNCTION $OPT_VARIABLE) # assign separately: "local v=$(cmd)" would leave local's status (always 0) in $? + local trg_exit_status=$? - if [ -z "$value" ]; then - # No value. Maybe we failed to connect to MySQL? - warn "Detected value is empty; something failed? Trigger exit status: $trg_exit_status" - matched="" - cycles_true=0 - elif [ $value -gt $OPT_THRESHOLD ]; then - matched="yes" - cycles_true=$(($cycles_true + 1)) - else - matched="" - cycles_true=0 + if [ -z "$value" ]; then + # No value. Maybe we failed to connect to MySQL? + warn "Detected value is empty; something failed? Trigger exit status: $trg_exit_status" + matched="" + cycles_true=0 + elif [ $value -gt $OPT_THRESHOLD ]; then + matched="yes" + cycles_true=$(($cycles_true + 1)) + else + matched="" + cycles_true=0 + fi + + local msg="Check results: $OPT_VARIABLE=$value, matched=${matched:-no}, cycles_true=$cycles_true" + log "$msg" + elif [ "$OPT_COLLECT" ]; then + # Make the next if condition true. + matched=1 + cycles_true=$OPT_CYCLES + + local msg="Not stalking; collect triggered immediately" + log "$msg" fi - local msg="Check results: $OPT_VARIABLE=$value, matched=${matched:-no}, cycles_true=$cycles_true" - log "$msg" - if [ "$matched" -a $cycles_true -ge $OPT_CYCLES ]; then # ################################################################## # Start collecting, maybe. @@ -1085,7 +1094,6 @@ stalk() { log "pt-stalk ran with $RAN_WITH" >> "$OPT_DEST/$prefix-trigger" last_prefix="$prefix" - # Fork and background the collect subroutine which will # run for --run-time seconds. 
We (the parent) sleep # while its collecting (hopefully --sleep is longer than @@ -1166,6 +1174,14 @@ if [ "${0##*/}" = "$TOOL" ] \ option_error "Invalid --function value: $OPT_FUNCTION" fi + if [ -z "$OPT_STALK" -a "$OPT_COLLECT" ]; then + # Not stalking; do immediate collect once. + OPT_ITERATIONS=1 + OPT_CYCLES=0 + OPT_SLEEP=0 + OPT_INTERVAL=0 + fi + usage_or_errors "$0" po_status=$? rm_tmpdir @@ -1206,7 +1222,7 @@ if [ "${0##*/}" = "$TOOL" ] \ fi fi - if [ "$OPT_DAEMONIZE" ]; then + if [ "$OPT_STALK" -a "$OPT_DAEMONIZE" ]; then # Check access to the --log file. touch "$OPT_LOG" || die "Cannot write to --log $OPT_LOG" @@ -1226,7 +1242,7 @@ if [ "${0##*/}" = "$TOOL" ] \ # use $! to get the PID of the child we just forked. echo "$!" > "$OPT_PID" else - make_pid_file "$OPT_PID" $$ + [ "$OPT_STALK" ] && make_pid_file "$OPT_PID" $$ main "$@" fi fi @@ -1360,6 +1376,8 @@ default: yes; negatable: yes Collect system information. You can negate this option to make the tool watch the system but not actually gather any diagnostic data. +See also L<"--stalk">. + =item --collect-gdb Collect GDB stacktraces. This is achieved by attaching to MySQL and printing @@ -1583,6 +1601,22 @@ continuously, which might be a problem if the collection process is intrusive. It also prevents filling up the disk or gathering too much data to analyze reasonably. +=item --stalk + +default: yes; negatable: yes + +Watch the server and wait for the trigger to occur. You can negate this option +to make the tool immediately gather any diagnostic data once and exit. This is +useful if a problem is already happening, but pt-stalk is not running, so +you only want to collect diagnostic data. + +If this option is negated, L<"--daemonize">, L<"--log">, L<"--pid">, and other +stalking-related options have no effect; the tool simply collects diagnostic +data and exits. Safeguard options, like L<"--disk-bytes-free"> and +L<"--disk-pct-free">, are still respected. + +See also L<"--collect">. 
+ =item --threshold type: int; default: 25 diff --git a/lib/PerconaTest.pm b/lib/PerconaTest.pm index 8c076840..0731896b 100644 --- a/lib/PerconaTest.pm +++ b/lib/PerconaTest.pm @@ -294,6 +294,16 @@ sub wait_for_files { ); } +sub wait_for_sh { + my ($cmd) = @_; + return wait_until( + sub { + my $retval = system("$cmd 2>/dev/null"); + return $retval == 0 ? 1 : 0; # whole wait status must be 0: a nonzero exit code, death by signal, or failed exec all count as failure + } + ); +} + sub _read { my ( $fh ) = @_; return <$fh>; diff --git a/t/pt-stalk/pt-stalk.t b/t/pt-stalk/pt-stalk.t index 83d180d7..7e44dea8 100644 --- a/t/pt-stalk/pt-stalk.t +++ b/t/pt-stalk/pt-stalk.t @@ -24,7 +24,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 21; + plan tests => 25; } my $cnf = "/tmp/12345/my.sandbox.cnf"; @@ -155,7 +155,7 @@ is( "Collect ran for --run-time" ); -$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`; +$output = `ps x | grep -v grep | grep 'pt-stalk --iterations 1'`; is( $output, "", @@ -202,7 +202,7 @@ ok( "No files collected" ); -$output = `ps x | grep -v grep | grep 'pt-stalk pt-stalk --iterations 1 --dest $dest'`; +$output = `ps x | grep -v grep | grep 'pt-stalk --no-collect'`; is( $output, "", @@ -237,6 +237,45 @@ like( diag(`rm $ENV{HOME}/.pt-stalk.conf`); diag(`cp $ENV{HOME}/.pt-stalk.conf.original $ENV{HOME}/.pt-stalk.conf 2>/dev/null`); +# ############################################################################# +# Don't stalk, just collect. 
+# ############################################################################# +diag(`rm $pid_file 2>/dev/null`); +diag(`rm $log_file 2>/dev/null`); +diag(`rm $dest/* 2>/dev/null`); + +$retval = system("$trunk/bin/pt-stalk --no-stalk --run-time 2 --dest $dest --prefix nostalk -- --defaults-file=$cnf >$log_file 2>&1"); + +PerconaTest::wait_for_files("$dest/nostalk-trigger"); +$output = `cat $dest/nostalk-trigger`; +like( + $output, + qr/Not stalking/, + "Not stalking, collect triggered" +); + +PerconaTest::wait_for_files("$dest/nostalk-df"); +PerconaTest::wait_for_sh("test \$(grep -c '^TS' $dest/nostalk-df) -ge 2"); +chomp($output = `grep -c '^TS' $dest/nostalk-df`); +is( + $output, + 2, + "Not stalking, collect ran for --run-time" +); + +is( + `cat $dest/nostalk-hostname`, + `hostname`, + "Not stalking, collect gathered data" +); + +$output = `ps x | grep -v grep | grep 'pt-stalk --no-stalk'`; +is( + $output, + "", + "Not stalking, pt-stalk is not running" +); + # ############################################################################# # Done. # #############################################################################