Wait for collectors before exiting. Use wait_until instead of sleep in pt-stalk.t, and add lots of diag info when tests fail.

2026-04-11 01:01:36 +08:00 · 2012-10-23 14:42:41 -06:00
parent 4870307a42
commit 8ef6ee451f
2 changed files with 52 additions and 9 deletions
--- a/bin/pt-stalk
+++ b/bin/pt-stalk
@@ -1210,6 +1210,37 @@ stalk() {
         purge_samples "$OPT_DEST" "$OPT_RETENTION_TIME"
      fi
   done
+
+   # Before exiting, the last collector may still be running.
+   # Wait for it to finish in case the tool is part of a script,
+   # or part of a test, so the caller has access to the collected
+   # data when the tool exits.  collect() waits an additional
+   # --run-time seconds for itself to complete, which means we
+   # have to wait for 2 * run-time, like it does, plus some overhead;
+   # otherwise we may get in sync with the collector and kill it a
+   # microsecond before it kills itself, thus 3 * run-time.
+   # https://bugs.launchpad.net/percona-toolkit/+bug/1070434
+   if [ "$(jobs)" ]; then
+      local sleep_time=$((OPT_RUN_TIME * 3))
+      log "Waiting up to $sleep_time seconds for collectors to finish..."
+      local slept=0
+      while [ -n "$(jobs)" -a $slept -lt $sleep_time ]; do
+         sleep 1
+         slept=$((slept + 1))
+      done
+
+      if [ "$(jobs)" ]; then
+         for pid in $(jobs -p); do
+            # This isn't a warning (we don't want exit status 1) because
+            # the system may be running slowly so it's just "natural" that
+            # a collector may get stuck or run really slowly.
+            log "Killing collector $pid"
+            kill $pid >/dev/null 2>&1
+         done
+      else
+         log "All collectors have finished"
+      fi
+   fi
 }

 # ###########################################################################