diff --git a/bin/pt-diskstats b/bin/pt-diskstats index 367e6fe0..5c0d298c 100755 --- a/bin/pt-diskstats +++ b/bin/pt-diskstats @@ -1459,14 +1459,12 @@ my %modes = ( 'ultra-raw' => 5, ); - { - my $fd_stdin = fileno(STDIN); my $flags; unless ( $PerconaTest::DONT_RESTORE_STDIN ) { $flags = fcntl(STDIN, F_GETFL, 0) - or warn "can't fcntl F_GETFL: $!"; + or warn "Error getting STDIN flags with fcntl: $OS_ERROR"; } my $term = POSIX::Termios->new(); $term->getattr($fd_stdin); @@ -1498,14 +1496,13 @@ my %modes = ( $term->setlflag($oterm); $term->setcc( VTIME, 0 ); $term->setattr( $fd_stdin, TCSANOW ); - unless ( $PerconaTest::DONT_RESTORE_STDIN ) { - fcntl(STDIN, F_SETFL, $flags) - or warn "can't fcntl F_SETFL: $!"; + if ( !$PerconaTest::DONT_RESTORE_STDIN ) { + fcntl(STDIN, F_SETFL, int($flags)) + or warn "Error restoring STDIN flags with fcntl: $OS_ERROR"; } } END { cooked() } - } sub readkey { @@ -1514,14 +1511,12 @@ sub readkey { sysread(STDIN, $key, 1); my $timeout = 0.1; if ( $key eq "\033" ) { - { - my $x = ''; - STDIN->blocking(0); - sysread(STDIN, $x, 2); - STDIN->blocking(1); - $key .= $x; - redo if $key =~ /\[[0-2](?:[0-9];)?$/ - } + my $x = ''; + STDIN->blocking(0); + sysread(STDIN, $x, 2); + STDIN->blocking(1); + $key .= $x; + redo if $key =~ /\[[0-2](?:[0-9];)?$/ } cooked(); return $key; diff --git a/bin/pt-duplicate-key-checker b/bin/pt-duplicate-key-checker index affb86a5..58b75e2c 100755 --- a/bin/pt-duplicate-key-checker +++ b/bin/pt-duplicate-key-checker @@ -2226,8 +2226,9 @@ sub get_duplicate_keys { my $clustered_key = $args{clustered_key} ? $keys{$args{clustered_key}} : undef; - PTDEBUG && _d('clustered key:', $clustered_key->{name}, - $clustered_key->{colnames}); + PTDEBUG && _d('clustered key:', + $clustered_key ? 
($clustered_key->{name}, $clustered_key->{colnames}) + : 'none'); if ( $clustered_key && $args{clustered} && $args{tbl_info}->{engine} @@ -4381,74 +4382,74 @@ sub main { ); TABLE: while ( my $tbl = $schema_itr->next() ) { - $tbl->{engine} = $tbl->{tbl_struct}->{engine}; + eval { + $tbl->{engine} = $tbl->{tbl_struct}->{engine}; - my ($keys, $clustered_key, $fks); - if ( $get_keys ) { - ($keys, $clustered_key) - = $tp->get_keys($tbl->{ddl}, {}); - } - if ( $get_fks ) { - $fks = $tp->get_fks($tbl->{ddl}, {database => $tbl->{db}}); - } - - next TABLE unless ($keys && %$keys) || ($fks && %$fks); - - if ( $o->got('verbose') ) { - print_all_keys($keys, $tbl, \%seen_tbl) if $keys; - print_all_keys($fks, $tbl, \%seen_tbl) if $fks; - } - else { - PTDEBUG && _d('Getting duplicate keys on', $tbl->{db}, $tbl->{tbl}); - eval { - if ( $keys ) { - $dk->get_duplicate_keys( - $keys, - clustered_key => $clustered_key, - tbl_info => $tbl, - callback => \&print_duplicate_key, - %tp_opts, - # get_duplicate_keys() ignores these args but passes them - # to the callback: - dbh => $dbh, - is_fk => 0, - o => $o, - ks => $ks, - tp => $tp, - q => $q, - seen_tbl => \%seen_tbl, - summary => \%summary, - ); - } - if ( $fks ) { - $dk->get_duplicate_fks( - $fks, - tbl_info => $tbl, - callback => \&print_duplicate_key, - %tp_opts, - # get_duplicate_fks() ignores these args but passes them - # to the callback: - dbh => $dbh, - is_fk => 1, - o => $o, - ks => $ks, - tp => $tp, - q => $q, - seen_tbl => \%seen_tbl, - summary => \%summary, - ); - } - }; - if ( $EVAL_ERROR ) { - warn "Error checking `$tbl->{db}`.`$tbl->{tbl}` for duplicate keys: " - . $EVAL_ERROR; - next TABLE; + my ($keys, $clustered_key, $fks); + if ( $get_keys ) { + ($keys, $clustered_key) + = $tp->get_keys($tbl->{ddl}, {}); + } + if ( $get_fks ) { + $fks = $tp->get_fks($tbl->{ddl}, {database => $tbl->{db}}); } - } - # Always count Total Keys so print_key_summary won't die - # because %summary is empty. 
- $summary{'Total Indexes'} += (scalar keys %$keys) + (scalar keys %$fks) + if ( ($keys && %$keys) || ($fks && %$fks) ) { + if ( $o->got('verbose') ) { + print_all_keys($keys, $tbl, \%seen_tbl) if $keys; + print_all_keys($fks, $tbl, \%seen_tbl) if $fks; + } + else { + PTDEBUG && _d('Getting duplicate keys on', + $tbl->{db}, $tbl->{tbl}); + if ( $keys ) { + $dk->get_duplicate_keys( + $keys, + clustered_key => $clustered_key, + tbl_info => $tbl, + callback => \&print_duplicate_key, + %tp_opts, + # get_duplicate_keys() ignores these args but passes them + # to the callback: + dbh => $dbh, + is_fk => 0, + o => $o, + ks => $ks, + tp => $tp, + q => $q, + seen_tbl => \%seen_tbl, + summary => \%summary, + ); + } + if ( $fks ) { + $dk->get_duplicate_fks( + $fks, + tbl_info => $tbl, + callback => \&print_duplicate_key, + %tp_opts, + # get_duplicate_fks() ignores these args but passes them + # to the callback: + dbh => $dbh, + is_fk => 1, + o => $o, + ks => $ks, + tp => $tp, + q => $q, + seen_tbl => \%seen_tbl, + summary => \%summary, + ); + } + } + + # Always count Total Keys so print_key_summary won't die + # because %summary is empty. 
+ $summary{'Total Indexes'} += (scalar keys %$keys) + + (scalar keys %$fks) + } + }; + if ( $EVAL_ERROR ) { + warn "Error checking $tbl->{db}.$tbl->{tbl}: $EVAL_ERROR"; + } } # TABLE print_key_summary(%summary) if $o->get('summary'); diff --git a/bin/pt-online-schema-change b/bin/pt-online-schema-change index cbdf8e87..be35e8af 100755 --- a/bin/pt-online-schema-change +++ b/bin/pt-online-schema-change @@ -5685,6 +5685,10 @@ sub new { my $self = { task => $task, }; + open $self->{stdout_copy}, ">&=", *STDOUT + or die "Cannot dup stdout: $OS_ERROR"; + open $self->{stderr_copy}, ">&=", *STDERR + or die "Cannot dup stderr: $OS_ERROR"; PTDEBUG && _d('Created cleanup task', $task); return bless $self, $class; } @@ -5694,6 +5698,10 @@ sub DESTROY { my $task = $self->{task}; if ( ref $task ) { PTDEBUG && _d('Calling cleanup task', $task); + open local(*STDOUT), ">&=", $self->{stdout_copy} + if $self->{stdout_copy}; + open local(*STDERR), ">&=", $self->{stderr_copy} + if $self->{stderr_copy}; $task->(); } else { @@ -8978,6 +8986,17 @@ If you add a column without a default value and make it NOT NULL, the tool will fail, as it will not try to guess a default value for you; You must specify the default. +=item * + +C requires specifying C<_constraint_name> +rather than the real C. Due to a limitation in MySQL, +pt-online-schema-change adds a leading underscore to foreign key constraint +names when creating the new table. For example, to drop this contraint: + + CONSTRAINT `fk_foo` FOREIGN KEY (`foo_id`) REFERENCES `bar` (`foo_id`) + +You must specify C<--alter "DROP FOREIGN KEY _fk_foo">. 
+ =back =item --alter-foreign-keys-method diff --git a/bin/pt-slave-delay b/bin/pt-slave-delay index 09078148..f6b477bf 100755 --- a/bin/pt-slave-delay +++ b/bin/pt-slave-delay @@ -3565,7 +3565,6 @@ sub main { try => sub { return unless $oktorun; $status = $slave_dbh->selectrow_hashref("SHOW SLAVE STATUS"); - info("Reconnected to slave"); return $status; }, fail => sub { diff --git a/bin/pt-stalk b/bin/pt-stalk index e332a916..f92f8263 100755 --- a/bin/pt-stalk +++ b/bin/pt-stalk @@ -795,7 +795,7 @@ collect() { fi fi - (echo $ts; df -h) >> "$d/$p-df" & + (echo $ts; df -k) >> "$d/$p-df" & (echo $ts; netstat -antp) >> "$d/$p-netstat" & (echo $ts; netstat -s) >> "$d/$p-netstat_s" & diff --git a/bin/pt-table-checksum b/bin/pt-table-checksum index 58a5e428..b62efa17 100755 --- a/bin/pt-table-checksum +++ b/bin/pt-table-checksum @@ -10009,17 +10009,21 @@ won't break replication (or simply fail to replicate). If you are sure that it's OK to run the checksum queries, you can negate this option to disable the checks. See also L<"--replicate-database">. +See also L<"REPLICA CHECKS">. + =item --check-slave-lag type: string; group: Throttle Pause checksumming until this replica's lag is less than L<"--max-lag">. The value is a DSN that inherits properties from the master host and the connection -options (L<"--port">, L<"--user">, etc.). This option overrides the normal -behavior of finding and continually monitoring replication lag on ALL connected -replicas. If you don't want to monitor ALL replicas, but you want more than -just one replica to be monitored, then use the DSN option to the -L<"--recursion-method"> option instead of this option. +options (L<"--port">, L<"--user">, etc.). By default, pt-table-checksum +monitors lag on all connected replicas, but this option limits lag monitoring +to the specified replica. This is useful if certain replicas are intentionally +lagged (with L for example), in which case you can specify +a normal replica to monitor. 
+ +See also L<"REPLICA CHECKS">. =item --chunk-index @@ -10292,8 +10296,7 @@ all replicas to which it connects, using Seconds_Behind_Master. If any replica is lagging more than the value of this option, then pt-table-checksum will sleep for L<"--check-interval"> seconds, then check all replicas again. If you specify L<"--check-slave-lag">, then the tool only examines that server for -lag, not all servers. If you want to control exactly which servers the tool -monitors, use the DSN value to L<"--recursion-method">. +lag, not all servers. The tool waits forever for replicas to stop lagging. If any replica is stopped, the tool waits forever until the replica is started. Checksumming @@ -10303,6 +10306,8 @@ The tool prints progress reports while waiting. If a replica is stopped, it prints a progress report immediately, then again at every progress report interval. +See also L<"REPLICA CHECKS">. + =item --max-load type: Array; default: Threads_running=25; group: Throttle @@ -10384,13 +10389,15 @@ or checksum differences. type: int Number of levels to recurse in the hierarchy when discovering replicas. -Default is infinite. See also L<"--recursion-method">. +Default is infinite. See also L<"--recursion-method"> and L<"REPLICA CHECKS">. =item --recursion-method type: array; default: processlist,hosts -Preferred recursion method for discovering replicas. Possible methods are: +Preferred recursion method for discovering replicas. pt-table-checksum +performs several L<"REPLICA CHECKS"> before and while running. +Possible methods are: METHOD USES =========== ================== @@ -10399,18 +10406,21 @@ Preferred recursion method for discovering replicas. Possible methods are: dsn=DSN DSNs from a table none Do not find slaves -The processlist method is the default, because SHOW SLAVE HOSTS is not -reliable. However, the hosts method can work better if the server uses a -non-standard port (not 3306). 
The tool usually does the right thing and -finds all replicas, but you may give a preferred method and it will be used -first. +The C method is the default, because C is not +reliable. However, if the server uses a non-standard port (not 3306), then +the C method becomes the default because it works better in this case. -The hosts method requires replicas to be configured with report_host, -report_port, etc. +The C method requires replicas to be configured with C, +C, etc. -The dsn method is special: it specifies a table from which other DSN strings -are read. The specified DSN must specify a D and t, or a database-qualified -t. The DSN table should have the following structure: +The C method is special: rather than automatically discovering replicas, +this method specifies a table with replica DSNs. The tool will only connect +to these replicas. This method works best when replicas do not use the same +MySQL username or password as the master, or when you want to prevent the tool +from connecting to certain replicas. The C method is specified like: +C<--recursion-method dsn=h=host,D=percona,t=dsns>. The specified DSN must +have D and t parts, or just a database-qualified t part, which specify the +DSN table. The DSN table must have the following structure: CREATE TABLE `dsns` ( `id` int(11) NOT NULL AUTO_INCREMENT, @@ -10419,10 +10429,13 @@ t. The DSN table should have the following structure: PRIMARY KEY (`id`) ); -To make the tool monitor only the hosts 10.10.1.16 and 10.10.1.17 for -replication lag and checksum differences, insert the values C and -C into the table. Currently, the DSNs are ordered by id, but id -and parent_id are otherwise ignored. +DSNs are ordered by C, but C and C are otherwise ignored. +The C column contains a replica DSN like it would be given on the command +line, for example: C<"h=replica_host,u=repl_user,p=repl_pass">. + +The C method prevents the tool from connecting to any replicas. 
+This effectively disables all the L<"REPLICA CHECKS"> because there will +not be any replicas to check. Thefore, this method is not recommended. =item --replicate @@ -10596,6 +10609,60 @@ keyword. You might need to quote the value. Here is an example: =back +=head1 REPLICA CHECKS + +By default, pt-table-checksum attempts to find and connect to all replicas +connected to the master host. This automated process is called +"slave recursion" and is controlled by the L<"--recursion-method"> and +L<"--recurse"> options. The tool performs these checks on all replicas: + +=over + +=item 1. L<"--[no]check-replication-filters"> + +pt-table-checksum checks for replication filters on all replicas because +they can complicate or break the checksum process. By default, the tool +will exit if any replication filters are found, but this check can be +disabled by specifying C<--no-check-replication-filters>. + +=item 2. L<"--replicate"> table + +pt-table-cheksum checks that the L<"--replicate"> table exists on all +replicas, else checksumming can break replication when updates to the table +on the master replicate to a replica that doesn't have the table. This +check cannot be disabled, and the tool wait forever until the table +exists on all replicas, printing L<"--progress"> messages while it waits. + +=item 3. Single chunk size + +If a table can be checksummed in a single chunk on the master, +pt-table-checksum will check that the table size on all replicas is +approximately the same. This prevents a rare problem where the table +on the master is empty or small, but on a replica it is much larger. +In this case, the single chunk checksum on the master would overload +the replica. This check cannot be disabled. + +=item 4. Lag + +After each chunk, pt-table-checksum checks the lag on all replicas, or only +the replica specified by L<"--check-slave-lag">. This helps the tool +not to overload the replicas with checksum data. 
There is no way to +disable this check, but you can specify a single replica to check with +L<"--check-slave-lag">, and if that replica is the fastest, it will help +prevent the tool from waiting too long for replica lag to abate. + +=item 5. Checksum chunks + +When pt-table-checksum finishes checksumming a table, it waits for the last +checksum chunk to replicate to all replicas so it can perform the +L<"--[no]replicate-check">. Disabling that option by specifying +L<--no-replicate-check> disables this check, but it also disables +immediate reporting of checksum differences, thereby requiring a second run +of the tool with L<"--replicate-check-only"> to find and print checksum +differences. + +=back + =head1 DSN OPTIONS These DSN options are used to create a DSN. Each option is given like @@ -10620,9 +10687,9 @@ DSN table database. =item * F -dsn: mysql_read_default_file; copy: no +dsn: mysql_read_default_file; copy: yes -Only read default options from the given file +Defaults file for connection values. 
=item * h diff --git a/config/rpm/percona-toolkit.spec b/config/rpm/percona-toolkit.spec index 5a0b4f77..d1a16cd9 100644 --- a/config/rpm/percona-toolkit.spec +++ b/config/rpm/percona-toolkit.spec @@ -10,6 +10,7 @@ Source: percona-toolkit-%{version}.tar.gz BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root BuildArch: noarch Requires: perl(DBI) >= 1.13, perl(DBD::mysql) >= 1.0, perl(Term::ReadKey) >= 2.10 +AutoReq: no %description Percona Toolkit is a collection of advanced command-line tools used by diff --git a/lib/CleanupTask.pm b/lib/CleanupTask.pm index dcc2a56d..5401360b 100644 --- a/lib/CleanupTask.pm +++ b/lib/CleanupTask.pm @@ -42,6 +42,10 @@ sub new { my $self = { task => $task, }; + open $self->{stdout_copy}, ">&=", *STDOUT + or die "Cannot dup stdout: $OS_ERROR"; + open $self->{stderr_copy}, ">&=", *STDERR + or die "Cannot dup stderr: $OS_ERROR"; PTDEBUG && _d('Created cleanup task', $task); return bless $self, $class; } @@ -51,6 +55,12 @@ sub DESTROY { my $task = $self->{task}; if ( ref $task ) { PTDEBUG && _d('Calling cleanup task', $task); + # Temporarily restore STDOUT and STDERR to what they were + # when the object was created + open local(*STDOUT), ">&=", $self->{stdout_copy} + if $self->{stdout_copy}; + open local(*STDERR), ">&=", $self->{stderr_copy} + if $self->{stderr_copy}; $task->(); } else { diff --git a/lib/DuplicateKeyFinder.pm b/lib/DuplicateKeyFinder.pm index e43f9a43..b36b72a2 100644 --- a/lib/DuplicateKeyFinder.pm +++ b/lib/DuplicateKeyFinder.pm @@ -148,8 +148,9 @@ sub get_duplicate_keys { # Remove clustered duplicates. my $clustered_key = $args{clustered_key} ? $keys{$args{clustered_key}} : undef; - PTDEBUG && _d('clustered key:', $clustered_key->{name}, - $clustered_key->{colnames}); + PTDEBUG && _d('clustered key:', + $clustered_key ? 
($clustered_key->{name}, $clustered_key->{colnames}) + : 'none'); if ( $clustered_key && $args{clustered} && $args{tbl_info}->{engine} diff --git a/lib/ReadKeyMini.pm b/lib/ReadKeyMini.pm index 932b8513..6bc620ed 100644 --- a/lib/ReadKeyMini.pm +++ b/lib/ReadKeyMini.pm @@ -1,4 +1,4 @@ -# This program is copyright 2010-2011 Percona Inc. +# This program is copyright 2010-2012 Percona Inc. # Feedback and improvements are welcome. # # THIS PROGRAM IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED @@ -66,14 +66,12 @@ my %modes = ( ); # This primarily comes from the Perl Cookbook, recipe 15.8 - { - my $fd_stdin = fileno(STDIN); my $flags; unless ( $PerconaTest::DONT_RESTORE_STDIN ) { $flags = fcntl(STDIN, F_GETFL, 0) - or warn "can't fcntl F_GETFL: $!"; + or warn "Error getting STDIN flags with fcntl: $OS_ERROR"; } my $term = POSIX::Termios->new(); $term->getattr($fd_stdin); @@ -105,14 +103,13 @@ my %modes = ( $term->setlflag($oterm); $term->setcc( VTIME, 0 ); $term->setattr( $fd_stdin, TCSANOW ); - unless ( $PerconaTest::DONT_RESTORE_STDIN ) { - fcntl(STDIN, F_SETFL, $flags) - or warn "can't fcntl F_SETFL: $!"; + if ( !$PerconaTest::DONT_RESTORE_STDIN ) { + fcntl(STDIN, F_SETFL, int($flags)) + or warn "Error restoring STDIN flags with fcntl: $OS_ERROR"; } } END { cooked() } - } sub readkey { @@ -121,17 +118,16 @@ sub readkey { sysread(STDIN, $key, 1); my $timeout = 0.1; if ( $key eq "\033" ) { - # Ugly and broken hack, but good enough for the two minutes it took to write. - # Namely, Ctrl escapes, the F-NUM keys, and other stuff you can send from the keyboard - # take more than one "character" to represent, and would be wrong to break into pieces. - { - my $x = ''; - STDIN->blocking(0); - sysread(STDIN, $x, 2); - STDIN->blocking(1); - $key .= $x; - redo if $key =~ /\[[0-2](?:[0-9];)?$/ - } + # Ugly and broken hack, but good enough for the two minutes it took + # to write. 
Namely, Ctrl escapes, the F-NUM keys, and other stuff + # you can send from the keyboard take more than one "character" to + # represent, and would be wrong to break into pieces. + my $x = ''; + STDIN->blocking(0); + sysread(STDIN, $x, 2); + STDIN->blocking(1); + $key .= $x; + redo if $key =~ /\[[0-2](?:[0-9];)?$/ } cooked(); return $key; diff --git a/lib/bash/collect.sh b/lib/bash/collect.sh index ef56a3ad..23640fec 100644 --- a/lib/bash/collect.sh +++ b/lib/bash/collect.sh @@ -218,7 +218,7 @@ collect() { fi fi - (echo $ts; df -h) >> "$d/$p-df" & + (echo $ts; df -k) >> "$d/$p-df" & (echo $ts; netstat -antp) >> "$d/$p-netstat" & (echo $ts; netstat -s) >> "$d/$p-netstat_s" & diff --git a/sandbox/servers/4.1/my.sandbox.cnf b/sandbox/servers/4.1/my.sandbox.cnf index 8d8c1074..9c85eabe 100644 --- a/sandbox/servers/4.1/my.sandbox.cnf +++ b/sandbox/servers/4.1/my.sandbox.cnf @@ -22,6 +22,6 @@ log_slave_updates server-id = PORT report-host = 127.0.0.1 report-port = PORT -log-error = mysqld.log +log-error = /tmp/PORT/data/mysqld.log innodb_lock_wait_timeout = 3 log diff --git a/sandbox/servers/5.0/my.sandbox.cnf b/sandbox/servers/5.0/my.sandbox.cnf index 858b9f62..eb00cea5 100644 --- a/sandbox/servers/5.0/my.sandbox.cnf +++ b/sandbox/servers/5.0/my.sandbox.cnf @@ -22,6 +22,6 @@ log_slave_updates server-id = PORT report-host = 127.0.0.1 report-port = PORT -log-error = mysqld.log +log-error = /tmp/PORT/data/mysqld.log innodb_lock_wait_timeout = 3 log = genlog diff --git a/sandbox/servers/5.1/my.sandbox.cnf b/sandbox/servers/5.1/my.sandbox.cnf index 858b9f62..eb00cea5 100644 --- a/sandbox/servers/5.1/my.sandbox.cnf +++ b/sandbox/servers/5.1/my.sandbox.cnf @@ -22,6 +22,6 @@ log_slave_updates server-id = PORT report-host = 127.0.0.1 report-port = PORT -log-error = mysqld.log +log-error = /tmp/PORT/data/mysqld.log innodb_lock_wait_timeout = 3 log = genlog diff --git a/sandbox/servers/5.5/my.sandbox.cnf b/sandbox/servers/5.5/my.sandbox.cnf index 967d5cf7..f47ab48d 100644 --- 
a/sandbox/servers/5.5/my.sandbox.cnf +++ b/sandbox/servers/5.5/my.sandbox.cnf @@ -22,7 +22,7 @@ log_slave_updates server-id = PORT report-host = 127.0.0.1 report-port = PORT -log-error = mysqld.log +log-error = /tmp/PORT/data/mysqld.log innodb_lock_wait_timeout = 3 general_log general_log_file = genlog diff --git a/sandbox/test-env b/sandbox/test-env index 3643a848..b602f606 100755 --- a/sandbox/test-env +++ b/sandbox/test-env @@ -369,24 +369,6 @@ case $opt in exit_status=1 fi ;; - reset) - # Several tests reset the bin logs so that queries from prior tests - # don't replicate to new sandbox servers. This makes creating new - # sandbox servers a lot faster. There's no check if this works or - # not, so... yeah. - echo "RESETTING SLAVE. This is DANGEROUS and DOESN'T WORK. FIXME." >&2 - /tmp/12347/use -e "STOP SLAVE; FLUSH SLAVE;" - /tmp/12346/use -e "STOP SLAVE; FLUSH SLAVE; FLUSH MASTER;" - /tmp/12345/use -e "FLUSH MASTER" - - /tmp/12346/use -e "CHANGE MASTER TO master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12345, master_log_file='mysql-bin.000001', master_log_pos=0" - /tmp/12346/use -e "START SLAVE" - - /tmp/12347/use -e "CHANGE MASTER TO master_host='127.0.0.1', master_user='msandbox', master_password='msandbox', master_port=12346, master_log_file='mysql-bin.000001', master_log_pos=0" - /tmp/12347/use -e "START SLAVE" - - exit_status=0 - ;; version) set_mysql_version echo $MYSQL_VERSION diff --git a/t/lib/DSNParser.t b/t/lib/DSNParser.t index 91c28cc5..179317a2 100644 --- a/t/lib/DSNParser.t +++ b/t/lib/DSNParser.t @@ -9,7 +9,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 37; +use Test::More; use DSNParser; use OptionParser; @@ -545,6 +545,9 @@ foreach my $password_comma ( @password_commas ) { # ############################################################################# # Bug 984915: SQL calls after creating the dbh aren't checked # 
############################################################################# +# Make sure to disconnect any lingering dbhs, since full_output will fork +# and then die, which will cause rollback warnings for connected dbhs. +$dbh->disconnect() if $dbh; $dsn = $dp->parse('h=127.1,P=12345,u=msandbox,p=msandbox'); my @opts = $dp->get_cxn_params($dsn); @@ -569,5 +572,4 @@ like( # ############################################################################# # Done. # ############################################################################# -$dbh->disconnect() if $dbh; -exit; +done_testing; diff --git a/t/lib/Daemon.t b/t/lib/Daemon.t index 93082780..edecd297 100644 --- a/t/lib/Daemon.t +++ b/t/lib/Daemon.t @@ -9,13 +9,13 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 22; +use Test::More; use Time::HiRes qw(sleep); use File::Temp qw( tempfile ); use Daemon; use OptionParser; use PerconaTest; - +#plan skip_all => "Hm"; use constant PTDEVDEBUG => $ENV{PTDEVDEBUG} || 0; my $o = new OptionParser(file => "$trunk/t/lib/samples/daemonizes.pl"); @@ -263,4 +263,5 @@ ok( # Done. 
# ############################################################################# rm_tmp_files(); +done_testing; exit; diff --git a/t/lib/Diskstats.t b/t/lib/Diskstats.t index 87b182c2..77761cbb 100644 --- a/t/lib/Diskstats.t +++ b/t/lib/Diskstats.t @@ -9,15 +9,13 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 108; - -use PerconaTest; - -use OptionParser; - +use Test::More; use File::Spec; use File::Temp (); +use PerconaTest; +use OptionParser; + BEGIN { use_ok "Diskstats"; use_ok "DiskstatsGroupByAll"; @@ -25,7 +23,7 @@ BEGIN { use_ok "DiskstatsGroupBySample"; } -my $o = new OptionParser(description => 'Diskstats'); +my $o = new OptionParser(description => 'Diskstats'); $o->get_specs("$trunk/bin/pt-diskstats"); $o->get_opts(); @@ -476,6 +474,7 @@ is_deeply( $obj->clear_state(); } + # ############################################################################ # The three subclasses # ############################################################################ @@ -491,7 +490,8 @@ for my $test ( { class => "DiskstatsGroupBySample", results_file_prefix => "sample", - }) { + }, +) { my $obj = $test->{class}->new(OptionParser => $o, show_inactive => 1); my $prefix = $test->{results_file_prefix}; @@ -502,9 +502,8 @@ for my $test ( $obj->set_show_line_between_samples(0); for my $filename ( map "diskstats-00$_.txt", 1..5 ) { - my $file = File::Spec->catfile( "t", "pt-diskstats", "samples", $filename ); - my $file_with_trunk = File::Spec->catfile( $trunk, $file ); - + my $file = File::Spec->catfile(qw(t pt-diskstats samples), $filename); + my $file_with_trunk = File::Spec->catfile($trunk, $file); my $expected = "t/pt-diskstats/expected/${prefix}_$filename"; ok( @@ -571,10 +570,10 @@ EOF qr/Time between samples should be > 0, is /, "$test->{class}, ->_calc_deltas fails if the time elapsed is negative" ); - } # ########################################################################### # Done. 
# ########################################################################### +done_testing; exit; diff --git a/t/lib/DuplicateKeyFinder.t b/t/lib/DuplicateKeyFinder.t index e39ff00d..224e480b 100644 --- a/t/lib/DuplicateKeyFinder.t +++ b/t/lib/DuplicateKeyFinder.t @@ -9,7 +9,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 38; +use Test::More; use VersionParser; use DuplicateKeyFinder; @@ -786,4 +786,5 @@ like( qr/Complete test coverage/, '_d() works' ); +done_testing; exit; diff --git a/t/lib/MasterSlave.t b/t/lib/MasterSlave.t index 2494e428..ad14fa6a 100644 --- a/t/lib/MasterSlave.t +++ b/t/lib/MasterSlave.t @@ -11,6 +11,10 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use Test::More; +if ( !$ENV{SLOW_TESTS} ) { + plan skip_all => "lib/MasterSlave.t is a top 5 slowest file; set SLOW_TESTS=1 to enable it."; +} + use MasterSlave; use DSNParser; use VersionParser; @@ -734,8 +738,6 @@ $sb->wipe_clean($master_dbh); diag(`$trunk/sandbox/stop-sandbox 2903 2902 2901 2900`); diag(`/tmp/12346/use -e "set global read_only=1"`); diag(`/tmp/12347/use -e "set global read_only=1"`); -$sb->wait_for_slaves(); -diag(`$trunk/sandbox/test-env reset`); ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . 
" broke the sandbox"); done_testing; exit; diff --git a/t/lib/RowDiff.t b/t/lib/RowDiff.t index 83ffd5af..c6da9012 100644 --- a/t/lib/RowDiff.t +++ b/t/lib/RowDiff.t @@ -444,7 +444,6 @@ SKIP: { $d = new RowDiff(dbh => $master_dbh); - diag(`$trunk/sandbox/mk-test-env reset >/dev/null 2>&1`); $sb->create_dbs($master_dbh, [qw(test)]); $sb->load_file('master', 't/lib/samples/issue_11.sql'); PerconaTest::wait_until( diff --git a/t/lib/TableSyncer.t b/t/lib/TableSyncer.t index 37ad18b8..23d664e9 100644 --- a/t/lib/TableSyncer.t +++ b/t/lib/TableSyncer.t @@ -576,7 +576,6 @@ $dst->{dbh} = $dst_dbh; # ########################################################################### make_plugins(); $sb->load_file('master', 't/lib/samples/before-TableSyncGroupBy.sql'); -sleep 1; sync_table( src => "test.test1", @@ -606,7 +605,6 @@ is_deeply( # ############################################################################# make_plugins(); $sb->load_file('master', 't/lib/samples/issue_96.sql'); -sleep 1; # Make paranoid-sure that the tables differ. my $r1 = $src_dbh->selectall_arrayref('SELECT from_city FROM issue_96.t WHERE package_id=4'); @@ -1051,7 +1049,9 @@ my $output = ''; "Retries wait" ); } -diag(`$trunk/sandbox/test-env reset`); +diag(`/tmp/12347/use -e "stop slave"`); +diag(`/tmp/12346/use -e "start slave"`); +diag(`/tmp/12347/use -e "start slave"`); # ############################################################################# # Done. diff --git a/t/lib/bash/collect.sh b/t/lib/bash/collect.sh index 5ef21120..5dd3487d 100644 --- a/t/lib/bash/collect.sh +++ b/t/lib/bash/collect.sh @@ -23,7 +23,7 @@ p="$PT_TMPDIR/collect/2011_12_05" # Default collect, no extras like gdb, tcpdump, etc. 
collect "$PT_TMPDIR/collect" "2011_12_05" > $p-output 2>&1 -wait_for_files "$p-hostname" "$p-opentables2" "$p-variables" "$p-df" +wait_for_files "$p-hostname" "$p-opentables2" "$p-variables" "$p-df" "$p-innodbstatus2" # Even if this system doesn't have all the cmds, collect should still # have created some files for cmds that (hopefully) all systems have. @@ -68,6 +68,7 @@ cmd_ok \ "Finds MySQL error log" if [[ "$SANDBOX_VERSION" > "5.0" ]]; then + wait_for_files "$p-log_error" cmd_ok \ "grep -qE 'Memory status|Open streams|Begin safemalloc' $p-log_error" \ "debug" diff --git a/t/lib/bash/collect_mysql_info.sh b/t/lib/bash/collect_mysql_info.sh index 4bb4557d..6bdfc5c2 100644 --- a/t/lib/bash/collect_mysql_info.sh +++ b/t/lib/bash/collect_mysql_info.sh @@ -85,6 +85,7 @@ is \ "$cnf_file" \ "/tmp/12345/my.sandbox.cnf" \ "find_my_cnf_file gets the correct file" +[ $? -ne 0 ] && diag "$p/mysqld-instances" res=$(find_my_cnf_file "$samples/ps-mysqld-001.txt") is "$res" "/tmp/12345/my.sandbox.cnf" "ps-mysqld-001.txt" diff --git a/t/pt-archiver/basics.t b/t/pt-archiver/basics.t index 364c325e..b3c6c5d8 100644 --- a/t/pt-archiver/basics.t +++ b/t/pt-archiver/basics.t @@ -28,9 +28,6 @@ if ( !$master_dbh ) { elsif ( !$slave1_dbh ) { plan skip_all => 'Cannot connect to sandbox slave1'; } -else { - plan tests => 29; -} my $output; my $rows; @@ -188,55 +185,57 @@ cmp_ok( # ############################################################################# # Bug 903387: pt-archiver doesn't honor b=1 flag to create SQL_LOG_BIN statement # ############################################################################# +SKIP: { + skip('LOAD DATA LOCAL INFILE is disabled', 3) if !$can_load_data; + $sb->load_file('master', "t/pt-archiver/samples/bulk_regular_insert.sql"); + $sb->wait_for_slaves(); -$sb->load_file('master', "t/pt-archiver/samples/bulk_regular_insert.sql"); -$sb->wait_for_slaves(); + my $original_rows = $slave1_dbh->selectall_arrayref("SELECT * FROM bri.t ORDER BY id"); + 
is_deeply( + $original_rows, + [ + [1, 'aa', '11:11:11'], + [2, 'bb', '11:11:12'], + [3, 'cc', '11:11:13'], + [4, 'dd', '11:11:14'], + [5, 'ee', '11:11:15'], + [6, 'ff', '11:11:16'], + [7, 'gg', '11:11:17'], + [8, 'hh', '11:11:18'], + [9, 'ii', '11:11:19'], + [10,'jj', '11:11:10'], + ], + "Bug 903387: slave has rows" + ); -my $original_rows = $slave1_dbh->selectall_arrayref("SELECT * FROM bri.t ORDER BY id"); -is_deeply( - $original_rows, - [ - [1, 'aa', '11:11:11'], - [2, 'bb', '11:11:12'], - [3, 'cc', '11:11:13'], - [4, 'dd', '11:11:14'], - [5, 'ee', '11:11:15'], - [6, 'ff', '11:11:16'], - [7, 'gg', '11:11:17'], - [8, 'hh', '11:11:18'], - [9, 'ii', '11:11:19'], - [10,'jj', '11:11:10'], - ], - "Bug 903387: slave has rows" -); + $output = output( + sub { pt_archiver::main( + '--source', "D=bri,t=t,F=$cnf,b=1", + '--dest', "D=bri,t=t_arch", + qw(--where 1=1 --replace --commit-each --bulk-insert --bulk-delete), + qw(--limit 10)) }, + ); -$output = output( - sub { pt_archiver::main( - '--source', "D=bri,t=t,F=$cnf,b=1", - '--dest', "D=bri,t=t_arch", - qw(--where 1=1 --replace --commit-each --bulk-insert --bulk-delete), - qw(--limit 10)) }, -); - -$rows = $master_dbh->selectall_arrayref("SELECT * FROM bri.t ORDER BY id"); -is_deeply( - $rows, - [ - [10,'jj', '11:11:10'], - ], - "Bug 903387: rows deleted on master" -) or diag(Dumper($rows)); - -$rows = $slave1_dbh->selectall_arrayref("SELECT * FROM bri.t ORDER BY id"); -is_deeply( - $rows, - $original_rows, - "Bug 903387: slave still has rows" -) or diag(Dumper($rows)); + $rows = $master_dbh->selectall_arrayref("SELECT * FROM bri.t ORDER BY id"); + is_deeply( + $rows, + [ + [10,'jj', '11:11:10'], + ], + "Bug 903387: rows deleted on master" + ) or diag(Dumper($rows)); + $rows = $slave1_dbh->selectall_arrayref("SELECT * FROM bri.t ORDER BY id"); + is_deeply( + $rows, + $original_rows, + "Bug 903387: slave still has rows" + ) or diag(Dumper($rows)); +} # 
############################################################################# # Done. # ############################################################################# $sb->wipe_clean($master_dbh); ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); -exit; + +done_testing; diff --git a/t/pt-duplicate-key-checker/clustered_keys.t b/t/pt-duplicate-key-checker/clustered_keys.t index 403c699a..456fd24a 100644 --- a/t/pt-duplicate-key-checker/clustered_keys.t +++ b/t/pt-duplicate-key-checker/clustered_keys.t @@ -22,9 +22,6 @@ my $dbh = $sb->get_dbh_for('master'); if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } -else { - plan tests => 2; -} my $cnf = "/tmp/12345/my.sandbox.cnf"; my $sample = "t/pt-duplicate-key-checker/samples/"; @@ -46,9 +43,47 @@ ok( "Shorten, not remove, clustered dupes" ); +# ############################################################################# +# Error if InnoDB table has no PK or unique indexes +# https://bugs.launchpad.net/percona-toolkit/+bug/1036804 +# ############################################################################# +$sb->load_file('master', "t/pt-duplicate-key-checker/samples/idb-no-uniques-bug-894140.sql"); + +# PTDEBUG was auto-vivifying $clustered_key: +# +# PTDEBUG && _d('clustered key:', $clustered_key->{name}, +# $clustered_key->{colnames}); +# +# if ( $clustered_key +# && $args{clustered} +# && $args{tbl_info}->{engine} +# && $args{tbl_info}->{engine} =~ m/InnoDB/i ) +# { +# push @dupes, $self->remove_clustered_duplicates($clustered_key... +# +# sub remove_clustered_duplicates { +# my ( $self, $ck, $keys, %args ) = @_; +# die "I need a ck argument" unless $ck; +# die "I need a keys argument" unless $keys; +# my $ck_cols = $ck->{colnames}; +# my @dupes; +# KEY: +# for my $i ( 0 .. 
@$keys - 1 ) { +# my $key = $keys->[$i]->{colnames}; +# if ( $key =~ m/$ck_cols$/ ) { + +my $output = `PTDEBUG=1 $trunk/bin/pt-duplicate-key-checker F=$cnf -d bug_1036804 2>&1`; + +unlike( + $output, + qr/Use of uninitialized value/, + 'PTDEBUG doesn\'t auto-vivify cluster key hashref (bug 1036804)' +); + # ############################################################################# # Done. # ############################################################################# $sb->wipe_clean($dbh); ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +done_testing; exit; diff --git a/t/pt-duplicate-key-checker/samples/idb-no-uniques-bug-894140.sql b/t/pt-duplicate-key-checker/samples/idb-no-uniques-bug-894140.sql new file mode 100644 index 00000000..4eb0b6a3 --- /dev/null +++ b/t/pt-duplicate-key-checker/samples/idb-no-uniques-bug-894140.sql @@ -0,0 +1,9 @@ +DROP DATABASE IF EXISTS bug_1036804; +CREATE DATABASE bug_1036804; +USE bug_1036804; +CREATE TABLE `t` ( + `col1` int(11) DEFAULT NULL, + `col2` int(11) DEFAULT NULL, + KEY `col1` (`col1`), + KEY `col2` (`col2`) +) ENGINE=InnoDB DEFAULT CHARSET=latin1; diff --git a/t/pt-fifo-split/pt-fifo-split.t b/t/pt-fifo-split/pt-fifo-split.t index 54d0c446..d0b4d55b 100644 --- a/t/pt-fifo-split/pt-fifo-split.t +++ b/t/pt-fifo-split/pt-fifo-split.t @@ -9,7 +9,11 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 4; +use Test::More; + +if ( !$ENV{SLOW_TESTS} ) { + plan skip_all => "pt-fifo-split/pt-fifo-split. is a top 5 slowest file; set SLOW_TESTS=1 to enable it."; +} use PerconaTest; require "$trunk/bin/pt-fifo-split"; @@ -59,4 +63,5 @@ unlink '/tmp/pt-script.pid'; # ############################################################################# # Done. 
# ############################################################################# +done_testing; exit; diff --git a/t/pt-heartbeat/multi_update_mode.t b/t/pt-heartbeat/multi_update_mode.t index b9a0502e..fdcdfe06 100644 --- a/t/pt-heartbeat/multi_update_mode.t +++ b/t/pt-heartbeat/multi_update_mode.t @@ -9,15 +9,12 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Time::HiRes qw(sleep); use Test::More; use PerconaTest; use Sandbox; require "$trunk/bin/pt-heartbeat"; -diag(`$trunk/sandbox/test-env reset`); - my $dp = new DSNParser(opts=>$dsn_opts); my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); my $master_dbh = $sb->get_dbh_for('master'); @@ -37,10 +34,12 @@ else { plan tests => 29; } +diag(`rm -rf /tmp/pt-heartbeat-sentinel >/dev/null 2>&1`); $sb->create_dbs($master_dbh, ['test']); +$sb->wait_for_slaves(); my $output; -my $pid_file = "/tmp/__mk-heartbeat-test.pid"; +my $pid_file = "/tmp/pt-heartbeat-test.$PID.pid"; # Multi-update mode is the new, hi-res mode that allows a single table to # be updated by multiple servers: a slave's master, its master's master, etc. @@ -54,8 +53,7 @@ my @ports = qw(12345 12346 12347); foreach my $port (@ports) { system("$trunk/bin/pt-heartbeat -h 127.1 -u msandbox -p msandbox -P $port --database test --table heartbeat --create-table --update --interval 0.5 --daemonize --pid $pid_file.$port >/dev/null"); - sleep 0.2; - + PerconaTest::wait_for_files("$pid_file.$port"); ok( -f "$pid_file.$port", "--update on $port started" @@ -154,7 +152,7 @@ ok( # ############################################################################ # $rows already has slave2 heartbeat info. 
-sleep 1.0; +sleep 1; my $rows2 = $slave2_dbh->selectall_hashref("select * from test.heartbeat", 'server_id'); diff --git a/t/pt-kill/kill.t b/t/pt-kill/kill.t index fb132a25..3372eecf 100644 --- a/t/pt-kill/kill.t +++ b/t/pt-kill/kill.t @@ -35,13 +35,16 @@ else { my $output; my $cnf='/tmp/12345/my.sandbox.cnf'; +# TODO: These tests need something to match, so we background +# a SLEEP(4) query and match that, but this isn't ideal because +# it's time-based. Better is to use a specific db and --match-db. +my $sys_cmd = "/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null 2>&1 &"; + # ############################################################################# # Test that --kill kills the connection. # ############################################################################# -# Shell out to a sleep(10) query and try to capture the query. -# Backticks don't work here. -system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null 2>&1 &"); +system($sys_cmd); sleep 0.5; my $rows = $dbh->selectall_hashref('show processlist', 'id'); my $pid; @@ -52,12 +55,12 @@ values %$rows; ok( $pid, 'Got proc id of sleeping query' -); +) or diag(Dumper($rows)); $output = output( - sub { pt_kill::main('-F', $cnf, qw(--kill --print --run-time 1 --interval 1), - "--match-info", 'select sleep\(4\)', - ) + sub { + pt_kill::main('-F', $cnf, qw(--kill --print --run-time 1 --interval 1), + "--match-info", 'select sleep\(4\)') }, ); @@ -90,6 +93,7 @@ $pid = 0; # reuse, reset map { $pid = $_->{id} } grep { $_->{info} && $_->{info} =~ m/select sleep\(5\)/ } values %$rows; + ok( $pid, 'Got proc id of sleeping query' @@ -130,43 +134,58 @@ my $sql = OptionParser->read_para_after( "$trunk/bin/pt-kill", qr/MAGIC_create_log_table/); $sql =~ s/kill_log/`kill_test`.`log_table`/; +my $log_dsn = "h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test,t=log_table"; + $dbh->do($sql); { - system("/tmp/12345/use -h127.1 -P12345 -umsandbox 
-pmsandbox -e 'select sleep(4)' >/dev/null&"); + system($sys_cmd); sleep 0.5; + local $EVAL_ERROR; eval { pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1), "--match-info", 'select sleep\(4\)', - "--log-dsn", q!h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test,t=log_table!, + "--log-dsn", $log_dsn, ) }; + is( - $EVAL_ERROR, + $EVAL_ERROR, '', "--log-dsn works if the table exists and --create-log-table wasn't passed in." - ) or diag $EVAL_ERROR; + ); local $EVAL_ERROR; my $results = eval { $dbh->selectall_arrayref("SELECT * FROM `kill_test`.`log_table`", { Slice => {} } ) }; + is( $EVAL_ERROR, '', "...and we can query the table" ) or diag $EVAL_ERROR; - is @{$results}, 1, "...which contains one entry"; - use Data::Dumper; + is( + scalar @$results, + 1, + "...which contains one entry" + ); + my $reason = $dbh->selectrow_array("SELECT reason FROM `kill_test`.`log_table` WHERE kill_id=1"); - is $reason, + + is( + $reason, 'Query matches Info spec', - 'reason gets set to something sensible'; + 'reason gets set to something sensible' + ); TODO: { - local $::TODO = "Time_ms currently isn't reported"; + local $TODO = "Time_ms currently isn't reported"; my $time_ms = $dbh->selectrow_array("SELECT Time_ms FROM `kill_test`.`log_table` WHERE kill_id=1"); - ok $time_ms; + ok( + $time_ms, + "TIME_MS" + ); } my $result = shift @$results; @@ -181,66 +200,76 @@ $dbh->do($sql); my %trimmed_result; @trimmed_result{ keys %$against } = @{$result}{ keys %$against }; $trimmed_result{host} =~ s/localhost:[0-9]+/localhost/; + is_deeply( \%trimmed_result, $against, "...and was populated as expected", ) or diag(Dumper($result)); - system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null&"); + system($sys_cmd); sleep 0.5; + local $EVAL_ERROR; eval { - pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1 --create-log-table), + pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1), + "--create-log-table", "--match-info", 'select 
sleep\(4\)', - "--log-dsn", q!h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test,t=log_table!, + "--log-dsn", $log_dsn, ) }; + is( - $EVAL_ERROR, + $EVAL_ERROR, '', - "--log-dsn works if the table exists and --create-log-table was passed in." + "--log-dsn --create-log-table and the table exists" ); } { - $dbh->do("DROP TABLE `kill_test`.`log_table`"); + $dbh->do("DROP TABLE IF EXISTS `kill_test`.`log_table`"); - system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null&"); + system($sys_cmd); sleep 0.5; + local $EVAL_ERROR; eval { - pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1 --create-log-table), + pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1), + "--create-log-table", "--match-info", 'select sleep\(4\)', - "--log-dsn", q!h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test,t=log_table!, + "--log-dsn", $log_dsn, ) }; + is( - $EVAL_ERROR, - '', - "--log-dsn works if the table doesn't exists and --create-log-table was passed in." + $EVAL_ERROR, + '', + "--log-dsn --create-log-table and the table doesn't exist" ); } { - $dbh->do("DROP TABLE `kill_test`.`log_table`"); + $dbh->do("DROP TABLE IF EXISTS `kill_test`.`log_table`"); local $EVAL_ERROR; eval { pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1), "--match-info", 'select sleep\(4\)', - "--log-dsn", q!h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test,t=log_table!, + "--log-dsn", $log_dsn, ) }; - like $EVAL_ERROR, + + like( + $EVAL_ERROR, qr/\Q--log-dsn table does not exist. 
Please create it or specify\E/, - "By default, --log-dsn doesn't autogenerate a table"; + "By default, --log-dsn doesn't autogenerate a table" + ); } for my $dsn ( - q!h=127.1,P=12345,u=msandbox,p=msandbox,t=log_table!, - q!h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test!, - q!h=127.1,P=12345,u=msandbox,p=msandbox!, + q/h=127.1,P=12345,u=msandbox,p=msandbox,t=log_table/, + q/h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test/, + q/h=127.1,P=12345,u=msandbox,p=msandbox/, ) { local $EVAL_ERROR; eval { @@ -249,26 +278,42 @@ for my $dsn ( "--log-dsn", $dsn, ) }; - like $EVAL_ERROR, + + like( + $EVAL_ERROR, qr/\Q--log-dsn does not specify a database (D) or a database-qualified table (t)\E/, - "--log-dsn croaks if t= or D= are absent"; + "--log-dsn croaks if t= or D= are absent" + ); } # Run it twice for (1,2) { - system("/tmp/12345/use -h127.1 -P12345 -umsandbox -pmsandbox -e 'select sleep(4)' >/dev/null&"); + system($sys_cmd); sleep 0.5; - pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1 --create-log-table), + + pt_kill::main('-F', $cnf, qw(--kill --run-time 1 --interval 1), + "--create-log-table", "--match-info", 'select sleep\(4\)', - "--log-dsn", q!h=127.1,P=12345,u=msandbox,p=msandbox,D=kill_test,t=log_table!, + "--log-dsn", $log_dsn, ); } my $results = $dbh->selectall_arrayref("SELECT * FROM `kill_test`.`log_table`"); -is @{$results}, 2, "Different --log-dsn runs reuse the same table."; +is( + scalar @$results, + 2, + "Different --log-dsn runs reuse the same table." +); -$dbh->do("DROP DATABASE kill_test"); +$dbh->do("DROP DATABASE IF EXISTS kill_test"); + +PerconaTest::wait_until( + sub { + $results = $dbh->selectall_hashref('SHOW PROCESSLIST', 'id'); + return !grep { ($_->{info} || '') =~ m/sleep\(4\)/ } values %$results; + } +); # ############################################################################# # Done. 
diff --git a/t/pt-online-schema-change/privs.t b/t/pt-online-schema-change/privs.t index ce74ac65..41a7e463 100644 --- a/t/pt-online-schema-change/privs.t +++ b/t/pt-online-schema-change/privs.t @@ -11,6 +11,10 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use Test::More; +if ( !$ENV{SLOW_TESTS} ) { + plan skip_all => "pt-online-schema-change/privs.t is a top 5 slowest file; set SLOW_TESTS=1 to enable it."; +} + use Data::Dumper; use PerconaTest; use Sandbox; diff --git a/t/pt-query-digest/read_timeout.t b/t/pt-query-digest/read_timeout.t index 48a363ad..bd4c386a 100644 --- a/t/pt-query-digest/read_timeout.t +++ b/t/pt-query-digest/read_timeout.t @@ -21,8 +21,9 @@ use POSIX qw(mkfifo); # ######################################################################### my $pid_file = '/tmp/mqd.pid'; my $fifo = '/tmp/mqd.fifo'; -unlink $pid_file and diag("Unlinking existing $pid_file"); -unlink $fifo and diag("Unlinking existing $fifo"); + +unlink $pid_file if $pid_file; +unlink $fifo if $fifo; my ($start, $end, $waited, $timeout); SKIP: { @@ -40,7 +41,7 @@ SKIP: { ); $end = time; $waited = $end - $start; - if ( $timeout ) { + if ( $timeout && -f $pid_file ) { # mqd ran longer than --read-timeout chomp(my $pid = slurp_file($pid_file)); kill SIGTERM => $pid if $pid; @@ -52,7 +53,7 @@ SKIP: { ); } -unlink $pid_file; +unlink $pid_file if $pid_file; mkfifo $fifo, 0700; system("$trunk/t/pt-query-digest/samples/write-to-fifo.pl $fifo 4 &"); @@ -66,7 +67,7 @@ $timeout = wait_for( ); $end = time; $waited = $end - $start; -if ( $timeout ) { +if ( $timeout && $pid_file ) { # mqd ran longer than --read-timeout chomp(my $pid = slurp_file($pid_file)); kill SIGTERM => $pid if $pid; @@ -77,8 +78,8 @@ ok( sprintf("--read-timeout waited %.1f seconds reading a file", $waited) ); -unlink $pid_file; -unlink $fifo; +unlink $pid_file if $pid_file; +unlink $fifo if $fifo; # ############################################################################# # Done. 
diff --git a/t/pt-slave-delay/auto_restart.t b/t/pt-slave-delay/auto_restart.t index 7c14f6b4..eb2a5483 100644 --- a/t/pt-slave-delay/auto_restart.t +++ b/t/pt-slave-delay/auto_restart.t @@ -46,7 +46,6 @@ my $output; # the child should restart the slave, and the tool should report # that it reconnected and did some work, ending with "Setting slave # to run normally". -diag('Running...'); my $pid = fork(); if ( $pid ) { # parent @@ -65,7 +64,6 @@ else { diag(`/tmp/12346/start >/dev/null`); # Ensure we don't break the sandbox -- instance 12347 will be disconnected # when its master gets rebooted - diag("Restarting slave on instance 12347 after restarting instance 12346"); diag(`/tmp/12347/use -e "stop slave; start slave"`); exit; } diff --git a/t/pt-slave-delay/basics.t b/t/pt-slave-delay/basics.t index b7aef509..f5338911 100644 --- a/t/pt-slave-delay/basics.t +++ b/t/pt-slave-delay/basics.t @@ -18,13 +18,17 @@ require "$trunk/bin/pt-slave-delay"; my $dp = new DSNParser(opts=>$dsn_opts); my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); my $master_dbh = $sb->get_dbh_for('master'); -my $slave_dbh = $sb->get_dbh_for('slave1'); +my $slave1_dbh = $sb->get_dbh_for('slave1'); +my $slave2_dbh = $sb->get_dbh_for('slave2'); if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } -elsif ( !$slave_dbh ) { - plan skip_all => 'Cannot connect to second sandbox master'; +elsif ( !$slave1_dbh ) { + plan skip_all => 'Cannot connect to sandbox slave1'; +} +elsif ( !$slave2_dbh ) { + plan skip_all => 'Cannot connect to sandbox slave2'; } else { plan tests => 6; @@ -50,7 +54,7 @@ unlike($output, qr/Missing DSN part 'h'/, 'Does not require h DSN part'); # just disable log-bin and log-slave-updates on the slave. 
# #####1####################################################################### diag(`cp /tmp/12346/my.sandbox.cnf /tmp/12346/my.sandbox.cnf-original`); -diag(`sed -i.bak -e '/log.bin\\|log.slave/d' /tmp/12346/my.sandbox.cnf`); +diag(`sed -i.bak -e '/log-bin/d' -e '/log_slave_updates/d' /tmp/12346/my.sandbox.cnf`); diag(`/tmp/12346/stop >/dev/null`); diag(`/tmp/12346/start >/dev/null`); @@ -66,6 +70,9 @@ diag(`mv /tmp/12346/my.sandbox.cnf-original /tmp/12346/my.sandbox.cnf`); diag(`/tmp/12346/start >/dev/null`); diag(`/tmp/12346/use -e "set global read_only=1"`); +$slave2_dbh->do('STOP SLAVE'); +$slave2_dbh->do('START SLAVE'); + # ############################################################################# # Check --use-master # ############################################################################# @@ -85,11 +92,10 @@ like( ); # Sometimes the slave will be in a state of "reconnecting to master" that will -# take a while. Help that along. But, we've disconnected $slave_dbh by doing +# take a while. Help that along. But, we've disconnected $slave1_dbh by doing # 'stop' on the sandbox above, so we need to reconnect. -$slave_dbh = $sb->get_dbh_for('slave2'); -$slave_dbh->do('STOP SLAVE'); -$slave_dbh->do('START SLAVE'); +$slave2_dbh->do('STOP SLAVE'); +$slave2_dbh->do('START SLAVE'); # ############################################################################# # Done. 
diff --git a/t/pt-slave-delay/standard_options.t b/t/pt-slave-delay/standard_options.t index 5adc57b0..d5f13eb9 100644 --- a/t/pt-slave-delay/standard_options.t +++ b/t/pt-slave-delay/standard_options.t @@ -9,7 +9,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More ; +use Test::More; use PerconaTest; use Sandbox; @@ -24,31 +24,28 @@ if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } elsif ( !$slave_dbh ) { - plan skip_all => 'Cannot connect to second sandbox master'; -} -else { - plan tests => 9; + plan skip_all => 'Cannot connect to sandbox slave1'; } my $output; my $cmd = "$trunk/bin/pt-slave-delay -F /tmp/12346/my.sandbox.cnf h=127.1"; +my $pid_file = "/tmp/pt-slave-delay-test.$PID"; -# Check daemonization -system("$cmd --delay 1m --interval 1s --run-time 5s --daemonize --pid /tmp/mk-slave-delay.pid"); -$output = `ps -eaf | grep 'mk-slave-delay' | grep ' \-\-delay 1m '`; +# Check daemonization. This test used to print to STDOUT, causing +# false-positive test errors. The output isn't needed. The tool +# said "Reconnected to slave" every time it did SHOW SLAVE STATUS, +# so needlessly. That was removed. Now it will print stuff when +# we kill the process, which we don't want either. +system("$cmd --delay 1m --interval 1s --run-time 5s --daemonize --pid $pid_file >/dev/null 2>&1"); +PerconaTest::wait_for_files($pid_file); +chomp(my $pid = `cat $pid_file`); +$output = `ps x | grep "^[ ]*$pid"`; like($output, qr/$cmd/, 'It lives daemonized'); -ok(-f '/tmp/mk-slave-delay.pid', 'PID file created'); -my ($pid) = $output =~ /\s+(\d+)\s+/; -$output = `cat /tmp/mk-slave-delay.pid`; -# If this test fails, it may be because another instances of -# mk-slave-delay is running. -is($output, $pid, 'PID file has correct PID'); - # Kill it diag(`kill $pid`); wait_until(sub{!kill 0, $pid}); -ok(! -f '/tmp/mk-slave-delay.pid', 'PID file removed'); +ok(! 
-f $pid_file, 'PID file removed'); # ############################################################################# # Check that SLAVE-HOST can be given by cmd line opts. @@ -99,4 +96,5 @@ like( # Done. # ############################################################################# ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); +done_testing; exit; diff --git a/t/pt-slave-find/pt-slave-find.t b/t/pt-slave-find/pt-slave-find.t index 8488dfed..c2abfe09 100644 --- a/t/pt-slave-find/pt-slave-find.t +++ b/t/pt-slave-find/pt-slave-find.t @@ -13,23 +13,33 @@ use Test::More; use PerconaTest; use Sandbox; + require "$trunk/bin/pt-slave-find"; my $dp = new DSNParser(opts=>$dsn_opts); my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); -my $master_dbh = $sb->get_dbh_for('master'); -my $slave_dbh = $sb->get_dbh_for('slave1'); -my $slave_2_dbh = $sb->get_dbh_for('slave2'); +my $slave1_dbh = $sb->get_dbh_for('slave1'); +my $slave2_dbh = $sb->get_dbh_for('slave2'); -diag(`$trunk/sandbox/test-env reset`); +# This test is sensitive to ghost/old slaves created/destroyed by other +# tests. So we stop the slaves, restart the master, and start everything +# again. Hopefully this will return the env to its original state. +$slave2_dbh->do("STOP SLAVE"); +$slave1_dbh->do("STOP SLAVE"); +diag(`/tmp/12345/stop >/dev/null`); +diag(`/tmp/12345/start >/dev/null`); +$slave1_dbh->do("START SLAVE"); +$slave2_dbh->do("START SLAVE"); + +my $master_dbh = $sb->get_dbh_for('master'); if ( !$master_dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } -elsif ( !$slave_dbh ) { +elsif ( !$slave1_dbh ) { plan skip_all => 'Cannot connect to sandbox slave'; } -elsif ( !$slave_2_dbh ) { +elsif ( !$slave2_dbh ) { plan skip_all => 'Cannot connect to second sandbox slave'; } else { @@ -42,7 +52,7 @@ my $output = `$trunk/bin/pt-slave-find --help`; like($output, qr/Prompt for a password/, 'It compiles'); # Double check that we're setup correctly. 
-my $row = $slave_2_dbh->selectall_arrayref('SHOW SLAVE STATUS', {Slice => {}}); +my $row = $slave2_dbh->selectall_arrayref('SHOW SLAVE STATUS', {Slice => {}}); is( $row->[0]->{master_port}, '12346', @@ -108,8 +118,8 @@ my (@innodb_versions) = $result =~ /$innodb_re/g; $result =~ s/$innodb_re/InnoDB version BUILTIN/g; my $master_version = VersionParser->new($master_dbh); -my $slave_version = VersionParser->new($slave_dbh); -my $slave2_version = VersionParser->new($slave_2_dbh); +my $slave_version = VersionParser->new($slave1_dbh); +my $slave2_version = VersionParser->new($slave2_dbh); is( $innodb_versions[0], diff --git a/t/pt-table-checksum/replication_filters.t b/t/pt-table-checksum/replication_filters.t index a4fc2e59..697e1957 100644 --- a/t/pt-table-checksum/replication_filters.t +++ b/t/pt-table-checksum/replication_filters.t @@ -11,6 +11,11 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use Test::More; +if ( !$ENV{SLOW_TESTS} ) { + plan skip_all => "pt-table-checksum/replication_filters.t is a top 5 slowest file; set SLOW_TESTS=1 to enable it."; +} + + # Hostnames make testing less accurate. Tests need to see # that such-and-such happened on specific slave hosts, but # the sandbox servers are all on one host so all slaves have diff --git a/t/pt-table-checksum/throttle.t b/t/pt-table-checksum/throttle.t index 879d89a3..0feb7418 100644 --- a/t/pt-table-checksum/throttle.t +++ b/t/pt-table-checksum/throttle.t @@ -11,6 +11,10 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use Test::More; +if ( !$ENV{SLOW_TESTS} ) { + plan skip_all => "pt-table-checksum/throttle.t is a top 5 slowest file; set SLOW_TESTS=1 to enable it."; +} + $ENV{PERCONA_TOOLKIT_TEST_USE_DSN_NAMES} = 1; use PerconaTest; diff --git a/util/test-bash-functions b/util/test-bash-functions index 133aab74..2616c7c8 100755 --- a/util/test-bash-functions +++ b/util/test-bash-functions @@ -204,13 +204,24 @@ wait_for_files() { for file in "$@"; do local slept=0 while ! 
[ -f $file ]; do - sleep 0.1; + sleep 0.2; slept=$((slept + 1)) - [ $slept -ge 50 ] && break # 5s + [ $slept -ge 150 ] && break # 30s done done } +diag() { + if [ $# -eq 1 -a -f "$1" ]; then + echo "# $1:" + awk '{print "# " $0}' "$1" + else + for line in "$@"; do + echo "# $line" + done + fi +} + # ############################################################################ # Script starts here # ############################################################################