From 56375da166dd859e693e1f257d0fe067bc0fe6bf Mon Sep 17 00:00:00 2001
From: Daniel Nichter <daniel@percona.com>
Date: Tue, 27 Dec 2011 15:11:35 -0700
Subject: [PATCH 1/4] Make replication_filters.t stable.

---
 t/pt-table-checksum/replication_filters.t | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/t/pt-table-checksum/replication_filters.t b/t/pt-table-checksum/replication_filters.t
index 80db7cf1..a5408c28 100644
--- a/t/pt-table-checksum/replication_filters.t
+++ b/t/pt-table-checksum/replication_filters.t
@@ -139,7 +139,8 @@ $slave1_dbh = $sb->get_dbh_for('slave1');
 # But since db mysql is ignored, the new results for mysql.user should
 # not replicate.
 pt_table_checksum::main(@args, qw(--no-check-replication-filters),
-   '-t', 'mysql.user,sakila.city', qw(--quiet --no-replicate-check));
+   '-t', 'mysql.user,sakila.city', qw(--quiet --no-replicate-check),
+   qw(--chunk-size 1000));
 
 PerconaTest::wait_for_table($slave1_dbh, 'percona.checksums', "db='sakila' and tbl='city' and chunk=1");
 

From 88e82d665d94010b739f2daf46cfd1885ca10cf1 Mon Sep 17 00:00:00 2001
From: Brian Fraser <brian.fraser@percona.com>
Date: Wed, 28 Dec 2011 16:44:49 -0300
Subject: [PATCH 2/4] Include serialize_list() and deserialize_list() to
 roundtrip through a DB.

---
 lib/Quoter.pm  | 62 ++++++++++++++++++++++++++++++++++++++++++
 t/lib/Quoter.t | 74 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 135 insertions(+), 1 deletion(-)

diff --git a/lib/Quoter.pm b/lib/Quoter.pm
index a7db876b..ee0f624a 100644
--- a/lib/Quoter.pm
+++ b/lib/Quoter.pm
@@ -144,6 +144,68 @@ sub join_quote {
    return $db ? "$db.$tbl" : $tbl;
 }
 
+# Nothing much going on here. Return the list passed in,
+# with the elements passed through quotemeta, and the results
+# concatenated with ','
+sub serialize_list {
+   my @args = @_;
+   if ( @args && $args[-1] eq '' ) {
+      # If the last element is an empty string, it conflicts
+      # with the assumptions of the somewhat lax regex below,
+      # which always leaves an empty element in the end.
+      # We could fix the regex, but it's a lot of extra
+      # complexity for little gain, or we could add a
+      # special-case here. Just by tagging another empty
+      # string, we get the desired result.
+      push @args, '';
+   }
+   return join ',', map { quotemeta } @args;
+}
+
+sub deserialize_list {
+   my ( $string )      = @_;
+   my @escaped_parts   = $string =~ /
+         \G             # Start of string, or end of previous match.
+         (              # Each of these is an element in the original list.
+            [^\\,]*     # Anything not a backslash or a comma
+            (?:         # When we get here, we found one of the above.
+               \\.      # A backslash followed by something means we can continue
+               [^\\,]*  # Same as above.
+            )*          # Repeat zero of more times.
+         )
+         (?:,|\z)       # Comma dividing elements, or absolute end of the string.
+      /sxg;
+   pop @escaped_parts; # Last element will always be empty. Flaw in the regex.
+                       # But easier to fix this way. Faster, too.
+   my @unescaped_parts = map {
+         # Undo the quotemeta().
+         my $part = $_;
+         # Here be weirdness. Unfortunately quotemeta() is broken, and exposes
+         # the internal representation of scalars. Namely, the latin-1 range,
+         # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded
+         # strings, but left alone in UTF-8 strings. Thus, this.
+
+            # TODO: quotemeta() might change in 5.16 to mean
+            # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/
+            # And also fix this whole weird behavior under
+            # use feature 'unicode_strings' --  If/once that's
+            # implemented, this will have to change.
+         my $char_class = utf8::is_utf8($part)  # If it's a UTF-8 string,
+                        ? qr/(?=\p{ASCII})\W/   # We only care about non-word
+                                                # characters in the ASCII range
+                        : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise,
+                                                # same as above, but also
+                                                # unescape the latin-1 range.
+         $part =~ s/\\($char_class)/$1/g;
+                  # As a somewhat uplifting note, all of the above is more
+                  # or less fixed in newer Perls! quotemeta() is still
+                  # broken, but regexen can deal with it more naturally.
+         $part;
+      } @escaped_parts;
+   return @unescaped_parts;
+}
+
+
 1;
 }
 # ###########################################################################
diff --git a/t/lib/Quoter.t b/t/lib/Quoter.t
index 5a8983d3..e3db522e 100644
--- a/t/lib/Quoter.t
+++ b/t/lib/Quoter.t
@@ -9,7 +9,7 @@ BEGIN {
 use strict;
 use warnings FATAL => 'all';
 use English qw(-no_match_vars);
-use Test::More tests => 31;
+use Test::More tests => 63;
 
 use Quoter;
 use PerconaTest;
@@ -99,4 +99,76 @@ is( $q->join_quote('`db`', '`tbl`'), '`db`.`tbl`', 'join_merge(`db`, `tbl`)' );
 is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)'  );
 is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' );
 
+my @serialize_tests = (
+   [ 'a', 'b', ],
+   [ 'a,', 'b', ],
+   [ "a,\\\nc\nas", 'b', ],
+   [ 'a\\\,a', 'c', ],
+   [ 'a\\\\,a', 'c', ],
+   [ 'a\\\\\,aa', 'c', ],
+   [ 'a\\\\\\,aa', 'c', ],
+   [ 'a\\\,a,a', 'c,d,e,d,', ],
+   [ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1
+   [ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8
+   [ ",,,,,,,,,,,,,,", ",", ],
+   [ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ],
+   [ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ],
+   [ 1, 2 ],
+   [ 7, 9 ],
+   [ '', '', '', ],
+);
+
+for my $test ( @serialize_tests ) {
+   my $ser = Quoter::serialize_list( @$test );
+   is_deeply(
+      [Quoter::deserialize_list($ser)],
+      $test,
+      "serialize then deserialize works"
+   );
+}
+
+use DSNParser;
+use Sandbox;
+my $dp = new DSNParser(opts=>$dsn_opts);
+my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
+my $dbh = $sb->get_dbh_for('master');
+SKIP: {
+   skip 'Cannot connect to sandbox master', 1 unless $dbh;
+
+   $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test');
+   $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize');
+   $dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)');
+
+   my $sth    = $dbh->prepare(
+      "INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)"
+   );
+   my $selsth = $dbh->prepare(
+      "SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1"
+   );
+
+   for my $test_index ( 0..$#serialize_tests ) {
+      my $ser = Quoter::serialize_list( @{$serialize_tests[$test_index]} );
+
+      # Bit of a hack, but we want to test both of Perl's internal encodings
+      # for correctness.
+      local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser);
+
+      $sth->execute($test_index, $ser);
+      $selsth->execute($test_index);
+
+      is_deeply(
+         [Quoter::deserialize_list($selsth->fetchrow_array())],
+         $serialize_tests[$test_index],
+         "serialize then deserialize through the DB works"
+      );
+   }
+
+   $sth->finish();
+   $selsth->finish();
+
+   $dbh->do("DROP DATABASE serialize_test");
+
+   $dbh->disconnect();
+};
+
 exit;

From 4197b09f539ea7b42621de2f00e0c2ea7d71be68 Mon Sep 17 00:00:00 2001
From: Daniel Nichter <daniel@percona.com>
Date: Wed, 28 Dec 2011 15:47:50 -0700
Subject: [PATCH 3/4] Adjust code spacing, tweak comments, do only
 (de)serialize tests with database, give tests names.

---
 lib/Quoter.pm  | 69 +++++++++++++++++++++++++-------------------------
 t/lib/Quoter.t | 37 +++++++++++++++------------
 2 files changed, 55 insertions(+), 51 deletions(-)

diff --git a/lib/Quoter.pm b/lib/Quoter.pm
index ee0f624a..afec775c 100644
--- a/lib/Quoter.pm
+++ b/lib/Quoter.pm
@@ -144,11 +144,10 @@ sub join_quote {
    return $db ? "$db.$tbl" : $tbl;
 }
 
-# Nothing much going on here. Return the list passed in,
-# with the elements passed through quotemeta, and the results
-# concatenated with ','
+# Return the list passed in, with the elements passed through quotemeta,
+# and the results concatenated with ','.
 sub serialize_list {
-   my @args = @_;
+   my ( $self, @args ) = @_;
    if ( @args && $args[-1] eq '' ) {
       # If the last element is an empty string, it conflicts
       # with the assumptions of the somewhat lax regex below,
@@ -163,49 +162,49 @@ sub serialize_list {
 }
 
 sub deserialize_list {
-   my ( $string )      = @_;
-   my @escaped_parts   = $string =~ /
+   my ( $self, $string ) = @_;
+   my @escaped_parts = $string =~ /
          \G             # Start of string, or end of previous match.
          (              # Each of these is an element in the original list.
             [^\\,]*     # Anything not a backslash or a comma
             (?:         # When we get here, we found one of the above.
-               \\.      # A backslash followed by something means we can continue
+               \\.      # A backslash followed by something so we can continue
                [^\\,]*  # Same as above.
             )*          # Repeat zero of more times.
          )
-         (?:,|\z)       # Comma dividing elements, or absolute end of the string.
+         (?:,|\z)       # Comma dividing elements or absolute end of the string.
       /sxg;
-   pop @escaped_parts; # Last element will always be empty. Flaw in the regex.
-                       # But easier to fix this way. Faster, too.
-   my @unescaped_parts = map {
-         # Undo the quotemeta().
-         my $part = $_;
-         # Here be weirdness. Unfortunately quotemeta() is broken, and exposes
-         # the internal representation of scalars. Namely, the latin-1 range,
-         # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded
-         # strings, but left alone in UTF-8 strings. Thus, this.
 
-            # TODO: quotemeta() might change in 5.16 to mean
-            # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/
-            # And also fix this whole weird behavior under
-            # use feature 'unicode_strings' --  If/once that's
-            # implemented, this will have to change.
-         my $char_class = utf8::is_utf8($part)  # If it's a UTF-8 string,
-                        ? qr/(?=\p{ASCII})\W/   # We only care about non-word
-                                                # characters in the ASCII range
-                        : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise,
-                                                # same as above, but also
-                                                # unescape the latin-1 range.
-         $part =~ s/\\($char_class)/$1/g;
-                  # As a somewhat uplifting note, all of the above is more
-                  # or less fixed in newer Perls! quotemeta() is still
-                  # broken, but regexen can deal with it more naturally.
-         $part;
-      } @escaped_parts;
+   # Last element will always be empty. Flaw in the regex.
+   # But easier to fix this way. Faster, too.
+   pop @escaped_parts;
+
+   # Undo the quotemeta().
+   my @unescaped_parts = map {
+      my $part = $_;
+      # Here be weirdness. Unfortunately quotemeta() is broken, and exposes
+      # the internal representation of scalars. Namely, the latin-1 range,
+      # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded
+      # strings, but left alone in UTF-8 strings. Thus, this.
+
+      # TODO: quotemeta() might change in 5.16 to mean
+      # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/
+      # And also fix this whole weird behavior under
+      # use feature 'unicode_strings' --  If/once that's
+      # implemented, this will have to change.
+      my $char_class = utf8::is_utf8($part)  # If it's a UTF-8 string,
+                     ? qr/(?=\p{ASCII})\W/   # We only care about non-word
+                                             # characters in the ASCII range
+                     : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise,
+                                             # same as above, but also
+                                             # unescape the latin-1 range.
+      $part =~ s/\\($char_class)/$1/g;
+      $part;
+   } @escaped_parts;
+
    return @unescaped_parts;
 }
 
-
 1;
 }
 # ###########################################################################
diff --git a/t/lib/Quoter.t b/t/lib/Quoter.t
index e3db522e..b1ea5b79 100644
--- a/t/lib/Quoter.t
+++ b/t/lib/Quoter.t
@@ -9,7 +9,7 @@ BEGIN {
 use strict;
 use warnings FATAL => 'all';
 use English qw(-no_match_vars);
-use Test::More tests => 63;
+use Test::More tests => 47;
 
 use Quoter;
 use PerconaTest;
@@ -99,6 +99,10 @@ is( $q->join_quote('`db`', '`tbl`'), '`db`.`tbl`', 'join_merge(`db`, `tbl`)' );
 is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)'  );
 is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' );
 
+# ###########################################################################
+# (de)serialize_list
+# ###########################################################################
+
 my @serialize_tests = (
    [ 'a', 'b', ],
    [ 'a,', 'b', ],
@@ -118,22 +122,17 @@ my @serialize_tests = (
    [ '', '', '', ],
 );
 
-for my $test ( @serialize_tests ) {
-   my $ser = Quoter::serialize_list( @$test );
-   is_deeply(
-      [Quoter::deserialize_list($ser)],
-      $test,
-      "serialize then deserialize works"
-   );
-}
-
 use DSNParser;
 use Sandbox;
-my $dp = new DSNParser(opts=>$dsn_opts);
-my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
+my $dp  = new DSNParser(opts=>$dsn_opts);
+my $sb  = new Sandbox(basedir => '/tmp', DSNParser => $dp);
 my $dbh = $sb->get_dbh_for('master');
 SKIP: {
-   skip 'Cannot connect to sandbox master', 1 unless $dbh;
+   skip 'Cannot connect to sandbox master', scalar @serialize_tests unless $dbh;
+
+   # Prevent "Wide character in print at Test/Builder.pm" warnings.
+   binmode Test::More->builder->$_(), ':encoding(UTF-8)'
+      for qw(output failure_output);
 
    $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test');
    $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize');
@@ -147,7 +146,7 @@ SKIP: {
    );
 
    for my $test_index ( 0..$#serialize_tests ) {
-      my $ser = Quoter::serialize_list( @{$serialize_tests[$test_index]} );
+      my $ser = $q->serialize_list( @{$serialize_tests[$test_index]} );
 
       # Bit of a hack, but we want to test both of Perl's internal encodings
       # for correctness.
@@ -156,10 +155,13 @@ SKIP: {
       $sth->execute($test_index, $ser);
       $selsth->execute($test_index);
 
+      my $flat_string = "@{$serialize_tests[$test_index]}";
+      $flat_string =~ s/\n/\\n/g;
+
       is_deeply(
-         [Quoter::deserialize_list($selsth->fetchrow_array())],
+         [ $q->deserialize_list($selsth->fetchrow_array()) ],
          $serialize_tests[$test_index],
-         "serialize then deserialize through the DB works"
+         "Serialize $flat_string"
       );
    }
 
@@ -171,4 +173,7 @@ SKIP: {
    $dbh->disconnect();
 };
 
+# ###########################################################################
+# Done.
+# ###########################################################################
 exit;

From 85817a795cc7112270c647cc317022d8069a062f Mon Sep 17 00:00:00 2001
From: Brian Fraser <brian.fraser@percona.com>
Date: Wed, 28 Dec 2011 20:08:30 -0300
Subject: [PATCH 4/4] Slight improvement to the deserialize regex and its test
 output.

---
 lib/Quoter.pm  | 2 +-
 t/lib/Quoter.t | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/Quoter.pm b/lib/Quoter.pm
index afec775c..ee09d366 100644
--- a/lib/Quoter.pm
+++ b/lib/Quoter.pm
@@ -172,7 +172,7 @@ sub deserialize_list {
                [^\\,]*  # Same as above.
             )*          # Repeat zero of more times.
          )
-         (?:,|\z)       # Comma dividing elements or absolute end of the string.
+         ,?             # Comma dividing elements or absolute end of the string.
       /sxg;
 
    # Last element will always be empty. Flaw in the regex.
diff --git a/t/lib/Quoter.t b/t/lib/Quoter.t
index b1ea5b79..a876659f 100644
--- a/t/lib/Quoter.t
+++ b/t/lib/Quoter.t
@@ -155,7 +155,7 @@ SKIP: {
       $sth->execute($test_index, $ser);
       $selsth->execute($test_index);
 
-      my $flat_string = "@{$serialize_tests[$test_index]}";
+      my $flat_string =  "[" . join("][", @{$serialize_tests[$test_index]}) . "]";
       $flat_string =~ s/\n/\\n/g;
 
       is_deeply(