From 56375da166dd859e693e1f257d0fe067bc0fe6bf Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 27 Dec 2011 15:11:35 -0700 Subject: [PATCH 1/4] Make replication_filters.t stable. --- t/pt-table-checksum/replication_filters.t | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/t/pt-table-checksum/replication_filters.t b/t/pt-table-checksum/replication_filters.t index 80db7cf1..a5408c28 100644 --- a/t/pt-table-checksum/replication_filters.t +++ b/t/pt-table-checksum/replication_filters.t @@ -139,7 +139,8 @@ $slave1_dbh = $sb->get_dbh_for('slave1'); # But since db mysql is ignored, the new results for mysql.user should # not replicate. pt_table_checksum::main(@args, qw(--no-check-replication-filters), - '-t', 'mysql.user,sakila.city', qw(--quiet --no-replicate-check)); + '-t', 'mysql.user,sakila.city', qw(--quiet --no-replicate-check), + qw(--chunk-size 1000)); PerconaTest::wait_for_table($slave1_dbh, 'percona.checksums', "db='sakila' and tbl='city' and chunk=1"); From 88e82d665d94010b739f2daf46cfd1885ca10cf1 Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Wed, 28 Dec 2011 16:44:49 -0300 Subject: [PATCH 2/4] Include serialize_list() and deserialize_list() to roundtrip through a DB. --- lib/Quoter.pm | 62 ++++++++++++++++++++++++++++++++++++++++++ t/lib/Quoter.t | 74 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/lib/Quoter.pm b/lib/Quoter.pm index a7db876b..ee0f624a 100644 --- a/lib/Quoter.pm +++ b/lib/Quoter.pm @@ -144,6 +144,68 @@ sub join_quote { return $db ? "$db.$tbl" : $tbl; } +# Nothing much going on here. Return the list passed in, +# with the elements passed through quotemeta, and the results +# concatenated with ',' +sub serialize_list { + my @args = @_; + if ( @args && $args[-1] eq '' ) { + # If the last element is an empty string, it conflicts + # with the assumptions of the somewhat lax regex below, + # which always leaves an empty element in the end. + # We could fix the regex, but it's a lot of extra + # complexity for little gain, or we could add a + # special-case here. Just by tagging another empty + # string, we get the desired result. + push @args, ''; + } + return join ',', map { quotemeta } @args; +} + +sub deserialize_list { + my ( $string ) = @_; + my @escaped_parts = $string =~ / + \G # Start of string, or end of previous match. + ( # Each of these is an element in the original list. + [^\\,]* # Anything not a backslash or a comma + (?: # When we get here, we found one of the above. + \\. # A backslash followed by something means we can continue + [^\\,]* # Same as above. + )* # Repeat zero of more times. + ) + (?:,|\z) # Comma dividing elements, or absolute end of the string. + /sxg; + pop @escaped_parts; # Last element will always be empty. Flaw in the regex. + # But easier to fix this way. Faster, too. + my @unescaped_parts = map { + # Undo the quotemeta(). + my $part = $_; + # Here be weirdness. Unfortunately quotemeta() is broken, and exposes + # the internal representation of scalars. Namely, the latin-1 range, + # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded + # strings, but left alone in UTF-8 strings. Thus, this. + + # TODO: quotemeta() might change in 5.16 to mean + # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/ + # And also fix this whole weird behavior under + # use feature 'unicode_strings' -- If/once that's + # implemented, this will have to change. + my $char_class = utf8::is_utf8($part) # If it's a UTF-8 string, + ? qr/(?=\p{ASCII})\W/ # We only care about non-word + # characters in the ASCII range + : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise, + # same as above, but also + # unescape the latin-1 range. + $part =~ s/\\($char_class)/$1/g; + # As a somewhat uplifting note, all of the above is more + # or less fixed in newer Perls! quotemeta() is still + # broken, but regexen can deal with it more naturally. + $part; + } @escaped_parts; + return @unescaped_parts; +} + + 1; } # ########################################################################### diff --git a/t/lib/Quoter.t b/t/lib/Quoter.t index 5a8983d3..e3db522e 100644 --- a/t/lib/Quoter.t +++ b/t/lib/Quoter.t @@ -9,7 +9,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 31; +use Test::More tests => 63; use Quoter; use PerconaTest; @@ -99,4 +99,76 @@ is( $q->join_quote('`db`', '`tbl`'), '`db`.`tbl`', 'join_merge(`db`, `tbl`)' ); is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' ); is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' ); +my @serialize_tests = ( + [ 'a', 'b', ], + [ 'a,', 'b', ], + [ "a,\\\nc\nas", 'b', ], + [ 'a\\\,a', 'c', ], + [ 'a\\\\,a', 'c', ], + [ 'a\\\\\,aa', 'c', ], + [ 'a\\\\\\,aa', 'c', ], + [ 'a\\\,a,a', 'c,d,e,d,', ], + [ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1 + [ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8 + [ ",,,,,,,,,,,,,,", ",", ], + [ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ], + [ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ], + [ 1, 2 ], + [ 7, 9 ], + [ '', '', '', ], +); + +for my $test ( @serialize_tests ) { + my $ser = Quoter::serialize_list( @$test ); + is_deeply( + [Quoter::deserialize_list($ser)], + $test, + "serialize then deserialize works" + ); +} + +use DSNParser; +use Sandbox; +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $dbh = $sb->get_dbh_for('master'); +SKIP: { + skip 'Cannot connect to sandbox master', 1 unless $dbh; + + $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test'); + $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize'); + $dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)'); + + my $sth = $dbh->prepare( + "INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)" + ); + my $selsth = $dbh->prepare( + "SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1" + ); + + for my $test_index ( 0..$#serialize_tests ) { + my $ser = Quoter::serialize_list( @{$serialize_tests[$test_index]} ); + + # Bit of a hack, but we want to test both of Perl's internal encodings + # for correctness. + local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser); + + $sth->execute($test_index, $ser); + $selsth->execute($test_index); + + is_deeply( + [Quoter::deserialize_list($selsth->fetchrow_array())], + $serialize_tests[$test_index], + "serialize then deserialize through the DB works" + ); + } + + $sth->finish(); + $selsth->finish(); + + $dbh->do("DROP DATABASE serialize_test"); + + $dbh->disconnect(); +}; + exit; From 4197b09f539ea7b42621de2f00e0c2ea7d71be68 Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Wed, 28 Dec 2011 15:47:50 -0700 Subject: [PATCH 3/4] Adjust code spacing, tweak comments, do only (de)serialize tests with database, give tests names. --- lib/Quoter.pm | 69 +++++++++++++++++++++++++------------------------- t/lib/Quoter.t | 37 +++++++++++++++------------ 2 files changed, 55 insertions(+), 51 deletions(-) diff --git a/lib/Quoter.pm b/lib/Quoter.pm index ee0f624a..afec775c 100644 --- a/lib/Quoter.pm +++ b/lib/Quoter.pm @@ -144,11 +144,10 @@ sub join_quote { return $db ? "$db.$tbl" : $tbl; } -# Nothing much going on here. Return the list passed in, -# with the elements passed through quotemeta, and the results -# concatenated with ',' +# Return the list passed in, with the elements passed through quotemeta, +# and the results concatenated with ','. sub serialize_list { - my @args = @_; + my ( $self, @args ) = @_; if ( @args && $args[-1] eq '' ) { # If the last element is an empty string, it conflicts # with the assumptions of the somewhat lax regex below, @@ -163,49 +162,49 @@ sub serialize_list { } sub deserialize_list { - my ( $string ) = @_; - my @escaped_parts = $string =~ / + my ( $self, $string ) = @_; + my @escaped_parts = $string =~ / \G # Start of string, or end of previous match. ( # Each of these is an element in the original list. [^\\,]* # Anything not a backslash or a comma (?: # When we get here, we found one of the above. - \\. # A backslash followed by something means we can continue + \\. # A backslash followed by something so we can continue [^\\,]* # Same as above. )* # Repeat zero of more times. ) - (?:,|\z) # Comma dividing elements, or absolute end of the string. + (?:,|\z) # Comma dividing elements or absolute end of the string. /sxg; - pop @escaped_parts; # Last element will always be empty. Flaw in the regex. - # But easier to fix this way. Faster, too. - my @unescaped_parts = map { - # Undo the quotemeta(). - my $part = $_; - # Here be weirdness. Unfortunately quotemeta() is broken, and exposes - # the internal representation of scalars. Namely, the latin-1 range, - # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded - # strings, but left alone in UTF-8 strings. Thus, this. - # TODO: quotemeta() might change in 5.16 to mean - # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/ - # And also fix this whole weird behavior under - # use feature 'unicode_strings' -- If/once that's - # implemented, this will have to change. - my $char_class = utf8::is_utf8($part) # If it's a UTF-8 string, - ? qr/(?=\p{ASCII})\W/ # We only care about non-word - # characters in the ASCII range - : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise, - # same as above, but also - # unescape the latin-1 range. - $part =~ s/\\($char_class)/$1/g; - # As a somewhat uplifting note, all of the above is more - # or less fixed in newer Perls! quotemeta() is still - # broken, but regexen can deal with it more naturally. - $part; - } @escaped_parts; + # Last element will always be empty. Flaw in the regex. + # But easier to fix this way. Faster, too. + pop @escaped_parts; + + # Undo the quotemeta(). + my @unescaped_parts = map { + my $part = $_; + # Here be weirdness. Unfortunately quotemeta() is broken, and exposes + # the internal representation of scalars. Namely, the latin-1 range, + # \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded + # strings, but left alone in UTF-8 strings. Thus, this. + + # TODO: quotemeta() might change in 5.16 to mean + # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/ + # And also fix this whole weird behavior under + # use feature 'unicode_strings' -- If/once that's + # implemented, this will have to change. + my $char_class = utf8::is_utf8($part) # If it's a UTF-8 string, + ? qr/(?=\p{ASCII})\W/ # We only care about non-word + # characters in the ASCII range + : qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise, + # same as above, but also + # unescape the latin-1 range. + $part =~ s/\\($char_class)/$1/g; + $part; + } @escaped_parts; + return @unescaped_parts; } - 1; } # ########################################################################### diff --git a/t/lib/Quoter.t b/t/lib/Quoter.t index e3db522e..b1ea5b79 100644 --- a/t/lib/Quoter.t +++ b/t/lib/Quoter.t @@ -9,7 +9,7 @@ BEGIN { use strict; use warnings FATAL => 'all'; use English qw(-no_match_vars); -use Test::More tests => 63; +use Test::More tests => 47; use Quoter; use PerconaTest; @@ -99,6 +99,10 @@ is( $q->join_quote('`db`', '`tbl`'), '`db`.`tbl`', 'join_merge(`db`, `tbl`)' ); is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' ); is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' ); +# ########################################################################### +# (de)serialize_list +# ########################################################################### + my @serialize_tests = ( [ 'a', 'b', ], [ 'a,', 'b', ], @@ -118,22 +122,17 @@ my @serialize_tests = ( [ '', '', '', ], ); -for my $test ( @serialize_tests ) { - my $ser = Quoter::serialize_list( @$test ); - is_deeply( - [Quoter::deserialize_list($ser)], - $test, - "serialize then deserialize works" - ); -} - use DSNParser; use Sandbox; -my $dp = new DSNParser(opts=>$dsn_opts); -my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); my $dbh = $sb->get_dbh_for('master'); SKIP: { - skip 'Cannot connect to sandbox master', 1 unless $dbh; + skip 'Cannot connect to sandbox master', scalar @serialize_tests unless $dbh; + + # Prevent "Wide character in print at Test/Builder.pm" warnings. + binmode Test::More->builder->$_(), ':encoding(UTF-8)' + for qw(output failure_output); $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test'); $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize'); @@ -147,7 +146,7 @@ SKIP: { ); for my $test_index ( 0..$#serialize_tests ) { - my $ser = Quoter::serialize_list( @{$serialize_tests[$test_index]} ); + my $ser = $q->serialize_list( @{$serialize_tests[$test_index]} ); # Bit of a hack, but we want to test both of Perl's internal encodings # for correctness. @@ -156,10 +155,13 @@ SKIP: { $sth->execute($test_index, $ser); $selsth->execute($test_index); + my $flat_string = "@{$serialize_tests[$test_index]}"; + $flat_string =~ s/\n/\\n/g; + is_deeply( - [Quoter::deserialize_list($selsth->fetchrow_array())], + [ $q->deserialize_list($selsth->fetchrow_array()) ], $serialize_tests[$test_index], - "serialize then deserialize through the DB works" + "Serialize $flat_string" ); } @@ -171,4 +173,7 @@ SKIP: { $dbh->disconnect(); }; +# ########################################################################### +# Done. +# ########################################################################### exit; From 85817a795cc7112270c647cc317022d8069a062f Mon Sep 17 00:00:00 2001 From: Brian Fraser Date: Wed, 28 Dec 2011 20:08:30 -0300 Subject: [PATCH 4/4] Slight improvement to the deserialize regex and its test output. --- lib/Quoter.pm | 2 +- t/lib/Quoter.t | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Quoter.pm b/lib/Quoter.pm index afec775c..ee09d366 100644 --- a/lib/Quoter.pm +++ b/lib/Quoter.pm @@ -172,7 +172,7 @@ sub deserialize_list { [^\\,]* # Same as above. )* # Repeat zero of more times. ) - (?:,|\z) # Comma dividing elements or absolute end of the string. + ,? # Comma dividing elements or absolute end of the string. /sxg; # Last element will always be empty. Flaw in the regex. diff --git a/t/lib/Quoter.t b/t/lib/Quoter.t index b1ea5b79..a876659f 100644 --- a/t/lib/Quoter.t +++ b/t/lib/Quoter.t @@ -155,7 +155,7 @@ SKIP: { $sth->execute($test_index, $ser); $selsth->execute($test_index); - my $flat_string = "@{$serialize_tests[$test_index]}"; + my $flat_string = "[" . join("][", @{$serialize_tests[$test_index]}) . "]"; $flat_string =~ s/\n/\\n/g; is_deeply(