From 0fd4f272897b746848fb64d7e7110c968c9ed39e Mon Sep 17 00:00:00 2001 From: Daniel Nichter Date: Tue, 12 Feb 2013 12:58:26 -0700 Subject: [PATCH] Simplify and re-test Quoter::(de)serialize_list(). --- lib/Quoter.pm | 95 +++++++++++++---------- t/lib/Quoter.t | 203 +++++++++++++++++++++++++------------------------ 2 files changed, 157 insertions(+), 141 deletions(-) diff --git a/lib/Quoter.pm b/lib/Quoter.pm index fb8169d3..9c530830 100644 --- a/lib/Quoter.pm +++ b/lib/Quoter.pm @@ -27,6 +27,11 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use constant PTDEBUG => $ENV{PTDEBUG} || 0; +use Data::Dumper; +$Data::Dumper::Indent = 1; +$Data::Dumper::Sortkeys = 1; +$Data::Dumper::Quotekeys = 0; + # Sub: new # # Parameters: @@ -155,57 +160,65 @@ sub join_quote { # and the results concatenated with ','. sub serialize_list { my ( $self, @args ) = @_; - return unless @args; + PTDEBUG && _d('Serializing', Dumper(\@args)); + die "Cannot serialize an empty array" unless scalar @args; - # If the only value is undef, which is NULL for MySQL, then return - # the same. undef/NULL is a valid boundary value, however... - return $args[0] if @args == 1 && !defined $args[0]; + my @parts; + foreach my $arg ( @args ) { + if ( defined $arg ) { + $arg =~ s/,/\\,/g; # escape commas + $arg =~ s/\\N/\\\\N/g; # escape literal \N + push @parts, $arg; + } + else { + push @parts, '\N'; + } + } - return join ',', map { - my $c = $_; - if ( defined($c) ) { - $c =~ s/([^A-Za-z0-9])/\\$1/g; - $c - } - else { - '\\N' - } - } @args; + my $string = join(',', @parts); + PTDEBUG && _d('Serialized: <', $string, '>'); + return $string; } sub deserialize_list { my ( $self, $string ) = @_; - return $string unless defined $string; - my @escaped_parts = $string =~ / - \G # Start of string, or end of previous match. - ( # Each of these is an element in the original list. - [^\\,]* # Anything not a backslash or a comma - (?: # When we get here, we found one of the above. - \\. # A backslash followed by something so we can continue - [^\\,]* # Same as above. - )* # Repeat zero of more times. - ) - , # Comma dividing elements - /sxgc; + PTDEBUG && _d('Deserializing <', $string, '>'); + die "Cannot deserialize an undefined string" unless defined $string; - # Grab the rest of the string following the last match. - # If there wasn't a last match, like for a single-element list, - # the entire string represents the single element, so grab that. - push @escaped_parts, pos($string) ? substr( $string, pos($string) ) : $string; - - # Undo the escaping. - my @unescaped_parts = map { - my $part = $_; - if ($part eq '\\N') { - undef + my @parts; + foreach my $arg ( split(/(? 'all'; use English qw(-no_match_vars); use Test::More; +use Data::Dumper; use Quoter; use PerconaTest; +use DSNParser; +use Sandbox; +my $dp = new DSNParser(opts=>$dsn_opts); +my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); +my $dbh = $sb->get_dbh_for('master'); my $q = new Quoter; @@ -119,129 +125,126 @@ is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo # ########################################################################### # (de)serialize_list # ########################################################################### + +binmode(STDOUT, ':utf8') + or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR"; +binmode(STDERR, ':utf8') + or die "Can't binmode(STDERR, ':utf8'): $OS_ERROR"; -is( - $q->serialize_list(), - undef, - "Serialize empty list" -); +# Prevent "Wide character in print at Test/Builder.pm" warnings. +binmode Test::More->builder->$_(), ':encoding(UTF-8)' + for qw(output failure_output); -is( - $q->serialize_list(''), - '', - "Serialize 1 empty string", -); - -is( - $q->serialize_list('', '', ''), - ',,', - "Serialize 3 empty strings", -); - -is( - $q->serialize_list(undef), - undef, - "Serialize undef string", -); - -is( - $q->deserialize_list(undef), - undef, - "Deserialize undef string", -); - -is( - $q->serialize_list(undef, undef), - '\\N,\\N', - "Serialize two undefs", -); - -my @serialize_tests = ( +my @latin1_serialize_tests = ( + [ 'a' ], [ 'a', 'b', ], - [ 'a,', 'b', ], - [ "a,\\\nc\nas", 'b', ], - [ 'a\\\,a', 'c', ], - [ 'a\\\\,a', 'c', ], - [ 'a\\\\\,aa', 'c', ], - [ 'a\\\\\\,aa', 'c', ], - [ 'a\\\,a,a', 'c,d,e,d,', ], - [ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1 - [ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8 - [ ",,,,,,,,,,,,,,", ",", ], - [ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ], - [ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ], + [ 'a,', 'b', ], # trailing comma + [ 0 ], + [ 0, 0 ], [ 1, 2 ], - [ 7, 9 ], + [ '' ], # emptry string [ '', '', '', ], - [ '' ], - [ undef ], - [ undef, undef, '', undef ], - [ '\\N', '\\\\N', undef ], + [ undef ], # NULL + [ undef, undef ], + [ undef, '' ], + [ '\N' ], # literal \N + [ "un caf\x{e9} na\x{ef}ve" ], # Latin-1 ); -use DSNParser; -use Sandbox; -my $dp = new DSNParser(opts=>$dsn_opts); -my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp); -my $dbh = $sb->get_dbh_for('master'); -SKIP: { - skip 'Cannot connect to sandbox master', scalar @serialize_tests unless $dbh; +my @utf8_serialize_tests = ( + [ "\x{30cb} \x{e8}" ], # UTF-8 +); - # Prevent "Wide character in print at Test/Builder.pm" warnings. - binmode Test::More->builder->$_(), ':encoding(UTF-8)' - for qw(output failure_output); +SKIP: { + skip 'Cannot connect to sandbox master', scalar @latin1_serialize_tests + unless $dbh; $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test'); $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize'); - $dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)'); + $dbh->do('CREATE TABLE serialize_test.serialize (id INT, textval TEXT, blobval BLOB)'); - my $sth = $dbh->prepare( - "INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)" - ); - my $selsth = $dbh->prepare( - "SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1" + my $sth = $dbh->prepare( + "INSERT INTO serialize_test.serialize VALUES (?, ?, ?)" ); - for my $test_index ( 0..$#serialize_tests ) { - my $ser = $q->serialize_list( @{$serialize_tests[$test_index]} ); + for my $test_index ( 0..$#latin1_serialize_tests ) { - # Bit of a hack, but we want to test both of Perl's internal encodings - # for correctness. - local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser); - $sth->execute($test_index, $ser); - $selsth->execute($test_index); - - my $flat_string = "[" - . join( "][", - map { defined($_) ? $_ : '' } @{$serialize_tests[$test_index]} - ) - . "]"; + # Flat, friendly name for the test string + my $flat_string + = "[" + . join( "][", + map { defined($_) ? $_ : 'undef' } + @{$latin1_serialize_tests[$test_index]}) + . "]"; $flat_string =~ s/\n/\\n/g; - # diag($test_index); - SKIP: { - skip "DBD::mysql version $DBD::mysql::VERSION has utf8 bugs. " - . "See https://bugs.launchpad.net/percona-toolkit/+bug/932327", - 1 if $DBD::mysql::VERSION lt '4' && $test_index == 9; - is_deeply( - [ $q->deserialize_list($selsth->fetchrow_array()) ], - $serialize_tests[$test_index], - "Serialize $flat_string" - ); - } + # INSERT the serialized list of values. + my $ser = $q->serialize_list( @{$latin1_serialize_tests[$test_index]} ); + $sth->execute($test_index, $ser, $ser); + + # SELECT back the values and deserialize them. + my ($text_string) = $dbh->selectrow_array( + "SELECT textval FROM serialize_test.serialize WHERE id=$test_index"); + my @text_parts = $q->deserialize_list($text_string); + + is_deeply( + \@text_parts, + $latin1_serialize_tests[$test_index], + "Serialize TEXT $flat_string" + ) or diag(Dumper($text_string, \@text_parts)); + } +}; + +my $utf8_dbh = $sb->get_dbh_for('master'); +$utf8_dbh->{mysql_enable_utf8} = 1; +$utf8_dbh->do("SET NAMES 'utf8'"); +SKIP: { + skip 'Cannot connect to sandbox master', scalar @utf8_serialize_tests + unless $utf8_dbh; + + $utf8_dbh->do("DROP TABLE serialize_test.serialize"); + $utf8_dbh->do("CREATE TABLE serialize_test.serialize (id INT, textval TEXT, blobval BLOB) CHARSET='utf8'"); + + my $sth = $utf8_dbh->prepare( + "INSERT INTO serialize_test.serialize VALUES (?, ?, ?)" + ); + + for my $test_index ( 0..$#utf8_serialize_tests ) { + + # Flat, friendly name for the test string + my $flat_string + = "[" + . join( "][", + map { defined($_) ? $_ : 'undef' } + @{$utf8_serialize_tests[$test_index]}) + . "]"; + $flat_string =~ s/\n/\\n/g; + + # INSERT the serialized list of values. + my $ser = $q->serialize_list( @{$utf8_serialize_tests[$test_index]} ); + $sth->execute($test_index, $ser, $ser); + + # SELECT back the values and deserialize them. + my ($text_string) = $utf8_dbh->selectrow_array( + "SELECT textval FROM serialize_test.serialize WHERE id=$test_index"); + my @text_parts = $q->deserialize_list($text_string); + + is_deeply( + \@text_parts, + $utf8_serialize_tests[$test_index], + "Serialize TEXT $flat_string" + ) or diag(Dumper($text_string, \@text_parts)); } - $sth->finish(); - $selsth->finish(); - - $dbh->do("DROP DATABASE serialize_test"); - $sb->wipe_clean($dbh); - $dbh->disconnect(); + $utf8_dbh->disconnect(); }; # ########################################################################### # Done. # ########################################################################### +if ( $dbh ) { + $sb->wipe_clean($dbh); + $dbh->disconnect(); +} ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox"); - done_testing;