Simplify and re-test Quoter::(de)serialize_list().

This commit is contained in:
Daniel Nichter
2013-02-12 12:58:26 -07:00
parent 9512f631c9
commit 0fd4f27289
2 changed files with 157 additions and 141 deletions

View File

@@ -27,6 +27,11 @@ use warnings FATAL => 'all';
use English qw(-no_match_vars);
use constant PTDEBUG => $ENV{PTDEBUG} || 0;
use Data::Dumper;
$Data::Dumper::Indent = 1;
$Data::Dumper::Sortkeys = 1;
$Data::Dumper::Quotekeys = 0;
# Sub: new
#
# Parameters:
@@ -155,57 +160,65 @@ sub join_quote {
# and the results concatenated with ','.
sub serialize_list {
my ( $self, @args ) = @_;
return unless @args;
PTDEBUG && _d('Serializing', Dumper(\@args));
die "Cannot serialize an empty array" unless scalar @args;
# If the only value is undef, which is NULL for MySQL, then return
# the same. undef/NULL is a valid boundary value, however...
return $args[0] if @args == 1 && !defined $args[0];
my @parts;
foreach my $arg ( @args ) {
if ( defined $arg ) {
$arg =~ s/,/\\,/g; # escape commas
$arg =~ s/\\N/\\\\N/g; # escape literal \N
push @parts, $arg;
}
else {
push @parts, '\N';
}
}
return join ',', map {
my $c = $_;
if ( defined($c) ) {
$c =~ s/([^A-Za-z0-9])/\\$1/g;
$c
}
else {
'\\N'
}
} @args;
my $string = join(',', @parts);
PTDEBUG && _d('Serialized: <', $string, '>');
return $string;
}
sub deserialize_list {
my ( $self, $string ) = @_;
return $string unless defined $string;
my @escaped_parts = $string =~ /
\G # Start of string, or end of previous match.
( # Each of these is an element in the original list.
[^\\,]* # Anything not a backslash or a comma
(?: # When we get here, we found one of the above.
\\. # A backslash followed by something so we can continue
[^\\,]* # Same as above.
)* # Repeat zero of more times.
)
, # Comma dividing elements
/sxgc;
PTDEBUG && _d('Deserializing <', $string, '>');
die "Cannot deserialize an undefined string" unless defined $string;
# Grab the rest of the string following the last match.
# If there wasn't a last match, like for a single-element list,
# the entire string represents the single element, so grab that.
push @escaped_parts, pos($string) ? substr( $string, pos($string) ) : $string;
# Undo the escaping.
my @unescaped_parts = map {
my $part = $_;
if ($part eq '\\N') {
undef
my @parts;
foreach my $arg ( split(/(?<!\\),/, $string) ) {
if ( $arg eq '\N' ) {
$arg = undef;
}
else {
$part =~ s/\\([^A-Za-z0-9])/$1/g;
$part;
$arg =~ s/\\,/,/g;
$arg =~ s/\\\\N/\\N/g;
}
} @escaped_parts;
return @unescaped_parts;
push @parts, $arg;
}
if ( !@parts ) {
# Perl split() won't split ",,", so handle it manually.
my $n_empty_strings = $string =~ tr/,//;
$n_empty_strings++;
PTDEBUG && _d($n_empty_strings, 'empty strings');
map { push @parts, '' } 1..$n_empty_strings;
}
elsif ( $string =~ m/(?<!\\),$/ ) {
PTDEBUG && _d('Last value is an empty string');
push @parts, '';
}
PTDEBUG && _d('Deserialized', Dumper(\@parts));
return @parts;
}
sub _d {
my ($package, undef, $line) = caller 0;
@_ = map { (my $temp = $_) =~ s/\n/\n# /g; $temp; }
map { defined $_ ? $_ : 'undef' }
@_;
print STDERR "# $package:$line $PID ", join(' ', @_), "\n";
}
1;

View File

@@ -10,9 +10,15 @@ use strict;
use warnings FATAL => 'all';
use English qw(-no_match_vars);
use Test::More;
use Data::Dumper;
use Quoter;
use PerconaTest;
use DSNParser;
use Sandbox;
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');
my $q = new Quoter;
@@ -119,129 +125,126 @@ is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo
# ###########################################################################
# (de)serialize_list
# ###########################################################################
binmode(STDOUT, ':utf8')
or die "Can't binmode(STDOUT, ':utf8'): $OS_ERROR";
binmode(STDERR, ':utf8')
or die "Can't binmode(STDERR, ':utf8'): $OS_ERROR";
is(
$q->serialize_list(),
undef,
"Serialize empty list"
);
# Prevent "Wide character in print at Test/Builder.pm" warnings.
binmode Test::More->builder->$_(), ':encoding(UTF-8)'
for qw(output failure_output);
is(
$q->serialize_list(''),
'',
"Serialize 1 empty string",
);
is(
$q->serialize_list('', '', ''),
',,',
"Serialize 3 empty strings",
);
is(
$q->serialize_list(undef),
undef,
"Serialize undef string",
);
is(
$q->deserialize_list(undef),
undef,
"Deserialize undef string",
);
is(
$q->serialize_list(undef, undef),
'\\N,\\N',
"Serialize two undefs",
);
my @serialize_tests = (
my @latin1_serialize_tests = (
[ 'a' ],
[ 'a', 'b', ],
[ 'a,', 'b', ],
[ "a,\\\nc\nas", 'b', ],
[ 'a\\\,a', 'c', ],
[ 'a\\\\,a', 'c', ],
[ 'a\\\\\,aa', 'c', ],
[ 'a\\\\\\,aa', 'c', ],
[ 'a\\\,a,a', 'c,d,e,d,', ],
[ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1
[ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8
[ ",,,,,,,,,,,,,,", ",", ],
[ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ],
[ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ],
[ 'a,', 'b', ], # trailing comma
[ 0 ],
[ 0, 0 ],
[ 1, 2 ],
[ 7, 9 ],
[ '' ], # emptry string
[ '', '', '', ],
[ '' ],
[ undef ],
[ undef, undef, '', undef ],
[ '\\N', '\\\\N', undef ],
[ undef ], # NULL
[ undef, undef ],
[ undef, '' ],
[ '\N' ], # literal \N
[ "un caf\x{e9} na\x{ef}ve" ], # Latin-1
);
use DSNParser;
use Sandbox;
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');
SKIP: {
skip 'Cannot connect to sandbox master', scalar @serialize_tests unless $dbh;
my @utf8_serialize_tests = (
[ "\x{30cb} \x{e8}" ], # UTF-8
);
# Prevent "Wide character in print at Test/Builder.pm" warnings.
binmode Test::More->builder->$_(), ':encoding(UTF-8)'
for qw(output failure_output);
SKIP: {
skip 'Cannot connect to sandbox master', scalar @latin1_serialize_tests
unless $dbh;
$dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test');
$dbh->do('DROP TABLE IF EXISTS serialize_test.serialize');
$dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)');
$dbh->do('CREATE TABLE serialize_test.serialize (id INT, textval TEXT, blobval BLOB)');
my $sth = $dbh->prepare(
"INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)"
);
my $selsth = $dbh->prepare(
"SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1"
my $sth = $dbh->prepare(
"INSERT INTO serialize_test.serialize VALUES (?, ?, ?)"
);
for my $test_index ( 0..$#serialize_tests ) {
my $ser = $q->serialize_list( @{$serialize_tests[$test_index]} );
for my $test_index ( 0..$#latin1_serialize_tests ) {
# Bit of a hack, but we want to test both of Perl's internal encodings
# for correctness.
local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser);
$sth->execute($test_index, $ser);
$selsth->execute($test_index);
my $flat_string = "["
. join( "][",
map { defined($_) ? $_ : '<undef>' } @{$serialize_tests[$test_index]}
)
. "]";
# Flat, friendly name for the test string
my $flat_string
= "["
. join( "][",
map { defined($_) ? $_ : 'undef' }
@{$latin1_serialize_tests[$test_index]})
. "]";
$flat_string =~ s/\n/\\n/g;
# diag($test_index);
SKIP: {
skip "DBD::mysql version $DBD::mysql::VERSION has utf8 bugs. "
. "See https://bugs.launchpad.net/percona-toolkit/+bug/932327",
1 if $DBD::mysql::VERSION lt '4' && $test_index == 9;
is_deeply(
[ $q->deserialize_list($selsth->fetchrow_array()) ],
$serialize_tests[$test_index],
"Serialize $flat_string"
);
}
# INSERT the serialized list of values.
my $ser = $q->serialize_list( @{$latin1_serialize_tests[$test_index]} );
$sth->execute($test_index, $ser, $ser);
# SELECT back the values and deserialize them.
my ($text_string) = $dbh->selectrow_array(
"SELECT textval FROM serialize_test.serialize WHERE id=$test_index");
my @text_parts = $q->deserialize_list($text_string);
is_deeply(
\@text_parts,
$latin1_serialize_tests[$test_index],
"Serialize TEXT $flat_string"
) or diag(Dumper($text_string, \@text_parts));
}
};
my $utf8_dbh = $sb->get_dbh_for('master');
$utf8_dbh->{mysql_enable_utf8} = 1;
$utf8_dbh->do("SET NAMES 'utf8'");
SKIP: {
skip 'Cannot connect to sandbox master', scalar @utf8_serialize_tests
unless $utf8_dbh;
$utf8_dbh->do("DROP TABLE serialize_test.serialize");
$utf8_dbh->do("CREATE TABLE serialize_test.serialize (id INT, textval TEXT, blobval BLOB) CHARSET='utf8'");
my $sth = $utf8_dbh->prepare(
"INSERT INTO serialize_test.serialize VALUES (?, ?, ?)"
);
for my $test_index ( 0..$#utf8_serialize_tests ) {
# Flat, friendly name for the test string
my $flat_string
= "["
. join( "][",
map { defined($_) ? $_ : 'undef' }
@{$utf8_serialize_tests[$test_index]})
. "]";
$flat_string =~ s/\n/\\n/g;
# INSERT the serialized list of values.
my $ser = $q->serialize_list( @{$utf8_serialize_tests[$test_index]} );
$sth->execute($test_index, $ser, $ser);
# SELECT back the values and deserialize them.
my ($text_string) = $utf8_dbh->selectrow_array(
"SELECT textval FROM serialize_test.serialize WHERE id=$test_index");
my @text_parts = $q->deserialize_list($text_string);
is_deeply(
\@text_parts,
$utf8_serialize_tests[$test_index],
"Serialize TEXT $flat_string"
) or diag(Dumper($text_string, \@text_parts));
}
$sth->finish();
$selsth->finish();
$dbh->do("DROP DATABASE serialize_test");
$sb->wipe_clean($dbh);
$dbh->disconnect();
$utf8_dbh->disconnect();
};
# ###########################################################################
# Done.
# ###########################################################################
if ( $dbh ) {
$sb->wipe_clean($dbh);
$dbh->disconnect();
}
ok($sb->ok(), "Sandbox servers") or BAIL_OUT(__FILE__ . " broke the sandbox");
done_testing;