Include serialize_list() and deserialize_list() to roundtrip through a DB.

This commit is contained in:
Brian Fraser
2011-12-28 16:44:49 -03:00
parent 56375da166
commit 88e82d665d
2 changed files with 135 additions and 1 deletions

View File

@@ -144,6 +144,68 @@ sub join_quote {
return $db ? "$db.$tbl" : $tbl; return $db ? "$db.$tbl" : $tbl;
} }
# Nothing much going on here. Return the list passed in,
# with the elements passed through quotemeta, and the results
# concatenated with ','
sub serialize_list {
my @args = @_;
if ( @args && $args[-1] eq '' ) {
# If the last element is an empty string, it conflicts
# with the assumptions of the somewhat lax regex below,
# which always leaves an empty element in the end.
# We could fix the regex, but it's a lot of extra
# complexity for little gain, or we could add a
# special-case here. Just by tagging another empty
# string, we get the desired result.
push @args, '';
}
return join ',', map { quotemeta } @args;
}
sub deserialize_list {
my ( $string ) = @_;
my @escaped_parts = $string =~ /
\G # Start of string, or end of previous match.
( # Each of these is an element in the original list.
[^\\,]* # Anything not a backslash or a comma
(?: # When we get here, we found one of the above.
\\. # A backslash followed by something means we can continue
[^\\,]* # Same as above.
)* # Repeat zero of more times.
)
(?:,|\z) # Comma dividing elements, or absolute end of the string.
/sxg;
pop @escaped_parts; # Last element will always be empty. Flaw in the regex.
# But easier to fix this way. Faster, too.
my @unescaped_parts = map {
# Undo the quotemeta().
my $part = $_;
# Here be weirdness. Unfortunately quotemeta() is broken, and exposes
# the internal representation of scalars. Namely, the latin-1 range,
# \128-\377 (\p{Latin1} in newer Perls) is all escaped in downgraded
# strings, but left alone in UTF-8 strings. Thus, this.
# TODO: quotemeta() might change in 5.16 to mean
# qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/
# And also fix this whole weird behavior under
# use feature 'unicode_strings' -- If/once that's
# implemented, this will have to change.
my $char_class = utf8::is_utf8($part) # If it's a UTF-8 string,
? qr/(?=\p{ASCII})\W/ # We only care about non-word
# characters in the ASCII range
: qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/; # Otherwise,
# same as above, but also
# unescape the latin-1 range.
$part =~ s/\\($char_class)/$1/g;
# As a somewhat uplifting note, all of the above is more
# or less fixed in newer Perls! quotemeta() is still
# broken, but regexen can deal with it more naturally.
$part;
} @escaped_parts;
return @unescaped_parts;
}
1; 1;
} }
# ########################################################################### # ###########################################################################

View File

@@ -9,7 +9,7 @@ BEGIN {
use strict; use strict;
use warnings FATAL => 'all'; use warnings FATAL => 'all';
use English qw(-no_match_vars); use English qw(-no_match_vars);
use Test::More tests => 31; use Test::More tests => 63;
use Quoter; use Quoter;
use PerconaTest; use PerconaTest;
@@ -99,4 +99,76 @@ is( $q->join_quote('`db`', '`tbl`'), '`db`.`tbl`', 'join_merge(`db`, `tbl`)' );
is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' ); is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' );
is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' ); is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' );
my @serialize_tests = (
[ 'a', 'b', ],
[ 'a,', 'b', ],
[ "a,\\\nc\nas", 'b', ],
[ 'a\\\,a', 'c', ],
[ 'a\\\\,a', 'c', ],
[ 'a\\\\\,aa', 'c', ],
[ 'a\\\\\\,aa', 'c', ],
[ 'a\\\,a,a', 'c,d,e,d,', ],
[ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1
[ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8
[ ",,,,,,,,,,,,,,", ",", ],
[ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ],
[ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ],
[ 1, 2 ],
[ 7, 9 ],
[ '', '', '', ],
);
for my $test ( @serialize_tests ) {
my $ser = Quoter::serialize_list( @$test );
is_deeply(
[Quoter::deserialize_list($ser)],
$test,
"serialize then deserialize works"
);
}
use DSNParser;
use Sandbox;
my $dp = new DSNParser(opts=>$dsn_opts);
my $sb = new Sandbox(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');
SKIP: {
skip 'Cannot connect to sandbox master', 1 unless $dbh;
$dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test');
$dbh->do('DROP TABLE IF EXISTS serialize_test.serialize');
$dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)');
my $sth = $dbh->prepare(
"INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)"
);
my $selsth = $dbh->prepare(
"SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1"
);
for my $test_index ( 0..$#serialize_tests ) {
my $ser = Quoter::serialize_list( @{$serialize_tests[$test_index]} );
# Bit of a hack, but we want to test both of Perl's internal encodings
# for correctness.
local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser);
$sth->execute($test_index, $ser);
$selsth->execute($test_index);
is_deeply(
[Quoter::deserialize_list($selsth->fetchrow_array())],
$serialize_tests[$test_index],
"serialize then deserialize through the DB works"
);
}
$sth->finish();
$selsth->finish();
$dbh->do("DROP DATABASE serialize_test");
$dbh->disconnect();
};
exit; exit;