mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-11 21:51:21 +00:00
Merge lp:~percona-toolkit-dev/percona-toolkit/pt-table-checksum-2.0-serialize_list-v2.
This commit is contained in:
@@ -144,6 +144,67 @@ sub join_quote {
|
||||
return $db ? "$db.$tbl" : $tbl;
|
||||
}
|
||||
|
||||
# Flatten a list into one comma-separated string.  Every element is run
# through quotemeta() first, so a comma that is part of an element comes out
# backslash-escaped and cannot be mistaken for a separator.
sub serialize_list {
   my ( $self, @args ) = @_;

   # deserialize_list() always throws away one empty element at the end of
   # what it parses.  A list that really ends in an empty string would lose
   # that element, so such a list gets one extra separator appended (which
   # is the same as serializing one more empty element).
   my $ends_with_empty = @args && $args[-1] eq '';

   my $serialized = join ',', map { quotemeta $_ } @args;

   return $ends_with_empty ? "$serialized," : $serialized;
}
|
||||
|
||||
# Inverse of serialize_list(): split a serialized string on its unescaped
# commas and strip the quotemeta() escaping from every element.  Returns the
# list of restored elements.
sub deserialize_list {
   my ( $self, $string ) = @_;

   my @escaped_parts = $string =~ /
      \G             # Start of the string, or where the previous match ended.
      (              # One element, still escaped:
         [^\\,]*     #   a run that contains neither backslash nor comma,
         (?:         #   then, zero or more times,
            \\.      #     a backslash together with the character after it
            [^\\,]*  #     and another plain run.
         )*
      )
      ,?             # The separator after the element, if there is one.
   /sxg;

   # The pattern also matches the empty string at the very end, so the last
   # capture is always a spurious empty element.  Dropping it here is simpler
   # than tightening the pattern.
   pop @escaped_parts;

   # quotemeta() behaves differently depending on the internal representation
   # of the scalar: in a downgraded string the whole \x80-\xFF range gets a
   # backslash, in a UTF-8 string that range is left alone.  The unescaping
   # therefore has to make the same distinction.
   #
   # TODO: quotemeta() might change in 5.16 to mean
   # qr/(?=\p{ASCII})\W|\p{Pattern_Syntax}/ and this behavior might be fixed
   # under "use feature 'unicode_strings'".  If that happens, these two
   # patterns will have to change.
   my $ascii_nonword     = qr/(?=\p{ASCII})\W/;
   my $nonword_or_latin1 = qr/(?=\p{ASCII})\W|[\x{80}-\x{FF}]/;

   my @unescaped_parts;
   for my $escaped ( @escaped_parts ) {
      my $part = $escaped;

      # UTF-8 strings only had ASCII non-word characters escaped; anything
      # else also had the latin-1 range escaped.
      my $char_class = utf8::is_utf8($part) ? $ascii_nonword
                     :                        $nonword_or_latin1;

      # Undo the quotemeta().
      $part =~ s/\\($char_class)/$1/g;
      push @unescaped_parts, $part;
   }

   return @unescaped_parts;
}
|
||||
|
||||
1;
|
||||
}
|
||||
# ###########################################################################
|
||||
|
@@ -9,7 +9,7 @@ BEGIN {
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use English qw(-no_match_vars);
|
||||
use Test::More tests => 31;
|
||||
use Test::More tests => 47;
|
||||
|
||||
use Quoter;
|
||||
use PerconaTest;
|
||||
@@ -99,4 +99,81 @@ is( $q->join_quote('`db`', '`tbl`'), '`db`.`tbl`', 'join_merge(`db`, `tbl`)' );
|
||||
is( $q->join_quote(undef, '`tbl`'), '`tbl`', 'join_merge(undef, `tbl`)' );
|
||||
is( $q->join_quote('`db`', '`foo`.`tbl`'), '`foo`.`tbl`', 'join_merge(`db`, `foo`.`tbl`)' );
|
||||
|
||||
# ###########################################################################
# (de)serialize_list
# ###########################################################################

# Each entry is a list that has to come back unchanged after going through
# serialize_list(), a TEXT column in MySQL, and deserialize_list().
my @serialize_tests = (
   [ 'a', 'b', ],
   [ 'a,', 'b', ],
   [ "a,\\\nc\nas", 'b', ],
   [ 'a\\\,a', 'c', ],
   [ 'a\\\\,a', 'c', ],
   [ 'a\\\\\,aa', 'c', ],
   [ 'a\\\\\\,aa', 'c', ],
   [ 'a\\\,a,a', 'c,d,e,d,', ],
   [ "\\\,\x{e8},a", '!!!!__!*`,`\\', ], # Latin-1
   [ "\x{30cb}\\\,\x{e8},a", '!!!!__!*`,`\\', ], # UTF-8
   [ ",,,,,,,,,,,,,,", ",", ],
   [ "\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,\\,,,,\\", ":(", ],
   [ "asdfa", "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\,a", ],
   [ 1, 2 ],
   [ 7, 9 ],
   [ '', '', '', ],
);

use DSNParser;
use Sandbox;

# Direct method calls instead of the indirect object syntax ("new Class(...)"),
# which perlobj recommends against because of its parsing ambiguities.
my $dp  = DSNParser->new(opts => $dsn_opts);
my $sb  = Sandbox->new(basedir => '/tmp', DSNParser => $dp);
my $dbh = $sb->get_dbh_for('master');

SKIP: {
   # One test per entry in @serialize_tests; skip them all without a server.
   skip 'Cannot connect to sandbox master', scalar @serialize_tests unless $dbh;

   # Prevent "Wide character in print at Test/Builder.pm" warnings.
   binmode Test::More->builder->$_(), ':encoding(UTF-8)'
      for qw(output failure_output);

   # Scratch table that holds one serialized string per test, keyed by the
   # test's index.
   $dbh->do('CREATE DATABASE IF NOT EXISTS serialize_test');
   $dbh->do('DROP TABLE IF EXISTS serialize_test.serialize');
   $dbh->do('CREATE TABLE serialize_test.serialize (id INT, foo TEXT)');

   my $sth = $dbh->prepare(
      "INSERT INTO serialize_test.serialize (id, foo) VALUES (?, ?)"
   );
   my $selsth = $dbh->prepare(
      "SELECT foo FROM serialize_test.serialize WHERE id=? LIMIT 1"
   );

   for my $test_index ( 0..$#serialize_tests ) {
      my $ser = $q->serialize_list( @{$serialize_tests[$test_index]} );

      # Bit of a hack, but we want to test both of Perl's internal encodings
      # for correctness.
      local $dbh->{'mysql_enable_utf8'} = 1 if utf8::is_utf8($ser);

      # Store the serialized string, then read it back by id.
      $sth->execute($test_index, $ser);
      $selsth->execute($test_index);

      # Human-readable test name: the elements in brackets, with newlines
      # made visible.
      my $flat_string = "[" . join("][", @{$serialize_tests[$test_index]}) . "]";
      $flat_string =~ s/\n/\\n/g;

      is_deeply(
         [ $q->deserialize_list($selsth->fetchrow_array()) ],
         $serialize_tests[$test_index],
         "Serialize $flat_string"
      );
   }

   $sth->finish();
   $selsth->finish();

   $dbh->do("DROP DATABASE serialize_test");

   $dbh->disconnect();
};
|
||||
|
||||
# ###########################################################################
|
||||
# Done.
|
||||
# ###########################################################################
|
||||
exit;
|
||||
|
@@ -139,7 +139,8 @@ $slave1_dbh = $sb->get_dbh_for('slave1');
|
||||
# But since db mysql is ignored, the new results for mysql.user should
|
||||
# not replicate.
|
||||
pt_table_checksum::main(@args, qw(--no-check-replication-filters),
|
||||
'-t', 'mysql.user,sakila.city', qw(--quiet --no-replicate-check));
|
||||
'-t', 'mysql.user,sakila.city', qw(--quiet --no-replicate-check),
|
||||
qw(--chunk-size 1000));
|
||||
|
||||
PerconaTest::wait_for_table($slave1_dbh, 'percona.checksums', "db='sakila' and tbl='city' and chunk=1");
|
||||
|
||||
|
Reference in New Issue
Block a user