mirror of
https://github.com/percona/percona-toolkit.git
synced 2025-09-11 13:40:07 +00:00
Fix for bug 1127450: pt-archiver "Wide character" errors / corrupted data with UTF-8 + --bulk-insert
This commit is contained in:
@@ -5450,6 +5450,7 @@ sub main {
|
|||||||
my $archive_file = $o->get('file');
|
my $archive_file = $o->get('file');
|
||||||
my $txnsize = $o->get('txn-size');
|
my $txnsize = $o->get('txn-size');
|
||||||
my $quiet = $o->get('quiet');
|
my $quiet = $o->get('quiet');
|
||||||
|
my $got_charset = $o->get('charset');
|
||||||
|
|
||||||
# First things first: if --stop was given, create the sentinel file.
|
# First things first: if --stop was given, create the sentinel file.
|
||||||
if ( $o->get('stop') ) {
|
if ( $o->get('stop') ) {
|
||||||
@@ -5833,7 +5834,9 @@ sub main {
|
|||||||
. ' LOCAL INFILE ?'
|
. ' LOCAL INFILE ?'
|
||||||
. ($o->get('replace') ? ' REPLACE' : '')
|
. ($o->get('replace') ? ' REPLACE' : '')
|
||||||
. ($o->get('ignore') ? ' IGNORE' : '')
|
. ($o->get('ignore') ? ' IGNORE' : '')
|
||||||
. " INTO TABLE $dst->{db_tbl}("
|
. " INTO TABLE $dst->{db_tbl}"
|
||||||
|
. ($got_charset ? "CHARACTER SET $got_charset" : "")
|
||||||
|
. "("
|
||||||
. join(",", map { $q->quote($_) } @{$ins_stmt->{cols}} )
|
. join(",", map { $q->quote($_) } @{$ins_stmt->{cols}} )
|
||||||
. ")";
|
. ")";
|
||||||
}
|
}
|
||||||
@@ -5942,28 +5945,29 @@ sub main {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
# Open the file and print the header to it.
|
my $charset = $got_charset || '';
|
||||||
if ( $archive_file ) {
|
if ($charset eq 'utf8') {
|
||||||
my $need_hdr = $o->get('header') && !-f $archive_file;
|
$charset = ":$charset";
|
||||||
my $charset = $o->get('charset') || '';
|
}
|
||||||
if ($charset eq 'utf8') {
|
elsif ($charset) {
|
||||||
$charset = ":$charset";
|
eval { require Encode }
|
||||||
}
|
|
||||||
elsif ($charset) {
|
|
||||||
eval { require Encode }
|
|
||||||
or (PTDEBUG &&
|
or (PTDEBUG &&
|
||||||
_d("Couldn't load Encode: ", $EVAL_ERROR,
|
_d("Couldn't load Encode: ", $EVAL_ERROR,
|
||||||
"Going to try using the charset ",
|
"Going to try using the charset ",
|
||||||
"passed in without checking it."));
|
"passed in without checking it."));
|
||||||
# No need to punish a user if they did their
|
# No need to punish a user if they did their
|
||||||
# homework and passed in an official charset,
|
# homework and passed in an official charset,
|
||||||
# rather than an alias.
|
# rather than an alias.
|
||||||
$charset = ":encoding("
|
$charset = ":encoding("
|
||||||
. (defined &Encode::resolve_alias
|
. (defined &Encode::resolve_alias
|
||||||
? Encode::resolve_alias($charset) || $charset
|
? Encode::resolve_alias($charset) || $charset
|
||||||
: $charset)
|
: $charset)
|
||||||
. ")";
|
. ")";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Open the file and print the header to it.
|
||||||
|
if ( $archive_file ) {
|
||||||
|
my $need_hdr = $o->get('header') && !-f $archive_file;
|
||||||
$archive_fh = IO::File->new($archive_file, ">>$charset")
|
$archive_fh = IO::File->new($archive_file, ">>$charset")
|
||||||
or die "Cannot open $charset $archive_file: $OS_ERROR\n";
|
or die "Cannot open $charset $archive_file: $OS_ERROR\n";
|
||||||
$archive_fh->autoflush(1) unless $o->get('buffer');
|
$archive_fh->autoflush(1) unless $o->get('buffer');
|
||||||
@@ -5979,6 +5983,9 @@ sub main {
|
|||||||
require File::Temp;
|
require File::Temp;
|
||||||
$bulkins_file = File::Temp->new( SUFFIX => 'pt-archiver' )
|
$bulkins_file = File::Temp->new( SUFFIX => 'pt-archiver' )
|
||||||
or die "Cannot open temp file: $OS_ERROR\n";
|
or die "Cannot open temp file: $OS_ERROR\n";
|
||||||
|
binmode($bulkins_file, $charset)
|
||||||
|
or die "Cannot set $charset as an encoding for the bulk-insert "
|
||||||
|
. "file: $OS_ERROR";
|
||||||
}
|
}
|
||||||
|
|
||||||
# This row is the first row fetched from each 'chunk'.
|
# This row is the first row fetched from each 'chunk'.
|
||||||
@@ -6205,6 +6212,9 @@ sub main {
|
|||||||
if ( $o->get('bulk-insert') ) {
|
if ( $o->get('bulk-insert') ) {
|
||||||
$bulkins_file = File::Temp->new( SUFFIX => 'pt-archiver' )
|
$bulkins_file = File::Temp->new( SUFFIX => 'pt-archiver' )
|
||||||
or die "Cannot open temp file: $OS_ERROR\n";
|
or die "Cannot open temp file: $OS_ERROR\n";
|
||||||
|
binmode($bulkins_file, $charset)
|
||||||
|
or die "Cannot set $charset as an encoding for the bulk-insert "
|
||||||
|
. "file: $OS_ERROR";
|
||||||
}
|
}
|
||||||
} # no next row (do bulk operations)
|
} # no next row (do bulk operations)
|
||||||
else {
|
else {
|
||||||
|
@@ -11,6 +11,8 @@ use warnings FATAL => 'all';
|
|||||||
use English qw(-no_match_vars);
|
use English qw(-no_match_vars);
|
||||||
use Test::More;
|
use Test::More;
|
||||||
|
|
||||||
|
use charnames ':full';
|
||||||
|
|
||||||
use PerconaTest;
|
use PerconaTest;
|
||||||
use Sandbox;
|
use Sandbox;
|
||||||
require "$trunk/bin/pt-archiver";
|
require "$trunk/bin/pt-archiver";
|
||||||
@@ -84,6 +86,40 @@ is_deeply(
|
|||||||
"--bulk-insert archived 7 rows (issue 1260)"
|
"--bulk-insert archived 7 rows (issue 1260)"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
# #############################################################################
|
||||||
|
# pt-archiver wide character errors / corrupted data with UTF-8 + bulk-insert
|
||||||
|
# https://bugs.launchpad.net/percona-toolkit/+bug/1127450
|
||||||
|
# #############################################################################
|
||||||
|
{
   # Regression check for bug 1127450: archive one row holding a non-ASCII
   # character with --bulk-insert and --charset utf8, then read it back from
   # the destination table.

   # Handle opened with mysql_enable_utf8 set.
   # NOTE(review): presumably this makes the driver return decoded character
   # strings, which is what the utf8::is_utf8() check below relies on -- confirm.
   my %utf8_cxn_opts = ( mysql_enable_utf8 => 1, AutoCommit => 1 );
   my $utf8_dbh      = $sb->get_dbh_for('master', \%utf8_cxn_opts);

   # Load the fixture schema, then seed the source table with a single row.
   $sb->load_file('master', 't/pt-archiver/samples/bug_1127450.sql');
   $utf8_dbh->do(
      qq{INSERT INTO `bug_1127450`.`original` VALUES (1, "\N{KATAKANA LETTER NI}")}
   );

   # Run the tool and keep whatever output() captures for the warning check.
   my @archiver_args = (
      qw(--no-ascend --limit 50 --bulk-insert),
      qw(--bulk-delete --where 1=1 --statistics --charset utf8),
      '--source', "L=1,D=bug_1127450,t=original,F=$cnf",
      '--dest', "t=copy",
   );
   $output = output(
      sub { pt_archiver::main(@archiver_args) },
      stderr => 1,
   );

   # The second column of the fetched row is the text value under test.
   my @copied_row  = $utf8_dbh->selectrow_array('select * from bug_1127450.copy');
   my $copied_text = $copied_row[1];

   ok(utf8::is_utf8($copied_text), "--bulk-insert preserves UTF8ness");

   is(
      $copied_text,
      "\N{KATAKANA LETTER NI}",
      "--bulk-insert can handle utf8 characters"
   );

   unlike($output, qr/Wide character/, "no wide character warnings");
}
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
# Done.
|
# Done.
|
||||||
# #############################################################################
|
# #############################################################################
|
||||||
|
12
t/pt-archiver/samples/bug_1127450.sql
Normal file
12
t/pt-archiver/samples/bug_1127450.sql
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
-- Fixture for bug 1127450: two tables with the same definition, both in
-- the `bug_1127450` database. The accompanying test inserts into `original`
-- and reads back from `copy`.
DROP DATABASE IF EXISTS `bug_1127450`;
CREATE DATABASE `bug_1127450`;

-- Source table: the test inserts its utf8 row here.
CREATE TABLE `bug_1127450`.`original` (
   id INT,
   t  TEXT CHARACTER SET utf8,
   PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- Destination table: same columns and key as `original`.
CREATE TABLE `bug_1127450`.`copy` (
   id INT,
   t  TEXT CHARACTER SET utf8,
   PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
Reference in New Issue
Block a user