diff --git a/bin/pt-archiver b/bin/pt-archiver index 5019a3be..0446f9ad 100755 --- a/bin/pt-archiver +++ b/bin/pt-archiver @@ -5450,6 +5450,7 @@ sub main { my $archive_file = $o->get('file'); my $txnsize = $o->get('txn-size'); my $quiet = $o->get('quiet'); + my $got_charset = $o->get('charset'); # First things first: if --stop was given, create the sentinel file. if ( $o->get('stop') ) { @@ -5833,7 +5834,9 @@ sub main { . ' LOCAL INFILE ?' . ($o->get('replace') ? ' REPLACE' : '') . ($o->get('ignore') ? ' IGNORE' : '') - . " INTO TABLE $dst->{db_tbl}(" + . " INTO TABLE $dst->{db_tbl}" + . ($got_charset ? " CHARACTER SET $got_charset " : "") + . "(" . join(",", map { $q->quote($_) } @{$ins_stmt->{cols}} ) . ")"; } @@ -5942,28 +5945,29 @@ sub main { return 0; } - # Open the file and print the header to it. - if ( $archive_file ) { - my $need_hdr = $o->get('header') && !-f $archive_file; - my $charset = $o->get('charset') || ''; - if ($charset eq 'utf8') { - $charset = ":$charset"; - } - elsif ($charset) { - eval { require Encode } + my $charset = $got_charset || ''; + if ($charset eq 'utf8') { + $charset = ":$charset"; + } + elsif ($charset) { + eval { require Encode } or (PTDEBUG && _d("Couldn't load Encode: ", $EVAL_ERROR, "Going to try using the charset ", "passed in without checking it.")); - # No need to punish a user if they did their - # homework and passed in an official charset, - # rather than an alias. - $charset = ":encoding(" - . (defined &Encode::resolve_alias - ? Encode::resolve_alias($charset) || $charset - : $charset) - . ")"; - } + # No need to punish a user if they did their + # homework and passed in an official charset, + # rather than an alias. + $charset = ":encoding(" + . (defined &Encode::resolve_alias + ? Encode::resolve_alias($charset) || $charset + : $charset) + . ")"; + } + + # Open the file and print the header to it. 
+ if ( $archive_file ) { + my $need_hdr = $o->get('header') && !-f $archive_file; $archive_fh = IO::File->new($archive_file, ">>$charset") or die "Cannot open $charset $archive_file: $OS_ERROR\n"; $archive_fh->autoflush(1) unless $o->get('buffer'); @@ -5979,6 +5983,9 @@ sub main { require File::Temp; $bulkins_file = File::Temp->new( SUFFIX => 'pt-archiver' ) or die "Cannot open temp file: $OS_ERROR\n"; + binmode($bulkins_file, $charset) + or die "Cannot set $charset as an encoding for the bulk-insert " + . "file: $OS_ERROR"; } # This row is the first row fetched from each 'chunk'. @@ -6205,6 +6212,9 @@ sub main { if ( $o->get('bulk-insert') ) { $bulkins_file = File::Temp->new( SUFFIX => 'pt-archiver' ) or die "Cannot open temp file: $OS_ERROR\n"; + binmode($bulkins_file, $charset) + or die "Cannot set $charset as an encoding for the bulk-insert " + . "file: $OS_ERROR"; } } # no next row (do bulk operations) else { diff --git a/t/pt-archiver/bulk_insert.t b/t/pt-archiver/bulk_insert.t index e76be09d..4a324532 100644 --- a/t/pt-archiver/bulk_insert.t +++ b/t/pt-archiver/bulk_insert.t @@ -11,6 +11,8 @@ use warnings FATAL => 'all'; use English qw(-no_match_vars); use Test::More; +use charnames ':full'; + use PerconaTest; use Sandbox; require "$trunk/bin/pt-archiver"; @@ -84,6 +86,40 @@ is_deeply( "--bulk-insert archived 7 rows (issue 1260)" ); +# ############################################################################# +# pt-archiver wide character errors / corrupted data with UTF-8 + bulk-insert +# https://bugs.launchpad.net/percona-toolkit/+bug/1127450 +# ############################################################################# +{ +my $utf8_dbh = $sb->get_dbh_for('master', { mysql_enable_utf8 => 1, AutoCommit => 1 }); + +$sb->load_file('master', 't/pt-archiver/samples/bug_1127450.sql'); +my $sql = qq{INSERT INTO `bug_1127450`.`original` VALUES (1, "\N{KATAKANA LETTER NI}")}; +$utf8_dbh->do($sql); + +$output = output( + sub { pt_archiver::main(qw(--no-ascend 
+ --limit 50 --bulk-insert), + qw(--bulk-delete --where 1=1 --statistics --charset utf8), + '--source', "L=1,D=bug_1127450,t=original,F=$cnf", + '--dest', "t=copy") }, stderr => 1 +); + +my (undef, $val) = $utf8_dbh->selectrow_array('select * from bug_1127450.copy'); + +ok( + utf8::is_utf8($val), + "--bulk-insert preserves UTF8ness" +); + +is( + $val, + "\N{KATAKANA LETTER NI}", + "--bulk-insert can handle utf8 characters" +); + +unlike($output, qr/Wide character/, "no wide character warnings"); + +} # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-archiver/samples/bug_1127450.sql b/t/pt-archiver/samples/bug_1127450.sql new file mode 100644 index 00000000..c960a89a --- /dev/null +++ b/t/pt-archiver/samples/bug_1127450.sql @@ -0,0 +1,12 @@ +DROP DATABASE IF EXISTS `bug_1127450`; +CREATE DATABASE `bug_1127450`; +CREATE TABLE `bug_1127450`.`original` ( + id int, + t text CHARACTER SET utf8, + PRIMARY KEY(id) +) engine=InnoDB DEFAULT CHARSET=utf8; +CREATE TABLE `bug_1127450`.`copy` ( + id int, + t text CHARACTER SET utf8, + PRIMARY KEY(id) +) engine=InnoDB DEFAULT CHARSET=utf8;