From 798eaaa2085e1c4951a17c179b14c953ea445bf6 Mon Sep 17 00:00:00 2001 From: Sveta Smirnova Date: Tue, 7 Feb 2023 18:00:32 +0300 Subject: [PATCH] PT-2123 wide character in print for pt archiver (#583) * PT-2123 pt-archiver gives error "Wide character in print at /usr/bin/pt-archiver line 6815" when using --bulk-insert while using character set alias Added check if source DSN has character set UTF specified while option --charset is not provided In this case it is safe to open bulk insert data file in utf8 mode. * PT-2123 pt-archiver gives error "Wide character in print at /usr/bin/pt-archiver line 6815" when using --bulk-insert while using character set alias Removed unrelated row in t/pt-archiver/samples/pt-2123.sql * PT-2123 pt-archiver gives error "Wide character in print at /usr/bin/pt-archiver line 6815" when using --bulk-insert while using character set alias util/update-modules for pt-archiver --- bin/pt-archiver | 10 +++++++--- t/pt-archiver/bulk_insert.t | 26 ++++++++++++++++++++++++++ t/pt-archiver/samples/pt-2123.sql | 20 ++++++++++++++++++++ 3 files changed, 53 insertions(+), 3 deletions(-) create mode 100644 t/pt-archiver/samples/pt-2123.sql diff --git a/bin/pt-archiver b/bin/pt-archiver index a797e77e..3d4a29fc 100755 --- a/bin/pt-archiver +++ b/bin/pt-archiver @@ -1958,7 +1958,7 @@ sub parse { my $engine = $self->get_engine($ddl); - my @defs = $ddl =~ m/^(\s+`.*?),?$/gm; + my @defs = $ddl =~ m/(?:(?<=,\n)|(?<=\(\n))(\s+`(?:.|\n)+?`.+?),?\n/g; my @cols = map { $_ =~ m/`([^`]+)`/ } @defs; PTDEBUG && _d('Table cols:', join(', ', map { "`$_`" } @cols)); @@ -2139,7 +2139,7 @@ sub get_keys { my $clustered_key = undef; KEY: - foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY .*)$/gm ) { + foreach my $key ( $ddl =~ m/^ ((?:[A-Z]+ )?KEY \(?`[\s\S]*?`\),?)$/gm ) { next KEY if $key =~ m/FOREIGN/; @@ -2150,7 +2150,7 @@ sub get_keys { $key =~ s/USING HASH/USING BTREE/; } - my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \((.+)\)/; + my ( $type, $cols ) = $key =~ m/(?:USING (\w+))? \(([\s\S]+?)\)/; my ( $special ) = $key =~ m/(FULLTEXT|SPATIAL)/; $type = $type || $special || 'BTREE'; my ($name) = $key =~ m/(PRIMARY|`[^`]*`)/; @@ -6777,6 +6777,10 @@ sub main { require File::Temp; $bulkins_file = File::Temp->new( SUFFIX => 'pt-archiver' ) or die "Cannot open temp file: $OS_ERROR\n"; + if ( !$charset && $src->{info}->{charset} + && $src->{info}->{charset} =~ /utf/ ) { + binmode($bulkins_file, ':utf8') + } binmode($bulkins_file, $charset) or die "Cannot set $charset as an encoding for the bulk-insert " . "file: $OS_ERROR"; diff --git a/t/pt-archiver/bulk_insert.t b/t/pt-archiver/bulk_insert.t index f3aa8ba3..3bfd7fff 100644 --- a/t/pt-archiver/bulk_insert.t +++ b/t/pt-archiver/bulk_insert.t @@ -139,6 +139,32 @@ for my $char ( "\N{KATAKANA LETTER NI}", "\N{U+DF}" ) { "Warns about the UTF-8 bug in DBD::mysql::VERSION lt '4', quiet otherwise" ); } + +# ############################################################################# +# PT-2123: pt-archiver gives error "Wide character in print at +# /usr/bin/pt-archiver line 6815" when using --bulk-insert +# ############################################################################# +$sb->load_file('master', 't/pt-archiver/samples/pt-2123.sql'); + +$dbh->do('set names "utf8mb4"'); +my $original_rows = $dbh->selectall_arrayref('select col2 from pt_2123.t1 where col1=5'); + +$output = output( + sub { pt_archiver::main( + '--source', 'L=1,h=127.1,P=12345,D=pt_2123,t=t1,u=msandbox,p=msandbox,A=utf8mb4', + '--dest', 'L=1,h=127.1,P=12345,D=pt_2123,t=t2,u=msandbox,p=msandbox,A=utf8mb4', + qw(--where col1=5 --bulk-insert --limit=100 --purge)) + }, +); + +my $archived_rows = $dbh->selectall_arrayref('select col2 from pt_2123.t2'); + +is_deeply( + $original_rows, + $archived_rows, + "UTF8 characters copied successfully with --bulk-insert" +); + # ############################################################################# # Done. # ############################################################################# diff --git a/t/pt-archiver/samples/pt-2123.sql b/t/pt-archiver/samples/pt-2123.sql new file mode 100644 index 00000000..6fe2da27 --- /dev/null +++ b/t/pt-archiver/samples/pt-2123.sql @@ -0,0 +1,20 @@ +SET NAMES utf8mb4; +DROP DATABASE IF EXISTS pt_2123; +CREATE DATABASE pt_2123; + +CREATE TABLE `pt_2123`.`t1` ( + `col1` int(11) NOT NULL AUTO_INCREMENT, + `col2` varchar(3) DEFAULT NULL, + PRIMARY KEY (`col1`) +) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4; + +CREATE TABLE `pt_2123`.`t2` ( + `col1` int(11) NOT NULL AUTO_INCREMENT, + `col2` varchar(3) DEFAULT NULL, + PRIMARY KEY (`col1`) +) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4; + +insert into pt_2123.t1 (col2) values ('あ'); +insert into pt_2123.t1 (col2) values ('あ'); +insert into pt_2123.t1 (col2) values ('あ'); +insert into pt_2123.t1 (col2) values ('あ');