diff --git a/bin/pt-archiver b/bin/pt-archiver index 3bd62513..541438b8 100755 --- a/bin/pt-archiver +++ b/bin/pt-archiver @@ -2121,6 +2121,12 @@ sub fill_in_dsn { $dsn->{D} ||= $db; } +my %encoding_aliases = ( + 'utf-8' => 'utf8', + 'iso-8859-1' => 'latin1', + 'latin-1' => 'latin1', +); + sub get_dbh { my ( $self, $cxn_string, $user, $pass, $opts ) = @_; $opts ||= {}; @@ -2129,7 +2135,7 @@ sub get_dbh { RaiseError => 1, PrintError => 0, ShowErrorStatement => 1, - mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0), + mysql_enable_utf8 => ($cxn_string =~ m/charset=utf-?8/i ? 1 : 0), }; @{$defaults}{ keys %$opts } = values %$opts; @@ -2182,7 +2188,7 @@ sub get_dbh { PTDEBUG && _d($dbh, $sql); my ($sql_mode) = eval { $dbh->selectrow_array($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error getting the current SQL_MODE: $EVAL_ERROR"; } $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1' @@ -2192,15 +2198,18 @@ sub get_dbh { PTDEBUG && _d($dbh, $sql); eval { $dbh->do($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error setting SQL_QUOTE_SHOW_CREATE, SQL_MODE" + . ($sql_mode ? " and $sql_mode" : '') + . ": $EVAL_ERROR"; } - if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) { - $sql = "/*!40101 SET NAMES $charset*/"; + if ( my ($charset) = $cxn_string =~ m/charset=([-\w]+)/ ) { + $charset = $encoding_aliases{lc($charset)} || lc($charset); # lowercase the fallback too, so e.g. 'UTF8' matches the case-sensitive 'utf8' check below + $sql = qq{/*!40101 SET NAMES "$charset"*/}; PTDEBUG && _d($dbh, ':', $sql); eval { $dbh->do($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error setting NAMES to $charset: $EVAL_ERROR"; } PTDEBUG && _d('Enabling charset for STDOUT'); if ( $charset eq 'utf8' ) { @@ -2212,12 +2221,12 @@ sub get_dbh { } } - if ( $self->prop('set-vars') ) { - $sql = "SET " . 
$self->prop('set-vars'); + if ( my $var = $self->prop('set-vars') ) { + $sql = "SET $var"; PTDEBUG && _d($dbh, ':', $sql); eval { $dbh->do($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error setting $var: $EVAL_ERROR"; } } } diff --git a/lib/DSNParser.pm b/lib/DSNParser.pm index 0d73556d..983c3e2f 100644 --- a/lib/DSNParser.pm +++ b/lib/DSNParser.pm @@ -263,6 +263,18 @@ sub fill_in_dsn { $dsn->{D} ||= $db; } +# MySQL won't resolve iso-8859-1 or latin-1 as latin1, while Perl would, so +# we hardcode the aliases here. The UTF-8 case is a bit different; +# MySQL doesn't really support UTF-8 in SET NAMES, instead using +# their own definition, which is constrained to codepoints 0..0xFFFF, so +# rightfully calls it something different: utf8. I'm not actually sure +# if the naming convention is intended or plain lucky on their part, though. +my %encoding_aliases = ( + 'utf-8' => 'utf8', + 'iso-8859-1' => 'latin1', + 'latin-1' => 'latin1', +); + # Actually opens a connection, then sets some things on the connection so it is # the way the Maatkit tools will expect. Tools should NEVER open their own # connection or use $dbh->reconnect, or these things will not take place! @@ -274,7 +286,7 @@ sub get_dbh { RaiseError => 1, PrintError => 0, ShowErrorStatement => 1, - mysql_enable_utf8 => ($cxn_string =~ m/charset=utf8/i ? 1 : 0), + mysql_enable_utf8 => ($cxn_string =~ m/charset=utf-?8/i ? 1 : 0), }; @{$defaults}{ keys %$opts } = values %$opts; @@ -336,7 +348,7 @@ sub get_dbh { PTDEBUG && _d($dbh, $sql); my ($sql_mode) = eval { $dbh->selectrow_array($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error getting the current SQL_MODE: $EVAL_ERROR"; } $sql = 'SET @@SQL_QUOTE_SHOW_CREATE = 1' @@ -346,16 +358,19 @@ sub get_dbh { PTDEBUG && _d($dbh, $sql); eval { $dbh->do($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error setting SQL_QUOTE_SHOW_CREATE, SQL_MODE" + . ($sql_mode ? " and $sql_mode" : '') + . 
": $EVAL_ERROR"; } # Set character set and binmode on STDOUT. - if ( my ($charset) = $cxn_string =~ m/charset=(\w+)/ ) { - $sql = "/*!40101 SET NAMES $charset*/"; + if ( my ($charset) = $cxn_string =~ m/charset=([-\w]+)/ ) { + $charset = $encoding_aliases{lc($charset)} || lc($charset); # lowercase the fallback too, so e.g. 'UTF8' matches the case-sensitive 'utf8' check below + $sql = qq{/*!40101 SET NAMES "$charset"*/}; PTDEBUG && _d($dbh, ':', $sql); eval { $dbh->do($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error setting NAMES to $charset: $EVAL_ERROR"; } PTDEBUG && _d('Enabling charset for STDOUT'); if ( $charset eq 'utf8' ) { @@ -367,12 +382,12 @@ sub get_dbh { } } - if ( $self->prop('set-vars') ) { - $sql = "SET " . $self->prop('set-vars'); + if ( my $var = $self->prop('set-vars') ) { + $sql = "SET $var"; PTDEBUG && _d($dbh, ':', $sql); eval { $dbh->do($sql) }; if ( $EVAL_ERROR ) { - die $EVAL_ERROR; + die "Error setting $var: $EVAL_ERROR"; } } } diff --git a/t/pt-archiver/file.t b/t/pt-archiver/file.t index de5a337b..98e17bd2 100644 --- a/t/pt-archiver/file.t +++ b/t/pt-archiver/file.t @@ -23,7 +23,7 @@ if ( !$dbh ) { plan skip_all => 'Cannot connect to sandbox master'; } else { - plan tests => 12; + plan tests => 13; } my $output; @@ -80,25 +80,28 @@ sub test_charset { $sb->load_file('master', 't/pt-archiver/samples/table1.sql'); local $@; - eval { + my ($out, $exit_val) = full_output( sub { pt_archiver::main("-c", "b,c", qw(--where 1=1 --header), "--source", "D=test,t=table_1,F=$cnf", '--file', '/tmp/%Y-%m-%d-%D_%H:%i:%s.%t', '--no-check-charset', '--charset', $charset, ); - }; + }); - ok !$@, "--charset $charset works"; + is($exit_val, + 0, + "--charset $charset works" + ) or diag($out); } -for my $charset (qw(latin1 iso-8859-1 utf8 UTF-8 )) { +for my $charset (qw(latin1 iso-8859-1 latin-1 utf8 UTF-8 UTF8 )) { test_charset($charset); } my $warning; local $SIG{__WARN__} = sub { $warning .= shift }; -my $out = output( sub { +my ($out) = 
full_output( sub { $sb->load_file('master', 't/pt-archiver/samples/table1.sql'); 
pt_archiver::main("-c", "b,c", qw(--where 1=1 --header), "--source", "D=test,t=table_1,F=$cnf", @@ -109,8 +112,7 @@ my $out = output( sub { }, ); -like($out, qr/\QCannot open :encoding(some_chars/, "..but an unknown charset fails"); -like($warning, qr/Cannot find encoding/, "..and throws a useful warning"); +like($out, qr/\QError setting NAMES to some_charset_that_doesn/, "..but an unknown charset fails"); # ############################################################################# # Done.